From 15f3b920ad7eb7fcca3afee14d16049db2046d4b Mon Sep 17 00:00:00 2001 From: Nathan Luehr Date: Wed, 14 Feb 2018 16:27:23 -0800 Subject: [PATCH 001/960] Fix __shared__ types with non-empty constructor std::complex has a non-empty constructor (zero assignment) that is not compatible with CUDA __shared__ memory. This fixes current reliance on undefined behavior. (and removes an unnecessary run-time initialization). --- .../core/kernels/reduction_gpu_kernels.cu.h | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..95a3e222b5 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -244,6 +244,33 @@ __global__ void RowReduceKernel( if (row < num_rows && lane == 0) out[row] = sum; } +template +struct storage_type { + T1 val; + __host__ __device__ storage_type() {} + __host__ __device__ operator T1() { return val; } + __host__ __device__ storage_type& operator=(const T1& in) { + val = in; + return *this; + } +}; + +template +struct storage_type> { + T2 real; + T2 imag; + __host__ __device__ storage_type() {} + __host__ __device__ operator std::complex() { + return std::complex(real, imag); + } + __host__ __device__ storage_type>& operator=( + const std::complex& in) { + real = in.real(); + imag = in.imag(); + return *this; + } +}; + // Works only if there are <= 16 columns // each warps sums over multiple rows at once template @@ -268,7 +295,7 @@ __global__ void ColumnReduceMax16ColumnsKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. - __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += rows_per_warp * gridDim.y * blockDim.y; for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { @@ -294,7 +321,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( if (blockDim.y > 1) { for (int row = 1; row < blockDim.y; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } } @@ -316,7 +344,7 @@ __global__ void ColumnReduceKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. - __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += gridDim.y * blockDim.y; @@ -347,7 +375,8 @@ __global__ void ColumnReduceKernel( min(blockDim.y, num_rows - blockIdx.y * blockDim.y); for (int row = 1; row < numRowsThisBlock; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } out[col * gridDim.y + blockIdx.y] = s; -- GitLab From 08a3509b2ecbd9fdfdb4f50b81e11f491291647e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Sun, 18 Feb 2018 17:02:13 +0100 Subject: [PATCH 002/960] Add NumPy style warning when casting complex to float --- tensorflow/python/ops/math_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index da9957aa2a..2c422ebca4 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -169,6 +169,7 @@ from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gen_spectral_ops from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import state_ops +from tensorflow.python.platform import tf_logging as logging # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_math_ops import * @@ -830,6 +831,8 @@ def to_float(x, name="ToFloat"): Raises: TypeError: If `x` cannot be cast to the `float32`. """ + if x.dtype.is_complex: + logging.warn('Casting complex to real discards imaginary part.') return cast(x, dtypes.float32, name=name) -- GitLab From b808636c795e7a96a1e7264076a95d3e9343f430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Sun, 18 Feb 2018 20:46:07 +0100 Subject: [PATCH 003/960] Fix quotes --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 2c422ebca4..4c7dc9559f 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -832,7 +832,7 @@ def to_float(x, name="ToFloat"): TypeError: If `x` cannot be cast to the `float32`. """ if x.dtype.is_complex: - logging.warn('Casting complex to real discards imaginary part.') + logging.warn("Casting complex to real discards imaginary part.") return cast(x, dtypes.float32, name=name) -- GitLab From 62a05fe71ba5157e7abeb291f4b8b6ac7abf97fb Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Tue, 27 Feb 2018 11:51:05 +0000 Subject: [PATCH 004/960] Ensure that the backend_deps is a non-frozen object --- tensorflow/compiler/xla/tests/build_defs.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl index 610302ac12..eac2eb286c 100644 --- a/tensorflow/compiler/xla/tests/build_defs.bzl +++ b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -137,7 +137,8 @@ def xla_test(name, backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"] this_backend_tags += ["requires-gpu-sm35"] elif backend in plugins: - backend_deps = plugins[backend]["deps"] + backend_deps = [] + backend_deps += plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] this_backend_tags += plugins[backend]["tags"] this_backend_args += plugins[backend]["args"] -- GitLab From 2e98952221bfe83fadc3054e66b2ff3c23c44a24 Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Tue, 27 Feb 2018 13:52:13 +0000 Subject: [PATCH 005/960] Allow the large R1 slice tests to be disabled --- tensorflow/compiler/xla/tests/slice_test.cc | 35 +++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index fe36df160d..50cd56d2d4 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -211,6 +211,9 @@ class SliceR1Test : public ClientLibraryTestBase, } }; +// A version of SliceR1Test used to label and disable 'large' tests +class SliceR1LargeTest : public SliceR1Test {}; + string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { const R1Spec& spec = data.param; return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, @@ -230,6 +233,18 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } +XLA_TEST_P(SliceR1LargeTest, DoIt_F32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_F64) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_S32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U64) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_S64) { Run(GetParam()); } + // Tests for R1 slice ops. // The format for each testcase is {input size, start, limit, stride}. // clang-format off @@ -237,12 +252,6 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestInstantiation, SliceR1Test, ::testing::Values( -// TODO(b/69425338): This uses too much memory on GPU. -#ifndef XLA_TEST_BACKEND_GPU - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, -#endif R1Spec{10, 0, 0, 1}, R1Spec{10, 7, 7, 1}, R1Spec{10, 0, 5, 1}, @@ -278,6 +287,20 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestDataToString ); +// TODO(b/69425338): This uses too much memory on GPU. +#ifndef XLA_TEST_BACKEND_GPU +INSTANTIATE_TEST_CASE_P( + SliceR1TestBigSlicesInstantiation, + SliceR1LargeTest, + ::testing::Values( + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1} + ), + SliceR1TestDataToString +); +#endif + INSTANTIATE_TEST_CASE_P( SliceStridedR1TestInstantiation, SliceR1Test, -- GitLab From 1e60c2ff7f5537bbaeb9a7cdadbf2d4e7aa441e2 Mon Sep 17 00:00:00 2001 From: Peter Lee Date: Sat, 3 Mar 2018 13:51:25 +0800 Subject: [PATCH 006/960] add support for other cpu tyoe(aarch64) for tensorRT --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 97f46757ee..711dee2d65 100644 --- a/configure.py +++ b/configure.py @@ -40,7 +40,7 @@ _DEFAULT_CUDA_PATH = '/usr/local/cuda' _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda' _DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing ' 'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION) -_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/x86_64-linux-gnu' +_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine() _TF_OPENCL_VERSION = '1.2' _DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp' _DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include' -- GitLab From 70b60d9cce9a7879fbff396f283f19bed3b39793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Mon, 5 Mar 2018 14:08:19 +0100 Subject: [PATCH 007/960] Move complex->float warning into tf.cast --- tensorflow/python/ops/math_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 56d58016b8..1608393c16 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -762,6 +762,8 @@ def cast(x, dtype, name=None): Raises: TypeError: If `x` cannot be cast to the `dtype`. """ + if x.dtype.is_complex and dtype.is_floating: + logging.warn("Casting complex to real discards imaginary part.") base_type = dtypes.as_dtype(dtype).base_dtype with ops.name_scope(name, "Cast", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): @@ -826,8 +828,6 @@ def to_float(x, name="ToFloat"): Raises: TypeError: If `x` cannot be cast to the `float32`. """ - if x.dtype.is_complex: - logging.warn("Casting complex to real discards imaginary part.") return cast(x, dtypes.float32, name=name) -- GitLab From cf897725ab8c3f09d973c5f242b05ca7eb258801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Mon, 5 Mar 2018 18:07:22 +0100 Subject: [PATCH 008/960] Check dtype after convert_to_tensor --- tensorflow/python/ops/math_ops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 1608393c16..e315a09ea9 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -762,22 +762,22 @@ def cast(x, dtype, name=None): Raises: TypeError: If `x` cannot be cast to the `dtype`. """ - if x.dtype.is_complex and dtype.is_floating: - logging.warn("Casting complex to real discards imaginary part.") base_type = dtypes.as_dtype(dtype).base_dtype with ops.name_scope(name, "Cast", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): values_cast = cast(x.values, base_type, name=name) - return sparse_tensor.SparseTensor(x.indices, values_cast, x.dense_shape) + x = sparse_tensor.SparseTensor(x.indices, values_cast, x.dense_shape) else: # TODO(josh11b): If x is not already a Tensor, we could return # ops.convert_to_tensor(x, dtype=dtype, ...) here, but that # allows some conversions that cast() can't do, e.g. casting numbers to # strings. x = ops.convert_to_tensor(x, name="x") - if x.dtype.base_dtype == base_type: - return x - return gen_math_ops.cast(x, base_type, name=name) + if x.dtype.base_dtype != base_type: + x = gen_math_ops.cast(x, base_type, name=name) + if x.dtype.is_complex and dtype.is_floating: + logging.warn("Casting complex to real discards imaginary part.") + return x @tf_export("saturate_cast") -- GitLab From f7a04228e0368f3c9bad22a66fe7267e41ecb128 Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Thu, 8 Mar 2018 07:05:53 +0000 Subject: [PATCH 009/960] Register half in some ops which support all floating point types --- tensorflow/core/ops/nn_ops.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 910fbaca9e..6d4a3fda51 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -472,7 +472,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -490,7 +490,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -589,7 +589,7 @@ REGISTER_OP("AvgPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::Pool3DShape); REGISTER_OP("AvgPool3DGrad") @@ -600,7 +600,7 @@ REGISTER_OP("AvgPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -618,7 +618,7 @@ REGISTER_OP("MaxPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float}") + .Attr("T: {half, bfloat16, float}") .SetShapeFn(shape_inference::Pool3DShape); REGISTER_OP("MaxPool3DGrad") @@ -630,8 +630,8 @@ REGISTER_OP("MaxPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float} = DT_FLOAT") - .Attr("TInput: {bfloat16, float} = DT_FLOAT") + .Attr("T: {half, bfloat16, float} = DT_FLOAT") + .Attr("TInput: {half, bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 5); }); -- GitLab From 6834f2ffcfd67b0fb198a3202341137a98fb9983 Mon Sep 17 00:00:00 2001 From: Luke Iwanski Date: Tue, 14 Nov 2017 15:12:42 +0000 Subject: [PATCH 010/960] Fixes automerge --- third_party/sycl/sycl/BUILD.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl index 21b1a2bbf7..b7e9aa8edb 100755 --- a/third_party/sycl/sycl/BUILD.tpl +++ b/third_party/sycl/sycl/BUILD.tpl @@ -21,7 +21,7 @@ config_setting( name = "using_sycl_trisycl", define_values = { "using_sycl": "true", - "using_trisycl": "false", + "using_trisycl": "true", }, ) -- GitLab From 3bed12b81fe5ffc04e14ccaaf1b25ace4222f505 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 8 Mar 2018 13:56:11 -0800 Subject: [PATCH 011/960] Update version string to 1.7.0rc0 everywhere. --- tensorflow/core/public/version.h | 4 ++-- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 9 ++++++-- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 11 files changed, 42 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 22f2c02b78..15082bb337 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 6 +#define TF_MINOR_VERSION 7 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 0481c97885..733c7a6625 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 8f89898c92..421215f367 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0ee9c849e1..7758520c50 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0 + 1.7.0-rc0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0 + 1.7.0-rc0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0 + 1.7.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.6.0 + 1.7.0-rc0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0.jar HelloTF.java
+
javac -cp libtensorflow-1.7.0-rc0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.7.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.7.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 3e8744bf9d..f4d4e65548 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -189,7 +189,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -294,7 +294,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -647,14 +647,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -666,14 +666,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -685,14 +685,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -704,14 +704,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 94defcd18c..055a463718 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py2-none-any.whl @@ -523,7 +523,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py2-none-any.whl
 
@@ -531,5 +531,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index c09c9c2c0c..10840295f9 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0 on Linux: +for TensorFlow 1.7.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc0-py2-none-any.whl
 
## Validate your installation @@ -459,6 +459,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** + + @@ -478,6 +480,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
+ @@ -492,6 +495,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
+ + diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 22c73c3fe1..11f476d12c 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 3690e7dfe5..037d13116e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.6 +ARG TF_BRANCH=r1.7 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 69ba340f92..1fcb6428b2 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 815ea8157d..69825a0d7c 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0' +_VERSION = '1.7.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 2a849d5c1fda91c7cbb16786354d5143519da650 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 17:37:33 -0800 Subject: [PATCH 012/960] Disable checkpointable_utils_test failed http://ci.tensorflow.org/view/Release/job/release-debian-cpu/99/consoleFull --- tensorflow/contrib/eager/python/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 5a6251b871..fad833dd2d 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -266,7 +266,10 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = ["no_windows"], # TODO: needs investigation on Windows + tags = [ + "no_oss", # b/74395663 + "no_windows", # TODO: needs investigation on Windows + ], ) filegroup( -- GitLab From b006115403f4a6592dee630132b0cf9c6519a922 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 19:54:14 -0800 Subject: [PATCH 013/960] Make spinn_test less flaky (#17580) --- tensorflow/contrib/eager/python/examples/spinn/spinn_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 081b0af14f..3f9a7818a5 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -417,7 +417,6 @@ class SpinnTest(test_util.TensorFlowTestCase): if event.summary.value and event.summary.value[0].tag == "train/loss"] self.assertEqual(config.epochs, len(train_losses)) - self.assertLess(train_losses[-1], train_losses[0]) # 5. Verify that checkpoints exist and contains all the expected variables. self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) -- GitLab From 32584800fe9032396713baf413914ddd391152dc Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 9 Mar 2018 00:14:49 -0800 Subject: [PATCH 014/960] Hide os from docs generator (#17577) Hide os so we don't generate api_docs for it --- tensorflow/contrib/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index bcf0d7b48b..669d611b01 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -95,6 +95,7 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") +del os del LazyLoader del absolute_import -- GitLab From 4638dd1923055b9aa80ec643c1ccc3a78e41069a Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Fri, 9 Mar 2018 10:19:13 -0800 Subject: [PATCH 015/960] Fix pylint error in single_return.py --- tensorflow/contrib/py2tf/converters/single_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 90bc22008f..1194b98f5e 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(gast.NodeVisitor, self).__init__() + super(DetectReturnInUnsupportedControlFlow, self).__init__() def visit_While(self, node): self.cant_return = True -- GitLab From b8f4e763171dcab40defcee1a981c3d2d32aaeca Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 9 Mar 2018 10:54:37 -0800 Subject: [PATCH 016/960] Adding the new variables to path rather than overriding them. --- tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index e1b56b9a25..7d471b4703 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -31,5 +31,5 @@ export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" -export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +export PATH="$PATH:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" build_libtensorflow_tarball "-cpu-darwin-$(uname -m)" -- GitLab From 41b5fd15e72756dd6ee3a3395db306f107f1e628 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Fri, 9 Mar 2018 11:09:13 -0800 Subject: [PATCH 017/960] Disable tensorflow/contrib/learn:monitors_test for pip gpu --- tensorflow/contrib/learn/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 895f70eecf..cc69678a2d 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -227,6 +227,7 @@ py_test( size = "small", srcs = ["python/learn/monitors_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip_gpu"], # b/74437598 deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", -- GitLab From 8ed55f4d54fbc85e2cd605aa6540b2fb5909500d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl=20Thom=C3=A9?= Date: Mon, 12 Mar 2018 10:43:32 +0100 Subject: [PATCH 018/960] Change to tf.DType --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e315a09ea9..c095be2aaf 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -775,7 +775,7 @@ def cast(x, dtype, name=None): x = ops.convert_to_tensor(x, name="x") if x.dtype.base_dtype != base_type: x = gen_math_ops.cast(x, base_type, name=name) - if x.dtype.is_complex and dtype.is_floating: + if x.dtype.is_complex and base_type.is_floating: logging.warn("Casting complex to real discards imaginary part.") return x -- GitLab From 3abebb0618cb6f830f5afaf2cd0b8c938e584aad Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Mon, 12 Mar 2018 11:36:17 -0700 Subject: [PATCH 019/960] Update RELEASE.md for r1.7 (#17583) --- RELEASE.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 6f54dee58f..c63d9f20c9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,63 @@ +# Release 1.7.0 + +## Major Features And Improvements +* Eager mode is moving out of contrib, try `tf.enable_eager_execution()`. +* Graph rewrites emulating fixed-point quantization compatible with TensorFlow Lite, supported by new `tf.contrib.quantize` package. +* Easily customize gradient computation with `tf.custom_gradient`. +* [TensorBoard Debugger Plugin](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md), the graphical user interface (GUI) of TensorFlow Debugger (tfdbg), is now in alpha. +* Experimental support for reading a sqlite database as a `Dataset` with new `tf.contrib.data.SqlDataset`. +* Distributed Mutex / CriticalSection added to `tf.contrib.framework.CriticalSection`. +* Better text processing with `tf.regex_replace`. +* Easy, efficient sequence input with `tf.contrib.data.bucket_by_sequence_length` + +## Bug Fixes and Other Changes +* Accelerated Linear Algebra (XLA): + * Add `MaxPoolGradGrad` support for XLA + * CSE pass from Tensorflow is now disabled in XLA. +* `tf.data`: + * `tf.data.Dataset` + * Add support for building C++ Dataset op kernels as external libraries, using the `tf.load_op_library()` mechanism. + * `Dataset.list_files()` now shuffles its output by default. + * `Dataset.shuffle(..., seed=tf.constant(0, dtype=tf.int64))` now yields the same sequence of elements as `Dataset.shuffle(..., seed=0)`. + * Add `num_parallel_reads` argument to `tf.data.TFRecordDataset`. +* `tf.contrib`: + * `tf.contrib.bayesflow.halton_sequence` now supports randomization. + * Add support for scalars in `tf.contrib.all_reduce`. + * Add `effective_sample_size` to `tf.contrib.bayesflow.mcmc_diagnostics`. + * Add `potential_scale_reduction` to `tf.contrib.bayesflow.mcmc_diagnostics`. + * Add `BatchNormalization`, `Kumaraswamy` bijectors. + * Deprecate `tf.contrib.learn`. Please check contrib/learn/README.md for instructions on how to convert existing code. + * `tf.contrib.data` + * Remove deprecated `tf.contrib.data.Dataset`, `tf.contrib.data.Iterator`, `tf.contrib.data.FixedLengthRecordDataset`, `tf.contrib.data.TextLineDataset`, and `tf.contrib.data.TFRecordDataset` classes. + * Added `bucket_by_sequence_length`, `sliding_window_batch`, and `make_batched_features_dataset` + * Remove unmaintained `tf.contrib.ndlstm`. You can find it externally at https://github.com/tmbarchive/tfndlstm. + * Moved most of `tf.contrib.bayesflow` to its own repo: `tfp` +* Other: + * tf.py_func now reports the full stack trace if an exception occurs. + * Integrate `TPUClusterResolver` with GKE's integration for Cloud TPUs. + * Add a library for statistical testing of samplers. + * Add Helpers to stream data from the GCE VM to a Cloud TPU. + * Integrate ClusterResolvers with TPUEstimator. + * Unify metropolis_hastings interface with HMC kernel. + * Move LIBXSMM convolutions to a separate --define flag so that they are disabled by default. + * Fix `MomentumOptimizer` lambda. + * Reduce `tfp.layers` boilerplate via programmable docstrings. + * Add `auc_with_confidence_intervals`, a method for computing the AUC and confidence interval with linearithmic time complexity. + * `regression_head` now accepts customized link function, to satisfy the usage that user can define their own link function if the `array_ops.identity` does not meet the requirement. + * Fix `initialized_value` and `initial_value` behaviors for `ResourceVariables` created from `VariableDef` protos. + * Add TensorSpec to represent the specification of Tensors. + * Constant folding pass is now deterministic. + * Support `float16` `dtype` in `tf.linalg.*`. + * Add `tf.estimator.export.TensorServingInputReceiver` that allows `tf.estimator.Estimator.export_savedmodel` to pass raw tensors to model functions. + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Abe, Alistair Low, Andy Kernahan, Appledore, Ben, Ben Barsdell, Boris Pfahringer, Brad Wannow, Brett Koonce, Carl Thomé, cclauss, Chengzhi Chen, Chris Drake, Christopher Yeh, Clayne Robison, Codrut Grosu, Daniel Trebbien, Danny Goodman, David Goodwin, David Norman, Deron Eriksson, Donggeon Lim, Donny Viszneki, DosLin, DylanDmitri, Francisco Guerrero, Fred Reiss, gdh1995, Giuseppe, Glenn Weidner, gracehoney, Guozhong Zhuang, Haichen "Hc" Li, Harald Husum, harumitsu.nobuta, Henry Spivey, hsm207, Jekyll Song, Jerome, Jiongyan Zhang, jjsjann123, John Sungjin Park, Johnson145, JoshVarty, Julian Wolff, Jun Wang, June-One, Kamil Sindi, Kb Sriram, Kdavis-Mozilla, Kenji, lazypanda1, Liang-Chi Hsieh, Loo Rong Jie, Mahesh Bhosale, MandarJKulkarni, ManHyuk, Marcus Ong, Marshal Hayes, Martin Pool, matthieudelaro, mdfaijul, mholzel, Michael Zhou, Ming Li, Minmin Sun, Myungjoo Ham, MyungsungKwak, Naman Kamra, Peng Yu, Penghao Cen, Phil, Raghuraman-K, resec, Rohin Mohanadas, Sandeep N Gupta, Scott Tseng, seaotterman, Seo Sanghyeon, Sergei Lebedev, Ted Chang, terrytangyuan, Tim H, tkunic, Tod, vihanjain, Yan Facai (颜发才), Yin Li, Yong Tang, Yukun Chen, Yusuke Yamada + + + # Release 1.6.0 ## Breaking Changes -- GitLab From d392b1c9ebf131b9ac64ff289d26e43afea21c10 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 9 Mar 2018 18:17:43 -0800 Subject: [PATCH 020/960] Fix the windows build --- tensorflow/core/kernels/snapshot_op.cc | 30 +++++++++++++++++++ tensorflow/core/kernels/snapshot_op.h | 26 +++++----------- tensorflow/core/kernels/snapshot_op_gpu.cu.cc | 10 +++---- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc index 50157d5d48..fe04dcf72e 100644 --- a/tensorflow/core/kernels/snapshot_op.cc +++ b/tensorflow/core/kernels/snapshot_op.cc @@ -22,6 +22,26 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +template +class SnapshotOp : public OpKernel { + public: + explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + Tensor* output = nullptr; + // Try to use buffer forwarding to avoid an explicit copy. + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + functor::Snapshot functor; + functor(context->eigen_device(), input.flat(), + output->flat()); + } + } +}; #define REGISTER_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -31,6 +51,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice; TF_CALL_POD_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL +#if GOOGLE_CUDA +#define REGISTER_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ + SnapshotOp); + +TF_CALL_POD_TYPES(REGISTER_KERNEL); +#undef REGISTER_KERNEL +#endif + #if TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SyclDevice; #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h index b94834f159..a18065d42b 100644 --- a/tensorflow/core/kernels/snapshot_op.h +++ b/tensorflow/core/kernels/snapshot_op.h @@ -26,29 +26,19 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { +namespace functor { +// Functor used by SnapshotOp. template -class SnapshotOp : public OpKernel { - public: - explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - Tensor* output = nullptr; - // Try to use buffer forwarding to avoid an explicit copy. - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 0, input.shape(), &output)); - if (!output->SharesBufferWith(input)) { - // We had to allocate a new buffer since the refcount on the input was - // greater than 1. Copy the input to the new buffer. - const Device& device = context->eigen_device(); - device.memcpy(output->template flat().data(), - input.template flat().data(), - input.NumElements() * sizeof(Scalar)); - } +struct Snapshot { + void operator()(const Device& device, + typename TTypes::ConstTensor input, + typename TTypes::Tensor output) { + device.memcpy(output.data(), input.data(), input.size() * sizeof(Scalar)); } }; +} // namespace functor } // namespace tensorflow #endif // TENSORFLOW_KERNELS_SNAPSHOT_OP_H_ diff --git a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc index 52070be838..f1c0ed2eae 100644 --- a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc +++ b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc @@ -24,13 +24,11 @@ limitations under the License. namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -#define REGISTER_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ - SnapshotOp); +// Definition of the GPU implementations declared in softsign_op.cc. +#define DEFINE_GPU_KERNELS(T) \ + template struct functor::Snapshot; -TF_CALL_POD_TYPES(REGISTER_KERNEL); -#undef REGISTER_KERNEL +TF_CALL_POD_TYPES(DEFINE_GPU_KERNELS); } // namespace tensorflow -- GitLab From 73f2da07577330648cd294d321545d089b600748 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Sat, 10 Mar 2018 22:24:27 -0800 Subject: [PATCH 021/960] Disable keras:convolutional_test. --- tensorflow/contrib/cmake/tf_tests.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 1c4ebd7f0c..e2ed5f6c73 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -208,6 +208,9 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py" # Test is flaky on Windows GPU builds (b/38283730). "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/gmm_test.py" + # Disable following manual tag in BUILD. + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py" + ) if (WIN32) set(tf_test_src_py_exclude -- GitLab From df4cbfa33d711c1fad107bfaea0862bfdc8c3fd8 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Mon, 12 Mar 2018 16:49:40 -0700 Subject: [PATCH 022/960] Make tensorflow/python:framework_importer_test large tensorflow/python:framework_importer_test sometime times out during release builds --- tensorflow/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 04e926ff16..6dd53ffdf6 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1048,7 +1048,7 @@ py_test( py_test( name = "framework_importer_test", - size = "medium", + size = "large", srcs = ["framework/importer_test.py"], main = "framework/importer_test.py", srcs_version = "PY2AND3", -- GitLab From 4b0bb6d5d01c0b2aa89df11b45ff8d728d4c2059 Mon Sep 17 00:00:00 2001 From: Siby Jose Plathottam Date: Mon, 12 Mar 2018 22:48:49 -0500 Subject: [PATCH 023/960] Fix to 'Model' object has no attribute '_container_nodes' error when using tf.keras.utils.plot_model(). Fix to #17633. Duplicate of #17658 'Model' object has no attribute '_container_nodes' error when using tf.keras.utils.plot_model(). Replaced if node_key in model._container_nodes: with if node_key in model._network_nodes: # pylint: disable=protected-access in tensorflow\python\keras_impl\keras\utils\vis_utils.py. --- tensorflow/python/keras/_impl/keras/utils/vis_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py index 45c1b92075..4761cece82 100644 --- a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py @@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): node_key = layer.name + '_ib-' + str(i) - if node_key in model._container_nodes: + if node_key in model._network_nodes: # pylint: disable=protected-access for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) -- GitLab From fa5c66ba74a505e4a4b8472332918798bb17bb39 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Sun, 11 Mar 2018 15:38:16 -0700 Subject: [PATCH 024/960] Fixes a race condition in function instantiation. Previously, if the same function was being concurrently instantiated and released: 1. Thread one could begin to instantiate the function, determine that it already existed in the runtime, then be preempted. 2. Thread two could release the handle on the function, causing it to be freed and removed from the `FunctionLibraryRuntime::items_` map. 3. Thread one could then incorrectly assume that the function still existed, and fail to find it in the `FunctionLibraryRuntime::items_` map, causing a segfault when it attempted to increment the refcount on an uninitialized object. PiperOrigin-RevId: 188661500 --- tensorflow/core/common_runtime/function.cc | 24 +++++++++++++++---- .../kernel_tests/filter_dataset_op_test.py | 8 +++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 3e937ceb64..7174a876f6 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -479,11 +479,26 @@ Status FunctionLibraryRuntimeImpl::Instantiate( InstantiateOptions options_copy(options); options_copy.target = device_name_; const string key = Canonicalize(function_name, attrs, options_copy); - *handle = parent_->GetHandle(key); - if (*handle != kInvalidHandle) { + + { mutex_lock l(mu_); - items_[parent_->GetHandleOnDevice(device_name_, *handle)]->Ref(); - return Status::OK(); + *handle = parent_->GetHandle(key); + if (*handle != kInvalidHandle) { + FunctionLibraryRuntime::LocalHandle handle_on_device = + parent_->GetHandleOnDevice(device_name_, *handle); + if (handle_on_device == kInvalidLocalHandle) { + return errors::Internal("LocalHandle not found for handle ", *handle, + "."); + } + auto item_handle = items_.find(handle_on_device); + if (item_handle == items_.end()) { + return errors::Internal("LocalHandle ", handle_on_device, + " for handle ", *handle, + " not found in items."); + } + item_handle->second->Ref(); + return Status::OK(); + } } Status s; @@ -536,6 +551,7 @@ Status FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) { } LocalHandle h = parent_->GetHandleOnDevice(device_name_, handle); + CHECK_NE(h, kInvalidLocalHandle); mutex_lock l(mu_); CHECK_EQ(1, items_.count(h)); Item* item = items_[h]; diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py index 2c71723167..4f2216f0a3 100644 --- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py @@ -176,6 +176,14 @@ class FilterDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testParallelFilters(self): + dataset = dataset_ops.Dataset.range(10).filter( + lambda x: math_ops.equal(x % 2, 0)) + iterators = [dataset.make_one_shot_iterator() for _ in range(10)] + next_elements = [iterator.get_next() for iterator in iterators] + with self.test_session() as sess: + self.assertEqual([0 for _ in range(10)], sess.run(next_elements)) + class FilterDatasetBenchmark(test.Benchmark): -- GitLab From 8198a84e1a584cd3d14acd0bd52e04cf2d66f341 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Tue, 13 Mar 2018 16:38:08 -0700 Subject: [PATCH 025/960] Update documentation --- tensorflow/contrib/tensorrt/README.md | 46 +++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 461e627e99..6eafc1754c 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -1,15 +1,15 @@ -Using TensorRT in TensorFlow -============================ +# Using TensorRT in TensorFlow + This module provides necessary bindings and introduces TRT_engine_op operator that wraps a subgraph in TensorRT. This is still a work in progress but should be useable with most common graphs. -Compilation ------------ +## Compilation + In order to compile the module, you need to have a local TensorRT -installation (libnvinfer.so and respective include files). During the +installation ( libnvinfer.so and respective include files ). During the configuration step, TensorRT should be enabled and installation path should be set. If installed through package managers (deb,rpm), configure script should find the necessary components from the system @@ -22,4 +22,38 @@ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py directory +will be available. An example use can be found in test/test_tftrt.py script + +## Installing TensorRT 3.0.4 + +In order to make use of TensorRT integration, you will need a local installation of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). Due to compiler compatibility, you will need to download and install the TensorRT 3.0.4 tarball for _Ubuntu 14.04_, i.e., **_TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz_**, even if you are using Ubuntu 16.04 or later. + +### Preparing TensorRT installation + +Once you have downloaded TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz, you will need to unpack it to an installation directory, which will be referred to as . Please replace with the full path of actual installation directory you choose in commands below. + +```shell +cd && tar -zxf /path/to/TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz +``` + +After unpacking the binaries, you have several options to use them: + +#### To run TensorFlow as a user without superuser privileges + +For a regular user without any sudo rights, you should add TensorRT to your `$LD_LIBRARY_PATH`: + + ```shell + export LD_LIBRARY_PATH=/TensorRT-3.0.4/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + ``` + +Then you are ready to use TensorFlow-TensorRT integration. `$LD_LIBRARY_PATH` must contain the path to TensorRT installation for TensorFlow-TensorRT integration to work. If you are using a VirtualEnv-like setup, you can add the command above to your `bin/activate` script or to your `.bashrc` script. + +#### To run TensorFlow as a superuser + + When running as a superuser, such as in a container or via sudo, the `$LD_LIBRARY_PATH` approach above may not work. The following is preferred when the user has superuser privileges: + + ```shell + echo "/TensorRT-3.0.4/lib" | sudo tee /etc/ld.so.conf.d/tensorrt304.conf && sudo ldconfig + ``` + + Please ensure that any existing deb package installation of TensorRT is removed before following these instructions to avoid package conflicts. \ No newline at end of file -- GitLab From 6db713c99b2f5f929f94a86997c0462a2c7a5486 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 14 Mar 2018 13:10:27 -0700 Subject: [PATCH 026/960] Fix tensorflow/python/eager:benchmarks_test. PiperOrigin-RevId: 189073615 --- tensorflow/python/eager/benchmarks_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 4255677a68..8c6e7e5758 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -55,7 +55,7 @@ def c_tfe_py_fastpath_execute(a, transpose_b=False, name=None): ctx = context.context() - assert ctx.in_eager_mode( + assert ctx.executing_eagerly( ), "The prototype doesn't contain C code for graph construction" try: return pywrap_tensorflow.TFE_Py_FastPathExecute( -- GitLab From 15d97a4bb76fd613b5c8f8eb3178396c66f0529e Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 14 Mar 2018 13:11:57 -0700 Subject: [PATCH 027/960] Removing underscore prefix from _scalar_summary and _histogram_summary. PiperOrigin-RevId: 189073787 --- tensorflow/python/framework/python_op_gen.cc | 3 +-- tensorflow/python/ops/logging_ops.py | 4 ++-- tensorflow/python/summary/summary.py | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 00a63650b7..271597c459 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -100,8 +100,7 @@ bool IsOpWithUnderscorePrefix(const string& s) { "fused_batch_norm", "histogram_fixed_width", "stack", "batch_norm_with_global_normalization", // TODO(annarev): replace these ops in the next change. - "broadcast_gradient_args", "enter", "histogram_summary", "ref_enter", - "ref_identity", "scalar_summary"}); + "broadcast_gradient_args", "enter", "ref_enter", "ref_identity"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index a7ea7dc6e1..222b8ebc9d 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -109,7 +109,7 @@ def histogram_summary(tag, values, collections=None, name=None): buffer. """ with ops.name_scope(name, "HistogramSummary", [tag, values]) as scope: - val = gen_logging_ops._histogram_summary( + val = gen_logging_ops.histogram_summary( tag=tag, values=values, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val @@ -346,7 +346,7 @@ def scalar_summary(tags, values, collections=None, name=None): buffer. """ with ops.name_scope(name, "ScalarSummary", [tags, values]) as scope: - val = gen_logging_ops._scalar_summary(tags=tags, values=values, name=scope) + val = gen_logging_ops.scalar_summary(tags=tags, values=values, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index 2a3918b9b4..f1b2be0a1a 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -98,8 +98,7 @@ def scalar(name, tensor, collections=None, family=None): """ with _summary_op_util.summary_scope( name, family, values=[tensor]) as (tag, scope): - # pylint: disable=protected-access - val = _gen_logging_ops._scalar_summary(tags=tag, values=tensor, name=scope) + val = _gen_logging_ops.scalar_summary(tags=tag, values=tensor, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) return val @@ -191,8 +190,7 @@ def histogram(name, values, collections=None, family=None): with _summary_op_util.summary_scope( name, family, values=[values], default_name='HistogramSummary') as (tag, scope): - # pylint: disable=protected-access - val = _gen_logging_ops._histogram_summary( + val = _gen_logging_ops.histogram_summary( tag=tag, values=values, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) return val -- GitLab From 4ff81a7d891c2affadb9c9068cf37fdb109c0e77 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 13:46:57 -0700 Subject: [PATCH 028/960] Set shard count to 4 for tensorflow/contrib/linalg:linear_operator_block_diag_test to avoid timeouts This was getting flaky timeouts because it was taking over 300s about 25% of the time. This change increases the shard count to 4 to reduce the total elapsed time. With 4 shards, the maximum time per shard is about 180s. PiperOrigin-RevId: 189079453 --- tensorflow/contrib/linalg/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD index d4f2e70631..3bc1427bd2 100644 --- a/tensorflow/contrib/linalg/BUILD +++ b/tensorflow/contrib/linalg/BUILD @@ -58,6 +58,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + shard_count = 4, ) filegroup( -- GitLab From 16632df29d09b3bbe5cc75df39f5ec720225e6d1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 13:47:22 -0700 Subject: [PATCH 029/960] Remove nsync header files from tensorflow/core/platform/default/mutex.h The implementation of tensorflow/core/platform/default/mutex.h uses the nsync library, so mutex.h has included nsync header files. This has been awkward, because each TensorFlow build (bazel, cmake, make, plus the instructions for compiling individual custom ops, on all the various platforms) has needed to handle the include paths correctly, reaching into a package that is downloaded separately from TensorFlow itself. This change avoids that awkwardness, instead taking on two different irritations: - mutex.h now defines two structs that are large enough and aligned enough to contain an nsync_mu and an nsync_cv. This is an abstraction violation, because TensorFlow's source should not need to know how big these data structures are. However, this is unlikely to cause problems because: 1) this is checked by a static assertion in mutex.cc, so we will notice immediately should a change be needed, and 2) this will likely never fail because we have no intent of allowing nsync's data strcutures to get bigger. - The methods of mutex and condition_variable can no longer be inlined, because that too would require mutex.h to include the nsync header files. (Or we'd need to declare the nsync functions directly in mutex.h, which would be another abstraction violation.) However, this is a small imposition because the overhead of a procedure call is typically small. The assumption behind this CL is that these irritations are less important than the ongoing frustration of maintaining the complex include path in multiple build systems. PiperOrigin-RevId: 189079523 --- .../compiler/xla/service/interpreter/BUILD | 5 +- .../contrib/android/cmake/CMakeLists.txt | 1 - tensorflow/contrib/cmake/tf_shared_lib.cmake | 7 +- .../contrib/makefile/proto_text_cc_files.txt | 1 + tensorflow/core/BUILD | 1 - tensorflow/core/platform/default/mutex.cc | 89 +++++++++++++++++++ tensorflow/core/platform/default/mutex.h | 54 +++++------ tensorflow/python/platform/sysconfig.py | 1 - tensorflow/tensorflow.bzl | 17 ---- tensorflow/tools/pip_package/setup.py | 3 +- 10 files changed, 121 insertions(+), 58 deletions(-) create mode 100644 tensorflow/core/platform/default/mutex.cc diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD index 0819ab3b90..0db3863f24 100644 --- a/tensorflow/compiler/xla/service/interpreter/BUILD +++ b/tensorflow/compiler/xla/service/interpreter/BUILD @@ -63,10 +63,7 @@ cc_library( name = "platform_id", srcs = ["platform_id.cc"], hdrs = ["platform_id.h"], - deps = [ - "@nsync//:nsync_headers", - "//tensorflow/core:stream_executor_headers_lib", - ] + if_static( + deps = ["//tensorflow/core:stream_executor_headers_lib"] + if_static( ["@protobuf_archive//:protobuf"], ["@protobuf_archive//:protobuf_headers"], ), diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index a115d1610e..ecf1a103d2 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -75,7 +75,6 @@ target_link_libraries(tensorflow_inference include_directories( ${PREBUILT_DIR}/proto ${PREBUILT_DIR}/protobuf/include - ${PREBUILT_DIR}/nsync/public ${TENSORFLOW_ROOT_DIR}/tensorflow/contrib/makefile/downloads/eigen ${TENSORFLOW_ROOT_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 6d36d5fc5c..9738bbeb9a 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -100,8 +100,7 @@ if(WIN32) endif(WIN32) target_include_directories(tensorflow PUBLIC - $ - $) + $) install(TARGETS tensorflow EXPORT tensorflow_export RUNTIME DESTINATION bin @@ -133,10 +132,6 @@ install(DIRECTORY ${tensorflow_source_dir}/tensorflow/stream_executor/ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src/google/ DESTINATION include/google FILES_MATCHING PATTERN "*.h") -# nsync headers -install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/ - DESTINATION include/external/nsync - FILES_MATCHING PATTERN "*.h") # Eigen directory install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/ DESTINATION include/Eigen) diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt index d56e388477..77c936d8c5 100644 --- a/tensorflow/contrib/makefile/proto_text_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt @@ -17,6 +17,7 @@ tensorflow/core/platform/env_time.cc tensorflow/core/platform/setround.cc tensorflow/core/platform/denormal.cc tensorflow/core/platform/default/tracing.cc +tensorflow/core/platform/default/mutex.cc tensorflow/core/platform/default/logging.cc tensorflow/core/platform/cpu_info.cc tensorflow/core/lib/wav/wav_io.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9f5b8027f3..213315f40e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1958,7 +1958,6 @@ cc_header_only_library( deps = [ ":framework", ":reader_base", - "@nsync//:nsync_headers", ], ) diff --git a/tensorflow/core/platform/default/mutex.cc b/tensorflow/core/platform/default/mutex.cc new file mode 100644 index 0000000000..79830a4738 --- /dev/null +++ b/tensorflow/core/platform/default/mutex.cc @@ -0,0 +1,89 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/mutex.h" +#include +#include +#include "nsync_cv.h" +#include "nsync_mu.h" + +namespace tensorflow { + +// Check that the external_mu_space struct used to reserve space for the mutex +// in tensorflow::mutex is big enough. +static_assert(sizeof(nsync::nsync_mu) <= sizeof(mutex::external_mu_space), + "tensorflow::mutex::external_mu_space needs to be bigger"); + +// Cast a pointer to mutex::external_mu_space to a pointer to the mutex mutex +// representation. This is done so that the header files for nsync_mu do not +// need to be included in every file that uses tensorflow's mutex. +static inline nsync::nsync_mu *mu_cast(mutex::external_mu_space *mu) { + return reinterpret_cast(mu); +} + +mutex::mutex() { nsync::nsync_mu_init(mu_cast(&mu_)); } + +void mutex::lock() { nsync::nsync_mu_lock(mu_cast(&mu_)); } + +bool mutex::try_lock() { return nsync::nsync_mu_trylock(mu_cast(&mu_)) != 0; }; + +void mutex::unlock() { nsync::nsync_mu_unlock(mu_cast(&mu_)); } + +void mutex::lock_shared() { nsync::nsync_mu_rlock(mu_cast(&mu_)); } + +bool mutex::try_lock_shared() { + return nsync::nsync_mu_rtrylock(mu_cast(&mu_)) != 0; +}; + +void mutex::unlock_shared() { nsync::nsync_mu_runlock(mu_cast(&mu_)); } + +// Check that the external_cv_space struct used to reserve space for the +// condition variable in tensorflow::condition_variable is big enough. +static_assert( + sizeof(nsync::nsync_cv) <= sizeof(condition_variable::external_cv_space), + "tensorflow::condition_variable::external_cv_space needs to be bigger"); + +// Cast a pointer to mutex::external_cv_space to a pointer to the condition +// variable representation. This is done so that the header files for nsync_mu +// do not need to be included in every file that uses tensorflow's +// condition_variable. +static inline nsync::nsync_cv *cv_cast( + condition_variable::external_cv_space *cv) { + return reinterpret_cast(cv); +} + +condition_variable::condition_variable() { + nsync::nsync_cv_init(cv_cast(&cv_)); +} + +void condition_variable::wait(mutex_lock &lock) { + nsync::nsync_cv_wait(cv_cast(&cv_), mu_cast(&lock.mutex()->mu_)); +} + +std::cv_status condition_variable::wait_until_system_clock( + mutex_lock &lock, + const std::chrono::system_clock::time_point timeout_time) { + int r = nsync::nsync_cv_wait_with_deadline( + cv_cast(&cv_), mu_cast(&lock.mutex()->mu_), timeout_time, nullptr); + return r ? std::cv_status::timeout : std::cv_status::no_timeout; +} + +void condition_variable::notify_one() { nsync::nsync_cv_signal(cv_cast(&cv_)); } + +void condition_variable::notify_all() { + nsync::nsync_cv_broadcast(cv_cast(&cv_)); +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h index 044c754e80..a12d92795e 100644 --- a/tensorflow/core/platform/default/mutex.h +++ b/tensorflow/core/platform/default/mutex.h @@ -22,9 +22,8 @@ limitations under the License. #include #include #include -#include "nsync_cv.h" -#include "nsync_mu.h" #include "tensorflow/core/platform/thread_annotations.h" + namespace tensorflow { #undef mutex_lock @@ -38,26 +37,26 @@ class condition_variable; // lock. class LOCKABLE mutex { public: - mutex() { nsync::nsync_mu_init(&mu_); } - // The default implementation of nsync_mutex is safe to use after the linker - // initializations + mutex(); + // The default implementation of the underlying mutex is safe to use after + // the linker initialization to zero. explicit mutex(LinkerInitialized x) {} - void lock() EXCLUSIVE_LOCK_FUNCTION() { nsync::nsync_mu_lock(&mu_); } - bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { - return nsync::nsync_mu_trylock(&mu_) != 0; - }; - void unlock() UNLOCK_FUNCTION() { nsync::nsync_mu_unlock(&mu_); } + void lock() EXCLUSIVE_LOCK_FUNCTION(); + bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true); + void unlock() UNLOCK_FUNCTION(); + + void lock_shared() SHARED_LOCK_FUNCTION(); + bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true); + void unlock_shared() UNLOCK_FUNCTION(); - void lock_shared() SHARED_LOCK_FUNCTION() { nsync::nsync_mu_rlock(&mu_); } - bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true) { - return nsync::nsync_mu_rtrylock(&mu_) != 0; + struct external_mu_space { + void* space[2]; }; - void unlock_shared() UNLOCK_FUNCTION() { nsync::nsync_mu_runlock(&mu_); } private: friend class condition_variable; - nsync::nsync_mu mu_; + external_mu_space mu_; }; // Mimic a subset of the std::unique_lock functionality. @@ -139,26 +138,29 @@ class SCOPED_LOCKABLE tf_shared_lock { // Mimic std::condition_variable. class condition_variable { public: - condition_variable() { nsync::nsync_cv_init(&cv_); } + condition_variable(); - void wait(mutex_lock& lock) { - nsync::nsync_cv_wait(&cv_, &lock.mutex()->mu_); - } + void wait(mutex_lock& lock); template std::cv_status wait_for(mutex_lock& lock, std::chrono::duration dur) { - int r = nsync::nsync_cv_wait_with_deadline( - &cv_, &lock.mutex()->mu_, std::chrono::system_clock::now() + dur, - nullptr); - return r ? std::cv_status::timeout : std::cv_status::no_timeout; + return wait_until_system_clock(lock, + std::chrono::system_clock::now() + dur); } - void notify_one() { nsync::nsync_cv_signal(&cv_); } - void notify_all() { nsync::nsync_cv_broadcast(&cv_); } + void notify_one(); + void notify_all(); + + struct external_cv_space { + void* space[2]; + }; private: friend ConditionResult WaitForMilliseconds(mutex_lock* mu, condition_variable* cv, int64 ms); - nsync::nsync_cv cv_; + std::cv_status wait_until_system_clock( + mutex_lock& lock, + const std::chrono::system_clock::time_point timeout_time); + external_cv_space cv_; }; inline ConditionResult WaitForMilliseconds(mutex_lock* mu, diff --git a/tensorflow/python/platform/sysconfig.py b/tensorflow/python/platform/sysconfig.py index 5c50fa023d..fdd2b903fc 100644 --- a/tensorflow/python/platform/sysconfig.py +++ b/tensorflow/python/platform/sysconfig.py @@ -68,7 +68,6 @@ def get_compile_flags(): """ flags = [] flags.append('-I%s' % get_include()) - flags.append('-I%s/external/nsync/public' % get_include()) flags.append('-D_GLIBCXX_USE_CXX11_ABI=%d' % _CXX11_ABI_FLAG) return flags diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 818d67f7b5..ae305a28e0 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1158,22 +1158,6 @@ def transitive_hdrs(name, deps=[], **kwargs): # the libraries in deps. def cc_header_only_library(name, deps=[], includes=[], **kwargs): _transitive_hdrs(name=name + "_gather", deps=deps) - - # We could generalize the following, but rather than complicate things - # here, we'll do the minimal use case for now, and hope bazel comes up - # with a better solution before too long. We'd expect it to compute - # the right include path by itself, but it doesn't, possibly because - # _transitive_hdrs lost some information about the include path. - if "@nsync//:nsync_headers" in deps: - # Buiding tensorflow from @org_tensorflow finds this two up. - nsynch = "../../external/nsync/public" - # Building tensorflow from elsewhere finds it four up. - # Note that native.repository_name() is not yet available in TF's Kokoro. - if REPOSITORY_NAME != "@": - nsynch = "../../" + nsynch - includes = includes[:] - includes.append(nsynch) - native.cc_library(name=name, hdrs=[":" + name + "_gather"], includes=includes, @@ -1182,7 +1166,6 @@ def cc_header_only_library(name, deps=[], includes=[], **kwargs): def tf_custom_op_library_additional_deps(): return [ "@protobuf_archive//:protobuf_headers", - "@nsync//:nsync_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), ] diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 4b6f123daa..e1a5f091ba 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -200,8 +200,7 @@ headers = (list(find_files('*.h', 'tensorflow/core')) + list(find_files('*.h', 'tensorflow/stream_executor')) + list(find_files('*.h', 'google/protobuf_archive/src')) + list(find_files('*', 'third_party/eigen3')) + - list(find_files('*', 'external/eigen_archive')) + - list(find_files('*.h', 'external/nsync/public'))) + list(find_files('*', 'external/eigen_archive'))) setup( name=project_name, -- GitLab From 61eab3f8c4ed8bdf4324e99508a104307483da2a Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 14 Mar 2018 14:07:43 -0700 Subject: [PATCH 030/960] Removing underscore prefix from _enter and _ref_enter. PiperOrigin-RevId: 189082871 --- tensorflow/python/client/session_test.py | 6 ++-- tensorflow/python/framework/python_op_gen.cc | 2 +- .../kernel_tests/control_flow_ops_py_test.py | 30 +++++++++---------- .../kernel_tests/control_flow_util_test.py | 10 +++---- tensorflow/python/ops/control_flow_ops.py | 10 +++---- 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index ccd7a5117a..781725d63b 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -190,12 +190,10 @@ class SessionTest(test_util.TensorFlowTestCase): a = constant_op.constant(0.0, shape=[2, 3]) # NOTE(mrry): The original_op is nonsense, but used here to test that the # errors are reported correctly. - # pylint: disable=protected-access with sess.graph._original_op(a.op): b = array_ops.identity(a, name='id') with sess.graph._original_op(b.op): c = array_ops.placeholder(dtypes.float32) - # pylint: enable=protected-access def exc_predicate(e): return (e.op == c.op and e.op._original_op == b.op and @@ -1785,8 +1783,8 @@ class SessionTest(test_util.TensorFlowTestCase): # Ensure that errors from building the graph get propagated. data = array_ops.placeholder(dtypes.float32, shape=[]) # pylint: disable=protected-access - enter_1 = gen_control_flow_ops._enter(data, 'foo_1', False) - enter_2 = gen_control_flow_ops._enter(data, 'foo_2', False) + enter_1 = gen_control_flow_ops.enter(data, 'foo_1', False) + enter_2 = gen_control_flow_ops.enter(data, 'foo_2', False) # pylint: enable=protected-access res = math_ops.add(enter_1, enter_2) with self.assertRaisesOpError('has inputs from different frames'): diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 271597c459..6ee8e554de 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -100,7 +100,7 @@ bool IsOpWithUnderscorePrefix(const string& s) { "fused_batch_norm", "histogram_fixed_width", "stack", "batch_norm_with_global_normalization", // TODO(annarev): replace these ops in the next change. - "broadcast_gradient_args", "enter", "ref_enter", "ref_identity"}); + "broadcast_gradient_args", "ref_identity"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index b429fa5c42..d47b030fa1 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -144,7 +144,7 @@ class ControlFlowTest(test.TestCase): enter_v = control_flow_ops._Enter(v, "foo_1", is_constant=True) nine = constant_op.constant(9) - enter_nine = gen_control_flow_ops._enter(nine, "foo_1") + enter_nine = gen_control_flow_ops.enter(nine, "foo_1") op = state_ops.assign(enter_v, enter_nine) v2 = control_flow_ops.with_dependencies([op], enter_v) v3 = control_flow_ops.exit(v2) @@ -164,9 +164,9 @@ class ControlFlowTest(test.TestCase): def testEnterMulExit(self): with self.test_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") - enter_data = gen_control_flow_ops._enter(data, "foo_1", False) + enter_data = gen_control_flow_ops.enter(data, "foo_1", False) five = constant_op.constant(5) - enter_five = gen_control_flow_ops._enter(five, "foo_1", False) + enter_five = gen_control_flow_ops.enter(five, "foo_1", False) mul_op = math_ops.multiply(enter_data, enter_five) exit_op = control_flow_ops.exit(mul_op) @@ -178,12 +178,12 @@ class ControlFlowTest(test.TestCase): v = variables.Variable([0.0, 0.0], dtype=dtypes.float32) # If is_constant=True, the shape information should be propagated. - enter_v_constant = gen_control_flow_ops._enter( + enter_v_constant = gen_control_flow_ops.enter( v, "frame1", is_constant=True) self.assertEqual(enter_v_constant.shape, [2]) # Otherwise, the shape should be unknown. - enter_v_non_constant = gen_control_flow_ops._enter( + enter_v_non_constant = gen_control_flow_ops.enter( v, "frame2", is_constant=False) self.assertEqual(enter_v_non_constant.shape, None) @@ -257,8 +257,8 @@ class ControlFlowTest(test.TestCase): false = ops.convert_to_tensor(False) n = constant_op.constant(10) - enter_false = gen_control_flow_ops._enter(false, "foo_1", False) - enter_n = gen_control_flow_ops._enter(n, "foo_1", False) + enter_false = gen_control_flow_ops.enter(false, "foo_1", False) + enter_n = gen_control_flow_ops.enter(n, "foo_1", False) merge_n = control_flow_ops.merge([enter_n, enter_n], name="merge_n")[0] switch_n = control_flow_ops.switch(merge_n, enter_false) @@ -275,9 +275,9 @@ class ControlFlowTest(test.TestCase): one = constant_op.constant(1) n = constant_op.constant(10) - enter_i = gen_control_flow_ops._enter(zero, "foo", False) - enter_one = gen_control_flow_ops._enter(one, "foo", True) - enter_n = gen_control_flow_ops._enter(n, "foo", True) + enter_i = gen_control_flow_ops.enter(zero, "foo", False) + enter_one = gen_control_flow_ops.enter(one, "foo", True) + enter_n = gen_control_flow_ops.enter(n, "foo", True) with ops.device(test.gpu_device_name()): merge_i = control_flow_ops.merge([enter_i, enter_i])[0] @@ -301,9 +301,9 @@ class ControlFlowTest(test.TestCase): one = constant_op.constant(1) n = constant_op.constant(10) - enter_i = gen_control_flow_ops._enter(zero, "foo", False) - enter_one = gen_control_flow_ops._enter(one, "foo", True) - enter_n = gen_control_flow_ops._enter(n, "foo", True) + enter_i = gen_control_flow_ops.enter(zero, "foo", False) + enter_one = gen_control_flow_ops.enter(one, "foo", True) + enter_n = gen_control_flow_ops.enter(n, "foo", True) merge_i = control_flow_ops.merge([enter_i, enter_i])[0] @@ -324,8 +324,8 @@ class ControlFlowTest(test.TestCase): def testDifferentFrame(self): with self.test_session(): data = array_ops.placeholder(dtypes.float32, shape=[]) - enter_1 = gen_control_flow_ops._enter(data, "foo_1", False) - enter_2 = gen_control_flow_ops._enter(data, "foo_2", False) + enter_1 = gen_control_flow_ops.enter(data, "foo_1", False) + enter_2 = gen_control_flow_ops.enter(data, "foo_2", False) res = math_ops.add(enter_1, enter_2) with self.assertRaisesOpError("has inputs from different frames"): res.eval(feed_dict={data: 1.0}) diff --git a/tensorflow/python/kernel_tests/control_flow_util_test.py b/tensorflow/python/kernel_tests/control_flow_util_test.py index 23185eaeec..39e96f74b0 100644 --- a/tensorflow/python/kernel_tests/control_flow_util_test.py +++ b/tensorflow/python/kernel_tests/control_flow_util_test.py @@ -41,17 +41,17 @@ class ControlFlowUtilTest(test.TestCase): self.assertFalse(control_flow_util.IsSwitch(test_ops.int_output().op)) def testIsLoopEnter(self): - enter = gen_control_flow_ops._enter(1, frame_name="name").op + enter = gen_control_flow_ops.enter(1, frame_name="name").op self.assertTrue(control_flow_util.IsLoopEnter(enter)) self.assertFalse(control_flow_util.IsLoopConstantEnter(enter)) - ref_enter = gen_control_flow_ops._ref_enter(test_ops.ref_output(), - frame_name="name").op + ref_enter = gen_control_flow_ops.ref_enter(test_ops.ref_output(), + frame_name="name").op self.assertTrue(control_flow_util.IsLoopEnter(ref_enter)) self.assertFalse(control_flow_util.IsLoopConstantEnter(ref_enter)) - const_enter = gen_control_flow_ops._enter(1, frame_name="name", - is_constant=True).op + const_enter = gen_control_flow_ops.enter(1, frame_name="name", + is_constant=True).op self.assertTrue(control_flow_util.IsLoopEnter(const_enter)) self.assertTrue(control_flow_util.IsLoopConstantEnter(const_enter)) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index ff4f452bed..24c30802b5 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -264,10 +264,10 @@ def _Enter(data, data = ops.internal_convert_to_tensor_or_indexed_slices(data, as_ref=True) if isinstance(data, ops.Tensor): if data.dtype._is_ref_dtype and use_ref: # pylint: disable=protected-access - result = gen_control_flow_ops._ref_enter( + result = gen_control_flow_ops.ref_enter( data, frame_name, is_constant, parallel_iterations, name=name) else: - result = gen_control_flow_ops._enter( + result = gen_control_flow_ops.enter( data, frame_name, is_constant, parallel_iterations, name=name) if use_input_shape: result.set_shape(data.get_shape()) @@ -282,7 +282,7 @@ def _Enter(data, parallel_iterations=parallel_iterations, use_input_shape=use_input_shape, name=name) - indices = gen_control_flow_ops._enter( + indices = gen_control_flow_ops.enter( data.indices, frame_name, is_constant, @@ -293,7 +293,7 @@ def _Enter(data, if isinstance(data, ops.IndexedSlices): dense_shape = data.dense_shape if dense_shape is not None: - dense_shape = gen_control_flow_ops._enter( + dense_shape = gen_control_flow_ops.enter( dense_shape, frame_name, is_constant, @@ -303,7 +303,7 @@ def _Enter(data, dense_shape.set_shape(data.dense_shape.get_shape()) return ops.IndexedSlices(values, indices, dense_shape) else: - dense_shape = gen_control_flow_ops._enter( + dense_shape = gen_control_flow_ops.enter( data.dense_shape, frame_name, is_constant, -- GitLab From ac8ce1fe760efff6585d790b784ec67255198879 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 14:34:15 -0700 Subject: [PATCH 031/960] Rename KMeans _parse_tensor_or_dict to _parse_features_if_necessary and add a unit test. PiperOrigin-RevId: 189087384 --- .../factorization/python/ops/kmeans.py | 4 ++-- .../factorization/python/ops/kmeans_test.py | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index 7319eaa7de..c092f85d35 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -105,7 +105,7 @@ class _InitializeClustersHook(session_run_hook.SessionRunHook): logging.info(e) -def _parse_tensor_or_dict(features): +def _parse_features_if_necessary(features): """Helper function to convert the input points into a usable format. Args: @@ -166,7 +166,7 @@ class _ModelFn(object): # input_points is a single Tensor. Therefore, the sharding functionality # in clustering_ops is unused, and some of the values below are lists of a # single item. - input_points = _parse_tensor_or_dict(features) + input_points = _parse_features_if_necessary(features) # Let N = the number of input_points. # all_distances: A list of one matrix of shape (N, num_clusters). Each value diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py index f9598bfc08..06a2c52c11 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -226,6 +226,28 @@ class KMeansTest(KMeansTestBase): self._infer_helper(kmeans, clusters, 10) self._infer_helper(kmeans, clusters, 1) + def test_parse_features(self): + """Tests the various behaviours of kmeans._parse_features_if_necessary.""" + + # No-op if a tensor is passed in. + features = constant_op.constant(self.points) + parsed_features = kmeans_lib._parse_features_if_necessary(features) + self.assertAllEqual(features, parsed_features) + + # A dict is transformed into a tensor. + feature_dict = { + 'x': [[point[0]] for point in self.points], + 'y': [[point[1]] for point in self.points] + } + parsed_feature_dict = kmeans_lib._parse_features_if_necessary(feature_dict) + # Perform a sanity check. + self.assertEqual(features.shape, parsed_feature_dict.shape) + self.assertEqual(features.dtype, parsed_feature_dict.dtype) + # Then check that running the tensor yields the original list of points. + with self.test_session() as sess: + parsed_points = sess.run(parsed_feature_dict) + self.assertAllEqual(self.points, parsed_points) + class KMeansTestMultiStageInit(KMeansTestBase): @@ -394,7 +416,6 @@ class KMeansCosineDistanceTest(KMeansTestBase): true_assignments = [0] * 2 + [1] * 2 + [2] * 8 true_score = len(points) - np.tensordot( normalize(points), true_centers[true_assignments]) - kmeans = kmeans_lib.KMeansClustering( 3, initial_clusters=self.initial_clusters, -- GitLab From 5ac329bd86e400d47155e0c890669f4ee688771d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 14:47:40 -0700 Subject: [PATCH 032/960] Adding non-linear image warping ops to tf.contrib.image New ops are: tf.contrib.image.sparse_image_warp, tf.contrib.image.dense_image_warp, and tf.contrib.image.interpolate_spline. PiperOrigin-RevId: 189089672 --- tensorflow/contrib/image/BUILD | 109 +++++++ tensorflow/contrib/image/__init__.py | 7 + .../kernel_tests/dense_image_warp_test.py | 264 ++++++++++++++++ .../kernel_tests/interpolate_spline_test.py | 261 ++++++++++++++++ .../kernel_tests/sparse_image_warp_test.py | 251 +++++++++++++++ .../test_data/Yellow_Smiley_Face.png | Bin 0 -> 14060 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-0.png | Bin 0 -> 18537 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-1.png | Bin 0 -> 19086 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-4.png | Bin 0 -> 18884 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-0.png | Bin 0 -> 18109 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-1.png | Bin 0 -> 19251 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-4.png | Bin 0 -> 19132 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-0.png | Bin 0 -> 17500 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-1.png | Bin 0 -> 18058 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-4.png | Bin 0 -> 19313 bytes .../image/python/ops/dense_image_warp.py | 196 ++++++++++++ .../image/python/ops/interpolate_spline.py | 285 ++++++++++++++++++ .../image/python/ops/sparse_image_warp.py | 192 ++++++++++++ 18 files changed, 1565 insertions(+) create mode 100644 tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-4.png create mode 100644 tensorflow/contrib/image/python/ops/dense_image_warp.py create mode 100644 tensorflow/contrib/image/python/ops/interpolate_spline.py create mode 100644 tensorflow/contrib/image/python/ops/sparse_image_warp.py diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 3ff02e085e..760ed70fbb 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -78,7 +78,10 @@ tf_custom_op_py_library( ], srcs_version = "PY2AND3", deps = [ + ":dense_image_warp_py", ":image_ops", + ":interpolate_spline_py", + ":sparse_image_warp_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:common_shapes", @@ -194,6 +197,112 @@ cuda_py_test( ], ) +py_library( + name = "dense_image_warp_py", + srcs = [ + "python/ops/dense_image_warp.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + "//third_party/py/numpy", + ], +) + +py_library( + name = "interpolate_spline_py", + srcs = [ + "python/ops/interpolate_spline.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + ], +) + +py_library( + name = "sparse_image_warp_py", + srcs = [ + "python/ops/sparse_image_warp.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":dense_image_warp_py", + ":interpolate_spline_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + ], +) + +cuda_py_test( + name = "sparse_image_warp_test", + size = "medium", + srcs = ["python/kernel_tests/sparse_image_warp_test.py"], + additional_deps = [ + ":sparse_image_warp_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + ], + data = glob(["python/kernel_tests/test_data/*.png"]), +) + +cuda_py_test( + name = "dense_image_warp_test", + size = "medium", + srcs = ["python/kernel_tests/dense_image_warp_test.py"], + additional_deps = [ + ":dense_image_warp_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + ], +) + +cuda_py_test( + name = "interpolate_spline_test", + size = "medium", + srcs = ["python/kernel_tests/interpolate_spline_test.py"], + additional_deps = [ + ":interpolate_spline_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + "//third_party/py/scipy", + ], +) + tf_py_test( name = "segmentation_test", size = "medium", diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index cc8ed117ba..e982030bc8 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -30,6 +30,9 @@ projective transforms (including rotation) are supported. @@transform @@translate @@translations_to_projective_transforms +@@dense_image_warp +@@interpolate_spline +@@sparse_image_warp ## Image Segmentation `Ops` @@ -47,6 +50,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.image.python.ops.dense_image_warp import dense_image_warp + from tensorflow.contrib.image.python.ops.distort_image_ops import adjust_hsv_in_yiq from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_yiq @@ -57,7 +62,9 @@ from tensorflow.contrib.image.python.ops.image_ops import rotate from tensorflow.contrib.image.python.ops.image_ops import transform from tensorflow.contrib.image.python.ops.image_ops import translate from tensorflow.contrib.image.python.ops.image_ops import translations_to_projective_transforms +from tensorflow.contrib.image.python.ops.interpolate_spline import interpolate_spline from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms import single_image_random_dot_stereograms +from tensorflow.contrib.image.python.ops.sparse_image_warp import sparse_image_warp from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py new file mode 100644 index 0000000000..24d99ccaa6 --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py @@ -0,0 +1,264 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for dense_image_warp.""" + +import math +import numpy as np + +from tensorflow.contrib.image.python.ops import dense_image_warp + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes + +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +from tensorflow.python.training import adam + + +class DenseImageWarpTest(test_util.TensorFlowTestCase): + + def setUp(self): + np.random.seed(0) + + def test_interpolate_small_grid_ij(self): + grid = constant_op.constant( + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) + query_points = constant_op.constant( + [[0., 0.], [1., 0.], [2., 0.5], [1.5, 1.5]], shape=[1, 4, 2]) + expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) + + interp = dense_image_warp._interpolate_bilinear(grid, query_points) + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def test_interpolate_small_grid_xy(self): + grid = constant_op.constant( + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) + query_points = constant_op.constant( + [[0., 0.], [0., 1.], [0.5, 2.0], [1.5, 1.5]], shape=[1, 4, 2]) + expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) + + interp = dense_image_warp._interpolate_bilinear( + grid, query_points, indexing='xy') + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def test_interpolate_small_grid_batched(self): + grid = constant_op.constant( + [[[0., 1.], [3., 4.]], [[5., 6.], [7., 8.]]], shape=[2, 2, 2, 1]) + query_points = constant_op.constant([[[0., 0.], [1., 0.], [0.5, 0.5]], + [[0.5, 0.], [1., 0.], [1., 1.]]]) + expected_results = np.reshape( + np.array([[0., 3., 2.], [6., 7., 8.]]), [2, 3, 1]) + + interp = dense_image_warp._interpolate_bilinear(grid, query_points) + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def get_image_and_flow_placeholders(self, shape, image_type, flow_type): + batch_size, height, width, numchannels = shape + image_shape = [batch_size, height, width, numchannels] + flow_shape = [batch_size, height, width, 2] + + tf_type = { + 'float16': dtypes.half, + 'float32': dtypes.float32, + 'float64': dtypes.float64 + } + + image = array_ops.placeholder(dtype=tf_type[image_type], shape=image_shape) + + flows = array_ops.placeholder(dtype=tf_type[flow_type], shape=flow_shape) + return image, flows + + def get_random_image_and_flows(self, shape, image_type, flow_type): + batch_size, height, width, numchannels = shape + image_shape = [batch_size, height, width, numchannels] + image = np.random.normal(size=image_shape) + flow_shape = [batch_size, height, width, 2] + flows = np.random.normal(size=flow_shape) * 3 + return image.astype(image_type), flows.astype(flow_type) + + def assert_correct_interpolation_value(self, + image, + flows, + pred_interpolation, + batch_index, + y_index, + x_index, + low_precision=False): + """Assert that the tf interpolation matches hand-computed value.""" + + height = image.shape[1] + width = image.shape[2] + displacement = flows[batch_index, y_index, x_index, :] + float_y = y_index - displacement[0] + float_x = x_index - displacement[1] + floor_y = max(min(height - 2, math.floor(float_y)), 0) + floor_x = max(min(width - 2, math.floor(float_x)), 0) + ceil_y = floor_y + 1 + ceil_x = floor_x + 1 + + alpha_y = min(max(0.0, float_y - floor_y), 1.0) + alpha_x = min(max(0.0, float_x - floor_x), 1.0) + + floor_y = int(floor_y) + floor_x = int(floor_x) + ceil_y = int(ceil_y) + ceil_x = int(ceil_x) + + top_left = image[batch_index, floor_y, floor_x, :] + top_right = image[batch_index, floor_y, ceil_x, :] + bottom_left = image[batch_index, ceil_y, floor_x, :] + bottom_right = image[batch_index, ceil_y, ceil_x, :] + + interp_top = alpha_x * (top_right - top_left) + top_left + interp_bottom = alpha_x * (bottom_right - bottom_left) + bottom_left + interp = alpha_y * (interp_bottom - interp_top) + interp_top + atol = 1e-6 + rtol = 1e-6 + if low_precision: + atol = 1e-2 + rtol = 1e-3 + self.assertAllClose( + interp, + pred_interpolation[batch_index, y_index, x_index, :], + atol=atol, + rtol=rtol) + + def check_zero_flow_correctness(self, shape, image_type, flow_type): + """Assert using zero flows doesn't change the input image.""" + + image, flows = self.get_image_and_flow_placeholders(shape, image_type, + flow_type) + interp = dense_image_warp.dense_image_warp(image, flows) + + with self.test_session() as sess: + rand_image, rand_flows = self.get_random_image_and_flows( + shape, image_type, flow_type) + rand_flows *= 0 + + predicted_interpolation = sess.run( + interp, feed_dict={ + image: rand_image, + flows: rand_flows + }) + self.assertAllClose(rand_image, predicted_interpolation) + + def test_zero_flows(self): + """Apply check_zero_flow_correctness() for a few sizes and types.""" + + shapes_to_try = [[3, 4, 5, 6], [1, 2, 2, 1]] + for shape in shapes_to_try: + self.check_zero_flow_correctness( + shape, image_type='float32', flow_type='float32') + + def check_interpolation_correctness(self, + shape, + image_type, + flow_type, + num_probes=5): + """Interpolate, and then assert correctness for a few query locations.""" + + image, flows = self.get_image_and_flow_placeholders(shape, image_type, + flow_type) + interp = dense_image_warp.dense_image_warp(image, flows) + low_precision = image_type == 'float16' or flow_type == 'float16' + with self.test_session() as sess: + rand_image, rand_flows = self.get_random_image_and_flows( + shape, image_type, flow_type) + + pred_interpolation = sess.run( + interp, feed_dict={ + image: rand_image, + flows: rand_flows + }) + + for _ in range(num_probes): + batch_index = np.random.randint(0, shape[0]) + y_index = np.random.randint(0, shape[1]) + x_index = np.random.randint(0, shape[2]) + + self.assert_correct_interpolation_value( + rand_image, + rand_flows, + pred_interpolation, + batch_index, + y_index, + x_index, + low_precision=low_precision) + + def test_interpolation(self): + """Apply check_interpolation_correctness() for a few sizes and types.""" + + shapes_to_try = [[3, 4, 5, 6], [1, 5, 5, 3], [1, 2, 2, 1]] + for im_type in ['float32', 'float64', 'float16']: + for flow_type in ['float32', 'float64', 'float16']: + for shape in shapes_to_try: + self.check_interpolation_correctness(shape, im_type, flow_type) + + def test_gradients_exist(self): + """Check that backprop can run. + + The correctness of the gradients is assumed, since the forward propagation + is tested to be correct and we only use built-in tf ops. + However, we perform a simple test to make sure that backprop can actually + run. We treat the flows as a tf.Variable and optimize them to minimize + the difference between the interpolated image and the input image. + """ + + batch_size, height, width, numchannels = [4, 5, 6, 7] + image_shape = [batch_size, height, width, numchannels] + image = random_ops.random_normal(image_shape) + flow_shape = [batch_size, height, width, 2] + init_flows = np.float32(np.random.normal(size=flow_shape) * 0.25) + flows = variables.Variable(init_flows) + + interp = dense_image_warp.dense_image_warp(image, flows) + loss = math_ops.reduce_mean(math_ops.square(interp - image)) + + optimizer = adam.AdamOptimizer(1.0) + grad = gradients.gradients(loss, [flows]) + opt_func = optimizer.apply_gradients(zip(grad, [flows])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(10): + sess.run(opt_func) + + def test_size_exception(self): + """Make sure it throws an exception for images that are too small.""" + + shape = [1, 2, 1, 1] + msg = 'Should have raised an exception for invalid image size' + with self.assertRaises(ValueError, msg=msg): + self.check_interpolation_correctness(shape, 'float32', 'float32') + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py b/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py new file mode 100644 index 0000000000..1cba46e17e --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py @@ -0,0 +1,261 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for interpolate_spline.""" + +import numpy as np +from scipy import interpolate as sc_interpolate + +from tensorflow.contrib.image.python.ops import interpolate_spline + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util + +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +from tensorflow.python.training import momentum + + +class _InterpolationProblem(object): + """Abstract class for interpolation problem descriptions.""" + + def get_problem(self, optimizable=False, extrapolate=True, dtype='float32'): + """Make data for an interpolation problem where all x vectors are n-d. + + Args: + optimizable: If True, then make train_points a tf.Variable. + extrapolate: If False, then clamp the query_points values to be within + the max and min of train_points. + dtype: The data type to use. + + Returns: + query_points, query_values, train_points, train_values: training and + test tensors for interpolation problem + """ + + # The values generated here depend on a seed of 0. + np.random.seed(0) + + batch_size = 1 + num_training_points = 10 + num_query_points = 4 + + init_points = np.random.uniform( + size=[batch_size, num_training_points, self.DATA_DIM]) + + init_points = init_points.astype(dtype) + train_points = ( + variables.Variable(init_points) + if optimizable else constant_op.constant(init_points)) + train_values = self.tf_function(train_points) + + query_points_np = np.random.uniform( + size=[batch_size, num_query_points, self.DATA_DIM]) + query_points_np = query_points_np.astype(dtype) + if not extrapolate: + query_points_np = np.clip(query_points_np, np.min(init_points), + np.max(init_points)) + + query_points = constant_op.constant(query_points_np) + query_values = self.np_function(query_points_np) + + return query_points, query_values, train_points, train_values + + +class _QuadraticPlusSinProblem1D(_InterpolationProblem): + """1D interpolation problem used for regression testing.""" + DATA_DIM = 1 + HARDCODED_QUERY_VALUES = { + (1.0, 0.0): [6.2647187603, -7.84362604077, -5.63690142322, 1.42928896387], + (1.0, + 0.01): [6.77688289946, -8.02163669853, -5.79491157027, 1.4063285693], + (2.0, + 0.0): [8.67110264937, -8.41281390883, -5.80190044693, 1.50155606059], + (2.0, + 0.01): [6.70797816797, -7.49709587663, -5.28965776238, 1.52284731741], + (3.0, + 0.0): [9.37691802935, -8.50390141515, -5.80786417426, 1.63467762122], + (3.0, + 0.01): [4.47106304758, -5.71266128361, -3.92529303296, 1.86755293857], + (4.0, + 0.0): [9.58172461111, -8.51432104771, -5.80967675388, 1.63361164256], + (4.0, 0.01): [ + -3.87902711352, -0.0253462273846, 1.79857618022, -0.769339675725 + ] + } + + def np_function(self, x): + """Takes np array, evaluates the test function, and returns np array.""" + return np.sum( + np.power((x - 0.5), 3) - 0.25 * x + 10 * np.sin(x * 10), + axis=2, + keepdims=True) + + def tf_function(self, x): + """Takes tf tensor, evaluates the test function, and returns tf tensor.""" + return math_ops.reduce_mean( + math_ops.pow((x - 0.5), 3) - 0.25 * x + 10 * math_ops.sin(x * 10), + 2, + keepdims=True) + + +class _QuadraticPlusSinProblemND(_InterpolationProblem): + """3D interpolation problem used for regression testing.""" + + DATA_DIM = 3 + HARDCODED_QUERY_VALUES = { + (1.0, 0.0): [1.06609663962, 1.28894849357, 1.10882405595, 1.63966936885], + (1.0, 0.01): [1.03123780748, 1.2952930985, 1.10366822954, 1.65265118569], + (2.0, 0.0): [0.627787735064, 1.43802857251, 1.00194632358, 1.91667538215], + (2.0, 0.01): [0.730159985046, 1.41702471595, 1.0065827217, 1.85758519312], + (3.0, 0.0): [0.350460417862, 1.67223539464, 1.00475331246, 2.31580322491], + (3.0, + 0.01): [0.624557250556, 1.63138876667, 0.976588193162, 2.12511237866], + (4.0, + 0.0): [0.898129669986, 1.24434133638, -0.938056116931, 1.59910338833], + (4.0, + 0.01): [0.0930360338179, -3.38791305538, -1.00969032567, 0.745535080382], + } + + def np_function(self, x): + """Takes np array, evaluates the test function, and returns np array.""" + return np.sum( + np.square(x - 0.5) + 0.25 * x + 1 * np.sin(x * 15), + axis=2, + keepdims=True) + + def tf_function(self, x): + """Takes tf tensor, evaluates the test function, and returns tf tensor.""" + return math_ops.reduce_sum( + math_ops.square(x - 0.5) + 0.25 * x + 1 * math_ops.sin(x * 15), + 2, + keepdims=True) + + +class InterpolateSplineTest(test_util.TensorFlowTestCase): + + def test_1d_linear_interpolation(self): + """For 1d linear interpolation, we can compare directly to scipy.""" + + tp = _QuadraticPlusSinProblem1D() + (query_points, _, train_points, train_values) = tp.get_problem( + extrapolate=False, dtype='float64') + interpolation_order = 1 + + with ops.name_scope('interpolator'): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, interpolation_order) + with self.test_session() as sess: + fetches = [query_points, train_points, train_values, interpolator] + query_points_, train_points_, train_values_, interp_ = sess.run(fetches) + + # Just look at the first element of the minibatch. + # Also, trim the final singleton dimension. + interp_ = interp_[0, :, 0] + query_points_ = query_points_[0, :, 0] + train_points_ = train_points_[0, :, 0] + train_values_ = train_values_[0, :, 0] + + # Compute scipy interpolation. + scipy_interp_function = sc_interpolate.interp1d( + train_points_, train_values_, kind='linear') + + scipy_interpolation = scipy_interp_function(query_points_) + scipy_interpolation_on_train = scipy_interp_function(train_points_) + + # Even with float64 precision, the interpolants disagree with scipy a + # bit due to the fact that we add the EPSILON to prevent sqrt(0), etc. + tol = 1e-3 + + self.assertAllClose( + train_values_, scipy_interpolation_on_train, atol=tol, rtol=tol) + self.assertAllClose(interp_, scipy_interpolation, atol=tol, rtol=tol) + + def test_1d_interpolation(self): + """Regression test for interpolation with 1-D points.""" + + tp = _QuadraticPlusSinProblem1D() + (query_points, _, train_points, + train_values) = tp.get_problem(dtype='float64') + + for order in (1, 2, 3): + for reg_weight in (0, 0.01): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, order, reg_weight) + + target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] + target_interpolation = np.array(target_interpolation) + with self.test_session() as sess: + interp_val = sess.run(interpolator) + self.assertAllClose(interp_val[0, :, 0], target_interpolation) + + def test_nd_linear_interpolation(self): + """Regression test for interpolation with N-D points.""" + + tp = _QuadraticPlusSinProblemND() + (query_points, _, train_points, + train_values) = tp.get_problem(dtype='float64') + + for order in (1, 2, 3): + for reg_weight in (0, 0.01): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, order, reg_weight) + + target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] + target_interpolation = np.array(target_interpolation) + with self.test_session() as sess: + interp_val = sess.run(interpolator) + self.assertAllClose(interp_val[0, :, 0], target_interpolation) + + def test_interpolation_gradient(self): + """Make sure that backprop can run. Correctness of gradients is assumed. + + Here, we create a use a small 'training' set and a more densely-sampled + set of query points, for which we know the true value in advance. The goal + is to choose x locations for the training data such that interpolating using + this training data yields the best reconstruction for the function + values at the query points. The training data locations are optimized + iteratively using gradient descent. + """ + tp = _QuadraticPlusSinProblemND() + (query_points, query_values, train_points, + train_values) = tp.get_problem(optimizable=True) + + regularization = 0.001 + for interpolation_order in (1, 2, 3, 4): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, interpolation_order, + regularization) + + loss = math_ops.reduce_mean(math_ops.square(query_values - interpolator)) + + optimizer = momentum.MomentumOptimizer(0.001, 0.9) + grad = gradients.gradients(loss, [train_points]) + grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) + opt_func = optimizer.apply_gradients(zip(grad, [train_points])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(100): + sess.run([loss, opt_func]) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py new file mode 100644 index 0000000000..017969d230 --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py @@ -0,0 +1,251 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sparse_image_warp.""" + +import numpy as np + +from tensorflow.contrib.image.python.ops import sparse_image_warp + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import image_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest +from tensorflow.python.platform import test + +from tensorflow.python.training import momentum + + +class SparseImageWarpTest(test_util.TensorFlowTestCase): + + def setUp(self): + np.random.seed(0) + + def testGetBoundaryLocations(self): + image_height = 11 + image_width = 11 + num_points_per_edge = 4 + locs = sparse_image_warp._get_boundary_locations(image_height, image_width, + num_points_per_edge) + num_points = locs.shape[0] + self.assertEqual(num_points, 4 + 4 * num_points_per_edge) + locs = [(locs[i, 0], locs[i, 1]) for i in range(num_points)] + for i in (0, image_height - 1): + for j in (0, image_width - 1): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + for i in (2, 4, 6, 8): + for j in (0, image_width - 1): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + for i in (0, image_height - 1): + for j in (2, 4, 6, 8): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + def testGetGridLocations(self): + image_height = 5 + image_width = 3 + grid = sparse_image_warp._get_grid_locations(image_height, image_width) + for i in range(image_height): + for j in range(image_width): + self.assertEqual(grid[i, j, 0], i) + self.assertEqual(grid[i, j, 1], j) + + def testZeroShift(self): + """Run assertZeroShift for various hyperparameters.""" + for order in (1, 2): + for regularization in (0, 0.01): + for num_boundary_points in (0, 1): + self.assertZeroShift(order, regularization, num_boundary_points) + + def assertZeroShift(self, order, regularization, num_boundary_points): + """Check that warping with zero displacements doesn't change the image.""" + batch_size = 1 + image_height = 4 + image_width = 4 + channels = 3 + + image = np.random.uniform( + size=[batch_size, image_height, image_width, channels]) + + input_image_op = constant_op.constant(np.float32(image)) + + control_point_locations = [[1., 1.], [2., 2.], [2., 1.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0))) + + control_point_displacements = np.zeros( + control_point_locations.shape.as_list()) + control_point_displacements = constant_op.constant( + np.float32(control_point_displacements)) + + (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( + input_image_op, + control_point_locations, + control_point_locations + control_point_displacements, + interpolation_order=order, + regularization_weight=regularization, + num_boundary_points=num_boundary_points) + + with self.test_session() as sess: + warped_image, input_image, _ = sess.run( + [warped_image_op, input_image_op, flow_field]) + + self.assertAllClose(warped_image, input_image) + + def testMoveSinglePixel(self): + """Run assertMoveSinglePixel for various hyperparameters and data types.""" + for order in (1, 2): + for num_boundary_points in (1, 2): + for type_to_use in (dtypes.float32, dtypes.float64): + self.assertMoveSinglePixel(order, num_boundary_points, type_to_use) + + def assertMoveSinglePixel(self, order, num_boundary_points, type_to_use): + """Move a single block in a small grid using warping.""" + batch_size = 1 + image_height = 7 + image_width = 7 + channels = 3 + + image = np.zeros([batch_size, image_height, image_width, channels]) + image[:, 3, 3, :] = 1.0 + input_image_op = constant_op.constant(image, dtype=type_to_use) + + # Place a control point at the one white pixel. + control_point_locations = [[3., 3.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0)), + dtype=type_to_use) + # Shift it one pixel to the right. + control_point_displacements = [[0., 1.0]] + control_point_displacements = constant_op.constant( + np.float32(np.expand_dims(control_point_displacements, 0)), + dtype=type_to_use) + + (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( + input_image_op, + control_point_locations, + control_point_locations + control_point_displacements, + interpolation_order=order, + num_boundary_points=num_boundary_points) + + with self.test_session() as sess: + warped_image, input_image, flow = sess.run( + [warped_image_op, input_image_op, flow_field]) + # Check that it moved the pixel correctly. + self.assertAllClose( + warped_image[0, 4, 5, :], + input_image[0, 4, 4, :], + atol=1e-5, + rtol=1e-5) + + # Test that there is no flow at the corners. + for i in (0, image_height - 1): + for j in (0, image_width - 1): + self.assertAllClose( + flow[0, i, j, :], np.zeros([2]), atol=1e-5, rtol=1e-5) + + def load_image(self, image_file, sess): + image_op = image_ops.decode_png( + io_ops.read_file(image_file), dtype=dtypes.uint8, channels=4)[:, :, 0:3] + return sess.run(image_op) + + def testSmileyFace(self): + """Check warping accuracy by comparing to hardcoded warped images.""" + + test_data_dir = test.test_src_dir_path('contrib/image/python/' + 'kernel_tests/test_data/') + input_file = test_data_dir + 'Yellow_Smiley_Face.png' + with self.test_session() as sess: + input_image = self.load_image(input_file, sess) + control_points = np.asarray([[64, 59], [180 - 64, 59], [39, 111], + [180 - 39, 111], [90, 143], [58, 134], + [180 - 58, 134]]) # pyformat: disable + control_point_displacements = np.asarray( + [[-10.5, 10.5], [10.5, 10.5], [0, 0], [0, 0], [0, -10], [-20, 10.25], + [10, 10.75]]) + control_points_op = constant_op.constant( + np.expand_dims(np.float32(control_points[:, [1, 0]]), 0)) + control_point_displacements_op = constant_op.constant( + np.expand_dims(np.float32(control_point_displacements[:, [1, 0]]), 0)) + float_image = np.expand_dims(np.float32(input_image) / 255, 0) + input_image_op = constant_op.constant(float_image) + + for interpolation_order in (1, 2, 3): + for num_boundary_points in (0, 1, 4): + warp_op, _ = sparse_image_warp.sparse_image_warp( + input_image_op, + control_points_op, + control_points_op + control_point_displacements_op, + interpolation_order=interpolation_order, + num_boundary_points=num_boundary_points) + with self.test_session() as sess: + warped_image = sess.run(warp_op) + out_image = np.uint8(warped_image[0, :, :, :] * 255) + target_file = ( + test_data_dir + + 'Yellow_Smiley_Face_Warp-interp' + '-{}-clamp-{}.png'.format( + interpolation_order, num_boundary_points)) + + target_image = self.load_image(target_file, sess) + + # Check that the target_image and out_image difference is no + # bigger than 1 (on a scale of 0-255). Due to differences in + # floating point computation on different devices, the float + # output in warped_image may get rounded to a different int + # than that in the saved png file loaded into target_image. + self.assertAllClose(target_image, out_image, atol=1, rtol=1e-3) + + def testThatBackpropRuns(self): + """Run optimization to ensure that gradients can be computed.""" + + batch_size = 1 + image_height = 9 + image_width = 12 + image = variables.Variable( + np.float32( + np.random.uniform(size=[batch_size, image_height, image_width, 3]))) + control_point_locations = [[3., 3.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0))) + control_point_displacements = [[0.25, -0.5]] + control_point_displacements = constant_op.constant( + np.float32(np.expand_dims(control_point_displacements, 0))) + warped_image, _ = sparse_image_warp.sparse_image_warp( + image, + control_point_locations, + control_point_locations + control_point_displacements, + num_boundary_points=3) + + loss = math_ops.reduce_mean(math_ops.abs(warped_image - image)) + optimizer = momentum.MomentumOptimizer(0.001, 0.9) + grad = gradients.gradients(loss, [image]) + grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) + opt_func = optimizer.apply_gradients(zip(grad, [image])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run([loss, opt_func]) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png new file mode 100644 index 0000000000000000000000000000000000000000..7e303881e213a82e412d18de9d9d86f368726f06 GIT binary patch literal 14060 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}9Bd2>47O+4j2IXg*pj^6T^Rm@;DWu&Co?cG zu$OrHy0YJ7yv!2(t%1$ISo z&l83--`$s=x@`68)xTz)^St-`NHe8~I9CILp2YXt%nukZ-GS zrPOjh(~VYon`(bDd|{e!Yq|e&MNZDSZY^zX-}w3YU&=^I3f)^7>=EJ7p|7OG6y(f& zC^hn%i+gp7=cK^6dk;QVC{*v|-Fo%vr0x6mE!?(k+ro9r)B?hzU$39Gn~liN&78FJTnFf4B8duwHDXIB-oJui0Bq)EGuELrB(vV2)vnWx{k z7IhVmj_%2VLQ0O)R2hsIIOY0z6c{4aUaQy^D9xV|x;-;vLO_IuqJ6wmUfzPV_xIXP z&5^11E!?<&(V|1~5wWrVo&Ed{M={K1xU%KM)ePoW3{k6R9654A^5sj;>1wK_Z|Bdt zwe|K4!@!MyfB#kr3QB2d)pXFB;Kg=Yh;_4FJmW0Z7*FNbPuK&awG-dRao*m>x@zCg zWAU@*zt>;6BICy7>CgYihl<)U2VBevUomwxbB03ln;T15Yk#Tu+dZA~?qm8KLFwu9 z-hC{6sdVu1Ld7SIuX)m}j4lVxKDT)BUa>OHNh%AMxE$iQXL%L2jz{L?$xMw^tGH@5 zmaqS!)(}7YPuJF@xoaHwXIgChKA(;6jp@HL#?{CAnjxXohQ|;FBtk zpA%kResN;0OXgcE-aWhK&DBl5x~gsQ{Tg8n9ldGS-rsIdXMB@$Y4r`RQX|H@xo2iP zu{JjP6>pRoH2=uQ$I6QyJ<8y?nyPAadXr0Iu1Mikk5#>|ju~VxeCnQT6*T$98U58d z-jiDH#Bk>CRTZuO+#6HyMdO}*g}{H7fLkfy+eBC1Wu7}l#4z*Djz<1IX>)_Wdp-+2 zs{S^`mg_*3x8o&|A6II&HZAYHQx$#SnRFz#ScF)b;Dayy&eKlTRbGsqrkZn;>E+Vt z6TaWhzY!YH(0=;#$5P?AcVgkytX3&6F6>}UK0fc$@0*+3zepLYtqHEM;9hrvee=X9 zZh^nb3EE|kYU-{m`|f){q%t?Ra;wg6{!Q|0b0;z~goYl7&R_WG@Ar-GPYBvn|1h}c zV4WkBlkd>pD}O)xr*bUsMo`;aU0WaHT=^ z;w{@=_T8Ot_UY5<*Vk1&rGK83$#^2w$nfE$`ijbru5ZqtKYr;>j1A+0du#O^ll*)`M2MRW%ItPP9`508_T}^`SzyPpQ;5ncj_>|zP|X~ zN%f89_VWGm77f>%{slz^wAtL-Y)jXUWlZW=(FsA0Co0`SAK> zu2;nx`{vqKKZ*YQ+<#WVF-gNfyZKjc{9@2}kQJ@oqtkAoB)wf*I+}Z5zggFTHRWlx z8)h9hd%W@|<5{z+3!x2DQ|F1UT+cqQM#$anzfAn!U)o{|d+)a91UvLgPv4aB?ak(Q z`#+s(JUQ2T^?|IltarN>em|2XV#+qV_n3*yqHSq!w>>(c-FIitiBsD$GU`LFDQywU z&`x;%oVomNYwYf_hYSDy{gq0-!I z%UTJgcpavo-YFJ9fwTgc@d~NLv_60Y- znO{HiY3mop8JUM#rmTLq^LZD4-tOF(s-z^3j^mFTk3VL8(0Kgu#WPbo5BNPhWyCNc zmyv1XBz@!g8o{?G)}+SzhS!N?*(97^G5O3-o+|lQ`+Mr7BftO5nq?RN`*GUoW-sIU z+EO+Wg3sr+|7+)ed}x=I$h7I(-`#$>R`SJ(oyEuX;x|1x6@A`HThq)Wgj*)O*c_-lzKC_^z3o+Uzy3;{efhM5 z9w%FpcJ)zxVbFb-Zonl@a9=6Y_?QX9)dG5rdj@d6>*enhc3_N-v=UmjpW_HE9WzyTy z(jRYnl=Q~*k5aV5T0N)q)TRI4ov)ww|JOZn!kLjaGGHBHJQzJTG@+t@9Wu{AuG&$eof?yiPPI9 zuU^&A(LKs{kTp|qtJ(tF=`lusma@yusK4IAd3W*NIDY0$S;{Bout^8}&&g=Gn!5J> z)hKQ&U;9dvfQ;|?W*`4-|5R!7<7Lq?hiu)pZ|nztWh__te1MH%KK~3W(Ys2@g=vnA zLfu^-B%?Z`xGyAji~0U~^RRvW4g*tDBc2aM4q@t6N}CxjE4;p3S?M*4X^os$ZRJek zjkizCs$KQ_!PXNQ2UE5`-1;PgF?{;Z1ncz@T?^P6dU#6gzJ~Ptcx%q1uW_JV!EU+h zRjGz|Rp0M!-uM4>eA+xQ;m4mpMH!xWV%jjf_tGB?pOie^7bcU|_3zsASL(0Ek&Dy* zD?faieelWFvKa@nV+@}xm1KPTk-^iG@!nmunst`9E?dd|x{H71 zD;|`;UcW0V<^6>K{VjqK@(Z{Ai`*(@vLPk%*^=g4Vccu9k~?~JTi4v|x9AsLzedcf z?ueLc>b$cBY+L4a#T zK{a-)ai!~ePk3!vmKe5p71O3k%btC_mGV^h_R7jXD^9s3?45Hsi*L8aqHcw2a?9_f zMX=xUoTW6M-{QH9%DY>wD^pW{EZQWb+9aG~WNLk(*I@nL4~HB**`@cMRW`l-DKRx~+A%%mhU?sOYR!a- ziVChxRQ|DP6_-1&|B+Nv?F)Z@AOEuZ_q)pYv$IS!PQDIh2z|@e7}7QHgkUEBNMq*O{?Ll%F9_O-)S2mBnK{jILFIq>9U$Njz1lfAq)>{_+zXVn{1 z=?iyvxqge;l=APFx4!HwC#B6?8^zMzcczM_SSfwZTh#G}O?K(mdo#~IW=>BLpQUAF z{W~yY?bN>ILpl@ z+Vh2lm~4LQ_{}w7oX^lvSYw&9!m@$y_vv`OxcaYG7gznR={mnuc?)}kKl6G!uk7b* zmaAPll(R*zZQ}HFd5w*08eU!NcjD2T`&{hAqT04r#{YlR+fvh#EE^22`}r+c#&)at ztf_GL|9{_u+gjSS3pBT{VKh6m>6`QGjeFHkWhie53|XeUQEG zV*1e|4FCU`pOY|M^-^9WBZ$BDr0U`SpIm_W4=Y({D_-K3=(a(F0io@cZ@u|JW~G=GK<|{@2XIYks9aJ@q15m*JOFcJ`lh3Axz|qHN7;_lZtednn74 zpJCEuL8a3k=T9DNymV<YTF#O=-)Fmf_F^vX zb#iH8*Y>~IW^QRK|9Afk*$HzU)_z%j?u5hgH#t2z8V6pz%9?U8)wH#}Z_l?|pTp)> z=e@6J`2XFxJ3cX3r~O3Si|>y2U2QC%oZGo8kTKhKDU1-)~AP2b>ECNm;G~?-|!!}rQ*n4 z>mA*8Eg^Ku>$CGWpDz2O^)0s=cm)A@wQ7_z`Vvnj%`B1XO`9?-WS>H z&;I%_$@`b;C9#I<^PAH3w>&zP)rJg=nniZdlu_voh7 zo3oZLdVBH>`xCn#n|Sv8&eK)%({T<7;k(mP$|JdD3 z+~P{cy1e05b`igJ#NKefzI@)S{mB;>IiCCyYrE8Y#lO7y`_g4!ul1L?)7jzoENaG4 zW$DgkrrDbckBYuuv}aFPg;DVGZwEJ}8Vg8EbG`k*ZK#KSPZeqJ}H@V%o8 zo%fteOuKhwEk{m`US%bt&zy)H4r7KX(@$Icud$D-yZLkT>bwY^K&iRkUo7^Jw-V8} zu)E;K%36|@^(FK3GheH{-r{eYXRqp2$gKIVEw`t2t6Y`#p(dV;h0E;2kBP^>IoNAn z*V4w89sj0JR&~dRLsM6*Pv5&$^mfA0{nqUJ_r)Ds6M6K;#>4q5Yn+)2{(fbw`Pgdx z?Qq83JgL3+7PA}VN}cQJJ<4&My-T5So?Y#i)57X*?kfeSf4!iSlB05!Ig7_w`sB;M z4V(EyL}U)%+4){UV1xKlvwXWI>vs|Sb~RxOOeb%yy;JBeX_mk3J6op%!wl`B zOD;P&BB1hHGyBdK`#%qr*TmN!xM=zB%Vois0!OJylav;+_lF;R`jp)`^ZUlqY}s|Y z&)pREy?WKbmDQkl+2XVQCo*?$4&0?QpTDEe?acc9j|wU+w^>@+Ef=&Z`SfICIlt5s z?qGK5=e#XSpU&h+-S!dB@ZooenVFP$?LYUMd$l6#_wPIUP?KpB&zSTrouq2VZP(omb6u*ROw`x?oi;IS-I=4`-Yzv@Kc{hD z@5`@0YooI_R(&muK7MZ1LdJ?O7Z;Qz?yfwiGxy%|N#)07U(BD?6+K60zVL#>%Y7s_ zZ}692@6WHj{;AV!zkFpWl{*qSfky*&$AmvW&Mv>>^R3t0@7+4QWX8cv_L%J3d%sSz z?tilPzg>X%lN)Xa-O}Ua_Sz&e9jg9%h{>Yj&{4AskB@)fRex^}qwN;)yC?n&UYN2l z{;2X@X_NZ+4@l^2_6AB+2JzKVpHnV@*L&OH5GL&{aNjNBHfqg!4^WiNViQSj!;BfA$b zdsqMY+2Jz3vmvw1wVN3_l~&I!kxKcTvA1&b?#J?Z_uibVd-E$r`M^}8lAGD<9QVuF z8k|38c$4YZm&^WpYcD9|YcL8W>}UC>ees4zj?B`tie~-Swr;I^bEELcxs@|tJ)b|7 zW37FDvctiX&5rZzvhwe~(z{??#`1o#=*^EO-G1)Ts6A)d;o)<{x2*5D(S^=V!QiyB zQg_<73Gcam^OntD-JIMMbEfInQ;z>$s38}1_y4)Mu8!ODHr(3QoBJ~1;j{#vm60K0 z`QJaEf48se_Ivg+vxCC5VKtlPYP|i-I6bE6-^Ps=mtPr1u>JaSnO#rP+Ds=%M6~k# z=fw|&e?LjcVVD21*!`~jlI1&gh*fdilERVJoTkTM8{r5{F^WnocrPaP{GRuR`f z-u*Adn4>;Pcw_;2Yu9v-QdMuuHcmTcY*{gRWnR2=JRVG(uP z`}V@VP2sQSZoPeTtE5{w_s{=gqN4AZW8?goeVsL%Zd9>|y>IvNWh~iwda*e>_l6he z79MBTiDSllJTGJ-1ozjUlUmv~*rs)V8Zzvn94X_}CEI z=Q&YUTd8foX2ZLzlM}5bpRl{3>z`kKGu7k8efjOPYPY?Nozci#-P_+CtYoy{AKQ{+ zsh*SF{DG6HXUwn-&LizaN&Um2iLs|(%2o=e77&o z{xOdVqeRk{xevF`n>lk!#+Mfz8E?fkjL&mlI`C-?+kK}Mf9Kw9oN=$-rA6Gi?TBQ& z;cO{GC9xWXv)7upD=EA0;{I}B;ghwYzN)(!q$5SDel~Fn%%7E2qkTuw%XRs-TSw5X|=U%&L*+0}V18E?PHTe$V)3-_gG?f?G? z+59>sGBa~-;j5Jw=VT{{EM&|+;Iu)$RLV?;W#!6uy|>B^vaWmUrJ??y>)YZpd*c*z zdG9io2u>Aenma?HGA}GF$NXyC3h@}}*0TwZm+pSL*mbR(&@%t`y)QD#XT8_kyG8wo zea^S}?c3j(SWPzRcR%*#=DU!R|mpn^Ogw(}K&Fh4o}D zE(kAT@Y&TS<(_(v!{b?h=teVV7QMOoVk=c!+N%prs@9%ezogSf?!l+i!3lh7IqPn3 zRtk@K$ai6Ly7}6Av)~QelK)h+e1Fr~?fR5QspL9i?5+<{CO6EIi+9}FDE&?OTWmQm z|E{gc&(5Tzq-c~K)_66`HAQaAwY4WcJhYorUsv$oLQ(O+{C^@>^m0z>-gbPyzPfxt zu=+<{v5VX9YU-}dmZ@cKF}Z&6Vj%yfZSAtsoogHNm-cWZ{oTgQ=JI&e>R-2arb$)W z9AM_>X)@fCcCDGs#6tVigXW2jjvsg5XZ-ui^8P;6BiUa)yqsiMnwNGr$k~?Vg!EkV z%FwraJ9lfX#S4u*wiQ$R+~3sQ$c>n=<#+vyg@GPEJzL7YO582-w&#%+-~jKU|Auc{OcH@c%PePdr?{zB>B;%XwBSJG)d;-KuP4cfEn{`LeJWAcs;m0y z&Fqa2{zt62C2AB~ool})s#Gm0xmP>v^0Lz7+wb?edwFU6n4f*r{j(0c{;bc}&aUgwT@K4e(B%J9m@jR#hR9NN2eY;}9iTMvm- zeEav1>@JL$KQBAam;nEUlRD<%J?K(@!}V1 zfyI)ls--JdeNr|wsYvn$yonxM&!oAujq*66n|~5xa%Gq9PGS%0{7B}l{GiN?7CyYAMkU*r!DUv zUn>_a<<6MtY5s5bvW0!!+^#Wm`n{gJx(RMk`rw-SCW^m0Oht+5#pUB~c5UtH`LlWM zLG_pGR=#}sf=Pjw=fbWDcR}+$Cr{2RxEHVV`_6*0yDvhy(ykroT3e81|KhRnW*Zyn zge|?1BIQZ8H{QO9oH*I_?=M9yJGotc8+LKbogiRor0CdWq1Eh_tll}9|J){?x)ign zYkTHzIy3Y0B8lDSnigzTy0&<;dii{UH{3W^HS|EhK(B~%BD!_MYES= zWJK)WVexd)Cmr^==Vm8t;5#RCW!J^&$?O@%2hOf(&^W7i#_hB~%{*c4qE9Q``57AC z?OV?GqNC%&IYmXTEw2`bZd$YUjpTafERki7rk~D*?J@cN^y7il>#lKEI5JYL@9mPE zaaL*HRQZORzfP+6x0h_-;&c?5y`0&~=#seGue=q3Gh4%>HJ2PT?K-ok)PcvsKgVU+ zWmkvK3s{es9p-S@Y%}rb2C0p28)`l=t<})+v9H_0_u|8c2?bvtX`U@AO5Qy84#%z? zsm*^F4*X$Y*jJvmc>BzVjF+kRc5!4m@C(gN6Ir{~C3CgiEuOfD2>Z$-b`8=}tKKhI z!fd5;Mta@XlU^!ei?&|5A)_An_WYz}dv|_(%sVY{lg=c;3#abn%w*pbDI#+4dE4@` zsCBicPhEca@S&JeNyZBG5ZQaDZ|_Jhbe+J!=R1qZ{I{9TtAd%9yLBdUBw3y1$S|I8 z@XXqOe`hnk+w$nfzqM^EdT)9C-g15Q^XbLOuG3DgR^HRQefv>S$HLQWv4$*@**-LG z(>7l4vecMcY1W;NYhhY_X$H-k?|n-t_u<=I`?IF1!NQ@s=1FL7Sfbe09EPeJ2R+NR zv;}8Pf0nW-OK?p?*0mOPN6TF~wzd`#kf|A&%7zQ&9)T3iX)+WjZTXh#&k~np-xF{+QqJJ0szKe3a z(C1^%IO)HuRV(+Do33=#ZN<|i)m!dVT*z5s7G-8DIyrUswZrd@G@XquPjbFd@-61p z@*b;yvppy2xV*S^tIf*P>2tK%k=zRpLbzVm{PMWV78RMkc$V-2#?NB>+j{PP%UsOi zUw`bRruKyER~^6avzEP3TFSdM>$yr%bL%>uNxM7TrrHZ$*f1f@HEfZQ^>fEF>ozNB zpMG;kC+hQpZA=yHXVQ`ue)}DGdfG+C*kIum;rgrgr8-AThFnTzVc!yblkpZu!GWl5 z_ETF?7*_ibE{tR&Gejha`jwkse+!fmu-VJJR56WUA?XZsDIp1nl|}zBF7w`gLQpV zizgqfuM0}E+t2)L*_DTf85(MHcJaNKB=rCM&!15fdK38y*rse`YWv$Bw|bS-hfk;7 z142U5v#pa~WG;BqbUyF&y3hCK-!Mzi(_x9LlU%VT=cV#Rua6gZ?sUl1tV{b`>hS#e z?{YZ{ffbB%_V`~oCmsLxfd8^?y;BF9SSu^u?M&+a9dN0+^G@1v^{#7gw&k*?G5olF zzk!vzpZ-nQ^aRj@`>2_1f{6IJ&+SM}4UKOG$( z9xUo!#PWN~tH$0%`ur(kF&8CN(k{-r&UDGD^rGt$^XNRi4T;W|>!!utNR_;?U*44A z(@h(>3yqh8>StP6Bs;LLuG87QZ2gzNkL6Vv8o2d)oJve$?wG%qKcV#{Y_X~S`ciEx zsTr1&b?pC^oR)3pcz4QcO0%%&j>F+$AwRlby*d+Zc4VsKa^>z%%k%fGJjN~`qGWQV zS=IYdPLA}MLYB*q4*oNUcyYUJnvUZ7jTQg@F6+M(rh59o$Hxw(rd$<2V-~Dl{ks0f zX7@iwweH>Ei}IMOmHP6B-MP1!LicE_X%C9`Oj+A$c;b;_NvdVT z&(GW&lih8FglcyD|0i+x`OW!JtBx+tef976$?AjUZy27P=jZb=*J@+fkjky~`O^nc zeSedrZ~ph|Uh9YM6Y&g+*5+97r)z=E>%_uayA)3g(uXM%81{VTt6Y1OU7n%*&O^T~si|yMwqM?b$1fB)kan-m z#o_vsr$4<_J9AHNs1j~vJFu^R?Rx%%dwY^>KJ)C{{PW4=jWbL%&pi4T7Pmad)OLng z|2FBVk-jsTZ2nn9{`-C3bIH2vUyHM)J}OlN3VJcr9#=W=z_xYjRNmcQUT0=K=ur%; z=UKxm=sbOHZ2!s0-5W|WP4~n-$gl4{xORKlvzl!eZ*OqPH2QH|Un_t4(ZzpVQs?FE z=B;=lc;GSPy^3asec97Ln&sW#E~{^Cb;#8%>pI2Jc<%i7{r?~L2kqrr_A`E0?f*4? zQ>QZOLJ~XbF6F7bQjBfqDKTm%?uiZOk;*Y&?TiNAottRdAInVgCQ`{_8 zvS7)UkYnHP|K3WVOza$`JV|~5z zM4jp?O@Yb$6H1-y1vkdJUVYA$DtTkCb;I&`s~i&-ww&^kY;6_#_J(tgMMgjS3@cNf z?f0ZQ7b!_zk6r$a=gb+MgZK8n>2|I-aOrhegIUuI-Q&zVYTZ+!7SFsM^Y6L+l0!ev z7#|RiXZZh1e5Q?D!UG2dL&1(NCHp4(e}WPQ2WH+)sFz^pUnIaZgE4tQV4&a+D{JeN zFAol~_=TEV18~<|t>pU58l44+9+*Z#&GrOk4r1oMC$)uSbk6P*VRWy?^PU)tJu5hD#L+|aw*%_ zT-DH;v>|7vyj(x~#fuI>lf;vcw<#$$)^q6Zk7;M+US*nDS8v`jC*hXX1)CL>uaz?V z<0dd3c<=t0+aq-EH>rZFp?^LdJ$j(ue$uIESJsW3(_`I~w#BYqc!Njv?fQDxkH+T} zxG#L)6rA$whDX}6@6$!KYn(gl-ITA?yytnb*TnO2b z`%4PlI1zN=Wmv{F&alHQzb##(jX&gSq~*6PEsCkW1S2S7%ksp=@S_# z*CVym_DH92|Kl5m<@a=MGj56y6+O_xd27oVpW0a-ehZ$u7P)#)f8^oEH_!BUjCQ`m z{hfJp=lJhmb1&=5R;CR%>!#^lmD-VfJVhrWKwZP%Jy|{Q&y%)@i8{>JW1OqDUab0C z`uT0or#T@kQ$O9WZD(Ro+g`MWvG-Dt&i!4YlP*qH&$-W+e1W;CXpQZJ3G?{wl+X7s zwmChG^+SLCmBVw{3;gr?&s@IjKej^}`Db{g%Hztrgra zXJjMCe{F5By|w^D*qRB22U(3y+|IXNP`WC1cf-oyuXBIM*G$-=EbYm%LbRe+$&%sE z_x&32o7t{kzpi}#ROJn+eHlkMa(?|t?)midLPfJH%La?{I`=&q8D8ptxS4+I%M_6Z zr}aMith-*W%s({gVcoO-o0Vyg^8 z!y+Y{?Pc76_!;Z=WkfbM>{-!{=KD+iv;TYLp6A@2Uq6x+w7b9TT=4Ys z#BA%bHD7Bjf99QC^tt^0!ha_O_izQR-mY_cs*|gz=#7dO0?*FOY-UmRKjSqeeM;Vv zFC{{2!`EM1cK!O~^aoX67~J}{l;^2;&#RZ4Vcy^K;N+{F&pqy^8Q*NZ;dNl~>A(NJ z-`HAfz0NI|nO8wTsOiJO=EAi1_p-hRD+j!F@L4|7(Kd}|rA^J@I}fL;Bs}%{6mC_j zwL-2+eS21RH(o#i}%+B(>@|^5(v^DQs@RmyB&IiHECfMy%-(2=yZLL`L zwGW@=>m+u3kNf<$qapRQ#m4Dg_vHUIypgpokj-&RKcIMlVabJnkka>ko7s4sEY>%O z?pV9zV)4XHn;K4dB)pBPjZ0Mga7ug0qT*Yb&wgj+Pg41`=iL6uy&}C5jqmM$am;-9@X4VIxtANh znq_EbhJ882ec-G)Gw-f)Wx-x)_B~&O3{pBi7w383+SqLK^D}dKk7CjH_w)a=i0N(F zX0Of57WSQb>1U0mjF3!sRm91}8HKpPqEBBXvKh0Oycq2A4&9J?F%dS^J zbgJOe&Trp;Jl6R2r#R->_r)h}1-?w(e$#Jp7gx^DFTpia=f8i)IeV>i<}H&4yWbzN zJuD@4;_w`1HU@t?&MQ|UZs$i|`1tsU9gl>=xxg>}Q|I|D`Zh)L+PCa)cddS|yQyg} z|LNA&i0Ihf1@BZ?L>DqXI3c*kwt!)?-X4LE4fpge6e(LpcV7AUF{NbwotOvLc60-GX@$j0%WK>9E6>E4 zl!TU=uJqaWfhi!4Z^_b$`yX#xQ2V>#xSaBYb#cDuT$jAa2&#|RbmSNRb-$1@silk$ z_I?l8m?bJUefsC)t2ZlOfAvS%E#a*7`-H0N>$Em_*)yn%I$X5co#R^lohdwq@z0mZ zm67YG{}I)XaA5nBmm9cy+q5r{buU#vId4rmdaO@%vE0|!>3@sY9LS!M9;O+xY?D#K zx15?ywbti0)|DjfyxB3e;G%29|M&ID3b!96ir-q+uxnxC9oF67<6b+5ySg(!pX03b zcjt4axz@AuFN>^Nr?!37{rdT5-pc%M=4Ud$oAUnIBx!@quS=wCO%5Jvy|Fim)sH%^VPZ3l zj6H9c2d`Mh_Tyi55qrs9t4km6oe=+QlFnB-Ma%N$FEh6bUDM_s(mj1(_4-9mTE#ac z+_%%&US5{8vu?JcGV}WX&+TX2-fh0eW<_)FF}KrYU*2v%F#o^LGd~e2uV3-}vR-Vu zZ|=|ka{Ap(VGplM{O1m>+Bfgcmy@TJ=dSwpM$wkF{_oT~S3kdw`<3k*8F?W7KUa@@ ze!jK3!R6Cs4%1Q<)Y%(Zxj!7!-~XWe+_@?G*BjXcCQfbr>ODOvBtk;|#Wf~VhpK0H ztJj;>=eo!47P>7ey!hW;bCVT|Ij2tD`MWatI1|6E%dd4eLW>W_#Pi2gX`1EUXy9b9 zh+Q&gwPu6c_jfPMYpv{R{}g^%tld#|pp&~vG*fe!P~7*UJC8r#Jo}C zewN(dnd|p;JrdO}$aZA>ztsNVDo>>?iyj{Sc;hht<7pQc3nr#nyPS3lys@H5!e>j^ zmO|y1(R*twcY0o)^y)JE4AbsAHXAShoTo0hVUNvroemx5*=7sBz1#iq%(mPe+B=K| zR(m$GH3Tl+@aF0C!ay&s8oz~R(PmzYpC8Qlo>#=2_BP6>srIX{^Mh$mzfXR) zL)raAQRviLj9NxQFD|v3{49O%m$1#Mp(4JwzB8NO?|D1FzHen{ zsKujqk)N)rtd+6(`FF*t6$g&*E_?W9W$?rU((e!Q7-+AR4!bIMP;9Htjk&u8=SoX8 z+|ECKj#s)ME>rWZz8tyiHnLTSnt@pWqlgE6oZy_V9^(iYkxj*tIe-96S~QmyrwBf z{6>OOtv9n@^T~DW%$qXu)O}~Zdesn}cW@i8bU~dXPfX-4ENA7A#h6 zohG`XGWCsvkG^&9wKOs3X^p8S z8zm(L_zWJX#Vo2d-9C3ilpD)BjrE#2?6!NqNp=1ISH5RQskYd`gBtF>3$HPmFf3TX zG5_fkh4SlqJvN_xN|Lp^-@4{8oLy&bdf`h|tn7vg!*6S2I-|?qnbcp6DqeL}F*vGA z_UF$ZKVGjlv@;g=G&KCMAwI>R)-1}ob<@se^8AJ?}}*trkIHH3dDTLsnff1DGv>D6cDQ`H zEY~ekvf#AtjlF5Sd1ucZyMN(`?X#x9Y&I!|fVjT7H*Z#?A7)D_dDqk8*nH(+&I|q1 zdRxMN7ny`kDHmG$cFk_lnbUQhkJ^5JaC-gzV?R?P+b-Ulz zpy`<_X7upb*r#%--7jR{rG1#SW7k}**I(|uGRlvglD6F^`@_XOCt`w`ZT^~c+5HvC zE`N7~mqCXw`?A#)u|_tABS#WS?(eI3wrsY+mp`8cbFa;hirl{WOtkpYo4S{!qO2lT zOxJgR`uY6h>33pc;=|hF&-8Ejy7Da3S`A&^a8>o>k8igx{(q#Cqx{pAQzto0_U_4( zeKoI6s`}HX58IvF-|Tf;yLIJR=DRBjCjkM;hMN|h z7PXi2a$JMhS!TwBr(Rd~eBvtVlR3Eh_xHz=41DZz*|k~u4%(lVE=h>{{%*(9N8K0S zcWDb;-nsd&b&qeVxya63>z-$!p$BH?FM4yBe`0V%$9{2<2f14&{S(hywU?>ZY2(Hd zUVg`Ho7oZ{G_k(0%@UDx5!!HFC~3;GB|H!KE~K3;w*Sil+RL#$oxe_7BLIl$5-@EgrbHdUD~53jUwBDp^*q>*IMi zdpi4*CksApzaKDlcK57LMIkcweNDHbYUWRGU(fi5V>&x$zW*zRTl@AfR55tGy~ew| zeDjOLX7#KWV{an^L HB{Ts5wR(V` literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png new file mode 100644 index 0000000000000000000000000000000000000000..7fd9e4e6d69f3120428d1d778846d495cea1a989 GIT binary patch literal 18537 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3Fz|G745^rtlpw*nn1P*v zfq~)we<{|*4C+IOdE|U2Z!?1cv!lKG#DB9{InVitat0sNyU+JyzGR)99iR84jLX+} zR6RFvE@p$ML1Bd&SOSW7s1_BNb3(wZKDMZ|?_#fR_sueh?bHYsBZ4nXQn9BOY>-&!< zp3!EJZ7TFTp0UhRzdJVE{JdD;(G`NBURp~T1^2MOzZoUzF3HB2z`*#U?ZkY}1NQ$P zFE&+7QqpYoei-$;wm7?Ho^ATQZTsHODc-NA_UQ48t%o-6*aP;v)TGYl{{G(S)0^Ad zoBR8n9UFrk8@-*Hf*l&>&YvIa(-Z2^F|)tFarSR};mh`J$Nozg_D<#PtoyU+|Aio) zO~?NKU-;<%zrVl#{{H{{{r`QnzrX)9-lFsK%gg(3Z`bR`|11ChZ~yoA|Ns8^`Tpzc z|NHBBODu#yi4m0Oj5?d=&yNk5;O)>59W%$g)Z<>i_L)e*AxQ)PY@(51Ov8bT-eP9U3tsI%0-@&?I;F_V~z|;Z9A_UR}{HEuGWF zFBvr`MQ}EnvI=MzZ|FL(Sf>6_!*Yf#EliJ-Jgxr!|NsB*@B97o|Lgz$`u_6r{@>sK zzrXwY{VCD5@bwb+_ND&#khp+ZmJ@1MNhkBAIa4MGOqd{W;hNZ`Yhn{7KAbw`iI{Nm z!>6eUiH_<WIkeI(qE?{~1&Hik7)wZVHo2Sew0}TY>YR>)Pgje}A*TWc>U4|NsAw zkMp?q8N9iF{=*Bw!?VqKz}Kmd z{%iOCzt9zQe%&LXox1~q-{*ZzQ-AMX5E6Q?^w_N`nc0aNx3|6ceP@QrX_1N|XMx-; z50ol<{{MTnVX~W4$GSzwwB%It^#mP4ZLTnV-!XBOO6Tp62R1j$mvvUAMcy#GCMwwX znAuAYoP?!*965OYyq>zchQ5AYqTOv3=RnI;m%Zv=R@`{|B1P zakKyYKmXw|`{BR8d3Ki@-`RP%;I$ZtV_$Dn^WU%E0+e9?|CeewqNsCb;S!Z)i?fAt>aDm`F-`a{x`eZtl8z* z9U@}7!<(a`vLWnJg1^j(T)sQ8(iMS?cYd^TI58Fli_8oA_dor|hsKBBa@)GaCHrKP zLE-zexJ^GkLo^i3;l`i9%1Fv`Sbx>lQ6|%5Y7I#j^9yzq18j zwGQ9i&C||jILFR+wVeUPisB!`rT}m(07H6tSh_;k($j=jSE&)h_nqfh3qkb&nF#(xyy*zI|ih=PxGZrBhSW z(-V@Kz8FoI%=B#=tIi|I3y<0u_ul--@5?3krR5Z-j#`45hoRN#8HN38#<8$=1!m6DK6NdJAvpaG$~(QLV8fm|4lIfHzMm++@X7uA-QvX#zWUp(b< z%WPTNaPY;}MJpX&KQ=HBm}v1J#^TrC*$TxY2y^12}c}jP9-?#x`NW@=k|v`3fr!(meh+&*-_4K z#pBNgwbbd+gNKRNFHI^ay!i2h!L`f6X{o6#OV}c;8g|WO+^Xf+RhrVWl%Zkr4q?W$ z&Zhk~4#r|j7dEV9%T5*!KXSPD(2Lm$rxbHSIipr7X%#-m>;`51H%-DSq^-LEw?IX7-JG&DHyTlvJvCPuNjJQ^h`!2wytbDFf zV8<`}dI`Ilgoyp~5?_b4tqzy8t5r%25Cx|fCH2A$8w@->K14@l9XS#)Yu2gHA2%8r z8X6cW^*F5yNYHF(Y-D_-@kW*9-Z_qcuO7%xaywJR6wc*vxkkt(dHas83`poEDH5HNv&AbrJ&BePfYF%zE+I})2z6lPfV7`!cb zSMTQLHiN_HWQR%9p$08(L;h|tm9H5LzN&Xh)^th!`|o~EJVR|kW=hNR(v;Ny|NkF1 zV6=9)#;0i+5Zk23#4aPl&zTXC zn%&)>4UCKo3>2=beF$Ol7vtH`@`-<%)Pq*WF0m^a4olc(SQJG#d%C%?g$TAt^*lM+ zAa-9uWnIXj2Cde2Y%1rK4*dVmv%#h3s*lP313Uj09@SIvJ+SnHI(J2B^Rn)lGadc= z9UU4RothF4vn^UB0d<2};f9Tdr%!xfXTSa9$BXQ&uFoGf7#bNV9%pg;<|utN!D(&4 zj0qC|6AI5aq$ot4OLoz!yO1n$HEqquvmMr~3fc({svlPz&tfY3{{R2^j#&*GmNW0* zC`x9U&aq6-rybO_AeT_mB13yc6~^ z$?+$G{wJ*D_5RM-9_Lg2KlyUg-m9IyALAe8IEuTxeX(e3?!&jiE{_&WICAR8&&dy8 z2De=cYjbXwu&X;F_Z1q{b1e4k^_?{Nt*q?Sn>Qm9GMVPtJ?NC($;xo@=KqOFe6N38 zwU00s&w-mfM~(`6da_^En4-sTc3i;6uwL?l^{yA4%`+4O*LX7a zE3o|idHBOiVdqfM!v~vr^6w{gRzaz}{8#l^Nqj_!<%keGO< z_s*e&maVD}b5i90HGeJoe>jM(DEYI!kE)Zv%B!9BH5-pHUYqu^o`*G~O<+qaAH&VW zj)DfBx&$|yYfBi|S*rI0WCu*1*u~X6UHtI(_dLJ98{1SidB{UN=;SYR=1kA3RZ4Yr zZk(LwgoI+woas2NxRI}8nvy5tfAN%lD`f$HI&xf}&;g)^+t)3Z-JWnEyuVM{m{Ir5|lUQ_gn|-}R-Jb-D0)|`lkXD|Pr_7>7Hg$E&7A{ni zl4AOC_jtpV;?@rjRMr{Vg{+O>uC&o?kZ}C-?7{U8`3q~i4)T4tqR%7Y%DRkWo*BDQ zLR#8}O-<9})%}fXejR~0I;qjqfQzfh#U-MpRWc*XDb45KVWr>pRbB}R4yVpe_{Y)m zmw#(T!*t0E)?@V{Z+TiIB6hMcuJmjv(`?|HDjxOAp)Y?;b8<%WDwg(m#fI+ggLmeD zS{&l~5_<7z5IdUG_~hjH?Cl?Sa0r{43WkNTRcz*)X7u19XX;`1j)}k4W!U{c-XI~J z>X6l#q-*dmxnr@af(e`4-V>X6ZY=r0d@Iy&$+W%~4B^%y;nht3P6tmlFc=!LRadw9 z&zHEjHyL6}Qer}afWU)ITS7cNHUtI>KY7x!gp)1emqVFMn##fIRrP`1;uVEW4lBI( z2ZTK0Tl}Ag=YReGf4_f!muG9M&&lyQbLPZTfmzl6l8xsxKJ)X5^h)lw`SAVy|M>m? z>i_?n-}}~{o&8CKAY0B)M(zWb6;cm`{hna;nTfHeh{w!~=lpzQs5=A&61Hy(^YYtv z^{Qw}a&nV8caPFT`J8P$JIhb8x?dp=uO7+d1Q|EEs%xViPTwe`frs7Q2gt!up98o4lGn%E4jhYd4l zI(l~>yf_h(oRThHnez4H#qAp>CM72~xT+NW7q~aiL9oU0qkPV}9>%RR8nvC*#awXu zBR?r=O30^APp2mzmwR)w`1zHU>f7`7{{8gy`u_U=k`fX(Zq96Nu~@RS($usyCwHl` z(iIEKnGda>7aZ5P=Agdm-oa$4Fw=+w-{1fDpZ{-X#>MT$&+GQq{Ct17z5YY3YEBN% zp+h~NKJ_eH*5m2vbL$q*hSr-cj0e9oo=H8BQTafqk%5noEjSq3tV_zt=jY(PI(f>I z2Hsxf^Q#S&dZ!yNERCG_YaNe7b3(_AW7{5yUspN0e}Db|xII5gPEPt9z3tA9^7nV| z@4w+Zr@PxVYS!@!0c(~V@dyrXm_567;zxa*Q~?>a|NT~pZKrcjXmx&MvikY|KPY+Z zt^1pPtVe%!c>Uh;_vg>g*Vt-ZWb*lgK}Lqbv**T@RYn|~f0b9p9k$xqrrB3o#rk@N z)f{ecd6#5UxhcfEdwwTpu2pa6#ECN-8yPiI4HoiEOZJq^cqx9{De1=2 z^Z!3AeFla;O-yP!Kg-BaX_?)>L(-BP)-cRpmVUrYsZXdl!<02^gHpG1@yUMq{r+<* zBEq-7esQtgh3)us{r&p!b=IY?o|@{+pe?5Tve%yuQKl~Fm&U@`~S<2N^t2QQbi zFMK4(>3HbCfddB$UR*G)s|zzU$c7f4)Zq`2GL(SADI1du#n{vxz(BR{pPJi;iY9HD!y8 z6e>w{)1A|;*?lcNG2uYa90LPThnM01|E6_+(o&A>QWeN8cqGqU@{m>P*Xa{K#J8C| z)IS!)wx(NJpy&Vp|M@qIa&K>wwy&E|dFSfu>-Kqfer%}xd|2IopOX`V+oP451uGK| z1RQ8kalP^6`}_a7H$HrQa#CJDep02(q5btd(c27ScDbBeY1`AIp}ahUXH8Rqt^nI6 z3rHw9=_dStctg5F^slS8p24wrm6lj;qhDWM{=T@FKkrV3YmtXS@izr3tJ zA^1kY5t)OFni!pJnfdn4cwoq+k9npk&?cr>A+!-%qNP<1Ug(?yc}) z3lo-*fVO9wObiUWYrjlsELp&yBKzm)RF?@Qe~(5_aM-EtZ?~uJ@74GBm!u1;eE9fy z{q61caeIILJJ@`_!kdFlCfA>B$)Pv$W}1IE`MT!W{H%R+T@pkOv@+;)tIsA#3P;^rG|I*{{DSu=ll@MD}O$vemb9UQ1oVn zm*5&Th~Z5p9+C;VzGu|bPig5d@#7NqQ7#H+UB}~Z_>bT1UwXoi|Ig3wZ|A?izs|RH zamK&Dzv~|#JI>B;XKc`s+#P!+PH2|(#BN@;B^}3Sosihjb!X4d-={=h%_*;cEygS? z49ZYGJ}sijOlxYRw$$m&O4`A+!wq6{lB3_ho}Ld=7}w`LxU>I6CTp37bd3Oy_<^WO zWl*}Ee}Dh~e}AnT74?%2?5_uni2izXv^;04%C#uA?5_tKqPem&8z((`cmMz2ABLrV z6aW61`S371Xi&|<;>5+ioAQm!XFl@BoMeS05l20(2?8t!es8J2=p}fAE%1wL(u_VA z)r*IJU0(kF|B8|+&;I}Ck+=I(_5YvxyG@N1E&<$|L<^!N=Gp%K{8G4d&*76NKm506 z<~~_Hb0Nd)9x3K+ElpcrbAzfVP##d{Qws_HXJoWUNAkD?OY(tfMiXvkgqo(gexBd> z@cw-JdWG-lfvh#ZzWlzsd%ncS_G1dm`V@@5E|B=~^z{AX{r_wJc1`3x$>ik3V_!F6 zzJ-KDff(lu=F}ZsVvx{k3fG=+vG7NNICD_vp@wM|6TVG7%=-WEj)I0~dT(0Ug@u`| zts|#unH@NB;=||X@rxEuV|4EpUE6YCvBb^U|L^br@13a5D#^h)(Z;EZJ3)-6LQASc z#|7m2|NoO3eRuzF&o@m-`O_VE^niNggwyE8I0S7=rb56^!^SB@a=WTMO+me(Kv(seT>mX$+{PTAG}JAUev$J#gR z)}2#Obd+7(vBt~b_6BIU>0E8>sZZ`(kSza9DJg_G*F^Sp#!Gv)w)#7F&iwkdC@1F% zXCNbEn55*n`2GJD9J_DNHfQ25dF6Ni6TO9hOy*0f@d%wdbI&|=H3Oq00ii`rmKm8% zjJGY=uRSzkV}sW9PV#o{vUi)7NUsU}C!W0FkNmL}me*Jsjdq+n@gv^fKO#PU3fI8{ z2M(M*y=d`a_T4L56RIECXEizW=O|6$^Llyc^yvlb&VBlfh zCs!JHE}Y`u5?KxLOH!k`>&2+K9xv>fw{$YDO!{~D&4D$(8@5?qkjOY{%FNCloUq{2 z;dc4>y^CH538tmFm6fR}E7#@arCsrGOaePPDW z=g-vAQ&l|^1w1?pGqOH?`e;~OV6f=eeftpaLQ&}*E*FkS7O2eq-_+D2B-C^4*cuTt z>z*D7CudC_o)U)jb{}|FCxb?c82ZhKn};zeE?~b)T5SE1O0CrY8JYzwB5_%8{I$ zGd=lkjOGdH=^b5399&ApC+37S#LP($)8*mefmoR26!olm=H~y8G}JFh>|psAVpwoY zzT)$?ZPzr+!g_l?HFtJ8dUtPM;W2aWTtQ{o2Tl=EygM#)%}Zk2x$Ecg10U|6>atX8 zo7=M@DLtKoFYU#Pgal>dW&wtbi~VdHwn0bKlbraBmQ=CqU6GLThyC@tN9#>LoJd;6 z|3u@Dw$h=ns8!J+OAa4&bo!N$o?cWUa`sHflxb5F}kHD9E_OqT`n_W$(aYL+j4d}(iW%X_`Y4->a-7WVX= z`u(F|^V=zZ*t?C-%s6&`&jHnrt*wpI>;hasRm;am^DT?(bYp-0zO4Sn)XGReBqbv! z#ls`v!NY?$&6dbKxYf~+&J1n1{`sgpud$KQ-O9m_uV=$8j$FG|#>Lv*k`fWZ>}kIm zjQ;)qU;pyb?##=1I|E(-M*oDpgexv_G55JNsMkL5 z(yssi=jZc6==S_~`fZb9;B!{yx1r+`jJ54^T=y zb7sY|V~<~Iipl6?W@>i4&Ei}0l3{78L&>j(1^aK9JThcI_4n)R|7T~{o9F!4oqBrz z>+6#HY8uZS;^r{W&@jlzHa>FPc*cx?Vh?6}1vN*7&Co*p&qw81jH9TXMDNoR8D?h4Oh{7W{432{-t+EEOKh3X(0TF30Ut3I@!ttX z1xtC>ys4K6Vv&}~sGOc4^H|gPg9`T_H(s%Z{qMUpp+5Qf|G$3L$;Ley%&Yh^pFgfw z`lS}}=Fl=B<4A?#iQe3g1P@BF*Sk7z@NSxUYeIir!hvNu1&mFcOp=hy(DdHs*cum! z0B@!k?WzwuS!Q!wQ)21u4YN^LF43gP94vWIOygR_@x}8B>{+h{H;X^;*M?|JYHVfw z*xAgumE&iBDA~k~DBBaJ6C9?B$6OJkOB);Qc$kvW7Vv zAC`cI#~J?rKNMADV3gp`b;R6ggVtdm;T=tkErYkjLdqlELX2^()kP z?oDB2JR%}-LN*HG;H1X;N7~HTx*~G~%+&p(`gy?Gu*XEg#8?@xPFT{?qPRnIj+h zmFIof?)1ULw?kgu*{uEqgVJs-*gS#bSM_Nd^>b3$c3s`!x~}lsl7s1$Icu%xGm#YjSw}NK5yFI^Q1- zf29P4UExMg7@t)+rf8;^fwP!XcJTuL)rZoHBqbjnUKVkC)ee^pYOFqAXDnr9&PqvG z#?!R)L_N>c9j|8nKf=3>@8^`pty>Q4su5s<+9|Er>S27S%A)7PY2OLAlp>lrbsv2c zk>i*OSv_EHY^;1~#O|AUkBp*A9yRhzxUU+;KAvm-S_wG{dwlhj)2mIsvK-R z6^DddfBe?`arvNvmhQVdi8pplb#!PrczJSGy5R$+9YUa0LI3|Ztzk)auv*j3n9a}X zDW>b2P?o_7nvVYQ;bD33<o#mQ&dA*O>4Sly=I{Pha=N|-3P)8pDI82`tXB}- z{6Ny9;EjZHxOnpE`Tze< zpFDZu)F}m}Lq`sM;&i-S0A--qqs$!y7 z|F^CE_4?f0`PbL|oiZt@tni`B)z+1QuO(mAPdM`b@SK+Ve*0^G_nw$1rPDBRqGRC1 zgC{y3{w!|0y$rL6llJ1uKw5e z&(H6#4&Q%%_VJAe>8bsCmKae=3d9r62`(8_u%#Eg#7a#p79<&(#aFP&fw2q@4#OT z%v+RL`k_PKNrx;ZJhl(o^rJf@VZs#mi&y?Ps&xPV|NrgX-&^gg==ly?=O((8$&D-3IC9sXQ;Nm;PIpBO& z`hS0ZzMpBl-^h5U!IorGuI&1j#wjgEM<%h#eE4jBcWtp6<^s(S9o$M5g|>&O3_YyCa@VAJ|`zP~je zAHDCF4_LZp4Nq2<&;Gi~2L1!+oy}xI8 zXWv-{%f`Ou=EnB+hi9eR-rkms+n2(@o$TS4_Tb5pA{VQV+-E;tw}$4R$T!TMEdT#I zm!C8;FgTDc(~)UlHqWT};pgY|*Vp~Mx98{aHIec6_qE2&IM`9u^nboR&#PBH@%#Th zewKD1O@ukiD&c_G>mL^l7-eL5s=qnhw68N@VF5MRZ0gPw{4j8ilsvr1l}AvJhmVhE z)hZq)rWaM5$82XMndriM#ZD$36Wu)iD_@tIkuJaZnm*5mmaosx|1W?4Z*A1q+QV({ zpP&2t@596W9UP0NhS>8}@#!;6Iv*b4WD5^xla^**#Jt7O5k?q8Cn>j!a*)Y8%~>>9Vc#ko!2 zUf7v`n*F5Er+X9b%#__{qr@>M`iW{(>ipD70omI9k-LRt3*TJb>NRQhwYb}PF51HBE%I$1!x_>jd34M&D)~=Xr~D z5_v9(1gfmw8$bWQg|5mWV zkN5xg@-S#^-^|Xw`Sb~ckDrb8qP{%7vGKl>Q_uT*ieKvfo)Qo^aOlJj`Ni_xGk{baYjX^p>gd|)=-PVl#tg>@iKjvGXB-V;I3exKJ9F1^H77{4&hN}HsBsnK znYQ)rfdd+B`UhY1N~xa^5H!*0x?!sMFE??AyjjHkm-@@+d}x_o{H|J?|LwNfuTQ@g z-;>OKVO#dBbIUK6*!jB}nx9RQ2ohMaZ@oOJ|e!g`>pz9T=Ok<-S=MAm1@Xy z`r5Hyo)R@IP5CE(r1vVPb_$fpHY^Ibe&d@kgGujU#>+=aJ%t|4(0N?6aMg+87KiJc z&NlqF|2_OV)iMtp!gppFay2^$xbAl|%Vgo+C~)M`A?-JsHm$wWuejMXwDvxD*L60E zBdjyIt3$=BsKLkfu8rNEhxuB&zwawEtW-~)cYUAwQZdC9xzl&mg<=ywIesOZo386OgJd9WQoBMlZmOaQJv344vJg+tM5=yJyxj9>>!}f z*Pq?Sc%Ak61DQ;&r4ycKNR`O8F-9%cd1xUmlV1DxFH1EOc-8AHTdw8+ftKUT9R(aW zh>IMVuN)Ck5!auzoLwNgQq13_^T=J3PEVdmcG0}iXQur&JhS*s@s1tEFD!~~=cQKW z&RYB4Q!}h>28vAAF}@VCJ=gd8%a`YtopdUQy+6rikMZ$cN`0=^110MA z_Zt^_SpTb^;mD#H7`pzc&EfBkC9`rP#F`k^btfJ$aPZP#J?|`1Yw`Y(df6TD;?yUx zyE$6|1m@hWZTu+Q!{fqZVjys2-d4ViY}^|d3}loN&v`CIkD}1ut34ASvr$f8M!JJ)|lUC+I()aws+?3tDg&w#4F8E>s69soBq7U zm32?BR-Ge@;FrJmlPe{XWaQxpnJ=GM@!w zk&Ln7L^I$1IWCY=w<+crv%b~?F=HcxKhGZ}UAUGuWlGA!2MJG}rapO`oRHMiUy!g( zCQxiL7q{`9-QUAqS{~js7TNB=^YGcf<7du%c*?EJ&%e&?SO?qwI>SA6XFt3WJ-j+R z05o|U7|52F2O15&v2pJ1e#xg5Ts%p~j{p0g4z*cMf<3eSkh`nmQVwnH<_3nv3lx}{ znO&wa>T#Ph-ZV)$_WJt&B}*me+5Od5Zho=S*uY`>?f-vHe)#>p-ptHq?v06jar@IO z3Yi2e-@M_`(&G95KiOZB$HMHQvB3g$rWmdbUXVhx>D;M9(vlw*`v3T+`h$c0m4Mcv zf{xo0j@JMGcmC|z4?jNIGctmf+|8IN`QZEIn%~C`9Q*eFv^ZN?8E9fXBJzK-aq*I7 z^L&XuxzrzDxdjt#Y(TSQD^@+a(zLURdF8|gr6&7Y1#Qrf-T(iGV%{{{^tbM8`XK*i zO`*wAjZ+bk|MypYoj!f~!*6f@uUW%$!d$$t&@g8Au>)t%e)#nCzkU6L_lsp%13;@q z54ZF5%Ng3#^$7g=+1O|K|DsI=lYWf9YtPFMm{X%^IHl^~N=SPks2NdsWPpPyN7=BOhK0 zAMTUoxxY_w+UJ5IqZ6l%KYVzwC!X7L`BBIIddaxGNeL+_760Zuyspot zrp9*a{Vf^42any`Hm6IzyPI6`TTIYMMuw-R=E%FagI9kXh-!Y|>5Qo0cOHBFzkbRl zMom+P)1psO8Y9mfxVdJ{|9g9WoqSR@ zle|4>Np{7Tj)z||+veK~KKlRPSVF>J!=_1M0WM6udf@wxpVTvGv1P}IdkVWwzeL;KVQ)jw65&o z`*^k+MW(ly($dzx{g?9k8JAyI<$1sE@(Q2J3M_{m1SY6t)TSu@f7Tx4BYgUhMbPyS z(V$JaYhG)9c(zB2>(nG2<@27wHS;b0Z8UW7`ZaIc($_oO@@)API~Mit*vXp~{qgy% zndxQVB)(?uMy_TDo;G_Mc6kRYtsRz~oBbbT-+!Rck=Hc2`9alo*?C^=UeT+AG|q}R zG1>p&*`NzS|Ab? zeBFfi+Pi?CcAiboW3N37fBf5~u*CYT`r4P(@4yaTGRK;)$w8paeiA3I+X@eh#v{fR zJb5P+-@I92zG35*s1-U(9GUYHH{axNOn!9#+~OJERVGht-KGEaaq#g-*GUtWbxcSt zx!aSXTYKJQp{bIKpzq^PMKU+@HcnG;ICArZ|C;Xj-E#Lrzgs7Ea7^dkEuSoBpfv-M1T| zd$zxcf7bcF@sVWD!-^-YptixB+55Sg5=4&Hch&t=cj2%zkZze4w_y2;n^g;zJ!31( zC_3fKImKm*Lq}WFD{bjrmuJ2!*eP>v=bUT}8G|*JY%iZ=$FA==?z8;LRw-NdQ%s~P`WpzMjELQZ^^e-S_6pBG_C!=$?J3{8{{p*Z&A|~fXO=DZAqSo| z`R+##BCjVs?BdhoitYL@A2{LBwUx~m4mz*3SfN{WXl3)G?M-btCnxYvGFQ}$`x^K9 z)AF)UF4J0bm08w#%h=D>X36ZjR$}$#_m$Tce1R^TJ5whmr+&7y7)m&=Y`He&bc$r8+uCCZ4M8o6hZQUTH5_VX+SdENzUEW) z?61`?Ur+j!8n*XSh0prqImb9bQMhK7A$L=NNUOb^63e#@2UHcA7B19Osi@x_|3G5O zYHw#3g^ZUI_9!eo*d`aKcJabrX7P8HHFubgSA~9GY4tYY(y^2JI-*S%-m54Al9KTo)ncp&A#XO^~`jen14 z-p#pi>*T{-W%_3h&FFkk;o{Yz;A<)KqB>0b(Tvwq|H>&!xNnPI#kEw!d-+*?@ik9t zp4HVqzi*-Dd8f$k*mwS?yTfXwXI_f(Q{QxE?n`EnALhi^a<>@p)H*w}7rshzEV%4> z!Tw!;K}msD-l82GO&tq59uypIRhoC=ZmAY;Yt6?=)18h!Kbt;b$A->y4o@K$!A)=C zjy^tM(#TPJUN+1GxGQ&$r5xZeuLv zO%4cENc!~k>?SX%!?QQF1f3|}q1SR?lLE(uZ5O0|_f@t{k#Ohxe^W{GVWmfK#^24o z+a(L8#RVLf+vi;zSkV0q+-FEScDIZn%PMk6;E^eEwe(|x;p}*hxr55~8 zG~V*TOFT{}U}@QdhmB@G_SdHy7=XKh;pqWEP48UO($W%=nv^=O*8elGsXkWll@%KgW(@Nd(|##vCVhlsXpAJ z^pEwSpH#$+#;4KGY7XCV_|R>%&P40u68`(@z9)^^9-etv&s(_v*n`R=hK>!Xf^90C zmFv0%4|T5Ve!PM)?edFVn{-?RQ$1rZn`~M8>tW@x_gjy&yQy8eZ{J|^AX9|phFPd< z;GWp8ERx4-;`w*ynjcqAo}sp@ukeZ_{|9iyuYowRb-$@*Q1K5KDsnfaO+$8Y~w{AY7cw&OEy-&xjY-}qp2cSY#5ExOn4HScHtdd!65L~$Rp z;47)Mnp}q_=qz4!VU~s6#(-1v-Ly+LJyQBytKMb6mOMl4nWW&f*1{uElUOFUuUA}d zvh;Pvto!d5-52LuUwKDuN73yU)oqQXdw=PJi^Mgvb@^Hxcv|->GKz3=GWqXtG@a)B zV5fSLNQR17;}iGIO`MEvb5ka-&6@T6)|T3{s~2v1rDR*~a_7PIUmeM(gw{&7?H6?y zkVu=cY}rHkD+*l!T{_M!HtDx5;$ELSezErW=e$XI+hffg1zjZEr?u`{mV0qlW>V}+ zt7nq;&8xT9UjKRW(1b+`R(VWXFJb+9+kdO1E3u{($7Z+ftB-5813NLP<~yHZwUa1Y z*oSAl&R$%HkM&xJF4)2OjM2<2Lr`6bNtU@mXVc%0&5q&I9i5sUKKExcG-P9DW@BY- z(~n=URYF}Iw6?dW$7If%)-^7PiHV>UL{Gih($d-{t4sRLO-e{jt@tzJ;W=4QXXyEP z$$x*76Ox^0)~IvLm?5!dt>mj$5^vsYP)|B@i)C}s!Nm-|fBx6Mu&IodJ-jZqqjo1~ zqK@JJ|21>GA?=joUg26I{R>`*+BlwnyHditf*!iNp-j!fi>J>#&jtK{OF zPYMD;|5$_`N&K^T8vSr%mh}18M;{;jd+zNqn|i;#vjqw)>$-!=EGA6SNsbg~GTHh= z{#XosY%P;o47Tvq|DW=;e3qJ@`jkbP1+l;8?Q+vxvLttB%nK`qr!cms8`{<9_7T2TxHe3miL?AFVS>NP>G+mF_(ogGL6AQsJQ3H zdC9-*mv7}*{E|EEl-+XRvp`eju|f$p<@lvXN@uD)l2{6I>n4-)S>?MK{>Zf37az8L z?IeBN@p8cRExJK*+K^;%WzI&f1_K_K`#~>M_#Oqlc>XK9u5od9^zIvF1p!=qXHx=8 zzMPn1di?1FrxqC-zq$Q?9>h5{%$cbcwCMfJ^pjy;B}ux93rvq~SeL}pUB70&^k1gq zjX^ommfWJV*q*!SBp->|v;1{N^u!5LTc6fMXyngY`fYA%qJ)I|&OJLHuz%_!B$ZD8!&+}6q(sKt|W>cIPiRm`uJ@uVF6p)Jnw`>A)^*VmHY-X>Rk z=c_)IkdOd1z>!<9V)+;Qtn=(Q)pv;eIWCbb+oH<2tp4MPB&`L;2SOJbBrM^1qQn?E zjWIf|_V+Z}?)@pqVe5E8)OgqX)MNL$!H$c;fdP#q0);xSyS^(H&|CDS?w3dz6d?rt=kaE^ajn zi?nLdDROf8Y&=hiG5ke>8h1=v!1eO~f!4}AEtgor*qHmKeEh6_`0Q++=@ugAy-Nz+tXIPFj=n8B! zdQkQ8xcR>{mQ5T7LrynL4A>wR_~Wdi(c}mIGCWVz1XI4e5sRAmVd8*FV<($1Qh ziMLGH^z_(*gI}butbEDr%b0dyLI~qSCKh9*e=II3Txv#!$ z!eV}T2{GNI8ygy1rW`zQ0Gc{dQc@l~N!hR=;l$Zg1H(qWlEX5$4$0hgntVvaQP@2# z;i(Ykg;xrHPJMWx8YEulF7$=z+MKpb!L&yS2c~(Q&^FswpFYE&G4Z)y8#}+mzIycs z2`NyMe?oS4X@^UsolQzeNH8#M^?Z8(+PdsOaN&dZ?~<4%y^>1W@<5;G!kiX&<*4i_3Hy0gTka5ImrHe-6cK#v_jjJN zXD{64Pnk0rI(y2=Z)nI?Tidoog-u#I~b*l+oSa^99oAb zGkuuR_RPSfkx?ZucYhh}UX1l6=$knzBAfwDNEqQ%$?ucva)ckHf*c z1cLV``0AbLnQt$7ZeHpPv)+eCIJ?3OK`HA0f2a6_KksA@uMFmi-)B(s=fs1jM-QCm zzx#)eea2NT!(O4JB-U_^WHy-pcb1|`i=$IhV=L=kg;oZo-|LPn=V1%7 zQeauD5aIZsT=ZJX!3HVMBNo!DHy(My=yP;OmY7A=myX24Y@mWD@9v^QU9pg0I26w8 z?QL99ZQRmkv|^=WKSP2`)ThG@+s(VDcI$M!Fv>vmpC^!IbzR@hi4?)%=10uP96Xm3UcSE*WkSl zplK>eNuF1)cor?{XinSkEnm_gh+*9(mYpx#@7AV#t^KgwtRvHS(iAn0vZn<%n?hTD zOnUst6*STI?S1-=I@!NA3adat3NllXCn8c(L_{JcMnXnb(!^BK#YMpT?+2b70Uwzk z!EDTey#8w%-Y_xj{~00MaFfB^to_6>qluYJhkktNOnev9_V~DD-aSw>Tj}yZD;uS? z7g9i*$x|mxOZ)I8Wy7|#3DeUBL{%?vHD(_{06_b{=TR@v8pbm-s!e?dU%GM>JN~&_bZ+|dQ?H<#DWC}9=tlWV9Ak$gaZm{ZJXC}ZSDEO#?B%jEXm_CUE>Gi z#J!?1yoqb{j`*%utV}A#F>k>Oh0z&l!DF~ z1?|%dmLE^ZIFXRTVtbS=%jz8?|C&zb?5AfN1m8utN_x|KMdJl9=u=vAl$9=gZz6FF6OMPAHSzJ&XlZDx&HAnPyRiFn4O0{ zypaS=FyG&I@$l9eP=`PJ!KbawY;4RdF5bMwgjrs`dA4+OC+C+mqZ5(&KW?uo(3`lH z#pYCo`1c*FoKlWI$jn_(`2SC0#b?mg%C`IaLFwSee)%e{dykLv)c<$7J)0RcSt@^vfz8c4z2w9cuJrGZv znQP6};K1V|f2b~irTV=>_5bdOn>V^!?M0Z)8n*@;Jn=-hV9sHQuU$Ic zThl(z6Ih|OsVDWI<)=9&dk;9i`Q#S+U}DUkL&u*kab2_{h3DEa6Txl1XWiyrUj09F z(R)cY7Y(j8OLH@&j!jfaoVPOjuE=Yz>l@!kJ((}R8gvH8|Nm$1T7ynoIrecsf0G3B z!LS^b_w}0Do;CJd+h;g_W8$`E=g@59(R}v(?;gv0r;6uVdpgAii&(f{zVN-`{O1JS zT<`0Fp*@R}*4;~9`^w$0DAsF9y>^^ok<6}{xje25PqGJHKC)>;?i=gpD=%_(+1V z6Fhe?YR>YyxvFa)?3yqsPh7C4@pqWolQ}CymudtZs%}|OX2`bJnPsaJ%UVs&O4oBo zYK5jb73-W)7H(}dS=h(CS9@;JLI#bn(~;4_o+oD6O#kfnGq`4_BdCD;|DTyb`c>Br TwZ<3qpvd%e^>bP0l+XkKzc=NG literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..86d225e5d2158804f88dca881f69ed3ab287d866 GIT binary patch literal 19086 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s33iWhx45^rtlpw*nn1P*v zfq~)we<{|*4C=IH9_imGctC;YNKY@PEbs4r59i6&x8IB0WGs<#(Av&?&M;m2apu~t z4XaiZt$A5rm21E*zH`ZutHptb<$y}g zh1S5pgWdoC&(=`l<7k@v&i0LdOO?}JWB1#?Y?d$kxh$kS*0gVa;)cvk`?gOx5*u>V zC*(heL(4m-PnAtev^*FXnph45DW$)$o~C+D^tRY@$?KxuWBV3A`Ey9$9&Fc@kd0i; z4kFd%&%dq<3K0+w{P6$(iK9o)dOFs2zh8Q|G~>HdX4`}{iYM!*AC~Y^Ubiaxx~OmP zm9LB6zd!uo_O+=?&l)}6+k5eu<#@(Gq!S zZqDp;oBI~~OkVSP>-$YQhyJ|I*w+Y(mH+>*Ots}|P7rV@zrLVGLdBJ->=EOU-|+{e zE0_+({c*f7cO#eKy#sk|;pdun&X-vw7TMG$Z2R1cwvC7EZ3N(N|6Wp0${g)f32Y z>)Np!?H7)|Xg^rRzjkVIZ?VC%N=~8f#{$NW7SDNfc2|;$cj7spPL=DW^2e=wOXuy> zxnahj;8+*)%v%N=-coNmn`h3PICpOA)Txm%bG)6MgWTP(9dQW=ojP;&?1{a-oz2aS zQ$?jJZ*XyL`NW_tzW0oSuXcf6QPIJ3ZR&zMES3cP{Q3F*a{vG5=KU>yaNvLScZqv@ zlPi8tdwAcz&Ay(kT8j~4(Vouc$&+U`H^+udarX7~2@k(_E6T~axwreZy|J;f^zHhz zvt`UzpWL6J=te*gZyU*7)T z-+zDqUta!S-QVcVos%EFXdYf2&Xa%N=+EENA6{Q~XgB49X0x77XNQKlvuAtz_QnSX zA3NuF^U9Rz^Z(l`Kdu)OV{<$9zo%zLU|&b+44on_hxNV>CUQ-0>NGGg_;WOPv%*rI zH!)APw1)gXaC&O}|9^k~f9(c&t^B=&S#FX=)s%-{v)gu;OaA+lDAAG|0kQGSwAF$K z6a-w#jb_Y95D1(p!6TcXIg@WiquwLq6&nOO^yV{ND9^k9ZqLRC25Hx-KGv1y@h5H$ z({ydPtit(2w0mAgv0LNm)lY5rZ!;|7c@lc)zB|zKbO4k$t zO#QDqpndtJrgGi?!5S2e(ffQM~l?? zYVz0y3duNmIif4<S`YU`FttSERdu) z17!BHH9C1oi(Wigl9ZyNs(O@5Q$2x|f!StWL4VTU!xLKF_?J9!_e#|0WZtRcxJubT zdf|yq=4#32sK*XBSNO_2xx}=#Co$oGhgiYH{q>UTVv{Wjn;!lsY~vP}xVKMsh6>ah z4M!BUwV%9Lk(a+dDN9RR|2%7Ek&%Ic!gK)-Q;us^tm4e;p6M4Ya?IVQP@=e8dX2AN zkJVHAv?i?ilfL`$4#7XC0vCi=fvw$!1} z%?X}XD>~Vh`Z>NT|8vyGaE2+j+tyF=_dX>Y2;6x;Ovmo~lq!e6~`)I?Kke=@14I2ZeO@E%8nCRdq<{^0gz=03_jg5?!Nd{@Q8@4M3 zOkJp|_T(J*yp|^0ix0AL6|#>@DE@7C=Bk|`dNPHnlUc9z4%g4m?GGQhw!OVAxh*Gg zM|pqSMNkXl|Nkws_VTtG@U-mjR$^PQAdy*xE8^i*okJEp$E_A_$Wd@w?{LVq4Q;rYs{ckHoOPig#DmDdYf8OE>GsT8>OVb?}Ah4g1L*Ck8bF#GoRuk_#P;1X$3 z=pzrmvQuYD0>Z@7($iBC5*m)k1kL;?ztXJHRa=QgQEOo!hqo}#hk&${BclE%Rx;X# zO%N6fYD#eF(%Q1(Z-16fm*xNe|J{qsX2o{2eEM&1^Kg;mp(6~k9BMK%CZsej3ksXo zIDful%$$W=r#?I*d8lmyD5x3!|6f$J_0W+c2hN|LIQ_Z2WTuMl+4E<5J_zl3(qL{m z@z(7Bk_Bb%hOY&ul(B7N`mcZNj`Th+mSC~1Z#!f4KgmdE$$k;f-6xm1hG%n;;;J;q z)$tRuXU?~me0Mkb#ft=sl8%SJ%eDBoLlbM-qlCo7ppYkL&+~u&d|62BSyEEdQyq=J z^CwPZ43jvJ%(u4H(PuJKw?)(<$J{l4jvBB`?2*{edwlWPqrx|p8ePA|&(P@#{IE^e z?KiVv!wFt)8>JIXANunA`42C2<~ctvu=p6LGW-93QPiV^hKt({yNkHsis-$G#FH;M( z%>PQuwU#uoq(3~^FiXV1Gv&)W*~3px^6V}*wy8Mq;kS9|BWRi8H@msH`RE}hLw)@V z*TUw_o;`Kq#7@SjnZ}A&r+k?5vA?lV(Cptt zyqp>1!Va1&+M4o^VXbO6(}ry_fx1pYsVPTFY*w9?ytwDXfBRX<7jloBa?M_M$R}Aq zC`m&v?ZM;Z8(TUbzKd;}Z*LguX9UToYoZ+a4hRUyEPD8W|E>O=KMVI4d@()$an@VM za82K;0+qumnw(x|(Eo-YdFxXlZX`zBlWf zP}0%h!(SEJrTX<&{=O1#@pcUpL!h{ErRI;XET79;J}an_R2-(qV^83IV0-d z;+?#jx<&^c$*p>7uXI*<4%bK4O|cQ}<^@ZcV?ud^g_)(LovW;vnVAnavGVlG2Vbd& z*sf#`wmtLYl&O{0*4mpl86_{_QFA%u5Temh#-{Qk;iwLCkZo_wJY{t$wrERKuPc9;acXy{_CdFq2!D(&HTy$Ib+9U@(n;8l`5mOJg$Q=J5 z(88dicgM_ai4e!Ep9z(M>lm~ZRxm_P6P&Fer`uy8!6Ol~;&)=Ndy1LWArTj`#DoLu zc&6`YU~qO}Xq5fW9R(&oaW{#*A{MM^=J3cY?hp6 zG($pi=7eAdB?ZAyek1W?&6$m?+SjJBuq2$l$ugNwu0ir+&yLiIk`cLyF5Z#_u_-Ft zJ2+?Tn!vhrspOwO5;?h&EvGP=K+6cY8okVSWyq+y8AWR{J~gm(A~s&f7N)XW%d z)i7VaF?1TEra@_`79UzU!ax@N|F5+6?7@>Kv+~vn2_`OFb*kTVy>h5t z&ju!DrN8{H^8{^oJd8?M?EE2+<;MlqDAm96?9#jfZ`LHa8SlHsuu973Xv9(p!`n(9 z%FMI0rLVqhd6sYJI$^c~&xREnHz)u2G4bKs=(f|-J*90}!BM89Sh!)cv9!dABgbY* zOM5##^EFt_z~<7(*q@+wMbKbNKym|roq>_j0f~sk?8a&n+V}iRK4MiT%(0B&5o1x) zfh`<&_ZzS&zp^-6AJlIC;d}f9Mq}gVZtlaUr}M<`Gq|(+_=o5G8|5HHypm#}hZh$M ztK{ult2S*kJh144{hqn3<%-*4`d+^H?-8tfg6je2Yx%HZg9G(FOB~|X2ruT|%%HHZ zC-K0vvcOA*%EuJg#ln-MxgCT1O;taBFxauf094q1QayaU-{r^D#>U1*Xg=F^_Tcg3 znfdEqzFMWK(&FJiVNaink~PQ1Su$Hy98@%CC`M#S1~H^1HY{g4^dyKoVVj>spx~?| zoqwwP)SEQ>)-*6`Dm`fO^5WUIkLTDio;PofRB=KAOslYy@OGmyz+}YKEO>J9dTqkY6*w zVPmM*TiV9&WOAHD}9EK^(}AyIIu zsKCI&(r}Lb`45j+8~@&f#L1!Q7CUxE>gqg^l@)DmRXn8-$h}cm?)!vS&73PYHLQ8i zbbh1ZfkWJuh7Y?u*erhkuW#F5|KG0WN8QCm#q4}{>c79U78Yh!SMT0qv%j@ z#zu}aHIux!n_NW>^7!TmHM;!YS-k(-yT6r>j;wd@|5pdAUCbGG^Kl@RkR$0lW zrnX4UuESG4L+Jxcsf`Tzf+=@u#~EU__Hu3TkJSgx07 zCcfcG>5@atsi)>OGVXlyvp-kyx%~{z|NsB~{{H9Z@ulA9&(AD=_V#xEeEa`@{z_Z% zR=>S9n_E0i#`f2ZyRnK|UUMf;?(FPjEKgBB#M19yxZ?Qywhc@!Dt~?-|M2$qe^%~& z@9zA3d~2)y`nbP;Ke1{ZJ$0(!>#O~u+7iM-Gm?^LE?KJSB|jsGheyM+MKJR`w@X)N z=fP854{sQ@WncfG3Tlrr{QrMwrbR{NO*XU;6yxl>Y7z&Y-JqFBN*p(m;~JqAK@ zDH*ks>u+%JYyA5D{{G9${pvmzo3gLJ{r-OcfA`*w^YiO#|NVJ=w0pgNTutr&f7L~s z%GP~vvF`nL zfBwm)ulo1p<@KG#>+Amg`TYO?{xv$Xd3j{yil>~Pa^AyPKM372OHKt0rfd0Bn@A@ zFo@aHVGQk=9g3Ut@#Dtm=+iT2coY;Q^aVd$6?4H{(!l6nYiZJdrK?giG?^`H4ffRi zHPz35ywLgk$0a-*KmPqazcu^)ww#^&OJ6tp&)=|ao0`Pmg47)h>r7hQxgYN?|8JJ} zXJ78^@3*)Af8ZtQu`j`*YKo%vYkN<2?F7E!hc6!`@pU*Zsi=^ssg?ZlRdPc6dT@W@ z|NletERG!cv3Bj#)Ktqt6^<`wl|S@p*PP(K*MGEZ%O(4sDIfW_J${^@{CM5tHYos?{9W)E_vB%U+)p| z<;S__GJD2#R~oc^9~iYvo_z4~WXD+9>TYPRRZ`PAlb)%mt$)5Rrtko}`h~K7$6xCN z9JefK$ZeKNi)vif^Z)<<`@6rFU0R|nZ!e_PVR+#4^ZVO!tABiY`h9(Tz2Z{KH4M6u z0+C%0m|kp}Z}+$Q%Zu;V*51FjN3k;SeLFwT<}|~Yyqh#J2)ljKwz4J_J%Df z0U<~1ZbO2=$zNv88lR}BMQ(1BWMp_Gc)6!U3SGJKL0;4M!7AT>hrS6Vd8*d`|7ThG z>EXk}{2p8Z96$d2{C;}+{eJoSkMCrQPB$k6ItD2nP}!JK^YzvHjmh@+_Wk|+R=Uuz zhs$qHf3U$RERfoVnyqVDbg|M|_$_w4-tYX12Y&1EuuVSo7P zX`bVK6PIz<&lXwB*%WFmF5Wy%Y}!f-sO6qAH8nY6Vvnq>YI=J98)%8V$z%!3VQgHHrSTus(;@YHyJ z@9*yF@89q57s++1`P%jHiD%Mm*%J(Fa}KC>pK-k=Bz8Em?T(zwyAT1ctL`J!O{iWY^uL~ytueO<+`T8hqt%qYlq+a|KlUO zdsf1Mi7o}A6L<_vXRz`xPtswk{r&Czr>FPr>w}*0vbDAIOTN3W?ymfAnZ~?hnl5`6 zD>I9WcY#9n|9>YHzPh?HX=zSV)3PTAoZglsh#$N*=|}fOsSiu0*2~-1DKBR4W~=-2 z{!1>;Ggv&QF`)Nw!$R)JIgFu(yJr6X|G)n4uk-KkPf_dhpD%H5pK7Fd)NJO+ zIS(2nSSqR)i_1Y{PlYcpk1sVlM3YVzjhE`Ifb!_ZK1yKzCN_W!`kEG9b-_?an1^e^bH z`Ty^?{89J0e}8>_e|h=+`u|B*65((D9iGj%;gu`fi%p*S+woCQ>pD=sQjI-P_%}+C~@=jaT-rL(b zbLN`0`y?DcyqBN%rf)0T&vtX(g)5KiOp}-})zH}ZQ779Y#&sw8CNUiR7<`7$ASGNE zG(*Dh|9_Jkr?vI$HER|uP>5RjVLs#YGm>7ROxA`HYdsE}`q!Ox3zW~9nAX_XOlf16 zmKHWQ=f0#Mafgd7CzUPj=_h_=--IwFo{A77qkqpIC#R$?b$aE{rx?U&nEYx-NoS`a z2d5$nq@CNO#wRBi#>&blv%KxUyEm6X;g?fPrCIZ4oMO$IvOrsiAQnOX5G`X0ROcl7Asy#B@fV?%$xqf3j=UKV$Y53iFF6E9qIJM#e2he=9G zNJvOc4GMa6@#@n<9F97IoQj>&YR64EzKQpW2tN)8d!}w|lhDxH>lrz-y|-5|+OmDW zs-szwgW0sXa~)kJ2=+lO>B3I3Hw9ND6JR;W~8kWI`HS?Efw|0~+vH!kCU)5@-{zIo|V6?S%4_9=#jhFRI4KYrRIFr7of zAWJ@GC40Zf9St_Nw*PP=FAz9Q0+5kc&1I``6Kc~#OAMH)8@iAZ+Jd^;<2#_ z&;u91VEYmi#e^R}eei>Q_a$*%*#%1_*7SDn-w`dH@wR8b#Ib*=DSy)Q^cF5T(ti1> zOT&Eo|EpFMbarkO3iNoeGF75uh06x+t`37$D-DA!&M9i0dho>MnWN1V29K`9q@)KA z97>=QJ55t$d^T;`^TADZTWIb7gz_1Gox3jXUm?m8mHCl{dwzQ z!wJo56I~u2o;!DVii!dRVWF9lA|R4-`N9K-b&emV$Y+>eQ_%f(FlzY&FZmbBFKlei zR905S#H3u)iHSM$@S)M7ML8?l7YMKMRZ#kIS|TRqOUjWGKjJ+)47IwN`}@z#YMnb* zQM1!AY?{qo4hDbWh;)^L-U&_&_V&%Qq_%v3*_oIa6#8`Xv^1A*T6f|`DlCg&>G zs96^;PW1KY=-iW*86fcWwa6dO#zw|kZ*F^g&}hh!zcBlfl0?LwiHW#O5&ocjXJRm? z%t5s@#l?vgTHGaZc|pib0{o01k53-(4L*(vfQDe+*a z{QtuboP-3FHf_AB`9t_@e9qDH@oa7NO-(v`_oi*zCiLO81qV}HoLF^rpt-rBr6nH^ zd$O+U2NjMOPex0Y4cAOLl2uhCY;7ZRb0Sz+Jw4=$ig-RhGwkRVUf;~cQ2AJRPe7xL zE4$<(NZUIpDJe-r_^}J0mz&!bGZwF45j&o)Id%!U0YTn^I-7p|ViH!L_4~&Ofy*-< zJxK`*ew>+i?ZV~9iD}1tuFc4lHNN}Dwd?)<9l`?4>sGONzLIA?T%Y!=e$}d;yLacb zwfVS+F;`Xb1TQz*v1^jl&M8bWqP*tjTh2m?n53k%v?)_l4|6!WI5lqA_}2Mnd*sW6 zCCn);p<=qcf76qGtzUoMzIo) zk_IfC%Qzp2E|_uV*thtwZ~rH$u9>WEoOkEP_h)DS$M0L9BcGCzA|RGF!>IM(`ToYq zmu51*Ver2#{C}5LVj`%^8WzR|b?L-Oj|4MSa=2&Td-SY8@I()`D~qJh{moBQ+>t;3a4YxwKDpZB@9(DT$JcG%Xz1y&Av@!P zg!E@^&BBcv4Ijk4?ot=qc+5nu#XdDbH{_q8;os*E6IZNA;NtjE|L4bkZt;Kib!R^O zwPv=qZeFC++|1m(T6^m3`5Ft)bl9@~`QNXf>QKVTZOp-8U|?(vb?n2($>KtXd?ZhN z?BzC0Ft`xJ92Ir&QAmy}2Ul}*^T9(-7Y;c+xDW0_{{8-5URqjNSeW_P*Q-a4*dDyp z`a`6_T`bVWF)BYQiYF?nr~d!H_B->A9CPcJV$oJ8Qd~Ac094Qa|7QrbE-^j*>9c?Q z-U2;5JSWNp3>9M?T4t(UaFH;$#GV#)utfISA?8C4>o_E4%=ou?v+*IGgQDwMxY?el zsTgKHm0wl;LF0fjcjk&k>n|)ZR;W{!)z;p^2AQ2rN=#2rOlg@enK6}vxk$;uOM{1p z=YQc8c^(g*HL1tyb9(vT9A)ckV$|i6@X%M`n8xBF<*}o%Yl;jz&x7?|8=CglOaA+x zj&P@2ZovV;WgP3QSSO!tFt7dNTx@giQ}c`?jvML@PFCw!6Yt?=m7&HXp`aY;aNz!a z(9keUbF#BtAA8XpgFj-0Z%#68U6v8 z{r~zCKkIo`aI#igixjRGVrCA~X4!nR(Y`PtRpGd;(PMd`X2x=nBrYQXFV!O=LPwVH zpZRwD0CNm0vt#Up<^?dr5*o~%Bn_nIu~=*JNaV2X$!uZ_Wh`n+I1pB6kl-T0oRwnm zpu;9+<7o%S#zt+`#AW@Cj0L=<0xT1fnqlLWNr|bcEmxKQPvomo-Zc4;L)aq+wiz#( zXC9el*Wq~NOHzksg@+WMEF5;+V2ue6?GOG<)^u$sP?i z*Z=%Cm(gejq)U<1cy>*azrc;EwE_(L=kzeDo$HWtP*LSuqsl&~TSk?qV)9XYm9Iy> z2S{u<`AC2A1Fi?R*u@)MER7B=Wf1=Jhf`J&TCO*){e5V@k;H{l{5i%Oc~`%wV_e+^E>}09p+o^ zsv!#v4xRgz-f}mcVSk<^Uq}9gMN@eT1lPE7{JV0%!R2>za7VjB1wl`V+8BGmS6(WRLKALi0GihvhG?;22 zaJT>HERF(I*)0#JE#xg&I`#eig`Lp0;?M8z-~azV!P@ZO-~asYuQ@$C^3l|_bG282 z6_2!dcGN*1F&>LW0`(gm1G*Yt#GMJyGU#Cc@a5xQ>n+3-*da%>|9bE%*PQF!x72Pe-d2W{S!al-&{A+>4nsrAHA6+GvC$;zB-`_m8 zwmi%I3Iu6VnO1w|YEfXZGPWdThT+ zfq7NZ4(osUcYf6U`%^9>!^6gwGiQ#E!PFnx;x?&l%$>R1O|LfYo@1Br*sbl9SKAB; zHgE4;w|>8qJ$$&G=l(uJ0|Nu7Un)O8d;k0Ue-Y7#-6uGCI+Y9s4lmxo##MABX>E+a zo$dMm|NZ{{|N8p>lO~<{|Np=9BqwiSo)sTkv}+T#DRyK!Jn#}=zQ5udjx2Wl=7HPGOz`Ty_x&(HUd9-Vpg$P5ch!^h0o&CD~qZ~7g{nJIMnpF?&iPtB%; zgoGde=Q}zyIEGGrcviYi+T6oKrb3AEp9pjM{+dpU7ebIc{p;J?|I7VPSl@Wy_^rS3 z;s5{t1wMkjaMM7rNz`nX6wB}b|M&m@_jhl}%ijl^?=M*zIcKh=;kP@B-j@IBS8P39 zW^;SRhF!cr3k07$N^f9PR&HLR(!5gha4R>@^>v0df4vTgXnRiHn6dK7|N6Fm`Guui z%*@cF|Lf=H`}6JVANmGLG%aKEn2_kp>k_lP$#`nazCVlCzn8VPPWIvHZ($K$^+vvj zrE#X(2D5}tPxcH={>_#~5e+*mn)li^uh4Lgmp{BU+a*)6=)92Jhv)o*5!3RbASE2wKZN(PiEo5 zn3iAGJ1()z>z$S%_3&c9_s0a5o*o`YN1nDe9{qTuFJA)pQ3p3^Yi-3%8l2)MC{i6Z+PQ>fa`}6w+UDI z4G)|?ec<%z58reTPuJ&}ZEn1$>ga?C2S8(fYHf840yjfBi?k~L&3SlPy+sEc##iR< z=4~+GagpCIxQY4jMuFWI3>cIzED+hE$zgIs#DhhME9y>U*~@2h{dWn?c)nMICBWAH z)1=htua0uAHvg$DWZ<;AaLwATgtzxfqi_F8Qh;W=D}qevz>BGrlvff zuG&xU(Y5c_M(T6o&E9V z^!mi`$Yfc*gZ6t4Ht0@GaCMgc^SgaVUYu*IqgU6%m%(l8^x7UloHJK)%NB{2 zmV$dyJ}I-AmA*dV1eH%GwyQw%QDQ(|HsPrkozzKl%Iu3b9Ywyk-8@9*txyth1)l9DD&PyhA#xjZ+w zvYS;x!^T2U2_9v|NQVRS?Rid5bG*4`4UeiSPyIgwpLu6MOa1Qb>Nsc?SZA+{3> zIa!VRxmQ>HtN-!g|JT>`BQcyjrpmjdTAwo74Z#v-|t|$4AiYVqu}0 zt?jfXCO&ufFjdtwKEAe{#q|oefBpJuucyb>^TH)LA-CN~VXL}?LDtMf)_;GKK?_=s za%-5|%z60Ko2|5zEi{x(Qx9#t5Nj~|s9Yw7VZyLAV-YyvtDaj|7^5A*;iPOms z9xASt|NW5p@NDxJ<)@)#Kw?5dLQ2YypP%0!@AqM1>gmxi*7<)?Y=Tp7(>5~+y_g@y zrLX#%+3V|nAN%n6`TzU-B`Yc=b#)?QVk2#BBBoBAX=y2`kl8VL@;E>% zTO4E9Jdsg(?E^2>!`oIku+{%Js`+!`!#mk2r;oK+Y??QF_Q5Mt4qlnEa7D+%qug!E z?hHqxl|AYk`^2!Vr zE$jckLiB9Cn;U4_jg!N`!fMl}&&D1e1}|T1kdQRg)-pOM!`^#Q-u|C`^|$Z8N?pv7bk@&t^xb@oRgA7YzKYy;!RPEP_ zRge6p#kFzTs!7WXQ#Fba(z7=5=InNsFU{Fb*)L&T;Kgyh{8eeH zM3Bz=R!5fP0>0;U6JIJ%V|_0#+}B|aF6o-)`F(wP`G5ZXe}8`-K5(jQ&#QLEpbCYx zQh)eoA52e3Nf8iA+OjdRX{(Qf%cI@LnD?Ir;sa1 ze});^#`ku&+h}$zE6?b!3}!VjG`zF(@P|*TQ?xZt9yr>nnwr4Z_*m1pKvZ{ozH!Wc zzPn7FJW#vT`t;)^&dvLKzM&*qy5<{8=`UVYvB2;O1$99e8Dpb=yK8^1U!kFFllbYA z(Vw4(51jiaUq0(c{}G8uRyG;X!q#6VB^Q{2$}=((!#xY6Soweapa1Z8d0YBbNe@(_;4O{46i0vYRYhM1z?&=j1N55;<`C zvfdoqGZgApRUi;@n!84vCI$JLNvv1=lD*Y_J@`JNKaQXI^Zw=K`ZH&EK18dr zuu9t1{`&v?e0|up`W31yM%6COprwy@5_eRKpL+T@dBW7AGmRuBJk#sQY+1B)Dm%Z# zxp_zDfo3om{{Me7SC{XA0#ECHmfnZ|o;Nx_;N+Xou}nBQ=jMq?KW)PUzhAok<-3Zz zW{ZIu8{hfH=-|3Ex$vElwGyELf~9>DrD73TOUru#1#2%nVt2@{@b@~rMeO;#bF$|r ze8@bUdVR{A>$8}0z^O6GDU54ALkqK;+l!wUJx;R=p5?xwajkyIC05;i{|${m6RY~4 zY>)Hs@Vt5R|Nh?J|9^ka*mQuY?r+kL>gf*;v9_G}{+=f+EJDrqL%IKkRW88{k`9O8 z9C3ogj8nvq@A`-T{^t4qZlOePqOcd2L#!i*ZpejOllIsB{r&&H{im59=FH*gmpA6% z{-2&E=PMj>;Qf6b^L!(lx*mZ>pOp{tD;`a3y6!$h^V!$zw&Ld(+|NFQnyu1jUoY|Q zPGZIXc{8fGs}&D6yxkYc*Y|(E-QWNJWG8<3|0nUmv$TpY9S^VT3$bOn@m2hv_wYmF zk$((!c5JMyY<_-Bp=^e^K_(NyNi_AxAIaTfY;2(JAH)CuPMWLs)h6$#lfCGa=AhfR zA*xfYp|kGKkMoBP9XQdk>EFla{%z&&CF=e!d#!gtk#S#LDrhbFpUNX1X2!T^&W(o62M@vq_nkatKr@p!8229PWYny6IHkH}S6kzbXZ%Yhe|pI5 z^yGU!XvuR$g+$#S2l+Gd4DhG|M2#Fo~9;__Zv4Dcz8Si&TL*4 z1*#oynI?sQxWwepW`1stH&vZS*n%~Dh{5EH{wq`arcMcMAE;D0(BBZw~wx3B-bZ>>A z;iR)CPbw%kr&=1SIe0rnukFK252Ppf?ilOyZK0%`RE zmhg=Qk_Q=|oJ-U;J0!~D`myQ9toHwV`F{*1r3<@IGJ#wIQASW$OPKfL8y%muv3e zxT5fTp8kEx8z1?zRxAJipZ??5)Q3knzl6@Yc8%xJ zBcA&Ii}V%UvK|_A((K81$U@A6yaUxVOXn z!c9jJ**SMuN(;m@Wku%ys<3ZXt@ir0)bm!`y@v*Ei3>6(6s=#|f1@z0*x^I}@#t$$ zb9}RpO{`qDj+4oTKlxjHkK7&mWt_K<6?zDLnzL{DYmF}N`!Cgv@|-oNDD_PgQtVJV zRIuB+CSl&}(CMu@i*;763fFqIi=**3+M&Dsk3r;rh!K=e943{I>I)EGXXR%-qj)$bsir{jEiJ8e4Z+ z+}zhRCCU2NqJ7;89_>xt5nju^mM_-Y)~(QSnDd{tBTwX})|gLEPnWkn+Y$3_`L<^# z=eM7{ef#qf*B+K{Kd<}Fo+8t)BUT}ITJ^D^qeJ8)3!ZivK8{9#ExJw#x99kAIIeFG z`tqBH?TO{IqZuxf7Kj9Gy1g+j_3;pTyO&;0NImoXOq!3{4!uMV zna%ef|7N)U+L`ZwiLTX^r>nGPwV9-DeNyDSp%=6i-z-{CUv?cJ#xZwH31&?@wql{8C+Z_RZHw(Zihd0iV53+&9l>B;Jk#q3}|&h_VN3J^F}KSj4l@spB);6>NB zR*m0ZyCrH{H{3gLqE)X+?0nP?gQ~4B93{jq<+LZ79X|8@to$;2i(Au7mTE+-y?3r^ zB^Ps4;%3Qt*AsmgPdwK4-MX;nal-AK?Hr9k9-WT^u1`_zW(^7wsgMgQvM`umyt~jU zQKIMgT<*31C$*$L{LOK&q$_BaX~@-p&(~+A&)a-oxNU~oY~H{lj~icaiYwkL#PTcq zR?*3i9RgESeHZuhpG%+Aa(ZcSu$u6q7uf}W+UDDDi9D9~-7@a4nm@RDpEGMecT<4C zar>iz<9=1$=6FIj7l6?RvRG`dyoT`Th5Wdxh+L zZ{|g=7ktDm`F7(+$(J&6QJW(k?9vEzJtcJcNtNpPLYYY|FGY?&EK0OVGjZ~2Ik4%* zTM3>EyZ$Y+nXC2u=bv^r4#&U+rk^bx52PI~I+=4UCFt!w9cQmgQ~vH-q_gerN2lu> z%)i|CjS_2Tp4XncSS0bp+qU<{GLur*gv~i$As6(iVu8qU8$U{AN_b8_4hVpE5VvshFmQM0(S1pZ_CreA~tES4<<3) zdE#|}414W)oB31EADvb=op+&*=cMf8%JUrF@%kBmSD$oOvN`C9rI#Q}ilnP!=t9%m zZH6t1flp?*cmMf6cbN(AWtZ!QJUZO3Pgzto9kmX7=yLvO^o{fa{x@5ft?PW;zWCm; zL)vn`{cc-XFP<(BPC19njsASoJ$z*)&$n+J*&F&Q4GbQvE>UoBX;D;Ycz8}$>$~iP z+;s3_hzXNGiz0sfnR)Qiq=P3q9R2zldwU&2rZ_q?-%@X}kh_$7j9KBXYDZOUESs%u z+jMn78c$j?i+jH)TxBLa|>2?`|}q4c)Tj(!lIWi0^2sHd!BBD zRQGe{_;NV}h_vpXGCzndi0xR3=hL&f!r+bsGX_00v3zV=&9t#st4z-b%{&Hcm6 zz~QuJrXp8sfPgE%8mn*Ayn|{pSZ-@Ko!)pmw$IyuJFHMtX-A``ln#I3p9kMRr!zAj zo_lz%HNXAR)Pe(EP&z}c zwD+|V%jK5b;@x{!`nElq6zr!i$KTXoqkh`X=2Pb3d)iUEuRmS*erL>zRrB9}WdgbW z%*>5^&H*BA_RZEEYDzqMYWfLARSuKOKUk@`O%7FA*UFg5e#xwNS?2P8>kIbHvA=IQ z|KSCbNi8<&U2?(o;o8d!_9!e833a`HdD|N8>D;mHuZ7ya*akha=uzI+pJ{adhFROe zO$tXC59S^EKkfD9$^IP6I+rPOXn0Ray1gP>r+Zzp1YiF5J&)I|dVRz&vuld_g8z5a zez%=A>UzZOx$`-=VpPhqYnVP=QM-HLf`-Oc*6K|Mrg{}jmH$xC(j#hQ>|9+kA;)F= zSK$(d3*HYSCG6@zgFTRq-E47jZRYtBEG!cD_NNO7H1xf25DQLXvbo4Vv30t@iI|S7 z|Nk4mxp(%$%}%5F?lzE2prj>dSD$ucL*v1lGY=kM*t?aHpXpDeWbq+ZrUyG&jI>WX z9G`0Xg>ix$FEjI&WddStU8heN7#KXb-NnQh5zHLL|IPm0xwqZopdr~idpw^RK*pJr zww3Iwot=1yrS1IujbD;CRxxj8G_7D*rd)WEPv<}hXWFI%m%aY}o16I9txeY2Bd`4s zRG(SlpWnwf-YEbsQVk8 znc;XS!4ldeRazMP{W1IDGc$SE`2?N@__VC)-o7gOtpVX6WmU+%7vXc z*t*)j$u!d8!eV!xzkhlB=LbEJfCk6fw*Ma*7jB>K7$;|VuYpbBbliml|NmbQ)81r0 zVd_Kqs^ccS#hsi{a}K_BDprva-~6#Z_uSqN;~sGa~A7jd5nyBSXp^wWCFq$rRs0_^w|Dh z&_nxed%RT+EESKG>Ho>H_FsQgOG0S>)+tFYvFrP#%D=Rx?MlLP}OO_a%IQ3{vyu=1m(+d(D(=S@oPMBiBVbwNc60>ok z!2>@ro*R=mqE6k|)|>b$q^+LDb$mh|U^T#)Q%PA)Q=`)tGG547?dLFts-=634a^p2oM+<(Ntqo#>P2?>~ zdz6rnkdT}_VM2n2ZfZecqJWs%oi_(gIau^HePdTs*teP~yx&{$+LZ$iE4CbXC3l8R zlX)9gD`QxKfq}uEy0aJVo!d}r-B?_y0b0fi@?z1W1_nk(w)ptA6CP~L%o&1C8=fBQ zFtl|{IdwqzR|MlM-4up1*Yy4!IoPoMi9$rng0s4BZXPzMwK{xq@)rw3$by4Ko*;KU zc#t3W_Zz*rcXmSpW`@b>`De=t* z8s@CuQl@+O;^L0Ik4xB~Cd5B#U{F+KmC6)q4}|1jm_1IEKIFN4bJH}}qdc*tB@c5xys-pU^}F!=eg2@0|? zGqZ7Xvx$nbm6oz`a=I)QeQ2@gDCZ`Nd3jm~Z!z*KEY5D8Q^UE&M27$WrDhyqWIS_*XU`sgYM%{RXW6|0Y-boAdCVZQJ|%lI!A9E$YA=^L(~XPf}t)uKxdDN~DvS*P&1E=@Zbj zLqIra-d{i{>B6O?3zwSaO+9?<7-;hI`0)eB+B$ykuNSoL*uXT&&4uHd?+zXpZA-hl zlpmj1Uu~FeF6lQv4K%Ov*c~+7XIGm98qs+jF4b2b%mK0L%pF6nRs#Xoi>p_-Fm+Bi zr1jumdCJn>*47&=E+z?v%>Tvp4s35VKNC{C&qC(Eq1*3uT22ereVixpvij+bA~m)B^ec|IA%Ot_B01)`#^Ty=(9NpTH*@ z&LkXs*N@8-yi-cv48HIzb1Q?581HaeG=z?Zbh2`+w{b{w{qD(`q!67 zZPTsW>gv%sB|tUZYvGgt?O2PoQ%h4NW~dz#G|ozmyzza4_?bn%$5?muzZcGb3eH$k zlRBF#4jf~9tF<_HrBj-*0`C@wE&+}tmszGQQ?%{t3~K)K$u4dYF`kgD`R||Xk9n-R fkPyUvQWP^oPUPjdAN#Ie1F7bP0l+XkKdlJbi literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png new file mode 100644 index 0000000000000000000000000000000000000000..37e8ffae114625d0cc6a07ab2b8dbbb7413a3829 GIT binary patch literal 18884 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3?DlkV45^rtlpw*nn1P*v zfq~)we<{|*4C?rpNBR#6GAQ!2#4P?6zh3S1-gT_08Vp~o9>@jF-t4Zi$UURypSEXE zwD*B~HyX|>UY@yb(J8Y~UEzw_qHcbW1xL;s3NZ-sI9!xSlk{9({(o|&XbaQHdrjp{ z|L?wTsP2e4HzTOyqjO0|--)VxX9fmSwhjlEXltcaRWr`dsqVS^JMmueyQpx@?#Q_H zF`t-0M&7w%&DCHaz zCVu$*#<@W8M(NsRC%@RvNshf7#gxV4$tt4Yuz-Q(i?iU_$ocN)H^13^OyhP~(wXCm znkQ?Yzn(PZ-^`QTV9PFTcT#_;FDB z&0l`y3YN7Z(yL>GSyh*K3mSd(G2V0kaY0{pt;A+#4u-t9YO2OT{AC$PEsVb?{Ib@2rM~A;p&*?)>E`ELiv9TVWouLsw z?1hC*_DM=kTr6Y!>*Es9Gg@qW|2U-nZ+-puzx=$61xoe*{#5_@ale(jKL75o-=CiT zH%|Yzzv}3R&+3P#>odzXEdkj8O2bl}&66ibM$8EJ=(v5z$tN__$=^T3ySsP(fBP&Y zhrHNFD}4nn_RN~}Z=tAAh469vC1yG*g$qoQA9nr!`}_N+r~7Mv|BK!G>;KQIMO&v=K`0|8&7uKg$uNtRrsDbJ^hy3wl-Qa?{3nL z>gf+3I=8*QFIo35MdHk2HK;vHI-6(CoY>wT>(LP%AAjab$jy_Ue!jgkdwJ8d9K;;8 zjQ{=r|DS&=hogydg61>(ET8}B2LdY;#6@phI$mG#uyNLse@>U!l^@ri_;B<4dmeT^ zgEu!07JSz`ytkTXzP<6EU#1?DIUrG90=BBTIW%U@@jEg1j=9~v8R-->Nl<8dPY=(D ztL%MWK8h=`_Ip=6ayb3#Llb-UdX}~3-Alxc1ez8Jntom2t++k$|NrzK|K~rvZ{PO+ zzocGlQpDD-hmTy_+W95z>Qa7uPQRck2=$mtXLD!g#MV|%pPt*7f^MGnK7P#2#i60I zw^wlS#!d};11Yf&iaKYesZCR2+$`j{I>sPbHoCLv+N~e$cVb*~SN}^+XjsXm-OF=F!EmJ2??(BSN zzoz9tQ0juO-XFI)eC$X_UtAh#D3G?XaqF%|^}hWEg)Wa8ChV^_uK9K3!w1E~*Vgh> ze>bwJK4wti<_f=BIDEhv!=J+}>{d=HA&4pHvU` z%QH(V3qbAoaU?N8;ql|-7cVrHuQ{WyaNy9PLk&^Q3gPk*ml?$z&#Yp}WMW;)cj&6p z!4;YTDTcq)Ui1cEDQQ+aeV}2Yg~F5b^LftCGmP1HW<#Cr;hn`i=jZcBEOCGuej+kb zRh7}(JGZ``H#XLglantjj4eWukRP zSDbqA!v2hj8e3bvM^gruf?@j_9?1iu&LKud1_nwy1%f3X|Nr09(_?X4_rY|{xtMh+^ z^g@ZB5`kt9^5Pb}JgSpiFx57~?bro3qYbN$ygA`ub=AQ1hBqGvr=z^R(HxtT1wRZ9 ze|yW*FVF8I!w5;y4M!HP(n`o%CoZ!lHAO{R>Clk_2O6%ta>;8<>dmRx%v_z#yj8W) zQmAX`y94vXF1R@vzUDjQ(yUzg$Mq5`>xq+1jGCzjZf?&roMU--Lx$kt$H$oiS|HBe zGj$_xg8>iA{SHO}W)~5U4USUJPHYH?RLlM#a!hT>B8MGCap$aqmh9iZQRwlMpgY?t zW83#FPtBPuY;e(ew=zHT*|p5`E}4qFb6YY~uwmB!s}1qrcZCF9R94}Yy3S^bjp zd>u$IEOLL8kdkuYib!#>@u#m>jg2NHrKdMc74F&a?$K-D#e1s3Xec>xq@G>{ofEL`x*R4LGxi)z;L3 z!9`%_pM->j3l|e7OidLKOS`e9^Wi1cHgWwIhGA^rNM2MqyScUXYqxO48u`HaUGh~4U7jp}Au_Eg`j@XIvKoE)ypN>45prgTp@9(Pgw z-UL}@dD(*!PaK{tGBNE?1QlYxr$4-)*miffVOWj#KU#2LhWkYA%!WCPoABqzhcIs9K)6Az%%dM zh8Cf)22;lcjE0773l!Mo@KN~{ ztpd!-;^S`vjsUp8CyB`96og5zySt-ffY+x!;UAMe8UpVX_SzhTu_$w;6cKJ zXK5B?JrA#HYncZ^im63uAX_h8n#9AsnUBLDEJ(;@-Ia&?oV4U76x(}!Is944&9+Hs zM~{Z%S9wnsUDgAqMW6iYV6@EIIF*;j!&Q)Zl@}Y^lohUWPYxt2d^2ejdwL=!MwM%m zg|TsSFE6vUHnX(!;f2mT`f&lqwa}Cl1q$PkU}0|l?RZNJD5t*u@ik2YgsEv;?^(RBPY3WgehubCo?! zC^6x{bgv1nGiFHW=tx9FNJvP6tKy%NvR*Pna*ZD-Elrpz%E!H#i^pW*6 zHeAf!aee~(zJJC>|9=0U&uMn4A*gvlU%OIzA={dnLYqqz*KJB@xtbAib-{9{C5L#z z-=(*d2PNnxu~mF(HBarx?|P&eexYHbVaUOEcX{&f2Q9A!b$R~(U*z{lK!D6c=V3?`0yM%#`FK5Va@+@8`Akp%}zi= z)#=fL=g+w~4g0$`1%!w+sj^m_N;srp`R}lr@4v&RIN5r7_)gB{@!(|5?m9TD!65ip zJ)4du^K`v4M$FSi$|7YrMY`iZta+=j?7#t$l%ymB)6^fI+aKOGZ*y*6v0)kyBp^)+ z3yL@Y{3WHL60v#(hp{23R8?HcllEjvW6Z(>&wT^98Qf;L{Wzk@?4v)$Ex7H8fLqSQ zcD3&1@m-Nx1_>FCuZ0tJTLN}?eC1GHXU4uKAtB+wrPQNQM;~Z{;_u8EiGB6yKfZQ9 z{8898-+m)2+eN7FGzvFt-n?UXwz}%g%*;+>W8*_ok~}L?*^0I@v1d%>`1FIFxu-j9 zt8&}sk0ItWwh06^toA!N`J4pDQl2MMy6#b0gFuPBr{sh|4ihI=jhe5#JnTABWUr z*{vb%_mPX2&XtC zD>F_>VPXDXc-;1y#Gz9MSPfD)u1Y_$mZ4j;hoj+x<{TF>#m>evQzs^Ja3vX-rza#I zerJ#Ha78Z$?7KyIz;sbYCm^%*oKACdx1V{dgB(V(w4G~Wobt4mc>tZsS5}rI{ zbkTAS?y*l@urV;p$1P&A>^!xCrJ`%N46ZulCZ&Mt@e2tZy{QJKsR>Hj^B}%Za-TeP zs*h9C-bG6GZRQ56*($UWorKz#b5^|h=l)HU?LfR*(llO`Om#QLy3Z4rN)#4PWWD%F z_*=yBP?ZE<`5Dg)jEoEn4J9NESy&BMteW)5RRZdA^+FdnD{0Bj%^Uu-n+h2g8z^0y zxL98E(Zi~Xd)igsbTD@ab(HLV^72qrPQp}I=ZKI;o?)^1*^%4}?W z(vAg^dUOj73=NMRjr{#%MNzTw!C0QJO3KW8vKl%M-~7N@!%^t-RzTQ5NNirl;Wy0- zHhwhdU`bI4-l1o+lCe5UF{0c+V4|JDSwZ1s53eKxbM>3jP)F((&Y0n`Z0XJJjtvLB zxt}cLxDskpF!BG2Zu5m4^BN>1FMeC~fbqZKBFXSy3LGc26CN;ZzTuGfFiBOiV2P-V zn85Wb4U>5!JXvmPh>Eto$Y67IRnlz-)vgTx|0^jL78Gn?VYkl8{%l~lkk?YdOh93) z_C#}qgO}M;x&;?aIB@ZhgEz~LS2`BrlmE&qe+`%+rN9t=MJVUR15dGtQ~Vap_1se$ z8RvGdv$ohb4{d#q~wpEl3H34 zTef(-*a#LnbDkU%!0%`h5KSdVc-lsj0De<8I6s{VT!2 zi*bh1j&JvW{W4k;bMnI@)`y(X_Jq=%%|>R^-oE)_WGtMTl$4f~l#-B;mXeZ`kkF)i zy}@9{18qx%O*}jr#+nAx6`$vPIop%*;^yZ5-R1YAHavK9aWQ}W|6`k8-8IgD;?ZeO^l>ET6Xrfbkd5K#2tt19ujg1J25BcLT~G)O z68iXM%Jav`t^JXP3${sbkX8P}|FUG>{t3EAC8B2Z)n$3-k z55K>!H_zKM+xq*dPT}+aoUZF6EaPYEi8wwr#vn9F^dL)yax72%f1{k+-)e7eTF)={ z=ivTnj~+U=ZQ4{Hz3q>B=1H-6a$MJrX)Y_%(`#Fy!6qokBnAnwL(?sG?1{Ya-n{wo22^7H|9@z@#gZi~GBQhATW=;Dj?@V3Hfw9_ z+%R`x!)meg27`Zpe^);~cKX}5zwhr0Uw!`jd;i1R+waddzrVlq_2J*&?F(J(|9Q3a zd|_nNn5s6X=HsLHKR@sP_Wu9Bui6{GUitl<=k8se>H5OE=L^FtvfT!l6lSb=>-{0?FTer97$A{WS z;d+06e*XXO@A>2X`zt;?5Z?BQhi}6)Rh}Cnpqi#&n!LE)ADiN5&mSLG(K-A3|NMt9 z0^6o)OWxbB9>)F8h&fnh^M{1wWCPRGfS{y=gLmIRYPmx*Em~T3=<2Gfs9G92%d9=f znD1|3WOQIve|^O9bB2>xy*fYr|G)om`}?(##Xq0>UlOx5GBkX5>24*Do#=6%q+g8PmiC*H(U%FQ0Wl$L2wmVD7~D{dM_w zD&Ahxoy33k_s`GoH>cmvyZ`UYtE=X0Y-~YNG8HM3212~c+)7sNF0a48_xJ22Nz8oj z?j}dy#0PcOKU4Yi^_x36n>QHv9fl^sc@{f%Mt=S3`t+%yZN!s_+GjMwToyU* z-ouo?v2khC&Vq+G8=1e?{Zo0qMoLWVn6mr4`X3*J%M!S*O=+|*Jisa$@K|oOVvmHI z;h+EKKm7Q3pIf|c;&!$7-}BpSt0m6OQFP=!Ske%x8q?C*=@>QZ;DLt5X6D^Lp>h6> zpOG;yIXSSfP;7}nqnpZ`D(NS?*i>Z`=BV+#U!|~kzl7(L`{(AG%iAdIy5`Dyf8Srr z@^@#iuiwAF_IK}<6RvDWrU?I-t-OQ5M6&kJkLPc1@1L$Waev^G-{oyzUwdA5+VHb; zKew{t!Gy%d&1rBCySVNQ4Q;(~GjI}X7I(8lKC5QupQ9eyCs^5>Qm(JNy@ZuV=*Q2` z_ut?DKhOUEw};G2+&i>2A82tFnEDzlHQ;URvakR5{p)LcdAmQ0p9oBKZkO0stL)3Y zPEFmgH%w2DO;wf6(D02Kq|iAu$zsQjh_7E;MMZaBh!S0KHfT-nsd|sOaWK`6n z>|$d5Z?4_nCy(8`%KrWP`}_U3xBdI;{~w*m^zUI`_#aU&y~r&gM?bv0yq}%FZu5o- za)BNX4zsrfFPAv6dz*y+f7bqvB`VErY~L;#K&_c%QBisG>Xnv=$cwA^JiB7c8<{eN=u zevw~~{@1rjo1eI;(0_A-!{np4)KU`?64KKrOg(D16B2|@{xWOU`0U!%q@^`SLXvND zl;g_02boNDhJqhoyuWXMeqODLrA@%W_3{7r)&D5ef_^bmA|JQ`EYU4t=XveKM}jqb-;jQPeQwZmZI~WPyfT+j6_tHIC{ORDC(AZ^zwlg-^4kaXLa7a zDRB7hv1{}Fs}yt07v?+PUOt7%tL56e$yGjr%7*c);;WjQ4~O<&&+G{B`jMH+c%^H~ zxtIIYbf!hRu$CjFA!3nKK{!*rV1mo{ihl#eSoA_Kry$`{H_*E>&q|H8MBv zo+BfEYlq#c$AMgyIV)N8jy4GXopG|2(etJ~TigGb7@k$DbVT@?3=Ir2G7~%1l^C!6 z)B1O?lp`TAQ9!^UoD*V^r;LvekDJ>CHufAr@u(Sx^v}1N+ieIFYYAqKda`o$=hLS@ zPn1;Q0f9Sr&RALf$;s)F&}86u3O?r062}&!#KhEd>(-i#^u3e0SXd-{d`{T-G4ow` z#kYnx^hu-@$A#r-v(7a;{impzJP}^|NuB?8QkZ;+H&a_-_!zc5N2x&aR*N z>FU0o9R|k6uU>xq^x?rx-Nvb>8v++OFf21VQ_`HYY=y?+Ra&3l-@mb_DPzIPlM7aQ zb~658xOP>pV8Pn63aYJ2ka%+Plwo4J^XO5AVrxvQ!}=GhI~)X}zx;R3t^echtM^}0 z;$Lb?#^krTa&lsVf@YPKesOVPIak@2aa@y{oWRJ;cw=g3hk=%cL3fwY!}uAU@-A*h z9$u5y%W5CE{~)13RPNNN7mpL5dDm0s&z~3{pAL(|Wv#|Vfgk1F*E&9#!ewP-EWCN! zG==m3gMwu2?1avo;gOhr_pv+A5m95Fif&$w=7}HoPY_6UVPTh-pJ~c%Z|_`XRT;IB z`O)mco*tf0&-QF$j%87C*1|Nj@gR#*Q!s3qva_2d74{a3DK^^y~+G?w%fEclxL|M1Rl z--IF~qwd^U!n$^kO}a}<&khS|c6Mbq#rCk;gk@}No-$5kBT(fpo!&3;{t`*9URKS!d?}MU&9nSpV=R)`5WJ1(YNY}zMHDkf@6Y0Nx3;DCr$(? ztFeFhv}wjn&TU&Vx;i&1`tDRzKJ)0IeBX>?k&yzEQXRz7lYg&Yb?VBcLn{{__IVO9 zZL#v<7EYd9Pt1=OGX1$LZ1Um1ee)KRQ^)e4$w!5ck58y=rB61?~c{}@q`3sM-&X_4_VG&>giG?J`Ny$y6p$p6#55H?l zNN|w(#lO>@tu8k)iywy@YsR!0cIEGFDGY9vzwe_V)bv zou58FJ|2JOOwXiAGHPl}=gujyvDNVW;N$$oZY;J|=U}MK1IHOl1)0s911_-}f3AL$mi0&!_H1)`ET8r0NB=^GnJ(o#KBwz%WGPf^YUnmDC@`?F;IOvf z`1|+w`{Vul|Nr~@_(_UWZk$0vw@u8%F7=5*qXC5AvUg>-qLs-Rmu6cFf6BrvCq-&8(o{aC1vrqFpO!o^XJRzaq17 zPTquWE)z4q{R@mHC^H;PO-pN-)8p-O?pTkONg>;wPUe2jAD4Nrg>i3rk(8*oA$#Vv zh8ZHw+k_y)&`v6S|Nl?)?)&et@}YdnTJcx09RDY3$+G3V-giNWi@ER1NBOCL<)xb? zTvAem52rb->XGQU)~BB;&?hFvye9jA!O{ytpcO~||2wJl)&KwZ_vh#S#)tp^|NpOG z{#0SAz>HMpTS9?bmNe`LWbI6GNLp<9&Hjvx)-wJrE0PXOy!&v|k>zr0ns#taYV?=n zxnZRr!98idePCZBERXc_%qpm9urBQ1;VSJ2>4pC%{!;r;_QL*#(wF}$ zx+ZoDSUC8}|2OOsNMTDmmEh1T*TJz)s`12?hLx=KcU#nVJ_xdzz;Hf^FNPsE(HK%+ zg3OXy^5LEKmPoEn?wK@i&26Lk4^ui5V?dbm({f& zLd+v3GtGMAz;q&j{oj;E&EL=Yy|%(r{*V8!#ZxpmFFi;|O8LW`J9}oLO;IJ<9rv{kul}Kb|an2A5fww|!#vKXo8}&*Z2%38Lu^ zz11-%lNy5ASOk0|kC=EsQoGa2u)6>My06SjXt*ld16o8-%2hP0AzW`l8hcdhp@xI0 z4x-0PWQ&9j`l$X7Jo@ptwq$3UilEKXa|f6|1s(hUpJ#vFLYP_JG7~Z*ez;!r@mL(! zvw=^`h`BhYnbWkPU@5;&Uh~8mvQcML4mDilI(YTjfyq@H%seCmxS84Zq@GE5Gh;!h zhDxa3{`$pJWT3wDl(DZ@nl5v~dC>>!Cdmu0M3z{JdpvB6`CuThD(s)KB*$~31w|Xx z_=3Cz*Cg}r*z&-im&c;3_aN(sImNv@6NTqB?5lU52x>U~|L^22GeNWT*#G~6$8R~i zIsN~ie!xnE&0F$>^(sjNFA<-Hn9U1X-_+-*tYc|ZH#HEL9@8P1av*RHPtE$4kcQU_ zN;rOeWL7PNx=i14LYw=-r|imG!hZHo(<$I-H#l&EN22M+ot{$;&xC^OCQME&k&VGGk^&Jsvo#+hQ4 zR&r=3&x)Tj_9QhYOf;6bJx_(v=57D~|C0ayB>wm(>&gpBr!y;le|-FYd;b55O2OkA z2WBlgu~vZX#+2LfIeR177D+EW!F7R)Cq-}zQ!``v9wP&Rj^rdMnH@(AcW-s(5y+Xj zum1dp&+1*!(GsVb(Ut%Iy+7XnzoK%Y^z0K$kJk5Cim+Yz)Xcb;L-Fk26+9-BW*l+! zIWM+fD#=Hps$s^$OV7_s-rM7tSq$}@O5gqc|LyDl{l2?_(dAfLei(_@Bi~3K0f|`ef>qVT?-XdG}m}L{#QPk^e)iB z-Mz0SFRyN2@pJZ^==6^n?s*bI{r{BSHXeADlVbko_=Kh9jfKzooNH|lw{nXtk+}{d9-j^Fb5Z|NlGPwEOb*_I~sHe+C8{{`^1BGSjm0q=C{@)r`l>c}~Re%zyCZ z=KudZ=J|E;`~Lj>|NsA+Hx@sCC}^L(=*68PxQ553H`qk4vX!ey;ezOzh=T^1LWlR) z^Bg;-vV?!jN1ik1wB&mEFN3F~9(HoG|7xIJK+j&M| z^RhSZmwMk%KmYH*$&)jt9MRT1*)oyi!}|uO;Lq~TGOy#TV%|xXXfiV`St2oKj>L~2 z5?0od=jN$yyX#Q)u<*0m;m5~ctb`^9P>pP4Q}yNj+vqPDZKBc|2e|@2|Nmd#;U!Z$ z$3COB_y7O@clZ7-KG@X1-0yEe!Gn&TCx<5LbZB~SQ0u5)u~h2dmaq8%OF1nP($YG5 zQZ8IiulUlDcuJ%#`?^O*>7QI~lc@rq-bA*YpMNnGS}5Po{PFGW{q^zxlaBnYmk{p@ z&=RrP8=vy!{r&r^!|Q!!eJMQJr9WxXnVB|ZYN!})~jiMJOHa+k6kXG%`W zn{fPu;9apRnW1b;981_#*fue>`Op6+Yxk%4VAJ~D<^L1X(oURwxNUp7^Q+|ves_!o zvbvOtC%$g2w^+g!SUuSzA?M75i3cxSJ-VSp^YH2EJgKQS*6&~NWY5?C|0V6}k6n^V zO@Q^N{AJ?z|EvD@=l%QpJ!j7FL_B{Hc)i%*fXJWr;{VR;9omZj|Nrsv|GByL(cAyU zYKfb>XkxH)xO&9{pp7VX11WBNWd}RLX$NUf%_|VUIt335U#P`;F z{jSkx8cjYinil;1_5Snodh3!G;(Mge9y+vO*|9(W|J%=SJL9J#85_xVCo}1VDmTZE z@BI%SySM4bOGrpa)cs4bsGs-nFne3}b@0SqiG+%ZL`6lxyt@u5Q9u9Be|Sc+OGn9TW{;$UwC&FS_2^Zq>k_jmvOeSiP|`T76$_WzX?5>uzlSiITmM! z@WjROgs(R`GwbMvVs+30-R*gXcXl5?aOB8=GiMYuP6Pxtrs^t$F$nIjoBQyd?HloS zNaTZDz4v$hrzi2%)oM5LR9{Tui)d=_UwOb>_0Rs&*Wd5%uK)Kx{mH{b5%Kg*8xt>G zNii}?+PE=MRIrh6pMCAG@YuO8pPjAe;o*tj_oqGQR-1)TXUc(#VF%(nc@C!>JaFK{ zfBVB17xS!7xUR&;W>oW=&Gw#i^z4NjCoWvj@bG1D+v(|&ar;t!{F(XizCD||dRzT} zi5W9E&a5>sP!NgPf9}H@$!~t3M#2C8cjkhVgzNottCmP?h`QjFySL^0{X&%VgKxv@`u|S|K~ptNR&BT7$9QEqMT%Bo8}YI;bL&lm?!M(#MCKEGo@~q zzK_fljF`6KR5#CpRXtl*UVm<`H2vf3+drHu&)dvr{l04X(i6pV4qM)uwl6*QX1UXX zb?a+=x2G%XZtu<9RrhN0JaDV$P~D^FyUYK_?M<38tt~J7=l}oudmOp+btM+u7L=GV zQ!+GE^6Hg{1orEOhqUTg9z0IoQSq_AgX8gR^Z$|(ptTn}cS`>I^Yi$jCYirHD^@l( z1{|K8xI>Mb1GI?dil#=iS65>@`{}C}KZH0HToM&_SoZS$or5ziAerRQCEj1(`ybx3 zZL9ts&}R4l|Ns7S6%nb#72?9qOF@gfe}1+vEnUVhzo75$@97VJmrq%JE>UG-hK0C9 z!G~9(hp(;<_;!Ck&$ewW=Qvci*XdFl|C z_LHx6AHFFwugPSRXKZHnSUk0{_38hK%JnyHEO@u0{QdkL7JH%+4_q`%ba+)|&@+LB zk-h%E(HzUeAHHZ#(N;R7pwqLC%~JEg%^h95K1c2AL2IHvd{p)`hx*5^;nQpJ!=Il| z5Yzco!lqPwxNq07dG9AKS@0Htj1(T6 zlWjB4Uyvg#+&16d(~Qyo@2CIvpmP8E`p3IFg&J(&ivRzOYkq%=5BOl;=62@iHo;js5)u;k_9R-A@mT#i zaP+8x#t8+5hIMS`btNkpY}VS;o+@}JvyZzEnx86j-d$eKbAO-Wb^j9|+4sKSza((5 zC%k8>x!8cAg9#H9JR2R-1t!bQd9r`6%)DiEdvD1zyH?` z|L1IVxu=Kc`}_aTpQI$DACKzn`2x;`Z>0}^ea+)P&td1GLp-~7{W!Ksf#dTrS)0Gp z&(D|SlR0qxK?1BsJT%i{UrpkU3c;fmJzqLByaR5_N6b>_m-{O$`j_9_-J#*(-QEA~ z>;4?SV9EumZtUvQEb3$o|14eB_H)~Z}COv3y&$RgWFLg&D^Q|ea?_`p1vA=$*q|ULW zu&=)S{lE74_kaJlKin{wXZjd*>T{*FViW%-^({o6l~sMAvW~{ z``JZD1&t2KSp5G#Kk=zoi|7t$ikxZj?|*v5uPF~-haXZEHDlG_Y@PVgUR-=p=$Ru& z6jY8B{QsA6>Hy!peW@0elO7&s*9ecA6JTfCF25@M** zgyQ@AQY*gmWogWs{b9?ri#4C#%a?F!vlti|ZP;XFQ*q$HIlm~r1*ShfFkY;d5%}<& z|M1(}JmPu^Zn}{X5;e6OV-6ndD7kI&?_klHj@>mgAD)wav%2~M%yf%&@##N4vD%s@ zlr8(%yuV&@U%k7@N`;?~pZmAD_e=cyC%fUx$LIW@4upNZkxtYR0ii<+793cx?AU_U zrxg?rE?9f^!u8`HUW_>|K0<-|hz291FN^ z!kg+Dx=?3^m!`E5z;BsioEL*Nt2c8!D zn|>cy%DSF!*e@ z?G*fH-J|^Xr-@}!tl+`#_h(F5l)G_SZt^_NrsZ>1GAlb>Ht73eo7!pUBkUE+)x0&L z`&x!?;DqNcf`vX3c1)K(M~JQJTK9WV=B!C7>R-ci9^~AuFXLq{tu6jtv=O`;Zq3|< zTx|{lE&GccdD^no470X$=kMWvpd8WSy!)ktfxOZoc@bB>kPr9wB_@`Yl}X-s{(ax! z0~SKZr+a;<`g4zrlQ}?|n^_-*W?wxNF?ae03BT0b%!NxeY^8vtJ>*zoANF^=ng($=~5>6 zQ>_jhc1z~kaR6!kg_+TT`UU+2NMU!VgFT|^#c|K+Hrgg`? z&zmT+?DD&8z+=Ie>N)T7wlB7oev=efP8StCogcupbiyPJB@PZ}8~)QyUfbTTD}P-) zr<#{J)OD87afxHDe3y@WuQZhZTPI=HdD(V%pU&=Tvj)g0<(#>O+)V)j$LseTacoJ? z=RLUdpkhngG`s9Cfhrt@LM#UzrS2&#bN1uAdLi1#^VP9t>30_I>g(V2&GzqGrg%DH z{qoM&9Pw(*Un(}lY>EmDbyH@Eb)O`_a>Q_-zeJFTM(dO-ra?)!x9EB)vP3;QK3!HG<%ERETFA~TjttxHzf!h ztLKthnzYNr8H%)nhV0QbtQ9cJVSgBZ<70v)$M#eQg5c;L?J~E=WE%s+7uMd-|nwZ zk$i64%oynwdFlP;`(n9getnxW>~93;V3>bw5(1s4y64Q(?xQw8k5Yxa z9N8Ur{n7B4xUI7J$&;cYk9{W!UR2)_JASj1RYvmI&kjMSe_wLHRRvzZVdmK+Ai-9s zGf|IAroD2R&d#`*YHf^)5n{&zpS=i6ls@^_zb8D@rvEl?tb6EEmCL0za=&kW-)YL* zuxP^b8EUiKLUNGOK zqvmImV?Bdcy_$aEfHcT2Q|9<`H#rEj%`Xa^`0bkE3Fl`glOOPZt1o$b?O^hZdqtXJ zLaj=Z_@rwiTK0ZVWP6snZ^q2HCnt(eZMt2sKg`NLsq*M zEi!ddU~z2}h*_UeoBptwrM_@K^ZCQucE)Vbc{D>qiQ~iH=XZB){8N**Th%vmZN&Qb zhj-nNt~$2TYoW+tMbDU%inSl;o9GY=2EbP1i9y5;Td;9mzD zEYj@WXT4OJ;$Oonr&g&u{ru9y^L8+T{Ig_^EmwmCkia?)N9B9{G6tUid_dwW{+Y!sYs%({iNS z4qCk0wkyc$*Xc#u|1UlIr{3y@*}V2CGt?X%B!padto!%k@+K4I?_b~B^50kYotge> zmd)~2U#fr9m1G@%_t+%U$(+~W_979d01*qm*mePpqXr639ithRsCb^68lm@2=6i{z z_pE0{r~YvHgUc-?twI9>gP471K0IJdGU4$*+hqE+qr*T#+BoM{i;~CP$jBX6=gi^x z^M~jDKEn+g3?d>8GqQ|0I1LL54K*|jUc3a6e}1#e&OCZwyp^YK*EY% zEF3DnvOzbADw!49R2?b!XS3zz!G>FZj@(i>>Z5e;9*?c<6AMA{g!FbXv!Ws%`+B3A z|K}32PAyo$5i7J{`SFA_HeZ%Qhj^5fIC_12TzYqI+-P_p+$B2FFe1{>rshP!SFOXl z%X$3g8OYe2DEMQ+tgnCg_V$o@b}8xUAHKO28b}?O%CTY9pYQz-f0ed1vwvJ`bR0B1 z&hY>LoH>S|p1`xBvY0bPq0tn-~3EBC}=jrP298F3}m#1oqY zSe9t6>%OMK8F_5mLes*Y_lr$`=ZR?6AKhi-d&BIo!)>F!Gsdyy%QA%T|LoIP^nT)} zr9U$j-_6gs?>BXkPUfuoDWKNe|NmQN8}hjZ2)Ochdff|tA+ksy$#7b#GFNBoiWj0K zTvJ&&=Ed6x+J98-syEV}vGb%)-m`rMc{crLi+&4u0pRx9J{E`pccWW=F$Vjp+S+XiaYRaPb znb9o^UT@IhX|Al>^lJSMy~?`%!ShNBR!c0^;M(-&tL7@uBDDYi*UYu$Y7P)+vFEOA zQfkt)X%G`LyLX8BiGth#Wsh^JH=;V4Cb&s=PL298`I)0ii$hbeZgh_E;Y06q-ta#X zXnH->WNPluZiRc3+^?8!m#*}?og{I?tkz#3VHQW@>qizj((~GvZCv+b-p)EXCx1TCA*Z5{mYa?a;=hn^>v zhaILZRX-$A*taP~#*pp(#aW9EM7{c@VfJp@Bx4nei>fTQrmg#Z?{gR=!>(1>SKgoa zyX-}PmGZW=|4!tb=wVjrn|PVs*tjHyF;|s=nR!v1#3jvU#%FbWd>JXLR~s7`7@Rm| zWN<*O?ZHaa2m?1k8w4*sD;l{41i{v29QYBrXfB#Y=_RM&AQCY|D z;L|*t+?S24Kb&Wl@wNF#9tyTsv41dYvXb=P>f;9aa)*1RMJz=jF0*X6t3MYI&As4bg#!aik5&;9+9)AUkr?4A82P#-#+rnLOWz1`Cj?^c=Pqz&QZkM}z-rY%j@%E@uoC9da1Z3LghoB|RPfqg4+fSN&>7gputx%@|HkUb# zj7#_XwYf$xi!*Vw*)8*m>5?q?t~tTh^}??IH%>(<*|J{$`i$4P-v02`>?@JtpswZr z|4PL-Ufj0_?Y0aFmHhN6;MTnv5B}Fr`C4Bf^!kdd)NgyYX-7p4<}=mB?l`h>npj1b zfX%l>qG>;BTV`I7C|K^t)}!+2ul3~PXdD+S?}}lFngP8H)#KAQ&Ye> zvz8@nDGgye4@_OiY94jh#X!w~QF7%g`)#xMEUg~qITvevJa>4#(Al#t)66rJ{{J)l z^YgGlGTY%@r7Ajts-SK14FCTx$~f@zWpLX(IW|Q_Hg=mW;v$|6UCi?=F5C#%9k}yE(~OsGAq`hq)=XvfR0}xTI%BCCv!*J$(f9lI zZS(CV#dH#8nDPF8_-zU(MS>jXWS5$fGGSWUiI~h)zo=;4lLrYKHYR%brf%5o9`nC(tKY$?0TKl+ z_C;ZB%SBIdi#Fx*C51d(eBW`)}A{60_V;xSanK3_UwbFtt(O`3QR0MglJb- zaUXG!Sf|r{ye7rL^N~VC!hx#~{KX26^~>|TzGkQscc$Q<&D=CYsKY`ZH86yQvDMbH zm6x|YNqHf;`p0hFA2&}lxol(qsLt1PYh_UJLw}hJt_{;9L8JY9PF%QpwBV=FOzwXt zKtoHQaL#?yz|6|p_8!@-dezrpK z#}L)~`wU}t9odj0dHCJkD>A=SgrNQkew2`qkdT@xAea~soc!TiT0nTZfvIc!^QP6^ z-0VlV9|bx5QD8s#t3%;HaKeH@!^ubY*Mr&$F?&vINMt@d)0ij!Uc&yg?rTu<;vY3I z^YO7UGO|fZvUz*6>Fcwxva<2(H9p#pw+$q%E90p6Id=tiSbCRWb9@1 zml4-D%DI28;ETrLx3^6sZz(~`Ua5v7jXW-{k`)yaKE9Gurb?QaO7ieZ9yy|syHQBl zbqUj-UePtF%wa44uXy})hU?LOd7k)v1~T^N42snbPgW0^r=$qo<0bU~y!@3XCkM3f zYtkehCMF&^Ii6p?c$}Pgf`WLOnlv7(o`~A8$kPM~A_VorW2ihJSvs`nsJkFI8SH zedZji;bYYwjMqdO_V;&Pvv!@?l@N$(9ySsn*jY5fTDg`_s}Y`Q+(Bb=O2zl~9KYABBCVj$gf(l92G@ zQ!8kM-haMC+@1uBiV2|8IK=fO?(LQC;bfA8y7@{cv+#xu4*rpXf{pLG9zRODa3#gS zAYsC^ws*@~=Bzq!u(7yYrS&J|Nl!RbuvpHcr6|l_M$IRPmj&ekgc?o&DE7nP%z`;xl^YOoIQKsG`Fjb zjmB0X=8KuEn_h^<&2XH=`eel!^ZW(s8#in)sQGuM;IGx;mzR0g$AJpt51&*`c0Smt zn|%Q^UkeJ5L!He00zyr4Cz&$$Ib^8@2C^k4zUV!7<_wRH4$qo38Y=AUCAK;)j4X~z zA|fUK{Y|d;KkwmP>$dds67y^lEB?z>ZTP>xo+tml)A^VdnSH-sUQSrA-vL@?2r_+M zC$q4DpbdnO%QA^AWsab}h(45^rtlpw*nn1P*v zfq~)we<{|*4CJIHd9|{-!_I@()a%p#=`C_vAjvcg#_|8tlH1r~;>Fe!?Sv1!A`dl^OZeW_Q zv?X%a@imb%TC;i{?tNbRv+`HrkIzqPpZ`84kvT0*QgH9`SN^>zGed+ z*2w~@UMo^2PVD{vU$K*;_TD{KJAMB7bQ>?if3xp(^XSeKSycXtQBKL4Oe?oLhKZf&tHEuHP{jZ?qcD=S}naYXoX z#FYi2iV~V_4qm_h%g28E|G%DxN8m`qk_YlCNB{i%{Qvv=`u+9)|9^gde{=f(^>Kg8 zpPjj%ef?kUj}QOPO1JULvzcAl2+A-Zn{_&y+uNu1_j?CSi1z9_ej~!eu`xbsmV4B! zV3(Gevwz!jbDQvSOy&J=FvZbDV!~sg>{$oq>J&_qUtxOAe#;Z4hl|t})Mse@|NsB{ z|NsBZ^Z&)}`0)S7$NKQ~MtkbdfB2$#_;^20{ePoBzmI?T|KGt4l)eA|mpXCe;Hgt5 z&Y#!RJEyLxnUSWpXrZEl_GxXsbBZcQ&K&$Wzmrq$1y96cnPaI52@OXWnYX1hsw!O! zl8EX_y8Mq{_vM1DpY5yIbayIf|6^a@kf8NnQS-<5_xJy+`%9dgo4liL?!%M9ZM(}Q zY?$MEhqokzNzvoY!Xqd9xu$$}59A5Fn16TcyXm?B#dGKw~RE>;O zwr`sl&(=KYXpd-#nVsOR@jqbepZ0Lo35iqv=S#%xO{(}a{T>r@#Q_Yq&aHBpTh?1 z*(}Bjj>%th;@Ir>vws%T1uv#2ZCcZ`UjF~Te}f+X72XM78mD>H{ZISxZ|=iKu5DLW zOX|fYRs5Xv@UHbOHyMb1xsL?Io?W>nx6jnL4pI9I9pFsV`C#@xH-#i z1?9*8|NsAg@RFq0tOH?d6|}gw9BFhmoEF65%oxSnw70c!>kWssF&B^CBKg&fPVw>lqgruY?Q=e|$5Kvfpb=%gdQ>UdRB_uSsax!nb z)WjIRMq#<*EU(H|o)YM@hlq;^n2G z+c#gna9vzbAfffEtE5E0uDKtK8W@ca8ALDe=;isN%A&+nplX-H$?YmrI)~YK{o_sI z!Mw~u-}SexYGIrfFyZ#X3{UpzM+ct#`+xq!dwEdi?3XiK6L+@Yi^k!}>OAuHL5hBi z5L=xdJ$RlT93ZfHTiEx{moHtN`s8tP(^n742}~0ix6VpGAlxnC$zr?vaI=Ka!3}O~ zHrFO{^VFQ=aOU!MI+Bt5z-Q%+DpieFhOhcS@tm5Fa3E~$ANP`|fXS@ajxaL*|7ZB; z_wj;HDu-`v<@x>1(5BWa^9-n}|NnoHN#TlBT|0NbuBdqN;#C(n_h#ouoq}0fMas;Z z4>+7Y5|Pa3{k1XV?t@j?AGT;4a!ar+S;gWVmEs`6JmXtZ?vy`=C&oOuv!6$!Ir;wv z!v%TD5jhHzKeT*$DSWt*nJ4?2LCn50A6|(v-<*6HQXVK77aABEa&Q*jxbdQ`T{=4I z6gTHaV`Jw}hRQ`v4!SlEJXz8O4nGpznV~o#yRfHK;PSa2?Ye(mPi3O2lW?cLID zU0u2I^oauJYbOp|V!X04Lz6k&B;lHJgtVkbC!1}e;}vCdafL3<(%cW`$`dwQ>mAMb z%DT)gg>^z@)tcDIHLC0n)7Y=2Fg|y>A#Z08v-i}7a{a@*N_o!DSKcDP3AN3#kb}Fp zy83lW%8!hU4}}E=i`BSYTA7qD2C-FT9SC0dAggQ9RjD7+8aEzIV$9BUU9suFI}Z!- zQWZJXWgC}@rbN1|Y@TNE!QA@8gcCjX^`N50qGm?oEt9s{=8_&!+u6X$MM<|XBg;!w z?bgvF5uZOA8X6iN3SwhcJ|yA6koYb3Vzb=hmknxta=%VIlJMj*=1t6Bz;)aiph_jQ<-Mghrmj0BGl$4N|@!%zsYN^5{7Ms)uJ+4bW zaz3dz8a3^YyRX#`B?jgD$!G8Iw1^0suutFnW9NreEeVetJRasxc=UclLg>%;@`soE z^HhH~(uq1!@Ym|_@%|rCiV)jsqIU{581PKC;9Aj;%w)Epg>{poTC%~kg=}2Mk2x&( zIQOY~|J3y!KTk?6di_e^n8fl*xm(+mGS6%ycj!Rx_i485>o3cN}URGPl z|9G{m`sQWk?_^hm{o9r5Yj06f!vD2m=TBGF%QLjp?<;t4sfYLq?O0pHtb0Lurk+u& zIO{An#Rcy->O_R++G|ytj3<*}&o6XI-%1S!CJWDJsA|xeeCd^B8$;w?Y zMTTd^XSEGX4V-R93ZgtG)D)7sCr-}s$+f{!<KZSq~)Hm2Vt6pv5QN zve>>JRG(>RCG99+{9U34PHahsL>@hR=HdVS)QJt(FHK5FOl(`qtfs;ytgI|NhtZLT z$A^dKjK`(`-4ic4)ebS_Z*-`#?TO%WaZ_nA^-WfaImM=aAkOK4*+aua<&PEl#SVC? zsj*1=a?NZE73+(LkPs1-ymd<=Cr9GO&5ah2-0QSH%*CZ5I5>9FWL*`NA2Sy4@;tFf zbJB7Z*c-WIQnTR75C4y!;K+A#m~^;)Mxe%{9opUhB{p_1*IU3Mp!X*;Q(3uf`v1eX zB{L-1>$#bIWp?gmT3Ug)!g5=#ZD7+mdapUWp9yf{wmQJsu02r!^Ls@*8%G`AqHEakSH9+Oh_v z-50F%XBhVGKXX7N#X-x7g;jFRT1mV5w2J@p9$wRJf##s5ZI&USu1?Or!a_2vEH`G% zT)0@mV<{7}56dApKNFt~O)Qq03fYat7t`Vl7(X6#4{}KUrvHNTUvj}i4&Q_IA*nAI zvL7?A6>Zx3lrcRax6)xk2S;-^x3dpV^A?jsZjfx6l$4M#VdBH`vTMBj+ow%PNKQ?4 zD&pF-`~P7s<|qF+mdO3=wvzfNAC{^V#Azfx{rSWr4-?rVzcZM!O_%C<;8w_KW_l!J zq3nIQt*uK2T@k{r}i~e}3QD8GnBM|G)qK%BOgEMa^;yof;TC*)imcJvTRV z-fjVgow`k{(;RQ~E;i+w-YUIx@!zFK(ar!$0LYGBJ0DB-kvEf zc@OUI=h@-FWA2V=phEMD-x; z7RG39W?fBz^#=|%KD??8>O$n*OMyB+W{1Jeotca zvnuDjk+DoY;JM|Nj!joLsWdS+%isVPr(4fW>p!O4%*@SOO%MP43`?DkS+Pb3 z7Rle**08kyzsKjLGdAaac=xVeg>9Zrw}S!f&oxNBE)d)yw&qo2q~xN-(1z)psNGyG1_E<#{C@wF%X{{f zf6fmMT=H8t(Zp_+vSjy*jk~z+wdR(YEs<(dG5%CGvw){`lBRU*ZtZE>@7w}Ka#XtgSWyje^GIH^vUn1qS=EJoLXicF{{t1{x^E; zcxmU-Rm%h}O+GL+PI1Ayi>3DU&9A<{;@rvX-!8&>N(j>NN@^4p`Y`E%{hmJ#TE-F& zlw&u1V$R}YKDB}8+M3SD@6N)f-ruk1m)-UK$4B-{Obnbge?cK|{LLGmW5=8xJ$hqf zbLK|Xb)JgH626SKhHa`11oBLzTtFnZXrS6MwjZnAxGzoHwf0?q9)r%>IdHT16(~<$vPL3#VFjC_VZ0 zb^81J_5VLMFFL#b{{H{A5HzL}S$D)hn%q{lmaW>DK4_;ngZhZDl^7bBQzOHDPI5F{kTv9OGCcQI>dTc#HtgH2w`0%AdQgYLF%Ll$(Kl&=zI!@HT zkU3p{rA&i4=x1Y&#xD-o>&wPTK0Z9S5y6pT<#VT<;U?oZk)3k>kXFv`~QFW0)>L7r@W1r88~ZxfBXLIZ2$9fdk>05 zpV-QLP3fV1mT!wrN95)N-#87^i+ij0=iU3$TW`}W@!_jBv$i&~WO`tdtd9G%rp+78 zd?#FmTG&)Ax=r?{e%7XjaKns8^@*ZfX;O#Q#7sE+OMi{sum6vZs+*b$PO9za-BbJf z^P8LH8)iL~cl)!@U9r#NU;B|(2Af^~7O!eIdUyZ-=Vxc*Po3ZM@Be&9mzFt44mK=y za`fiPC|j7>4(dxY{QsZibna14&xymc3MSU`bf|u=pV-A)v8ZveJr7U4!7Q~Itz1qq z&dxJr?k6|QxBnlz=f}Tez5Nms+gI;%xRPhFWx>nT1Ir|g?;6uZyk>oXgVK^h)OKmW_`<6yqDr-481Pr8H84&z1dTpi@4j;>fTQ8MEucOwhS z4N=jbC%@YYB_t(1c#=}};{$h}#bF(W3#+8hv@|_S@QF`Qn^K>D@6X>)PvxzxAJ4a+ zvW!12jx8~fjggV7*HQk!fm3a(U9x9E0!> zPlltW?%B0#b(M6^{QUN|yy#Ca%fCNA&!0a1>(kTnJ(kkpcapd18gvUc_5A1I;mN=E z=jY$w_HXvob~Q{BYhI-E^OodBUD3n$YmdHp2kDHw=j@-kzT3wZ*3-wKoh7-5=4vteE8f(`65)Wmpif3Eg`jOpP zMsOAv*B(YjttoAc%*@Qphv(b>-?Fvx|G&TOQj2#;as4-xY2Iq&Wc*N`k2OF~UcPyo z8TZpzQ8l%;2?A`KoLxMQueS*p7j=PR36vU|Ow1COuU+V(oAX1)Tk^wydzG~bWu6>j ziW-mp{8_VcuhJQTF<|;F7d<1#0}fh7I=Ma5f!ks2Nn0A#JASs-@!>7NnWhST8#p# zH_X`hk3V{rgSJxumrz((+oL39XXik#n$u^_NJwd&J$keykn53SglK}Vq-uv z`4b;C>*V$6&6C!x*=XqO#&+(a&nCu+t{g^?fxe`~gdgnhCmmpOemGgD!0gb{|Njp! zi}I@7F{-h`U znm=n*VuHeysii+YJ^g;9dR~9Os-B;of`e1c9A4$Dk5|+pyv1W&Io2^TU!G9U!}C8i zMdI(Di1{`@x7YmCf8_ruI5bs5C&j}@<=$%-#x<$ThOQjU%+Q)4T>HbS7D2PGOp?66 z+owoPSoUh;70v&J1^*XslAO>a#I)e!!E@&ll9hKQw^#d~z3Dq!Zf~u3#i{6!DY|NI z+cpGvhE0>rk5gZ^?7;k_4fn3bk6%(BzMQE-3#|Ig>dFaG>&`R(od_f&uHZftb)>N+@hT(As4Yy7mdyZxNETGAR^7sGl?)iCtlInlGxRy9xO@#vs79Co! z?%adNhabE-qu>?j-fM7RzSM^$pqV6w|NpndIC3#4a>yL)zF__R|MrCkZaBY5JLolC z)W6K2V1~oomOUx$N50jZ`21VPsonF7Qs-lfXl`Z{yF5YcxP8u9KDM4K45so^8>e=={X3qpc&YTC+@}8)pS1reG3s(j z7KpITP)pd#k^RUa%pigxOD^1W!S>vU|reaInflfvR0g{>zXv!C)6Pv>muu`cU*>%HoMTSPVrMTvg!Vv3SfgU1a6NUU$YQrG{em{7ntaTg=!G zGBNW(tz2VwT!+6Ps>S(<{kCK5>y|Vu6AZU>6nL0l|G-N!qS~ZllHfAk8J%ey3`UnA zqf|+bpXK)a?z&cUG^2e|NYn$R!or6+YeIR7S^`%uIyl+F;IdehmE4|&A5Ki51~tR~ z|4yDVH_rU(k6fgn`|QBvuoHgrA5soB2-?ng$!7e?U};BxDcho}JTsY^8Kccbi#24m zrNO>x>Mu@U{cpI_A}#bn(8>B7r5zqIo-CVn9KBgScpNenaJqDY{a;Gs%T)ee!@xp{QKOrVPQ84d#vyy!3YPq&S#XW-$LaC2P^C z(|6JhTJ$=-KjkF)|M;a3N3MA$9`KNyp|bDFhxhmYPuKr{XV=$Q_jc!q8Iqn{Yt0AgZb^Qs ze`)RVj$F+X(aMS9TK^AkyUQCXbNyW;%gqgW`HpUFj$U1jGiN?L!ucs&LHPcm1EG&0 zW+ojHZ2C~dWVu#BT#IEg>$<|6fF8p_Igig|j&Chjqrt2~1AfaI0JW zD9@r4M#i2Vo;PoJ=FQ`Y-)HdVe*fQlZcz+pf*@m$Nl6a*)BiS4{dwcFWLj*q;I9+E zlRvsX|M~eopZvdL$Bt#BA6L}x*PfZQR_)(o&w?9L@)5$97kG;mD5xAs$U1f4@L|xj z)!ErguIi|i#urXi5`dWN^d(F}Qc_^LlA@c@1uL}zwsn1+dZ{mZ^6&rCjr;TQ%}x2T zG8sX^7#^PbiC_LX2lXtQuF&-QZ~H}O$Fc(p*nE6={QP*LqIiz?8^5`Ec)@}L2hRF7 zm94qTBAE*+fI->zNqA4&(fSI@#wullb-QHzJDbk^`2YX?mdy9z>;LI!pPn@Tyt>Mf zhQ&Mw;!PJ^Rm*rAyhioDfQO`jg5tpmGfo^hbjYAs?eJ3XCoZ~0)ssEGWJooPLG&b^V&oeA3G1yb(TDQIR$3AT-h{=b7E+6jybg=os_AKGpia)JO zdcME^?=$bu%4N?E?=Js;>GICSi!5f&EL2rnqslmO>6HuGS_X{#{LX=b&Aq&bpPl8g zwB&hzU-)+0fmJ;{Um*2aQlq|Y`+6bQT$2Uc?E{W`e%xYtA({s?%lP&6{WCM?tNZ`^ z^yz85zCK@VZ5SgX8(Ukw$F6^g60J8s{b+CQURiO1xwMomGqdggf5~fW5)+b>SFBGD z2vRag^D+<=m~RJhxs$Y=d**~g(^~$x3m?C><8Y6k$hUs^|LfxZ-o5eTgg&|FyoAI(p?qgYq@bZEo%SKmN>ocz{u(`g0nanOR#C6I*TVqA!p9 zS&m$Rb+enyEhHrW8!ouFyV)e=-+acWe=KZdqkO*n|NVXc?d|pF&)4rSea-K2Dj^}E z;{U(<6P4}b_xxy1Zt0z&#k_f?^_>HIDtYAX9B$6HH`a+e6A*Q3L%luo$MQb{K1w_m zRpy8R(1T&K4HB*hzIZg@5sRntx1`7K@BbIq`!mn%?de6X_YK&K{pS6dJMrMTh83$e zh)HC`JgH(oB-fV7#1ywb?ZyViiF`V`l6Ex-H+D}44UzQlv^6k%IlR8GH2&nf4bZTP ze#0EzvMqzz+nPC9heg%u|KH#LKR(_+*Sh}9oS$ERe2hPHhG*HbHU9Jdw9eo7{jD^! zwe{g^Yxf%&-vq@ytI5QA4<+Z_8$AU;bHZOV4zG{z5Da2wW)l~0Q+AiQwkGk!$wZ5? z9z~-`2M;tjIxy5$%F7%GXL+QrIDv6f6~qxudfy!53c0L>ni(@^t1t)6Yz%17={xh| z^Yj0!!|QL~-oGa9@7#$K6>a7mzA@wJnVI&-`xnd1-v&!GUe3UfZX95Sf;AL1e-59(MkJI+34h ze|=eRU-xJG#zbG=ry_!hH6I?lKRJ2-lP5FeY@Mz!&6?%2J@0R7Qc_ydp*JjvYJK&} zioOm9?(gTBZ*Oc<*CX)oQIdgi@`I#4nK>)cOQj z_SL1{*v@}&Z+X9C=+uX&#b4ZXm^O`viHXO_iRboq|LE_k6+Hg}>^TSa&Qu#B-rEsl(?FD2?j>n`isG`tdRO>hJ%m zetlWL-8?!=t9`PtHaj~r^Wp#6;r}=|3}kG6%x-Rec$~jQw5F!Vz5n0WkBPs2ezv!^ zW_~xtP|1ti;sA+{nfB#7At8<;gD3@j`RX3S9Vo93jab>W-pgD4jcP=tb-Fb89Pvg^-`g8GYgWag#Wxe#cm1vZ^+jG^|M0`Z^&%oO=Gpx{edGv( zz4)Kc&;LJq)Z;(zPxaql_VV(~s~##FMjkl$_BK!ceIuJ{wx^#y7<~Agyfe|+vr|#K z`{4Qh#IsUuywVbW^V2H6^?=qk6+f5g>XP{KwR3I81W|?c0-(VB|3AsGuc&9~;{O+S z2(T^D`T6(v|L5obKR^3>U*6q*Vf8v`34`t1i;o}K(A{;&pEdQz*Vpq~SPtLYTYu%s zjBjs$pFMl_!&mJ?noUhS_xJz*|NHy>`}=z~FcmLR=VoJTv#*!fSC_h@kU6mE&;Rou zewmhpN<6zIpPAV%_u zT5~l!2-Lo|QqhRMaU=Rv!o%L2J+tQ>a^~R9e#jVhFw618agl5X6-D+FSB$UP+_Q=M zcx?VuS+RWIs(WV@AEde;v#^_$mU}z&b@epX(%QAFxQiEv?3wv&d52l@yug-qyIU7p z*eOjEx~6pA@civf3ciIw2KCYB5+w|L6`K5ym+_@?W=~X+|JCXs(=6$=P$Pk7OWTwe z)dtNkv_thIt)eHzx`kvD-_jhaR>GMmy>usvP`3Fq6cyFGm#f-Cu zn?wEj1k3bnnVFfFZ2a{9>+Apab$@Q}{A_M)e0X#E|B0`s?5~%ssF2uK^Yi~VT~Kj1 z-~NC7-(UW(WOpCt|MT# zV`gsq{$6s+R>^&}$q$~puTZ>k%R$=-l8vv#D)NDb>dmg3gnVG+`M~tA;fY|4Nb|vo z4?H5?-xu8f?(!RpqU*W7o3CvRJSDX8 z+N9@+p{}WtqOH~OwZ$yvfv3)G{$i`n+4gYP1rx>74l37nO}a5-`iT$sLj)2c9a^>> zo%m+P!5)*=Yx4?!d%LT2K6bBtAwO#YM;X{Xcjj*7YEY19@#o-dZ<{7}YgL}=oys=_m-47>B$c04XPcEX~| z@XOxY->sSMt8+wzPhwfC$+hNal*_*_6&(viZXerpp;W(ZN|MA0qeU;e*Hm2J?_+;X zom;d5ywt)e@;sxY+Wx}F=XaIve|GNg!6Qc&tUM_rcP=aI)S5+ymaI9WqtH-O$r^Q@ zDRAMRzo$QZe7t{s{QuZ}fBqkAu77_oaid(&gpO|GJvAp6tUdeTpY0YuQDNn4;fHw) z3=M1koB%BXeR-KDCx_?5vETdac}|_Wz*wrNSfeH|$0YH<>D+h!<<2PrNxqRgY72H*Xo#Q4>^{N+imfwq4MEG3UE^oD@N~5H zGUgpP_Hiepw0S}j1E-;)hm+L>rKy)LZIt_;5P400-+rCwx7~402EK9G@gm#bvZhVz zR1rL{ykxg}*+HJ?9M196UT<0x`tzB%QHQKclBu;l4xSk+$gqm-PfP} zg(s4E&MB$}{bS{x>#TWf;=bkU;#rQ{B)?7W`+KRk+4`Sn${ywAvr3%bfs^%?xxQS7 z3p>KXMW6n z;Cy!1**2A#Pb(kj_Md8hfA-Tmt%shUb}TB0Ew`4rBk(Vr!%$8lv+G)lWN2&99KSc; z=bT>?=Fur|z{Go>u!#|<;key5;(g-p2267D4@Rm*cInTSsh+o!bW7(heQ~)n z@qmQOjyTKtmU18Gty-j^;k{*<#-`Q-87`MEnB;6<6L#c8$*H1~`%jn5Z+K_ra^wBS zdB5k*&u30PvXf_GeAA!1U&XlcXHSQWHLRJno9mE+fNTFVM<=bDBAP90FW+GKZ&RO; zD%#Q8t7tc&fm2iGh{^`BbwXjHL`6=%=$U!ITqUXyZ@5YJXvhR ze|nL|hgz9-o9mu;-rsy{VlY*8Hm}ozU3c=XY@NAMeWF>4(ZNkCv@VGxY*g^M;ku&SF^5mB^sO-W8qlH{xV*H``M8?EyyC=DRXT><(k`k zr=}L)gqv@3*F0j}-qhr*S4obFYt-nbico!-PbwN$W-(Gvxeo5Y&bN$Yvhk_oOWTGhv&3HyU8jZnTMB~ zZl8Vian;k3Me8)VmTZ|5*0t}r1RuLi|MSV$PpAFaAFX$+urMdtpzZNQ0~gIlGc+C* zB}xc1DQsx_AO6-zeZlLZ{lTG2G@~Xz=WdM=SA1brwmUxJ=QDxm>KXfg{q|jC4$fI~ zW^Dv{$Yp+lfRCU^a`LR-e5?Ot{yTClPPgTV(~+Pl++u;}Ln%)Eb^<2({TTVEk7!D8%;Zs-*n^onG2V$ zT7$i_X0|PNvjb0?{2z(DmkVV0WaqZ7yZh(#U%{iUf(yI5O5P+~^`1fuiXOo&26rFr;IG*B{l8bMYGBu8&fCj!HMtgbC?srG?&Eac z)bmzsX+f+_f1*L%t!ST;rsr&9C_6PE#-Yh6zWVjwpz2THDdu>p9W2 zIl*aF%!5$=XMfKaL_7WRYuY~P+iR1X3#PYT?$mMSxFx1&@$2}XiiF6X#n&GC9{%bT z`Ka=Yu~*`(E2fSP^Rl;UEM=>?Ip5Ucr+4i3C{E3lS~c@k9+o{WQuOUimTCVyQ)AI< z4(B$;(}zrEs^vt_{`Ol|afQ~ed5au06VCG}a2W8!eE)Fa-(kNOd3}GTR5Czzo6MPO z2ufV8_jegKMQ|uJ&E4B;{^5$*yaP@w{R=pW(1bzx|)ge~(YB zmTL0nmybZmG%DRqgalYr|H!&J{n7fN^8m&Jaxc_np!xkiN8s;<)G`Gn)LZcTx;0n9p}o( ze6g{$nxm_f#C>*-Zv7;E$w}%n&PzSM8zAR?Yr}k}^>+Dl&a#OHa4Ej?yCl*4+Q`>} zFY5ivr01d9;#Zj{-F;ar<+SFx<^Hyfp{Fl0TNkr4|*>0mev3UDU*H6BM zxO$c$SE~U}+y1q;mMT0-apiyTaBpj|`&=Chjiw9SKia-^uK2>MAG}a<)+fP9e|Dzx zy)R~LeQlYvPis#49A}O-Pq+E~`SRRa@=Go&i^91jfB6NTv#WAODW?jQoSL+2*{T!8 zQ=C>F*f@V?Rcy)a8EVG_lOv`n{Muu-*WbENpw&?0sKEsj4#)e8O=I0}C+WH#+{Z3< zeX7aPozp9(I(co<+4fR6@zL|($Kj9?6colS1_CbglNPs@bvZG8tK0Kmu|r;A+4hME zQo$|SldSnZTJ+z}+U=WQbo0!FCvQ&eQ+}{qxm2dzJ0~t9OKQq5yG_%X%d@)waqd62 zX6d?@R;#*H6weDP)d=aa{61I6aMt4hc=PM@%{zFN`Gvw!MORl>>lvEKliKD#F<05M;85)UBR#P#$ z&gXxR&-(waY~JM5Gsa#Qx0&!}zG^$aYuPG>HL08H$A%(N!P7)q-wRg+E!DrPz5L8Qu*0^@-pkeE zz|&%{tYKDYW%_g!imVrC)4I&u$A8G2xBeCaxWNC;EZbgwh~q1y2dKV-quz z=0%3@Nss)N5X&DR=QMkrmY2%5lywGd$r;h7J}rLmBe#D~>coy+E*0RS>QJ6~nvmkM zKRec)dhpgPNB{oD>C+uuS`J>A z;OO1`@Hl_l{~KXXCRu&s=@jyq;Al2EcJHYVuSA=Bc@J+)zHqSgL=J~!LqgA{O-3(X z8fs`7dU!uJVT{}I_+{IJ_3j%s{r{8r;6VbUjlE_zWS5(K9hYKQSlO*?g{dpQ+_`t? zp4wK8ldLS=9T6)cQuH?HaOananeCkZ@UGXtAKo&LWkQeID6^FI8a5U$PInMdRL%4X zyzt%QmqtXkR#96hw zjdQBzD~Rt^>fe-de`WTPl*k)qYyKYFr1G&a{JJ4eiezs0H4)Zx7fs|8=KS898WC#8 zChb1uki@mA`_gB1DD^3qtgj51r_2(ySVZ90b4`I)&VI6mXKaeWy`MF+?71BRM2^@ur@A$D>wttvejGtTWMJ zWpui`pTL9PTXain_4M3-OtAA?|H+6g`35+ZY=Op2yx<1Iq{r(|2nOBcXi}=k%YVQc zH0#QwYv+{0nliYabDTc3AwTWt+3oX+=Dv46U;JuTZTh^8(>AGiE}AJcAyu4-(_~ks zg!>dHjxB1wmVMLh=kFI~GSie~TjQF2Yg+2T>Yl_{O|CiJ2G=f_tl0glv{D06I9~wnQI8jNv`vGEHYBn;~a91o;t-`;h3M4`8?#CnVf@wH@nCj zT{p3<5o^t^<_k~ybAZ2d@z>U{eX|t5v*(L{IJ{<#S)YdM{mUji%%=~T*v|jBZPk}u zLdPYXB_^f*uq*TlOyHU4E+T99XSwGd^RtD|F^# z4m`3+fJLcPqxJbh)6A}GB1cPq{lD1#yr$s!0bOgpV-r^`()kH?;FdWM2i{*K=C`(a@gxv*8Z7UJ$c9L=SQ=2k8OCb zanxdwNN%?wACpj1Wtqh3U6WGtw#J4&v2;>c`g?!mvr`>fzBDCOkz)>Hl!%L9kkIk|v{&t(1>*6h1@mIWuU-0n#nkt(G$^vS%(3Na zFc4|pzw=*=gT~&5+2&uW&NW@w*er7J+}j1t++4D2I`YrROcFS>`-I~&HOq6SkBI-D zd-UYA;xopD5xdrH$<0z;qQdfO);@QRS#60PoA1q6I(m1Z_lN%Hwu%9vI^0ePK0*@N zfnB+aUk5Gg^UdhAnik>KXFutg#f|c1orNyhe&BFw3e!Fz8}&zpxigVH(&_t8EMQ1!s&+O3G+>t+O=b6CN4@XC+x`iDsShuO55K?9lazE}qkzY|g;n0d+08Q3MI)B7 zg`O5)n~JYqTA&d&oWY@J@7O6r^P-~H{Qf9k0YJWLO? z6nAq;NGuS3%EQwU#k_OQf+U{>s@!KnoC3t}T<2+4=uG&^S7h0^hHdSK16h36CKY{7 zYg6}^*jJza8(WF`LDq{`92n}@7r#98 zMRSg?|NF(RTFcB0MJLSC`S(Bl$KTnY4W85W1-UOl%y61t(Y$%%HRTygPaF_a%m{4N z@lcuW@pSu?ed4AWk7fk%@bIwn8N}>3@Zp_o(jv&V z*+X$_6LbwGyp_1|iTQFe%Vdi=-I0ufTUuvKVvcOla%_y$GB8?HV0hJFft&rT%!toI zGBLZ46}%QZyg6N?Jr(MNgFcKqF0r#LSSrM}Ys>Ww#~8y*f9&k3cz)o*k_nUTVpuNA zm>3&geenK%BAfNH>JKh9^QJVeZgEZq4R}B0K74jIXe(OHpN=F*EtaHsW7~ny90P*| z%e73*LK(vD3xzaydk4s@mf8~1B-t8a%#(J-=z&Mv46TXWoF7C9lX z=s}TOn()*aLDLq*|4V+Yq}csfEB{||$w7st^7lqDI}Q|lR{Qnypa3K)oL;Vd;3W{j z&9h~LLvr#jne5r~6L=3b2);YA;>cpTehazO9gl?0mh-f{(qH3x;{X2}r4Re_*{*aP zH<`ewoN8dCbmRYjm= z_?Z3h-fEuo^PpgDs1(xxP4I(ff1V24?3G~aIpDN9%uso%M1$$kgj0)HS?X9%^esH% zS&^mM#2XZ|X$P0YAG@PH)h4MCsT05cYxg}n-|FawV)aL1;yw`XA2Q-N*0A`1!m_vn zPZQ7BaW9LDVaSd;xQb!5)T3bLWnO>I^8NYAa#rRH3uC`WlW({Qf6`gk8JSF*|1@mg zU!QJKIVtflTbsK7hqqRcGVIX4Uw`b~OdWl^xi07b*j;d<=?AxTlUCtU&MisI(|?}W zG-IzZn@p-=T0??p%k?i2(V`KPxo3&an3g0aQ88uV(w_bGl6>-MH@0*>{Fck~+Yp+F zW^Ot@-%di}!eqgJe?Xzbnjv>MbhmNB5xuM3yj!01sdcPga^#lR(f37bnkr;?_U+M)QmKk8TB99Z@2e|=m1f6(6k8{7LIUeR1s zCIoR|(!mn*G>rr?iHb=~>kcTX9Tz@RQ*mQP%o?Q~4D0^%ZvAi|bb>(w%Zd6&S)omO z$7eAWbUyZyyuQxx&CSCFKMW3ke(o}NAuNsaaBtah{FcA->69HxqL(N0?C?(W`CP~< z`L>JKr*{Vz{~AlVbv*uD#~w5MS=er0`@mCg!j!`Ia)-aY<@x>H_|AU+H$RnN@w=Ug zea2d?KYzK`^z!UD8DZOLDk~tliDl}Byd<3`jmI7yCM{eFH zQy5?FSQi1Fiagw|^5!$NRCs?%DZ2m7&;L7SEAKePA9GYQtsvoQANK=);RtSqq#(|9 zER9!vY>vt;GpfjE^|dWpcff=_e3xTzgSX6~wl~&)zV#-)3TX>oE+J=|^y5=&i*PW+ zkxnzC4?Z5)XK=S8(yLvFYTn3@G z&STE5Y>141;HA35XgD8dNGH>pkE%;$@@;lQ3s98qZ-7}YWo;m9>P5uOfzo>J{)#jHfO1=u0tZaD2 z*Q{g-e=at?cSqN>{}Q=AKF9w5e-Xp+_)jG$$ok~dEox`YnH=~45@yT{nzKFPPMUgU Og7}`UelF{r5}E*j-wS^L literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..df3cf2004312ed0ed0ebf1f0340cbfec7fd9ac46 GIT binary patch literal 19251 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3`sL~37*a7ODM5mDF#|gT z0|UeV|5B`r8Po?o^It<}XJcpP0$D>7MzFJw^gD932JkS?zW(ZXYsYu{r3xDR%zkp) zHa(D0n0)X$`@NLNwnCl-o^G4Eu7`ep^ZM89WsC*i#Tq-<7!1l&qo&>wOi@QXwg132UAc6Ux5Yq!<$z@JHJND@5}kiO zZ`NVq*Yp2w*U==gYTtC_mS_ju~2tR+Q`-HAmCD-Z(5{uf>ZJ3yZ`@BI%yv~9+C1;*FpG|CfyB-nvOxXV1~d+1&!3 zIg{5uGYx*Tr+!MaWq_)~X`^+kOit$n+Ptzl_jyU~d|zQH<#Q7a7j6}CJ=Jr1%l&V4 z?(5HWuCJN+Y_tEW^lvBLSkFDYDPfjDAA`c8!wu`4?`Y$zGed+w~P92T!LC2IxL->oxR`XE5&ErXckC0kj8N9OZtmzRo@>~v;7zE)43e= z+GOdjZN9Jg)T@0yE9U z9ba6?{_-z*VMY6+3$6u5g)5V~&&|?wHrjjqfBnS65^J|^+a`Vdprx^|lZ)5Z+I}O6 ztPmH;V_R)beq83E=q{;}zx43b`lpP>>GAn%r^LHd++Ug*(UN?`COL9~&>nY=i0~yW zf;qRZ8=SKQhl13JBL|Ni`}6(1y{#>0X{n{KaH6KBA~SQKq-5Lu{r@*^_>-QLl#uX) z`~2CSp8o;HRz*CHlQ_QJVl-Cb+-ehW#?e^J`0v6_hgA|Dlf~xQJ?Lb1IuvvI!+ZI| z%l&!Q#~J?lb+q88(cyP@dF<UI-qZ zXUlVay>ZQt10P-rGyk{fI#R9|wu+!`2_aLxJzCsSaBf~Ip=XV^{$W_IJ+KmX5vcqx2% zdp^%}ePbDm0|j5T4mY#&yuWAo=a1)hiDamK?vEZkOZ)qOzM;~gJ6A(}V`D=+JEu<- zjWbNv{J(K4=fA{mr=(TVZ%%T@RX*&fI5UYMJhye8MS!ncAdkP;iq1)mjHZzalmGuW z{`2?rhu7kVR|fN>pEH=zEnNeAJno>(+;KNsKUnN0ELahTOm`&TldO8a?YzPPndX$>f)ULy9W3GH%?Sv_l^5b~6 zDH`tn!XA&=BaTc2`D2OSv01Y(u1PpBc|!s38b#jl1xs@7bTaRCjbDDneJ+SM*UU8JJ7mD)dVhOD;>xJbox#(J`tB`j zf4#7(i;uIp)3aM*!ij{VeSd{-$A6k|y8hv=cV_4NQ)dS6`uij9g(R2(*0U5sx#rhe*wXMvM7>nTR&_#zYQWuM!$mL7RK zFJ$V&HP3A49cYwayVuqqs^URLvBGri7gGhd zDTe4Q6+Gs_)-`)#+M5Zfik%WtlXm>tenRO)kA1zwy}ijdHn%4}^=exkE@@YPY~HpL zpyUlobbgN>JW08BaUwV8#-2ViagmhtloqvOHa51LlT3S+4jgpI&PrG&aVCvL$tsZj z%Em3y6J#2~#2>6t_;5x4$7_)*EL~>~%(U3BjMF6b1n<@R`z8BilOwivJv_qM_Wb-x z;}xKm*#G~FoVFf1d^jy}p^#YG)5nX|6%L%^@D?|a3aU6XnSN}Es=U|e@AgeCBx3PB$rDk8Uha~himw3&~U%Tu`=?3 z=#hFEl_LT!vRsJ{t0dN#avL?6TTJ*Q-NZdHDJdx-IeEk8WC5Wh0rB)3Te}|~WY+qU z1oHg<|4U{b!g9p7kS{wnM3R>lp2)3wg{EP30fsFXBO>sVIV<{%@*xAW|Ne+=VZH+lJs37kSl z*EF+T^R|vk->pR}!Px15&Ee`LEpz_YeVE4pP5~N) z8#Wt9M{268Sw%%_fBvkzoG0VN;aSQ56ICsRbrwvN&il~euI(i7lT+!_e|wvhi(C>O z_8fAdKa@D*?j4X}D`#f*;oYOko$sAA(bahV#Et#Kg1kwwzp{i9CW zl*5cMT>e6dDJcN~2^~Et4;~~$Z0~#cDYa|X3P`vt(kRTxKAn zV)l5fv77;Mhmw7vhIaS#>Fm}P8pVYh4<72^j4L{Dhf81g;Z?3BTn!8!=Av>pKg4hd zwrp7`@buLG|2`YqRAdfxGfq)(^D5@?n0Tr|N}DGmlhxGM!08m9O6rSOigv9l6I%)e zpPsFM;Z)|PQrOZT;H@?%myazclQr9!g>S|Ti5oX17cG{&aYJI6Z}N^x;r^RHpzbjS zxo7J1-<1_BHg0m_;!$&RQ(Mx*C!f>McUplr*+9WmUqP|^j-In#vgE``Qp!PG4U#82 z#9nI@c{H7PYR^3BVL|GLZ(N5MGS)VA_^rw87S!D7(9N@?O?Q@*&teW0ZVyJ6tY{X# zhzJQLX32<1Nj0B@9i`nH6k!3PUbtdK$CBmNtt}FpHW?kdCHa9P&8f?b?ZykSTM~lT zp0O4QH8MQmdTPH$u`}l4f+LbeO%1Eu`1Q{i$?U!1pr>dcrTSte`^ubzhRF6gc@yUL zX4EWLmDr;0Sg~0o+b7w~!hoCKr&mUXXWl%XGiP}I{^couH(}2QS#bJPvMfAt>XWNW z#PK5$MFj_L&->t!nv~#>S7~q{OV~wOWb1?m^LH0&TIc8_eCp_84rW=a^q*h4G1VcI z;d)=#Y=Z@>jx6Q5$--9gMLC-Hu~L@ml{W5K!59CWX83M&OMyM*4u{dI#;wk$8>WO^ zV46I8_Q9)DA0FjyJK8NNXQ#5Zo*Uwb+{Fs~{Hr%_t_zr8Xm8KGN3ccasRxgmM^xh! zRf$w)uZNAx=1vq06I&Q=&QtJJqLWj2fvD4rR}()QsvR=n-5(esaAdB)dxt|@o-C(Q zlbn8tN*=tD{P5AlBp0c=mg33^2_Yc~85xO|){U*gdC+)Y_#`1I>B^-^{9Hz1Ay3?S znL2_6o-zha{%o&P|G)if*o3VH3nZIVe#mFrIh;K{BT_)`oI^}tBPWN5=!!=!QXKkc zBqSvyCM*kASjzqFm|9)KR*r*9KmM<0B~9)3&|yU45_E!w9!4dOX^1e+$H)bYz*hJ}h+Vl!{+>H%4gvRzLnC zC~cJ3oh&R7Cr%U?8X6u5VvFj3VE^oZK~bFJRHtv8%0�Jxz)+ zH>D&KbiE}yv_BV0cC^jdV96Yw*nHJC!drHWPm_kFM2Cjl1kXRYf;+tI?AX-R+2Z5b z?CcgXf!b65|0`(~c61e~s@{x<@;V^aZG515#s&)q$u~{sgN%$8i5v9v@MJWxybcd( z*dVw_x?Re|!AC6pfPmr*CC*hx*2EiZU0~h3#EEI=9n~3j5|WZjmPp*WE9v7aIb&vF zH6I()BU*(wZmeKsf4zB=(sBk1huIU`mrML;v1XnmQY2t-XqBQ*!}5Y-s~E!5j>bO; zV4Vh*g?as=^#?2E2n_F3%CkQ0&hqP3b=2%Rb>iXwzZ%Rsp z!bU!S`4gE;$2UE!KfyIYTXsg^|BX+J)-bF#@;UgwBDIrc$0cn&wmQ8cD}`k{oF*NZ z_?wd}DfO^tSy|f-3%2ZRwz9G{*OZ{{onWzJN5q@Arfb%!CNljG6bw69-_ui{m>uzN z(YZDSb^VxS51Ck$=e@b&pr{aGktkL)RV8&)NzqzH(leE6O| zVS++}DI`x$uvoIBMMg%XwdKZzQ*29iF-QK9Uvt1gVT-Pc%M`JSN!)sZNnAXiCh>W0 z6-vuxN>AQ4!NXu?0(WyGqw3uSOBLDJN{;gI@Z{z3+N)(%z=UQ=F0?u!rdQNf<7Y z4;O6?ZDBmA{A5eZhwUbXQ&o2eiD_#)7Z^0=>4F`7XuidbGiRqRS!x+DTYSROi5a|n zUznJK(iBRg|2_0$c5 z|MU0v|LOYw_t&Q@9G<&m8UGGFww@A)(~Rg z@L05{LF!J{5>1m6hwPqM20gjbuyxLIKYv67x(9#Q66KL~;lSLk`MoP8e8qwdU0Oc=|Nq~+|ma z`}^VHeSZ0Wh6aDagOa{{PM$JZ`JsnT#hS-a(rkgv=}~-#o<2)6FipL&W8%Zh>Tjy! zpbnjBQBiT@&D*c3DG`YiPjRRxh8=nFxNw+rt5K?V33Szuwa^%1P zhMNsv{y#lk-_G~<&YqvgCo1osXY-T2lc%hVEibQao!+q?o@EOZ+*DPCj%~X*iS?F^ z$TJn)XEF9EXRchF`0!_OTmJnG5s*A`Xr{%NFH4%5p3I#2KlzI0iOeR!m46!a+0q&& zf2h;%ak(AWAQXCje*HYt*GIm*RFB{Hr%7x_{=J&}TVEf1dHFoQOvQ}3Yx6TT4<0?r zBDwnLMWux@0+VNOs3{+-|Nrm*zq18DKfRCKyf5eGr^i2wr^x*Jtp3EK^?z?~z5l$F zGw0H_Y)*dgQ1Ok0+nv*P5)v1VPUPmlH98sw zU%pKf7fd{SS6gja$bqA3LIz4x#WKD*_5S)l|KaKB|9^k~Kf~nZ<(0wv_thNCJi5Q0 z=l1sh|G&Nc|NlRao?eT)g2aSN?AJo=GqfZPR>|!UoX=3o*l%ymbO{?ySu z8WE{@@qELUvNpl3azFW3yGu-v_`t-x@7f$es^zamE_DDKs@R&@ExLI(ZBTGsyHX};l!P(jSkN5x2$op6E_SXCN z_c>Me{r@l7CzmQ9kRTwOY+!gWNGpNq%$I`=%9;iXrpXk%<>BF3wTfrgE}r!BNt!m$ z6nTjyDJ^Zvbn&2|N6Cpl`)_6cJ>O7O!T8Fs|3aYt4UQZ2SJvrBIeBWFpI;xh``f3x zyYv75R~KmF;o;d||G)av6YuorJ8i4w_Sf@UK4;TiwLwg=LW}i`N^9hwgw*6^mMlMC zUfzFsdHvPv^$KdK2lgD>P;Gv=PnPHXec{D(8X4W~TN#fxNQ9aC`0&)#@hB-p@PNjP z|NlQU$s!_h=GLvPOPB7HxbS_m@-c%6?7KH?3Uohg5X^b!%>V!Y|KH#L|J=>Z{`G&A z%DC_EujeU$R}-`G(YJSZ`vcp3E_kp#QkqxMkjZrJOX0US=TlGbi`n<5`@|GR#`yh4 zId@JJJY+igJN9Ewzu|&qiYBH!JUmubJexK}oMC}P-6V^D|I;sAnIa&P(lS*tqw=qQ zQBfm*gOSl8;e+u@9u}`jWs7-Q&(>By-@bl-+1sa!-Q_*FdDz z{<;<^hBc^1Ca5W|;h4TLLxsh_uIkJ02M6E3zE-Q6eE!u`ZJz0RhBXIoemTUqfv+|s^Gs=-K}yC`f%{z-s<_`Yj>5uzqfq8 z{r?2*4~DZvcqBIQ%n#T~NJU!eVUtq#<;etI+Bpc7v{|09chBQY`b8y{P z^RxWbmG9Nx>uP_w_-$ll{Qlnf&kx6_&CIVSSgMH|FLZDb5dpQRA_A<}!(GtT)%5Gv zn+czv8_t@Rq4eJ{n4!;#b*GM_w_g4I{r|r`<^H+5w*KFr@>f@0OPkvz95G3gN%l4T zZxF=5+@^P?h$+&V>&Ks;_dhXG8lY`E3L*ZMVnFZ%gY^2g7M zypZ(gG*Vgdk;Rgkb;+&b{9Xt_b4O%fbxtb zM$L@QV&R2NX+Qq+%m0(J{`UE*wimzu&%d)*EaVGfTsNtC;zUP({@MaDaN>6IlsR*z zCn@PoQ`M6v~Waa+)|6Cje0goP<=hxla_xI)N>-Q_P z1=*CcB-vCx)^7-UXy10Fo~N+oVf_Ao)n8tG-=6>Ex~71`w~L$^y}oQRqFibQhKT|q zE`pHC#K~L6M@!3Q(V{aOcDQqEx?DMAwVK0h62~(Ii8Loi5yhAook^4Kq_nXxGcz+E zetv%c`qg)6|%~f<|~F0~;nO{P_I*{@dI0C)`@X1 zkn&C!SIHwsPhRX;UU49&Mj<;fCFRGrUPpiarM{4H7$;Aem>3-|FD8$JT``@1k{u#t zxcv3%4JUET^I}QOm1t2^JGOASx|g4a!k)7y4j=yX=coCd)Wi0B0{h~{vMpP-*8Mzw z;KTpp{r~su@ezp&)6zbzpz2zdAt3S9NzDD-V*^9O6|0Rud{VjsO_(2BriiScC&!*P z>p@ z*RR#5{YY|xM8>WE4?P45iWfA!dBYPFbY_Z-+L?m~A3Sq=v&^K4VeR`aOLJ~^A0M9j ze+#yh!Y#G2c~k%IPpZS5c|XO&$`zWVFU7kCG4#It|KHHv>Ejl&f6Cq)HyNd*yx6#T z@_n}CCoLp)fq+#Hs!l$PvR#=#(r}v{s?#a(z`y~W>H0%{TSt`TC+&6w~Z+ys~ zwXxB_H#&O5k|PuL9CCJO2o0YO8gjC>R^Dd9sQBM#feOcy5GS8q2PZ!eX!`&E|MV#- zAz^9PuBjQ9c61x7s83!lDw`lC#Nw@-nJE$r4Nw(6H#fJuJh8mI75Njh#qLaAH^X^; z#BnnY^N^V{{;{&0SiU@ak|3A7{lBtuaZ%x9XQ>&A5y}r%CVdFE+Hp&M!hxUXPn>x0 z_3O&BcxZYS;GMuhGzKx-$|wK{{H`l1{*eP zR9wWnQp-`WrMD_^u4kjUQ{htPlrQYMs;1_%ckS|;EG6dY`AJLb&Y?p(8KI)@zOfXQ zHbmAbi02$p5iGfEVk#*s6LIE@hqpWv6AvpZPhH)TUvkz5td=zL7>G;%|9}3&KU=SG zXyK;P7r+1CrcFf&B0?F8Kc+M8{mbY&VH<~8%htb*r6OPT<>fE$sodu1_(@0S$hPA- zIW4iVu3x|YG)NNHJ``;5z$+p`a)z6t$e;iJ?KL{C-ks~}*B23AP-G+`YM7EPymiYA z1qVH?4_h1zxu?q82UK}NgEn(QSJ$RQ6OISG{uP@X5*I96V$bv9D*HM!&8ntmWl71t zo153JT2;c?r)+3w_~PY9RrS+9e|K-*w8&CAdoq)+KsICZ)`|nc2@W@SPI$IY{5ZdX z!MV_|IznRh4vU9h4jwsTP^#q|DS36*`htczMjbN~6H`(iJaoLG1GQhIR!67j&>&Q z{r~-|w89b}{`a2tXJ74a`)i#hX=!Y=wQcj{*xcP?KDr7X&~f?@Za8NSPfkupZ#mS3 zDtv)~WqI%H|Nr}&+Sb?nCpjUZA-?#_e|ur&$MH87ZtgMYURW(?Fp1+1XGAdDr!s!F zGe-^{_%MIs$NjTs@0dCB@5jgMudl1!Q~!VU@&5gP|NWic+36TG>GCnR6Zhi}9dwL{ znKO0bL`L_Y{r0~)JFAmK{u~x?>EPh{r>~W?ZNrcKxwr2tyZ^hphjZ`Rc{67^#>_c* zWs0J~goCHM9j7ch$(+QvXKI%wAA3y=Py9ZGbDJPpb<12^&@5}~{G{yTJd45$i{JfS zugEpm#pJx>k)+T91Lw9iKPyg7m@mxtyk{%7XOV5*Ud4M^Dv#O4d;YAgh}qzw+4<9< ziG#6AkP{?c@yt7UrUSsRK8l$zv*M=?C*Upj8cnZr*F-bV>%Z2TW*zrW2hs` zxxYqRCf*Vjo4zY}nnc^@iRalsL)b}F=#5B}Fbxx9Mf_ale8 z1=yC9G?-3Zuv?)1SA#Mi9~&QESWM*EW4gue<{qNq#as_xCwUmH7Ps|35h;#ldRsOwLZ;l}Qo--M#-;oa9K9 zWou^CG(E8M#L^S>A%a^X5*sG&Ffu$Sk$T`Ri(_7_&}6j_D%>f0o356y9tlxB@!-0? zh$hr*mA?A_|LT8#;}=Vrz%4&xL#X5jr547QPx9Gvc-Z!=4Rn)Wp6TK(#v@_q5_Hp{ zMrLw~#k!EzYabocE=xb~664u0rP1>Fr~mecSBGn)8^R6UU;qE_kAn>UG8@9wmEXJ) zR@?IB;DLr3MKZooZ>(GPq{hs+(>F&5~04#LQy|7QH?KU(mjU%O^j&zTjdonRQNgWKKfY1gW!6mh!MYaVbT-lp`KWAGT?IQebCZ=%(SfJZS^31q>-75(PKS^8-Yn?ujh^ z@R+?sfM@3)2R-8pzcdBdZrz<}5SjG1>C~~Qb_HAAI{TxZ%<-+UY>bzTl9JV7xOb~z zIoqF;Oicg&I@|w*tN=JP%i>>hPga7j%c) zoco4N-@#92hHr?)@Bj5}+w&#&Es{G48qoaz-^o+PzW!h9UFCioo35>_U3adhHk`d< zw1BU}aKWZRR;9E@Kl*#S=XiDWcn4iIm@uz_P18Z!?89j%iTry8HUIk-@j%)t+Lr$g z+?H9P8(gtoG;XTFg3{E7lYe_WGc+>zqrU4&!T}9q34=w9{tp{hA2?wDr*(gEcl_A| z7M9)XBd$6#q&Z{ru%@(grUKi7*}tCiAAWwG z=g*%b^QQT$9Ef`P=l}T+?_>|pwSI9G-X8byskg8Bp{~T#pQCnTh2=Fi%{xMFds5kU zc~7u>fA8;1wsH08_t|NsAQkj?*}_}KgZg>7mIy*hsn z@AYOC|1QsbI9_ep|IO+9Z|?f~`1Ex9jW1nq*L0WOwp(_ho=5oKLaEkFyEz}_ug+;? z)Q>k>6LWGy3g_Wwc9rdV8vPiW5_{Py zyqfZN{%e=ow?j!tO1@wIf5n#<;>O0!`|PYQ`7PhSLR08+){|WQ8J!I0)3nxjid}AH z=CQDl`0+zR&i+{4Oh!kCtW!V!&wu!@b_#2g0L)`D_5Y3bRDKqJtKw(kl6JMx^}ejTMua>P#Y{=GN5kJbJ|AV%R%k=#J@7!H-W^zWND$6t%XSY*7K0ZD_Mf1I!{lB?WkLv56 z?^%1`N4uBO>3{BC<}BWVYtjzgwfAV;X676sa(Ht(&+4#+I(H9qb>vS^xccLZ=Hb`Z zBl13hyzu|Olef(On>YXe{mstlSi#wt;i>Sp$z?UGz)F_#$fl0v|Ns9#H}`kp+gtit zT5B>h@7!3#8?0{=nc&MiLzVs76$6$@tSo>2pBIonf8f}$4KdXEA#oC|qgzug`+YQqO33mra+Ah?{{R2~{blzw>FN2u@!UmY0|TYliyb!K zzscX`VBd84-~XSV>pwnP-`gv1U-RSojT7%*ojP`=$7UA;`-zPmHerr`0@$BLToYhE zbcn~$5VTGu=gx@(M~)ce%l))o`|t;c@=9mBAb5aJZH=GcWpnTD+}aexo!r7UgHxetRP? zEL>#$*nUkiQ)+_9Ck{b3H?|vdbu@Ud{{R2K_V>5%7Z%DdT`I5U_vg>2r~Ci^mpnKB z|ItHD`)#{#wjEgGXrL{Xa@5&6)xf~u&;RoUk622!Bp>Hvv$t;xT^$hi>EKjB`)!SD zx*HoGo|A10U%w$6T6Ah!Mnp(NME?Kx?{9xEuTj}Q{;(p4O*(t#IB4qrbIzaN%(ue1 z&(106{D;TK|35vw-+$((uaA!27jQQ)GCFa}Xm8C=_b91(Ru9iAo=IwEbf4I;+DX#x z!Eye!^7jiC->Ux)S~qv$!hsF-_Ra0=&HL<{m#CaGUew{E{r|u59IK-r*p8Q)L!%os zAjA_B^WU!iAHVgAX2!)U8S`@u4rNKQxw(1FW?q{6@xA=v>+5;=<^EU}J-Pnq=>79^ ze(GwRSh4>6rza=xo9F-g|Nnf!&rkFH=l}EaJjh;oA&c?MD#o3epX8@@$L!?ME_lRp z_~qq*GA}M>dHKW7&hm7N8^`QFmymQQA>+h?B}WdNKY!rt*#oCfA2{6n{)c|d($imG zM|3HH`sR>UQ~iIVn&034UtV7CU6YmXU_G@tYe5LCX$LPUhKPZ>*tZ)X{Cs!DW<@Wwc~@^8Vxe;-C`pws{-B ze1RBfnhWZ*e+D+Srw$zY#J_UR5B7L_9(|{P@ETBM{Qq6a~zrXui`|ZvB!|nC@ z@&Ae+9eK~r|BsWyKuW@ZoqhA}of|nh4VNwdpOD;~_w?Dbw3=^k);BR7k$Ixj$e5p9 zVo~DU!SesV@td2>vR9`~OMCDz@xryV3)j*L*{?9i5Ur`|;D;v3Z(G6y%8!j#qk`fYmcYhr|+&n|1?&~Xg>D%=; za-YxWY+zItoZ2Q;Rqul!lQxRHO&y9RNU3ClDd zzr9^=ZM}be#K+g)^Vt{~-8ebjyuHJurNfGg#cFHQ^z_&~YU=;}d4El}MZ2VQ=8T#D z|Ns2l-^%(f<;fbl{)=pP$zY3H5MtzEM)@;o|bywrx#Umyd_L+P~z4gde}Z|DUX0AHT0<8rPgT zAWI60|EDJ%nx!D&R<< z>V5N5gpI9D-Ctr|+|lqNNZIftW;fRX1)egmy0k!oPXc1>>4;+yFYPt0_Fk?gZPsDN*2kLu-}oBjTX zi80KWD70t(`A;q@zAn_QnHh5TN5`by3m7tZ4fnaug@m}g;@=+=AO8IO|HDW3C8E+7 zRyav%wJMzmkvXRO?f=BBHNU>R|MYafyq!bt!^g=F9{kYP;@Zyfdc|uFUr7N_jK05r zVaD%&a}OS2Iqj9aMU0J^*(>TG*MpZYzvpLcZ34|PffAj*<-h;w6`!8Sui5g0UH)a$ zRGA3}d0tFlYhir-T4}e?Ba@7kp#IH|kN2g`{~eA!cKGm$b>|M8`}h97eaHNb-(ubM z5>iqsev4gvFkxESgJ)?!{z`jqX5OjN&|+UxB)|4~6WhOksXNMgA6{0kIR^>NH**d7 z8VopGz2+@%Fzi^R^ z3}`-Y`h*|+yT7NJL@sFf|6j81Z&JlCk&FF5KD9df^{qI%&aZ#dHMhUj-}6MtmS2E``^>^KW_iO@*f}Se=Iy{UuWRq zVbIlSSWuwwzD|NQ02H=gwGYp=?qId}^oeK98WG>K$xX?}Ij(3TQoTN?lH9RF;@=;~ z)%X4#K7Me*v)d995+CG`{GZ?0`0#yvTetZCzdt_8&*PslQ&L1k;@{uj|DT_)_gc4G zYTbkD`k>{idG}m8Hf#z?yf1z0!}Yn=EyAD#@c+M4zl@q%+kAV;ef7sLeM@XOd(PNE z;g$>!56_9mFFYQL1-SqDaqz?2+xxSx3vi{RO5V8f&)X}ga`F8S3=@7-e`HF{>FVuu zbZFRO%Qb^xr|zeh!iR5f?_mB=2r4!}>C0bcf4%XY{pSzx?zWRzE7>Ua{e)kWwqx;y z>+ApX$^GenbH~13LPb?FB64Lx{M8T46Re(IRA#FX?wvZ-F;Fo6l!G_diCQre7JH9( zkKJ`<%Nv3W{{P?U{GR{+jrUZu=dMaHF_`$3i(Pu_vs<3B$R z9ysc{{I-h0!TI)}^@STY7#J8juD%$)USeNetBy*7fUxtOwOudEts2DjCGPEaeVd#**nd!1GmvrRSL z-4>*z?=_XF*RAC$N9Dgo0`~>YCZBz$GI^(f`}Mb5uPU{r$*XOZ+{?bjOlX&qkMO?p zHquXz+h5pb!h5)Z!_{%#V$)R5(1*KL2k3C$R^#YUdNf1DQS+GK`Q2M3_bq+DGJ8`F zzq%D);*NbB)4S(Pb~z`r?xIQU)Y1rWakOQ&Enl+%PpiEulbGIt1LqVYA}lqgpA_0? zXuLr{lUaDd1;v0Xr&jIy!1qA@W#qKKIf{=h7{jl=edEm}rm6I8LGIz5{RLB%d^No% zwPf4=5?NIH{$=$t&i9gRYmQ1xV_kY-mOxAW!onjGyZQt&q;5^yrFL4QE84l9>G!!^ zNpC%`RWwh2p1$_q&gTLj)y@_goVRGdw*SkG4$wlL|NpnlT+7w!Akb=WIEz(K$F8C5 z;2Y`udykkongS1Zom_NK;Sg{2M|O9e@AC{251o^|VQYPO;?%`DCqH|ioI7)cfKvYK z7_l;M)`HkW0?8dp2R(mrUYlBZ?DLB3Ez2@zsO{=owW}xLTa?XQhWp!Bi+Bkh7h#Qb z6PU2<$G`YGnd1v{CE1RBe6TH7rE`bPz6W{tZ)fn-{=e~CQXU*|Yvyj`Y%maUk#CAT zESRGcqq2Et`_xwD9SyEJtg0MhW}1sy7iEYjS3JEs|M&Nu&Gp7IP6@ilWb$?NFP@ux zx$tM79@_o7LPWToFMAy*a7Ag0?! zzqk)OTo0VK{8+w>@%cEd*HuB^Cv2Ryubwlx95hT0o=>m)n`BYWr@C^{RkecXi&Z;k zylm^`P0irnq%ya&^Wpb=HcrmA{q+m4+NeIbZJxoovt0l1-Q7I;aRzfN4iry97OS33yN&H*YcEW?r+4;pMW-dk*EBl%Ri_)&fR#w5JGiP{g zYV7PbiBy&Yt~8jJdrfMoo=T!$_h+Bm_dedA zz0gJTrIk(prX?CPmgSa7I6VLSTwjgF>4Hg$WTw}o2fvSeR1>jVviZjDn$U~iuYA22 zzP3}T>40pLYV+qS1&`kIc};wt-@TcMf#LuEDKl;PTmuAJ?WgV#?42fhqlq_SdFl!c z7sZR&d5%tt1V3JmIjQYYQ8}+**GZG$nwgf9i+PV5C_I(>7W1hv@}HNc39lkwi0|Z8 zi{3|yE#o}B%XnvcZgpww%ygb+lcnokS}p3>2dMR5 zxbo|<9XWV%;o|7O4A1MD@4r%87aPl>*`dc*re&ql(a4vX>zZBrSUT^)#0kI8{jId* zS$O-7)Tv#w6;HJit2RkprlB7soi#Y|1zdJajfE6yYs$h@UFgHYNrL5 zH2Xhoxw+`J*>hvZ120>i?=>o18`yQcdE3)6<@5hnNUmJAFq6Z1Qj3LJS3&Hej>0Fh z*6*i2oO=1lSFb<=9`4l{4G)){vzisZzw7rDqn#I79y<6Jg*n-S8lPHb+Kcc>t&0%0!=pR zo;&W|cpI@+qW`4P$z$;xnk_yDmnrgmw22YFeas|&*OBMD*XKSh$e*;~zVPum=U@D4 zU+dxQ9n8~ww8b_uE;PnvE>)NN6{#m|c%GUSBvsdn$ zGU5BRc*FY|LDyyY>fcCKgJY^??q04#1|qHY>Ki>2SA>3GcQ=oX;j(iqzkhyu#=>ar zA52`YUOZaV5!IEs@X>{UtzVz>D5mWGAehJZeZu*IB^%GY7FGPT_SoxF*5ZM!o^&HT4wnvptJzPH{KITN2L z)#kX=X>ntw(M$t5ZGqtET`}kG-w14(QnH(e&1bpJ^c{MAhu<#ylz3l+Wr|mY+-lLe zeA{|cA6E9bt%^AOplD&(;}WOqmNFg5OOAYxTtD;dr;SI-WBs*>Z9$q4;<&us_i>paVDwpv4U#QrT4{l$+LFo zIc>N(VcDANEr~BH<}KCWIGEA0EOfbszW%O#pD%csbNss?TEGr2CFa<2Hx&pRtM}B+ z>=sxU{osniLlMm@cf{XHTd+KQB<8Es!F%>%(xLl~N2I32>Au|dc*pdI59jTcey45G z)ueEsQ10hE&v%=1YgSE-5Zf%d`|fF{YO6qzHP?NXzp$EjyuxRa0?S{!M342ypB}2~ zUsYsxecxK~y{nue)9j8LTqyMszP(P}Wx?&gkGb?bO>a(D?Pfi#x~}_qp^eaS7RT=D zX%}B^xN9Wy=y!zJb?wE1&5TY9GF=3_4n1rTSn|@^-b-k;=wwglC;J%+&7WG%_C37$ zqI?l}q4uHcOU%sEBlgZ}oGfhklA+LK$-5Tg3pX-4nIA2iBJ$;6jhh>roLq)=j9=fw zlfrC*f}oye+@6F7kCQ8YPI~yieoW&04qJyR($HTB1znT{?kj7IT)|61?j)Y0L2_Kauh zRGzrF8_$CkbdA@1{v^<%_VvB|;kD5`l9D{(>n1#igtW-l%vR)Uao}-{_iA!-x)AYZ zhGD`74wmE2#gak>9C?lgEQh-!%WiI%AQ*jh$tvI9zh`grFE!!$VjE?>v3GObx8vUf zyLK(h+;=YW``lE=meN$2k2cS4_Sxr`l%C`fT-1@Pd|Jc%(i8;+&$_1becWuLM#fZCVS zE&lyUw5XhP@DNiL2o$!s^g;i+}%8Eh;7`noez;EnRy;X6MJo z&M4^lB`EY=V@Uel&d(#RXHfI&$c9Ye5|wA) za)oaB@bK{H=zw^O-=x3=)|QzY zIa?e=T;%n1BqTYw#5VYHh$SQ_vXvz(iYcgN<*gB&F=K~XQ%u3EHPz)3cU~vh9X2@g zzxImGVx9B8hYFel19=zUxoP%l)~ZFXXDlnu|Izn)(|Z>|FGUtlr&&s>`}`&B!hYvJ zlRUmK_ht38)@6>_Thd|$4++hDe`o3>g#({8xE@I?eZ&9D|L)~ioa=T~$@C~!o-?qS z_8DAFE}34g$(g+0%0>z3Y=I5ycLPIc3|E2gQQ^CmB|R?3N1;dD%x^(}u^;gM~E zt|F}WkDCOBHYKzd)%%IMyUu&Zw0x<4vh(~}|Im%=QY3v38w7~FunKL}QRX;fJj?C+ zLyHKpb>0$-Uaxqi+PqW#M&I6c@8>rropzce;JRP1PZzYq<^TUBvvs)|90XeTCkdA& z&OW-y{DaHAgUS`UyU!+Vh~$~ZdRXd+t1g@1|621I4?pg0oVhc<;kn@PB`*ut%t<}P zYTuA}Kw^#R`An%}6BjvnwH%OWYA`rrxkat(MfDV?Q`fh?E%9FI{FhTF`(y2@u62hk zPYEfm$X>>|^xF3qm2R)UCVkIYx!aVHUtrT$uiKMUQf^KU%x45;!zHkGr{jxv3VKVE z`ClxoYQ1-yo1^!_tAOQQKf({@pPS%tsG8;8@{3!KS*}@IZ_!uud7r$Tb=N63~^oC^Mi-AR~I((s}21SY3GkXp-?gW1g>1 z9_vbjQ<>7Wxl5>^HdT&2F%aeY7(U-XwE@|pH#@On`G<>>ne2eVm+!?Pn z&T4qcI_Zy-qQmBi2?~o>{`@^1v^jHg`bp?o6eY95n*Zl6Ts-*UIsdt5{B=hh3cnp? zzTni$7+tbxsuIg&(`6RRes1|Jvh49mhw^s@YhpZ|;-!S4mM50J*;&kEoSx(<`Qz^a zhtms=$md4PZNI;D$0Sxg?Fln>G%KtJkQS? z=iGF9`6~&Fnmr)eLKXwaDG9*weDdj5FKmpzyLEfBkFjDQTJ-FIZ+{qse@^ zX3edO5*hmrKdt+-?ScFLdgCu&4Q;C3(!5$h>*qo4(JlP*`*^_zg~NL)BecYoq?Kl5 zvg*$0;8b|zEX>qVD3NA#=+w~?v%1fmRg8LlcTOMR@o(DaV77ek+=q88g~DncLObnB zmCP@u>+^7n8@+gCWMOG&uwdQ4q$Atdq$-{I9?W2uYnaKoA+7trMUmDVZ=ZaQNPXp3 zLh2mSmzU2_%3$bcN+~?!a`-@lDr>?ro{CAgwsa<5R%81yas3pKk3sHCFaNMPooDOT z3wMv5ko*0;zkucO%-6^H|1DmpoS1MxD4pj9lhkFINep4p9~j)0wCcM=^}eh)qtdim zrfGHik{<1FiD&22E539{%ok(>nfd>}QgKLQXQyNQ{KR`UY?+xg$}f0$UgVfeV*LKC z#)d&x;9?R}_{_!}&3PHeq}Km9esIEHrwb8lo@$4&%}Z)#T+H$Fdp~IZ+^?#(x3@j# z%{mTU;jt*=#L3g$3)lBMM$BkzWULlSOgJ#fXv4Xafio;5K1`E5amr&W({yV|!>}ul zwln!ZN^tPy<*}H=%AB>a(d_)Er`*o*@`wNYRLSvHgwB2}(kL`AHa>F1z{JF$p!l(j z#c``Xzm#7GLMI8wS5D^LAeJ`cDf=cPC1Lp})s8uvI48|G%BIr!=jY)K35?F^>Wx>s zmOvwE(F+%$6AW+n*Ylh`%M%mBBDH)*P==znn`&>oO!ixcnnefK^mA-0Du`@fz4eGg z$)*GcEwc&F-zhfDHZ$~@;qY|I!2{3fnzY|BhedIsPi`%qiCJM+psAb^iZ1)`>e~P^k2% z!BPw6ymKnb?Ci~be23>)@=W0V%VB0MQLselQS`DMLhQ`j*7$UrvCq1iqTcs^MXYp; zE{}z{CCHlwnL>v@J>}t-pY*`=5oCyL(KDVc`|O;HjhlB`PWzg0M4gZAQIeo;Scj+l z5;eZ0TQU_s$0jQjT=jC9+3FQ<&2x?o+;piH9jEx&dfuJ9yBm}TbRrzK z#%YH^&H8ss)X%SNgF)M?tQQM+pK#@1-nLHXW^L4;PG(=0=^HD&L~L#qW;*(@L(9@_1&`Sd&obqS z-ezD^=d+_|3Cul-j}j6R5)u<9Oi1wXO5)&66%ccqQ`>FYnQ-k6OY)QxM;vt;c!F9ol!_93<5tEZ${kei428Ul>=4t0Q-c!^0^b6lEP^g1W1So$5ZeZ%^ zu~k;G#m2H(T7q^F%F4dz^AI_nc*P<@S}9{;|MDyaW`Ue^f#AP?kDKwFYIt>Mf4!vN z{InZ;XFt4W+je>R!sB{;pg}Fr;Yu@(Fm9bKxNB>SPmiNlm!ntL!DDR)kFZqTX5;5i z+QhbB(d1Obj$P6fQt~|(X}2!;G&IET2Q8oY@xA}y-|DtrX~{mB#EO4&9zHzmSR%{| zuA`64-OJhLAmB1z@6p`l5e*ZiZq>_&C9wZ%T$J>Qb$d-?T9XB9j>$>BGqd&27Eb4I zZQL3n>gzmDxGhC8)id_7%;hC(bhAB!djgMLx5@6jB(2w1*r#zx>uKSJbve?%&sDy< z*X3^4mEm~jFl(=(=ej%R{~JrP_+MLEvqN)H`LB8Fk|QPDZ-uSgc=$urqOT72U^kt) zyOFEKfalo9{T;1gQ2~Nq|K2}&_NcEoyLa@2L%e4X+BO_c<()Zc&HnfM`yOxKBKCdZ znzcG>Eb1R?lnX0=_vO%N-73)>^r(5M$~Q-jB`Sv6HTAnTG^x5SU9}`SZL3@MEwSvv zzIBV=OR`N_w2re;;^fR_2Q8n?%oGy4SY32XVp{9-hc=dcd&FlH+>Z?JbiQG>&U?;| h^3reRufaV=W`?;7RzxxUV7w1X)Sj+>F6*2UngG!74J-fv literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png new file mode 100644 index 0000000000000000000000000000000000000000..e1799a87c8542d7e515b6185d7e8f6f75fe73f3e GIT binary patch literal 19132 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3YW8$-45^rtlpw*nn1P*v zfq~)we<{|*4C?rpM<#3(WKiH~dGLL{Wq#sgmWfTX4&P^7t--ZIMJwyj-w6z7wy;_` zd*428|M9$>H$!w~n)n&p2D2IVF*GEok%MqUM`by^;t(Alh+?!#1H{S9SGsty!E;;hGD)6u#Nb>&AZ|HVY z;(fjO-4}->d|2eZ%AJ#Zw%Kx-XZu6>L+js{%9y&%OW&^}lq~MF{FTdz>f=6V($db& z>=2ARXJEW2Ogk%g>4GWKjMs7h?khdI?V&}%ydCeq*?4ej)^0w{4Yn)Pk*mc(V7A!l z5?v#us*kV!|9`TPqxF@e$a3TA7j}t>iOzbQmaPu|(q%f#Siaobx>h-Bn(CR)tKZvB zly^7xJ!>GL^w!E%B>QzGM}w5|DyD$crA#58g@rGEo-#*O_2xDq<>g0?AN+mJ@YAko zbH46-TKQ$ymMQV&A654-g6x<2)Y&|D?#$NKnNz2F22BbM4fP0_67SaLA3ohZaAIW8 zq{hjg^_3sjFX?bkR$e6U$)eXcFCgK~e;%It|NsAg|Nj2}`T76v?*0A#)YSdY&;PIc z`|Er=fBih0pP&EP9=^Z-|NpEg&q+!@WJ;x!K>V%iXOl zIAqGiW@fqL9~Lo%KT=TWzWKvJ>|of%1uA?h2kYhS{_L;+|Gt_1f83rQ1=LqpE`ZIci_bH2OPZI-H%-gity{}oGL1%Izfb? zQ}Ig1v8zn31^@rIt1GP0Bnh|M#DlVo@>S;X~)P-`^$U z_9XoHnjH{s4mE!XDAK3Sog3=ze&c|{(c|uSPjrZ0+Le;jq{6y}mAPtW3jycO`JFxH785ngJ7PxfWZ^g!g9UDi z*H*FU{xf(ja%K}_Fk?|?)6A*~r`zf+rYu}CyZ-+LEh#oOwi9c3LH;^F-}uhHvm4TQ z58vI*<3FEooe~Qq9q4pAhfE3f_P%z^?c|*pr^rYLr>42RZ|!gW5OvFKS}omVdhKF2 zOUzV`O-WB2gVu0ztPF7oZd)4AIFW~EgAmg>=Sr%o~St6W*9dG1K#&0Q)Xeh;+F0#5O; z<@oYmnbJ7NMImbfx9E<{`iWhVY;0GKFe={Po4li__2D7bw&ddyda+4AJ|sTaZ49;H z!I6clw6gNn zeAm`94l7+Alqqg?3%s>TVzI*ihq>HaR6hN;Km7YU&vHLQ8QW6@pHvS2{mmn9&lhsW zz`(!&YT1S(ikc@EFHm^;e0gI2`jv|hpFPqN!(gCr)MVW$ChsGUSIJ(OwZdaleW8i> ze?`^4{Sn$~SM+$Qwl)QRQ23sj!0k3+5=WF)<>$7<*I{j&($N{UcFh9p00lKAR~Vjn@uLu)&mD7c^R~An4m5DW^Sm=-bfMC6{0LQ zYaNbpbh(-|m>l3;OyE6Q;>$cs_X<+*bKnVqILSMg6>ohuMF*i!g(ueNpzK zgrp=9@#ld-lX&>ezkRwiX-Z1lQqD6hALXN0Hmpwk$e;X5G~MXXGKPK*FVmJVg9N|R ziPG8bFHb3TE>K}j(UVHPZ`Z~vEx{*~@Z(eK!`IG|#q>s#|Mmg#Wjbo&dz}&+`sORCy^@gp zpS;aea>G>l`u|2Xe@__{s~zqX=9#X4c`FB`@LJ^lC?PHF$>YaYE=}s|+VtVmB{89- zw8X@e2EN!o?psv)Yv1CGPECe))|oB(N$UB_t9Wui`-Bx%G#VMXI?Rx}@2j@bq3${n;;;Yr!Sj!nvZ8Ig+tR zv+16UXvpeLAql{Q{TJ7Dc0`vq3(3dpLf6Zhv^&yiL|x!Yn6YM@i?yui0#~^wdDD zsQ>>LB|duaFwxv}s=rT3SwVo9Ktgh2V#`v_j`jjmqYLSm3QaivJq-BCzw>P4#0?jG z4o@!GaQ?w#KP!z)*0o)Za*D3Lwk_FWY**S&?62p!zu!1!_puE{s)zsmEfJ2hN{6$I@NxVR?`tKk>k4$!*VAmE4uCikv8AoY%#6?(gXX2O3HxL{=&@YaY8G z-gU_6dwq!J998zL#q8()aYsLCTv2Xt>}*Hwv_m%qSZ3b3yJu$NL#MXg<(WpZko>S_ z_D0?Y1p(Ll*N+_YlaglNSjZWBaP6Gau5WC)TNIL>hdyztvncxSSZqIeaptPTg3mVV zUSIj18>{_o-*-a+y(ukXbv$d0tQ{A&SNsa?JhV1_nzYrPV7o;Xl3Fhxd2%?Uc}O0s zc-yndLu)R(T}0BfuqtNd?xaZuyRMh&aG%?l`^NhC$Is$Vm_QA(MQM*7JV*!(7Ut*Q z-rK!dKal)=e;s=-xy(WBr0NM6;o?g9~R+%fY1^R=95yk3M(!%&8NPF1-2o|9`!YX2^F1Yvy?x ziBmP!Bs44)-BGks&B+h1M6X@~brk;p zU*rcWsIFa|+S^^s!)5g3d3t(EO3PB7430nl|I3TDet7YxJ!biV&n7_+8-;jJs2aEk zggr}E<96%W;ga^p{c4lY!4+~7iOFjGPx^l*JfT1tz> zOitaDiAf@=ZvwazQyh1cZDC?t#HO2IU~KCoWBM#0Wumh5SzaEGr!12!=J0+LD!Sx; z&3nNL1&!9NuM(4z49wcN-rYa{;hn5#+X=Xp0YQ^Gx;F8083_qINJvRZN=RsO(aKuP zcxgwn(<@&CLnEc_VuFe7Yd@bn(6F@WKuG^Jivz2;^VHVmsYgj3YY62Hn%MA6ZpFzD zZQQfGB*f!ZI4te(RGPt)a^T33grq~DHq`m~JiosMCQjpn!b@R##x_F=-6jm5^4R(qQ{u>EyrTZb=he zb>9lC4b0gkVE604{4CW4S9P)s8PL?Xx?&KPPxgo(N^LIro5-jXCIO;d!j+7dwJ@q2J4FkhK4g{8`u0l_u;kp;f={6YfT}o zCnfvB51)%!*rnawW}Q6AX<&Hpvz1D!f#UPJh@L~*B2T2+8Fd>9MAv52`t_`0iTox% zNu;Rc-|>ZVavuwX{7ZY)?zbiG;8cy1-JfiH3%>kwh2}2 zHc$!;IuUfjmv_xy{~fVMnAdc&S;{7eDQ4(778(RaHf>&^5Hw-NmxCuyCZr$#@QnBH z@qQljya|u;Rp2(gc*X6bJ%B_v6I~TZ`>9HBUY@R>=;N7_o4>GrH z&zFqbuXa2cVo{QkGrzfcwNFoTYpaa7xbd162a^j4n#>bj5;$2cbrvwJj68neL`P7_ z0fr@#zvWAPWO);=iJUm~FL^?RNHUYnQ+A`yXQn1*lL|Zbuzb}oTH?fVamfVMiz$Y@ zj$%`$q;zyA8yF=eq#b)}e+Octl4W5*(WWEEqksL%;^x?}u(F3SY^Q?Zj08{iz!eU+ z6?;B>X_=!VVAooMIxi5;kS>iNH-tMDpQ zMxmR)gH@3QD^m_k)48yt?GSVJDTirCLqiii*?A-c`r`O!U=^@&z@BYV$fmxC#qFFJ^ur=W!WN<4z1Drw^%8qVdV_Q*-}1^YqSbAo0*$>17da=WDCDp z%Nz5uErIvYfd;*fQ1MNTH8nhmi9D-T@#q}k&4+qItMJ6>?y9OR4(`oL?&?9_Oo#rp z&q7UT^D=_uuyr$2yWSTsWMBupw-P#39 zg?uzUeuwz>9=tj8#xzZE>8v!z;>XXMu`ySAdR=GElr&f*78iWL{rJUH!)t$&mhsrA zvdru0TYcjImxq^O1f!>K$QsU8p5g*CSIG^k+-yBY%)WCP8yy`Q4jyeicwt zxuT_|t`QLu8$=~;TxQvJrC}#)BV%+bqw9yxBlh+GYJYwC{P?(h{l7oXvtm`2xi;x| za69-&HpH#{^ZPicbUohBbASKhl`3yM{-m-oyBRWzr#Scq)zt8qnei-J)^hv`Gy*Od z35q@2ye&*bG_|>sVIJRvofE8D1v3|lGe5gx^G=!JV)C#1`~Tb5|Jzsa@aDh2)8MQ z|Ns9u_~F^v{q6k!@9h5m{N&{Q@%#TJr~O;M=+J}Lr&Saj-e~a8GCCtFCtw<}{OJVW zfbjGS*V8S^c#L}&fdYl$|NleNEg~Xk-nk>v+j}%Y{fzOVzBq$JNv)h=(;U|P=Z`%9#^&hA$IZR+LjiAn zeQI1>+7c^1@dH;o4n{dAhB^xvJPK=@Wc2S(;*P4X=VzJTH_!c5`1x77}4G$WH^)53>GINOqsf4IpCPJ4%@TU1n9`uI*g zawH-!yRqKD{(xkLV9%Xm=X1g{I+=U_{(pYn{&>Iri&s_^kB&U%m*1!S{!7*OcgMHo ze)pSKQ}*G(;(d13a&lr7%rgUO9y75lmN&M2Xe6`wgF{FC|9|zDm&xC~JOBJ#=bKx9 z{4_p%b@l)Jdw=R*U;FPpT_QMGQdPBcmR;(R84KLCB`zGf=*8C|e6R(66{3&wqda z_nuz&{>`77mqOn>etd3!`0efg%gg@Gw)uJg^71D+ng?#BJ5F;7W1jx>Ny>+hi4&$D zPX~>2{{Meyrp1jLGuEuVnwl~rFgzq-8PA!u?>fvhgHDTV_V1pTBxh3*9=`qcmzV1H z^^fkD|NC!$`1W>tKG~{ES4ti|J}$rF`pgHK_7WR3orC$;vKojyHs|F}`1ASs|7&aS zi|hO-dV4GXPqyX#J&7~y|NpCd@y zL=>*iOH|?hQp4{rzrp0=bjHOQj!%`icseZa@Bcr``umkm;r_b6N(GA!?63bHzwb}k zhXZE^Ss9zXJ)5KBoHj^@;?PWsoSZABrjZl8Ikq%%dDydVzR@sU z>_!)FjO&U2-qYvlMwdN2ExzcTz>m+*|1Wl*FRr_*@c+N#`tfy%N5qO|IWYeZl>Be_ zXdYwvm&Z&elqESnot$jX&R27P^Yh=|-@8bD{cnHx?QI_IFaw(!k1aN4uHPG;wXVKDPk&`QW^TviWyDdx%dv6?&d39i#PJq_3{q_I#;%eUg{Oo_E zjf?qkzkI#C{l85)H{UMz-~Z_B@uG;w0%-5)e#=(Xw{L%5blYXX&D?XP zVJ6=OF$n>oz^^vd-wMyqyY0d2v*?EneEDU1cwH<{{(Yg31-_ys?q}{QP<~Gn<45-`|6Vr>#m}+-zo_ zpv1oO)B&B(7Z?oB3Z7ZT6)fg$dE~^2f*&9Ht;_4`|M}G1WnumQ&+yKUg9XocIHNXf zGygb6EW=mW*LRLKbR6i=B#R?QcdlJ?#Nd$ly$1)PoD`JgG*kbyF05Z68}+5>$(NVv z_xDdOF1olrzJ7a7<(CHs({J=fu?Vs(II7L8XnG{v6IAY=KXRnt(UJb_>nCiwU%riQ z3tsMVxvA7aHaxR&*~FPM4_=$rI9;3)zvHSmhKGy~3)H7<&)>hN^7GsK_C-SM%*>!Uk@t6g);#CilcqR>uV5<6 z4llu^E@74*A0Pj3<+iu4QTXq6bAP?0oUPKrn~aPVv!r(H(k)-!8=rqaC`$XmeEp-$ z4<_eE?BwytWU_NTe)#Z$wPz0;Jow?QbXOqMxB8YYE)^{;Hx#zdPtaD}ASS)zmEa-) z;mh@Wd|g7vkN5ANXZQEp-{10g1kX2^%OBgte%P>bWg$0*!6I3;{#&OV z_DQfV-I9>v@_I+o+av#6jVH{XAL`iH|73y#6DYoZeYri`+&^sit3|Bs)N_B9EA{?7h0%hkWXWvx34W6{ILnJJ1VTU%Qj zXG_=ql7skfb=Zt0ODrDL9kZAubMeiAYugT-a?M_sl)wGd$wF&r>jSUYU-;sCLiv$$TU(nK6@E(>G zRQS@;+(JX;3=P#P794ngw6R^}*wQ;Q&UXKoaDFx;IX>gf{xfHIK7NdP^X5tW$`%%O zdwXp+t^@JD3!c_(xx&Eca?OIn`Nt0l6O)L#ze@XW7?`C6gsVrJGcyb723R<)OA-;4 zOi2xdj{Q4nTC%WOva()$^j@Di>Ek(jw`2c%dOnDJ;NSVZ@#4gv-3Q&Y?E|8g9XxpI z*RLnrZ6YEir%v7Z@6S(f7ZIiDtt?5i0zNjY-u3V@I(fn%_|gLpFQs^W=6817jtaWH z`({F#cTOsO|Nl$w*lBs=hJf+Q71v)L;^z1G)v%mx&0N7Vr&ui=7d*XE&%^V7%NCZ) zm$ybnp0sT9^68n|-|rebS32{gVbqmHiaw4pp`2+_Ne2|UO~c~mc}C6hjG86WZ(v}s zVue9V`{d=KvJS^~82na?U{qFS78kE-7J*3pRt#v?FFEO#xY)8MPgULBG(5bEEv&oi{{4}@-CB2IBL}a^qHeJ>Q$F$^_9;p2 z>G>ZTG9xCY<3?@=V@s>#s#PZ>A0FVk5X79O&N{E^!K;gY@1cp&Q)|+sH#dqzKkY3m zI2Lcwxp=|>AF&UoUhtdg{;#N*k$z@_sGwrfd^sTjk(8-ZpPHD9KX{gQsKX}4lxbbj zQ~NfBgSVc@>)cd)V`PwEW|A^-y87A=32A8^{mcC}Hb#0Ryxo`NxAnw{1szcPDjMd_ z_4Mju-7N6$zx-NT#{H8d?>+d@E%JIt+nx>60-TlxCS_1y{lb^P$*2QN=PdhC3NGuOw5M^KQbwbdg0%|w<0 zRo0kLo;jY^U8=Qk;iOHQ-t0)`Gv1l- zUE!S*lW}6ARJZG?v%Qwe&1X+{v;Hp-Gd6b4)I7YWl4sJSC*>v^+W6y+P?l;*y2b>+@oT?GXONiO{JR;aptlV@gTez%Inzsu26 z{>+*GckW1tibg&=pB}OE)A`58BOW<>b|zj_V$;_4`5(ygfLS8qwRlW-`oI6_51zP8 zgyk~6w6taO?f?J#&%b~3d-_T1U~`{>%}_S)y<@)fYvF)F-DyxmK_6g&H4=POl zi6>q%pL+Hv>B}df@1KNze|~=d{r!4L$^V--8X6fIKIoJCwBv{1L{0u|;pg(okL{Bm z|37xD=j_@4imNks?8vaQ@(ViE(ER>>pb%tKf6d&DTn7|*T;@->`Tu`V;U?LZf{Rz2 zGTAs8UEj?+;(1uEdVW_m`^EkhU7r|Lm7G#qb8K4Xd$;r`a;9Cc`5#iEZu~f*K1*v8 z(ZNAqZ!TTo|qPPn{1Bw6tJE`>5|NnPTn4S4>JsVq^o7i%{Wy%mRMj=uQ)|LT6TeODI_I>?Zj z_{aU#?n5_wcrIL?_uzQ_LoKI(7;fVh#*-#bcBCAb>r${(=+mNU3q%a3@?E>cWZB5a zaA2y!1BTUazQ5O)y%HJ(dY1ocezWBl8yFZktTXzz(X=ZGJ|35 zhXYY92}*21x8y616sn3(3X;3_>}>+e|Np`ofp)b{nxKVc4FCT-y<7XI`a7R-p#m5G zl7svnKL1^{{1o2uu{qgBm_QK+s)b~p|M)-u;bZqH&dZ^WlVqv>|Ia)}k^hsyi~18Q*mP3=xXPNP9=P_&;oi2! z`bg$IR}MxMCHRV+C_HwcLG@avr%45u+$Qe+gG>sZjFN(!7tCHj3w%$R`F4MQ{rRb{ zuP+>wFLo=vu{84G$Htg6hd!tF_^!^d%?m7f!<(5`b6or4$fGpl@kbFGma7V<{~a#) zpTIc7lSR9!x&FVzzB*S!X!F!lX1@LZy6^A!_4Qx>|NsBr|MZsk!A-GmPjz%jOz;x_ zp}Uf2iHXI6eGal>zxxlW+0{Nc|CGnY#q!#gqYbOLckbs^XXjSfx}~ANt@hWE56q?i zq#>!$Qzm}jpSthw`uErv?@4A_>GjX~G)qM_Q}%b!nWr00Da^2t2xykD^fg%UER8K^ zV#}F?1F<;*KD;x!Bn`eSS}7CLuxGQ5!Y;cy7k?ROIC{$5-~Yez@2~Uw>(`t)qfr}v z=5$kFesJ1>i=K|sHx9fsb7Gdu_}HT3?V~?sfzg?VAxRes7(N(1yKmnXzW&1YZ;-JG zCr_FEb$@IB{P^B4pL6F<$3E>fiH!2M9p0TgAlbvC5z0Kzk*~(@s9RF%0#}=57KTfi zZf008Y+3dEIDcF9_Y3EbLreSBVgK&!{QUd#^ZGk?{wJ>9AF%A%!@XfWQY@GB5)%>< z9C9lYa_fu|CMP-gGR{!i@u>6154B~2R*xpJx;*)u-WI)G!fvtLOAbhAtq%Klf6vd? zXJ^+xdURnUZ_eu~344?OP+b#DBGFQ1;yU+(`uE7LGR zg`16y&Er4o+GM8G1J7C?s@orAR9<)B@BjG^pPr7NI`#aMClODd&fL1?L3Z@TU+o#2 z3zvvVv&@#|*&%%SKtuX@iBqQ~=h-Aqn4WH6s95&UaAItjPb?1)kAA$7O>LJR)JlJu z_xJzTetKe_*2>hMd?oeRgk5cojg5^H1$Tb!V%rd|*VDsufB*m5e}A^OvCWGXz1dUBUWUjKG*4PNm zFY>qleSQ7^_V)UJe;Uu+{h!<*ySrhFppc@g$)P%(<7=bs!`A*fa_pFd`0>+cdR9J_ zzZ51O^XNnF*BM&>6IcA{j=q^NQ?ijkOpJ}6zwQ2h$!)pH4~1Ai9r*vB=l;ILm&c7@ zYnz<>W$OR^ss8xr`|0Ts!Uy9+4*vJp_*Fhq)sXYW@&5gj)%RC_dg31=vwoePc2-H! z-S=O_XDckTSQpZ2dh~#;(Tj$?w#~DpK?}@gn+xx2Y`qc~n9|F`!*hO~;ho*>OP@gV zl)udWdgD90zCORG{3YS<|Nr_qetdOd?6a6WUj6J=PF;|4=l}ozdNDr|uddQ>YVz^* zW!g~t>&If@nIGjZ`z+a8a%4&8!Asi?uv%D3PMRdKuRc9uOUJ`kp-GSDEm@`^b7$}A zg6DkGR3NGRkWuQdhllrDm;W0_m7B} z(b&P#7#Wh_%UEx4CBe6>*(%b3L0P$Zs;F~-z~Pz3JagyrxWv706@B1kSn%nd?VIJ9 zki30p)Anb_`~S<^|7*?hYb=ijm1c9fjyD)SV9cNI^etWK{8^&~u7~H_|DUe+_tu3E zAD^Dy-`mSHZ{CF5bv!G&F3xJt*gLh};G2DjqYbY=TS#;pLqbZ*gb4{AJ}DN(ZHY&@ z+UoyHEb~be5K?;e%OiwC^Xt-mVo|2I8^K*{hrE+f{D|pJ~ zTx;v>!qS**<018LSFQBv8ycYDd62&+S%4-Y3LoE}ZT^4BQppF0B|Nw*A27%Fcs!Q* zANYFue;yvt8cwHkRr|``oW1t zhxX0?{@A^umrwHdFLr6+tdvEXl*Vq20BqSuHr3nZn{rT~6eFw*5Be@I z|L-&B$KPjXL4o!E*Vq5Y`~S1DN}fF%$;5m!Cido&r!#NdkeH_T_uSdDAHHfIzQ3QR zrl#l38y=6qZqG%kef29IWo$iT!1DjUam;?c*z1})XAT@YwqO|uCFGyqP_BRYU^9<- zp242l)1WEA=jVCm+Z*56aqz(#o~s@k&N2$te`t94DfLTs*aL`T@60jeYH{Fko&Va5 zi`gvrcBEzn_j&{NFD^;R+8j|Fw>UOQ=}uw1d6PTmyCO@FjmLxfM{UXXSFX}_)ADD3 zx8bJz>w|H+zMo%O1#+$3tMTdakEzS*=I!5nCj3xfYeobIjoU71P|)CL41_Se5$o{mW`C`1ulKB-`dQ&jD9Zhbr4n zOxORvuQvI~v$Q||2#C4u_iJr#Zk#JyeY3$PvR`yh?dgI) z7W+1aL!#=?JPU;9+hsFj`%nur4x zLBVScHf(fU%w=}qEC0(iegAoQd?sYECNEW(CI8RZz@X;enGIE6_4)YP^y4KYBqZ!= zf90nf;V_raS5E%1U3Wv%{(4EhxE49tZ;<8ZO-&I~9GiaKx1ZGR{{L@s#ot*EAG_CN zLA|px{NwNPir}a6%wIyHT-Nd@D*K)|7~P#wTokbK`T75LRbRv>8O@j>@#f8pgmRC? zyc-uAx&?4E^BNc%*Zl43u*uDxqI)ib;pdgWHD9^YW;aiq`0#`v)As9-mj0oc7IrlW z7DX(H*Z%*XaD|~a@JB0`$HpK0(=sOH?{Z%tYrWtrleu~G4vWPN4F6aRA4N%IJUGtZ zmVJGJvF4gJJUKZx-m^M;b7in!UbK|+iOA>Y{D*IDX3^7tw9O99w3sndG9n^iS>?a~ z?)#d5*efefTm9DXR1atza9{oZ?U|S56>l_Ny}#c$BlGX{DS!HP9Tl?WFEVU0;Vx=E z^!`4NjLeJa3lo^OgfzKJGBY1OJ)KA1P9S0dXoJxI|A%fqOGrtn_}=&My!?^QnHv@c zas0N=Y1NFGAUJ)tUCodGFD}|wtd0ns>KG%lcCt~?YW|o9_wC!Z=P&%6IA;z|P>_YP z&}&ZrAIJAeNJyNUlUVV&tpU`r0LAi5a79zv-6;C<$^ZWy&M*J{caORHQNGeac3B&z z<+7VM|L5QN!EfiLt#n93Bsl?_xoh)A9`Pih?Kl>m$&`(|E2IDA6r{D$Ak4A*BAFNl^i&6;=^?}FabycJRyYU3;EgE;a#s43^ zd+_-21IzhPXAY~a70w3zaeh{C@bmMF{uPHni2)R4;YBrnP89qy>DoEtauv@CPS&fw z|Ns9_|M7*>@Y1LE@}P?1(xsAJf#Uo>zVlnHvdB1iu({)iqQ(gYy}r%A5)OpzRAAmB z*jN8wf=|Zben7t{)V1Q?Yj)^sHZKX@@%w{F())XgQCo9zc$%6b^6ZvLSNxv#@U%Fa zn;R&*>%}I4_ADIZ1+VO`Yw;^HHZ<($GSbjCe(~Da!oom8!eO<&S&?3~iZEye<3F2{ zE>WRvO-&JQ2NXW`{$G;x#EF&f-JQfAe`bCVJkG-d4X`bPZ5B(`joeDYLtulF2<2A`M%uSL|<*pibMnIx_K@{l1Yq~`ar z4=)7IJ=2wf#EnwG&;S3 z&Q9pc`S2Fpv(b-NT6WXI;==U82jyDNYPgt#Hb4Gd-qtP7S)QZ_amJdt)?5t^JTCHD zC#K0AxpnDu#Ua~cT^wfLZogz*^q{G=DM`@Yg55(jj)~LQ{OrTOjlHKQeLP~W)M6`{ zwR(e&MDvOB$*J=?m43}zmMD=Cy+*Z<)3Lhvm%{UcKAYoFrzWjZ@;tGrTuf@$r749= z*74uz3J`fAIN7T6yv0nlU4GUd=4}Np^;Vhy%BL2EO$X2MCGy_-?-a&(G!L z!9#~W{ILu%Y`w|1=-*wFa!S83bz^^$&b7gf$J{b_Xg>};O!^+r0e zrzXrgm5_2oLHF!}H9iXx{Zw{TO?~*lvCV(JKx10cAqAbDJc~~$ILwQ261!yIJGfC6o7) zuVudhx00sJRpf3C5NWkHYd4ED(u@i}^639fdy@#C!+ISvOms7a&%UbW_AkgWxSzMD z;Jj{oG5?$b{u7EUxw{wd*T1|`VcXun8$&;>lX++IvF1@_#56to5DS~WFSgH(J)I(r zc%J-RHoVACC{&wB2SLc3TXtJsogj4EjpJMOs;;YC z$v56+{5IVy)&J3kjrq}xpL1^ZC@fgFQAeiz;f9T7+oin<_ZICeuq(Ei^gPFG&Gj96 zrGGz9uD6<)`ok{sRFS8IkgJZE$H|PAYqo_O3^UF?beI$%lE0hj^2^CD*ME;cAOG=V zX7rY2I{&@SM&AV&C_Qs@`I`y^j@8$jNwoShNvAE5kP-T-EU==E$$r}X0}tM?G#f_> zZ1!#9l-b32m|^k*?wDN#yScx=xw-f4F88UPhyI>RxS;e{-0KJ1#E%lU^U6IUk9|Fr z*8E=CbA{F{x5%K-x4D6H*<2NQP8Od_FSEWhMdMMCW5>QD8lDz>hZWlzUmMLgo@@3+ z!R2_&Y7LKIo$jMePbYnyrJ!)W^YQ%pvOM#agsn*?hfHqfeY8oLd9&xaaBTjM_pv!w zy{0W()uPzW{M&YU@aurU89p1{>TmoPEmEl+}-Tx-7J65|5C?b5AMEmBO9%!2RR{C`n2tG-TY&Gj{5Z@zbx+;e8Rd}Ldp zj1KpC=gSiPFG~!3f7QO471wXG{C4X0#{~*~ok~aZczK!)j(nG&yM5-O+NeLL?wtrd zn&dd`ufT6nknfhv-pJLIAkbhhvS^{etMUyOWA8f^lxM#Tlv0hVk($3SPGN3|P@#ah zU{qh5impn}zLz`R_T^i?%L`k*sN-*)LGq`$Auj=oX#D2t=_o)dFJNS!0Q!q zN)~*lRof1xIfrhHDA;|mG-kaB>-XOY` zvjtRAt?t+`QL1C(PTq)yjkXaDwi$-k5(K9yXdId~Q$ljZ!Mq~w z-y>`}t8L%$?{7Ij<`zXfnUVha_ZcR=(*jMORn7-?y;kD6S>94G^@GW;K$k!T)l|>e z$;*!Q{|J_0zuXekqkOpG`QNbMlKHnP`;+IjD>!&fYgI|S-O#_)Z}p;B_t2Y5V>l9@ zzMoyXH23V$9o2il4pq`BjEFE;6M6W+xxPsv1xxukq$U6SJ-wmQ*f~gOT5`ZcQ&wT& zZ&$DGV33yHqAMwIV8QzH3aUpFGEXXKd++$_uKVGW>fz_-d8X?dpP6^|!xzoNmzT3l z`?;90qkd!9l%5`*OP6?-F6Hs`e32agL50j2R0y$4{EXqo>DHQ*+_utb+{m>{53Ww?BLt z+_uykG`w8#Z_dN7*^|1R#l+Y$GuzJiwEd~kY0ix3=BoI3pkb{=qlk1npJ7V+WMNP} z0$L|;RwyB9m~*RTbK1eFE*B0Q@8>z*Z|pPYM8OLIP#bf9eKNnb2j{`_{CCeJryf0U z{(M3PC=fuCr%Hzooc7*iZ^&45?yyynGuw|ZosCncCSKQTn{6)n?r!ppEuD?CXFohA zTk}_^LquA-d5=x=OiAY|tLC-3e(aCESat|;rJOOho}GAJE@^flbeqo_(D;ObfNOke zvf@h?R_TLIEPES!kAG5-bJ^RLAT7POfy3#|Hjf*J-gGqu`x#H*IsE)@;yb%$0TbTT z%!-Sixd-069aNIP)g{e#Yg(n7*ODXOZWT<|G(+WJ;fdm|rpF4W<2EG!$Pc)A z;MlqQ`U^dK(jsfmZ!_%E>Au-7*p#Wn6UcR5nWG?fRhP-$UAMpd{TeJBlM!TVwySTK z*{tuGptX|!|0^B*e()gA-@iODF*mdYpWI?u`{97p5zuy?zo$SQoay>J=jS_%Ti5@PufvbJgPsg}OCPUEdM*V+@x<3gOKPEJ`w?90_ z%ciN>R{mb1qGG}Gu2ZLtH*9n)meo7{SX1kR$`tc_iLNe*vUe(L%bG!hnV@()c=^yF zp3~C|B_st`gl$l&V%jpzC`MqqkxxSy_kpQ_?XXcGFugl~UR@(}EHXgxMsBs%W^TJUTY3 zYQkK1PKEQK8J_>|+qYR3OMH8?@QN+eh@D^SKPoxb+beC7oZ!SBZZEyI;ovOM3C(Pi z8`Z4}Q*~^1Jz$t_Y;aY0<_rmgNqo$Lnr%Ns8r1sse0^qr$>h|#yFA?Dlgt>qMWNS4+_VKD5D!U9Og2;T=?gB#Gmi-rM`*>r@gGdaX@?CmL|%7BE zcx`WOWV|>v)xox#V>07bxoBoy{~eDf9g$^j^yYvk6%#ZT<{e1p>$7b<8xGCZ* z6DiJo@5kXSeE(;#>k0n(mvkU7Dq&K^D{;0ZQ$BuBbdHrh+{nzrSF8kDECxzR`=6gX zc1%FvfI%V?v$i(pNv6pz7OjHSTORt$@U)zgws|U0WZ9q2(Dfig;@XkMXJ$5E#hJ6R z5*&QRcz7a$nA1*Gd}vs>Z{94;X4njnQoZ}<_3=EfuN(jQef+}h(+5s=<|jypvmKgz zbmK9Nbw?WAxg9l|%qHF1b0AA$-7(>?$TgbGlN;}x+fg@n;jUQ^pLiyjFA;$Hc~Qxk z{)rP0-kh2E$mNJ6N#|=>jC)+)Eaaz1>Yqn%n)d$%;f0(OoZE0^|adu;K zPEsmK*4=aXP{TyE2P!tRI#+J)OcCbXaIK_b7V8c!%O+3B>2D);9@*~s|8Me+>gkD( zTwbiv?}a44MbG9)XlpmOvN~sKHqVqy>J!X9b>NATj7v0Vyi`mAD&ZUCForQ@Kfu>HnsA`Wh-Fg(0A`pU~qk6?cM*RpbgK=WQ(=Tg(=9-c-m zwpUD-vc=ePRx)n>a_qi9->Tt6)5w0Qj3y{-7UMB14Ii(09wE*oD+ zZMmrKQ3HdaA!rJ2f&iPc@{yIrGjjRZx`H{ERhdjKT*@Exn#rg4h#3-QfvyQs^Vl9;BqC2al64TuAuvEv=w1F(61O;HbhKmld2& zHU}o#dFakGFio=J^lj-W(@^{Q-~RBiUY_FTMsw_ZpUslp1a(v5BL}t#Z;wu#$YW*I zvG+8O#Vm!2NlczkEz`Cjclsj4u`2j z6a1p!M-9x3jBJL6Y?YO4?(S^a*=&rAY;tmJa&mK0IS(qb*f0c(g|o9vw|t_?XX2h@ z%I>fH_+M??+Gt5WIq;z&Z=<#PvnPUPR6xn9{E-7&Qxi{44v&@=kCPKmRTa;*YdkeI zJbU)=JbI+@cx}eUj+nFwvn)J98|qgyH+EF9thRV&aP0PWp6&UdQQHlb#-O7c*2nXA zIoCk^Ds|#WBU?pKdNPZ zF#Zeb`r*bV#az_#@cw?D+uMzE?wtXh3bHkuXMg?Vf=yhYqX8KH|Ce&n zsG!KHKO;3c`NE~74V#+g9aK^~rl7_qTabL<8jE5k3+sNV7+wV%*~}+P8f;d5{Aql6 zZ8T5$dn21#@KFMRB zd9IdH-LXKSd4U46y}i;!2Zo79IoCz}`7K^0bm9O13rcT)7Po1KORS4&T-N#ey5u}t zP$l*7>+6H_L<}GSe+9Z6OMaV>$g4;H>*KeJiLkS8Y~H@Dx_`~hzLctRd$Z&1ea!D~ zmhVw_3sOq|clY?H6-DUs;~f8>Q(Q zo_OxM#De%2;GUz@yiR6e10$!)jLKi4QrN`A*v!l{n3b3;C;t6i{-QJBMEdJP`Fno< zk=zyd{(lj}tpy)Ivz3%)XEV=%j*Y8&8yTNQEKB5v6e$D`lVWDr?eejJ SmEWitq`}kG&t;ucLK6V`eLJDEwk^kok&p`cK54y=As{TBNmmvDU87 zGV_FIPCThq=B}osk$BzAgMm$_^4RWsS9{X8yq2sDI<$M~?TPiDz;SqG(nhWp0|D0Z z-il)aEgox*{Qtj6M0$f@(~8~Itw$TTDtxavdgbt+a+zZf3`{=1S$4ATvHCH!7OR$B z@3r6eexF)=qclTzi>{JZ!wM~i7(s?RKgzF4A7)jG>Uws@#p#Gt|M`#DguDV(M&eZjN+madv13_UMR?nd2Tf(K~FKbJVO*pPtBo2@_|} zZft&RudU5&{JH*5;J^R&yA+g_^`_2<`j;5Y>6`Wc|9|IeKc1ieZ=V0}-rnEezrEdm zef|HP#n0bgUS4ls`|J1L-}~p=|NsBb*5IHA8#JkwbT-eO8|l#C95Tf{Xi~hV=ZO;@ z4zY86otir5|F^fcW){9|pEL1PgVa-=86SI>u|>-4oN!<%_cAui&OgqVIT(3({s#!( zv}bGk|NsC0-~a#rU+(|^-Mzo%&(6%hy!`*Y9UtGnyqZ2(|df zk%K2r95{ViP3h3;1qvw%3h&rW#sbytrzka2)Y{7v;Cp$IXDt`R+-`=Lyz`)>7dPW)^ndiWKi0Lg z9^SQX`~F_C?(d@fDG4CIG5r6(sQl4`=g$KI1PslmfBibu)bQccXKqIX(oz|{RW&9a zIdP&vM)E{dfw~minnx!z`E{)%CuGT7dG%P{Z1F$E%*Jr@{{g2ZrX)2p?!EMr{qm3Z z|NsB@pKl~%b7Die{^6&mdDh1p*Zg*S9?=B#;i6!$ePLq71p!?A;US@^>4}LAuEte8 zNzY2x1omllERtKf=iodJ18-KFjtNFnZm}4fE-;JwuXx?`geQw;>YwJRCOrTD8P@zb zQ1DUd@V~!2%l(aG_W8Wu!~#y6i*g@5c#tq{vT$K(C^u)|)$7mGQd1pH{rzuv^Twlj zjg1fGwZ!FAC;WOUzi<;{B=SzGitt07Ig7r@pv0qQS`GtrbuCB%W{4; zHc)z5_~Nv9+w1F+d~zu_c1?YF&(_M+7Zia1|1a`9}17 zR?9_PJanL8mfeJ#=J`fC5eGIDs~=t+&SN5F%?Js-s7DFO$zef4Wo6g=y)RFnoSdAR zns8vQ+5=Z6W#vUr`vsr=mz11ny;P&;|Ns9F_4zj_O%i#$GT;Ks#8sRrkw-Y#-);HC zzw%yVxWT`OOkeWJas_2(* zW?o)KVF3xLsi`eTZJAYM7fk#Ag4gYi>rwljIcFPYHUyL|oGx=>nc#e#2d_GFdW{y8 zId4#vI#Z{!=iyKOP#N8+4$P9_KMx)_@Zr1u;lu4budf^D+&WV5n2lLmIF+}8!_Ok0{*R{lTKFvo5~m`TCM>+2=ed=hSK>wWmt`&G{_NK`4= z7k>D(DKbh+Mcvxk!sF8i17oF`Ew`Se9PnU&ku2&_crL_vhA$?Bp=CJv?(Fs2Y=?$q%WC@yb*6Qne>TTy8l8xeb+y{)SJgR-6-e2 z-#-Rzhy}WZ9$s7_VO4A_FSW;g`1zAmQ?dis`xo+&Jjo& zS28Qyu*s;h;>EM4J0qh`85tg&e6&8|k;A&+3Dp}uXt?`{{WqA!myybrrgVTOHRfXT zwUPtzjuQgc2?TFmV7NhT;{O#ZpU5*;{T7MaxnMb`4_n%)hT{G8={L4^FWfov;X~(1 z!k~uM|Nm>kt@#=hcxp}cUZmaF(KUPV@`A@Y_j}%oH>NZW*gT*-$ST`8XIsEyKGiyfIn^TpK;x6>QtO%MJsvKsM zsK175RaN$%raOMq+qY?jgg!Z8$g8!r;=WZe$3vNClJ}BhCtlhvn!*lh@hQa=X=ry( zomSP_B2ipuXl!6$WMt%&G-cvMM#~oyqojF0%xSq5*1#YA_wb9VKh9^r>^POUMpC4H zYX546154YMxUm`L#Kax`qP*u|W7uf{Ju{AL`v3nM+ti&2h(2BL!{G4q^GDS9ph<9o zMNf~2w)WpUcT6H8AD(Oy42v=_Qc$hjax-KJ8@o-U!gaL*vui=({UY;ZyU)dZ6+ z3AA=T*5{M{pZG(NTRMBIQ;n9R^L%?_5iz4T_x*EzK}aD^02?|B0iTA= zk^x(d7jzjU1h;V(T$NpNP4oZ9BF^mgDHlFyw6_>X9c^6q+`dSaHI!e{?CHbA6>C!y z($a41p8oK%`k`yg;PiQDo<&7PL`>{gZtmKi-la2VEXZt{IFVCG_{Jsns?0{4@N*|P zUVc=%#Lj#;e#sO@e}NA#(p|cWnm#i6i_I`(bf2LVAuPO1%!j#?kwx|o|KY?RUzC<} zvh{>~dM|G!E^J_E_~-ZW14oW1=${XWI|nMv&duequNV3NY6kuP-!jvdtHFS$HJ+)p zse3Yef!O+sCwEt{=|3-&>GtSt4hiBd&Gq)W|9jTIBMpmlYdId<_#eJtD11PYJ1nK3 zbNRa7iAL5d-FwgR&SG}%tNHm$vdlZ&S9%%a(U7ne%TKGkkzRQ2>}G!jrQdp*o|hk3 zs2rQ@v8TQGe)F0bmAx|^!a8?0JbUJ_d+}|?FSgTIZ?~2H{h7KS94=1z#T*<36%{Y0 zPGw!SlGD)G_;4_rN?DmA!4zaP|~`F?J0 zytG-({oUW6?=JtpulD!%hllG=Wbp_JvibW@yAY5jF7C$1_e9|Eyxa-FY$;b{^!TP9 z)#7vi|Ig5->WF~!>4L{>`=Diklc$W0%^E+ysL)Wys;WsnJw7}lpQIrHa_ z2*ap<|2GyID$CWkvt|b!xZI#R$7NmI-<_G4uUCHFXH)Uv`F;C2o0pwA!y_ftW1P0( z#tnt+<9r{??F*N3rhNIV4r9Wt!20 zn6!wkOqw(~cT0miKLG+;NC)x|q#L?xh~PtG{3W_}E@b>Yjbom-Z81Efy`}k(BgFP35t% zu@DmEdEn2!8xon(E3T zsT29f_0d1)SEmy-jq~o;#m4MOJw5I9j$8L47>$jU&CR`oCb<=D+SSFy&(5y=?n=XI zl>nd3bqwY;0IU7OAp& z%xszIwIR$<;NboJl19doW$%yKwQ_@#Vbi%enp#~~uKkLR*50(iz`$Yl@tzA8dE8vA zIh3#cKNQ8I5qR!I$4(WAb8Zda-|yFtC`h}$E?T8kmYMnR{r&&#Yk%!cICyr(jK9&_ z?S6e`ue4-kp5&UbZShg1S-j+Whuc^3{QUnb*Vk{a`g(o7y{ES% z4-e1%{l;rzPj4txewb=KVMXI{K4D=e7T+$xy}6vCDQRgd)}&0Bcrecn?9`;BgoFuG zMOUo&P*{4kVJ%;KwWzW6A{B18WA+|7Tf`37_dG~*;QMG_|Ie=GN8YC=$@cY=ELZ+{ zczAuc_b(sOkKE;{|Ce z|MR?<%XenVM{(1s3EP}6dMAj99pjZ=S5P2ey0e*mj?K@iUteDDF0*aqOz>riILfIe zo6;ASZV_VW{b|R3w z|F^wCi-wtmY?mxqwD_wVXi2xpZ}kqx7XF(=s%x> z_n{EuzrV=|X=ye!Kd#QVuQz-&i7`K(*;sRdD)*mOmT5&#UtYF9Kkx6-8qOcz-}5Y5 zr1B=`aQw~$Q5Qd@J$raouH@n3(ny0eRg;oZ|MIVP|F@VoPVuYPg-1X5b#^BBiq&si zs<{!W?3gmK!Iby!;e-FqANcV8 z>}>m*njRmYDZe}-W;jO7XzcE0(cNIlt|4w5`Cw9NLPA1Xnt@T0fqD9aCoU2kkUZdY zSE^;2TEHWTc?u8C`d5TD1#VfODy!po^4PHh$B(~ywaUfsL}J7F`TtLxD%xB7oB6QP znxAr;-40x7-TL8xPO`*>NqowW>qSMyPHkWC@`b^NPm`9hW*%YO`@3tuC{v)0%8>_8 zjwmQL3P2;;>D<{9);berOJz*_``?~ReCO(>iL3r4N;GM>Ub)&TEBn;N)$^q6j|26t zuIuFO|0Rg9ReZAgC&dyTbU?&?hK~C2C2P+3>`c$em7Frw^IGd1JBbh1Oe3P@!oql* zoH#B+)3cNB*9j_3L3Igb{t}Y^ldk9#eB$30(xmyLzwx2Gy87a&Q%hW0)L0WaSe=}5 z)~xCI^GD~+n;waaZF~P5fADB!+k;K@6K_Oht^WW2e`cnkx3_U;hrr#l2iSJ(l>GBo zQbkphUqXi={326kWlNh;M~4F|G$kcD=v@BAzB(*m7T^E>&dK5{4;-*M)OhWm!!xDn z@*1r>XGzsZ&g@*a%;00K*_sE(`|TSWC)wEeY)HEt-&4}CT&$N3Jo>1m4#FxQJCZqwdF*EQ44Et_!T9V zs0j;H6&49b{rxn(^X2{?p7Q$rdzY%Z-c-@@3Uh9DkC?$4z3g~Y%Y&@sB;O-DPANW_ zCBE?3jTtA8wuX9lpE-5(bhn~vYh&xCi;jP0@@B^}GnSR{bah>5Gk{o@nDB!=*8bmo z#(fK9@;LHN|1(~|H|rql#2qJ84bt8-Omx3{H?_4TA}g!!{q65h&d>kPqrzluov5#0 zsH|*vWljIo#ux*Ggk)vyVf+6aPW)8Q)>hxyxyFCqpPwHd-rtm+n)3JW(S&5h{SySVwY8a_ z&GVAE*twkNfc#$<%Sk&nJ&|WV{Qvv=|8MV~FZgM6c(FUroH;z-zVUQ*fzY8tSA4`y zykxG^WBzwFDdSY#pM)FR`xh>0fo9G>|Mqfe{+c-XXuZbSAM(BerQXUm{>i?QJRXcK zC)?P~4@x+7r~L_RTE5aFrDxLI9)rosMx7mIT)W>LVfd5E{4b>Fm58$=Bh+t=KUr84 z#3Xn+Oc=LHxh+!Vdt%b4_CsFt!Gy!ymexiI*@{L627ivKwa(nbVa8g(X(X_G?hKKh zmEyK7FDDp1V9*sz*ebiPZ-U^qmyONMj&5yGS2ng<%P4HCKH&3{e>>Z)=YP*L%wFmo z)-cV;fYp$BO5+rrZfzzF9h(p83R_%!YO9qOzO#%#mkN@Sa1NE zFde9- zUaN*_A_l4d|0XxQ;fGXFP19xmC*GbcA-N!xd6R+>Gt&fVnHeXW8X0Y0Y+S~}w#TvG z{vGEjp-A@!UMw@h3eG z6YmfuArbTBQtOW?2ODN>oN$@t<6PDzQ;us(oNHz`&v?=Pn{~6RQ43?y=EtwYL7g^+ z|Nos#JOYIonVF}})0)HR>2N~uOoPiKPs#tC4o6&sil!tSvr5ZS-Y4ypW#p8`@8(eO z*GhTY;wA|Ri78W|4sd#R>R%42R0vE3;|J41QihR5=P ziH*C=XEC~)7(Yl5o^RygrL^Rlsf$+HfwhGmOfjA;O6N{=T>8|IvrAAU1rlLSdBqDR zXI#!P5Ylb>=ppDM%yUDDVYPys;G|n(N0tb+Dc;?dkZ|DowdYwq4v#-{@H)! zNlTZ4yM0tPbEx4#5r-ays2sxu*Eo12xC|aoO2|CepzQh3U(%U>!j*3>!Y$$|5c`_S zQ~xji>Z{O{@sfERPqd8j0;7M48V9D96dYBWcysQXhdO^aneH8yuVVQBhSP;BSj_hc zB6XTFG~oG2+XjodVmUAIV?RTWo)0%YlmtKA>*Z@jgQrcjGg&Tqi0XDfclbmohLi z+OPrCBmmXFhorWy=w#S*`n`PCg2hYaVix))|_>lBqcT@jW6!Np+gIn z9Qp8C{P5r3JYFI)L5@#2tY%4Bh=Ut1PI{LA4xi#Y<9xqB^ku|nrFllodpoNB|NDMw zs=S1R1Q!=eWQ0MnW!l+K;$c;Pj~4J8RAPU3_8^~&tK^)ylJjhnEvlz+ElX4rRC*o* zjX@_aql(rhVa3=Pzu*6_Ke4ac<|Ie9iSnXX*VlvQm~QO*yLzc=dnc!~^zC|?uOc>% z2af)KU?%x`-5JHnItmT_{Ra;Ha^Vu=GW`i#d1j_Cvd^2xla(kEt zDR_71{DTBNc6PI{us3q2Caq-)R$Lni9h%sD9O1``6eDSlIkbs1hYoE58x^hl4uUK>7 zign0jRu%&zqn1{~7q7ux_G0zGC=cBPj#ZGjZ<_z6d22+QxXOu5KTa|C`1DMB`QOOk z&)@(5?>{>mUsYvSP-NscM`OXNQ!LguAKI_ttnlJqQ+qvelFkDMmzIUQW<9*F-?q)H zOyHi~4yFujzuO z&+7k!29Iv+p1z{mYy#uIrO#(*O@w;j<}(MYB!Q&^53)&2g)ALOL6@82)$J(F3PnGfII&ZDZ@ zA-pbrPXY&DTEsq?=WF|p^iO}RPBIehB?fkTCOAx(5z1Ml&0_wm{^O(dD^_g3 zdw2f-zsU}Ex4wVRZD==S0CT6Q`K4X^yU~ zj-gW(jVCvDb}n4hl=v!y&CINAfdX4z-WOw)i##rD8Cn;w{a#QA4GRAG&R(of_}iPNOV}9Vp_Li3KLP9>l%RacdY$z@^HZV3mal$~c{M!Hj z=?Q6RJ4#-@Kid6&p55Q__xI-K-*?N%WMrS9XWBy`!6^a5EW5uX5QI1c|n!#!zh@9*ypO3wk-PtggCn^TdlsN8>lA)kg3B z#?E~BzvtwNMT>4IsMY`ck@I@%h<* z`+A8dPbGKk)U=hXJJI<6zhvFtq>67n56{WARexVlzQ?hlhv)G5`8;#xXas7g90`ay zqo8ysAoOU#Un}Q){ljyuc`jb;n40_{LOm_kFbtm5x2i}OB+E`q`j@zbhwX{ji|zUU z`DA`rrk~q>cX$2G+nYAu0_z787<+nn?(Z|)Q+aqpt#wI=2-A)o694`r zUbve2;8@w?$QqmGIWo@4%7;6JFKn4Q=}dcoe!PEsd;gj>|Mg;iDC?g6@aLyJ zBcl#zPXF+1a~^ShBb(Y&1s@a+UtJAavGM1J&a_Wk|E?aC`w z8y6HjkdfHBEp3LyL`8*$hu?DB_Et-(`8e!cD`9!fuz!lnYTpZHcOk9mGjnwLnjJ)1 z>^%f$^Ex^>Sum`7{y?naT&rWE$A>wLosMgnd=+(fi!R~a61w)~oChm^o;-c#<>a6Z z-b&S}iT)C=vv$3U5}9r@f9VpPyDEh(4gE*Ojudkl+Wv^IM;)8Ey75jg#IlEM! zJF(;1obx`yfi2f=eDk-RZ{Kcs#`v+v?L{J9vT_pPu38W7Cdcw-^Pt6R;1IPNExrSWL4k9kc@2=>4 zac!HKlH7sdn-x6yj}+hN94U)d+}pZlkyzKeWgZ>|8GI8g4;$G0`!MtLKhDDPmsY2H zwoOg7T>kRNPpRYERd()L=l1fPq?hL50N>5Mi}&2$zRx{&btaP{``$;Fmj^j{74V&B zPL^QfV7%@s(IjBNcKVRbVUzgCPx6;cN@QCc0vD`ucv+smN#{dtKv(Hs)|Y+F8cpjC zZ=3l1gb~ZZ^tHV1HC1kF@@>J+zB1F0ugO8cb-tob22bnOEAtw!SyyO8^*xN_U}eAH zlBD6I&~PB{k)D{;;zb*`&CPmt{>2Z^uXe8 z{^_Ta7A4pn-?lQFhxzi8D%Z;sp30fJO746)<%fKg72~_|G~-i~9#6Dbrjapg`ah4L z{?^1lP03b&_w}!voEmYh`c?EvbM8v8kG9O&%hePha=czFD%@xmQ(<->M~y`MQuP;e zXEkSJW(7paHZ2I~RNAHRB=>_*t!MD&5(d6g9-^gh<6 z88{_~$IeV#%kvbXE@e)<=FVM>c}pUvC}lm5z`Pu_j>^Nzb6k2lq7iRyG~ICb1# zW;g5GrAa?4Oq6(vAL(>X75Kq;Wau8_Se|pxF4k;DEB$wH%Yn1OhZ20D? zQ!C&dot>?%E0iPTa#GbP;(J%DQ^4%oS!Jsl=coRwPn-60(x>Gw*S~U=d>f*)^wj6) zX~FBR?=Q^J6}$g=r?J$>dF~EZw=zv#=6^o;_S!Q>TW_zld0wP=x}m=3#H$}BIbIJd zZRFfuTl4MnpJy!bVAm4O$A;T{Cq4Mh!uZ|#y7u9P3)MWEQU%+-RFzm=F?BuoHtop5 z*HZ5;y-m6uA?9@9y)cW<@*{?mIRjhXOWR4bok`QT)yjT`SRWh~}e$k{(oVF^5vb+oAS@t&r;cD>g!q=RO$e|VJ7E&TP{(_K6N zF@o}T&n!c(LjgQ(`!6j|&}CJB8DbS89?qVwuKz%HPmB6TrM$gvnj&8EyEHVWKKg9= zC2+~othNb?x8*14D^3eaysxoRD^?_H|9)A%r&+3HyDz3G+pJ%26z4P{HOt^ZMM&kk z#}V_-dt}~wwLj<8{|H`f-ZSQn0?!IQc^d0(`pNWU>$GX99!IyG^<6r%Fv@tgVA~J7 zb*Dd{OmPx1oF6<*!&N|l#p7hbi4w296MK)}dA~z1^yuTp#|1kdRL=2h+p$!|_oUII z8Ly|R7@pmf>a-zd)7?#Rn(^Kt}B1_ zCO?sE+kW-c#)yummu8%2Z#yV{_=16uQ|in$TB65`_J`kIYc=0;u7%vg`79JRa~64MvF195&cZV(2_5(ha)_Hq4W!@T}t}O8AX#FJRF4`+Yc=~cYT7( z1efCXRo4!Gp7U8s>Gk%r0vDo+_j*m@*?o6u=DR|h`+3hD6QacCyB|BPmaFZ$@O03( zYD>A3Mz^NzdR8Vae`Wv4VCTaP99|#S)J{EA`m$Pv{ruwB5jqiiE49M3&!y)`-+oqd z$mC>kgNbW4U;9Mu$%Z_KCY){4IP`ty*}A6VODtw29sYii85ART=GbyII|#JyH%Vg_ z(h=WysXMZAR?LCj9;&S!mcg_H2d+j`@kx0J-rvNSi{0Us*tTG$}LpuT|!ya8c1`M_l?kIKnhF=g3aJzyE*z&rk2)-~T^d|NrlAZ}0#6 z`+vS&>W;$ZhnLmc%=6h6oaXw@mZErfeLT;mO*~9YJofcQd#a8uSbu)OnllP2EV5Hl z5*zt?xj76zd@|aw(J^^$W8=f;{%!L13l>kauQRCmdFaDC*_4~cC2Bq0+*Vt-?kBmJ z^E*9DR@4-=^YY%7nNiBMjLAe3(#hWwV-4C5Qp+N~g~LtIW!vVj4ELOD+R8urY>PYM z)zLbkL*>}bLsL#a+O**^-(_35J^aF7_O5%q;Ps(pl`5R~4jr;oQqnwcu%yst`&${d zT}?-7kMH^yJtHRXS)l`mi{KWv=PR`CGd+~a?cUTPU~o)*Z)BT|d!Rz!LXjz--YqZQ zAa<;xfH{FDDtVrw>avs9YkNQ5JGQCj`@y%<6`Qtt1RFfN`u=m}_iuLHH4%Hk8DY&V zTabfWEHEW8fZkcoZabz`bcS7RA)-tYxN*9xyx_EczJWQT7(^F7_<6-UL zmWr)!qdCM}e|$=qp>V+FwTbKXm$mzU{fi3s=zP54{(1ZINO{4F`bz_*J&?KGw#+eB zxi67t6HjRB-rPmsTnY~hF!VHDn|4m6bFZ0bY(SLA0~xPG8AF~4LYG@UyUa_DT$A`p z|Cx=RlWR|;%J1g26YeO~IRAY-Bfezi>w92l@0qn1RA9FpKV%&sl2hBcSy-A|K1N5U zhCy1|Rfl!!1mTZ$K@}{DYAc0qU0(2hV{SzF7Tr}`UX20;|Fm8FL?oQ2Df&)S`LIX7_fNU}rRbMtaXY<0 zL;3&z@0n%K-IO44q&{oCl`2PLvjxXa(Q|oqKeqj1XzYH>&864I^=pfW=3nWiydC}p zcRt#buh!W-<&meDu*dbz#~y(`x!T7chfFK7@(f*QT6)&vQtAJIw$!`-njH@qJox>l z%z|&5@2pMtm79WG1U-wqGtO5VJ~8iM-`|}1Xx9$CJHHh!r)*X{-E)7hb>}#BvnH1RXLWj@{_wxbdT!&qRiW9t-jYYk`~Ux* z*_PbR0Rk=dDitM7WpZ=HxXk|jzjiKYgNR6U_wq-K`$Q^PngxP9mfcI;om!UmtuWMR z$&}Mdtrl&L*S=mXb&ou?ZE5C=^snFc=l)la+;whW@~j_rmt7`JNR1HdbDk%B{M1{X zrv7Qwh4)ORxz4)uox@S$Q6;GLw=3zk2#;t5U8{H@LwaF7@$o zYPHz2`>CMeKTzx&aw z=lt+9uiJT^H=Rmd`+irlO#im8mtF2ZmN_jX_K!J*wS%IkiZ4ykNtRflwPZ==1R=#4&%L9+`TdC&yfjNiu}gp{JMfkN$H@)9 zlDm(z@X z{8(`Mu8*%|Pme@JrKF3iq>ip63yVbE|1<%ywr`Fv_<#IdDE;ZmRmmAMCH3OdethY4 zjG5Ee+xzfMSE;E+tzxe%P&N&aEXEPTmoaba(vs5Q^rA6J(%eN#%RZbVg{+efU zb=M~Yfi@G~ef`hx-ZVOvp{LDpe5srBXG|7U0j8Zi8R zef|IHZ-1))|NDQi`Tzevi4|WscHI%bv~{t+_2cXMY;kdHmX>Yj=S%A7NSrw%@#BZY zks}go)+`8J-@xE2);gIhB4WY9l}lJ+Ltni)bKuOGf`?3p*T?hd=zxa%K74j&zZntP z8KuD)&&w$}SyVK2!nCx6#6(a){SFMj{f2wdJ!Y{L85XV$96 zmC18mZ>>qazcQOwL5+KR@rUkN=;4@6Y#VXa65=|G%&9 z@BcHBpel6o`~Uv}6VG*eFhuemJ$Ft){rHEk+RYOMn`cTkFHmS+q0u~5lv!N-@csQE zua{k7>gWJ9QWO*pB%~cH_^MUH^Y%sHi$zb{`6Z7Wk(g)Kx~!r8|AK$5DUm;}*?O$z z+cZORlfS!1hoetVV}F0+RMDmWjobof4p)dQm?{sckk8D4R224ob6PEJK1exk*eYoK z@vn9KfiM9U-M^0O0}NZ8f?7-(Csej1&i+|eo_Dw)Wk)!uY-6M^BuYcSpuzz;X zWm&}zrD?4us;58H_9&j;dH?3N@2sN#l{NLZ%vV{X{kY?r40~y>;Thv~-BY&c&U3&2 z*dqL$_4H$2iMmnAx``cASQxzok6Z8^OL+Ua@L6-5@XP5p9)o+lXXY4kwL0*)$o~*b zR^HUw+dB0HX}7+BDpXc=<2sa%YmVg*ykcq?++h%k9?BcfXsVX3ukrb=LFK zA&;dwy02BQ>dIZ5yHPCpMDdzX-EW0yEJrr-t<98h-}|>~#j_XHT~2$A1h0uEn?&x= zaS@Dc<4E|;YI0aNVgHi_oJ%{B*DPH%BmG$57NP9Gu5GG3FRL>z>T*XV^F*rYvRqg7 z{b=(^M&RRvkFU#~Tza~becI{D1aLR!%aVrjYjhqHMl zck+>0$ttQ_ragIR8+vrtBGbz)-+Pp=EfTrrWzNa?d`0%p3hgfOO$VEp_cXU(5L?J5+q7cag7T*Nio@q0*RJPIJU>%n z;-;y|`%B#Rt_fQC_{63J9@E_i=&))a%b zDFuDGhpSr><~2_1nm2iM`rdz%;rH*ngj;?#(QH{B^A){-wssPDt;`AxF(7!8BCqAp8) z@7=0#bXy|N^;W|<>c7u@elVvbd~KxIp7dEPdw7p~U3b{A2W)FnN^o zLP7QDhc}YU($dWh3?+A8&$kDyM6js{-4CtI90XOCAV9dWuOcbEXY!wTbE2ba-zf0zu(cJ!O^?>;9XfOC6}Pz-4YF@KRCku``s9VQjHA_Ykn}? zmH+*iy?Kes;kUPW{`}#YZ*Sbu>3Dsi<0=V{9|zc9cCzg7;xpnBf|@)D zhUPv#W^wW67MA8t&Jtc0(dB*``92r62(rDba`+ZuVBp~X^TtiV_LBwyd&)Q~WlgVL z%Xle1(|1&5k3d)@*zpkRyeCq^eZhu>+b~h`P zsSeYw8yf^HlymeJII?B+hE*vk(4}FEvL7{w%(JgIl8`jy;4=E~(eTAfLkUU8*{?%h z&PzCEvM0S?(kCOgdd)+R?S3**Ef-hB_T=cy6BO*w0efaq{G*0B<=_5VgFGPO5k33h z9hqHvZCbv(F_*cH9RGD-8NU{j(Vd=|GLXu1QShULw6uiO)E^(49mA)CmP@s^I>yRY z9k5hZeptHs2t(FU?YC9hUl^BI$(U6DB4mBqRicr5PAE%k6V} zxGR!F!PJFgm3wf5aGD$N2dN zZ%9;CO8*NE?_@c0L?i9UWRq2fTe$d*(z-vKfF^*@M+r$ydE3GQ69t5le*B&7=-z(t zhJ@8+4NXs0?T*ZlNAkB7F8L@;esG}a{|&RNJ{m{F`6EP= zuYxvU{{O!y?NNh|;s4+4cPD&)%H6!t@XIN~vNASFNv$WCzfxqH3hlZ)78&qol`;+09#ZQZ~y=R literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..6f8b65451cc08a463e4305ddc4be0dbe2879fae9 GIT binary patch literal 18058 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4qj3-okx45^rtlpw*nn1P*v zfq~)we<{|*4C>$({aM~`q#Tr)_v9Y?N1wZD=^(Q%StVGiFhw8xVyb0B}#MZOl6J& z!M*F=SDunsTdt;+mO0IVVV%?}z9XA2)xTi}Ma7v*o_S1$0<0HSZM&Vx%)LP*rBn91 zz0`_KHvZJzb!I!AmG&y{*?&^7>3_|WDqcmIZ@)E)U)B1p+os&w_Vs!2{d`|e&A@X4 zj+#-cyLcRW9kaC-S@z#!Z17*Dq7DNOGx=cy;(kVsQg5>QN! z4KMAR-nv#bc}3V1Cc#hhp3SUw_i}MLcTvKGP4J@0#DAxE*@F|;kx2*h4jBlz953Wh z>=NGbBtlQ&}3Nvy0NSuTDp*-*G@Q?0{mFS)!r ze@?8rr76g8t>{X*Nw4QE5-7TT;`5Tn7i=zDc%9SiT9J97)I(^C?kg#m6;VnOtIx^b`WUYuY1Tz;b_RpoS>y*U;Zt>z^w6x(Pg87vF4xX zSe?0_KBiTCuzTia;W97s+o{j_fqPDWQf@kNxH^o(&u2;3!S@_Te)_xAEV1g`v2|Tc z`TnXMn_MS$Jht4fomRDH!{Zlsn3mq;`Mak{@?4_Z{l~Vk?zb0t#R@a|I)@tav>dQl zbjsvz_+6uVu!|OXK6>yl(cEZ~kwHLV`Sq~Chl0Y7Q<9sXRlB*l9Q^SAzrEYB`VT@( z?9!?`4UCjr9Mtr#>T;?&z5f782Ip6n*_&O^Fxmw=&vbk2rG2Bc|w=d z&8?!FC9oo)By@sw$&00|Zo;D3D;t(eHm){Mh&ZsolKG(9s;YkrRRx#aVh`Mzpldnd zX2wdEro4M87KKfThuPYs%_Z#WmN@NagIK9ixM70JHiFozeyQsjx*y!K? z|MMA*JQ(u(4rYnzsIss*t@btW7N63?;cvL%%4hk|SN1vwA9`x@NG#=J&MG=M)o#Ny z!;UYFAvYdqz54g}e}BxS1tEGF4B49!9JHJyBqaX*Oa1Y={oxtOw%pqie6mRuzotC= zobI(~IwQ;(D^_%{u|>JKN3UANWo&eaol8<;!B-BOJr}fCx43)|*Zlg&)l2NZVTe=v z@(DX7rgB8R_@90tOk6Hj13G751ryYQKP9-|KGjq`hK1ZhyLs5tYBWNb#Q9lzsF*Z$qGrV zrm}yM9Wt36yBY8J3ZMDXs4l5E-MJ&BWy%ymzo!2Ci$$e*I?{9&r6nGirncc*%!M5v zLO#8hKYX~IXLY!-PVA`-AaI-h-n1R9hFSf5wZTtHt<{P+nnKEOq zV1oirYy5{B2dv-Pwl63VlbT*+RAwmlvDHIe)?TsY`@}mRrTgYA58C$G=3DJHVWwm4 z0dMAI>UMu}^Qn96;5zBJ+}Y0VRccwu&z#~D6yLAeFsJmEkK#VRg~~OGZVOI{KL{1G zlP-3C%<~*RlW5cTXWxa7p(?84R2mhQSUa&2bpC!KZ!mpVQ9iH#>ewxeN-gc$x z$)4pYAI-Itnwj<`AAMaX=%gN0!!6HqH}MO*f)L}2w#hlNs!ToCA|_4H3`{ZJ?ri1D zvR>r!i_Lv|Hd#CljyGXwm2g}mA7FMVkKgR+*}GEe>*6LAI)DCh%iBHXbyrY9YzFW1 z&i?mopti!4u#LP26nKggUNYyzTwvDv$Xc!c);;zYQ;EaE!o-Z33PCKMJ#Xb7e25E* zD!%6w@k_h1sQ0t9UDvnO4CgM{Wlu1kyY)%q^lLp25*#9KZJ5p1YSp{BMr7^kbNh69 z<1cGp5ZSzHbLG0^*UDM$Ete0nPEy<&wpiYt>CiD(0p)M9&pRIpxnnh8h@DP zHKl)Z*R5Rr`hQH#1n;_NFD=)tuXbIT<$byroQzt+9r+F@aAcnj-*$KJ?7tcR_rH6f zaN>*`hav9`kIulq%XiFue(krOZA!CLX2|3lO1&bPFLe(UUjD(pyg|85vr4?=&z6j3 z?n}9L7<0B={U9oL=4pkZO6a6o?g>)_WM>PCeX!gn@w52ht;Ss*uddWGY-xLasPMp> z_>a<{U{EqE)X?r`Ws5p-sz{;VY>`-AM?~XN&WbMz+dTwaHFSamMR|?HCNRKR2%8v%pK;yUsI6FDvfF*7so;t11=F}yOh2UPJ! zMe$s__Ci!R3{uT2nHAo+p&=o`VQge{U=piMr-1@*pmPiZzx9+4PN&YExGKOLb;`l( zN5o~uS@SOlF{Uvk9SG>pR9JVVEtD}V^+b#h8}qWy7bbBtzdO;EUfW^O;u}=ul zV`s+`6~(h=&5p;j8yg!NA+?N>Rv`yx;q4nQIJk=!@EREz7#JA+YxdcEFzd_z4S|6! zlHD>98zwa{nx-yj)m(StFi-6tPsW?GD~jy@nY4uRUOUpIE|~nvHBou0#tB!oho6`u zdFON+$S%IoV9?VeQBxzq!cwq%B9eV`EFvN#a&tod{F!p|rldrHPDVqoyzzp*4rMEj z4^1MS-Rj~-55!UqOl&SpjcE!qNJ@UZQT%e{jxO^Uy(1=!t=-z14?GPcSU;{vOilgp zJ>9@KIU)V{^F(Oq&9V6R?{9vN&f!C!#KmrYU^{LfVwM!go4JX1(=mBXh6$G}w(U@i zka(bfd!yE2o%Rh{#~!n-c`6ph{83&1kKfS=3fjLWZdIO~(bmyn@Zz;`LD8dPNpP8= zG{?fkwDs(nCpT|OPIz`IkcZQI%J^%#DojmK6K7+qS;?-~%~nwC|QD`!3Ide45E_w1HttQb2>r+omJ2YRX;MGwd z_K4d{B1DZ7yv=!dc&w~=`ucuE$w7l8rYIt!W8>!k?y;$##aEtf49YPu_~W`Z=0Xr# zQLInUPx7HMwjCUtkF<{Nw3gLZ8|MAw9@m-T0OKK!?5b~`a)noGgPjjP=KJb8F{goJo@?Xp<@ z37S3)YSt}MO3l*Z@s{k_u(N2zPYIEQmmO@HcBBaA{r>s+{^R5S zxw($EJCX z470fSH(Opv034cbF=K{=sR@gv#lM5g{2xwU@&DoEr7OI-9t5lOY*=!*A*A!bYXvqH zHj~af`~T;io|b)i`TRYVpLvy)58vN^fo;vEO%40^XT{8!Hf@>$H#hT{vNJ*)kzC1)|GG?|FS>)`uhKEIX~^Hzisap|L-s@Y{N#wf+C|o|Ia^ocu++0WJcaO z1x?R6sXY;C4<3jqY9}Wgu*>}Z-=0}pySbhH@G{>U!tCHwd}zLfL`o{_jG2-XCb73zh|NsA&zu%`9SyXqdNBI1_g*wy!{h$Bv++6;> zdzVkw+bboybmEjHYuBASf9Ozy;NJ=^PPUwrtl3-&G4mc9%+dJp@$vto-TQrJe){^% zcgk0h{dJDd4}`7%SN@F0IYuTqORKqsg;`ke@Z=LOna>~5Va{r7ToxNR(b1*l;ZbfK z>tB#~KlGfXuux7|xH2s4$t#9pUG+uDpZ@=kS9uqobK2>@^285~0&U61<-UEBbMuxw z@!)88{$%y|j^4Y5_5Zf+um7)jcB|0?4K0psY#lkd3A$Y$%+&%e{Qu9hzs~T@y}!z8 z&tgNbs&ARnm~^DRt@^t}Rn(Ix?=z?3$K&Jj zKb#+ae_#Ln{C+(ho08w(e$SDSwY1Dz;?uKrZa14jGM}%^>Z&yjEvueCJ-t7Ad)?cY zKP#S}lTVtKQ2X^Zw%Eeiv3|=EK+5 z*YDbWzPxsnHL4Oj(R8GXO&5&C2lvheYqi@>Tnr?CANQ;Y0L_tngs|wSU zciL;%yCoD|edRNh{!2*w`}_Cz_v8Kd`Q`ViE2>CHi0SQ#jI4Zgb@luDe+yJCEFAhg z*09=19Z={_I@-TzO^oK1m6QvNs7kO1o2Hc`ROuH%;x^ zXZOuh3>pG6K^ak0gk|T>i_g3SW-m-k%b(n2ZpB(9V&QwsylwahxOeT zpP%24-fowbX<7bOs%VvyAMXsMBg;AeXfjOa)X3Ub|9^ksiJ+mA_N>oaZ#-t9;cOe#2TZjsv&k59YHf z=VWTh%Qvsk5PJ&^h0LG*^7S)jL`X^+^7|eLGWhfVzj*ZAPvV}2QQoY(n7|h)70*Hd2;suwq{?y z{`z`-?O(6H{Sp!q`|8{CDpffjtPCj7Y%_3i-S|Eonqy~Lu&~?^6=9iZwm#{Y$*mf$ zs7pVaSN+ppWt7pfaiihM)1M~?YC9iT8!bOwZ*TQOrWC`k^;b$7EYr?6NKW)Gg{Z~<|NnQ5y}>yBZ}Td-iIZl`{O{@gd4d?5$AkO!ZI_qde{=6| z&x;o>-t7MaE(?U2X1rv`-r00XZ(s5A?>me4*G-CC>cA*3&nzyU8TCMwy^eK8y92Zt)+QJnx^e!Jqnnf6l+YK1EJJOQl71 z8wb-9$DlQ`!ak85)8y?p%(()!aHd7w-(N@1oaw5P<1JY?b)w+J--;($w{C6Rd!0+z zo$q@+56}N8Q-AvUcFy)Yz*hI~Px;$h$CcgV61%_FugHmUfri$k#e)01S$lcePi!jCJ#&WV{e9yEu~N9LDylzIQX&kJzT3abkVvyS zkdyf5s9LK=duqy#yLV4=aY{URkYH8*PWRp?hH#(cWfC!uKA1FWo^Cky(8&R{f&){lP51a zl9+l_LEU{>+@HysJUlOyUkE6uwJjBcW>ZPk`}_Y_{rn{UWQSyy*c+UDN)E`B@HXM&hU z%>j|k>$@Wp6u6ZZP3U}CU$tx9ym=gz2S5J(9bZ=`=I57|bFsg>SDp7Dt7L&Xx6R78 z^LJWmtE#rI)!k@)X#-P?E9<&X%t~Kh2u?GDR^&4+R8)4but)?p^H1WKpgp~whv$PF zR}|NQsDuYu6aQSkD43IBpsMe}!W#Mi&(Gy+&o);T7|sh~37_?&dm&FY*N^tIH7ly( z;@FIh!}Rpr{zN+!Fp7z_wXw0q#T9)~Vm`BoDN-QU*^HU_@N)l%ZDG)my!MU3=_^OA(WxczSt(z|Su)&5u3fj4|afY~`Al z(sG364Pk4m-;l(dJ-5m9>z~AgAE(d!S-<_dk&@HQC7+rnPmc8LjCOCI zK7YRC;yKRep1yEZSZKshfAh2aVT~3eN5^9m1jt;Bq$kH^}28m}KT>0f^KUQ@H}oSz>fqnnbj zm9TL0YHeqK{^kt^6aO9KoG`uPkYvR#9UdMY`#OU^e>ziHIU#k*WQ#fnHRD8Iu8apx zQVRtfuY6;l7s$J&^?3Xhj^`bN19*_dhDiCwO_$$`uBJn-^$nbR^y~VT+ADq}9nXBm3|LmWhnU#)lst=dm}q z_6VA8XIiwh-rTW6V&Wz#rKZ5^xqqB>1OF*r)y!DRDkW~YP(fVfzomslcJ|kAZ;MS$ zMUxT_&U34%jEsqOy?x6hDQU@sb=AMRrHV6J|HtpS%CgRiBbduywngCKi=lPwYvG zWq~A=DY1L`S`0Y;Jt&&Dc$UQ%*&06f`4^@aoAdRqJ1Z))M=CeyvNDeY2Tu(%v%Tk< zYhTl4cJEt1V`j(U!o}AMyPq+*2XH2&wUn~#`cOQLL&ngKVb1*OJ=aT1&(+R;7AsP6 zf6;0Edz%f9D^At8IdMhT_f5H*bd1ht>7EkG z1dj?HdbuV4{y&@AU)-kmjSUU{xLSSZJ(SvF>MM~@eOKax8s8JwNCAfJOuDsyk{i~g z{y%=;z=1P={3B*amK{*`{5j*ti~sD!{856R_)jMO-=KMHmdgSa<;V77)9Mc$`rq36 zKRfGDZ>SbyQz{$Ff$*lgdrp4%sx4Lw36VoHE$aUL`TgZ(KHH7RRU0PPEIGln z=|k32`)x_hjJNG3{+Gz9s4)=e3S`?;%0BTb*FT}w#vMxR+P~$^gz|LGGEUTFmb|m` z@Q0_|V(=<=#on5q{M)LgG&1fw{nlPt*_ipujPe(P6aW8LJ~y#-?i3H7txePHCY}JN& zmzHAs3r;01H8GM0$BpTu|0J;<@wjVZge>E^SBT>_@H3CnocdL}h9Mo*r=sJwjA zETa>z_aun<{Wo|c%g|7Ig?louV*K7imA6bGrOb4TeKkM-zPcK(sCc4qGtY{*JU`}2 zbUf}z{2NiB>Qu9tdGncs1A?AMj%fVrP}wAMFiVPAPV!Ay1mukzyGgKt7vTkrK*I6E1EnWj}v%qM9FFB9Y1z7v?#%ijd`0**H&fcASNZ# zm4*r025mv75)u-26th3KZG@Pl6i~FM_P4m$i=GXCbN^3B`fGp5f_;x)qC&{*gpb@= zSqTTEqcbKRY0$kMG(+Ko_A%2ph7prkKWa8NHa>h8J4Y2-d)!y}%j_xh|G)DoHV?%{ zi(-juEQw8XKQ)(qVv0J|P(0CDpto0M--(00(>NqPq-AV*#FCw)q`dRN#*6?JZnmBm z>S-*0pZU(=jexl7&~#Ay>-)dI@=Mx8Yt&fl+9icA9ni3;HAo22s5r&{B*9hNSi+-U z-A0M0MMzBUvn0n%`-mVmsU~hGw^J9dI55oH%?L?zN(P($KRhfSvHFqypK10>q@LJc z%CJ(Nq{F<;F-W98VZ%gW7OCk!Bo0L-2wWCUJdnuBY<9F+FEWAY$v=);KGH>&AOBkK z>+XaaHpk-M|NqAsUUwz<%vE4JC?RnC?}6jTB?7iS;Br5cavnccJ+8Ej56iHmRMI4Gl^eqf@`hn`uF z{q>Ga5pK4PG)Qo>Xdzt4Yv+snySnEZFn@;7igX7lfV z`Vq6$hZ@SOSX+O8er~U(_T#AIjEzm)4<%n8QEk=i%RCyOrS+g8!LpI_-96QUUQ2MM zQQE-s?|A?I_xJxd^D-UUuJ)(B`@zljrXxp195=-8{dM#1-#-T${db7ZTx59Rjnayz z@|TiAeRS&$1@4qH&os)rlW=1L<7qvUB!+`6OoBV><{o(H2zTYZ{r^Az`@8@D|B2r< z7A)b3d358DtoNxO|Nr0b5`AA)6(?)+gVS06+L8wC+6R%3`=(WWSQ3@8pGW1#&&dmS z&Rn>%^WibxMOxfj40+htzP!`kBxUjap|j5K`{BK934MX;?16B%biU0qd{qVz((HLsQ1dACnCHL1f-q$P-y{d3kJwg>CWs zCC<%FHZW1SFB9|i^#A`n>*EA73JeSkpkcM+Yv0HJ^-r=S0-GJKwI<)L?cbW)7`M>C z;Lq>R&)=V%eBW>GFFAn&i`Sg#Stt>n+qB1@FPQh77Eg4nu|zN%GxHalB0s;j(A6Fm z!4rPm<6>q$yfwQ8IzFZ}0W^$IQL*6R@Bj9H)bum%I0Uiz%rW`$|Ns8w{`F^Pe|L0h zI(ojpyStk)f6}3=OAag*GIr!yq{eD^;L;>V-`Lfa;*bINFUn1wg#^m*GdGaufV z%pxVj!vmUL1Z9_qqCd}{y5{XY+|4ybP3w}JMa9ZK^`n3O{{Mgf`}_K5XMPsEy3)VF zATK#NOy(%_IyU9BN`~kE+NWwnnBV4YOkrgF^M_~VOrH66hHIjafB0wnFo<#IsRKUR zGmN*0>j{)W8tO_DEdKrZ`Te!Ha?uoLKQ-4^m;brj{$LOk_IRYR@!$XdpaIXkJ3mt2 z-SNM6?M-Lroi}ePl;px)o3EK3z4g-d-CRD~2XD?KWSrPgs(pB&v&+ikK8!Q|igWyY z&VR0S@d;>DOt84O@9+0VtV)k2o?FwZCh{ct;>T%XCypNY@7%s`>$ZK*pQr0+Kb|@{ z`OrkE4X=*&RIg#>EZUv2g2~XZZHGnM`}>k={>QezV$wdaN@AW}+@6FVKPGNagd2Y2 zhQ$4Se@`D^xS9Q*zwGmu`Xy$XTNHl&m#@<|Ra(1XTgrc9gFnB&zrX+R@cp8qd;6-t zZ{IcR{HFES@AE@69&K*2;K9tfBye(f4Ez` ze!BjDyV6(Fe}Dh~@BjIN|8^xyIe7jwzdU4muF>Ye(S|8}Y+pNATR|fmv&|*<)g=D- z-k*3?t8II}WK@*Im#>l^=DcjMRO!0uqWWnTEc<{H#_=;}7`i3gmP}#1+ug9m^^twe zd#*ck3qBogub(@2{&Jt6T$&P+l9D2#Gv)06fpU__O`aP<>`_ezO#hhL9Q^*CN8a8k z{c8Q+B#V*`MfLWBw`U(b+Un@u{-JP6#BUvW`=tA3y`cUKD8WwvwFv+J`DtHU>%sX^ zK91MW$Z2|?)4NB>DS!G)w5$)g+SeJ}+4nd7c;9^Ew3_NKFV;^I`uzU>e|ei9-~Zbm zKG?iJD*9rZkz|0fv@z=f)#(rC=qNn+nB4aD^};tjJZJ&>))B8!ae`Y*9%DpHD(l$_9oKw!AzxOUpK9cF-!08ygxOqh>YEpWirpwxY!hMN`qJ2?w+V-FjYED%@G_ zFL`gT@~JXtD!B11;lib+o)m{Szx($rl9*>8At9k~^3lV?_0ik@RQ>qyey(+Wzx@Bd z|Nhns3c7iFUwe^JCM|vK^z{EVzgSFPotJOBzW)Ep6$XditxY{J&Ei2T`zEF*(@qtz z)&Dm-Glw(R^3-YLj!wgjOv4v14J|ATbmGnwJYqTg{5;S4I70(N!W2^=~kE{Q3ERNePL#y}!PHeQlqXrk2C&zs5JPZDMeO z!+~aYp5^||H>091ELOOALqbMY^2`~DdwUXh6f{0ODcqKSU&6$6;ckt0KrZ|L|H+(RC)30xS{UlM-|vpV%ejD)IUCbNB0a!|vSs!UCEX zelmA0Z-WDmTl~W#je9<`oMg~B;J|ZI$wOuui-*P|y+2%R4~k7qYdwE2_597~J}q$C z^+P!gSuQS*j2#%KxvuuQbfw+AOC&W!WFSPcKb8S%Pky-H=HewIQ3ys_m+Qj2vxQSQ{>-0#pkA`CanFpGJ6FZbeCYI`Z$aC4k)|uNdK8x$ z%w|qIH*>D=@+Ko^uT5nRp*q_h_gfi-zxUwW%G6-kQTX}mX4_*)Ler;KZfOhJ_pJcZ z44Pn(k|I&}@6Y+epXW~$EZlSX|9>7Ho)5|r3wT4#%-mR6J))o3t9(tc_%@*>XpegtEw=|K|&yaxFT1^7!!&zs;F1UD9G>%h6~zuWxlG|dA26O<9AyA)uhei@zp=Yt zzv6S&1heDy7ShVE|2seZ_xwr9jyl<>IR%A=9-e>ye|`P``udDugT$9*i)t~e6KBtyS+J%j z|Fxjefv7nL7?qU|pP%pIyA#?}JT%?n-{0gPzos^}vRY~MZJGANKIl}#t*$WHMLU1o z-Cdu5?@xRF=|8`wI{Nf9wm-MGwr0+fHQ@rSHmmqK>ETiCjHZSo=JRHnSsb{<;^?h? zeci*`|2sfSuNc5{IRE~qKX~kHCN)p(L8|bU0}WfJN<5UBxct6^q~wg5|KH#E$Uo1* z#YLi`LZa^Pum7K)*MG`0kvhgONlh=VrRe|PWC4*B0U@O|g0CePXFPd)yk&adAxLl^ z3TG`WGFq`J>7E%!tRYus`T;+)wr6!63PHk8~6-tm`Oo@hHFWm@{a}2lfB9?3SFLrFZS>W8L#4ap8l|QrVE6dgE z#Lq7myd4&qcAWXKSfpp8;hz0Q+fs`sxxAmPSAB#Tl-HFmRERdl-r(?`UGRIuw*Uh@ z;~(cw{CIzTeZ9EepZ4zyA|fT9JeAzBQ?l-_(!vgD#Ruo*+1%XPgw+LHS&faGcUYWT zDm~+4lTK^c@(-Vk?(Fw>dITwH4}~Auu-W*-XXl$9r%e~|+)&Y-(yi0_#)*xsEqePu z`?@m+&h&gR;O9Slem+l3%#}TLZ5|(=&*S66BO}9e=8TK!mlvFMe-bPzCOrI{ z?y^l#AVEMl`NwzuYW*n_1*LBOcrC*5`=qc?5F0ZyB$e)&yPK=QfX9`8QS;>P$sG^g z{r}(Zyx?uZ+^**Jju%&Hlo)0ku@njK;Pl*g;NhLuyRIIbvG?Yim0xt1b3VWD>(Sn8 zB1f;e@P9B3`BRe>y(U$nLurZ=i(t>f*WVULcsk9Rl={Rn)id_8Nr>p@GZsR}>kni^ zzNlW~`lRQ@H;rGq-Sh5TU9W0>L_^dev_TXUxLfAfa~pP#F4 zwyl_f`#;A`rRLXK8ISFjc}PA}?f>ch?!?A0i%rM zJoj*Qk7LU=*Oo$;mc#|`Us`SBP2MtDQs}5f7dty;A1lQbMSHrkG2+@nTIW}eRcbn{rN{?U+wZ-=LHqk(uX${ z$h<#(s#2-RkcYX{_xi$9HkZHmJZ^n?=d$!s!&N@Qp{|nxrirjdwryRad2L_KwstMw zW{(>y7j@XUt4y))U;kwL$(FK0GbnW?L=6CzJ5wQ|rUm;cdtJJ+-|RAH0uu zX{!4*CGlfY+tzH!H*Y0-dL^Gckq8Nul##hm%`&Cgf4)TA9*6ls>1k;X9v&28jd&g) zv0yj?_&xYCq6u8Z=NC| zx=PgJ^S_tDZQj!*?CR5Y6f`E@HfsxCuW)$Nkv=P7=6$uvH}=hac!ZNpS((k#vn~0! z#Gbv9CMFVf|56@2UVNp%<22i*FR!m^w`~6ZKmEeRgLT!G2VLVo{9--SwuUh|Svm2C z3^-Wk%+lp*0IgqF+P0Q$V|9mZ+ji0Bz|9Ajs7>A=<`{6nplN@{YwH6BJo0Cnzu$Xc z(6;XO-siSP`|^t~ZZhCm&eSL}^T<*&UdIpZ{5Ml(Nw~*Mx46gj&PM%ofy}d?*}~_- zj!jVcVdv^NOUO0D_qI^m|8%d=<*PKjU;Sd)@_gZy&xx+?E^+_5EjH}ie&Tf_I9<<~ zwUM{MfQKcYVMarsKygZt8^hAYOFcX%a=B(|csM@vU}%Q22?%CRtT+^$0D(1h+lFd3i%w88u zC7f>`6VrDxcxl_HwpM=|qlNqB4;6_gwl7*4I^pbeff>s-odJ!Y{{O#bt~FPK1CNV* zhunc%+8kB9{x4O($TW5IUTr$j)jD?(hnV)i{SQP|P8RSm;yL;EZE?$i(&>9Mwrw_l z&0#6+c&kjmWqtOnwnYaF%*=PnzjjXbz1*SH)~KS$F=3L8`ssVEzvX|;Tjg~9W9=o8 z!|kr`(s>^?;`)?GsB$iKzYa+4$1S3HsCZP$|0v^NoH6$It}DRXVPS{!&< z?OR+^1vVCXN%ueS+_PGI!v#OVY)1j^?v)`a8*V10u-=R{3BB}$J??>^$0vT7evZfM z73c2EN{v)s(YoOM&GJ=UDhCVsrd56XVA#7!=Z4v3m)OOou8z|ZBY6(r{kyiiLN2l6 z{o`HkRZ+(0HA=Hj-ss3YvMEKTQ)yP4iL2KVmA;#1EkEL>H}+L3trw~EyMA%op1Aa4 zYraGe0UN!yheCail-AbF{s37RIoFoEDM8?P{f`d{8XhesCYj9_glZ?+ZPRmV>Q0v8 zeW+k6;vaVFLi5Gh7wc`_S?qWm_phM9-s-_|{cHM}yC0{%C~-P|G4+b0W{X1K$J#EY z%PA&{*2!O9-hKD0*JTCO*}Tn+flI!>HBj+1=s9lbW8ZQ?TqXB(ij%;D%AVsq?Ga)@ zS^dj}k1Ow%y{)@iH1gY5<9pWEvR8H4sQX6h6vU<(KPx^ZbpLt5fo(fu1e&%jT_@P| zS59;5$s$jQB0Kx7wM%FC)n9&Ar*->f#I^bm)*ayDZpth}?xp~N(CAPQmbudLz*NIjLnJlK{O&>DV z)+C!Wlce28H(AI?@OT7^u=*zNUw->&>BLDldo))4^fBl0nOwJj*@^S#)K`8KI`_PO zj@a}v@G{aVbL_dB0z{71pYT2+<8*SVp^Q*gm)icO+IaPY3s$x{aL5?VD&kOz`r~Z! zMMKzg*XPH2?+e~pKDW(dFIMs7cDyPWkS>@Z>AYBQn(O}mrPKS_fA77cvms)_^AyR; znwh5(6uvG?6+E6I8R{xhXi>0#bKGK$qNvhonO-qfa`TSY=o$ZxXpJZ+yEBsJfW*w$|Xu{`6wH*7J3*Mr!>sc5d8wMUJ>Rh+^uF>p`Hw|VM*d$5Z z$(!RBqi6KhYf@UYq6A-ItmHlOReas@>$Tn|ujhAh`jh@>9=I%=GS`r+IY7WA{-g4Z zl^F`T0++Qa80#l?KjULlX`LFrLEwtfjTIU*)D&CiI|wzWtZIEb?dIIn#2cbTCcN8k z`#*eA+g0-+?O0m#^V)d}-<2Pm_*qg@l8ui&X1Y!K{_y`Va%QV6%$=!rDI}M0$Zl!Y|mPiJ!?~+?iDA6mQ!|erzF)>k1Kl~dg?SS@pjC5?YPT3=XNp0 zJ}*(?@Cc3(Kd#t!^X-akpUIn+ynku6ic3-KiXuW+*aYt?0T;Iywv93!}YBWp${cD#jbY|O!Sy{`TG~!+}->}z8sFBiLLS5n6lBPHBR{<1x3Jy5za zcT$UvvM;~ETh^ELMaQs-`5XVIqCRiU%D?Y-DE_Y*kKl_ppyyfq38Tp@cRl=IUWrhkE4mX>U~x^0`&H=ZeU zNOQNZPuo#7wXwPR;n(ao`+A8TI~VR%Ik7;0qC@2@$%f2?-Uy zrp%cMooiK^U=a~1nRh>J!?t6Vd~Mehj^29n;89XXXQD;joQGetb^Kbo4@9r|z}o84 z_qD6bXvQp~6Q_+=tTw)J%Sb|Up+(OthuFEGWro*u*?fK3Dl6NjYD>oLP5SYj-^$M0 zG9WN=ab0%olNU7pJ7ifFZ#*Htwnigp~ZMb>x&>;n# zGXiqw3jW(2zPwz;TF~CWFwwv;F(4%6!j+T_8y$HhFKVPZG;AIrL z$9uZbo*GVBg@r5z#>OX37)VGOrlc=EfAKNnhai^gHmAS8?}#-}0d;LagJ(9p1%Is$ zvGPc`vZk>e6-i7? zs96VciX9~K?;F`vIBcqxfKBfy&9R8v3)+(A=*;|(?@XP}9n*?Wt9My7PZmDByPU_m zEMSV=jvW&J{w80zoc!Ti+J(!>0-~uGuBc>1v+Fi2Pyp?;J9myJD2Ruh-^ix+l!Er@ z4e9*M;^O|?`kF>dwhL({{70cU+iE}?Dwdi&|ew_T^aWV&Q zs)4ENO38~1>*REp-A+05E%^Um^4c23;$NWQ%>Vzl%vI!WP7r95H|gM7)bUbV?5%u~ zsE*qIR{Luy7j-n!3};-~v|vYXt_9ofs)rWuo<4XOw^Cs@J@GaQ^Y*|#`OzV zrATh;eXPJzde$ON=b>%(tQN&tZSN;PXJ5SQUA^qw`^WFCc|K8vhdD?|HTC4C3npJ| z+Z?aUK3iFNY*R#diL7FP$f6DtRgR9w+}WpgKy&nF)jtTNzvHgli5mrrr*`meRiIujK5T;io`b%z6vH%&zBx$9KHD z*rK|9wTju=*}8wZ!{`6@czWGGcJlMzOt1WF!0ix_%UcZuT;dajIb1eu40rjUb??~k z3RZui3)h0y0n47Yl|q}%{X`v(U65JFa*wG^`CUzt)KvD6)n@)!5+8O$^453RO8`Hf|_V{iS9f z1ak8K|4NF54q{sz!;&6(ip`KaJU@OVQ^y*n0@b85Px|hs%vvV;sF^Q&C0k<6j{_U3 z%`Kzop(4(WS+;-68?1x$3pt=yRA_Y z`Nf(obcDymC*8on;LqRF8wwOpp6)b++Foh#@?C73WwFG+KZ;sYHfkj(Xm>u)RJbOa7!kb5l2X5}L@i3VB!14V( z!!_~#tx3)BjX&uMzurc-T?=cA-`~*{7~?57JzO%NS!i|U4j;7)?yv^UR{MzyE&262 zPAaJRCYPGcGjYG5yt~YBj%9N!<4UMoil;p~A=tJxyVL7Lg#dG3#}QRO8)-fRWr@|s z2DvW6F3*xoc~}3OVRtA?{3E051^xALhG%9t9(wug2Q*kd`>eaaU((82QbbfzN9V$0 z=1rEet0nm;b1FzTZS!jteD7(M(JlGG`kDQ<*4he>Ce4?(&D-APy*OpN z3gi?}P5D1%Zu)tNTel?Q_8gdWOtQdSx#Nts%QKZTdpMpiVl+B)B-Kdo@Wx+K{s9T4 zqRg6_%s0Q){|8UmZ0S64Lm6}s2E+gVi(cGVI$2%v)-8#Bb*?+6_JnD%2xxkwI3#DO zb4<9JaJA!j;*Vf9W@bC~WTOwuV`E;;w@I8~IeF1c5or7@(kT4%AH0$J(o&6T^M8+L zII3AoKQv_27VmAmn$lF3`0D5rp~*%cRK&dmW^um1Z|pPUK*2X1)rpvpITN>UPhkD;IcM836Lwz~`{@fZS*9kMSxXoQ$uF~z>z)wIIV))k z%aI183f8VYo~5Se|1k6XBd^O9lbB}x`2U}${=d6GyJqqA}jSANwl>*8KR;k*vmT!j_g>^iAjR z5>KAz=O2F;bA_7nuP;bavTct|+l2tOygV1><|Ut) ^%n!tEat8-5hoBu<{Ak~g# ztj4MjGs_O#{QuAJ&+p>}U$va8tq)IB-eMar3@+amoh(asbZpyV(sn22$jci?wpebs z_B!HqKF0#D)~5RoMq-N}>-$M?nA#Ow)j84m_>^edx3V_p_7w*2yX2uRxf)keR|jfy z|EqgZ=6XV@l#OkYFgx?hPz%FU9?6Coj~J)V<_t4DqSE*OzhvG2G>e)U3-``__{_Iz z&QWfNBc3|4^6;!$#q;tdPhujEj7&!g8l`P_XicPBY)AlP%~sv`J5MYpKt@$#nBn zhqSguXe~Kk`?`;aUnep#nb%)Hc zj75Dfp3YHyb?l{3p*%7jF6{cdvVE0M}YsFURbTEx+9b|7b82xp@gC z-~GNT*VOIY%=4R<9C;np@9ZU*wsW1>GW9oXAb(!DwveyIK!D}K+q|QkCt8bDc)1S$ z|Ia4XuXNCCMOm3&!NVV{cYajPJhQoQ2J2M?p;>Cnt}OfV^7nU(ZI{1DwOuIfnW$tU z96D>K*4cBv^x5?eN%z(7=vI`qXm43sp~=VapfDk}HqzjH>HX|;rMI)r?H4}W&VNcA zWVO_-PUpah{y{+=US09Qlifq7#zxGT+TA^~zrS_<{Lc3F#<_pxm6aE@IA}{u@Zxy0 zDMOPjXoAA#|NsC0|NVVGzx=Ta*%=%(Y2xhP^1{MZZyOG} z`e}cd(zx8>XLs0j1$C*EZ~N>2@2ma&{l&%or>Fn7tNZi*aQpu6@Bi;Ee}BK7|Ns7) z#2^1^7clpgKvTIgHwzhg7pa1vw|1W=dXuq=izkB=se!sfYtxC z-cx`6!xv3vVd2C7|2r>m1?BPo|E1n^Iy*FkIy(nBJ0H6gXTBG^cKI_qe&?p5V-k@EMf&4FD-r4#2{n^?7`{j+_+&*6L-|q13 z?L71C40%3%G=chPPp7kAUwB;Hr87QG{{8_$K@PEVy&WFdtGFee^@%yk(CEY@)ZNoF z)o`esg@I^)U*`JMSp-i$LenNPXuS#IbnULdNT z@s<19HO@)1_IpVESDLE*Us6J%?tj{j%E=EuB(_PLOXS^8tN76H@U(cFK!fwak%OmB9XWPv)k4LLymgP?uV1v_fTG5U zb0<$8IPjr=;zUNvTN}0-Z2CG}W)A$6gvfTUB@S~+^^B)J-~cTx#{Yw*XXF|oaqTE zG*DpI-nPX_A#39Dh(PPr8XNn3589sz7i=+Pq-9!Gt7{R~{Xo1ELpbJY}DEr193jqd_|ry8rw={o#w| z;f2mT>i$MDHYYwjVx9ZA3lbxX;vWeJJ~S~7E-bsYX>)K;*t691^n`>4y)BKV`VV%> ztLhm_$!kPP>Z<S+(JXtwc6OU$!LtoS`G&+r)3SHX`00tLT*H!ms(FgKg_5MQ<9wV=yg zM$Wu5DUGJu%3qZy?7R`omh+5f9oy=Y9Zsy7HoSS<(~fkR@*cc-V)bbWiGPL$3(QRw z7_y_E{I7587MJv!pBAx4!s?x0HpDMmq71nj40!gujwvV>`OUC!$1jG1$G;iYPAGh3 z`s$8q%afv(33sM!nWa{3AuQ|eOr9We z`)$zOOoqAbdvBiETzlf`Y{~z&`zNv_xJ|m}W%k4Ry#IyzDNlQ*7@XH&-O4w80z0U} zU6goo;bLV;$;`)(7Oh>Yd*sLwhVu(HT-42&*;uV5sw#Z=*Txx3S$+%U|D;XQQz_7RQU{EK@aDzS2Ww2*(J^L({|L(E)zTf@#3P; zlZmNICrx{nk-b_^^W;H>+zKNFQRW^V9s#{2e;Qhj*GEiC`on+n_KTxz%52ja&$Xt; zOnJmOllOGrhezx?B^r2a7QA%QoF5?DA7i7yWfQTo;lZIfEFblkENRJ3Z{aT&(6N`8 z&?S(T#>UOfR#?cUs@i5*ETI>pAR_q$R8ukh|G%i{Q9?>eLRwl;;ls8Cn-bO}S*26ao3YY!=|9{~zb0@|ly&v9&ewflY$K}GL?ny$;69k$&I1XP~ z$>ZLCDdnpGIJYlKdz6rt_T>5VFQ1M5y*8N{Oh`=p*+1uNV&D(?opT!}%1->O8Y6a- z<%vY9LzgsLj~Z)Q4CkgYCw`B?+^qSC~Fe-bM`wIrSrdEp=_nqC4e6#PyeJauZxA|)}Q zBw68PJ#B4E5PYm2Lz8%S^U!Zw7u+~6f zs@sHV0%_kIR;s}yT2$AWqVtwYvwnVF+aixW$iS-6Co(zV#uL&%6&3kN| zD=nGz^$*|M%fl}hkb4&z-cF$K4ha?Q?%LGfw@pC!aa)C4l&VAUv3fQ(H8wW3h}J_a z+=X9mwFt_Eb$pWHO8DzG$FcLuQ|G*i4^$Mx6qZI=aGE~+|NsArP$8?%H!&ga4Ga!k zVpREfP$FY)fy;I=<41*C%{GJ?`uOlDDeGu%r-N;v;X{ux6;!TltiK7Zc=Dq zWOU+mcS~E;(IXKZT}8%*h6jV$bS^x2$mh>ELpPw}Kw@8_fl{kZ(b|5Q;{Sn?JRX|@ z^ftL&31ZPZ$KWqq(7kbMpg@zyQ3mF1M~-s@H(Z*~q^iDU9{02~O=h>C1CzaG%#hfz zL!ziyG9ps)+S=qDh0PDo%g0P~(}6poqvJzsYvj=*5nY{y#zsa5_wcR`7pVMbuj9MG z)bjt~MJ$mM1a+?jBnx^6^J(r>Na6?;U$@LgIqFiQdG$o8*&1?=FH%~}JtQaWw7 z(H6pS@aq5LQEUlJKB+q<@d*nvD=RyvsWD4SA8umhDSszmCSwn^Tcfa`cyoJu^s!@6 zp5C7iFY4voDv{8z<&A`-q=IJi#Eiz|7>V|NuTUutQsYGaNa2OaE7#JBXoF>x|sJY}X3y+5=yYiYhfBVI1Rzyz-v}(!V zYdESG)^JI%HB{^bD}!>@oyT5rC&CnV%Xk-yW@gM0^tD}csv%tOe_|y2JHa^(OTMu) zGc$8@JC~Ro-e1pidAYGoz2CngXNb*nV!+)`V}nA6{DilhwvCO_72V%08g*qgPE5Jf zcva9TVp?)#La@v+jrZpz^q=#s6`39)r2{BQkmv9jo2w!7xN}#srL{BbP6D*pRZ=h1(Bw={VUd*b+O)~&pc0R6>H-zPD=B)F9<3p=_#xwu>J+;O6~&`_D3G3!)Wy zk57PTkB^6A=ZRk{_Bp-tOsbODJjXr6)+4Cm6lYAXf6S|mY6?E)35tQiHaa{L7^J4p zIK(`&d;Zx22M(M$vtaF60r~R_)}3oGOY?=c4V2cNJ$U~7ySHoS&&%WUl#~cO-Ekzr zN$5Ab^p9WMf{wEmx?b!!<(#dTz~t65r7@KE+CL7*10B|p3naU{U5wdP#RYF}Q#he! zA$$1Pv4q^SA6^K88ujJxCyB8@{i0j=;?=36$D%%eGBQ+n7Ou!}^Z$#PZHy++>mXk_Z9ebL6Ujfo+C$(K6rAZ;4$0b#qLYao`DqDN(-YVP7~wdF)=d{as2eK z{>nzRCxPB>71_-bKiWs#WLO_I;jq+7)sOO~*Cv)K&TiIzBxxw};B|=r4oE;dN7b<4nV*^)#N@j%xg&V6XGAgT9ZQ5+C zsLqr!-@w4&z;D41H;zvGJ@M*+y^aRYHcS)aS#i;!=5FJ1sYA^Y1xL(h85@KeC2SCE zI`NjDt;eKM$7zzz9PY#!vy58Wj82>okagnWfd-ygp@%1DW%X+h&qC#;Y%VzmkC{(g z{r|s5_kV$gi68B=n2!61Rwzkqwm7(od$VYhCLZqbC^kUyt3-0Da(H4<-3c-h?8 z7;vz`bm{{y`2s;RGd6Q`Hd|YvsAW(qv_PI{YnPUi_TI3`=s*{5MOFiUkfJ;1uUi~F zK`fJ8IyCe-7;FrCP1BP%eatwREX!=hHcKaL>WOU$VN-Z`ZUnI%JjEJmC?ziG>0uxu zI`Kj=G!nE5B_uqmD>Aq_Hy&hp@bZB@oA|~fY?+4-)-UPR3Rs?OqO)O}@dU;&%`;9- zKNhu0I2cqebk)6fq)T7(!4`?da}o}CnC}Ql%B^&`F>4l&o*qw4jmu7`*>fy@{J2q5 z%WGm%F>^*hnbN_V9)kT5s;yfJUh7}lrJvHvbE2sw#?ePfLI2lE)(v8@JQAABn{OPr zwW(3EaWO~YfsWD`Q@%}sJv}^Y*6AfGjEX~l6LPBT*j zL3Va$7nbG)3W=+s?ww%a;<|Is-rm&InFh1OBLrSe3*Qp*Y5qjPTz?&wgk{WY+7@d% zBpW;kXmD?_Y1kikz(;k8*-C*l!HISf1ydw>Bsh~xHuU+@rappUQU-n~x~-W`~y zbKzW$OIC5hy05Bij*L0xno+;o7>}t+Pgp1(9@(`2gTv#bTi#KZ9R1{ZEN-nm(e+V9 z#LI=~9G)x2iiQq|`V6^}AVe{xzYB4N1gNRsGQYr2>C^Z)<<`|El1hj?zp5&u)^=knYvYbCp-*B* zVwUVJR z|59%3_;`P=HBbIM12;D#4o<$OZ44v>-`6Lmqy&VfUbvoaQ7e`H2RE>7Grf)fr|a3~XWgoP%vN=(p*jEl zKN;(9Yj*s2FirQnUG1;dUCiI?*$xS17!*#L)p#V_Gh((Q&;S3%H9tP&AMca*^nC9> z?@w#??u-Bb^MtQ6@R|9k{&3rWmwCb6TW>aM_ATFi`uOpLv||d|-It+*s)y!TsHn2$ z~It7 zk4;9qzTS|U$K%n;6;&)clh;vAjc3*@i?g74hX4N$O|qCVLn1Y`b?Vfg5fK+I9jV_^ zaa2R`-$M_L9aj1QB_AH_Jv)2-|KIKwv;Y15{r}?P^Rlw({4y1v-b$zBG9BEJt{`&u zhqhGA$&P}h-#$P8&&*!uGvmY0cX#cNd~yHxH`$_YPU7#fHuHQA?KXxrk0odD#55}% zI>eKe#gmi669Fl44ow0TxpU{Ls;JxuY! zf8NJG;x1_0oF2bErtZHJ*&CAAn+^{N0|NH8Y1cTU|SlNnRUA%H+=Lu$T1@Gi9 zyO9urGMT1R5OyFY%qGP?-u$=e44_)BsGpF(~jTV?5`jjKIuR^|NcIi z%8iAOo6pa$PhP^nwugr;=VRCEl0*m5<01#Q%xY|GY@9gpobyTAN>-7K4*uMW4*|NlSzK%kStamk{;K!=Dm_0i@%{b(ar^${-QTy|e_qYO zSObHE?pU@QPNtn6heJIRQxCf?cyW7sefYXRI|?3N?-oyqv#)v1XIXvaLx|Fkokm7H zmo7!{Kx$nle;FgAJ5f1u`04w5c{Q~*`}&TIQ;!*r z_wC(R@=}(WnfYE4bGB{M&AbCk+4`^8hp_VS@cg&0{Z;<^MlFzuzxc`}M^|@tljD`wb49`8z{u;?~D2O9H;^tN;J})z$d;eF0qn_y7O?>gxUY{U`iooSZD!mvyj2 zbaOp<8DpXu7RDAD`lad!vKXiXTC@N}SUaD5&VqWuzXUAn;=50)m<@uNeALZZs^Y_C;`O4*T4h=0V%{we+ zR&D$izHybKp6iYs5NX_p5XQOu-M1{9U8_LT~+UP%|16IZ-9tTK-hb>7pQ$^(E zf2y+U>o+e~Z)C0c!FX#I?;nPFGk*M(Y-tT#0*xpYK2A=*)YNPB|Nk96#koa^Me6AU zZiWf$`zA~}Vc+wmvAa=*u|6_FqN}T?qT>k~&jG#lxPLneEIOiJqR0SvH5c9;BaqsHJ{EN~VA{DT(LPC!RGP6XEv8#qk*$ zx;?qZ5XIZn_F&TlBj&9|KfBjXI54YZ!PDT7DYG^kze`oGU<8e|Jg$$7YD%m z?_^+UapT5~e~Zi97xw)B|Nr2-YPXq*ic7crIDX&*`}d5O^{lM(u7wqUI%f0a=}uOb zh^0#{4IT+Cb#n3T%V=epx1>4J&@jQ2r|18c zt*p0hpFR1(Uifl-eSK&C{kr6y;}6ycbC@X?T-C1+6kUAd#EBpOkN5vqRFrx2NJX8m zw6txT8JpOxd-GcP9vtNq7WUeB@W276l^-lDC1=i5^iEk=o}D~5;qZz`gKE_sv*!Qb z$hm07%xTL4BQqT?HCU;uPu{-0WrKnCCDX(V-oE-9J7#Qd$k?^v+vW#fMXo8aXGy6) zug}g-iTdjvlk`x$*1t!Gi(8i|^maaH;v5l&1FNrRKT?2RbJ{-ni)h z4JC=SXAa~9CCus!TldHL>ev3>xAmT!yO*gMPdxsku-JIZHX|3;iN$NA)eiWuXRHm0 zaaOo=Nke@Zr0#7};|mK@b97Ym^JCk=ADDmQFZ*h*4Q#e)+#;))o~Sgf{`j%KyL;n? zfa23)>WPU=b1l=-+?197Z85177f&=ZW4j`8$cHyVm@8~~lCN-t>x7#;C%A6h_@}L5 z;5YBg%2lUaHk6(`eR@NUrLz;0>)U`o491!Xx*jPh0acqJ;hof^{q#Y?qeo8N8}>_x zUt74<_``JhdP#|Y2ci_~CrbREkg~BX?w_Sq*L;hIfiY_owNCY!_HuJ?Zf)_{wl$ND zt?RIuQ?jn&g-4(4%~CT|86AraN)tWHa&rEgno5f4KKk$}>Dkkxa_MW<@Z7t{!_IH? zel00cvh;YHDgHzL~1s zcp2*BaXiBCK%Y%|gYH%LxDEZ!thPLjIKlJe57*OwKR>@e+Wr3acKhgUZyw&<{NBF) z-{BJ{6x5Erd!QgBto-cqmr+~1Sc={6Yj!~JtaSU{e6OAC&xh_`G6xwzRaILUvk@Bq54w}-CArxT>NY* zY<00Gvi>D{Bzo$dIrE>DMS_LpL|1MQ!?kZL|Mv0;-1MIxkOdvLb5iMpbd{&ip3SHk z^pNKXH^0L7MUHw52@R?f_pf-xIZfvOM3##}EP7^9cMJqh9bxHaj43?t;K8_VOVi1} z+{NlPGx->P_|-QA2uBBpDKezNNu1k4usFPR|j=Z-mH#)W%2qM2xo)7ad1K#vrvuCVH98?>jFX;a z-QN2D{f!UxnVE&N!dY)^Xy`OPBqXrpTVexGk;2_`XB)Pg9ek;&a?D=EY^7kE_N7=s!UZ4R$$I63##;XWcbfU<&xePgC`nFig|*%g=X<3 z1o23?axk+=-xXhx^CYKzrufEV0{^-iPJGj3ewSxp_%dyS3b#j-f^y`a|K~rv6#nI> z1&wV_nfLeqSN;3LZ*R}Mq_WZQ(7^^1p;JdroM5n?!EJT!V1v=&CY}$|W&Q_BX1r`* z)cn-ZuluLz#Kq2d$&X^ZVhxsheQF({s?5xw1v44$)1YDJEn`35=I7dV=Z-X7y5*qf zq;QjaXHlcL77q{4hlK`gGeo4Acb5kUGcsotB)h3hEIi+kJf&$#i9=6sv+M#@4mP$o zQ_sD={z4i$edpvUGvEGy-Jc)g%*@)%hyVZoFL+q`!^D7?WN8V_<`XH5r3PBe*(V)S zwiqUKJH=!&MGE-pGiP-jNj{Xo{=-}FNyw*Prd21Qj`Ng>-}k5T#|QJ8U%X+P53FCV zoEajb&Aj_};S|Y?Uf(SdKl^XR9GHKab6d2hYqRWxTmC=)&wu#T`^yoy0r&U+-&_9v z`2GDh6LyN`_y)2?h<=vOR~MVyUBD(Mo+Kj2BT>r5eD8*8s7&@syE z!faPcobx|?*FXICH_!inh8o(z=_d{47&O!5>{52rO8=cu1G8$!k6&NsZ%(hT{r``D z(*cQXKR?>%JYs12XJqi_@Bjb$Ifhm-ALRQ?U(7h|C#WQCzOTQpE+F{xo6F1Z&)Al* zVWU)QX|D0CEAc$V23r@eZIat?>Cvmuwl_I#|LTPJ=BV%=&|WaxTymbR^ZI^>jfXa^ z`g&^W{P*|&|NhQDOYOmJ!Gx1f<+GX`ShRIi74tg|^%#gu{Qv)d-S2PT54Z1s^yo~* zVtM~xzuN^5Mg*%bvy8ZFcHkrv6KH<;V^iYiG@&rzN0I;ASMmhEU+NvfHw)4dJv7PU z-TnW6KRwN7)7dTi(6CEx(+8tx|LbF{b2g^G`~JRuzV-LsnKK>z`+M8l8Ql#VRwo>o za!5yX!!ov*r)Lu~BQmm#Hf%E5Q+KxDkAIx2@IS=`(I|L-&2 znrtSo8wXGEoba5Tv6SQ5h2ygV{vG7qU;qF4w%qx>y_P+_H)qT!nAxyZ;y@Hbf^WqC zgsV#HrkHn3Vr3~PGP1BX-muB&&7DpoUoW8p!P6EjEPpRyS9c@{?u&hOe}8{^I)8ut zL~qt9E)2I>JJla?F4_-Tn)v7E`9rPp_xAmL{r>#gB}bZ+6BE}4ZP>wuQlGJpqk_SOHd{{HSgzx;!P?UPPk zI3?6`r*lrjzs1M&x-9Ge|GT^4;pb0J=jY^D{P}Bn^Tq}-`yE|MHe31sUHL42a%;1c zhqv*M9|m)*k8j8p|8+w|&*5Ip?d|zI>F161)b+ZGfy>TAQAL0D*Ze$xdism6tZBSW z+b&B5EpnK(PJ)v|(EtDc|9^jff4|WAepJ-G&JKg;Pd=PFVX#n* zW2>Ikz#JFH79Y=MXV*5jjeQU`%f4tDcF64Tr@O^FV`*r{RtY6V_{o1tNnUd94nU()1XJ;MQSzmBP zKjZ0<37%pC2hN;H$UXbu{rQ6bcFx(_hu_}*kvHMUe5L|bagN82-P_vvH)cadN)GLt z`S3JMKNN(@e%NU+@7 zyyq6nBJQG2&2<+=pqVJH=+CdO|4&cfzrXHaw(uFf`}H|FE~-tXD;^(iw+~)c z;+V^uu9GuDfBu>I@Kwgs#aGCpv~t|9N+}{f5mGF9^2e^?nA3YNgq0c?7}WebQ}EyJoC!4QD;v(;-u~ah zQt|n5!$aa_3Txjb{^6gyu2p@?{}r1z@z~cJ*;ITezrQcOTda0}$;<64G$t=qef{@$ z{e0Ws-~ZbmetCI+qAELcWM}e?X<{cHoRe)6R$nktT>kulakV@^)OpAT>|G&SyT+hh( zgHN)8_Yg~DHP7tz?o*WF_y5~n_xJtgbWp?o|EsI}rOp4{+4c4Ny}kSO5^^%lzm6D=Kn|+NGk(cV^j(4XpM5jqdDXO^uE;jEFMQ&@_~> zIZ^P_=`)98ylWGcX;&Uja=NF`M)YHd&(&Ja7WeDhlkncWIdvdeV&5h0N*#3;OH~wGHp73AZ`djs*Bk%L?|NH;v=l|E&K~B|IJ(`kw^wHzP zX~~DR)Q*M3{+&8iG^%;i27`ivKgXvTeOTU~cu821%^^>ywM6yDPe}_)$&ykKVq%gI z(@(P~Xn6S0xoy6^b8`;bnQ?)_4^$w|w3snd zQqJ}ld$x~5Uv6T<7W0X(q}YBiT`GS5@7tFe>#8sO#>ULj(#rb!i+5UfZ#K>j5SYEk z##s3B|M&O*|Nrw-KGX8#e}4IYoE!#^Jh*ryIGY%!+Wt9sem+n1Hiw%|P94@Vsi~4( zT@pn_67y`6cT`M#cviYi-d^I$)xx+>9o!CKy$@iHnQkE=@$c`?!v{{C`tjfX^8f!I z^0-r8>l8IJKl=4QEbNS$n$Dj;JVixkmMzmUGV;n`W2a%zf!XFf9}aW8zn>lu+PZ1xOvxQP!Dxp>ORHqv-=rVk`K#90ddA*d z?*H+sCJ$`(`^g;8azX(Xb@q({N7Ov*9{Ja-?f=NwSQ55qUaL@`iQNI+^|z8Nb|l{n zw^-I$cqKJ3TQo?+;3$i}yan;Tc=%I{+9Zd%58A!q63b+_j-eX&)1!jl8Z^)oF%)mHIyd+FQu5%N#$-(2AP z@hPYxdrITPiGdy+fe|w}uSK=p(dlH8v-|V=m1xL>nKK(_&TO13>pwer#`i$ZruzR9 z@9re-C~Srd{5!g`F85M;o-c{h@NPH?z%oHrVF=f1oUrA^ldd zVd6x_vR&MV>%S;7NJvUZ+}r!R{?`}zu&|8je-F3+U$(60&6~w1nbbi`m%rs^WPbc_ zfB5zF8}C`$7>wL5%(qSc@wNLv;7@4cFaMz`FQ1Yix^#9!NsoNWEZ(ZWZ~p)1FH_d8anztNRCrw6wPT`vTboDk>5M zvT0sg4NI0tn@h;qs}*botxo#?|Il=cd3LSpC;xw#(pa{Z^Nkqy7H_8ii`5p?hp+!< zSt}J)_wkd_i4z7NKK^x|)ziAXbHB$MLEe3}$vbML3%_nypm0)rhBEV$TP>hb0DI8# z<$L?p1D-%VFx{f=Ps6gX8G`>O=$Sa4IhX7!aH46x-CygPANn;4{+>1$0ZuV`($a^=VI6C9C@M;bze|JHwdbN^uTg=MP=shrle|$Vkb5e-E2a>13O*JbZ)S`J!UuIkw&vFO49H z@p|;nZ@rFAOo@`rUj9$)iwc`=6(_pLt^M_Tdg9|_^&f&AfBckmagkWERPx4+2bmcn z3!3-WOWxb>erNWVzt)Gpzvuap|NMZC?uVGOC7;!%^>WOBhD3PLpFbxHp7ZUh_2~Kk zq11N6YO@R0i~j%r4|4Ei_5Tl4Z%o(c`SpvZrsn_58E&)1Ee`(w&!eNmv)oV7MMQ)n zLop)ifNxqt!j3ZD`zbmyP-CZC{QK7`_woFJhV_x%SAID7%lGvB-(UN?{GH4pW-BY6 zUAzA8+~GEv)$YS<@xxoQc_vK?(36)xyxjkVbmT(@%dC!_Zjax`>pb6*0P4Vk3dr!a z8#XBT6&_Djz0_hihr3+o@&5Y%{BnQlzw1pof9%+Tb>}|3m){p?Y1L5wU*g}t*0{{J zdXbC;3@Z2cS291X`^Pia^B|YPRrY5uES4F0Y^eX;R(DTB58~WCbFH}=3DKzs zdY9_u)v`dxUT$9J6_bZDr<;(z zz?W^$s^McEJmvG$O z?AkP=^4Rb2X^;|d%Z!B}=eWerICxIaG4Y4c6}y7@CXLcU%v?wLdr~?CIe0jj&TV^i zDNcW+*kHob^Ehsf z+LAqKKlIkS&D(g;vZr$E6NWT!ia9jh0#uNFVA%W2Sx;qx=bBsdZ5&1J`uKpnyn&x^2$w5>C#N zKE9Iw{;3++xC$jCB~^TEdiWyn$OqLW|NCdo{m`Fcuwa@@0cbtSA=aeTqLA$TQE~eJe^6hLUk=oWRnYNiO*`H#4q8{dI?V9R{qr9_ zsUCiLndkjI$GN`2A099sZf55>b&6+qx$%+X#w~3|28Mteq;D0 zw^ovERoAvunY)&E&E=+=b$JLqlDMB9J2Sm!;Blwfd5BsIjr#B_= z_!iqvQ{Xviq<;6wJ-^+0PGzRbN5DG}=ghI?b14uxQs3gIDYQsntwkHN#bWIX){LE9 zVIhkqPWa)Kv+;kD_|4er#_MNNp6xyPq_!#Jex}s3NR#mMn?6>pjk)A{d4tZxgUnq@ zQeR@**Dcog{v|nfV(O(S%M{OdDmm_pmw7T%L6^I&@%`gnGt}A|mvyeXV`A9%#WvSF zHh5ad)#)=2H>e067h!dDm?qeE!)(>B^Sk!uo-p!snzr}hbBl_58y%`X{PI3%(Jph^ z>Gr(}(J$b%)iY-!SF;08n?0YOIr~F4_T_r9bD~~7{*h+aa>4V(rG?I{)}`-NSlROf z9=hCG7Ob&B$i-}BZTmgrgdddK~pF(-`p z*k`dR?sNVi`sH@7McVGC6)Ig3Yd_db`Q@7C-(w?}aklRK%OhW{o?YE^BWIudgmWJk z&j0&#ZDi=2^Z)lXG0uZ{XYPLPrT~HC_6q#$(M1L&9O8;CYVA@|f5ne%tl#*Hokjd$ z%$-?L(sQR&B$)kJ5Fd57;&gSv{O@~@7vA~X@VIJe<_sS>(`n{^E5!KiT&`{_jO}xG zjksR8v%qTOZ9&%%(YD5Q+DsKn>+Kht>$+Soef{6QAW< zbZ43US+vB3<6Qct9u-C3_$Kq&f^CaiKRX)jtN3Ns@c%6zsD$j9wV$ggK;&4x*fCia zLB*VmH>`QhhhuA)r?=f(<6cnwic`s{<%6VCL4lE|Y>d>dfG_9I{m^Y^wzs(d(?Yr6 zis|(3fA#fw-wIFf@wc1*__xbN=apK1>MaX0Z{+QaE0odcUgykF`{G=h2<;;{|zc%IuBYO$h==>*pt3eDdUph^47!sY$>0o;CF^Ih5J{G9tw))#zbG&P*G14h^m)M@kKS zf7Ry9wr~r*XmUwJ(L;!jy(MA0bI1F3rO>6T3S!fY-IUv$W!Mi_y!@%J;kDOjy_Oia zP5;%dvuv6ZC+Sqjf1P6&*s^Z({mplcGKYiA_OBC&V@OzHm z*SzhSH#SA+-Hu5W*2f$B z%szJEW@jC@K+3gw+;)$!6&5a8_wlqiTXuHa_I$}J zS0(LglM)gWLA{7esu^hx-Dz%aprNiSA#Ja+<|xYuuxMufOU}u1dggTF`8mUs6a$+I zho*E;Kk@(nIdcs8Tml5z>`yAIIEnl!=S&aC3eo#;?its*h8LbDi>`LD+)Rq!IPJ}? zzrSaGn*GPP{@Lz@GHlaWk6W&>vP*cJyQxjl(n<|!dC%f=3^7^vBgNisnJ?dYTIhCK?9B9* z9cAa71deO8zK`Etd-T{$`Sgk$Y2VM%W(|;mQ>8f;3Ubcx^o$QgEt_FG{r`=$2PUSH zixx}9?N75PX0z2%xHI2=;mfuq7xO%f<6`F?yg%PDdiKNr^;vy;AF->c@n~uBSXuGp z6aWm^%Gh@j*){`&2?|YC&P*iZv+{CLa!xe&oef5hcgN!xO*X zXpV*B6%$)fGoIo9e^XDiDk?NQJkH-%`B|b*UOj6j zr`~~+oO5q|{hAsOmUiJvih;TMeG{*S28QMahQr7EdD7B&q|J?TZXWvZ%k=OhK+me>73q<8+XidE7Ki_DMmFuTO==L?GRdd4v4+ey^+kNbIXUY$wVz z(?(b*>A|zK2TxK8ijxBZ6DLef{qQaA$M5NlQ>UJ}SkB>aI^D%jRFGNDHYp(`<;L#m ziT~@`&d-rJ#EC8Hs6GDu zq*08xrY6_6vrXsre-)nfu`W_xefyU+DcQcaA6Z=4T3dA4q($*Cg8<7Js^G1j1jQO(;%)cdJI4v)i5F2A^w#UaeTJxQsX>{V>Len4pw_1}O-kwh6 zjY(mfWm&3Sx4asy4bYFmIdG=H{wgcU&OY!+FZzzo#$U^b9Gi4>dVws+g{wFe~#zyZ)?=5l1BhvIUy>4y3Y9WT=;T!IO!-UaQLd{!JOg; zS$~pcJCDm$ID|FFvjhwDdMD0i4xT2MIaBE41N%$A)Xq$4QR~odHB?yIBdjj5YPICL z*u{sOpxbzrDi?hGoDM3Eo;;PTsg;y4m{#}qxY@_HWetp(J2-WH7ic<*9hn?f!1+k< zxN7T5xtM86>Z~T^Li`tkn8O^;m>g`-t#t@GwlbKfx0mPo`rvdAEI%-Us?B9V`*S#?1GQG#*zzk#~4an1sign7Y47H@5XIT-*Eb zqH@-|XONkPMJu-JsU5p;<;a8Arw<%!+kEz5gL0~Z@rGw+F;C587F-ROA!R74y!s%s zf$xE-9fr0|%NTrB_1YIs1-X-bozdgZ>7Z@9%X}9*af^TsG+_Aue^H4>dwaX0@nl8Q zsfB9Nk`+?8XVa6QBhs6 zAxxX6rK|U7w`AH`r#si2OCce+=$68UEhf$D^+D$BuxOmc zI=Ne?1K6HntD8RrTM=P0R?RGojnSYI*SQD)!4aC2p2h4zZo-Al4q{bzoB%noXkXorLA zuz$9`enPNvc+tr(6MJ}^0|cB444S*Rl1?i|X*EXmFOKYr+&n>Cdk2H#<$|U5XWTgD z7ITP6@K~JUVe4Ufd|kgS`MAWjHHj8gQy%^-wwk*GQt2=9d<0q-nwIw9adJR-`iCzm z1!ZXgp+~R(JK!T$A=NV@(8=wRVuaB_-(!mfS9^cRR$nRm>%aZs`};wscf7fKdP9X_ zQn=<_Xefm~a$uXlyZYTb9yd2od!VT3!qJ@ z!JFI1KYUXC`MP8UEEcjs4n2ENMuta8iAP9?M@@}q&Kw>oDIPbs3wu3lYF@C%F4}0w zE&6CO$3&@*>U?Z_UNUFho@@uOxHKwQ+xWu-`@=LA9I6>8qg^- z8;&%xJ^1PA(9k${uA@(nW5kTc=4MCF&VzU6G`6=lPMqi%C%21j?nJ?=6^xt8)+|f( zO5}Q!ANc4}t7f;izS`sa_Mlo?FFtKY{d~|V6^Gj;?dsKjo^S@uw1Fx(DVityTu8WZxp`gLv!|&OCMQprl=R@4TPCNFrCpBFz!JP0?MmyuT2Y6zwE`RRIxu-S1G@? z_^{ya$LrGcx7Ocw&|KoOw#V_@mSc0i{OdfqBasp8g;kx*{0S*30>a4&$;k=n=?@+y z35cdnn3xE1`sCyZ)6xV)Qy)BPnpeSTFT$Fg%oO?IzI~g0{eosc9i0n1+D#Pa+oxB2 zYI*oExlP`Ffw<6}U97fJ??0!%@^8B+3`vZ4<{EMxP!MsJ=W1F}@n86q{J#eef?Tab ztV&;o&C+D&(2xFfxWMLARFINe&rO}3dUKLhSmr3F2i>~;{NCd`&(7xdHAe-ywgd$% zHNBSP=klUf+~s}G$GgiamI{OYpxMdH-=M-FCiddv51tLeArH@Tb0npt{P;Qf;cfG( jeivQ^h&u?LfW^#kCBJ6FvfZ0pK^i<={an^LB{Ts5;0bTK literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/ops/dense_image_warp.py b/tensorflow/contrib/image/python/ops/dense_image_warp.py new file mode 100644 index 0000000000..9403003be1 --- /dev/null +++ b/tensorflow/contrib/image/python/ops/dense_image_warp.py @@ -0,0 +1,196 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Image warping using per-pixel flow vectors.""" + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops + +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def _interpolate_bilinear(grid, + query_points, + name='interpolate_bilinear', + indexing='ij'): + """Similar to Matlab's interp2 function. + + Finds values for query points on a grid using bilinear interpolation. + + Args: + grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`. + query_points: a 3-D float `Tensor` of N points with shape `[batch, N, 2]`. + name: a name for the operation (optional). + indexing: whether the query points are specified as row and column (ij), + or Cartesian coordinates (xy). + + Returns: + values: a 3-D `Tensor` with shape `[batch, N, channels]` + + Raises: + ValueError: if the indexing mode is invalid, or if the shape of the inputs + invalid. + """ + if indexing != 'ij' and indexing != 'xy': + raise ValueError('Indexing mode must be \'ij\' or \'xy\'') + + with ops.name_scope(name): + shape = grid.get_shape().as_list() + if len(shape) != 4: + msg = 'Grid must be 4 dimensional. Received size: ' + raise ValueError(msg + str(grid.get_shape())) + + batch_size, height, width, channels = shape + query_type = query_points.dtype + grid_type = grid.dtype + + if (len(query_points.get_shape()) != 3 or + query_points.get_shape()[2].value != 2): + msg = ('Query points must be 3 dimensional and size 2 in dim 2. Received ' + 'size: ') + raise ValueError(msg + str(query_points.get_shape())) + + _, num_queries, _ = query_points.get_shape().as_list() + + if height < 2 or width < 2: + msg = 'Grid must be at least batch_size x 2 x 2 in size. Received size: ' + raise ValueError(msg + str(grid.get_shape())) + + alphas = [] + floors = [] + ceils = [] + + index_order = [0, 1] if indexing == 'ij' else [1, 0] + unstacked_query_points = array_ops.unstack(query_points, axis=2) + + for dim in index_order: + with ops.name_scope('dim-' + str(dim)): + queries = unstacked_query_points[dim] + + size_in_indexing_dimension = shape[dim + 1] + + # max_floor is size_in_indexing_dimension - 2 so that max_floor + 1 + # is still a valid index into the grid. + max_floor = math_ops.cast(size_in_indexing_dimension - 2, query_type) + min_floor = constant_op.constant(0.0, dtype=query_type) + floor = math_ops.minimum( + math_ops.maximum(min_floor, math_ops.floor(queries)), max_floor) + int_floor = math_ops.cast(floor, dtypes.int32) + floors.append(int_floor) + ceil = int_floor + 1 + ceils.append(ceil) + + # alpha has the same type as the grid, as we will directly use alpha + # when taking linear combinations of pixel values from the image. + alpha = math_ops.cast(queries - floor, grid_type) + min_alpha = constant_op.constant(0.0, dtype=grid_type) + max_alpha = constant_op.constant(1.0, dtype=grid_type) + alpha = math_ops.minimum(math_ops.maximum(min_alpha, alpha), max_alpha) + + # Expand alpha to [b, n, 1] so we can use broadcasting + # (since the alpha values don't depend on the channel). + alpha = array_ops.expand_dims(alpha, 2) + alphas.append(alpha) + + if batch_size * height * width > np.iinfo(np.int32).max / 8: + error_msg = """The image size or batch size is sufficiently large + that the linearized addresses used by array_ops.gather + may exceed the int32 limit.""" + raise ValueError(error_msg) + + flattened_grid = array_ops.reshape(grid, + [batch_size * height * width, channels]) + batch_offsets = array_ops.reshape( + math_ops.range(batch_size) * height * width, [batch_size, 1]) + + # This wraps array_ops.gather. We reshape the image data such that the + # batch, y, and x coordinates are pulled into the first dimension. + # Then we gather. Finally, we reshape the output back. It's possible this + # code would be made simpler by using array_ops.gather_nd. + def gather(y_coords, x_coords, name): + with ops.name_scope('gather-' + name): + linear_coordinates = batch_offsets + y_coords * width + x_coords + gathered_values = array_ops.gather(flattened_grid, linear_coordinates) + return array_ops.reshape(gathered_values, + [batch_size, num_queries, channels]) + + # grab the pixel values in the 4 corners around each query point + top_left = gather(floors[0], floors[1], 'top_left') + top_right = gather(floors[0], ceils[1], 'top_right') + bottom_left = gather(ceils[0], floors[1], 'bottom_left') + bottom_right = gather(ceils[0], ceils[1], 'bottom_right') + + # now, do the actual interpolation + with ops.name_scope('interpolate'): + interp_top = alphas[1] * (top_right - top_left) + top_left + interp_bottom = alphas[1] * (bottom_right - bottom_left) + bottom_left + interp = alphas[0] * (interp_bottom - interp_top) + interp_top + + return interp + + +def dense_image_warp(image, flow, name='dense_image_warp'): + """Image warping using per-pixel flow vectors. + + Apply a non-linear warp to the image, where the warp is specified by a dense + flow field of offset vectors that define the correspondences of pixel values + in the output image back to locations in the source image. Specifically, the + pixel value at output[b, j, i, c] is + images[b, j - flow[b, j, i, 0], i - flow[b, j, i, 1], c]. + + The locations specified by this formula do not necessarily map to an int + index. Therefore, the pixel value is obtained by bilinear + interpolation of the 4 nearest pixels around + (b, j - flow[b, j, i, 0], i - flow[b, j, i, 1]). For locations outside + of the image, we use the nearest pixel values at the image boundary. + + + Args: + image: 4-D float `Tensor` with shape `[batch, height, width, channels]`. + flow: A 4-D float `Tensor` with shape `[batch, height, width, 2]`. + name: A name for the operation (optional). + + Note that image and flow can be of type tf.half, tf.float32, or tf.float64, + and do not necessarily have to be the same type. + + Returns: + A 4-D float `Tensor` with shape`[batch, height, width, channels]` + and same type as input image. + + Raises: + ValueError: if height < 2 or width < 2 or the inputs have the wrong number + of dimensions. + """ + with ops.name_scope(name): + batch_size, height, width, channels = image.get_shape().as_list() + # The flow is defined on the image grid. Turn the flow into a list of query + # points in the grid space. + grid_x, grid_y = array_ops.meshgrid( + math_ops.range(width), math_ops.range(height)) + stacked_grid = math_ops.cast( + array_ops.stack([grid_y, grid_x], axis=2), flow.dtype) + batched_grid = array_ops.expand_dims(stacked_grid, axis=0) + query_points_on_grid = batched_grid - flow + query_points_flattened = array_ops.reshape(query_points_on_grid, + [batch_size, height * width, 2]) + # Compute values at the query points, then reshape the result back to the + # image grid. + interpolated = _interpolate_bilinear(image, query_points_flattened) + interpolated = array_ops.reshape(interpolated, + [batch_size, height, width, channels]) + return interpolated diff --git a/tensorflow/contrib/image/python/ops/interpolate_spline.py b/tensorflow/contrib/image/python/ops/interpolate_spline.py new file mode 100644 index 0000000000..ad17921991 --- /dev/null +++ b/tensorflow/contrib/image/python/ops/interpolate_spline.py @@ -0,0 +1,285 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Polyharmonic spline interpolation.""" + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops + +EPSILON = 0.0000000001 + + +def _cross_squared_distance_matrix(x, y): + """Pairwise squared distance between two (batch) matrices' rows (2nd dim). + + Computes the pairwise distances between rows of x and rows of y + Args: + x: [batch_size, n, d] float `Tensor` + y: [batch_size, m, d] float `Tensor` + + Returns: + squared_dists: [batch_size, n, m] float `Tensor`, where + squared_dists[b,i,j] = ||x[b,i,:] - y[b,j,:]||^2 + """ + x_norm_squared = math_ops.reduce_sum(math_ops.square(x), 2) + y_norm_squared = math_ops.reduce_sum(math_ops.square(y), 2) + + # Expand so that we can broadcast. + x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) + y_norm_squared_tile = array_ops.expand_dims(y_norm_squared, 1) + + x_y_transpose = math_ops.matmul(x, y, adjoint_b=True) + + # squared_dists[b,i,j] = ||x_bi - y_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj + squared_dists = x_norm_squared_tile - 2 * x_y_transpose + y_norm_squared_tile + + return squared_dists + + +def _pairwise_squared_distance_matrix(x): + """Pairwise squared distance among a (batch) matrix's rows (2nd dim). + + This saves a bit of computation vs. using _cross_squared_distance_matrix(x,x) + + Args: + x: `[batch_size, n, d]` float `Tensor` + + Returns: + squared_dists: `[batch_size, n, n]` float `Tensor`, where + squared_dists[b,i,j] = ||x[b,i,:] - x[b,j,:]||^2 + """ + + x_x_transpose = math_ops.matmul(x, x, adjoint_b=True) + x_norm_squared = array_ops.matrix_diag_part(x_x_transpose) + x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) + + # squared_dists[b,i,j] = ||x_bi - x_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj + squared_dists = x_norm_squared_tile - 2 * x_x_transpose + array_ops.transpose( + x_norm_squared_tile, [0, 2, 1]) + + return squared_dists + + +def _solve_interpolation(train_points, train_values, order, + regularization_weight): + """Solve for interpolation coefficients. + + Computes the coefficients of the polyharmonic interpolant for the 'training' + data defined by (train_points, train_values) using the kernel phi. + + Args: + train_points: `[b, n, d]` interpolation centers + train_values: `[b, n, k]` function values + order: order of the interpolation + regularization_weight: weight to place on smoothness regularization term + + Returns: + w: `[b, n, k]` weights on each interpolation center + v: `[b, d, k]` weights on each input dimension + """ + + b, n, d = train_points.get_shape().as_list() + _, _, k = train_values.get_shape().as_list() + + # First, rename variables so that the notation (c, f, w, v, A, B, etc.) + # follows https://en.wikipedia.org/wiki/Polyharmonic_spline. + # To account for python style guidelines we use + # matrix_a for A and matrix_b for B. + + c = train_points + f = train_values + + # Next, construct the linear system. + with ops.name_scope('construct_linear_system'): + + matrix_a = _phi(_pairwise_squared_distance_matrix(c), order) # [b, n, n] + if regularization_weight > 0: + batch_identity_matrix = np.expand_dims(np.eye(n), 0) + batch_identity_matrix = constant_op.constant( + batch_identity_matrix, dtype=train_points.dtype) + + matrix_a += regularization_weight * batch_identity_matrix + + # Append ones to the feature values for the bias term in the linear model. + ones = array_ops.ones([b, n, 1], train_points.dtype) + matrix_b = array_ops.concat([c, ones], 2) # [b, n, d + 1] + + # [b, n + d + 1, n] + left_block = array_ops.concat( + [matrix_a, array_ops.transpose(matrix_b, [0, 2, 1])], 1) + + num_b_cols = matrix_b.get_shape()[2] # d + 1 + lhs_zeros = array_ops.zeros([b, num_b_cols, num_b_cols], train_points.dtype) + right_block = array_ops.concat([matrix_b, lhs_zeros], + 1) # [b, n + d + 1, d + 1] + lhs = array_ops.concat([left_block, right_block], + 2) # [b, n + d + 1, n + d + 1] + + rhs_zeros = array_ops.zeros([b, d + 1, k], train_points.dtype) + rhs = array_ops.concat([f, rhs_zeros], 1) # [b, n + d + 1, k] + + # Then, solve the linear system and unpack the results. + with ops.name_scope('solve_linear_system'): + w_v = linalg_ops.matrix_solve(lhs, rhs) + w = w_v[:, :n, :] + v = w_v[:, n:, :] + + return w, v + + +def _apply_interpolation(query_points, train_points, w, v, order): + """Apply polyharmonic interpolation model to data. + + Given coefficients w and v for the interpolation model, we evaluate + interpolated function values at query_points. + + Args: + query_points: `[b, m, d]` x values to evaluate the interpolation at + train_points: `[b, n, d]` x values that act as the interpolation centers + ( the c variables in the wikipedia article) + w: `[b, n, k]` weights on each interpolation center + v: `[b, d, k]` weights on each input dimension + order: order of the interpolation + + Returns: + Polyharmonic interpolation evaluated at points defined in query_points. + """ + + batch_size = train_points.get_shape()[0].value + num_query_points = query_points.get_shape()[1].value + + # First, compute the contribution from the rbf term. + pairwise_dists = _cross_squared_distance_matrix(query_points, train_points) + phi_pairwise_dists = _phi(pairwise_dists, order) + + rbf_term = math_ops.matmul(phi_pairwise_dists, w) + + # Then, compute the contribution from the linear term. + # Pad query_points with ones, for the bias term in the linear model. + query_points_pad = array_ops.concat([ + query_points, + array_ops.ones([batch_size, num_query_points, 1], train_points.dtype) + ], 2) + linear_term = math_ops.matmul(query_points_pad, v) + + return rbf_term + linear_term + + +def _phi(r, order): + """Coordinate-wise nonlinearity used to define the order of the interpolation. + + See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition. + + Args: + r: input op + order: interpolation order + + Returns: + phi_k evaluated coordinate-wise on r, for k = r + """ + + # using EPSILON prevents log(0), sqrt0), etc. + # sqrt(0) is well-defined, but its gradient is not + with ops.name_scope('phi'): + if order == 1: + r = math_ops.maximum(r, EPSILON) + r = math_ops.sqrt(r) + return r + elif order == 2: + return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON)) + elif order == 4: + return 0.5 * math_ops.square(r) * math_ops.log( + math_ops.maximum(r, EPSILON)) + elif order % 2 == 0: + r = math_ops.maximum(r, EPSILON) + return 0.5 * math_ops.pow(r, 0.5 * order) * math_ops.log(r) + else: + r = math_ops.maximum(r, EPSILON) + return math_ops.pow(r, 0.5 * order) + + +def interpolate_spline(train_points, + train_values, + query_points, + order, + regularization_weight=0.0, + name='interpolate_spline'): + r"""Interpolate signal using polyharmonic interpolation. + + The interpolant has the form + $$f(x) = \sum_{i = 1}^n w_i \phi(||x - c_i||) + v^T x + b.$$ + + This is a sum of two terms: (1) a weighted sum of radial basis function (RBF) + terms, with the centers \\(c_1, ... c_n\\), and (2) a linear term with a bias. + The \\(c_i\\) vectors are 'training' points. In the code, b is absorbed into v + by appending 1 as a final dimension to x. The coefficients w and v are + estimated such that the interpolant exactly fits the value of the function at + the \\(c_i\\) points, the vector w is orthogonal to each \\(c_i\\), and the + vector w sums to 0. With these constraints, the coefficients can be obtained + by solving a linear system. + + \\(\phi\\) is an RBF, parametrized by an interpolation + order. Using order=2 produces the well-known thin-plate spline. + + We also provide the option to perform regularized interpolation. Here, the + interpolant is selected to trade off between the squared loss on the training + data and a certain measure of its curvature + ([details](https://en.wikipedia.org/wiki/Polyharmonic_spline)). + Using a regularization weight greater than zero has the effect that the + interpolant will no longer exactly fit the training data. However, it may be + less vulnerable to overfitting, particularly for high-order interpolation. + + Note the interpolation procedure is differentiable with respect to all inputs + besides the order parameter. + + Args: + train_points: `[batch_size, n, d]` float `Tensor` of n d-dimensional + locations. These do not need to be regularly-spaced. + train_values: `[batch_size, n, k]` float `Tensor` of n c-dimensional values + evaluated at train_points. + query_points: `[batch_size, m, d]` `Tensor` of m d-dimensional locations + where we will output the interpolant's values. + order: order of the interpolation. Common values are 1 for + \\(\phi(r) = r\\), 2 for \\(\phi(r) = r^2 * log(r)\\) (thin-plate spline), + or 3 for \\(\phi(r) = r^3\\). + regularization_weight: weight placed on the regularization term. + This will depend substantially on the problem, and it should always be + tuned. For many problems, it is reasonable to use no regularization. + If using a non-zero value, we recommend a small value like 0.001. + name: name prefix for ops created by this function + + Returns: + `[b, m, k]` float `Tensor` of query values. We use train_points and + train_values to perform polyharmonic interpolation. The query values are + the values of the interpolant evaluated at the locations specified in + query_points. + """ + with ops.name_scope(name): + + # First, fit the spline to the observed data. + with ops.name_scope('solve'): + w, v = _solve_interpolation(train_points, train_values, order, + regularization_weight) + + # Then, evaluate the spline at the query locations. + with ops.name_scope('predict'): + query_values = _apply_interpolation(query_points, train_points, w, v, + order) + + return query_values diff --git a/tensorflow/contrib/image/python/ops/sparse_image_warp.py b/tensorflow/contrib/image/python/ops/sparse_image_warp.py new file mode 100644 index 0000000000..9f50503d8f --- /dev/null +++ b/tensorflow/contrib/image/python/ops/sparse_image_warp.py @@ -0,0 +1,192 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Image warping using sparse flow defined at control points.""" + +import numpy as np + +from tensorflow.contrib.image.python.ops import dense_image_warp +from tensorflow.contrib.image.python.ops import interpolate_spline + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops + + +def _get_grid_locations(image_height, image_width): + """Wrapper for np.meshgrid.""" + + y_range = np.linspace(0, image_height - 1, image_height) + x_range = np.linspace(0, image_width - 1, image_width) + y_grid, x_grid = np.meshgrid(y_range, x_range, indexing='ij') + return np.stack((y_grid, x_grid), -1) + + +def _expand_to_minibatch(np_array, batch_size): + """Tile arbitrarily-sized np_array to include new batch dimension.""" + tiles = [batch_size] + [1] * np_array.ndim + return np.tile(np.expand_dims(np_array, 0), tiles) + + +def _get_boundary_locations(image_height, image_width, num_points_per_edge): + """Compute evenly-spaced indices along edge of image.""" + y_range = np.linspace(0, image_height - 1, num_points_per_edge + 2) + x_range = np.linspace(0, image_width - 1, num_points_per_edge + 2) + ys, xs = np.meshgrid(y_range, x_range, indexing='ij') + is_boundary = np.logical_or( + np.logical_or(xs == 0, xs == image_width - 1), + np.logical_or(ys == 0, ys == image_height - 1)) + return np.stack([ys[is_boundary], xs[is_boundary]], axis=-1) + + +def _add_zero_flow_controls_at_boundary(control_point_locations, + control_point_flows, image_height, + image_width, boundary_points_per_edge): + """Add control points for zero-flow boundary conditions. + + Augment the set of control points with extra points on the + boundary of the image that have zero flow. + + Args: + control_point_locations: input control points + control_point_flows: their flows + image_height: image height + image_width: image width + boundary_points_per_edge: number of points to add in the middle of each + edge (not including the corners). + The total number of points added is + 4 + 4*(boundary_points_per_edge). + + Returns: + merged_control_point_locations: augmented set of control point locations + merged_control_point_flows: augmented set of control point flows + """ + + batch_size = control_point_locations.get_shape()[0].value + + boundary_point_locations = _get_boundary_locations(image_height, image_width, + boundary_points_per_edge) + + boundary_point_flows = np.zeros([boundary_point_locations.shape[0], 2]) + + type_to_use = control_point_locations.dtype + boundary_point_locations = constant_op.constant( + _expand_to_minibatch(boundary_point_locations, batch_size), + dtype=type_to_use) + + boundary_point_flows = constant_op.constant( + _expand_to_minibatch(boundary_point_flows, batch_size), dtype=type_to_use) + + merged_control_point_locations = array_ops.concat( + [control_point_locations, boundary_point_locations], 1) + + merged_control_point_flows = array_ops.concat( + [control_point_flows, boundary_point_flows], 1) + + return merged_control_point_locations, merged_control_point_flows + + +def sparse_image_warp(image, + source_control_point_locations, + dest_control_point_locations, + interpolation_order=2, + regularization_weight=0.0, + num_boundary_points=0, + name='sparse_image_warp'): + """Image warping using correspondences between sparse control points. + + Apply a non-linear warp to the image, where the warp is specified by + the source and destination locations of a (potentially small) number of + control points. First, we use a polyharmonic spline + (@{tf.contrib.image.interpolate_spline}) to interpolate the displacements + between the corresponding control points to a dense flow field. + Then, we warp the image using this dense flow field + (@{tf.contrib.image.dense_image_warp}). + + Let t index our control points. For regularization_weight=0, we have: + warped_image[b, dest_control_point_locations[b, t, 0], + dest_control_point_locations[b, t, 1], :] = + image[b, source_control_point_locations[b, t, 0], + source_control_point_locations[b, t, 1], :]. + + For regularization_weight > 0, this condition is met approximately, since + regularized interpolation trades off smoothness of the interpolant vs. + reconstruction of the interpolant at the control points. + See @{tf.contrib.image.interpolate_spline} for further documentation of the + interpolation_order and regularization_weight arguments. + + + Args: + image: `[batch, height, width, channels]` float `Tensor` + source_control_point_locations: `[batch, num_control_points, 2]` float + `Tensor` + dest_control_point_locations: `[batch, num_control_points, 2]` float + `Tensor` + interpolation_order: polynomial order used by the spline interpolation + regularization_weight: weight on smoothness regularizer in interpolation + num_boundary_points: How many zero-flow boundary points to include at + each image edge.Usage: + num_boundary_points=0: don't add zero-flow points + num_boundary_points=1: 4 corners of the image + num_boundary_points=2: 4 corners and one in the middle of each edge + (8 points total) + num_boundary_points=n: 4 corners and n-1 along each edge + name: A name for the operation (optional). + + Note that image and offsets can be of type tf.half, tf.float32, or + tf.float64, and do not necessarily have to be the same type. + + Returns: + warped_image: `[batch, height, width, channels]` float `Tensor` with same + type as input image. + flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense + flow field produced by the interpolation. + """ + + control_point_flows = ( + dest_control_point_locations - source_control_point_locations) + + clamp_boundaries = num_boundary_points > 0 + boundary_points_per_edge = num_boundary_points - 1 + + with ops.name_scope(name): + + batch_size, image_height, image_width, _ = image.get_shape().as_list() + + # This generates the dense locations where the interpolant + # will be evaluated. + grid_locations = _get_grid_locations(image_height, image_width) + + flattened_grid_locations = np.reshape(grid_locations, + [image_height * image_width, 2]) + + flattened_grid_locations = constant_op.constant( + _expand_to_minibatch(flattened_grid_locations, batch_size), image.dtype) + + if clamp_boundaries: + (dest_control_point_locations, + control_point_flows) = _add_zero_flow_controls_at_boundary( + dest_control_point_locations, control_point_flows, image_height, + image_width, boundary_points_per_edge) + + flattened_flows = interpolate_spline.interpolate_spline( + dest_control_point_locations, control_point_flows, + flattened_grid_locations, interpolation_order, regularization_weight) + + dense_flows = array_ops.reshape(flattened_flows, + [batch_size, image_height, image_width, 2]) + + warped_image = dense_image_warp.dense_image_warp(image, dense_flows) + + return warped_image, dense_flows -- GitLab From a12d5cc7d5aa3d159312424a2b84d33a7648775d Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 14 Mar 2018 15:02:20 -0700 Subject: [PATCH 033/960] Ensure zeros constant created is dominated by Switch node of cond context. All values entering control flow context either has to be via a switch or dominated by one. PiperOrigin-RevId: 189092017 --- .../python/kernel_tests/control_flow_ops_py_test.py | 13 ++++++++++--- tensorflow/python/ops/control_flow_ops.py | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index d47b030fa1..5257826ebd 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -633,7 +633,8 @@ class ControlFlowTest(test.TestCase): sess.run(r) def testCondGrad_1(self): - with self.test_session(): + graph = ops.Graph() + with graph.as_default(): x = constant_op.constant(10.0, name="x") pred = math_ops.less(1, 2) fn1 = lambda: array_ops.identity(x) @@ -641,8 +642,14 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.cond(pred, fn1, fn2) grad = gradients_impl.gradients(r, [x])[0] - result = grad.eval() - self.assertAllEqual(1.0, result) + with self.test_session(): + self.assertAllEqual(1.0, grad.eval()) + # The gradients computation creates a tensor with zeros by broadcasting a + # zeros constant to the required shape. Verify that the zero constant + # feeding into the fill is dominated by a Switch. + zero = graph.get_operation_by_name("gradients/zeros/Const") + self.assertEqual(len(zero.control_inputs), 1) + self.assertEqual(zero.control_inputs[0].type, "Switch") def testCondGrad_2(self): with self.test_session(): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 24c30802b5..20da66c303 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1467,7 +1467,10 @@ def ZerosLikeOutsideLoop(op, index): branch = op_ctxt.branch switch_val = switch(op.inputs[0], pred)[1 - branch] zeros_shape = array_ops.shape_internal(switch_val, optimize=False) - return array_ops.zeros(zeros_shape, dtype=val.dtype) + # Ensure ops created within array_ops.zeros are dominated by switch in + # cond context. + with ops.control_dependencies([switch_val]): + return array_ops.zeros(zeros_shape, dtype=val.dtype) else: return array_ops.zeros_like(val, optimize=False) -- GitLab From c6dbbdc339ab351700793885417d12fd7172d14c Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 14 Mar 2018 15:25:53 -0700 Subject: [PATCH 034/960] Remove underscore prefix from ref_identity op. PiperOrigin-RevId: 189096108 --- tensorflow/python/framework/python_op_gen.cc | 2 +- tensorflow/python/grappler/item_test.py | 2 +- .../python/kernel_tests/control_flow_ops_py_test.py | 13 +++++-------- .../python/kernel_tests/identity_op_py_test.py | 2 +- tensorflow/python/ops/control_flow_ops.py | 2 +- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 6ee8e554de..846e0c356c 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -100,7 +100,7 @@ bool IsOpWithUnderscorePrefix(const string& s) { "fused_batch_norm", "histogram_fixed_width", "stack", "batch_norm_with_global_normalization", // TODO(annarev): replace these ops in the next change. - "broadcast_gradient_args", "ref_identity"}); + "broadcast_gradient_args"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py index 7c3efd6249..c40de9da0a 100644 --- a/tensorflow/python/grappler/item_test.py +++ b/tensorflow/python/grappler/item_test.py @@ -111,7 +111,7 @@ class ItemTest(test.TestCase): with ops.Graph().as_default() as g: c = constant_op.constant([10]) v = variables.Variable([3], dtype=dtypes.int32) - i = gen_array_ops._ref_identity(v) + i = gen_array_ops.ref_identity(v) a = state_ops.assign(i, c) train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) train_op.append(a) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 5257826ebd..75f8644f69 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -591,10 +591,10 @@ class ControlFlowTest(test.TestCase): # Both v_f and v_t are uninitialized references. However, an actual use # of the reference in the 'true' branch in the 'tf.identity' op will # not 'fire' when v is uninitialized, so this is a valid construction. - # This test tests that _ref_identity allows uninitialized ref as input + # This test tests that ref_identity allows uninitialized ref as input # so that this construction is allowed. - v_f_op = gen_array_ops._ref_identity(v_f) - v_t_op = gen_array_ops._ref_identity(v_t) + v_f_op = gen_array_ops.ref_identity(v_f) + v_t_op = gen_array_ops.ref_identity(v_t) with ops.control_dependencies([v_f_op]): assign_v = state_ops.assign(v, [1.0]) with ops.control_dependencies([v_t_op]): @@ -751,7 +751,7 @@ class ControlFlowTest(test.TestCase): def b(i, x): self.assertEqual(x.dtype, dtypes.int32_ref) - return (i + 1, gen_array_ops._ref_identity(x)) + return (i + 1, gen_array_ops.ref_identity(x)) r = control_flow_ops.while_loop(c, b, [i, x], parallel_iterations=5) @@ -2212,12 +2212,9 @@ class ControlFlowTest(test.TestCase): self.assertEqual(x.dtype, dtypes.int32_ref) - # pylint: disable=protected-access def body(i, x): self.assertEqual(x.dtype, dtypes.int32_ref) - return [i + 1, gen_array_ops._ref_identity(x)] - - # pylint: enable=protected-access + return [i + 1, gen_array_ops.ref_identity(x)] r = control_flow_ops.while_loop(c, body, [i, x], parallel_iterations=5) diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py index 2cfe420bd4..49fb76d5b4 100644 --- a/tensorflow/python/kernel_tests/identity_op_py_test.py +++ b/tensorflow/python/kernel_tests/identity_op_py_test.py @@ -65,7 +65,7 @@ class IdentityOpTest(test.TestCase): constant_op.constant( [[1, 2, 3], [6, 5, 4]], dtype=dtypes.int32)) self.assertEquals(shape, tensor.get_shape()) - self.assertEquals(shape, gen_array_ops._ref_identity(tensor).get_shape()) + self.assertEquals(shape, gen_array_ops.ref_identity(tensor).get_shape()) if __name__ == "__main__": diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 20da66c303..1278768d8b 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -196,7 +196,7 @@ def _Identity(data, name=None): data = ops.internal_convert_to_tensor_or_indexed_slices(data, as_ref=True) if isinstance(data, ops.Tensor): if data.dtype._is_ref_dtype: # pylint: disable=protected-access - return gen_array_ops._ref_identity(data, name=name) + return gen_array_ops.ref_identity(data, name=name) else: return array_ops.identity(data, name=name) else: -- GitLab From c77422ff3fe8b8cab831488f449fb1a3c64127b1 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 14 Mar 2018 15:30:20 -0700 Subject: [PATCH 035/960] Re-phrase some tests in replicate_model_fn using parameterized tests. PiperOrigin-RevId: 189096779 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/replicate_model_fn_test.py | 26 +++++++------------ 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 773c6ab6c7..26d6bc5ae6 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -354,6 +354,7 @@ cuda_py_test( size = "medium", srcs = ["python/estimator/replicate_model_fn_test.py"], additional_deps = [ + "//third_party/py/absl/testing:parameterized", "//tensorflow/python/estimator", "//tensorflow/python/estimator:dnn", "//tensorflow/python/estimator:export_export", diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index d46a18aacf..144b45982c 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import re import shutil import tempfile +from absl.testing import parameterized import numpy as np import six @@ -57,26 +58,19 @@ from tensorflow.python.training import gradient_descent from tensorflow.python.training import training -# TODO(isaprykin): Parametrize all the tests on -# replicate_model_fn._VariableDistributionMode when it's supported. -class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): +class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase, + parameterized.TestCase): def setUp(self): self._model_dir = tempfile.mkdtemp() - def test_complete_flow_with_public_version(self): - return self._complete_flow_with_mode(mode=None) - - def test_complete_flow_with_mode_local_ps_server(self): - return self._complete_flow_with_mode( - replicate_model_fn._VariableDistributionMode. - SHARED_LOCAL_PARAMETER_SERVER) - - def test_complete_flow_with_mode_round_robin(self): - return self._complete_flow_with_mode( - replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN) - - def _complete_flow_with_mode(self, mode): + @parameterized.named_parameters( + ('PublicInterface', None), + ('ParameterServerMode', replicate_model_fn._VariableDistributionMode. + SHARED_LOCAL_PARAMETER_SERVER), + ('RoundRobinMode', + replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN)) + def test_complete_flow_with_mode(self, mode): n_classes = 3 input_dimension = 2 batch_size = 12 -- GitLab From 124a1835637fb71d84087430f79fe166b394f791 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Wed, 14 Mar 2018 15:36:32 -0700 Subject: [PATCH 036/960] Automated g4 rollback of changelist 189071037 PiperOrigin-RevId: 189097692 --- tensorflow/core/lib/wav/wav_io.cc | 74 +++--- tensorflow/core/lib/wav/wav_io.h | 33 --- tensorflow/core/lib/wav/wav_io_test.cc | 319 ------------------------- 3 files changed, 32 insertions(+), 394 deletions(-) diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc index 2165415ba5..77d3c88998 100644 --- a/tensorflow/core/lib/wav/wav_io.cc +++ b/tensorflow/core/lib/wav/wav_io.cc @@ -81,42 +81,13 @@ inline float Int16SampleToFloat(int16 data) { return data * kMultiplier; } -} // namespace - -// Handles moving the data index forward, validating the arguments, and avoiding -// overflow or underflow. -Status IncrementOffset(int old_offset, size_t increment, size_t max_size, - int* new_offset) { - if (old_offset < 0) { - return errors::InvalidArgument("Negative offsets are not allowed: ", - old_offset); - } - if (old_offset > max_size) { - return errors::InvalidArgument("Initial offset is outside data range: ", - old_offset); - } - if (increment < 0) { - return errors::InvalidArgument("Negative increments are not allowed: ", - old_offset); - } - *new_offset = old_offset + increment; - if (*new_offset > max_size) { - return errors::InvalidArgument("Data too short when trying to read string"); - } - // See above for the check that the input offset is positive. If it's negative - // here then it means that there's been an overflow in the arithmetic. - if (*new_offset < 0) { - return errors::InvalidArgument("Offset too large, overflowed: ", - *new_offset); - } - return Status::OK(); -} - Status ExpectText(const string& data, const string& expected_text, int* offset) { - int new_offset; - TF_RETURN_IF_ERROR( - IncrementOffset(*offset, expected_text.size(), data.size(), &new_offset)); + const int new_offset = *offset + expected_text.size(); + if (new_offset > data.size()) { + return errors::InvalidArgument("Data too short when trying to read ", + expected_text); + } const string found_text(data.begin() + *offset, data.begin() + new_offset); if (found_text != expected_text) { return errors::InvalidArgument("Header mismatch: Expected ", expected_text, @@ -126,16 +97,40 @@ Status ExpectText(const string& data, const string& expected_text, return Status::OK(); } +template +Status ReadValue(const string& data, T* value, int* offset) { + const int new_offset = *offset + sizeof(T); + if (new_offset > data.size()) { + return errors::InvalidArgument("Data too short when trying to read value"); + } + if (port::kLittleEndian) { + memcpy(value, data.data() + *offset, sizeof(T)); + } else { + *value = 0; + const uint8* data_buf = + reinterpret_cast(data.data() + *offset); + int shift = 0; + for (int i = 0; i < sizeof(T); ++i, shift += 8) { + *value = *value | (data_buf[i] << shift); + } + } + *offset = new_offset; + return Status::OK(); +} + Status ReadString(const string& data, int expected_length, string* value, int* offset) { - int new_offset; - TF_RETURN_IF_ERROR( - IncrementOffset(*offset, expected_length, data.size(), &new_offset)); + const int new_offset = *offset + expected_length; + if (new_offset > data.size()) { + return errors::InvalidArgument("Data too short when trying to read string"); + } *value = string(data.begin() + *offset, data.begin() + new_offset); *offset = new_offset; return Status::OK(); } +} // namespace + Status EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate, size_t num_channels, size_t num_frames, string* wav_string) { @@ -277,11 +272,6 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string, TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset)); uint32 chunk_size; TF_RETURN_IF_ERROR(ReadValue(wav_string, &chunk_size, &offset)); - if (chunk_size > std::numeric_limits::max()) { - return errors::InvalidArgument( - "WAV data chunk '", chunk_id, "' is too large: ", chunk_size, - " bytes, but the limit is ", std::numeric_limits::max()); - } if (chunk_id == kDataChunkId) { if (was_data_found) { return errors::InvalidArgument("More than one data chunk found in WAV"); diff --git a/tensorflow/core/lib/wav/wav_io.h b/tensorflow/core/lib/wav/wav_io.h index f004524177..adca0ee303 100644 --- a/tensorflow/core/lib/wav/wav_io.h +++ b/tensorflow/core/lib/wav/wav_io.h @@ -21,9 +21,6 @@ limitations under the License. #include #include -#include "tensorflow/core/lib/core/casts.h" -#include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -58,36 +55,6 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string, uint32* sample_count, uint16* channel_count, uint32* sample_rate); -// Everything below here is only exposed publicly for testing purposes. - -// Handles moving the data index forward, validating the arguments, and avoiding -// overflow or underflow. -Status IncrementOffset(int old_offset, size_t increment, size_t max_size, - int* new_offset); - -// This function is only exposed in the header for testing purposes, as a -// template that needs to be instantiated. Reads a typed numeric value from a -// stream of data. -template -Status ReadValue(const string& data, T* value, int* offset) { - int new_offset; - TF_RETURN_IF_ERROR( - IncrementOffset(*offset, sizeof(T), data.size(), &new_offset)); - if (port::kLittleEndian) { - memcpy(value, data.data() + *offset, sizeof(T)); - } else { - *value = 0; - const uint8* data_buf = - reinterpret_cast(data.data() + *offset); - int shift = 0; - for (int i = 0; i < sizeof(T); ++i, shift += 8) { - *value = *value | (data_buf[i] << shift); - } - } - *offset = new_offset; - return Status::OK(); -} - } // namespace wav } // namespace tensorflow diff --git a/tensorflow/core/lib/wav/wav_io_test.cc b/tensorflow/core/lib/wav/wav_io_test.cc index d8a83fc464..40ddd94abe 100644 --- a/tensorflow/core/lib/wav/wav_io_test.cc +++ b/tensorflow/core/lib/wav/wav_io_test.cc @@ -25,12 +25,6 @@ limitations under the License. namespace tensorflow { namespace wav { -// These are defined in wav_io.cc, and the signatures are here so we don't have -// to expose them in the public header. -Status ExpectText(const string& data, const string& expected_text, int* offset); -Status ReadString(const string& data, int expected_length, string* value, - int* offset); - TEST(WavIO, BadArguments) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; string result; @@ -161,318 +155,5 @@ TEST(WavIO, BasicStereo) { EXPECT_EQ(expected, result); } -// Test how chunk sizes larger than 2GB are handled, since they're stored as -// unsigned int32s, so there are lots of ways for conversions to confuse the -// decoding logic. The expected behavior is to fail with an error, since such -// large WAV files are not common, and are unsupported by many readers. -// See b/72655902. -TEST(WavIO, ChunkSizeOverflow) { - std::vector wav_data = { - 'R', 'I', 'F', 'F', // ChunkID - 60, 0, 0, 0, // ChunkSize: 36 + SubChunk2Size - 'W', 'A', 'V', 'E', // Format - 'f', 'm', 't', ' ', // Subchunk1ID - 16, 0, 0, 0, // Subchunk1Size - 1, 0, // AudioFormat: 1=PCM - 1, 0, // NumChannels - 0x44, 0xac, 0, 0, // SampleRate: 44100 - 0x88, 0x58, 0x1, 0, // BytesPerSecond: SampleRate * NumChannels * - // BitsPerSample/8 - 2, 0, // BytesPerSample: NumChannels * BitsPerSample/8 - 16, 0, // BitsPerSample - 'd', 'a', 't', 'a', // Subchunk2ID - 8, 0, 0, 0, // Subchunk2Size: NumSamples * NumChannels * - // BitsPerSample/8 - 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) - 'f', 'o', 'o', 'o', // Subchunk2ID - 0xff, 0xff, 0xff, 0xf8, // Chunk size that could cause an infinite loop. - 0, 0, // Sample 1: 0 - 0xff, 0x7f, // Sample 2: 32767 (saturated) - 0, 0, // Sample 3: 0 - 0x00, 0x80, // Sample 4: -32768 (saturated) - }; - string wav_data_string(wav_data.begin(), wav_data.end()); - std::vector decoded_audio; - uint32 decoded_sample_count; - uint16 decoded_channel_count; - uint32 decoded_sample_rate; - Status decode_status = DecodeLin16WaveAsFloatVector( - wav_data_string, &decoded_audio, &decoded_sample_count, - &decoded_channel_count, &decoded_sample_rate); - EXPECT_FALSE(decode_status.ok()); - EXPECT_TRUE(StringPiece(decode_status.error_message()).contains("too large")) - << decode_status.error_message(); -} - -TEST(WavIO, IncrementOffset) { - int new_offset = -1; - TF_EXPECT_OK(IncrementOffset(0, 10, 20, &new_offset)); - EXPECT_EQ(10, new_offset); - - new_offset = -1; - TF_EXPECT_OK(IncrementOffset(10, 4, 20, &new_offset)); - EXPECT_EQ(14, new_offset); - - new_offset = -1; - TF_EXPECT_OK(IncrementOffset(99, 1, 100, &new_offset)); - EXPECT_EQ(100, new_offset); - - new_offset = -1; - EXPECT_FALSE(IncrementOffset(-1, 1, 100, &new_offset).ok()); - - new_offset = -1; - EXPECT_FALSE(IncrementOffset(0, -1, 100, &new_offset).ok()); - - new_offset = -1; - EXPECT_FALSE(IncrementOffset(std::numeric_limits::max(), 1, - std::numeric_limits::max(), &new_offset) - .ok()); - - new_offset = -1; - EXPECT_FALSE(IncrementOffset(101, 1, 100, &new_offset).ok()); -} - -TEST(WavIO, ExpectText) { - std::vector test_data = { - 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - TF_EXPECT_OK(ExpectText(test_string, "Expected", &offset)); - EXPECT_EQ(8, offset); - - offset = 0; - Status expect_status = ExpectText(test_string, "Unexpected", &offset); - EXPECT_FALSE(expect_status.ok()); - - offset = 0; - TF_EXPECT_OK(ExpectText(test_string, "Exp", &offset)); - EXPECT_EQ(3, offset); - TF_EXPECT_OK(ExpectText(test_string, "ected", &offset)); - EXPECT_EQ(8, offset); - expect_status = ExpectText(test_string, "foo", &offset); - EXPECT_FALSE(expect_status.ok()); -} - -TEST(WavIO, ReadString) { - std::vector test_data = { - 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - string read_value; - TF_EXPECT_OK(ReadString(test_string, 2, &read_value, &offset)); - EXPECT_EQ("Ex", read_value); - EXPECT_EQ(2, offset); - - TF_EXPECT_OK(ReadString(test_string, 6, &read_value, &offset)); - EXPECT_EQ("pected", read_value); - EXPECT_EQ(8, offset); - - Status read_status = ReadString(test_string, 3, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueInt8) { - std::vector test_data = {0x00, 0x05, 0xff, 0x80}; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - int8 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(1, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(5, read_value); - EXPECT_EQ(2, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(-1, read_value); - EXPECT_EQ(3, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(-128, read_value); - EXPECT_EQ(4, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueUInt8) { - std::vector test_data = {0x00, 0x05, 0xff, 0x80}; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - uint8 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(1, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(5, read_value); - EXPECT_EQ(2, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(255, read_value); - EXPECT_EQ(3, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(128, read_value); - EXPECT_EQ(4, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueInt16) { - std::vector test_data = { - 0x00, 0x00, // 0 - 0xff, 0x00, // 255 - 0x00, 0x01, // 256 - 0xff, 0xff, // -1 - 0x00, 0x80, // -32768 - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - int16 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(2, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(255, read_value); - EXPECT_EQ(4, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(256, read_value); - EXPECT_EQ(6, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(-1, read_value); - EXPECT_EQ(8, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(-32768, read_value); - EXPECT_EQ(10, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueUInt16) { - std::vector test_data = { - 0x00, 0x00, // 0 - 0xff, 0x00, // 255 - 0x00, 0x01, // 256 - 0xff, 0xff, // 65535 - 0x00, 0x80, // 32768 - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - uint16 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(2, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(255, read_value); - EXPECT_EQ(4, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(256, read_value); - EXPECT_EQ(6, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(65535, read_value); - EXPECT_EQ(8, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(32768, read_value); - EXPECT_EQ(10, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueInt32) { - std::vector test_data = { - 0x00, 0x00, 0x00, 0x00, // 0 - 0xff, 0x00, 0x00, 0x00, // 255 - 0x00, 0xff, 0x00, 0x00, // 65280 - 0x00, 0x00, 0xff, 0x00, // 16,711,680 - 0xff, 0xff, 0xff, 0xff, // -1 - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - int32 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(4, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(255, read_value); - EXPECT_EQ(8, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(65280, read_value); - EXPECT_EQ(12, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(16711680, read_value); - EXPECT_EQ(16, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(-1, read_value); - EXPECT_EQ(20, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - -TEST(WavIO, ReadValueUInt32) { - std::vector test_data = { - 0x00, 0x00, 0x00, 0x00, // 0 - 0xff, 0x00, 0x00, 0x00, // 255 - 0x00, 0xff, 0x00, 0x00, // 65280 - 0x00, 0x00, 0xff, 0x00, // 16,711,680 - 0xff, 0xff, 0xff, 0xff, // 4,294,967,295 - }; - string test_string(test_data.begin(), test_data.end()); - - int offset = 0; - uint32 read_value; - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(0, read_value); - EXPECT_EQ(4, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(255, read_value); - EXPECT_EQ(8, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(65280, read_value); - EXPECT_EQ(12, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(16711680, read_value); - EXPECT_EQ(16, offset); - - TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); - EXPECT_EQ(4294967295, read_value); - EXPECT_EQ(20, offset); - - Status read_status = ReadValue(test_string, &read_value, &offset); - EXPECT_FALSE(read_status.ok()); -} - } // namespace wav } // namespace tensorflow -- GitLab From 33792456dbe0600b5c23f8cbffea0e74a69386c1 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Wed, 14 Mar 2018 15:58:15 -0700 Subject: [PATCH 037/960] Automated g4 rollback of changelist 188525171 PiperOrigin-RevId: 189100846 --- tensorflow/contrib/lite/kernels/BUILD | 31 +++- .../contrib/lite/kernels/audio_spectrogram.cc | 165 ++++++++++++++++++ .../lite/kernels/audio_spectrogram_test.cc | 122 +++++++++++++ .../lite/kernels/internal/spectrogram.cc | 1 - tensorflow/contrib/lite/kernels/mfcc.cc | 154 ++++++++++++++++ tensorflow/contrib/lite/kernels/mfcc_test.cc | 104 +++++++++++ tensorflow/contrib/lite/kernels/register.cc | 14 ++ 7 files changed, 589 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram.cc create mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc create mode 100644 tensorflow/contrib/lite/kernels/mfcc.cc create mode 100644 tensorflow/contrib/lite/kernels/mfcc_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index b8ab6d96a0..9c63269324 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -135,6 +135,7 @@ cc_library( srcs = [ "activations.cc", "add.cc", + "audio_spectrogram.cc", "basic_rnn.cc", "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", @@ -156,6 +157,7 @@ cc_library( "lsh_projection.cc", "lstm.cc", "mean.cc", + "mfcc.cc", "mul.cc", "pad.cc", "pooling.cc", @@ -196,15 +198,42 @@ cc_library( "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:string_util", "//tensorflow/contrib/lite/kernels:gemm_support", + "//tensorflow/contrib/lite/kernels/internal:audio_utils", "//tensorflow/contrib/lite/kernels/internal:kernel_utils", "//tensorflow/contrib/lite/kernels/internal:optimized", "//tensorflow/contrib/lite/kernels/internal:optimized_base", "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/contrib/lite/kernels/internal:reference", "//tensorflow/contrib/lite/kernels/internal:reference_base", - "//tensorflow/contrib/lite/kernels/internal:round", "//tensorflow/contrib/lite/kernels/internal:tensor_utils", "@farmhash_archive//:farmhash", + "@flatbuffers", + ], +) + +tf_cc_test( + name = "audio_spectrogram_test", + size = "small", + srcs = ["audio_spectrogram_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + +tf_cc_test( + name = "mfcc_test", + size = "small", + srcs = ["mfcc_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + "@flatbuffers", ], ) diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc new file mode 100644 index 0000000000..602f3888c1 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc @@ -0,0 +1,165 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +#include "flatbuffers/flexbuffers.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace audio_spectrogram { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +enum KernelType { + kReference, +}; + +typedef struct { + int window_size; + int stride; + bool magnitude_squared; + int output_height; + internal::Spectrogram* spectrogram; +} TfLiteAudioSpectrogramParams; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new TfLiteAudioSpectrogramParams; + + const uint8_t* buffer_t = reinterpret_cast(buffer); + + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + data->window_size = m["window_size"].AsInt64(); + data->stride = m["stride"].AsInt64(); + data->magnitude_squared = m["magnitude_squared"].AsBool(); + + data->spectrogram = new internal::Spectrogram; + + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + auto* params = reinterpret_cast(buffer); + delete params->spectrogram; + delete params; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); + + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, + params->stride)); + const int64_t sample_count = input->dims->data[0]; + const int64_t length_minus_window = (sample_count - params->window_size); + if (length_minus_window < 0) { + params->output_height = 0; + } else { + params->output_height = 1 + (length_minus_window / params->stride); + } + TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); + output_size->data[0] = input->dims->data[1]; + output_size->data[1] = params->output_height; + output_size->data[2] = params->spectrogram->output_frequency_channels(); + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->user_data); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, + params->stride)); + + const float* input_data = GetTensorData(input); + + const int64_t sample_count = input->dims->data[0]; + const int64_t channel_count = input->dims->data[1]; + + const int64_t output_width = params->spectrogram->output_frequency_channels(); + + float* output_flat = GetTensorData(output); + + std::vector input_for_channel(sample_count); + for (int64_t channel = 0; channel < channel_count; ++channel) { + float* output_slice = + output_flat + (channel * params->output_height * output_width); + for (int i = 0; i < sample_count; ++i) { + input_for_channel[i] = input_data[i * channel_count + channel]; + } + std::vector> spectrogram_output; + TF_LITE_ENSURE(context, + params->spectrogram->ComputeSquaredMagnitudeSpectrogram( + input_for_channel, &spectrogram_output)); + TF_LITE_ENSURE_EQ(context, spectrogram_output.size(), + params->output_height); + TF_LITE_ENSURE(context, spectrogram_output.empty() || + (spectrogram_output[0].size() == output_width)); + for (int row_index = 0; row_index < params->output_height; ++row_index) { + const std::vector& spectrogram_row = spectrogram_output[row_index]; + TF_LITE_ENSURE_EQ(context, spectrogram_row.size(), output_width); + float* output_row = output_slice + (row_index * output_width); + if (params->magnitude_squared) { + for (int i = 0; i < output_width; ++i) { + output_row[i] = spectrogram_row[i]; + } + } else { + for (int i = 0; i < output_width; ++i) { + output_row[i] = sqrtf(spectrogram_row[i]); + } + } + } + } + return kTfLiteOk; +} + +} // namespace audio_spectrogram + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM() { + static TfLiteRegistration r = { + audio_spectrogram::Init, audio_spectrogram::Free, + audio_spectrogram::Prepare, + audio_spectrogram::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc new file mode 100644 index 0000000000..8d460fdfc6 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc @@ -0,0 +1,122 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include +#include "flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); + +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class BaseAudioSpectrogramOpModel : public SingleOpModel { + public: + BaseAudioSpectrogramOpModel(const TensorData& input1, + const TensorData& output, int window_size, + int stride, bool magnitude_squared) { + input1_ = AddInput(input1); + output_ = AddOutput(output); + + flexbuffers::Builder fbb; + fbb.Map([&]() { + fbb.Int("window_size", window_size); + fbb.Int("stride", stride); + fbb.Bool("magnitude_squared", magnitude_squared); + }); + fbb.Finish(); + SetCustomOp("AudioSpectrogram", fbb.GetBuffer(), + Register_AUDIO_SPECTROGRAM); + BuildInterpreter({GetShape(input1_)}); + } + + int input1() { return input1_; } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int output_; +}; + +TEST(BaseAudioSpectrogramOpModel, NonSquaredTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, + {TensorType_FLOAT32, {}}, 8, 1, false); + m.PopulateTensor(m.input1(), + {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_EQ(3, output_shape.size()); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0.0f, 1.0f, 2.0f, 1.0f, 0.0f}, 1e-3))); +} + +TEST(SpectrogramOpTest, SquaredTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, + {TensorType_FLOAT32, {}}, 8, 1, true); + m.PopulateTensor(m.input1(), + {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_EQ(3, output_shape.size()); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0.f, 1.f, 4.f, 1.f, 0.f}, 1e-3))); +} + +TEST(SpectrogramOpTest, StrideTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {10, 1}}, + {TensorType_FLOAT32, {}}, 8, 2, true); + m.PopulateTensor(m.input1(), {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_THAT(output_shape, ElementsAre(1, 2, 5)); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0, 1, 4, 1, 0, 1, 2, 1, 2, 1}, 1e-3))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc index 66ca694dc4..0e481a9d40 100644 --- a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -54,7 +54,6 @@ inline int Log2Floor(uint n) { log += shift; } } - assert(value == 1); return log; } diff --git a/tensorflow/contrib/lite/kernels/mfcc.cc b/tensorflow/contrib/lite/kernels/mfcc.cc new file mode 100644 index 0000000000..018db0dc54 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mfcc.cc @@ -0,0 +1,154 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/mfcc.h" +#include "flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace mfcc { + +enum KernelType { + kReference, +}; + +typedef struct { + float upper_frequency_limit; + float lower_frequency_limit; + int filterbank_channel_count; + int dct_coefficient_count; +} TfLiteMfccParams; + +constexpr int kInputTensorWav = 0; +constexpr int kInputTensorRate = 1; +constexpr int kOutputTensor = 0; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new TfLiteMfccParams; + + const uint8_t* buffer_t = reinterpret_cast(buffer); + + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + data->upper_frequency_limit = m["upper_frequency_limit"].AsInt64(); + data->lower_frequency_limit = m["lower_frequency_limit"].AsInt64(); + data->filterbank_channel_count = m["filterbank_channel_count"].AsInt64(); + data->dct_coefficient_count = m["dct_coefficient_count"].AsInt64(); + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); + TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(inputWav), 3); + TF_LITE_ENSURE_EQ(context, NumDimensions(inputRate), 1); + + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, inputWav->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); + output_size->data[0] = inputWav->dims->data[0]; + output_size->data[1] = inputWav->dims->data[1]; + output_size->data[2] = params->dct_coefficient_count; + + return context->ResizeTensor(context, output, output_size); +} + +// Input is a single squared-magnitude spectrogram frame. The input spectrum +// is converted to linear magnitude and weighted into bands using a +// triangular mel filterbank, and a discrete cosine transform (DCT) of the +// values is taken. Output is populated with the lowest dct_coefficient_count +// of these values. +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->user_data); + + TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); + TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + const int32 sample_rate = *GetTensorData(inputRate); + + const int spectrogram_channels = inputWav->dims->data[2]; + const int spectrogram_samples = inputWav->dims->data[1]; + const int audio_channels = inputWav->dims->data[0]; + + internal::Mfcc mfcc; + mfcc.set_upper_frequency_limit(params->upper_frequency_limit); + mfcc.set_lower_frequency_limit(params->lower_frequency_limit); + mfcc.set_filterbank_channel_count(params->filterbank_channel_count); + mfcc.set_dct_coefficient_count(params->dct_coefficient_count); + + mfcc.Initialize(spectrogram_channels, sample_rate); + + const float* spectrogram_flat = GetTensorData(inputWav); + float* output_flat = GetTensorData(output); + + for (int audio_channel = 0; audio_channel < audio_channels; ++audio_channel) { + for (int spectrogram_sample = 0; spectrogram_sample < spectrogram_samples; + ++spectrogram_sample) { + const float* sample_data = + spectrogram_flat + + (audio_channel * spectrogram_samples * spectrogram_channels) + + (spectrogram_sample * spectrogram_channels); + std::vector mfcc_input(sample_data, + sample_data + spectrogram_channels); + std::vector mfcc_output; + mfcc.Compute(mfcc_input, &mfcc_output); + TF_LITE_ENSURE_EQ(context, params->dct_coefficient_count, + mfcc_output.size()); + float* output_data = output_flat + + (audio_channel * spectrogram_samples * + params->dct_coefficient_count) + + (spectrogram_sample * params->dct_coefficient_count); + for (int i = 0; i < params->dct_coefficient_count; ++i) { + output_data[i] = mfcc_output[i]; + } + } + } + + return kTfLiteOk; +} + +} // namespace mfcc + +TfLiteRegistration* Register_MFCC() { + static TfLiteRegistration r = {mfcc::Init, mfcc::Free, mfcc::Prepare, + mfcc::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/mfcc_test.cc b/tensorflow/contrib/lite/kernels/mfcc_test.cc new file mode 100644 index 0000000000..0291ca8c1c --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mfcc_test.cc @@ -0,0 +1,104 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include +#include "flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_MFCC(); + +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class BaseMfccOpModel : public SingleOpModel { + public: + BaseMfccOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + + flexbuffers::Builder fbb; + fbb.Map([&]() { + fbb.Int("upper_frequency_limit", 4000); + fbb.Int("lower_frequency_limit", 20); + fbb.Int("filterbank_channel_count", 40); + fbb.Int("dct_coefficient_count", 13); + }); + fbb.Finish(); + SetCustomOp("Mfcc", fbb.GetBuffer(), Register_MFCC); + + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int input2_; + int output_; +}; + +TEST(MfccOpTest, SimpleTest) { + BaseMfccOpModel m({TensorType_FLOAT32, {1, 1, 513}}, {TensorType_INT32, {1}}, + {TensorType_FLOAT32, {}}); + + std::vector data(513); + for (int i = 0; i < data.size(); ++i) { + data[i] = i + 1; + } + m.PopulateTensor(m.input1(), 0, data.data(), + data.data() + data.size()); + m.PopulateTensor(m.input2(), {22050}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 13)); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {29.13970072, -6.41568601, -0.61903012, -0.96778652, -0.26819878, + -0.40907028, -0.15614748, -0.23203119, -0.10481487, -0.1543029, + -0.0769791, -0.10806114, -0.06047613}, + 1e-3))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 9537b79a9a..369d3b9886 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -17,6 +17,14 @@ limitations under the License. namespace tflite { namespace ops { + +namespace custom { + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); +TfLiteRegistration* Register_MFCC(); + +} // namespace custom + namespace builtin { TfLiteRegistration* Register_RELU(); @@ -123,6 +131,12 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); + + // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that + // custom ops aren't always included by default. + AddCustom("Mfcc", tflite::ops::custom::Register_MFCC()); + AddCustom("AudioSpectrogram", + tflite::ops::custom::Register_AUDIO_SPECTROGRAM()); } TfLiteRegistration* BuiltinOpResolver::FindOp( -- GitLab From e33ef644f42235432027dd7ec2afd74dc3441dd9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 16:03:54 -0700 Subject: [PATCH 038/960] Enable async eager mode in python. Add some benchmarks. PiperOrigin-RevId: 189101670 --- .../python/examples/resnet50/resnet50_test.py | 127 ++++++++++++------ tensorflow/contrib/eager/python/tfe.py | 12 ++ tensorflow/python/eager/benchmarks_test.py | 97 +++++++++++-- tensorflow/python/eager/context.py | 107 +++++++++++++-- tensorflow/python/eager/core_test.py | 50 +++++++ tensorflow/python/framework/ops.py | 50 +++++-- tensorflow/python/framework/ops_test.py | 3 + tensorflow/python/pywrap_tfe.i | 4 + tensorflow/tools/api/golden/tensorflow.pbtxt | 2 +- 9 files changed, 371 insertions(+), 81 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 65dcc53aab..d6923293a3 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -64,22 +64,29 @@ def train_one_step(model, images, labels, optimizer): class ResNet50Test(tf.test.TestCase): - def _apply(self, defun=False): + def _apply(self, defun=False, execution_mode=None): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call) - with tf.device(device): + with tf.device(device), tfe.execution_mode(execution_mode): images, _ = random_batch(2) output = model(images, training=False) + tfe.async_wait() self.assertEqual((2, 1000), output.shape) def test_apply(self): self._apply(defun=False) + def test_apply_async(self): + self._apply(defun=False, execution_mode=tfe.ASYNC) + def test_apply_with_defun(self): self._apply(defun=True) + def test_apply_with_defun_async(self): + self._apply(defun=True, execution_mode=tfe.ASYNC) + def test_apply_no_top(self): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format, include_top=False) @@ -98,7 +105,7 @@ class ResNet50Test(tf.test.TestCase): output = model(images, training=False) self.assertEqual((2, 2048), output.shape) - def test_train(self): + def _test_train(self, execution_mode=None): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) tf.train.get_or_create_global_step() @@ -106,15 +113,22 @@ class ResNet50Test(tf.test.TestCase): with tf.contrib.summary.create_file_writer( logdir, max_queue=0, name='t0').as_default(), tf.contrib.summary.always_record_summaries(): - with tf.device(device): + with tf.device(device), tfe.execution_mode(execution_mode): optimizer = tf.train.GradientDescentOptimizer(0.1) images, labels = random_batch(2) train_one_step(model, images, labels, optimizer) self.assertEqual(320, len(model.variables)) + tfe.async_wait() events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].tag, 'loss') + def test_train(self): + self._test_train() + + def test_train_async(self): + self._test_train(execution_mode=tfe.ASYNC) + def test_no_garbage(self): device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) @@ -183,59 +197,84 @@ class ResNet50Benchmarks(tf.test.Benchmark): # a sync. This is a roundabout way, yes. tf.constant(1.).cpu() - def _benchmark_eager_apply(self, label, defun=False): - device, data_format = device_and_data_format() - model = resnet50.ResNet50(data_format) - if defun: - model.call = tfe.defun(model.call) - batch_size = 64 - num_burn = 5 - num_iters = 30 - with tf.device(device): - images, _ = random_batch(batch_size) - for _ in xrange(num_burn): - model(images, training=False).cpu() - gc.collect() - start = time.time() - for _ in xrange(num_iters): - model(images, training=False).cpu() - self._report(label, start, num_iters, device, batch_size, data_format) - - def benchmark_eager_apply(self): - self._benchmark_eager_apply('eager_apply', defun=False) - - def benchmark_eager_apply_with_defun(self): - self._benchmark_eager_apply('eager_apply_with_defun', defun=True) - - def _benchmark_eager_train(self, label, make_iterator, defun=False): - device, data_format = device_and_data_format() - for batch_size in self._train_batch_sizes(): - (images, labels) = random_batch(batch_size) - num_burn = 3 - num_iters = 10 + def _benchmark_eager_apply(self, label, defun=False, execution_mode=None): + with tfe.execution_mode(execution_mode): + device, data_format = device_and_data_format() model = resnet50.ResNet50(data_format) if defun: model.call = tfe.defun(model.call) - optimizer = tf.train.GradientDescentOptimizer(0.1) - + batch_size = 64 + num_burn = 5 + num_iters = 30 with tf.device(device): - iterator = make_iterator((images, labels)) + images, _ = random_batch(batch_size) for _ in xrange(num_burn): - (images, labels) = iterator.next() - train_one_step(model, images, labels, optimizer) - self._force_gpu_sync() + model(images, training=False).cpu() + if execution_mode: + tfe.async_wait() gc.collect() - start = time.time() for _ in xrange(num_iters): - (images, labels) = iterator.next() - train_one_step(model, images, labels, optimizer) - self._force_gpu_sync() + model(images, training=False).cpu() + if execution_mode: + tfe.async_wait() self._report(label, start, num_iters, device, batch_size, data_format) + def benchmark_eager_apply(self): + self._benchmark_eager_apply('eager_apply', defun=False) + + def benchmark_eager_apply_async(self): + self._benchmark_eager_apply( + 'eager_apply_async', defun=False, execution_mode=tfe.ASYNC) + + def benchmark_eager_apply_with_defun(self): + self._benchmark_eager_apply('eager_apply_with_defun', defun=True) + + def _benchmark_eager_train(self, + label, + make_iterator, + defun=False, + execution_mode=None): + with tfe.execution_mode(execution_mode): + device, data_format = device_and_data_format() + for batch_size in self._train_batch_sizes(): + (images, labels) = random_batch(batch_size) + num_burn = 3 + num_iters = 10 + model = resnet50.ResNet50(data_format) + if defun: + model.call = tfe.defun(model.call) + optimizer = tf.train.GradientDescentOptimizer(0.1) + + with tf.device(device): + iterator = make_iterator((images, labels)) + for _ in xrange(num_burn): + (images, labels) = iterator.next() + train_one_step(model, images, labels, optimizer) + if execution_mode: + tfe.async_wait() + self._force_gpu_sync() + gc.collect() + + start = time.time() + for _ in xrange(num_iters): + (images, labels) = iterator.next() + train_one_step(model, images, labels, optimizer) + if execution_mode: + tfe.async_wait() + self._force_gpu_sync() + self._report(label, start, num_iters, device, batch_size, data_format) + def benchmark_eager_train(self): self._benchmark_eager_train('eager_train', MockIterator, defun=False) + def benchmark_eager_train_async(self): + self._benchmark_eager_train( + 'eager_train_async', + MockIterator, + defun=False, + execution_mode=tfe.ASYNC) + def benchmark_eager_train_with_defun(self): self._benchmark_eager_train( 'eager_train_with_defun', MockIterator, defun=True) diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index 5aabc9aae8..c6f3f20e78 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -62,12 +62,18 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@executing_eagerly @@in_eager_mode +@@set_execution_mode +@@execution_mode +@@async_wait +@@async_clear_error @@run_test_in_graph_and_eager_modes @@DEVICE_PLACEMENT_EXPLICIT @@DEVICE_PLACEMENT_WARN @@DEVICE_PLACEMENT_SILENT +@@SYNC +@@ASYNC """ from __future__ import absolute_import @@ -95,6 +101,12 @@ from tensorflow.python.eager.context import DEVICE_PLACEMENT_WARN from tensorflow.python.eager.context import DEVICE_PLACEMENT_SILENT from tensorflow.python.eager.context import executing_eagerly from tensorflow.python.eager.context import list_devices +from tensorflow.python.eager.context import set_execution_mode +from tensorflow.python.eager.context import execution_mode +from tensorflow.python.eager.context import async_wait +from tensorflow.python.eager.context import async_clear_error +from tensorflow.python.eager.context import SYNC +from tensorflow.python.eager.context import ASYNC from tensorflow.python.eager.context import num_gpus from tensorflow.python.eager.execution_callbacks import add_execution_callback from tensorflow.python.eager.execution_callbacks import clear_execution_callbacks diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 8c6e7e5758..9ca5041c38 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -82,16 +82,24 @@ class MicroBenchmarks(test.Benchmark): self._num_iters_2_by_2 = 30000 self._num_iters_100_by_784 = 1000 - def _run(self, func, num_iters): + def _run(self, func, num_iters, execution_mode=None): # call func to maybe warm up the GPU - func() - start = time.time() - for _ in xrange(num_iters): + ctx = context.context() + with ctx.execution_mode(execution_mode): func() - end = time.time() - mean_us = (end - start) * 1e6 / num_iters - self.report_benchmark(iters=num_iters, wall_time=mean_us, - extras={"examples_per_sec": num_iters/(end-start)}) + if execution_mode == context.ASYNC: + ctx.async_wait() + start = time.time() + for _ in xrange(num_iters): + func() + if execution_mode == context.ASYNC: + ctx.async_wait() + end = time.time() + mean_us = (end - start) * 1e6 / num_iters + self.report_benchmark( + iters=num_iters, + wall_time=mean_us, + extras={"examples_per_sec": num_iters / (end - start)}) def benchmark_create_np_array(self): func = lambda: np.array([3.0]) @@ -236,9 +244,10 @@ class MicroBenchmarks(test.Benchmark): func = lambda: np.dot(a, b) self._run(func, num_iters) - def _benchmark_tf_matmul(self, m, transpose_b, num_iters): + def _benchmark_tf_matmul(self, m, transpose_b, num_iters, + execution_mode=None): func = lambda: math_ops.matmul(m, m, transpose_b=transpose_b) - self._run(func, num_iters) + self._run(func, num_iters, execution_mode=execution_mode) def _benchmark_gen_math_ops_matmul(self, m, transpose_b, num_iters): def func(): @@ -267,10 +276,14 @@ class MicroBenchmarks(test.Benchmark): self._run(func, num_iters) - def _benchmark_defun_matmul(self, m, transpose_b, num_iters): + def _benchmark_defun_matmul(self, + m, + transpose_b, + num_iters, + execution_mode=None): f = function.defun(math_ops.matmul) func = lambda: f(m, m, transpose_b) - self._run(func, num_iters) + self._run(func, num_iters, execution_mode=execution_mode) def _benchmark_read_variable(self, m, num_iters): self._run(m.value, num_iters) @@ -301,6 +314,15 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_tf_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + def benchmark_tf_matmul_2_by_2_CPU_async(self): + with context.device(CPU): + m = self._m_2_by_2.cpu() + self._benchmark_tf_matmul( + m, + transpose_b=False, + num_iters=self._num_iters_2_by_2, + execution_mode=context.ASYNC) + def benchmark_gen_math_ops_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -325,6 +347,15 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + def benchmark_defun_matmul_2_by_2_CPU_async(self): + with context.device(CPU): + m = self._m_2_by_2.cpu() + self._benchmark_defun_matmul( + m, + transpose_b=False, + num_iters=self._num_iters_2_by_2, + execution_mode=context.ASYNC) + def benchmark_tf_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -333,6 +364,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_tf_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + def benchmark_tf_matmul_2_by_2_GPU_async(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_2_by_2.gpu() + self._benchmark_tf_matmul( + m, + transpose_b=False, + num_iters=self._num_iters_2_by_2, + execution_mode=context.ASYNC) + def benchmark_gen_math_ops_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -357,6 +399,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + def benchmark_defun_matmul_2_by_2_GPU_async(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_2_by_2.gpu() + self._benchmark_defun_matmul( + m, + transpose_b=False, + num_iters=self._num_iters_2_by_2, + execution_mode=context.ASYNC) + # Benchmarks for AA.T, A of dimension 100 by 784. def benchmark_np_matmul_100_by_784(self): self._benchmark_np_matmul( @@ -370,6 +423,15 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_tf_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) + def benchmark_tf_matmul_100_by_784_CPU_async(self): + with context.device(CPU): + m = self._m_100_by_784.cpu() + self._benchmark_tf_matmul( + m, + transpose_b=True, + num_iters=self._num_iters_100_by_784, + execution_mode=context.ASYNC) + def benchmark_gen_math_ops_matmul_100_by_784_CPU(self): with context.device(CPU): m = self._m_100_by_784.cpu() @@ -402,6 +464,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_tf_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) + def benchmark_tf_matmul_100_by_784_GPU_async(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_100_by_784.gpu() + self._benchmark_tf_matmul( + m, + transpose_b=True, + num_iters=self._num_iters_100_by_784, + execution_mode=context.ASYNC) + def benchmark_gen_math_ops_matmul_100_by_784_GPU(self): if not context.num_gpus(): return diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 87d3ed880a..7953d10a89 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -53,6 +53,8 @@ DEVICE_PLACEMENT_WARN = pywrap_tensorflow.TFE_DEVICE_PLACEMENT_WARN DEVICE_PLACEMENT_SILENT = pywrap_tensorflow.TFE_DEVICE_PLACEMENT_SILENT DEVICE_PLACEMENT_SILENT_FOR_INT32 = ( pywrap_tensorflow.TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) +SYNC = 0 +ASYNC = 1 class _TensorCache(object): @@ -89,6 +91,7 @@ class _EagerContext(threading.local): self.summary_writer_resource = None self.scalar_cache = {} self.ones_rank_cache = _TensorCache() + self.execution_mode = None ContextStackEntry = collections.namedtuple( @@ -131,24 +134,41 @@ context_stack = ContextStack() class Context(object): """Environment in which eager operations execute.""" - def __init__(self, config=None, device_policy=None): + # TODO(agarwal): create and link in some documentation for `execution_mode`. + # pylint: disable=redefined-outer-name + def __init__(self, config=None, device_policy=None, execution_mode=None): """Creates a new Context. Args: config: (Optional.) A `ConfigProto` protocol buffer with configuration - options for the Context. Note that a lot of these options may be - currently unimplemented or irrelevant when eager execution is enabled. + options for the Context. Note that a lot of these options may be + currently unimplemented or irrelevant when eager execution is enabled. device_policy: (Optional.) What policy to use when trying to run an - operation on a device with inputs which are not on that device. - Valid values: - tfe.DEVICE_PLACEMENT_EXPLICIT: raises an error if the placement is not - correct. - tfe.DEVICE_PLACEMENT_WARN: copies the tensors which are not on the + operation on a device with inputs which are not on that device. + When set to None, an appropriate value will be picked automatically. + The value picked may change between TensorFlow releases. + + Defaults to tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32. + Valid values: + - tfe.DEVICE_PLACEMENT_EXPLICIT: raises an error if the placement is + not correct. + - tfe.DEVICE_PLACEMENT_WARN: copies the tensors which are not on the right device but raises a warning. - tfe.DEVICE_PLACEMENT_SILENT: silently copies the tensors. This might + - tfe.DEVICE_PLACEMENT_SILENT: silently copies the tensors. This might hide performance problems. - tfe.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, + - tfe.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, raising errors on the other ones. + execution_mode: (Optional.) Policy controlling how operations dispatched + are actually executed. When set to None, an appropriate value will be + picked automatically. The value picked may change between TensorFlow + releases. + Valid values: + - tf.contrib.eager.SYNC: executes each operation synchronously. + - tf.contrib.eager.ASYNC: executes each operation asynchronously. These + operations may return "non-ready" handles. + + Raises: + ValueError: If execution_mode is not valid. """ self._eager_context = _EagerContext() self._context_handle = None @@ -158,6 +178,14 @@ class Context(object): self._seed = None self._initialize_lock = threading.Lock() self._device_policy = device_policy + if execution_mode not in (None, SYNC, ASYNC): + raise ValueError( + "execution_mode should be None/SYNC/ASYNC. Got %s" % execution_mode) + if execution_mode is None: + execution_mode = SYNC + self._execution_mode = execution_mode + + # pylint: enable=redefined-outer-name def _set_global_seed(self, seed): """Set a global eager mode seed for random ops.""" @@ -195,6 +223,8 @@ class Context(object): if self._device_policy is not None: pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy( opts, self._device_policy) + if self._execution_mode == ASYNC: + pywrap_tensorflow.TFE_ContextOptionsSetAsync(True) self._context_handle = pywrap_tensorflow.TFE_NewContext(opts, status) finally: pywrap_tensorflow.TFE_DeleteContextOptions(opts) @@ -356,6 +386,43 @@ class Context(object): """List of the names of devices available to execute operations.""" return self._devices + def get_execution_mode(self): + mode = self._eager_context.execution_mode + if mode is None: + mode = self._execution_mode + return mode + + def set_execution_mode(self, mode): + """Sets execution mode for current thread.""" + if mode not in (None, SYNC, ASYNC): + raise ValueError( + "Execution mode should be None/SYNC/ASYNC. Got %s" % mode) + if mode is None: + mode = SYNC + self._eager_context.execution_mode = mode + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TFE_ContextSetAsyncForThread(self._handle, + mode == ASYNC, status) + + @tf_contextlib.contextmanager + def execution_mode(self, mode): + """Context manager for setting execution mode for current thread.""" + old_mode = self.get_execution_mode() + try: + self.set_execution_mode(mode) + yield + finally: + self.set_execution_mode(old_mode) + + def async_wait(self): + """Waits for ops dispatched in ASYNC mode to finish.""" + with errors.raise_exception_on_not_ok_status() as status: + pywrap_tensorflow.TFE_ContextAsyncWait(self._handle, status) + + def async_clear_error(self): + """Clears errors raised during ASYNC execution.""" + pywrap_tensorflow.TFE_ContextAsyncClearError(self._handle) + def num_gpus(self): """The number of GPUs available to execute operations.""" self._initialize_handle_and_devices() @@ -595,6 +662,26 @@ def list_devices(): return context().devices() +def set_execution_mode(mode): + """Sets execution mode for the current thread.""" + context().set_execution_mode(mode) + + +def execution_mode(mode): + """Context manager for setting execution mode for current thread.""" + return context().execution_mode(mode) + + +def async_wait(): + """Waits for ops dispatched in ASYNC mode to finish.""" + return context().async_wait() + + +def async_clear_error(): + """Clears errors raised during ASYNC execution mode.""" + return context().async_clear_error() + + def num_gpus(): """Get the number of available GPU devices. diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 61c5526d48..6dfd8d1afa 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -63,6 +63,16 @@ class TFETest(test_util.TensorFlowTestCase): ctx.scope_name = 'foo' self.assertEqual('foo', ctx.scope_name) + self.assertEqual(context.SYNC, ctx.get_execution_mode()) + ctx.set_execution_mode(context.ASYNC) + self.assertEqual(context.ASYNC, ctx.get_execution_mode()) + ctx.set_execution_mode(context.SYNC) + self.assertEqual(context.SYNC, ctx.get_execution_mode()) + with ctx.execution_mode(context.ASYNC): + self.assertEqual(context.ASYNC, ctx.get_execution_mode()) + ctx.set_execution_mode(context.SYNC) + self.assertEqual(context.SYNC, ctx.get_execution_mode()) + self.assertIsNone(ctx.summary_writer_resource) ctx.summary_writer_resource = 'mock' self.assertEqual('mock', ctx.summary_writer_resource) @@ -208,6 +218,23 @@ class TFETest(test_util.TensorFlowTestCase): with self.assertRaises(RuntimeError): x.gpu(context.context().num_gpus() + 1) + def testCopyBetweenDevicesAsync(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + with context.execution_mode(context.ASYNC): + x = constant_op.constant([[1., 2.], [3., 4.]]) + x = x.cpu() + x = x.gpu() + x = x.gpu() + x = x.cpu() + context.async_wait() + + # Invalid device + with self.assertRaises(RuntimeError): + x.gpu(context.context().num_gpus() + 1) + context.async_wait() + context.async_clear_error() + def testCopyScope(self): if not context.context().num_gpus(): self.skipTest('No GPUs found') @@ -248,6 +275,29 @@ class TFETest(test_util.TensorFlowTestCase): attrs=('T', three.dtype.as_datatype_enum))[0] self.assertAllEqual(15, product) + def testExecuteBasicAsync(self): + with context.execution_mode(context.ASYNC): + three = constant_op.constant(3) + five = constant_op.constant(5) + product = execute( + b'Mul', + num_outputs=1, + inputs=[three, five], + attrs=('T', three.dtype.as_datatype_enum))[0] + self.assertAllEqual(15, product) + # Error: Invalid arguments + context.set_execution_mode(context.ASYNC) + with self.assertRaises(errors.InvalidArgumentError): + execute( + b'MatMul', + num_outputs=1, + inputs=[three, five], + attrs=('transpose_a', False, 'transpose_b', False, 'T', + three.dtype.as_datatype_enum)) + context.async_wait() + context.async_clear_error() + context.set_execution_mode(context.SYNC) + def testExecuteTooManyNumOutputs(self): # num_outputs provided is 50, but only one output is produced. product = execute( diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 44df22cf58..b2f43773fe 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5184,7 +5184,8 @@ def init_scope(): @tf_export("enable_eager_execution") -def enable_eager_execution(config=None, device_policy=None): +def enable_eager_execution(config=None, device_policy=None, + execution_mode=None): """Enables eager execution for the lifetime of this program. Eager execution provides an imperative interface to TensorFlow. With eager @@ -5210,13 +5211,15 @@ def enable_eager_execution(config=None, device_policy=None): Args: config: (Optional.) A @{tf.ConfigProto} to use to configure the environment - in which operations are executed. Note that @{tf.ConfigProto} is also - used to configure graph execution (via @{tf.Session}) and many options - within `tf.ConfigProto` are not implemented (or are irrelevant) when + in which operations are executed. Note that @{tf.ConfigProto} is also + used to configure graph execution (via @{tf.Session}) and many options + within `tf.ConfigProto` are not implemented (or are irrelevant) when eager execution is enabled. device_policy: (Optional.) Policy controlling how operations requiring inputs on a specific device (e.g., a GPU 0) handle inputs on a different - device (e.g. GPU 1 or CPU). + device (e.g. GPU 1 or CPU). When set to None, an appropriate value will be + picked automatically. The value picked may change between TensorFlow + releases. Valid values: - tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT: raises an error if the @@ -5232,6 +5235,15 @@ def enable_eager_execution(config=None, device_policy=None): - tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, raising errors on the other ones. + execution_mode: (Optional.) Policy controlling how operations dispatched are + actually executed. When set to None, an appropriate value will be picked + automatically. The value picked may change between TensorFlow releases. + Valid values: + + - tf.contrib.eager.SYNC: executes each operation synchronously. + + - tf.contrib.eager.ASYNC: executes each operation asynchronously. These + operations may return "non-ready" handles. Raises: ValueError: If eager execution is enabled after creating/executing a @@ -5248,6 +5260,10 @@ def enable_eager_execution(config=None, device_policy=None): raise ValueError( "device_policy must be one of None, tf.contrib.eager.DEVICE_PLACEMENT_*" ) + if execution_mode not in (None, context.SYNC, context.ASYNC): + raise ValueError( + "execution_mode must be one of None, tf.contrib.eager.SYNC, " + "tf.contrib.eager.ASYNC") # pylint: disable=protected-access if context._default_mode == context.GRAPH_MODE: graph_mode_has_been_used = ( @@ -5258,8 +5274,10 @@ def enable_eager_execution(config=None, device_policy=None): "tf.enable_eager_execution must be called at program startup.") context._default_mode = context.EAGER_MODE if context._context is None: - context._context = context.Context(config=config, - device_policy=device_policy) + context._context = context.Context( + config=config, + device_policy=device_policy, + execution_mode=execution_mode) if context.context_stack.stack: raise AssertionError("Invariant violated: The context stack must " "be empty when eager execution is enabled.") @@ -5267,15 +5285,19 @@ def enable_eager_execution(config=None, device_policy=None): # context stack; this entry won't ever be popped, as it's impossible to # disable eager execution context.context_stack.push(False, context.eager_mode) - elif ((config is not None and config is not context._context._config) - or (device_policy is not None - and device_policy is not context._context._device_policy)): + elif ((config is not None and config is not context._context._config) or + (device_policy is not None and + device_policy is not context._context._device_policy) or + (execution_mode is not None and + execution_mode is not context._context._execution_mode)): raise ValueError("Trying to change the options of an active eager" " execution. Context config: %s, specified config:" - " %s. Context device policy: %s; specified device" - " policy: %s." % (config, context._context._config, - device_policy, - context._context._device_policy)) + " %s. Context device policy: %s, specified device" + " policy: %s. Context execution mode: %s, " + " specified execution mode %s." % + (context._context._config, config, + context._context._device_policy, device_policy, + context._context._execution_mode, execution_mode)) else: raise ValueError( "tf.enable_eager_execution must be called at program startup.") diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 6daab80408..d96e0708f8 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2919,6 +2919,9 @@ class EnableEagerExecutionTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(ValueError, "device_policy must be one of"): c = config_pb2.ConfigProto() ops.enable_eager_execution(c, c) + with self.assertRaisesRegexp(ValueError, "execution_mode must be one of"): + c = config_pb2.ConfigProto() + ops.enable_eager_execution(c, execution_mode=c) if __name__ == "__main__": diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index b481ddf5d4..39fabb9c1b 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -26,6 +26,9 @@ limitations under the License. %rename("%s") TFE_ContextClearCaches; %rename("%s") TFE_ContextGetDevicePlacementPolicy; %rename("%s") TFE_ContextSetThreadLocalDevicePlacementPolicy; +%rename("%s") TFE_ContextSetAsyncForThread; +%rename("%s") TFE_ContextAsyncWait; +%rename("%s") TFE_ContextAsyncClearError; %rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; %rename("%s") TFE_Py_RegisterExceptionClass; @@ -51,6 +54,7 @@ limitations under the License. %rename("%s") TFE_NewContextOptions; %rename("%s") TFE_ContextOptionsSetConfig; %rename("%s") TFE_ContextOptionsSetDevicePlacementPolicy; +%rename("%s") TFE_ContextOptionsSetAsync; %rename("%s") TFE_DeleteContextOptions; %rename("%s") TFE_Py_TensorShapeSlice; diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 80735cea5d..99e09c3759 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -974,7 +974,7 @@ tf_module { } member_method { name: "enable_eager_execution" - argspec: "args=[\'config\', \'device_policy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'config\', \'device_policy\', \'execution_mode\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "encode_base64" -- GitLab From d7efbfa0c68be25c05de99c1eb099fa9ba3dddb4 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 14 Mar 2018 16:20:13 -0700 Subject: [PATCH 039/960] Automated g4 rollback of changelist 189089672 PiperOrigin-RevId: 189104013 --- tensorflow/contrib/image/BUILD | 109 ------- tensorflow/contrib/image/__init__.py | 7 - .../kernel_tests/dense_image_warp_test.py | 264 ---------------- .../kernel_tests/interpolate_spline_test.py | 261 ---------------- .../kernel_tests/sparse_image_warp_test.py | 251 --------------- .../test_data/Yellow_Smiley_Face.png | Bin 14060 -> 0 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-0.png | Bin 18537 -> 0 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-1.png | Bin 19086 -> 0 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-4.png | Bin 18884 -> 0 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-0.png | Bin 18109 -> 0 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-1.png | Bin 19251 -> 0 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-4.png | Bin 19132 -> 0 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-0.png | Bin 17500 -> 0 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-1.png | Bin 18058 -> 0 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-4.png | Bin 19313 -> 0 bytes .../image/python/ops/dense_image_warp.py | 196 ------------ .../image/python/ops/interpolate_spline.py | 285 ------------------ .../image/python/ops/sparse_image_warp.py | 192 ------------ 18 files changed, 1565 deletions(-) delete mode 100644 tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py delete mode 100644 tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py delete mode 100644 tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-0.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-0.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png delete mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-4.png delete mode 100644 tensorflow/contrib/image/python/ops/dense_image_warp.py delete mode 100644 tensorflow/contrib/image/python/ops/interpolate_spline.py delete mode 100644 tensorflow/contrib/image/python/ops/sparse_image_warp.py diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 760ed70fbb..3ff02e085e 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -78,10 +78,7 @@ tf_custom_op_py_library( ], srcs_version = "PY2AND3", deps = [ - ":dense_image_warp_py", ":image_ops", - ":interpolate_spline_py", - ":sparse_image_warp_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:common_shapes", @@ -197,112 +194,6 @@ cuda_py_test( ], ) -py_library( - name = "dense_image_warp_py", - srcs = [ - "python/ops/dense_image_warp.py", - ], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) - -py_library( - name = "interpolate_spline_py", - srcs = [ - "python/ops/interpolate_spline.py", - ], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - "//tensorflow/python:util", - ], -) - -py_library( - name = "sparse_image_warp_py", - srcs = [ - "python/ops/sparse_image_warp.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":dense_image_warp_py", - ":interpolate_spline_py", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - "//tensorflow/python:util", - ], -) - -cuda_py_test( - name = "sparse_image_warp_test", - size = "medium", - srcs = ["python/kernel_tests/sparse_image_warp_test.py"], - additional_deps = [ - ":sparse_image_warp_py", - "//third_party/py/numpy", - "//tensorflow/python:client", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:clip_ops", - "//tensorflow/python:io_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - "//tensorflow/python:image_ops", - "//tensorflow/python:variables", - "//tensorflow/core:protos_all_py", - ], - data = glob(["python/kernel_tests/test_data/*.png"]), -) - -cuda_py_test( - name = "dense_image_warp_test", - size = "medium", - srcs = ["python/kernel_tests/dense_image_warp_test.py"], - additional_deps = [ - ":dense_image_warp_py", - "//third_party/py/numpy", - "//tensorflow/python:client", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:clip_ops", - "//tensorflow/python:io_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - "//tensorflow/python:image_ops", - "//tensorflow/python:variables", - "//tensorflow/core:protos_all_py", - ], -) - -cuda_py_test( - name = "interpolate_spline_test", - size = "medium", - srcs = ["python/kernel_tests/interpolate_spline_test.py"], - additional_deps = [ - ":interpolate_spline_py", - "//third_party/py/numpy", - "//tensorflow/python:client", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:clip_ops", - "//tensorflow/python:io_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:image_ops", - "//tensorflow/python:variables", - "//tensorflow/core:protos_all_py", - "//third_party/py/scipy", - ], -) - tf_py_test( name = "segmentation_test", size = "medium", diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index e982030bc8..cc8ed117ba 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -30,9 +30,6 @@ projective transforms (including rotation) are supported. @@transform @@translate @@translations_to_projective_transforms -@@dense_image_warp -@@interpolate_spline -@@sparse_image_warp ## Image Segmentation `Ops` @@ -50,8 +47,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.image.python.ops.dense_image_warp import dense_image_warp - from tensorflow.contrib.image.python.ops.distort_image_ops import adjust_hsv_in_yiq from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_yiq @@ -62,9 +57,7 @@ from tensorflow.contrib.image.python.ops.image_ops import rotate from tensorflow.contrib.image.python.ops.image_ops import transform from tensorflow.contrib.image.python.ops.image_ops import translate from tensorflow.contrib.image.python.ops.image_ops import translations_to_projective_transforms -from tensorflow.contrib.image.python.ops.interpolate_spline import interpolate_spline from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms import single_image_random_dot_stereograms -from tensorflow.contrib.image.python.ops.sparse_image_warp import sparse_image_warp from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py deleted file mode 100644 index 24d99ccaa6..0000000000 --- a/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for dense_image_warp.""" - -import math -import numpy as np - -from tensorflow.contrib.image.python.ops import dense_image_warp - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes - -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import googletest - -from tensorflow.python.training import adam - - -class DenseImageWarpTest(test_util.TensorFlowTestCase): - - def setUp(self): - np.random.seed(0) - - def test_interpolate_small_grid_ij(self): - grid = constant_op.constant( - [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) - query_points = constant_op.constant( - [[0., 0.], [1., 0.], [2., 0.5], [1.5, 1.5]], shape=[1, 4, 2]) - expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) - - interp = dense_image_warp._interpolate_bilinear(grid, query_points) - - with self.test_session() as sess: - predicted = sess.run(interp) - self.assertAllClose(expected_results, predicted) - - def test_interpolate_small_grid_xy(self): - grid = constant_op.constant( - [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) - query_points = constant_op.constant( - [[0., 0.], [0., 1.], [0.5, 2.0], [1.5, 1.5]], shape=[1, 4, 2]) - expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) - - interp = dense_image_warp._interpolate_bilinear( - grid, query_points, indexing='xy') - - with self.test_session() as sess: - predicted = sess.run(interp) - self.assertAllClose(expected_results, predicted) - - def test_interpolate_small_grid_batched(self): - grid = constant_op.constant( - [[[0., 1.], [3., 4.]], [[5., 6.], [7., 8.]]], shape=[2, 2, 2, 1]) - query_points = constant_op.constant([[[0., 0.], [1., 0.], [0.5, 0.5]], - [[0.5, 0.], [1., 0.], [1., 1.]]]) - expected_results = np.reshape( - np.array([[0., 3., 2.], [6., 7., 8.]]), [2, 3, 1]) - - interp = dense_image_warp._interpolate_bilinear(grid, query_points) - - with self.test_session() as sess: - predicted = sess.run(interp) - self.assertAllClose(expected_results, predicted) - - def get_image_and_flow_placeholders(self, shape, image_type, flow_type): - batch_size, height, width, numchannels = shape - image_shape = [batch_size, height, width, numchannels] - flow_shape = [batch_size, height, width, 2] - - tf_type = { - 'float16': dtypes.half, - 'float32': dtypes.float32, - 'float64': dtypes.float64 - } - - image = array_ops.placeholder(dtype=tf_type[image_type], shape=image_shape) - - flows = array_ops.placeholder(dtype=tf_type[flow_type], shape=flow_shape) - return image, flows - - def get_random_image_and_flows(self, shape, image_type, flow_type): - batch_size, height, width, numchannels = shape - image_shape = [batch_size, height, width, numchannels] - image = np.random.normal(size=image_shape) - flow_shape = [batch_size, height, width, 2] - flows = np.random.normal(size=flow_shape) * 3 - return image.astype(image_type), flows.astype(flow_type) - - def assert_correct_interpolation_value(self, - image, - flows, - pred_interpolation, - batch_index, - y_index, - x_index, - low_precision=False): - """Assert that the tf interpolation matches hand-computed value.""" - - height = image.shape[1] - width = image.shape[2] - displacement = flows[batch_index, y_index, x_index, :] - float_y = y_index - displacement[0] - float_x = x_index - displacement[1] - floor_y = max(min(height - 2, math.floor(float_y)), 0) - floor_x = max(min(width - 2, math.floor(float_x)), 0) - ceil_y = floor_y + 1 - ceil_x = floor_x + 1 - - alpha_y = min(max(0.0, float_y - floor_y), 1.0) - alpha_x = min(max(0.0, float_x - floor_x), 1.0) - - floor_y = int(floor_y) - floor_x = int(floor_x) - ceil_y = int(ceil_y) - ceil_x = int(ceil_x) - - top_left = image[batch_index, floor_y, floor_x, :] - top_right = image[batch_index, floor_y, ceil_x, :] - bottom_left = image[batch_index, ceil_y, floor_x, :] - bottom_right = image[batch_index, ceil_y, ceil_x, :] - - interp_top = alpha_x * (top_right - top_left) + top_left - interp_bottom = alpha_x * (bottom_right - bottom_left) + bottom_left - interp = alpha_y * (interp_bottom - interp_top) + interp_top - atol = 1e-6 - rtol = 1e-6 - if low_precision: - atol = 1e-2 - rtol = 1e-3 - self.assertAllClose( - interp, - pred_interpolation[batch_index, y_index, x_index, :], - atol=atol, - rtol=rtol) - - def check_zero_flow_correctness(self, shape, image_type, flow_type): - """Assert using zero flows doesn't change the input image.""" - - image, flows = self.get_image_and_flow_placeholders(shape, image_type, - flow_type) - interp = dense_image_warp.dense_image_warp(image, flows) - - with self.test_session() as sess: - rand_image, rand_flows = self.get_random_image_and_flows( - shape, image_type, flow_type) - rand_flows *= 0 - - predicted_interpolation = sess.run( - interp, feed_dict={ - image: rand_image, - flows: rand_flows - }) - self.assertAllClose(rand_image, predicted_interpolation) - - def test_zero_flows(self): - """Apply check_zero_flow_correctness() for a few sizes and types.""" - - shapes_to_try = [[3, 4, 5, 6], [1, 2, 2, 1]] - for shape in shapes_to_try: - self.check_zero_flow_correctness( - shape, image_type='float32', flow_type='float32') - - def check_interpolation_correctness(self, - shape, - image_type, - flow_type, - num_probes=5): - """Interpolate, and then assert correctness for a few query locations.""" - - image, flows = self.get_image_and_flow_placeholders(shape, image_type, - flow_type) - interp = dense_image_warp.dense_image_warp(image, flows) - low_precision = image_type == 'float16' or flow_type == 'float16' - with self.test_session() as sess: - rand_image, rand_flows = self.get_random_image_and_flows( - shape, image_type, flow_type) - - pred_interpolation = sess.run( - interp, feed_dict={ - image: rand_image, - flows: rand_flows - }) - - for _ in range(num_probes): - batch_index = np.random.randint(0, shape[0]) - y_index = np.random.randint(0, shape[1]) - x_index = np.random.randint(0, shape[2]) - - self.assert_correct_interpolation_value( - rand_image, - rand_flows, - pred_interpolation, - batch_index, - y_index, - x_index, - low_precision=low_precision) - - def test_interpolation(self): - """Apply check_interpolation_correctness() for a few sizes and types.""" - - shapes_to_try = [[3, 4, 5, 6], [1, 5, 5, 3], [1, 2, 2, 1]] - for im_type in ['float32', 'float64', 'float16']: - for flow_type in ['float32', 'float64', 'float16']: - for shape in shapes_to_try: - self.check_interpolation_correctness(shape, im_type, flow_type) - - def test_gradients_exist(self): - """Check that backprop can run. - - The correctness of the gradients is assumed, since the forward propagation - is tested to be correct and we only use built-in tf ops. - However, we perform a simple test to make sure that backprop can actually - run. We treat the flows as a tf.Variable and optimize them to minimize - the difference between the interpolated image and the input image. - """ - - batch_size, height, width, numchannels = [4, 5, 6, 7] - image_shape = [batch_size, height, width, numchannels] - image = random_ops.random_normal(image_shape) - flow_shape = [batch_size, height, width, 2] - init_flows = np.float32(np.random.normal(size=flow_shape) * 0.25) - flows = variables.Variable(init_flows) - - interp = dense_image_warp.dense_image_warp(image, flows) - loss = math_ops.reduce_mean(math_ops.square(interp - image)) - - optimizer = adam.AdamOptimizer(1.0) - grad = gradients.gradients(loss, [flows]) - opt_func = optimizer.apply_gradients(zip(grad, [flows])) - init_op = variables.global_variables_initializer() - - with self.test_session() as sess: - sess.run(init_op) - for _ in range(10): - sess.run(opt_func) - - def test_size_exception(self): - """Make sure it throws an exception for images that are too small.""" - - shape = [1, 2, 1, 1] - msg = 'Should have raised an exception for invalid image size' - with self.assertRaises(ValueError, msg=msg): - self.check_interpolation_correctness(shape, 'float32', 'float32') - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py b/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py deleted file mode 100644 index 1cba46e17e..0000000000 --- a/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for interpolate_spline.""" - -import numpy as np -from scipy import interpolate as sc_interpolate - -from tensorflow.contrib.image.python.ops import interpolate_spline - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util - -from tensorflow.python.ops import clip_ops -from tensorflow.python.ops import gradients -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import googletest - -from tensorflow.python.training import momentum - - -class _InterpolationProblem(object): - """Abstract class for interpolation problem descriptions.""" - - def get_problem(self, optimizable=False, extrapolate=True, dtype='float32'): - """Make data for an interpolation problem where all x vectors are n-d. - - Args: - optimizable: If True, then make train_points a tf.Variable. - extrapolate: If False, then clamp the query_points values to be within - the max and min of train_points. - dtype: The data type to use. - - Returns: - query_points, query_values, train_points, train_values: training and - test tensors for interpolation problem - """ - - # The values generated here depend on a seed of 0. - np.random.seed(0) - - batch_size = 1 - num_training_points = 10 - num_query_points = 4 - - init_points = np.random.uniform( - size=[batch_size, num_training_points, self.DATA_DIM]) - - init_points = init_points.astype(dtype) - train_points = ( - variables.Variable(init_points) - if optimizable else constant_op.constant(init_points)) - train_values = self.tf_function(train_points) - - query_points_np = np.random.uniform( - size=[batch_size, num_query_points, self.DATA_DIM]) - query_points_np = query_points_np.astype(dtype) - if not extrapolate: - query_points_np = np.clip(query_points_np, np.min(init_points), - np.max(init_points)) - - query_points = constant_op.constant(query_points_np) - query_values = self.np_function(query_points_np) - - return query_points, query_values, train_points, train_values - - -class _QuadraticPlusSinProblem1D(_InterpolationProblem): - """1D interpolation problem used for regression testing.""" - DATA_DIM = 1 - HARDCODED_QUERY_VALUES = { - (1.0, 0.0): [6.2647187603, -7.84362604077, -5.63690142322, 1.42928896387], - (1.0, - 0.01): [6.77688289946, -8.02163669853, -5.79491157027, 1.4063285693], - (2.0, - 0.0): [8.67110264937, -8.41281390883, -5.80190044693, 1.50155606059], - (2.0, - 0.01): [6.70797816797, -7.49709587663, -5.28965776238, 1.52284731741], - (3.0, - 0.0): [9.37691802935, -8.50390141515, -5.80786417426, 1.63467762122], - (3.0, - 0.01): [4.47106304758, -5.71266128361, -3.92529303296, 1.86755293857], - (4.0, - 0.0): [9.58172461111, -8.51432104771, -5.80967675388, 1.63361164256], - (4.0, 0.01): [ - -3.87902711352, -0.0253462273846, 1.79857618022, -0.769339675725 - ] - } - - def np_function(self, x): - """Takes np array, evaluates the test function, and returns np array.""" - return np.sum( - np.power((x - 0.5), 3) - 0.25 * x + 10 * np.sin(x * 10), - axis=2, - keepdims=True) - - def tf_function(self, x): - """Takes tf tensor, evaluates the test function, and returns tf tensor.""" - return math_ops.reduce_mean( - math_ops.pow((x - 0.5), 3) - 0.25 * x + 10 * math_ops.sin(x * 10), - 2, - keepdims=True) - - -class _QuadraticPlusSinProblemND(_InterpolationProblem): - """3D interpolation problem used for regression testing.""" - - DATA_DIM = 3 - HARDCODED_QUERY_VALUES = { - (1.0, 0.0): [1.06609663962, 1.28894849357, 1.10882405595, 1.63966936885], - (1.0, 0.01): [1.03123780748, 1.2952930985, 1.10366822954, 1.65265118569], - (2.0, 0.0): [0.627787735064, 1.43802857251, 1.00194632358, 1.91667538215], - (2.0, 0.01): [0.730159985046, 1.41702471595, 1.0065827217, 1.85758519312], - (3.0, 0.0): [0.350460417862, 1.67223539464, 1.00475331246, 2.31580322491], - (3.0, - 0.01): [0.624557250556, 1.63138876667, 0.976588193162, 2.12511237866], - (4.0, - 0.0): [0.898129669986, 1.24434133638, -0.938056116931, 1.59910338833], - (4.0, - 0.01): [0.0930360338179, -3.38791305538, -1.00969032567, 0.745535080382], - } - - def np_function(self, x): - """Takes np array, evaluates the test function, and returns np array.""" - return np.sum( - np.square(x - 0.5) + 0.25 * x + 1 * np.sin(x * 15), - axis=2, - keepdims=True) - - def tf_function(self, x): - """Takes tf tensor, evaluates the test function, and returns tf tensor.""" - return math_ops.reduce_sum( - math_ops.square(x - 0.5) + 0.25 * x + 1 * math_ops.sin(x * 15), - 2, - keepdims=True) - - -class InterpolateSplineTest(test_util.TensorFlowTestCase): - - def test_1d_linear_interpolation(self): - """For 1d linear interpolation, we can compare directly to scipy.""" - - tp = _QuadraticPlusSinProblem1D() - (query_points, _, train_points, train_values) = tp.get_problem( - extrapolate=False, dtype='float64') - interpolation_order = 1 - - with ops.name_scope('interpolator'): - interpolator = interpolate_spline.interpolate_spline( - train_points, train_values, query_points, interpolation_order) - with self.test_session() as sess: - fetches = [query_points, train_points, train_values, interpolator] - query_points_, train_points_, train_values_, interp_ = sess.run(fetches) - - # Just look at the first element of the minibatch. - # Also, trim the final singleton dimension. - interp_ = interp_[0, :, 0] - query_points_ = query_points_[0, :, 0] - train_points_ = train_points_[0, :, 0] - train_values_ = train_values_[0, :, 0] - - # Compute scipy interpolation. - scipy_interp_function = sc_interpolate.interp1d( - train_points_, train_values_, kind='linear') - - scipy_interpolation = scipy_interp_function(query_points_) - scipy_interpolation_on_train = scipy_interp_function(train_points_) - - # Even with float64 precision, the interpolants disagree with scipy a - # bit due to the fact that we add the EPSILON to prevent sqrt(0), etc. - tol = 1e-3 - - self.assertAllClose( - train_values_, scipy_interpolation_on_train, atol=tol, rtol=tol) - self.assertAllClose(interp_, scipy_interpolation, atol=tol, rtol=tol) - - def test_1d_interpolation(self): - """Regression test for interpolation with 1-D points.""" - - tp = _QuadraticPlusSinProblem1D() - (query_points, _, train_points, - train_values) = tp.get_problem(dtype='float64') - - for order in (1, 2, 3): - for reg_weight in (0, 0.01): - interpolator = interpolate_spline.interpolate_spline( - train_points, train_values, query_points, order, reg_weight) - - target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] - target_interpolation = np.array(target_interpolation) - with self.test_session() as sess: - interp_val = sess.run(interpolator) - self.assertAllClose(interp_val[0, :, 0], target_interpolation) - - def test_nd_linear_interpolation(self): - """Regression test for interpolation with N-D points.""" - - tp = _QuadraticPlusSinProblemND() - (query_points, _, train_points, - train_values) = tp.get_problem(dtype='float64') - - for order in (1, 2, 3): - for reg_weight in (0, 0.01): - interpolator = interpolate_spline.interpolate_spline( - train_points, train_values, query_points, order, reg_weight) - - target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] - target_interpolation = np.array(target_interpolation) - with self.test_session() as sess: - interp_val = sess.run(interpolator) - self.assertAllClose(interp_val[0, :, 0], target_interpolation) - - def test_interpolation_gradient(self): - """Make sure that backprop can run. Correctness of gradients is assumed. - - Here, we create a use a small 'training' set and a more densely-sampled - set of query points, for which we know the true value in advance. The goal - is to choose x locations for the training data such that interpolating using - this training data yields the best reconstruction for the function - values at the query points. The training data locations are optimized - iteratively using gradient descent. - """ - tp = _QuadraticPlusSinProblemND() - (query_points, query_values, train_points, - train_values) = tp.get_problem(optimizable=True) - - regularization = 0.001 - for interpolation_order in (1, 2, 3, 4): - interpolator = interpolate_spline.interpolate_spline( - train_points, train_values, query_points, interpolation_order, - regularization) - - loss = math_ops.reduce_mean(math_ops.square(query_values - interpolator)) - - optimizer = momentum.MomentumOptimizer(0.001, 0.9) - grad = gradients.gradients(loss, [train_points]) - grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) - opt_func = optimizer.apply_gradients(zip(grad, [train_points])) - init_op = variables.global_variables_initializer() - - with self.test_session() as sess: - sess.run(init_op) - for _ in range(100): - sess.run([loss, opt_func]) - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py deleted file mode 100644 index 017969d230..0000000000 --- a/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for sparse_image_warp.""" - -import numpy as np - -from tensorflow.contrib.image.python.ops import sparse_image_warp - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.ops import clip_ops -from tensorflow.python.ops import gradients -from tensorflow.python.ops import image_ops -from tensorflow.python.ops import io_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import googletest -from tensorflow.python.platform import test - -from tensorflow.python.training import momentum - - -class SparseImageWarpTest(test_util.TensorFlowTestCase): - - def setUp(self): - np.random.seed(0) - - def testGetBoundaryLocations(self): - image_height = 11 - image_width = 11 - num_points_per_edge = 4 - locs = sparse_image_warp._get_boundary_locations(image_height, image_width, - num_points_per_edge) - num_points = locs.shape[0] - self.assertEqual(num_points, 4 + 4 * num_points_per_edge) - locs = [(locs[i, 0], locs[i, 1]) for i in range(num_points)] - for i in (0, image_height - 1): - for j in (0, image_width - 1): - self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) - - for i in (2, 4, 6, 8): - for j in (0, image_width - 1): - self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) - - for i in (0, image_height - 1): - for j in (2, 4, 6, 8): - self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) - - def testGetGridLocations(self): - image_height = 5 - image_width = 3 - grid = sparse_image_warp._get_grid_locations(image_height, image_width) - for i in range(image_height): - for j in range(image_width): - self.assertEqual(grid[i, j, 0], i) - self.assertEqual(grid[i, j, 1], j) - - def testZeroShift(self): - """Run assertZeroShift for various hyperparameters.""" - for order in (1, 2): - for regularization in (0, 0.01): - for num_boundary_points in (0, 1): - self.assertZeroShift(order, regularization, num_boundary_points) - - def assertZeroShift(self, order, regularization, num_boundary_points): - """Check that warping with zero displacements doesn't change the image.""" - batch_size = 1 - image_height = 4 - image_width = 4 - channels = 3 - - image = np.random.uniform( - size=[batch_size, image_height, image_width, channels]) - - input_image_op = constant_op.constant(np.float32(image)) - - control_point_locations = [[1., 1.], [2., 2.], [2., 1.]] - control_point_locations = constant_op.constant( - np.float32(np.expand_dims(control_point_locations, 0))) - - control_point_displacements = np.zeros( - control_point_locations.shape.as_list()) - control_point_displacements = constant_op.constant( - np.float32(control_point_displacements)) - - (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( - input_image_op, - control_point_locations, - control_point_locations + control_point_displacements, - interpolation_order=order, - regularization_weight=regularization, - num_boundary_points=num_boundary_points) - - with self.test_session() as sess: - warped_image, input_image, _ = sess.run( - [warped_image_op, input_image_op, flow_field]) - - self.assertAllClose(warped_image, input_image) - - def testMoveSinglePixel(self): - """Run assertMoveSinglePixel for various hyperparameters and data types.""" - for order in (1, 2): - for num_boundary_points in (1, 2): - for type_to_use in (dtypes.float32, dtypes.float64): - self.assertMoveSinglePixel(order, num_boundary_points, type_to_use) - - def assertMoveSinglePixel(self, order, num_boundary_points, type_to_use): - """Move a single block in a small grid using warping.""" - batch_size = 1 - image_height = 7 - image_width = 7 - channels = 3 - - image = np.zeros([batch_size, image_height, image_width, channels]) - image[:, 3, 3, :] = 1.0 - input_image_op = constant_op.constant(image, dtype=type_to_use) - - # Place a control point at the one white pixel. - control_point_locations = [[3., 3.]] - control_point_locations = constant_op.constant( - np.float32(np.expand_dims(control_point_locations, 0)), - dtype=type_to_use) - # Shift it one pixel to the right. - control_point_displacements = [[0., 1.0]] - control_point_displacements = constant_op.constant( - np.float32(np.expand_dims(control_point_displacements, 0)), - dtype=type_to_use) - - (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( - input_image_op, - control_point_locations, - control_point_locations + control_point_displacements, - interpolation_order=order, - num_boundary_points=num_boundary_points) - - with self.test_session() as sess: - warped_image, input_image, flow = sess.run( - [warped_image_op, input_image_op, flow_field]) - # Check that it moved the pixel correctly. - self.assertAllClose( - warped_image[0, 4, 5, :], - input_image[0, 4, 4, :], - atol=1e-5, - rtol=1e-5) - - # Test that there is no flow at the corners. - for i in (0, image_height - 1): - for j in (0, image_width - 1): - self.assertAllClose( - flow[0, i, j, :], np.zeros([2]), atol=1e-5, rtol=1e-5) - - def load_image(self, image_file, sess): - image_op = image_ops.decode_png( - io_ops.read_file(image_file), dtype=dtypes.uint8, channels=4)[:, :, 0:3] - return sess.run(image_op) - - def testSmileyFace(self): - """Check warping accuracy by comparing to hardcoded warped images.""" - - test_data_dir = test.test_src_dir_path('contrib/image/python/' - 'kernel_tests/test_data/') - input_file = test_data_dir + 'Yellow_Smiley_Face.png' - with self.test_session() as sess: - input_image = self.load_image(input_file, sess) - control_points = np.asarray([[64, 59], [180 - 64, 59], [39, 111], - [180 - 39, 111], [90, 143], [58, 134], - [180 - 58, 134]]) # pyformat: disable - control_point_displacements = np.asarray( - [[-10.5, 10.5], [10.5, 10.5], [0, 0], [0, 0], [0, -10], [-20, 10.25], - [10, 10.75]]) - control_points_op = constant_op.constant( - np.expand_dims(np.float32(control_points[:, [1, 0]]), 0)) - control_point_displacements_op = constant_op.constant( - np.expand_dims(np.float32(control_point_displacements[:, [1, 0]]), 0)) - float_image = np.expand_dims(np.float32(input_image) / 255, 0) - input_image_op = constant_op.constant(float_image) - - for interpolation_order in (1, 2, 3): - for num_boundary_points in (0, 1, 4): - warp_op, _ = sparse_image_warp.sparse_image_warp( - input_image_op, - control_points_op, - control_points_op + control_point_displacements_op, - interpolation_order=interpolation_order, - num_boundary_points=num_boundary_points) - with self.test_session() as sess: - warped_image = sess.run(warp_op) - out_image = np.uint8(warped_image[0, :, :, :] * 255) - target_file = ( - test_data_dir + - 'Yellow_Smiley_Face_Warp-interp' + '-{}-clamp-{}.png'.format( - interpolation_order, num_boundary_points)) - - target_image = self.load_image(target_file, sess) - - # Check that the target_image and out_image difference is no - # bigger than 1 (on a scale of 0-255). Due to differences in - # floating point computation on different devices, the float - # output in warped_image may get rounded to a different int - # than that in the saved png file loaded into target_image. - self.assertAllClose(target_image, out_image, atol=1, rtol=1e-3) - - def testThatBackpropRuns(self): - """Run optimization to ensure that gradients can be computed.""" - - batch_size = 1 - image_height = 9 - image_width = 12 - image = variables.Variable( - np.float32( - np.random.uniform(size=[batch_size, image_height, image_width, 3]))) - control_point_locations = [[3., 3.]] - control_point_locations = constant_op.constant( - np.float32(np.expand_dims(control_point_locations, 0))) - control_point_displacements = [[0.25, -0.5]] - control_point_displacements = constant_op.constant( - np.float32(np.expand_dims(control_point_displacements, 0))) - warped_image, _ = sparse_image_warp.sparse_image_warp( - image, - control_point_locations, - control_point_locations + control_point_displacements, - num_boundary_points=3) - - loss = math_ops.reduce_mean(math_ops.abs(warped_image - image)) - optimizer = momentum.MomentumOptimizer(0.001, 0.9) - grad = gradients.gradients(loss, [image]) - grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) - opt_func = optimizer.apply_gradients(zip(grad, [image])) - init_op = variables.global_variables_initializer() - - with self.test_session() as sess: - sess.run(init_op) - for _ in range(5): - sess.run([loss, opt_func]) - - -if __name__ == '__main__': - googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png deleted file mode 100644 index 7e303881e213a82e412d18de9d9d86f368726f06..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14060 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}9Bd2>47O+4j2IXg*pj^6T^Rm@;DWu&Co?cG zu$OrHy0YJ7yv!2(t%1$ISo z&l83--`$s=x@`68)xTz)^St-`NHe8~I9CILp2YXt%nukZ-GS zrPOjh(~VYon`(bDd|{e!Yq|e&MNZDSZY^zX-}w3YU&=^I3f)^7>=EJ7p|7OG6y(f& zC^hn%i+gp7=cK^6dk;QVC{*v|-Fo%vr0x6mE!?(k+ro9r)B?hzU$39Gn~liN&78FJTnFf4B8duwHDXIB-oJui0Bq)EGuELrB(vV2)vnWx{k z7IhVmj_%2VLQ0O)R2hsIIOY0z6c{4aUaQy^D9xV|x;-;vLO_IuqJ6wmUfzPV_xIXP z&5^11E!?<&(V|1~5wWrVo&Ed{M={K1xU%KM)ePoW3{k6R9654A^5sj;>1wK_Z|Bdt zwe|K4!@!MyfB#kr3QB2d)pXFB;Kg=Yh;_4FJmW0Z7*FNbPuK&awG-dRao*m>x@zCg zWAU@*zt>;6BICy7>CgYihl<)U2VBevUomwxbB03ln;T15Yk#Tu+dZA~?qm8KLFwu9 z-hC{6sdVu1Ld7SIuX)m}j4lVxKDT)BUa>OHNh%AMxE$iQXL%L2jz{L?$xMw^tGH@5 zmaqS!)(}7YPuJF@xoaHwXIgChKA(;6jp@HL#?{CAnjxXohQ|;FBtk zpA%kResN;0OXgcE-aWhK&DBl5x~gsQ{Tg8n9ldGS-rsIdXMB@$Y4r`RQX|H@xo2iP zu{JjP6>pRoH2=uQ$I6QyJ<8y?nyPAadXr0Iu1Mikk5#>|ju~VxeCnQT6*T$98U58d z-jiDH#Bk>CRTZuO+#6HyMdO}*g}{H7fLkfy+eBC1Wu7}l#4z*Djz<1IX>)_Wdp-+2 zs{S^`mg_*3x8o&|A6II&HZAYHQx$#SnRFz#ScF)b;Dayy&eKlTRbGsqrkZn;>E+Vt z6TaWhzY!YH(0=;#$5P?AcVgkytX3&6F6>}UK0fc$@0*+3zepLYtqHEM;9hrvee=X9 zZh^nb3EE|kYU-{m`|f){q%t?Ra;wg6{!Q|0b0;z~goYl7&R_WG@Ar-GPYBvn|1h}c zV4WkBlkd>pD}O)xr*bUsMo`;aU0WaHT=^ z;w{@=_T8Ot_UY5<*Vk1&rGK83$#^2w$nfE$`ijbru5ZqtKYr;>j1A+0du#O^ll*)`M2MRW%ItPP9`508_T}^`SzyPpQ;5ncj_>|zP|X~ zN%f89_VWGm77f>%{slz^wAtL-Y)jXUWlZW=(FsA0Co0`SAK> zu2;nx`{vqKKZ*YQ+<#WVF-gNfyZKjc{9@2}kQJ@oqtkAoB)wf*I+}Z5zggFTHRWlx z8)h9hd%W@|<5{z+3!x2DQ|F1UT+cqQM#$anzfAn!U)o{|d+)a91UvLgPv4aB?ak(Q z`#+s(JUQ2T^?|IltarN>em|2XV#+qV_n3*yqHSq!w>>(c-FIitiBsD$GU`LFDQywU z&`x;%oVomNYwYf_hYSDy{gq0-!I z%UTJgcpavo-YFJ9fwTgc@d~NLv_60Y- znO{HiY3mop8JUM#rmTLq^LZD4-tOF(s-z^3j^mFTk3VL8(0Kgu#WPbo5BNPhWyCNc zmyv1XBz@!g8o{?G)}+SzhS!N?*(97^G5O3-o+|lQ`+Mr7BftO5nq?RN`*GUoW-sIU z+EO+Wg3sr+|7+)ed}x=I$h7I(-`#$>R`SJ(oyEuX;x|1x6@A`HThq)Wgj*)O*c_-lzKC_^z3o+Uzy3;{efhM5 z9w%FpcJ)zxVbFb-Zonl@a9=6Y_?QX9)dG5rdj@d6>*enhc3_N-v=UmjpW_HE9WzyTy z(jRYnl=Q~*k5aV5T0N)q)TRI4ov)ww|JOZn!kLjaGGHBHJQzJTG@+t@9Wu{AuG&$eof?yiPPI9 zuU^&A(LKs{kTp|qtJ(tF=`lusma@yusK4IAd3W*NIDY0$S;{Bout^8}&&g=Gn!5J> z)hKQ&U;9dvfQ;|?W*`4-|5R!7<7Lq?hiu)pZ|nztWh__te1MH%KK~3W(Ys2@g=vnA zLfu^-B%?Z`xGyAji~0U~^RRvW4g*tDBc2aM4q@t6N}CxjE4;p3S?M*4X^os$ZRJek zjkizCs$KQ_!PXNQ2UE5`-1;PgF?{;Z1ncz@T?^P6dU#6gzJ~Ptcx%q1uW_JV!EU+h zRjGz|Rp0M!-uM4>eA+xQ;m4mpMH!xWV%jjf_tGB?pOie^7bcU|_3zsASL(0Ek&Dy* zD?faieelWFvKa@nV+@}xm1KPTk-^iG@!nmunst`9E?dd|x{H71 zD;|`;UcW0V<^6>K{VjqK@(Z{Ai`*(@vLPk%*^=g4Vccu9k~?~JTi4v|x9AsLzedcf z?ueLc>b$cBY+L4a#T zK{a-)ai!~ePk3!vmKe5p71O3k%btC_mGV^h_R7jXD^9s3?45Hsi*L8aqHcw2a?9_f zMX=xUoTW6M-{QH9%DY>wD^pW{EZQWb+9aG~WNLk(*I@nL4~HB**`@cMRW`l-DKRx~+A%%mhU?sOYR!a- ziVChxRQ|DP6_-1&|B+Nv?F)Z@AOEuZ_q)pYv$IS!PQDIh2z|@e7}7QHgkUEBNMq*O{?Ll%F9_O-)S2mBnK{jILFIq>9U$Njz1lfAq)>{_+zXVn{1 z=?iyvxqge;l=APFx4!HwC#B6?8^zMzcczM_SSfwZTh#G}O?K(mdo#~IW=>BLpQUAF z{W~yY?bN>ILpl@ z+Vh2lm~4LQ_{}w7oX^lvSYw&9!m@$y_vv`OxcaYG7gznR={mnuc?)}kKl6G!uk7b* zmaAPll(R*zZQ}HFd5w*08eU!NcjD2T`&{hAqT04r#{YlR+fvh#EE^22`}r+c#&)at ztf_GL|9{_u+gjSS3pBT{VKh6m>6`QGjeFHkWhie53|XeUQEG zV*1e|4FCU`pOY|M^-^9WBZ$BDr0U`SpIm_W4=Y({D_-K3=(a(F0io@cZ@u|JW~G=GK<|{@2XIYks9aJ@q15m*JOFcJ`lh3Axz|qHN7;_lZtednn74 zpJCEuL8a3k=T9DNymV<YTF#O=-)Fmf_F^vX zb#iH8*Y>~IW^QRK|9Afk*$HzU)_z%j?u5hgH#t2z8V6pz%9?U8)wH#}Z_l?|pTp)> z=e@6J`2XFxJ3cX3r~O3Si|>y2U2QC%oZGo8kTKhKDU1-)~AP2b>ECNm;G~?-|!!}rQ*n4 z>mA*8Eg^Ku>$CGWpDz2O^)0s=cm)A@wQ7_z`Vvnj%`B1XO`9?-WS>H z&;I%_$@`b;C9#I<^PAH3w>&zP)rJg=nniZdlu_voh7 zo3oZLdVBH>`xCn#n|Sv8&eK)%({T<7;k(mP$|JdD3 z+~P{cy1e05b`igJ#NKefzI@)S{mB;>IiCCyYrE8Y#lO7y`_g4!ul1L?)7jzoENaG4 zW$DgkrrDbckBYuuv}aFPg;DVGZwEJ}8Vg8EbG`k*ZK#KSPZeqJ}H@V%o8 zo%fteOuKhwEk{m`US%bt&zy)H4r7KX(@$Icud$D-yZLkT>bwY^K&iRkUo7^Jw-V8} zu)E;K%36|@^(FK3GheH{-r{eYXRqp2$gKIVEw`t2t6Y`#p(dV;h0E;2kBP^>IoNAn z*V4w89sj0JR&~dRLsM6*Pv5&$^mfA0{nqUJ_r)Ds6M6K;#>4q5Yn+)2{(fbw`Pgdx z?Qq83JgL3+7PA}VN}cQJJ<4&My-T5So?Y#i)57X*?kfeSf4!iSlB05!Ig7_w`sB;M z4V(EyL}U)%+4){UV1xKlvwXWI>vs|Sb~RxOOeb%yy;JBeX_mk3J6op%!wl`B zOD;P&BB1hHGyBdK`#%qr*TmN!xM=zB%Vois0!OJylav;+_lF;R`jp)`^ZUlqY}s|Y z&)pREy?WKbmDQkl+2XVQCo*?$4&0?QpTDEe?acc9j|wU+w^>@+Ef=&Z`SfICIlt5s z?qGK5=e#XSpU&h+-S!dB@ZooenVFP$?LYUMd$l6#_wPIUP?KpB&zSTrouq2VZP(omb6u*ROw`x?oi;IS-I=4`-Yzv@Kc{hD z@5`@0YooI_R(&muK7MZ1LdJ?O7Z;Qz?yfwiGxy%|N#)07U(BD?6+K60zVL#>%Y7s_ zZ}692@6WHj{;AV!zkFpWl{*qSfky*&$AmvW&Mv>>^R3t0@7+4QWX8cv_L%J3d%sSz z?tilPzg>X%lN)Xa-O}Ua_Sz&e9jg9%h{>Yj&{4AskB@)fRex^}qwN;)yC?n&UYN2l z{;2X@X_NZ+4@l^2_6AB+2JzKVpHnV@*L&OH5GL&{aNjNBHfqg!4^WiNViQSj!;BfA$b zdsqMY+2Jz3vmvw1wVN3_l~&I!kxKcTvA1&b?#J?Z_uibVd-E$r`M^}8lAGD<9QVuF z8k|38c$4YZm&^WpYcD9|YcL8W>}UC>ees4zj?B`tie~-Swr;I^bEELcxs@|tJ)b|7 zW37FDvctiX&5rZzvhwe~(z{??#`1o#=*^EO-G1)Ts6A)d;o)<{x2*5D(S^=V!QiyB zQg_<73Gcam^OntD-JIMMbEfInQ;z>$s38}1_y4)Mu8!ODHr(3QoBJ~1;j{#vm60K0 z`QJaEf48se_Ivg+vxCC5VKtlPYP|i-I6bE6-^Ps=mtPr1u>JaSnO#rP+Ds=%M6~k# z=fw|&e?LjcVVD21*!`~jlI1&gh*fdilERVJoTkTM8{r5{F^WnocrPaP{GRuR`f z-u*Adn4>;Pcw_;2Yu9v-QdMuuHcmTcY*{gRWnR2=JRVG(uP z`}V@VP2sQSZoPeTtE5{w_s{=gqN4AZW8?goeVsL%Zd9>|y>IvNWh~iwda*e>_l6he z79MBTiDSllJTGJ-1ozjUlUmv~*rs)V8Zzvn94X_}CEI z=Q&YUTd8foX2ZLzlM}5bpRl{3>z`kKGu7k8efjOPYPY?Nozci#-P_+CtYoy{AKQ{+ zsh*SF{DG6HXUwn-&LizaN&Um2iLs|(%2o=e77&o z{xOdVqeRk{xevF`n>lk!#+Mfz8E?fkjL&mlI`C-?+kK}Mf9Kw9oN=$-rA6Gi?TBQ& z;cO{GC9xWXv)7upD=EA0;{I}B;ghwYzN)(!q$5SDel~Fn%%7E2qkTuw%XRs-TSw5X|=U%&L*+0}V18E?PHTe$V)3-_gG?f?G? z+59>sGBa~-;j5Jw=VT{{EM&|+;Iu)$RLV?;W#!6uy|>B^vaWmUrJ??y>)YZpd*c*z zdG9io2u>Aenma?HGA}GF$NXyC3h@}}*0TwZm+pSL*mbR(&@%t`y)QD#XT8_kyG8wo zea^S}?c3j(SWPzRcR%*#=DU!R|mpn^Ogw(}K&Fh4o}D zE(kAT@Y&TS<(_(v!{b?h=teVV7QMOoVk=c!+N%prs@9%ezogSf?!l+i!3lh7IqPn3 zRtk@K$ai6Ly7}6Av)~QelK)h+e1Fr~?fR5QspL9i?5+<{CO6EIi+9}FDE&?OTWmQm z|E{gc&(5Tzq-c~K)_66`HAQaAwY4WcJhYorUsv$oLQ(O+{C^@>^m0z>-gbPyzPfxt zu=+<{v5VX9YU-}dmZ@cKF}Z&6Vj%yfZSAtsoogHNm-cWZ{oTgQ=JI&e>R-2arb$)W z9AM_>X)@fCcCDGs#6tVigXW2jjvsg5XZ-ui^8P;6BiUa)yqsiMnwNGr$k~?Vg!EkV z%FwraJ9lfX#S4u*wiQ$R+~3sQ$c>n=<#+vyg@GPEJzL7YO582-w&#%+-~jKU|Auc{OcH@c%PePdr?{zB>B;%XwBSJG)d;-KuP4cfEn{`LeJWAcs;m0y z&Fqa2{zt62C2AB~ool})s#Gm0xmP>v^0Lz7+wb?edwFU6n4f*r{j(0c{;bc}&aUgwT@K4e(B%J9m@jR#hR9NN2eY;}9iTMvm- zeEav1>@JL$KQBAam;nEUlRD<%J?K(@!}V1 zfyI)ls--JdeNr|wsYvn$yonxM&!oAujq*66n|~5xa%Gq9PGS%0{7B}l{GiN?7CyYAMkU*r!DUv zUn>_a<<6MtY5s5bvW0!!+^#Wm`n{gJx(RMk`rw-SCW^m0Oht+5#pUB~c5UtH`LlWM zLG_pGR=#}sf=Pjw=fbWDcR}+$Cr{2RxEHVV`_6*0yDvhy(ykroT3e81|KhRnW*Zyn zge|?1BIQZ8H{QO9oH*I_?=M9yJGotc8+LKbogiRor0CdWq1Eh_tll}9|J){?x)ign zYkTHzIy3Y0B8lDSnigzTy0&<;dii{UH{3W^HS|EhK(B~%BD!_MYES= zWJK)WVexd)Cmr^==Vm8t;5#RCW!J^&$?O@%2hOf(&^W7i#_hB~%{*c4qE9Q``57AC z?OV?GqNC%&IYmXTEw2`bZd$YUjpTafERki7rk~D*?J@cN^y7il>#lKEI5JYL@9mPE zaaL*HRQZORzfP+6x0h_-;&c?5y`0&~=#seGue=q3Gh4%>HJ2PT?K-ok)PcvsKgVU+ zWmkvK3s{es9p-S@Y%}rb2C0p28)`l=t<})+v9H_0_u|8c2?bvtX`U@AO5Qy84#%z? zsm*^F4*X$Y*jJvmc>BzVjF+kRc5!4m@C(gN6Ir{~C3CgiEuOfD2>Z$-b`8=}tKKhI z!fd5;Mta@XlU^!ei?&|5A)_An_WYz}dv|_(%sVY{lg=c;3#abn%w*pbDI#+4dE4@` zsCBicPhEca@S&JeNyZBG5ZQaDZ|_Jhbe+J!=R1qZ{I{9TtAd%9yLBdUBw3y1$S|I8 z@XXqOe`hnk+w$nfzqM^EdT)9C-g15Q^XbLOuG3DgR^HRQefv>S$HLQWv4$*@**-LG z(>7l4vecMcY1W;NYhhY_X$H-k?|n-t_u<=I`?IF1!NQ@s=1FL7Sfbe09EPeJ2R+NR zv;}8Pf0nW-OK?p?*0mOPN6TF~wzd`#kf|A&%7zQ&9)T3iX)+WjZTXh#&k~np-xF{+QqJJ0szKe3a z(C1^%IO)HuRV(+Do33=#ZN<|i)m!dVT*z5s7G-8DIyrUswZrd@G@XquPjbFd@-61p z@*b;yvppy2xV*S^tIf*P>2tK%k=zRpLbzVm{PMWV78RMkc$V-2#?NB>+j{PP%UsOi zUw`bRruKyER~^6avzEP3TFSdM>$yr%bL%>uNxM7TrrHZ$*f1f@HEfZQ^>fEF>ozNB zpMG;kC+hQpZA=yHXVQ`ue)}DGdfG+C*kIum;rgrgr8-AThFnTzVc!yblkpZu!GWl5 z_ETF?7*_ibE{tR&Gejha`jwkse+!fmu-VJJR56WUA?XZsDIp1nl|}zBF7w`gLQpV zizgqfuM0}E+t2)L*_DTf85(MHcJaNKB=rCM&!15fdK38y*rse`YWv$Bw|bS-hfk;7 z142U5v#pa~WG;BqbUyF&y3hCK-!Mzi(_x9LlU%VT=cV#Rua6gZ?sUl1tV{b`>hS#e z?{YZ{ffbB%_V`~oCmsLxfd8^?y;BF9SSu^u?M&+a9dN0+^G@1v^{#7gw&k*?G5olF zzk!vzpZ-nQ^aRj@`>2_1f{6IJ&+SM}4UKOG$( z9xUo!#PWN~tH$0%`ur(kF&8CN(k{-r&UDGD^rGt$^XNRi4T;W|>!!utNR_;?U*44A z(@h(>3yqh8>StP6Bs;LLuG87QZ2gzNkL6Vv8o2d)oJve$?wG%qKcV#{Y_X~S`ciEx zsTr1&b?pC^oR)3pcz4QcO0%%&j>F+$AwRlby*d+Zc4VsKa^>z%%k%fGJjN~`qGWQV zS=IYdPLA}MLYB*q4*oNUcyYUJnvUZ7jTQg@F6+M(rh59o$Hxw(rd$<2V-~Dl{ks0f zX7@iwweH>Ei}IMOmHP6B-MP1!LicE_X%C9`Oj+A$c;b;_NvdVT z&(GW&lih8FglcyD|0i+x`OW!JtBx+tef976$?AjUZy27P=jZb=*J@+fkjky~`O^nc zeSedrZ~ph|Uh9YM6Y&g+*5+97r)z=E>%_uayA)3g(uXM%81{VTt6Y1OU7n%*&O^T~si|yMwqM?b$1fB)kan-m z#o_vsr$4<_J9AHNs1j~vJFu^R?Rx%%dwY^>KJ)C{{PW4=jWbL%&pi4T7Pmad)OLng z|2FBVk-jsTZ2nn9{`-C3bIH2vUyHM)J}OlN3VJcr9#=W=z_xYjRNmcQUT0=K=ur%; z=UKxm=sbOHZ2!s0-5W|WP4~n-$gl4{xORKlvzl!eZ*OqPH2QH|Un_t4(ZzpVQs?FE z=B;=lc;GSPy^3asec97Ln&sW#E~{^Cb;#8%>pI2Jc<%i7{r?~L2kqrr_A`E0?f*4? zQ>QZOLJ~XbF6F7bQjBfqDKTm%?uiZOk;*Y&?TiNAottRdAInVgCQ`{_8 zvS7)UkYnHP|K3WVOza$`JV|~5z zM4jp?O@Yb$6H1-y1vkdJUVYA$DtTkCb;I&`s~i&-ww&^kY;6_#_J(tgMMgjS3@cNf z?f0ZQ7b!_zk6r$a=gb+MgZK8n>2|I-aOrhegIUuI-Q&zVYTZ+!7SFsM^Y6L+l0!ev z7#|RiXZZh1e5Q?D!UG2dL&1(NCHp4(e}WPQ2WH+)sFz^pUnIaZgE4tQV4&a+D{JeN zFAol~_=TEV18~<|t>pU58l44+9+*Z#&GrOk4r1oMC$)uSbk6P*VRWy?^PU)tJu5hD#L+|aw*%_ zT-DH;v>|7vyj(x~#fuI>lf;vcw<#$$)^q6Zk7;M+US*nDS8v`jC*hXX1)CL>uaz?V z<0dd3c<=t0+aq-EH>rZFp?^LdJ$j(ue$uIESJsW3(_`I~w#BYqc!Njv?fQDxkH+T} zxG#L)6rA$whDX}6@6$!KYn(gl-ITA?yytnb*TnO2b z`%4PlI1zN=Wmv{F&alHQzb##(jX&gSq~*6PEsCkW1S2S7%ksp=@S_# z*CVym_DH92|Kl5m<@a=MGj56y6+O_xd27oVpW0a-ehZ$u7P)#)f8^oEH_!BUjCQ`m z{hfJp=lJhmb1&=5R;CR%>!#^lmD-VfJVhrWKwZP%Jy|{Q&y%)@i8{>JW1OqDUab0C z`uT0or#T@kQ$O9WZD(Ro+g`MWvG-Dt&i!4YlP*qH&$-W+e1W;CXpQZJ3G?{wl+X7s zwmChG^+SLCmBVw{3;gr?&s@IjKej^}`Db{g%Hztrgra zXJjMCe{F5By|w^D*qRB22U(3y+|IXNP`WC1cf-oyuXBIM*G$-=EbYm%LbRe+$&%sE z_x&32o7t{kzpi}#ROJn+eHlkMa(?|t?)midLPfJH%La?{I`=&q8D8ptxS4+I%M_6Z zr}aMith-*W%s({gVcoO-o0Vyg^8 z!y+Y{?Pc76_!;Z=WkfbM>{-!{=KD+iv;TYLp6A@2Uq6x+w7b9TT=4Ys z#BA%bHD7Bjf99QC^tt^0!ha_O_izQR-mY_cs*|gz=#7dO0?*FOY-UmRKjSqeeM;Vv zFC{{2!`EM1cK!O~^aoX67~J}{l;^2;&#RZ4Vcy^K;N+{F&pqy^8Q*NZ;dNl~>A(NJ z-`HAfz0NI|nO8wTsOiJO=EAi1_p-hRD+j!F@L4|7(Kd}|rA^J@I}fL;Bs}%{6mC_j zwL-2+eS21RH(o#i}%+B(>@|^5(v^DQs@RmyB&IiHECfMy%-(2=yZLL`L zwGW@=>m+u3kNf<$qapRQ#m4Dg_vHUIypgpokj-&RKcIMlVabJnkka>ko7s4sEY>%O z?pV9zV)4XHn;K4dB)pBPjZ0Mga7ug0qT*Yb&wgj+Pg41`=iL6uy&}C5jqmM$am;-9@X4VIxtANh znq_EbhJ882ec-G)Gw-f)Wx-x)_B~&O3{pBi7w383+SqLK^D}dKk7CjH_w)a=i0N(F zX0Of57WSQb>1U0mjF3!sRm91}8HKpPqEBBXvKh0Oycq2A4&9J?F%dS^J zbgJOe&Trp;Jl6R2r#R->_r)h}1-?w(e$#Jp7gx^DFTpia=f8i)IeV>i<}H&4yWbzN zJuD@4;_w`1HU@t?&MQ|UZs$i|`1tsU9gl>=xxg>}Q|I|D`Zh)L+PCa)cddS|yQyg} z|LNA&i0Ihf1@BZ?L>DqXI3c*kwt!)?-X4LE4fpge6e(LpcV7AUF{NbwotOvLc60-GX@$j0%WK>9E6>E4 zl!TU=uJqaWfhi!4Z^_b$`yX#xQ2V>#xSaBYb#cDuT$jAa2&#|RbmSNRb-$1@silk$ z_I?l8m?bJUefsC)t2ZlOfAvS%E#a*7`-H0N>$Em_*)yn%I$X5co#R^lohdwq@z0mZ zm67YG{}I)XaA5nBmm9cy+q5r{buU#vId4rmdaO@%vE0|!>3@sY9LS!M9;O+xY?D#K zx15?ywbti0)|DjfyxB3e;G%29|M&ID3b!96ir-q+uxnxC9oF67<6b+5ySg(!pX03b zcjt4axz@AuFN>^Nr?!37{rdT5-pc%M=4Ud$oAUnIBx!@quS=wCO%5Jvy|Fim)sH%^VPZ3l zj6H9c2d`Mh_Tyi55qrs9t4km6oe=+QlFnB-Ma%N$FEh6bUDM_s(mj1(_4-9mTE#ac z+_%%&US5{8vu?JcGV}WX&+TX2-fh0eW<_)FF}KrYU*2v%F#o^LGd~e2uV3-}vR-Vu zZ|=|ka{Ap(VGplM{O1m>+Bfgcmy@TJ=dSwpM$wkF{_oT~S3kdw`<3k*8F?W7KUa@@ ze!jK3!R6Cs4%1Q<)Y%(Zxj!7!-~XWe+_@?G*BjXcCQfbr>ODOvBtk;|#Wf~VhpK0H ztJj;>=eo!47P>7ey!hW;bCVT|Ij2tD`MWatI1|6E%dd4eLW>W_#Pi2gX`1EUXy9b9 zh+Q&gwPu6c_jfPMYpv{R{}g^%tld#|pp&~vG*fe!P~7*UJC8r#Jo}C zewN(dnd|p;JrdO}$aZA>ztsNVDo>>?iyj{Sc;hht<7pQc3nr#nyPS3lys@H5!e>j^ zmO|y1(R*twcY0o)^y)JE4AbsAHXAShoTo0hVUNvroemx5*=7sBz1#iq%(mPe+B=K| zR(m$GH3Tl+@aF0C!ay&s8oz~R(PmzYpC8Qlo>#=2_BP6>srIX{^Mh$mzfXR) zL)raAQRviLj9NxQFD|v3{49O%m$1#Mp(4JwzB8NO?|D1FzHen{ zsKujqk)N)rtd+6(`FF*t6$g&*E_?W9W$?rU((e!Q7-+AR4!bIMP;9Htjk&u8=SoX8 z+|ECKj#s)ME>rWZz8tyiHnLTSnt@pWqlgE6oZy_V9^(iYkxj*tIe-96S~QmyrwBf z{6>OOtv9n@^T~DW%$qXu)O}~Zdesn}cW@i8bU~dXPfX-4ENA7A#h6 zohG`XGWCsvkG^&9wKOs3X^p8S z8zm(L_zWJX#Vo2d-9C3ilpD)BjrE#2?6!NqNp=1ISH5RQskYd`gBtF>3$HPmFf3TX zG5_fkh4SlqJvN_xN|Lp^-@4{8oLy&bdf`h|tn7vg!*6S2I-|?qnbcp6DqeL}F*vGA z_UF$ZKVGjlv@;g=G&KCMAwI>R)-1}ob<@se^8AJ?}}*trkIHH3dDTLsnff1DGv>D6cDQ`H zEY~ekvf#AtjlF5Sd1ucZyMN(`?X#x9Y&I!|fVjT7H*Z#?A7)D_dDqk8*nH(+&I|q1 zdRxMN7ny`kDHmG$cFk_lnbUQhkJ^5JaC-gzV?R?P+b-Ulz zpy`<_X7upb*r#%--7jR{rG1#SW7k}**I(|uGRlvglD6F^`@_XOCt`w`ZT^~c+5HvC zE`N7~mqCXw`?A#)u|_tABS#WS?(eI3wrsY+mp`8cbFa;hirl{WOtkpYo4S{!qO2lT zOxJgR`uY6h>33pc;=|hF&-8Ejy7Da3S`A&^a8>o>k8igx{(q#Cqx{pAQzto0_U_4( zeKoI6s`}HX58IvF-|Tf;yLIJR=DRBjCjkM;hMN|h z7PXi2a$JMhS!TwBr(Rd~eBvtVlR3Eh_xHz=41DZz*|k~u4%(lVE=h>{{%*(9N8K0S zcWDb;-nsd&b&qeVxya63>z-$!p$BH?FM4yBe`0V%$9{2<2f14&{S(hywU?>ZY2(Hd zUVg`Ho7oZ{G_k(0%@UDx5!!HFC~3;GB|H!KE~K3;w*Sil+RL#$oxe_7BLIl$5-@EgrbHdUD~53jUwBDp^*q>*IMi zdpi4*CksApzaKDlcK57LMIkcweNDHbYUWRGU(fi5V>&x$zW*zRTl@AfR55tGy~ew| zeDjOLX7#KWV{an^L HB{Ts5wR(V` diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png deleted file mode 100644 index 7fd9e4e6d69f3120428d1d778846d495cea1a989..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18537 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3Fz|G745^rtlpw*nn1P*v zfq~)we<{|*4C+IOdE|U2Z!?1cv!lKG#DB9{InVitat0sNyU+JyzGR)99iR84jLX+} zR6RFvE@p$ML1Bd&SOSW7s1_BNb3(wZKDMZ|?_#fR_sueh?bHYsBZ4nXQn9BOY>-&!< zp3!EJZ7TFTp0UhRzdJVE{JdD;(G`NBURp~T1^2MOzZoUzF3HB2z`*#U?ZkY}1NQ$P zFE&+7QqpYoei-$;wm7?Ho^ATQZTsHODc-NA_UQ48t%o-6*aP;v)TGYl{{G(S)0^Ad zoBR8n9UFrk8@-*Hf*l&>&YvIa(-Z2^F|)tFarSR};mh`J$Nozg_D<#PtoyU+|Aio) zO~?NKU-;<%zrVl#{{H{{{r`QnzrX)9-lFsK%gg(3Z`bR`|11ChZ~yoA|Ns8^`Tpzc z|NHBBODu#yi4m0Oj5?d=&yNk5;O)>59W%$g)Z<>i_L)e*AxQ)PY@(51Ov8bT-eP9U3tsI%0-@&?I;F_V~z|;Z9A_UR}{HEuGWF zFBvr`MQ}EnvI=MzZ|FL(Sf>6_!*Yf#EliJ-Jgxr!|NsB*@B97o|Lgz$`u_6r{@>sK zzrXwY{VCD5@bwb+_ND&#khp+ZmJ@1MNhkBAIa4MGOqd{W;hNZ`Yhn{7KAbw`iI{Nm z!>6eUiH_<WIkeI(qE?{~1&Hik7)wZVHo2Sew0}TY>YR>)Pgje}A*TWc>U4|NsAw zkMp?q8N9iF{=*Bw!?VqKz}Kmd z{%iOCzt9zQe%&LXox1~q-{*ZzQ-AMX5E6Q?^w_N`nc0aNx3|6ceP@QrX_1N|XMx-; z50ol<{{MTnVX~W4$GSzwwB%It^#mP4ZLTnV-!XBOO6Tp62R1j$mvvUAMcy#GCMwwX znAuAYoP?!*965OYyq>zchQ5AYqTOv3=RnI;m%Zv=R@`{|B1P zakKyYKmXw|`{BR8d3Ki@-`RP%;I$ZtV_$Dn^WU%E0+e9?|CeewqNsCb;S!Z)i?fAt>aDm`F-`a{x`eZtl8z* z9U@}7!<(a`vLWnJg1^j(T)sQ8(iMS?cYd^TI58Fli_8oA_dor|hsKBBa@)GaCHrKP zLE-zexJ^GkLo^i3;l`i9%1Fv`Sbx>lQ6|%5Y7I#j^9yzq18j zwGQ9i&C||jILFR+wVeUPisB!`rT}m(07H6tSh_;k($j=jSE&)h_nqfh3qkb&nF#(xyy*zI|ih=PxGZrBhSW z(-V@Kz8FoI%=B#=tIi|I3y<0u_ul--@5?3krR5Z-j#`45hoRN#8HN38#<8$=1!m6DK6NdJAvpaG$~(QLV8fm|4lIfHzMm++@X7uA-QvX#zWUp(b< z%WPTNaPY;}MJpX&KQ=HBm}v1J#^TrC*$TxY2y^12}c}jP9-?#x`NW@=k|v`3fr!(meh+&*-_4K z#pBNgwbbd+gNKRNFHI^ay!i2h!L`f6X{o6#OV}c;8g|WO+^Xf+RhrVWl%Zkr4q?W$ z&Zhk~4#r|j7dEV9%T5*!KXSPD(2Lm$rxbHSIipr7X%#-m>;`51H%-DSq^-LEw?IX7-JG&DHyTlvJvCPuNjJQ^h`!2wytbDFf zV8<`}dI`Ilgoyp~5?_b4tqzy8t5r%25Cx|fCH2A$8w@->K14@l9XS#)Yu2gHA2%8r z8X6cW^*F5yNYHF(Y-D_-@kW*9-Z_qcuO7%xaywJR6wc*vxkkt(dHas83`poEDH5HNv&AbrJ&BePfYF%zE+I})2z6lPfV7`!cb zSMTQLHiN_HWQR%9p$08(L;h|tm9H5LzN&Xh)^th!`|o~EJVR|kW=hNR(v;Ny|NkF1 zV6=9)#;0i+5Zk23#4aPl&zTXC zn%&)>4UCKo3>2=beF$Ol7vtH`@`-<%)Pq*WF0m^a4olc(SQJG#d%C%?g$TAt^*lM+ zAa-9uWnIXj2Cde2Y%1rK4*dVmv%#h3s*lP313Uj09@SIvJ+SnHI(J2B^Rn)lGadc= z9UU4RothF4vn^UB0d<2};f9Tdr%!xfXTSa9$BXQ&uFoGf7#bNV9%pg;<|utN!D(&4 zj0qC|6AI5aq$ot4OLoz!yO1n$HEqquvmMr~3fc({svlPz&tfY3{{R2^j#&*GmNW0* zC`x9U&aq6-rybO_AeT_mB13yc6~^ z$?+$G{wJ*D_5RM-9_Lg2KlyUg-m9IyALAe8IEuTxeX(e3?!&jiE{_&WICAR8&&dy8 z2De=cYjbXwu&X;F_Z1q{b1e4k^_?{Nt*q?Sn>Qm9GMVPtJ?NC($;xo@=KqOFe6N38 zwU00s&w-mfM~(`6da_^En4-sTc3i;6uwL?l^{yA4%`+4O*LX7a zE3o|idHBOiVdqfM!v~vr^6w{gRzaz}{8#l^Nqj_!<%keGO< z_s*e&maVD}b5i90HGeJoe>jM(DEYI!kE)Zv%B!9BH5-pHUYqu^o`*G~O<+qaAH&VW zj)DfBx&$|yYfBi|S*rI0WCu*1*u~X6UHtI(_dLJ98{1SidB{UN=;SYR=1kA3RZ4Yr zZk(LwgoI+woas2NxRI}8nvy5tfAN%lD`f$HI&xf}&;g)^+t)3Z-JWnEyuVM{m{Ir5|lUQ_gn|-}R-Jb-D0)|`lkXD|Pr_7>7Hg$E&7A{ni zl4AOC_jtpV;?@rjRMr{Vg{+O>uC&o?kZ}C-?7{U8`3q~i4)T4tqR%7Y%DRkWo*BDQ zLR#8}O-<9})%}fXejR~0I;qjqfQzfh#U-MpRWc*XDb45KVWr>pRbB}R4yVpe_{Y)m zmw#(T!*t0E)?@V{Z+TiIB6hMcuJmjv(`?|HDjxOAp)Y?;b8<%WDwg(m#fI+ggLmeD zS{&l~5_<7z5IdUG_~hjH?Cl?Sa0r{43WkNTRcz*)X7u19XX;`1j)}k4W!U{c-XI~J z>X6l#q-*dmxnr@af(e`4-V>X6ZY=r0d@Iy&$+W%~4B^%y;nht3P6tmlFc=!LRadw9 z&zHEjHyL6}Qer}afWU)ITS7cNHUtI>KY7x!gp)1emqVFMn##fIRrP`1;uVEW4lBI( z2ZTK0Tl}Ag=YReGf4_f!muG9M&&lyQbLPZTfmzl6l8xsxKJ)X5^h)lw`SAVy|M>m? z>i_?n-}}~{o&8CKAY0B)M(zWb6;cm`{hna;nTfHeh{w!~=lpzQs5=A&61Hy(^YYtv z^{Qw}a&nV8caPFT`J8P$JIhb8x?dp=uO7+d1Q|EEs%xViPTwe`frs7Q2gt!up98o4lGn%E4jhYd4l zI(l~>yf_h(oRThHnez4H#qAp>CM72~xT+NW7q~aiL9oU0qkPV}9>%RR8nvC*#awXu zBR?r=O30^APp2mzmwR)w`1zHU>f7`7{{8gy`u_U=k`fX(Zq96Nu~@RS($usyCwHl` z(iIEKnGda>7aZ5P=Agdm-oa$4Fw=+w-{1fDpZ{-X#>MT$&+GQq{Ct17z5YY3YEBN% zp+h~NKJ_eH*5m2vbL$q*hSr-cj0e9oo=H8BQTafqk%5noEjSq3tV_zt=jY(PI(f>I z2Hsxf^Q#S&dZ!yNERCG_YaNe7b3(_AW7{5yUspN0e}Db|xII5gPEPt9z3tA9^7nV| z@4w+Zr@PxVYS!@!0c(~V@dyrXm_567;zxa*Q~?>a|NT~pZKrcjXmx&MvikY|KPY+Z zt^1pPtVe%!c>Uh;_vg>g*Vt-ZWb*lgK}Lqbv**T@RYn|~f0b9p9k$xqrrB3o#rk@N z)f{ecd6#5UxhcfEdwwTpu2pa6#ECN-8yPiI4HoiEOZJq^cqx9{De1=2 z^Z!3AeFla;O-yP!Kg-BaX_?)>L(-BP)-cRpmVUrYsZXdl!<02^gHpG1@yUMq{r+<* zBEq-7esQtgh3)us{r&p!b=IY?o|@{+pe?5Tve%yuQKl~Fm&U@`~S<2N^t2QQbi zFMK4(>3HbCfddB$UR*G)s|zzU$c7f4)Zq`2GL(SADI1du#n{vxz(BR{pPJi;iY9HD!y8 z6e>w{)1A|;*?lcNG2uYa90LPThnM01|E6_+(o&A>QWeN8cqGqU@{m>P*Xa{K#J8C| z)IS!)wx(NJpy&Vp|M@qIa&K>wwy&E|dFSfu>-Kqfer%}xd|2IopOX`V+oP451uGK| z1RQ8kalP^6`}_a7H$HrQa#CJDep02(q5btd(c27ScDbBeY1`AIp}ahUXH8Rqt^nI6 z3rHw9=_dStctg5F^slS8p24wrm6lj;qhDWM{=T@FKkrV3YmtXS@izr3tJ zA^1kY5t)OFni!pJnfdn4cwoq+k9npk&?cr>A+!-%qNP<1Ug(?yc}) z3lo-*fVO9wObiUWYrjlsELp&yBKzm)RF?@Qe~(5_aM-EtZ?~uJ@74GBm!u1;eE9fy z{q61caeIILJJ@`_!kdFlCfA>B$)Pv$W}1IE`MT!W{H%R+T@pkOv@+;)tIsA#3P;^rG|I*{{DSu=ll@MD}O$vemb9UQ1oVn zm*5&Th~Z5p9+C;VzGu|bPig5d@#7NqQ7#H+UB}~Z_>bT1UwXoi|Ig3wZ|A?izs|RH zamK&Dzv~|#JI>B;XKc`s+#P!+PH2|(#BN@;B^}3Sosihjb!X4d-={=h%_*;cEygS? z49ZYGJ}sijOlxYRw$$m&O4`A+!wq6{lB3_ho}Ld=7}w`LxU>I6CTp37bd3Oy_<^WO zWl*}Ee}Dh~e}AnT74?%2?5_uni2izXv^;04%C#uA?5_tKqPem&8z((`cmMz2ABLrV z6aW61`S371Xi&|<;>5+ioAQm!XFl@BoMeS05l20(2?8t!es8J2=p}fAE%1wL(u_VA z)r*IJU0(kF|B8|+&;I}Ck+=I(_5YvxyG@N1E&<$|L<^!N=Gp%K{8G4d&*76NKm506 z<~~_Hb0Nd)9x3K+ElpcrbAzfVP##d{Qws_HXJoWUNAkD?OY(tfMiXvkgqo(gexBd> z@cw-JdWG-lfvh#ZzWlzsd%ncS_G1dm`V@@5E|B=~^z{AX{r_wJc1`3x$>ik3V_!F6 zzJ-KDff(lu=F}ZsVvx{k3fG=+vG7NNICD_vp@wM|6TVG7%=-WEj)I0~dT(0Ug@u`| zts|#unH@NB;=||X@rxEuV|4EpUE6YCvBb^U|L^br@13a5D#^h)(Z;EZJ3)-6LQASc z#|7m2|NoO3eRuzF&o@m-`O_VE^niNggwyE8I0S7=rb56^!^SB@a=WTMO+me(Kv(seT>mX$+{PTAG}JAUev$J#gR z)}2#Obd+7(vBt~b_6BIU>0E8>sZZ`(kSza9DJg_G*F^Sp#!Gv)w)#7F&iwkdC@1F% zXCNbEn55*n`2GJD9J_DNHfQ25dF6Ni6TO9hOy*0f@d%wdbI&|=H3Oq00ii`rmKm8% zjJGY=uRSzkV}sW9PV#o{vUi)7NUsU}C!W0FkNmL}me*Jsjdq+n@gv^fKO#PU3fI8{ z2M(M*y=d`a_T4L56RIECXEizW=O|6$^Llyc^yvlb&VBlfh zCs!JHE}Y`u5?KxLOH!k`>&2+K9xv>fw{$YDO!{~D&4D$(8@5?qkjOY{%FNCloUq{2 z;dc4>y^CH538tmFm6fR}E7#@arCsrGOaePPDW z=g-vAQ&l|^1w1?pGqOH?`e;~OV6f=eeftpaLQ&}*E*FkS7O2eq-_+D2B-C^4*cuTt z>z*D7CudC_o)U)jb{}|FCxb?c82ZhKn};zeE?~b)T5SE1O0CrY8JYzwB5_%8{I$ zGd=lkjOGdH=^b5399&ApC+37S#LP($)8*mefmoR26!olm=H~y8G}JFh>|psAVpwoY zzT)$?ZPzr+!g_l?HFtJ8dUtPM;W2aWTtQ{o2Tl=EygM#)%}Zk2x$Ecg10U|6>atX8 zo7=M@DLtKoFYU#Pgal>dW&wtbi~VdHwn0bKlbraBmQ=CqU6GLThyC@tN9#>LoJd;6 z|3u@Dw$h=ns8!J+OAa4&bo!N$o?cWUa`sHflxb5F}kHD9E_OqT`n_W$(aYL+j4d}(iW%X_`Y4->a-7WVX= z`u(F|^V=zZ*t?C-%s6&`&jHnrt*wpI>;hasRm;am^DT?(bYp-0zO4Sn)XGReBqbv! z#ls`v!NY?$&6dbKxYf~+&J1n1{`sgpud$KQ-O9m_uV=$8j$FG|#>Lv*k`fWZ>}kIm zjQ;)qU;pyb?##=1I|E(-M*oDpgexv_G55JNsMkL5 z(yssi=jZc6==S_~`fZb9;B!{yx1r+`jJ54^T=y zb7sY|V~<~Iipl6?W@>i4&Ei}0l3{78L&>j(1^aK9JThcI_4n)R|7T~{o9F!4oqBrz z>+6#HY8uZS;^r{W&@jlzHa>FPc*cx?Vh?6}1vN*7&Co*p&qw81jH9TXMDNoR8D?h4Oh{7W{432{-t+EEOKh3X(0TF30Ut3I@!ttX z1xtC>ys4K6Vv&}~sGOc4^H|gPg9`T_H(s%Z{qMUpp+5Qf|G$3L$;Ley%&Yh^pFgfw z`lS}}=Fl=B<4A?#iQe3g1P@BF*Sk7z@NSxUYeIir!hvNu1&mFcOp=hy(DdHs*cum! z0B@!k?WzwuS!Q!wQ)21u4YN^LF43gP94vWIOygR_@x}8B>{+h{H;X^;*M?|JYHVfw z*xAgumE&iBDA~k~DBBaJ6C9?B$6OJkOB);Qc$kvW7Vv zAC`cI#~J?rKNMADV3gp`b;R6ggVtdm;T=tkErYkjLdqlELX2^()kP z?oDB2JR%}-LN*HG;H1X;N7~HTx*~G~%+&p(`gy?Gu*XEg#8?@xPFT{?qPRnIj+h zmFIof?)1ULw?kgu*{uEqgVJs-*gS#bSM_Nd^>b3$c3s`!x~}lsl7s1$Icu%xGm#YjSw}NK5yFI^Q1- zf29P4UExMg7@t)+rf8;^fwP!XcJTuL)rZoHBqbjnUKVkC)ee^pYOFqAXDnr9&PqvG z#?!R)L_N>c9j|8nKf=3>@8^`pty>Q4su5s<+9|Er>S27S%A)7PY2OLAlp>lrbsv2c zk>i*OSv_EHY^;1~#O|AUkBp*A9yRhzxUU+;KAvm-S_wG{dwlhj)2mIsvK-R z6^DddfBe?`arvNvmhQVdi8pplb#!PrczJSGy5R$+9YUa0LI3|Ztzk)auv*j3n9a}X zDW>b2P?o_7nvVYQ;bD33<o#mQ&dA*O>4Sly=I{Pha=N|-3P)8pDI82`tXB}- z{6Ny9;EjZHxOnpE`Tze< zpFDZu)F}m}Lq`sM;&i-S0A--qqs$!y7 z|F^CE_4?f0`PbL|oiZt@tni`B)z+1QuO(mAPdM`b@SK+Ve*0^G_nw$1rPDBRqGRC1 zgC{y3{w!|0y$rL6llJ1uKw5e z&(H6#4&Q%%_VJAe>8bsCmKae=3d9r62`(8_u%#Eg#7a#p79<&(#aFP&fw2q@4#OT z%v+RL`k_PKNrx;ZJhl(o^rJf@VZs#mi&y?Ps&xPV|NrgX-&^gg==ly?=O((8$&D-3IC9sXQ;Nm;PIpBO& z`hS0ZzMpBl-^h5U!IorGuI&1j#wjgEM<%h#eE4jBcWtp6<^s(S9o$M5g|>&O3_YyCa@VAJ|`zP~je zAHDCF4_LZp4Nq2<&;Gi~2L1!+oy}xI8 zXWv-{%f`Ou=EnB+hi9eR-rkms+n2(@o$TS4_Tb5pA{VQV+-E;tw}$4R$T!TMEdT#I zm!C8;FgTDc(~)UlHqWT};pgY|*Vp~Mx98{aHIec6_qE2&IM`9u^nboR&#PBH@%#Th zewKD1O@ukiD&c_G>mL^l7-eL5s=qnhw68N@VF5MRZ0gPw{4j8ilsvr1l}AvJhmVhE z)hZq)rWaM5$82XMndriM#ZD$36Wu)iD_@tIkuJaZnm*5mmaosx|1W?4Z*A1q+QV({ zpP&2t@596W9UP0NhS>8}@#!;6Iv*b4WD5^xla^**#Jt7O5k?q8Cn>j!a*)Y8%~>>9Vc#ko!2 zUf7v`n*F5Er+X9b%#__{qr@>M`iW{(>ipD70omI9k-LRt3*TJb>NRQhwYb}PF51HBE%I$1!x_>jd34M&D)~=Xr~D z5_v9(1gfmw8$bWQg|5mWV zkN5xg@-S#^-^|Xw`Sb~ckDrb8qP{%7vGKl>Q_uT*ieKvfo)Qo^aOlJj`Ni_xGk{baYjX^p>gd|)=-PVl#tg>@iKjvGXB-V;I3exKJ9F1^H77{4&hN}HsBsnK znYQ)rfdd+B`UhY1N~xa^5H!*0x?!sMFE??AyjjHkm-@@+d}x_o{H|J?|LwNfuTQ@g z-;>OKVO#dBbIUK6*!jB}nx9RQ2ohMaZ@oOJ|e!g`>pz9T=Ok<-S=MAm1@Xy z`r5Hyo)R@IP5CE(r1vVPb_$fpHY^Ibe&d@kgGujU#>+=aJ%t|4(0N?6aMg+87KiJc z&NlqF|2_OV)iMtp!gppFay2^$xbAl|%Vgo+C~)M`A?-JsHm$wWuejMXwDvxD*L60E zBdjyIt3$=BsKLkfu8rNEhxuB&zwawEtW-~)cYUAwQZdC9xzl&mg<=ywIesOZo386OgJd9WQoBMlZmOaQJv344vJg+tM5=yJyxj9>>!}f z*Pq?Sc%Ak61DQ;&r4ycKNR`O8F-9%cd1xUmlV1DxFH1EOc-8AHTdw8+ftKUT9R(aW zh>IMVuN)Ck5!auzoLwNgQq13_^T=J3PEVdmcG0}iXQur&JhS*s@s1tEFD!~~=cQKW z&RYB4Q!}h>28vAAF}@VCJ=gd8%a`YtopdUQy+6rikMZ$cN`0=^110MA z_Zt^_SpTb^;mD#H7`pzc&EfBkC9`rP#F`k^btfJ$aPZP#J?|`1Yw`Y(df6TD;?yUx zyE$6|1m@hWZTu+Q!{fqZVjys2-d4ViY}^|d3}loN&v`CIkD}1ut34ASvr$f8M!JJ)|lUC+I()aws+?3tDg&w#4F8E>s69soBq7U zm32?BR-Ge@;FrJmlPe{XWaQxpnJ=GM@!w zk&Ln7L^I$1IWCY=w<+crv%b~?F=HcxKhGZ}UAUGuWlGA!2MJG}rapO`oRHMiUy!g( zCQxiL7q{`9-QUAqS{~js7TNB=^YGcf<7du%c*?EJ&%e&?SO?qwI>SA6XFt3WJ-j+R z05o|U7|52F2O15&v2pJ1e#xg5Ts%p~j{p0g4z*cMf<3eSkh`nmQVwnH<_3nv3lx}{ znO&wa>T#Ph-ZV)$_WJt&B}*me+5Od5Zho=S*uY`>?f-vHe)#>p-ptHq?v06jar@IO z3Yi2e-@M_`(&G95KiOZB$HMHQvB3g$rWmdbUXVhx>D;M9(vlw*`v3T+`h$c0m4Mcv zf{xo0j@JMGcmC|z4?jNIGctmf+|8IN`QZEIn%~C`9Q*eFv^ZN?8E9fXBJzK-aq*I7 z^L&XuxzrzDxdjt#Y(TSQD^@+a(zLURdF8|gr6&7Y1#Qrf-T(iGV%{{{^tbM8`XK*i zO`*wAjZ+bk|MypYoj!f~!*6f@uUW%$!d$$t&@g8Au>)t%e)#nCzkU6L_lsp%13;@q z54ZF5%Ng3#^$7g=+1O|K|DsI=lYWf9YtPFMm{X%^IHl^~N=SPks2NdsWPpPyN7=BOhK0 zAMTUoxxY_w+UJ5IqZ6l%KYVzwC!X7L`BBIIddaxGNeL+_760Zuyspot zrp9*a{Vf^42any`Hm6IzyPI6`TTIYMMuw-R=E%FagI9kXh-!Y|>5Qo0cOHBFzkbRl zMom+P)1psO8Y9mfxVdJ{|9g9WoqSR@ zle|4>Np{7Tj)z||+veK~KKlRPSVF>J!=_1M0WM6udf@wxpVTvGv1P}IdkVWwzeL;KVQ)jw65&o z`*^k+MW(ly($dzx{g?9k8JAyI<$1sE@(Q2J3M_{m1SY6t)TSu@f7Tx4BYgUhMbPyS z(V$JaYhG)9c(zB2>(nG2<@27wHS;b0Z8UW7`ZaIc($_oO@@)API~Mit*vXp~{qgy% zndxQVB)(?uMy_TDo;G_Mc6kRYtsRz~oBbbT-+!Rck=Hc2`9alo*?C^=UeT+AG|q}R zG1>p&*`NzS|Ab? zeBFfi+Pi?CcAiboW3N37fBf5~u*CYT`r4P(@4yaTGRK;)$w8paeiA3I+X@eh#v{fR zJb5P+-@I92zG35*s1-U(9GUYHH{axNOn!9#+~OJERVGht-KGEaaq#g-*GUtWbxcSt zx!aSXTYKJQp{bIKpzq^PMKU+@HcnG;ICArZ|C;Xj-E#Lrzgs7Ea7^dkEuSoBpfv-M1T| zd$zxcf7bcF@sVWD!-^-YptixB+55Sg5=4&Hch&t=cj2%zkZze4w_y2;n^g;zJ!31( zC_3fKImKm*Lq}WFD{bjrmuJ2!*eP>v=bUT}8G|*JY%iZ=$FA==?z8;LRw-NdQ%s~P`WpzMjELQZ^^e-S_6pBG_C!=$?J3{8{{p*Z&A|~fXO=DZAqSo| z`R+##BCjVs?BdhoitYL@A2{LBwUx~m4mz*3SfN{WXl3)G?M-btCnxYvGFQ}$`x^K9 z)AF)UF4J0bm08w#%h=D>X36ZjR$}$#_m$Tce1R^TJ5whmr+&7y7)m&=Y`He&bc$r8+uCCZ4M8o6hZQUTH5_VX+SdENzUEW) z?61`?Ur+j!8n*XSh0prqImb9bQMhK7A$L=NNUOb^63e#@2UHcA7B19Osi@x_|3G5O zYHw#3g^ZUI_9!eo*d`aKcJabrX7P8HHFubgSA~9GY4tYY(y^2JI-*S%-m54Al9KTo)ncp&A#XO^~`jen14 z-p#pi>*T{-W%_3h&FFkk;o{Yz;A<)KqB>0b(Tvwq|H>&!xNnPI#kEw!d-+*?@ik9t zp4HVqzi*-Dd8f$k*mwS?yTfXwXI_f(Q{QxE?n`EnALhi^a<>@p)H*w}7rshzEV%4> z!Tw!;K}msD-l82GO&tq59uypIRhoC=ZmAY;Yt6?=)18h!Kbt;b$A->y4o@K$!A)=C zjy^tM(#TPJUN+1GxGQ&$r5xZeuLv zO%4cENc!~k>?SX%!?QQF1f3|}q1SR?lLE(uZ5O0|_f@t{k#Ohxe^W{GVWmfK#^24o z+a(L8#RVLf+vi;zSkV0q+-FEScDIZn%PMk6;E^eEwe(|x;p}*hxr55~8 zG~V*TOFT{}U}@QdhmB@G_SdHy7=XKh;pqWEP48UO($W%=nv^=O*8elGsXkWll@%KgW(@Nd(|##vCVhlsXpAJ z^pEwSpH#$+#;4KGY7XCV_|R>%&P40u68`(@z9)^^9-etv&s(_v*n`R=hK>!Xf^90C zmFv0%4|T5Ve!PM)?edFVn{-?RQ$1rZn`~M8>tW@x_gjy&yQy8eZ{J|^AX9|phFPd< z;GWp8ERx4-;`w*ynjcqAo}sp@ukeZ_{|9iyuYowRb-$@*Q1K5KDsnfaO+$8Y~w{AY7cw&OEy-&xjY-}qp2cSY#5ExOn4HScHtdd!65L~$Rp z;47)Mnp}q_=qz4!VU~s6#(-1v-Ly+LJyQBytKMb6mOMl4nWW&f*1{uElUOFUuUA}d zvh;Pvto!d5-52LuUwKDuN73yU)oqQXdw=PJi^Mgvb@^Hxcv|->GKz3=GWqXtG@a)B zV5fSLNQR17;}iGIO`MEvb5ka-&6@T6)|T3{s~2v1rDR*~a_7PIUmeM(gw{&7?H6?y zkVu=cY}rHkD+*l!T{_M!HtDx5;$ELSezErW=e$XI+hffg1zjZEr?u`{mV0qlW>V}+ zt7nq;&8xT9UjKRW(1b+`R(VWXFJb+9+kdO1E3u{($7Z+ftB-5813NLP<~yHZwUa1Y z*oSAl&R$%HkM&xJF4)2OjM2<2Lr`6bNtU@mXVc%0&5q&I9i5sUKKExcG-P9DW@BY- z(~n=URYF}Iw6?dW$7If%)-^7PiHV>UL{Gih($d-{t4sRLO-e{jt@tzJ;W=4QXXyEP z$$x*76Ox^0)~IvLm?5!dt>mj$5^vsYP)|B@i)C}s!Nm-|fBx6Mu&IodJ-jZqqjo1~ zqK@JJ|21>GA?=joUg26I{R>`*+BlwnyHditf*!iNp-j!fi>J>#&jtK{OF zPYMD;|5$_`N&K^T8vSr%mh}18M;{;jd+zNqn|i;#vjqw)>$-!=EGA6SNsbg~GTHh= z{#XosY%P;o47Tvq|DW=;e3qJ@`jkbP1+l;8?Q+vxvLttB%nK`qr!cms8`{<9_7T2TxHe3miL?AFVS>NP>G+mF_(ogGL6AQsJQ3H zdC9-*mv7}*{E|EEl-+XRvp`eju|f$p<@lvXN@uD)l2{6I>n4-)S>?MK{>Zf37az8L z?IeBN@p8cRExJK*+K^;%WzI&f1_K_K`#~>M_#Oqlc>XK9u5od9^zIvF1p!=qXHx=8 zzMPn1di?1FrxqC-zq$Q?9>h5{%$cbcwCMfJ^pjy;B}ux93rvq~SeL}pUB70&^k1gq zjX^ommfWJV*q*!SBp->|v;1{N^u!5LTc6fMXyngY`fYA%qJ)I|&OJLHuz%_!B$ZD8!&+}6q(sKt|W>cIPiRm`uJ@uVF6p)Jnw`>A)^*VmHY-X>Rk z=c_)IkdOd1z>!<9V)+;Qtn=(Q)pv;eIWCbb+oH<2tp4MPB&`L;2SOJbBrM^1qQn?E zjWIf|_V+Z}?)@pqVe5E8)OgqX)MNL$!H$c;fdP#q0);xSyS^(H&|CDS?w3dz6d?rt=kaE^ajn zi?nLdDROf8Y&=hiG5ke>8h1=v!1eO~f!4}AEtgor*qHmKeEh6_`0Q++=@ugAy-Nz+tXIPFj=n8B! zdQkQ8xcR>{mQ5T7LrynL4A>wR_~Wdi(c}mIGCWVz1XI4e5sRAmVd8*FV<($1Qh ziMLGH^z_(*gI}butbEDr%b0dyLI~qSCKh9*e=II3Txv#!$ z!eV}T2{GNI8ygy1rW`zQ0Gc{dQc@l~N!hR=;l$Zg1H(qWlEX5$4$0hgntVvaQP@2# z;i(Ykg;xrHPJMWx8YEulF7$=z+MKpb!L&yS2c~(Q&^FswpFYE&G4Z)y8#}+mzIycs z2`NyMe?oS4X@^UsolQzeNH8#M^?Z8(+PdsOaN&dZ?~<4%y^>1W@<5;G!kiX&<*4i_3Hy0gTka5ImrHe-6cK#v_jjJN zXD{64Pnk0rI(y2=Z)nI?Tidoog-u#I~b*l+oSa^99oAb zGkuuR_RPSfkx?ZucYhh}UX1l6=$knzBAfwDNEqQ%$?ucva)ckHf*c z1cLV``0AbLnQt$7ZeHpPv)+eCIJ?3OK`HA0f2a6_KksA@uMFmi-)B(s=fs1jM-QCm zzx#)eea2NT!(O4JB-U_^WHy-pcb1|`i=$IhV=L=kg;oZo-|LPn=V1%7 zQeauD5aIZsT=ZJX!3HVMBNo!DHy(My=yP;OmY7A=myX24Y@mWD@9v^QU9pg0I26w8 z?QL99ZQRmkv|^=WKSP2`)ThG@+s(VDcI$M!Fv>vmpC^!IbzR@hi4?)%=10uP96Xm3UcSE*WkSl zplK>eNuF1)cor?{XinSkEnm_gh+*9(mYpx#@7AV#t^KgwtRvHS(iAn0vZn<%n?hTD zOnUst6*STI?S1-=I@!NA3adat3NllXCn8c(L_{JcMnXnb(!^BK#YMpT?+2b70Uwzk z!EDTey#8w%-Y_xj{~00MaFfB^to_6>qluYJhkktNOnev9_V~DD-aSw>Tj}yZD;uS? z7g9i*$x|mxOZ)I8Wy7|#3DeUBL{%?vHD(_{06_b{=TR@v8pbm-s!e?dU%GM>JN~&_bZ+|dQ?H<#DWC}9=tlWV9Ak$gaZm{ZJXC}ZSDEO#?B%jEXm_CUE>Gi z#J!?1yoqb{j`*%utV}A#F>k>Oh0z&l!DF~ z1?|%dmLE^ZIFXRTVtbS=%jz8?|C&zb?5AfN1m8utN_x|KMdJl9=u=vAl$9=gZz6FF6OMPAHSzJ&XlZDx&HAnPyRiFn4O0{ zypaS=FyG&I@$l9eP=`PJ!KbawY;4RdF5bMwgjrs`dA4+OC+C+mqZ5(&KW?uo(3`lH z#pYCo`1c*FoKlWI$jn_(`2SC0#b?mg%C`IaLFwSee)%e{dykLv)c<$7J)0RcSt@^vfz8c4z2w9cuJrGZv znQP6};K1V|f2b~irTV=>_5bdOn>V^!?M0Z)8n*@;Jn=-hV9sHQuU$Ic zThl(z6Ih|OsVDWI<)=9&dk;9i`Q#S+U}DUkL&u*kab2_{h3DEa6Txl1XWiyrUj09F z(R)cY7Y(j8OLH@&j!jfaoVPOjuE=Yz>l@!kJ((}R8gvH8|Nm$1T7ynoIrecsf0G3B z!LS^b_w}0Do;CJd+h;g_W8$`E=g@59(R}v(?;gv0r;6uVdpgAii&(f{zVN-`{O1JS zT<`0Fp*@R}*4;~9`^w$0DAsF9y>^^ok<6}{xje25PqGJHKC)>;?i=gpD=%_(+1V z6Fhe?YR>YyxvFa)?3yqsPh7C4@pqWolQ}CymudtZs%}|OX2`bJnPsaJ%UVs&O4oBo zYK5jb73-W)7H(}dS=h(CS9@;JLI#bn(~;4_o+oD6O#kfnGq`4_BdCD;|DTyb`c>Br TwZ<3qpvd%e^>bP0l+XkKzc=NG diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png deleted file mode 100644 index 86d225e5d2158804f88dca881f69ed3ab287d866..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19086 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s33iWhx45^rtlpw*nn1P*v zfq~)we<{|*4C=IH9_imGctC;YNKY@PEbs4r59i6&x8IB0WGs<#(Av&?&M;m2apu~t z4XaiZt$A5rm21E*zH`ZutHptb<$y}g zh1S5pgWdoC&(=`l<7k@v&i0LdOO?}JWB1#?Y?d$kxh$kS*0gVa;)cvk`?gOx5*u>V zC*(heL(4m-PnAtev^*FXnph45DW$)$o~C+D^tRY@$?KxuWBV3A`Ey9$9&Fc@kd0i; z4kFd%&%dq<3K0+w{P6$(iK9o)dOFs2zh8Q|G~>HdX4`}{iYM!*AC~Y^Ubiaxx~OmP zm9LB6zd!uo_O+=?&l)}6+k5eu<#@(Gq!S zZqDp;oBI~~OkVSP>-$YQhyJ|I*w+Y(mH+>*Ots}|P7rV@zrLVGLdBJ->=EOU-|+{e zE0_+({c*f7cO#eKy#sk|;pdun&X-vw7TMG$Z2R1cwvC7EZ3N(N|6Wp0${g)f32Y z>)Np!?H7)|Xg^rRzjkVIZ?VC%N=~8f#{$NW7SDNfc2|;$cj7spPL=DW^2e=wOXuy> zxnahj;8+*)%v%N=-coNmn`h3PICpOA)Txm%bG)6MgWTP(9dQW=ojP;&?1{a-oz2aS zQ$?jJZ*XyL`NW_tzW0oSuXcf6QPIJ3ZR&zMES3cP{Q3F*a{vG5=KU>yaNvLScZqv@ zlPi8tdwAcz&Ay(kT8j~4(Vouc$&+U`H^+udarX7~2@k(_E6T~axwreZy|J;f^zHhz zvt`UzpWL6J=te*gZyU*7)T z-+zDqUta!S-QVcVos%EFXdYf2&Xa%N=+EENA6{Q~XgB49X0x77XNQKlvuAtz_QnSX zA3NuF^U9Rz^Z(l`Kdu)OV{<$9zo%zLU|&b+44on_hxNV>CUQ-0>NGGg_;WOPv%*rI zH!)APw1)gXaC&O}|9^k~f9(c&t^B=&S#FX=)s%-{v)gu;OaA+lDAAG|0kQGSwAF$K z6a-w#jb_Y95D1(p!6TcXIg@WiquwLq6&nOO^yV{ND9^k9ZqLRC25Hx-KGv1y@h5H$ z({ydPtit(2w0mAgv0LNm)lY5rZ!;|7c@lc)zB|zKbO4k$t zO#QDqpndtJrgGi?!5S2e(ffQM~l?? zYVz0y3duNmIif4<S`YU`FttSERdu) z17!BHH9C1oi(Wigl9ZyNs(O@5Q$2x|f!StWL4VTU!xLKF_?J9!_e#|0WZtRcxJubT zdf|yq=4#32sK*XBSNO_2xx}=#Co$oGhgiYH{q>UTVv{Wjn;!lsY~vP}xVKMsh6>ah z4M!BUwV%9Lk(a+dDN9RR|2%7Ek&%Ic!gK)-Q;us^tm4e;p6M4Ya?IVQP@=e8dX2AN zkJVHAv?i?ilfL`$4#7XC0vCi=fvw$!1} z%?X}XD>~Vh`Z>NT|8vyGaE2+j+tyF=_dX>Y2;6x;Ovmo~lq!e6~`)I?Kke=@14I2ZeO@E%8nCRdq<{^0gz=03_jg5?!Nd{@Q8@4M3 zOkJp|_T(J*yp|^0ix0AL6|#>@DE@7C=Bk|`dNPHnlUc9z4%g4m?GGQhw!OVAxh*Gg zM|pqSMNkXl|Nkws_VTtG@U-mjR$^PQAdy*xE8^i*okJEp$E_A_$Wd@w?{LVq4Q;rYs{ckHoOPig#DmDdYf8OE>GsT8>OVb?}Ah4g1L*Ck8bF#GoRuk_#P;1X$3 z=pzrmvQuYD0>Z@7($iBC5*m)k1kL;?ztXJHRa=QgQEOo!hqo}#hk&${BclE%Rx;X# zO%N6fYD#eF(%Q1(Z-16fm*xNe|J{qsX2o{2eEM&1^Kg;mp(6~k9BMK%CZsej3ksXo zIDful%$$W=r#?I*d8lmyD5x3!|6f$J_0W+c2hN|LIQ_Z2WTuMl+4E<5J_zl3(qL{m z@z(7Bk_Bb%hOY&ul(B7N`mcZNj`Th+mSC~1Z#!f4KgmdE$$k;f-6xm1hG%n;;;J;q z)$tRuXU?~me0Mkb#ft=sl8%SJ%eDBoLlbM-qlCo7ppYkL&+~u&d|62BSyEEdQyq=J z^CwPZ43jvJ%(u4H(PuJKw?)(<$J{l4jvBB`?2*{edwlWPqrx|p8ePA|&(P@#{IE^e z?KiVv!wFt)8>JIXANunA`42C2<~ctvu=p6LGW-93QPiV^hKt({yNkHsis-$G#FH;M( z%>PQuwU#uoq(3~^FiXV1Gv&)W*~3px^6V}*wy8Mq;kS9|BWRi8H@msH`RE}hLw)@V z*TUw_o;`Kq#7@SjnZ}A&r+k?5vA?lV(Cptt zyqp>1!Va1&+M4o^VXbO6(}ry_fx1pYsVPTFY*w9?ytwDXfBRX<7jloBa?M_M$R}Aq zC`m&v?ZM;Z8(TUbzKd;}Z*LguX9UToYoZ+a4hRUyEPD8W|E>O=KMVI4d@()$an@VM za82K;0+qumnw(x|(Eo-YdFxXlZX`zBlWf zP}0%h!(SEJrTX<&{=O1#@pcUpL!h{ErRI;XET79;J}an_R2-(qV^83IV0-d z;+?#jx<&^c$*p>7uXI*<4%bK4O|cQ}<^@ZcV?ud^g_)(LovW;vnVAnavGVlG2Vbd& z*sf#`wmtLYl&O{0*4mpl86_{_QFA%u5Temh#-{Qk;iwLCkZo_wJY{t$wrERKuPc9;acXy{_CdFq2!D(&HTy$Ib+9U@(n;8l`5mOJg$Q=J5 z(88dicgM_ai4e!Ep9z(M>lm~ZRxm_P6P&Fer`uy8!6Ol~;&)=Ndy1LWArTj`#DoLu zc&6`YU~qO}Xq5fW9R(&oaW{#*A{MM^=J3cY?hp6 zG($pi=7eAdB?ZAyek1W?&6$m?+SjJBuq2$l$ugNwu0ir+&yLiIk`cLyF5Z#_u_-Ft zJ2+?Tn!vhrspOwO5;?h&EvGP=K+6cY8okVSWyq+y8AWR{J~gm(A~s&f7N)XW%d z)i7VaF?1TEra@_`79UzU!ax@N|F5+6?7@>Kv+~vn2_`OFb*kTVy>h5t z&ju!DrN8{H^8{^oJd8?M?EE2+<;MlqDAm96?9#jfZ`LHa8SlHsuu973Xv9(p!`n(9 z%FMI0rLVqhd6sYJI$^c~&xREnHz)u2G4bKs=(f|-J*90}!BM89Sh!)cv9!dABgbY* zOM5##^EFt_z~<7(*q@+wMbKbNKym|roq>_j0f~sk?8a&n+V}iRK4MiT%(0B&5o1x) zfh`<&_ZzS&zp^-6AJlIC;d}f9Mq}gVZtlaUr}M<`Gq|(+_=o5G8|5HHypm#}hZh$M ztK{ult2S*kJh144{hqn3<%-*4`d+^H?-8tfg6je2Yx%HZg9G(FOB~|X2ruT|%%HHZ zC-K0vvcOA*%EuJg#ln-MxgCT1O;taBFxauf094q1QayaU-{r^D#>U1*Xg=F^_Tcg3 znfdEqzFMWK(&FJiVNaink~PQ1Su$Hy98@%CC`M#S1~H^1HY{g4^dyKoVVj>spx~?| zoqwwP)SEQ>)-*6`Dm`fO^5WUIkLTDio;PofRB=KAOslYy@OGmyz+}YKEO>J9dTqkY6*w zVPmM*TiV9&WOAHD}9EK^(}AyIIu zsKCI&(r}Lb`45j+8~@&f#L1!Q7CUxE>gqg^l@)DmRXn8-$h}cm?)!vS&73PYHLQ8i zbbh1ZfkWJuh7Y?u*erhkuW#F5|KG0WN8QCm#q4}{>c79U78Yh!SMT0qv%j@ z#zu}aHIux!n_NW>^7!TmHM;!YS-k(-yT6r>j;wd@|5pdAUCbGG^Kl@RkR$0lW zrnX4UuESG4L+Jxcsf`Tzf+=@u#~EU__Hu3TkJSgx07 zCcfcG>5@atsi)>OGVXlyvp-kyx%~{z|NsB~{{H9Z@ulA9&(AD=_V#xEeEa`@{z_Z% zR=>S9n_E0i#`f2ZyRnK|UUMf;?(FPjEKgBB#M19yxZ?Qywhc@!Dt~?-|M2$qe^%~& z@9zA3d~2)y`nbP;Ke1{ZJ$0(!>#O~u+7iM-Gm?^LE?KJSB|jsGheyM+MKJR`w@X)N z=fP854{sQ@WncfG3Tlrr{QrMwrbR{NO*XU;6yxl>Y7z&Y-JqFBN*p(m;~JqAK@ zDH*ks>u+%JYyA5D{{G9${pvmzo3gLJ{r-OcfA`*w^YiO#|NVJ=w0pgNTutr&f7L~s z%GP~vvF`nL zfBwm)ulo1p<@KG#>+Amg`TYO?{xv$Xd3j{yil>~Pa^AyPKM372OHKt0rfd0Bn@A@ zFo@aHVGQk=9g3Ut@#Dtm=+iT2coY;Q^aVd$6?4H{(!l6nYiZJdrK?giG?^`H4ffRi zHPz35ywLgk$0a-*KmPqazcu^)ww#^&OJ6tp&)=|ao0`Pmg47)h>r7hQxgYN?|8JJ} zXJ78^@3*)Af8ZtQu`j`*YKo%vYkN<2?F7E!hc6!`@pU*Zsi=^ssg?ZlRdPc6dT@W@ z|NletERG!cv3Bj#)Ktqt6^<`wl|S@p*PP(K*MGEZ%O(4sDIfW_J${^@{CM5tHYos?{9W)E_vB%U+)p| z<;S__GJD2#R~oc^9~iYvo_z4~WXD+9>TYPRRZ`PAlb)%mt$)5Rrtko}`h~K7$6xCN z9JefK$ZeKNi)vif^Z)<<`@6rFU0R|nZ!e_PVR+#4^ZVO!tABiY`h9(Tz2Z{KH4M6u z0+C%0m|kp}Z}+$Q%Zu;V*51FjN3k;SeLFwT<}|~Yyqh#J2)ljKwz4J_J%Df z0U<~1ZbO2=$zNv88lR}BMQ(1BWMp_Gc)6!U3SGJKL0;4M!7AT>hrS6Vd8*d`|7ThG z>EXk}{2p8Z96$d2{C;}+{eJoSkMCrQPB$k6ItD2nP}!JK^YzvHjmh@+_Wk|+R=Uuz zhs$qHf3U$RERfoVnyqVDbg|M|_$_w4-tYX12Y&1EuuVSo7P zX`bVK6PIz<&lXwB*%WFmF5Wy%Y}!f-sO6qAH8nY6Vvnq>YI=J98)%8V$z%!3VQgHHrSTus(;@YHyJ z@9*yF@89q57s++1`P%jHiD%Mm*%J(Fa}KC>pK-k=Bz8Em?T(zwyAT1ctL`J!O{iWY^uL~ytueO<+`T8hqt%qYlq+a|KlUO zdsf1Mi7o}A6L<_vXRz`xPtswk{r&Czr>FPr>w}*0vbDAIOTN3W?ymfAnZ~?hnl5`6 zD>I9WcY#9n|9>YHzPh?HX=zSV)3PTAoZglsh#$N*=|}fOsSiu0*2~-1DKBR4W~=-2 z{!1>;Ggv&QF`)Nw!$R)JIgFu(yJr6X|G)n4uk-KkPf_dhpD%H5pK7Fd)NJO+ zIS(2nSSqR)i_1Y{PlYcpk1sVlM3YVzjhE`Ifb!_ZK1yKzCN_W!`kEG9b-_?an1^e^bH z`Ty^?{89J0e}8>_e|h=+`u|B*65((D9iGj%;gu`fi%p*S+woCQ>pD=sQjI-P_%}+C~@=jaT-rL(b zbLN`0`y?DcyqBN%rf)0T&vtX(g)5KiOp}-})zH}ZQ779Y#&sw8CNUiR7<`7$ASGNE zG(*Dh|9_Jkr?vI$HER|uP>5RjVLs#YGm>7ROxA`HYdsE}`q!Ox3zW~9nAX_XOlf16 zmKHWQ=f0#Mafgd7CzUPj=_h_=--IwFo{A77qkqpIC#R$?b$aE{rx?U&nEYx-NoS`a z2d5$nq@CNO#wRBi#>&blv%KxUyEm6X;g?fPrCIZ4oMO$IvOrsiAQnOX5G`X0ROcl7Asy#B@fV?%$xqf3j=UKV$Y53iFF6E9qIJM#e2he=9G zNJvOc4GMa6@#@n<9F97IoQj>&YR64EzKQpW2tN)8d!}w|lhDxH>lrz-y|-5|+OmDW zs-szwgW0sXa~)kJ2=+lO>B3I3Hw9ND6JR;W~8kWI`HS?Efw|0~+vH!kCU)5@-{zIo|V6?S%4_9=#jhFRI4KYrRIFr7of zAWJ@GC40Zf9St_Nw*PP=FAz9Q0+5kc&1I``6Kc~#OAMH)8@iAZ+Jd^;<2#_ z&;u91VEYmi#e^R}eei>Q_a$*%*#%1_*7SDn-w`dH@wR8b#Ib*=DSy)Q^cF5T(ti1> zOT&Eo|EpFMbarkO3iNoeGF75uh06x+t`37$D-DA!&M9i0dho>MnWN1V29K`9q@)KA z97>=QJ55t$d^T;`^TADZTWIb7gz_1Gox3jXUm?m8mHCl{dwzQ z!wJo56I~u2o;!DVii!dRVWF9lA|R4-`N9K-b&emV$Y+>eQ_%f(FlzY&FZmbBFKlei zR905S#H3u)iHSM$@S)M7ML8?l7YMKMRZ#kIS|TRqOUjWGKjJ+)47IwN`}@z#YMnb* zQM1!AY?{qo4hDbWh;)^L-U&_&_V&%Qq_%v3*_oIa6#8`Xv^1A*T6f|`DlCg&>G zs96^;PW1KY=-iW*86fcWwa6dO#zw|kZ*F^g&}hh!zcBlfl0?LwiHW#O5&ocjXJRm? z%t5s@#l?vgTHGaZc|pib0{o01k53-(4L*(vfQDe+*a z{QtuboP-3FHf_AB`9t_@e9qDH@oa7NO-(v`_oi*zCiLO81qV}HoLF^rpt-rBr6nH^ zd$O+U2NjMOPex0Y4cAOLl2uhCY;7ZRb0Sz+Jw4=$ig-RhGwkRVUf;~cQ2AJRPe7xL zE4$<(NZUIpDJe-r_^}J0mz&!bGZwF45j&o)Id%!U0YTn^I-7p|ViH!L_4~&Ofy*-< zJxK`*ew>+i?ZV~9iD}1tuFc4lHNN}Dwd?)<9l`?4>sGONzLIA?T%Y!=e$}d;yLacb zwfVS+F;`Xb1TQz*v1^jl&M8bWqP*tjTh2m?n53k%v?)_l4|6!WI5lqA_}2Mnd*sW6 zCCn);p<=qcf76qGtzUoMzIo) zk_IfC%Qzp2E|_uV*thtwZ~rH$u9>WEoOkEP_h)DS$M0L9BcGCzA|RGF!>IM(`ToYq zmu51*Ver2#{C}5LVj`%^8WzR|b?L-Oj|4MSa=2&Td-SY8@I()`D~qJh{moBQ+>t;3a4YxwKDpZB@9(DT$JcG%Xz1y&Av@!P zg!E@^&BBcv4Ijk4?ot=qc+5nu#XdDbH{_q8;os*E6IZNA;NtjE|L4bkZt;Kib!R^O zwPv=qZeFC++|1m(T6^m3`5Ft)bl9@~`QNXf>QKVTZOp-8U|?(vb?n2($>KtXd?ZhN z?BzC0Ft`xJ92Ir&QAmy}2Ul}*^T9(-7Y;c+xDW0_{{8-5URqjNSeW_P*Q-a4*dDyp z`a`6_T`bVWF)BYQiYF?nr~d!H_B->A9CPcJV$oJ8Qd~Ac094Qa|7QrbE-^j*>9c?Q z-U2;5JSWNp3>9M?T4t(UaFH;$#GV#)utfISA?8C4>o_E4%=ou?v+*IGgQDwMxY?el zsTgKHm0wl;LF0fjcjk&k>n|)ZR;W{!)z;p^2AQ2rN=#2rOlg@enK6}vxk$;uOM{1p z=YQc8c^(g*HL1tyb9(vT9A)ckV$|i6@X%M`n8xBF<*}o%Yl;jz&x7?|8=CglOaA+x zj&P@2ZovV;WgP3QSSO!tFt7dNTx@giQ}c`?jvML@PFCw!6Yt?=m7&HXp`aY;aNz!a z(9keUbF#BtAA8XpgFj-0Z%#68U6v8 z{r~zCKkIo`aI#igixjRGVrCA~X4!nR(Y`PtRpGd;(PMd`X2x=nBrYQXFV!O=LPwVH zpZRwD0CNm0vt#Up<^?dr5*o~%Bn_nIu~=*JNaV2X$!uZ_Wh`n+I1pB6kl-T0oRwnm zpu;9+<7o%S#zt+`#AW@Cj0L=<0xT1fnqlLWNr|bcEmxKQPvomo-Zc4;L)aq+wiz#( zXC9el*Wq~NOHzksg@+WMEF5;+V2ue6?GOG<)^u$sP?i z*Z=%Cm(gejq)U<1cy>*azrc;EwE_(L=kzeDo$HWtP*LSuqsl&~TSk?qV)9XYm9Iy> z2S{u<`AC2A1Fi?R*u@)MER7B=Wf1=Jhf`J&TCO*){e5V@k;H{l{5i%Oc~`%wV_e+^E>}09p+o^ zsv!#v4xRgz-f}mcVSk<^Uq}9gMN@eT1lPE7{JV0%!R2>za7VjB1wl`V+8BGmS6(WRLKALi0GihvhG?;22 zaJT>HERF(I*)0#JE#xg&I`#eig`Lp0;?M8z-~azV!P@ZO-~asYuQ@$C^3l|_bG282 z6_2!dcGN*1F&>LW0`(gm1G*Yt#GMJyGU#Cc@a5xQ>n+3-*da%>|9bE%*PQF!x72Pe-d2W{S!al-&{A+>4nsrAHA6+GvC$;zB-`_m8 zwmi%I3Iu6VnO1w|YEfXZGPWdThT+ zfq7NZ4(osUcYf6U`%^9>!^6gwGiQ#E!PFnx;x?&l%$>R1O|LfYo@1Br*sbl9SKAB; zHgE4;w|>8qJ$$&G=l(uJ0|Nu7Un)O8d;k0Ue-Y7#-6uGCI+Y9s4lmxo##MABX>E+a zo$dMm|NZ{{|N8p>lO~<{|Np=9BqwiSo)sTkv}+T#DRyK!Jn#}=zQ5udjx2Wl=7HPGOz`Ty_x&(HUd9-Vpg$P5ch!^h0o&CD~qZ~7g{nJIMnpF?&iPtB%; zgoGde=Q}zyIEGGrcviYi+T6oKrb3AEp9pjM{+dpU7ebIc{p;J?|I7VPSl@Wy_^rS3 z;s5{t1wMkjaMM7rNz`nX6wB}b|M&m@_jhl}%ijl^?=M*zIcKh=;kP@B-j@IBS8P39 zW^;SRhF!cr3k07$N^f9PR&HLR(!5gha4R>@^>v0df4vTgXnRiHn6dK7|N6Fm`Guui z%*@cF|Lf=H`}6JVANmGLG%aKEn2_kp>k_lP$#`nazCVlCzn8VPPWIvHZ($K$^+vvj zrE#X(2D5}tPxcH={>_#~5e+*mn)li^uh4Lgmp{BU+a*)6=)92Jhv)o*5!3RbASE2wKZN(PiEo5 zn3iAGJ1()z>z$S%_3&c9_s0a5o*o`YN1nDe9{qTuFJA)pQ3p3^Yi-3%8l2)MC{i6Z+PQ>fa`}6w+UDI z4G)|?ec<%z58reTPuJ&}ZEn1$>ga?C2S8(fYHf840yjfBi?k~L&3SlPy+sEc##iR< z=4~+GagpCIxQY4jMuFWI3>cIzED+hE$zgIs#DhhME9y>U*~@2h{dWn?c)nMICBWAH z)1=htua0uAHvg$DWZ<;AaLwATgtzxfqi_F8Qh;W=D}qevz>BGrlvff zuG&xU(Y5c_M(T6o&E9V z^!mi`$Yfc*gZ6t4Ht0@GaCMgc^SgaVUYu*IqgU6%m%(l8^x7UloHJK)%NB{2 zmV$dyJ}I-AmA*dV1eH%GwyQw%QDQ(|HsPrkozzKl%Iu3b9Ywyk-8@9*txyth1)l9DD&PyhA#xjZ+w zvYS;x!^T2U2_9v|NQVRS?Rid5bG*4`4UeiSPyIgwpLu6MOa1Qb>Nsc?SZA+{3> zIa!VRxmQ>HtN-!g|JT>`BQcyjrpmjdTAwo74Z#v-|t|$4AiYVqu}0 zt?jfXCO&ufFjdtwKEAe{#q|oefBpJuucyb>^TH)LA-CN~VXL}?LDtMf)_;GKK?_=s za%-5|%z60Ko2|5zEi{x(Qx9#t5Nj~|s9Yw7VZyLAV-YyvtDaj|7^5A*;iPOms z9xASt|NW5p@NDxJ<)@)#Kw?5dLQ2YypP%0!@AqM1>gmxi*7<)?Y=Tp7(>5~+y_g@y zrLX#%+3V|nAN%n6`TzU-B`Yc=b#)?QVk2#BBBoBAX=y2`kl8VL@;E>% zTO4E9Jdsg(?E^2>!`oIku+{%Js`+!`!#mk2r;oK+Y??QF_Q5Mt4qlnEa7D+%qug!E z?hHqxl|AYk`^2!Vr zE$jckLiB9Cn;U4_jg!N`!fMl}&&D1e1}|T1kdQRg)-pOM!`^#Q-u|C`^|$Z8N?pv7bk@&t^xb@oRgA7YzKYy;!RPEP_ zRge6p#kFzTs!7WXQ#Fba(z7=5=InNsFU{Fb*)L&T;Kgyh{8eeH zM3Bz=R!5fP0>0;U6JIJ%V|_0#+}B|aF6o-)`F(wP`G5ZXe}8`-K5(jQ&#QLEpbCYx zQh)eoA52e3Nf8iA+OjdRX{(Qf%cI@LnD?Ir;sa1 ze});^#`ku&+h}$zE6?b!3}!VjG`zF(@P|*TQ?xZt9yr>nnwr4Z_*m1pKvZ{ozH!Wc zzPn7FJW#vT`t;)^&dvLKzM&*qy5<{8=`UVYvB2;O1$99e8Dpb=yK8^1U!kFFllbYA z(Vw4(51jiaUq0(c{}G8uRyG;X!q#6VB^Q{2$}=((!#xY6Soweapa1Z8d0YBbNe@(_;4O{46i0vYRYhM1z?&=j1N55;<`C zvfdoqGZgApRUi;@n!84vCI$JLNvv1=lD*Y_J@`JNKaQXI^Zw=K`ZH&EK18dr zuu9t1{`&v?e0|up`W31yM%6COprwy@5_eRKpL+T@dBW7AGmRuBJk#sQY+1B)Dm%Z# zxp_zDfo3om{{Me7SC{XA0#ECHmfnZ|o;Nx_;N+Xou}nBQ=jMq?KW)PUzhAok<-3Zz zW{ZIu8{hfH=-|3Ex$vElwGyELf~9>DrD73TOUru#1#2%nVt2@{@b@~rMeO;#bF$|r ze8@bUdVR{A>$8}0z^O6GDU54ALkqK;+l!wUJx;R=p5?xwajkyIC05;i{|${m6RY~4 zY>)Hs@Vt5R|Nh?J|9^ka*mQuY?r+kL>gf*;v9_G}{+=f+EJDrqL%IKkRW88{k`9O8 z9C3ogj8nvq@A`-T{^t4qZlOePqOcd2L#!i*ZpejOllIsB{r&&H{im59=FH*gmpA6% z{-2&E=PMj>;Qf6b^L!(lx*mZ>pOp{tD;`a3y6!$h^V!$zw&Ld(+|NFQnyu1jUoY|Q zPGZIXc{8fGs}&D6yxkYc*Y|(E-QWNJWG8<3|0nUmv$TpY9S^VT3$bOn@m2hv_wYmF zk$((!c5JMyY<_-Bp=^e^K_(NyNi_AxAIaTfY;2(JAH)CuPMWLs)h6$#lfCGa=AhfR zA*xfYp|kGKkMoBP9XQdk>EFla{%z&&CF=e!d#!gtk#S#LDrhbFpUNX1X2!T^&W(o62M@vq_nkatKr@p!8229PWYny6IHkH}S6kzbXZ%Yhe|pI5 z^yGU!XvuR$g+$#S2l+Gd4DhG|M2#Fo~9;__Zv4Dcz8Si&TL*4 z1*#oynI?sQxWwepW`1stH&vZS*n%~Dh{5EH{wq`arcMcMAE;D0(BBZw~wx3B-bZ>>A z;iR)CPbw%kr&=1SIe0rnukFK252Ppf?ilOyZK0%`RE zmhg=Qk_Q=|oJ-U;J0!~D`myQ9toHwV`F{*1r3<@IGJ#wIQASW$OPKfL8y%muv3e zxT5fTp8kEx8z1?zRxAJipZ??5)Q3knzl6@Yc8%xJ zBcA&Ii}V%UvK|_A((K81$U@A6yaUxVOXn z!c9jJ**SMuN(;m@Wku%ys<3ZXt@ir0)bm!`y@v*Ei3>6(6s=#|f1@z0*x^I}@#t$$ zb9}RpO{`qDj+4oTKlxjHkK7&mWt_K<6?zDLnzL{DYmF}N`!Cgv@|-oNDD_PgQtVJV zRIuB+CSl&}(CMu@i*;763fFqIi=**3+M&Dsk3r;rh!K=e943{I>I)EGXXR%-qj)$bsir{jEiJ8e4Z+ z+}zhRCCU2NqJ7;89_>xt5nju^mM_-Y)~(QSnDd{tBTwX})|gLEPnWkn+Y$3_`L<^# z=eM7{ef#qf*B+K{Kd<}Fo+8t)BUT}ITJ^D^qeJ8)3!ZivK8{9#ExJw#x99kAIIeFG z`tqBH?TO{IqZuxf7Kj9Gy1g+j_3;pTyO&;0NImoXOq!3{4!uMV zna%ef|7N)U+L`ZwiLTX^r>nGPwV9-DeNyDSp%=6i-z-{CUv?cJ#xZwH31&?@wql{8C+Z_RZHw(Zihd0iV53+&9l>B;Jk#q3}|&h_VN3J^F}KSj4l@spB);6>NB zR*m0ZyCrH{H{3gLqE)X+?0nP?gQ~4B93{jq<+LZ79X|8@to$;2i(Au7mTE+-y?3r^ zB^Ps4;%3Qt*AsmgPdwK4-MX;nal-AK?Hr9k9-WT^u1`_zW(^7wsgMgQvM`umyt~jU zQKIMgT<*31C$*$L{LOK&q$_BaX~@-p&(~+A&)a-oxNU~oY~H{lj~icaiYwkL#PTcq zR?*3i9RgESeHZuhpG%+Aa(ZcSu$u6q7uf}W+UDDDi9D9~-7@a4nm@RDpEGMecT<4C zar>iz<9=1$=6FIj7l6?RvRG`dyoT`Th5Wdxh+L zZ{|g=7ktDm`F7(+$(J&6QJW(k?9vEzJtcJcNtNpPLYYY|FGY?&EK0OVGjZ~2Ik4%* zTM3>EyZ$Y+nXC2u=bv^r4#&U+rk^bx52PI~I+=4UCFt!w9cQmgQ~vH-q_gerN2lu> z%)i|CjS_2Tp4XncSS0bp+qU<{GLur*gv~i$As6(iVu8qU8$U{AN_b8_4hVpE5VvshFmQM0(S1pZ_CreA~tES4<<3) zdE#|}414W)oB31EADvb=op+&*=cMf8%JUrF@%kBmSD$oOvN`C9rI#Q}ilnP!=t9%m zZH6t1flp?*cmMf6cbN(AWtZ!QJUZO3Pgzto9kmX7=yLvO^o{fa{x@5ft?PW;zWCm; zL)vn`{cc-XFP<(BPC19njsASoJ$z*)&$n+J*&F&Q4GbQvE>UoBX;D;Ycz8}$>$~iP z+;s3_hzXNGiz0sfnR)Qiq=P3q9R2zldwU&2rZ_q?-%@X}kh_$7j9KBXYDZOUESs%u z+jMn78c$j?i+jH)TxBLa|>2?`|}q4c)Tj(!lIWi0^2sHd!BBD zRQGe{_;NV}h_vpXGCzndi0xR3=hL&f!r+bsGX_00v3zV=&9t#st4z-b%{&Hcm6 zz~QuJrXp8sfPgE%8mn*Ayn|{pSZ-@Ko!)pmw$IyuJFHMtX-A``ln#I3p9kMRr!zAj zo_lz%HNXAR)Pe(EP&z}c zwD+|V%jK5b;@x{!`nElq6zr!i$KTXoqkh`X=2Pb3d)iUEuRmS*erL>zRrB9}WdgbW z%*>5^&H*BA_RZEEYDzqMYWfLARSuKOKUk@`O%7FA*UFg5e#xwNS?2P8>kIbHvA=IQ z|KSCbNi8<&U2?(o;o8d!_9!e833a`HdD|N8>D;mHuZ7ya*akha=uzI+pJ{adhFROe zO$tXC59S^EKkfD9$^IP6I+rPOXn0Ray1gP>r+Zzp1YiF5J&)I|dVRz&vuld_g8z5a zez%=A>UzZOx$`-=VpPhqYnVP=QM-HLf`-Oc*6K|Mrg{}jmH$xC(j#hQ>|9+kA;)F= zSK$(d3*HYSCG6@zgFTRq-E47jZRYtBEG!cD_NNO7H1xf25DQLXvbo4Vv30t@iI|S7 z|Nk4mxp(%$%}%5F?lzE2prj>dSD$ucL*v1lGY=kM*t?aHpXpDeWbq+ZrUyG&jI>WX z9G`0Xg>ix$FEjI&WddStU8heN7#KXb-NnQh5zHLL|IPm0xwqZopdr~idpw^RK*pJr zww3Iwot=1yrS1IujbD;CRxxj8G_7D*rd)WEPv<}hXWFI%m%aY}o16I9txeY2Bd`4s zRG(SlpWnwf-YEbsQVk8 znc;XS!4ldeRazMP{W1IDGc$SE`2?N@__VC)-o7gOtpVX6WmU+%7vXc z*t*)j$u!d8!eV!xzkhlB=LbEJfCk6fw*Ma*7jB>K7$;|VuYpbBbliml|NmbQ)81r0 zVd_Kqs^ccS#hsi{a}K_BDprva-~6#Z_uSqN;~sGa~A7jd5nyBSXp^wWCFq$rRs0_^w|Dh z&_nxed%RT+EESKG>Ho>H_FsQgOG0S>)+tFYvFrP#%D=Rx?MlLP}OO_a%IQ3{vyu=1m(+d(D(=S@oPMBiBVbwNc60>ok z!2>@ro*R=mqE6k|)|>b$q^+LDb$mh|U^T#)Q%PA)Q=`)tGG547?dLFts-=634a^p2oM+<(Ntqo#>P2?>~ zdz6rnkdT}_VM2n2ZfZecqJWs%oi_(gIau^HePdTs*teP~yx&{$+LZ$iE4CbXC3l8R zlX)9gD`QxKfq}uEy0aJVo!d}r-B?_y0b0fi@?z1W1_nk(w)ptA6CP~L%o&1C8=fBQ zFtl|{IdwqzR|MlM-4up1*Yy4!IoPoMi9$rng0s4BZXPzMwK{xq@)rw3$by4Ko*;KU zc#t3W_Zz*rcXmSpW`@b>`De=t* z8s@CuQl@+O;^L0Ik4xB~Cd5B#U{F+KmC6)q4}|1jm_1IEKIFN4bJH}}qdc*tB@c5xys-pU^}F!=eg2@0|? zGqZ7Xvx$nbm6oz`a=I)QeQ2@gDCZ`Nd3jm~Z!z*KEY5D8Q^UE&M27$WrDhyqWIS_*XU`sgYM%{RXW6|0Y-boAdCVZQJ|%lI!A9E$YA=^L(~XPf}t)uKxdDN~DvS*P&1E=@Zbj zLqIra-d{i{>B6O?3zwSaO+9?<7-;hI`0)eB+B$ykuNSoL*uXT&&4uHd?+zXpZA-hl zlpmj1Uu~FeF6lQv4K%Ov*c~+7XIGm98qs+jF4b2b%mK0L%pF6nRs#Xoi>p_-Fm+Bi zr1jumdCJn>*47&=E+z?v%>Tvp4s35VKNC{C&qC(Eq1*3uT22ereVixpvij+bA~m)B^ec|IA%Ot_B01)`#^Ty=(9NpTH*@ z&LkXs*N@8-yi-cv48HIzb1Q?581HaeG=z?Zbh2`+w{b{w{qD(`q!67 zZPTsW>gv%sB|tUZYvGgt?O2PoQ%h4NW~dz#G|ozmyzza4_?bn%$5?muzZcGb3eH$k zlRBF#4jf~9tF<_HrBj-*0`C@wE&+}tmszGQQ?%{t3~K)K$u4dYF`kgD`R||Xk9n-R fkPyUvQWP^oPUPjdAN#Ie1F7bP0l+XkKdlJbi diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png deleted file mode 100644 index 37e8ffae114625d0cc6a07ab2b8dbbb7413a3829..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18884 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3?DlkV45^rtlpw*nn1P*v zfq~)we<{|*4C?rpNBR#6GAQ!2#4P?6zh3S1-gT_08Vp~o9>@jF-t4Zi$UURypSEXE zwD*B~HyX|>UY@yb(J8Y~UEzw_qHcbW1xL;s3NZ-sI9!xSlk{9({(o|&XbaQHdrjp{ z|L?wTsP2e4HzTOyqjO0|--)VxX9fmSwhjlEXltcaRWr`dsqVS^JMmueyQpx@?#Q_H zF`t-0M&7w%&DCHaz zCVu$*#<@W8M(NsRC%@RvNshf7#gxV4$tt4Yuz-Q(i?iU_$ocN)H^13^OyhP~(wXCm znkQ?Yzn(PZ-^`QTV9PFTcT#_;FDB z&0l`y3YN7Z(yL>GSyh*K3mSd(G2V0kaY0{pt;A+#4u-t9YO2OT{AC$PEsVb?{Ib@2rM~A;p&*?)>E`ELiv9TVWouLsw z?1hC*_DM=kTr6Y!>*Es9Gg@qW|2U-nZ+-puzx=$61xoe*{#5_@ale(jKL75o-=CiT zH%|Yzzv}3R&+3P#>odzXEdkj8O2bl}&66ibM$8EJ=(v5z$tN__$=^T3ySsP(fBP&Y zhrHNFD}4nn_RN~}Z=tAAh469vC1yG*g$qoQA9nr!`}_N+r~7Mv|BK!G>;KQIMO&v=K`0|8&7uKg$uNtRrsDbJ^hy3wl-Qa?{3nL z>gf+3I=8*QFIo35MdHk2HK;vHI-6(CoY>wT>(LP%AAjab$jy_Ue!jgkdwJ8d9K;;8 zjQ{=r|DS&=hogydg61>(ET8}B2LdY;#6@phI$mG#uyNLse@>U!l^@ri_;B<4dmeT^ zgEu!07JSz`ytkTXzP<6EU#1?DIUrG90=BBTIW%U@@jEg1j=9~v8R-->Nl<8dPY=(D ztL%MWK8h=`_Ip=6ayb3#Llb-UdX}~3-Alxc1ez8Jntom2t++k$|NrzK|K~rvZ{PO+ zzocGlQpDD-hmTy_+W95z>Qa7uPQRck2=$mtXLD!g#MV|%pPt*7f^MGnK7P#2#i60I zw^wlS#!d};11Yf&iaKYesZCR2+$`j{I>sPbHoCLv+N~e$cVb*~SN}^+XjsXm-OF=F!EmJ2??(BSN zzoz9tQ0juO-XFI)eC$X_UtAh#D3G?XaqF%|^}hWEg)Wa8ChV^_uK9K3!w1E~*Vgh> ze>bwJK4wti<_f=BIDEhv!=J+}>{d=HA&4pHvU` z%QH(V3qbAoaU?N8;ql|-7cVrHuQ{WyaNy9PLk&^Q3gPk*ml?$z&#Yp}WMW;)cj&6p z!4;YTDTcq)Ui1cEDQQ+aeV}2Yg~F5b^LftCGmP1HW<#Cr;hn`i=jZcBEOCGuej+kb zRh7}(JGZ``H#XLglantjj4eWukRP zSDbqA!v2hj8e3bvM^gruf?@j_9?1iu&LKud1_nwy1%f3X|Nr09(_?X4_rY|{xtMh+^ z^g@ZB5`kt9^5Pb}JgSpiFx57~?bro3qYbN$ygA`ub=AQ1hBqGvr=z^R(HxtT1wRZ9 ze|yW*FVF8I!w5;y4M!HP(n`o%CoZ!lHAO{R>Clk_2O6%ta>;8<>dmRx%v_z#yj8W) zQmAX`y94vXF1R@vzUDjQ(yUzg$Mq5`>xq+1jGCzjZf?&roMU--Lx$kt$H$oiS|HBe zGj$_xg8>iA{SHO}W)~5U4USUJPHYH?RLlM#a!hT>B8MGCap$aqmh9iZQRwlMpgY?t zW83#FPtBPuY;e(ew=zHT*|p5`E}4qFb6YY~uwmB!s}1qrcZCF9R94}Yy3S^bjp zd>u$IEOLL8kdkuYib!#>@u#m>jg2NHrKdMc74F&a?$K-D#e1s3Xec>xq@G>{ofEL`x*R4LGxi)z;L3 z!9`%_pM->j3l|e7OidLKOS`e9^Wi1cHgWwIhGA^rNM2MqyScUXYqxO48u`HaUGh~4U7jp}Au_Eg`j@XIvKoE)ypN>45prgTp@9(Pgw z-UL}@dD(*!PaK{tGBNE?1QlYxr$4-)*miffVOWj#KU#2LhWkYA%!WCPoABqzhcIs9K)6Az%%dM zh8Cf)22;lcjE0773l!Mo@KN~{ ztpd!-;^S`vjsUp8CyB`96og5zySt-ffY+x!;UAMe8UpVX_SzhTu_$w;6cKJ zXK5B?JrA#HYncZ^im63uAX_h8n#9AsnUBLDEJ(;@-Ia&?oV4U76x(}!Is944&9+Hs zM~{Z%S9wnsUDgAqMW6iYV6@EIIF*;j!&Q)Zl@}Y^lohUWPYxt2d^2ejdwL=!MwM%m zg|TsSFE6vUHnX(!;f2mT`f&lqwa}Cl1q$PkU}0|l?RZNJD5t*u@ik2YgsEv;?^(RBPY3WgehubCo?! zC^6x{bgv1nGiFHW=tx9FNJvP6tKy%NvR*Pna*ZD-Elrpz%E!H#i^pW*6 zHeAf!aee~(zJJC>|9=0U&uMn4A*gvlU%OIzA={dnLYqqz*KJB@xtbAib-{9{C5L#z z-=(*d2PNnxu~mF(HBarx?|P&eexYHbVaUOEcX{&f2Q9A!b$R~(U*z{lK!D6c=V3?`0yM%#`FK5Va@+@8`Akp%}zi= z)#=fL=g+w~4g0$`1%!w+sj^m_N;srp`R}lr@4v&RIN5r7_)gB{@!(|5?m9TD!65ip zJ)4du^K`v4M$FSi$|7YrMY`iZta+=j?7#t$l%ymB)6^fI+aKOGZ*y*6v0)kyBp^)+ z3yL@Y{3WHL60v#(hp{23R8?HcllEjvW6Z(>&wT^98Qf;L{Wzk@?4v)$Ex7H8fLqSQ zcD3&1@m-Nx1_>FCuZ0tJTLN}?eC1GHXU4uKAtB+wrPQNQM;~Z{;_u8EiGB6yKfZQ9 z{8898-+m)2+eN7FGzvFt-n?UXwz}%g%*;+>W8*_ok~}L?*^0I@v1d%>`1FIFxu-j9 zt8&}sk0ItWwh06^toA!N`J4pDQl2MMy6#b0gFuPBr{sh|4ihI=jhe5#JnTABWUr z*{vb%_mPX2&XtC zD>F_>VPXDXc-;1y#Gz9MSPfD)u1Y_$mZ4j;hoj+x<{TF>#m>evQzs^Ja3vX-rza#I zerJ#Ha78Z$?7KyIz;sbYCm^%*oKACdx1V{dgB(V(w4G~Wobt4mc>tZsS5}rI{ zbkTAS?y*l@urV;p$1P&A>^!xCrJ`%N46ZulCZ&Mt@e2tZy{QJKsR>Hj^B}%Za-TeP zs*h9C-bG6GZRQ56*($UWorKz#b5^|h=l)HU?LfR*(llO`Om#QLy3Z4rN)#4PWWD%F z_*=yBP?ZE<`5Dg)jEoEn4J9NESy&BMteW)5RRZdA^+FdnD{0Bj%^Uu-n+h2g8z^0y zxL98E(Zi~Xd)igsbTD@ab(HLV^72qrPQp}I=ZKI;o?)^1*^%4}?W z(vAg^dUOj73=NMRjr{#%MNzTw!C0QJO3KW8vKl%M-~7N@!%^t-RzTQ5NNirl;Wy0- zHhwhdU`bI4-l1o+lCe5UF{0c+V4|JDSwZ1s53eKxbM>3jP)F((&Y0n`Z0XJJjtvLB zxt}cLxDskpF!BG2Zu5m4^BN>1FMeC~fbqZKBFXSy3LGc26CN;ZzTuGfFiBOiV2P-V zn85Wb4U>5!JXvmPh>Eto$Y67IRnlz-)vgTx|0^jL78Gn?VYkl8{%l~lkk?YdOh93) z_C#}qgO}M;x&;?aIB@ZhgEz~LS2`BrlmE&qe+`%+rN9t=MJVUR15dGtQ~Vap_1se$ z8RvGdv$ohb4{d#q~wpEl3H34 zTef(-*a#LnbDkU%!0%`h5KSdVc-lsj0De<8I6s{VT!2 zi*bh1j&JvW{W4k;bMnI@)`y(X_Jq=%%|>R^-oE)_WGtMTl$4f~l#-B;mXeZ`kkF)i zy}@9{18qx%O*}jr#+nAx6`$vPIop%*;^yZ5-R1YAHavK9aWQ}W|6`k8-8IgD;?ZeO^l>ET6Xrfbkd5K#2tt19ujg1J25BcLT~G)O z68iXM%Jav`t^JXP3${sbkX8P}|FUG>{t3EAC8B2Z)n$3-k z55K>!H_zKM+xq*dPT}+aoUZF6EaPYEi8wwr#vn9F^dL)yax72%f1{k+-)e7eTF)={ z=ivTnj~+U=ZQ4{Hz3q>B=1H-6a$MJrX)Y_%(`#Fy!6qokBnAnwL(?sG?1{Ya-n{wo22^7H|9@z@#gZi~GBQhATW=;Dj?@V3Hfw9_ z+%R`x!)meg27`Zpe^);~cKX}5zwhr0Uw!`jd;i1R+waddzrVlq_2J*&?F(J(|9Q3a zd|_nNn5s6X=HsLHKR@sP_Wu9Bui6{GUitl<=k8se>H5OE=L^FtvfT!l6lSb=>-{0?FTer97$A{WS z;d+06e*XXO@A>2X`zt;?5Z?BQhi}6)Rh}Cnpqi#&n!LE)ADiN5&mSLG(K-A3|NMt9 z0^6o)OWxbB9>)F8h&fnh^M{1wWCPRGfS{y=gLmIRYPmx*Em~T3=<2Gfs9G92%d9=f znD1|3WOQIve|^O9bB2>xy*fYr|G)om`}?(##Xq0>UlOx5GBkX5>24*Do#=6%q+g8PmiC*H(U%FQ0Wl$L2wmVD7~D{dM_w zD&Ahxoy33k_s`GoH>cmvyZ`UYtE=X0Y-~YNG8HM3212~c+)7sNF0a48_xJ22Nz8oj z?j}dy#0PcOKU4Yi^_x36n>QHv9fl^sc@{f%Mt=S3`t+%yZN!s_+GjMwToyU* z-ouo?v2khC&Vq+G8=1e?{Zo0qMoLWVn6mr4`X3*J%M!S*O=+|*Jisa$@K|oOVvmHI z;h+EKKm7Q3pIf|c;&!$7-}BpSt0m6OQFP=!Ske%x8q?C*=@>QZ;DLt5X6D^Lp>h6> zpOG;yIXSSfP;7}nqnpZ`D(NS?*i>Z`=BV+#U!|~kzl7(L`{(AG%iAdIy5`Dyf8Srr z@^@#iuiwAF_IK}<6RvDWrU?I-t-OQ5M6&kJkLPc1@1L$Waev^G-{oyzUwdA5+VHb; zKew{t!Gy%d&1rBCySVNQ4Q;(~GjI}X7I(8lKC5QupQ9eyCs^5>Qm(JNy@ZuV=*Q2` z_ut?DKhOUEw};G2+&i>2A82tFnEDzlHQ;URvakR5{p)LcdAmQ0p9oBKZkO0stL)3Y zPEFmgH%w2DO;wf6(D02Kq|iAu$zsQjh_7E;MMZaBh!S0KHfT-nsd|sOaWK`6n z>|$d5Z?4_nCy(8`%KrWP`}_U3xBdI;{~w*m^zUI`_#aU&y~r&gM?bv0yq}%FZu5o- za)BNX4zsrfFPAv6dz*y+f7bqvB`VErY~L;#K&_c%QBisG>Xnv=$cwA^JiB7c8<{eN=u zevw~~{@1rjo1eI;(0_A-!{np4)KU`?64KKrOg(D16B2|@{xWOU`0U!%q@^`SLXvND zl;g_02boNDhJqhoyuWXMeqODLrA@%W_3{7r)&D5ef_^bmA|JQ`EYU4t=XveKM}jqb-;jQPeQwZmZI~WPyfT+j6_tHIC{ORDC(AZ^zwlg-^4kaXLa7a zDRB7hv1{}Fs}yt07v?+PUOt7%tL56e$yGjr%7*c);;WjQ4~O<&&+G{B`jMH+c%^H~ zxtIIYbf!hRu$CjFA!3nKK{!*rV1mo{ihl#eSoA_Kry$`{H_*E>&q|H8MBv zo+BfEYlq#c$AMgyIV)N8jy4GXopG|2(etJ~TigGb7@k$DbVT@?3=Ir2G7~%1l^C!6 z)B1O?lp`TAQ9!^UoD*V^r;LvekDJ>CHufAr@u(Sx^v}1N+ieIFYYAqKda`o$=hLS@ zPn1;Q0f9Sr&RALf$;s)F&}86u3O?r062}&!#KhEd>(-i#^u3e0SXd-{d`{T-G4ow` z#kYnx^hu-@$A#r-v(7a;{impzJP}^|NuB?8QkZ;+H&a_-_!zc5N2x&aR*N z>FU0o9R|k6uU>xq^x?rx-Nvb>8v++OFf21VQ_`HYY=y?+Ra&3l-@mb_DPzIPlM7aQ zb~658xOP>pV8Pn63aYJ2ka%+Plwo4J^XO5AVrxvQ!}=GhI~)X}zx;R3t^echtM^}0 z;$Lb?#^krTa&lsVf@YPKesOVPIak@2aa@y{oWRJ;cw=g3hk=%cL3fwY!}uAU@-A*h z9$u5y%W5CE{~)13RPNNN7mpL5dDm0s&z~3{pAL(|Wv#|Vfgk1F*E&9#!ewP-EWCN! zG==m3gMwu2?1avo;gOhr_pv+A5m95Fif&$w=7}HoPY_6UVPTh-pJ~c%Z|_`XRT;IB z`O)mco*tf0&-QF$j%87C*1|Nj@gR#*Q!s3qva_2d74{a3DK^^y~+G?w%fEclxL|M1Rl z--IF~qwd^U!n$^kO}a}<&khS|c6Mbq#rCk;gk@}No-$5kBT(fpo!&3;{t`*9URKS!d?}MU&9nSpV=R)`5WJ1(YNY}zMHDkf@6Y0Nx3;DCr$(? ztFeFhv}wjn&TU&Vx;i&1`tDRzKJ)0IeBX>?k&yzEQXRz7lYg&Yb?VBcLn{{__IVO9 zZL#v<7EYd9Pt1=OGX1$LZ1Um1ee)KRQ^)e4$w!5ck58y=rB61?~c{}@q`3sM-&X_4_VG&>giG?J`Ny$y6p$p6#55H?l zNN|w(#lO>@tu8k)iywy@YsR!0cIEGFDGY9vzwe_V)bv zou58FJ|2JOOwXiAGHPl}=gujyvDNVW;N$$oZY;J|=U}MK1IHOl1)0s911_-}f3AL$mi0&!_H1)`ET8r0NB=^GnJ(o#KBwz%WGPf^YUnmDC@`?F;IOvf z`1|+w`{Vul|Nr~@_(_UWZk$0vw@u8%F7=5*qXC5AvUg>-qLs-Rmu6cFf6BrvCq-&8(o{aC1vrqFpO!o^XJRzaq17 zPTquWE)z4q{R@mHC^H;PO-pN-)8p-O?pTkONg>;wPUe2jAD4Nrg>i3rk(8*oA$#Vv zh8ZHw+k_y)&`v6S|Nl?)?)&et@}YdnTJcx09RDY3$+G3V-giNWi@ER1NBOCL<)xb? zTvAem52rb->XGQU)~BB;&?hFvye9jA!O{ytpcO~||2wJl)&KwZ_vh#S#)tp^|NpOG z{#0SAz>HMpTS9?bmNe`LWbI6GNLp<9&Hjvx)-wJrE0PXOy!&v|k>zr0ns#taYV?=n zxnZRr!98idePCZBERXc_%qpm9urBQ1;VSJ2>4pC%{!;r;_QL*#(wF}$ zx+ZoDSUC8}|2OOsNMTDmmEh1T*TJz)s`12?hLx=KcU#nVJ_xdzz;Hf^FNPsE(HK%+ zg3OXy^5LEKmPoEn?wK@i&26Lk4^ui5V?dbm({f& zLd+v3GtGMAz;q&j{oj;E&EL=Yy|%(r{*V8!#ZxpmFFi;|O8LW`J9}oLO;IJ<9rv{kul}Kb|an2A5fww|!#vKXo8}&*Z2%38Lu^ zz11-%lNy5ASOk0|kC=EsQoGa2u)6>My06SjXt*ld16o8-%2hP0AzW`l8hcdhp@xI0 z4x-0PWQ&9j`l$X7Jo@ptwq$3UilEKXa|f6|1s(hUpJ#vFLYP_JG7~Z*ez;!r@mL(! zvw=^`h`BhYnbWkPU@5;&Uh~8mvQcML4mDilI(YTjfyq@H%seCmxS84Zq@GE5Gh;!h zhDxa3{`$pJWT3wDl(DZ@nl5v~dC>>!Cdmu0M3z{JdpvB6`CuThD(s)KB*$~31w|Xx z_=3Cz*Cg}r*z&-im&c;3_aN(sImNv@6NTqB?5lU52x>U~|L^22GeNWT*#G~6$8R~i zIsN~ie!xnE&0F$>^(sjNFA<-Hn9U1X-_+-*tYc|ZH#HEL9@8P1av*RHPtE$4kcQU_ zN;rOeWL7PNx=i14LYw=-r|imG!hZHo(<$I-H#l&EN22M+ot{$;&xC^OCQME&k&VGGk^&Jsvo#+hQ4 zR&r=3&x)Tj_9QhYOf;6bJx_(v=57D~|C0ayB>wm(>&gpBr!y;le|-FYd;b55O2OkA z2WBlgu~vZX#+2LfIeR177D+EW!F7R)Cq-}zQ!``v9wP&Rj^rdMnH@(AcW-s(5y+Xj zum1dp&+1*!(GsVb(Ut%Iy+7XnzoK%Y^z0K$kJk5Cim+Yz)Xcb;L-Fk26+9-BW*l+! zIWM+fD#=Hps$s^$OV7_s-rM7tSq$}@O5gqc|LyDl{l2?_(dAfLei(_@Bi~3K0f|`ef>qVT?-XdG}m}L{#QPk^e)iB z-Mz0SFRyN2@pJZ^==6^n?s*bI{r{BSHXeADlVbko_=Kh9jfKzooNH|lw{nXtk+}{d9-j^Fb5Z|NlGPwEOb*_I~sHe+C8{{`^1BGSjm0q=C{@)r`l>c}~Re%zyCZ z=KudZ=J|E;`~Lj>|NsA+Hx@sCC}^L(=*68PxQ553H`qk4vX!ey;ezOzh=T^1LWlR) z^Bg;-vV?!jN1ik1wB&mEFN3F~9(HoG|7xIJK+j&M| z^RhSZmwMk%KmYH*$&)jt9MRT1*)oyi!}|uO;Lq~TGOy#TV%|xXXfiV`St2oKj>L~2 z5?0od=jN$yyX#Q)u<*0m;m5~ctb`^9P>pP4Q}yNj+vqPDZKBc|2e|@2|Nmd#;U!Z$ z$3COB_y7O@clZ7-KG@X1-0yEe!Gn&TCx<5LbZB~SQ0u5)u~h2dmaq8%OF1nP($YG5 zQZ8IiulUlDcuJ%#`?^O*>7QI~lc@rq-bA*YpMNnGS}5Po{PFGW{q^zxlaBnYmk{p@ z&=RrP8=vy!{r&r^!|Q!!eJMQJr9WxXnVB|ZYN!})~jiMJOHa+k6kXG%`W zn{fPu;9apRnW1b;981_#*fue>`Op6+Yxk%4VAJ~D<^L1X(oURwxNUp7^Q+|ves_!o zvbvOtC%$g2w^+g!SUuSzA?M75i3cxSJ-VSp^YH2EJgKQS*6&~NWY5?C|0V6}k6n^V zO@Q^N{AJ?z|EvD@=l%QpJ!j7FL_B{Hc)i%*fXJWr;{VR;9omZj|Nrsv|GByL(cAyU zYKfb>XkxH)xO&9{pp7VX11WBNWd}RLX$NUf%_|VUIt335U#P`;F z{jSkx8cjYinil;1_5Snodh3!G;(Mge9y+vO*|9(W|J%=SJL9J#85_xVCo}1VDmTZE z@BI%SySM4bOGrpa)cs4bsGs-nFne3}b@0SqiG+%ZL`6lxyt@u5Q9u9Be|Sc+OGn9TW{;$UwC&FS_2^Zq>k_jmvOeSiP|`T76$_WzX?5>uzlSiITmM! z@WjROgs(R`GwbMvVs+30-R*gXcXl5?aOB8=GiMYuP6Pxtrs^t$F$nIjoBQyd?HloS zNaTZDz4v$hrzi2%)oM5LR9{Tui)d=_UwOb>_0Rs&*Wd5%uK)Kx{mH{b5%Kg*8xt>G zNii}?+PE=MRIrh6pMCAG@YuO8pPjAe;o*tj_oqGQR-1)TXUc(#VF%(nc@C!>JaFK{ zfBVB17xS!7xUR&;W>oW=&Gw#i^z4NjCoWvj@bG1D+v(|&ar;t!{F(XizCD||dRzT} zi5W9E&a5>sP!NgPf9}H@$!~t3M#2C8cjkhVgzNottCmP?h`QjFySL^0{X&%VgKxv@`u|S|K~ptNR&BT7$9QEqMT%Bo8}YI;bL&lm?!M(#MCKEGo@~q zzK_fljF`6KR5#CpRXtl*UVm<`H2vf3+drHu&)dvr{l04X(i6pV4qM)uwl6*QX1UXX zb?a+=x2G%XZtu<9RrhN0JaDV$P~D^FyUYK_?M<38tt~J7=l}oudmOp+btM+u7L=GV zQ!+GE^6Hg{1orEOhqUTg9z0IoQSq_AgX8gR^Z$|(ptTn}cS`>I^Yi$jCYirHD^@l( z1{|K8xI>Mb1GI?dil#=iS65>@`{}C}KZH0HToM&_SoZS$or5ziAerRQCEj1(`ybx3 zZL9ts&}R4l|Ns7S6%nb#72?9qOF@gfe}1+vEnUVhzo75$@97VJmrq%JE>UG-hK0C9 z!G~9(hp(;<_;!Ck&$ewW=Qvci*XdFl|C z_LHx6AHFFwugPSRXKZHnSUk0{_38hK%JnyHEO@u0{QdkL7JH%+4_q`%ba+)|&@+LB zk-h%E(HzUeAHHZ#(N;R7pwqLC%~JEg%^h95K1c2AL2IHvd{p)`hx*5^;nQpJ!=Il| z5Yzco!lqPwxNq07dG9AKS@0Htj1(T6 zlWjB4Uyvg#+&16d(~Qyo@2CIvpmP8E`p3IFg&J(&ivRzOYkq%=5BOl;=62@iHo;js5)u;k_9R-A@mT#i zaP+8x#t8+5hIMS`btNkpY}VS;o+@}JvyZzEnx86j-d$eKbAO-Wb^j9|+4sKSza((5 zC%k8>x!8cAg9#H9JR2R-1t!bQd9r`6%)DiEdvD1zyH?` z|L1IVxu=Kc`}_aTpQI$DACKzn`2x;`Z>0}^ea+)P&td1GLp-~7{W!Ksf#dTrS)0Gp z&(D|SlR0qxK?1BsJT%i{UrpkU3c;fmJzqLByaR5_N6b>_m-{O$`j_9_-J#*(-QEA~ z>;4?SV9EumZtUvQEb3$o|14eB_H)~Z}COv3y&$RgWFLg&D^Q|ea?_`p1vA=$*q|ULW zu&=)S{lE74_kaJlKin{wXZjd*>T{*FViW%-^({o6l~sMAvW~{ z``JZD1&t2KSp5G#Kk=zoi|7t$ikxZj?|*v5uPF~-haXZEHDlG_Y@PVgUR-=p=$Ru& z6jY8B{QsA6>Hy!peW@0elO7&s*9ecA6JTfCF25@M** zgyQ@AQY*gmWogWs{b9?ri#4C#%a?F!vlti|ZP;XFQ*q$HIlm~r1*ShfFkY;d5%}<& z|M1(}JmPu^Zn}{X5;e6OV-6ndD7kI&?_klHj@>mgAD)wav%2~M%yf%&@##N4vD%s@ zlr8(%yuV&@U%k7@N`;?~pZmAD_e=cyC%fUx$LIW@4upNZkxtYR0ii<+793cx?AU_U zrxg?rE?9f^!u8`HUW_>|K0<-|hz291FN^ z!kg+Dx=?3^m!`E5z;BsioEL*Nt2c8!D zn|>cy%DSF!*e@ z?G*fH-J|^Xr-@}!tl+`#_h(F5l)G_SZt^_NrsZ>1GAlb>Ht73eo7!pUBkUE+)x0&L z`&x!?;DqNcf`vX3c1)K(M~JQJTK9WV=B!C7>R-ci9^~AuFXLq{tu6jtv=O`;Zq3|< zTx|{lE&GccdD^no470X$=kMWvpd8WSy!)ktfxOZoc@bB>kPr9wB_@`Yl}X-s{(ax! z0~SKZr+a;<`g4zrlQ}?|n^_-*W?wxNF?ae03BT0b%!NxeY^8vtJ>*zoANF^=ng($=~5>6 zQ>_jhc1z~kaR6!kg_+TT`UU+2NMU!VgFT|^#c|K+Hrgg`? z&zmT+?DD&8z+=Ie>N)T7wlB7oev=efP8StCogcupbiyPJB@PZ}8~)QyUfbTTD}P-) zr<#{J)OD87afxHDe3y@WuQZhZTPI=HdD(V%pU&=Tvj)g0<(#>O+)V)j$LseTacoJ? z=RLUdpkhngG`s9Cfhrt@LM#UzrS2&#bN1uAdLi1#^VP9t>30_I>g(V2&GzqGrg%DH z{qoM&9Pw(*Un(}lY>EmDbyH@Eb)O`_a>Q_-zeJFTM(dO-ra?)!x9EB)vP3;QK3!HG<%ERETFA~TjttxHzf!h ztLKthnzYNr8H%)nhV0QbtQ9cJVSgBZ<70v)$M#eQg5c;L?J~E=WE%s+7uMd-|nwZ zk$i64%oynwdFlP;`(n9getnxW>~93;V3>bw5(1s4y64Q(?xQw8k5Yxa z9N8Ur{n7B4xUI7J$&;cYk9{W!UR2)_JASj1RYvmI&kjMSe_wLHRRvzZVdmK+Ai-9s zGf|IAroD2R&d#`*YHf^)5n{&zpS=i6ls@^_zb8D@rvEl?tb6EEmCL0za=&kW-)YL* zuxP^b8EUiKLUNGOK zqvmImV?Bdcy_$aEfHcT2Q|9<`H#rEj%`Xa^`0bkE3Fl`glOOPZt1o$b?O^hZdqtXJ zLaj=Z_@rwiTK0ZVWP6snZ^q2HCnt(eZMt2sKg`NLsq*M zEi!ddU~z2}h*_UeoBptwrM_@K^ZCQucE)Vbc{D>qiQ~iH=XZB){8N**Th%vmZN&Qb zhj-nNt~$2TYoW+tMbDU%inSl;o9GY=2EbP1i9y5;Td;9mzD zEYj@WXT4OJ;$Oonr&g&u{ru9y^L8+T{Ig_^EmwmCkia?)N9B9{G6tUid_dwW{+Y!sYs%({iNS z4qCk0wkyc$*Xc#u|1UlIr{3y@*}V2CGt?X%B!padto!%k@+K4I?_b~B^50kYotge> zmd)~2U#fr9m1G@%_t+%U$(+~W_979d01*qm*mePpqXr639ithRsCb^68lm@2=6i{z z_pE0{r~YvHgUc-?twI9>gP471K0IJdGU4$*+hqE+qr*T#+BoM{i;~CP$jBX6=gi^x z^M~jDKEn+g3?d>8GqQ|0I1LL54K*|jUc3a6e}1#e&OCZwyp^YK*EY% zEF3DnvOzbADw!49R2?b!XS3zz!G>FZj@(i>>Z5e;9*?c<6AMA{g!FbXv!Ws%`+B3A z|K}32PAyo$5i7J{`SFA_HeZ%Qhj^5fIC_12TzYqI+-P_p+$B2FFe1{>rshP!SFOXl z%X$3g8OYe2DEMQ+tgnCg_V$o@b}8xUAHKO28b}?O%CTY9pYQz-f0ed1vwvJ`bR0B1 z&hY>LoH>S|p1`xBvY0bPq0tn-~3EBC}=jrP298F3}m#1oqY zSe9t6>%OMK8F_5mLes*Y_lr$`=ZR?6AKhi-d&BIo!)>F!Gsdyy%QA%T|LoIP^nT)} zr9U$j-_6gs?>BXkPUfuoDWKNe|NmQN8}hjZ2)Ochdff|tA+ksy$#7b#GFNBoiWj0K zTvJ&&=Ed6x+J98-syEV}vGb%)-m`rMc{crLi+&4u0pRx9J{E`pccWW=F$Vjp+S+XiaYRaPb znb9o^UT@IhX|Al>^lJSMy~?`%!ShNBR!c0^;M(-&tL7@uBDDYi*UYu$Y7P)+vFEOA zQfkt)X%G`LyLX8BiGth#Wsh^JH=;V4Cb&s=PL298`I)0ii$hbeZgh_E;Y06q-ta#X zXnH->WNPluZiRc3+^?8!m#*}?og{I?tkz#3VHQW@>qizj((~GvZCv+b-p)EXCx1TCA*Z5{mYa?a;=hn^>v zhaILZRX-$A*taP~#*pp(#aW9EM7{c@VfJp@Bx4nei>fTQrmg#Z?{gR=!>(1>SKgoa zyX-}PmGZW=|4!tb=wVjrn|PVs*tjHyF;|s=nR!v1#3jvU#%FbWd>JXLR~s7`7@Rm| zWN<*O?ZHaa2m?1k8w4*sD;l{41i{v29QYBrXfB#Y=_RM&AQCY|D z;L|*t+?S24Kb&Wl@wNF#9tyTsv41dYvXb=P>f;9aa)*1RMJz=jF0*X6t3MYI&As4bg#!aik5&;9+9)AUkr?4A82P#-#+rnLOWz1`Cj?^c=Pqz&QZkM}z-rY%j@%E@uoC9da1Z3LghoB|RPfqg4+fSN&>7gputx%@|HkUb# zj7#_XwYf$xi!*Vw*)8*m>5?q?t~tTh^}??IH%>(<*|J{$`i$4P-v02`>?@JtpswZr z|4PL-Ufj0_?Y0aFmHhN6;MTnv5B}Fr`C4Bf^!kdd)NgyYX-7p4<}=mB?l`h>npj1b zfX%l>qG>;BTV`I7C|K^t)}!+2ul3~PXdD+S?}}lFngP8H)#KAQ&Ye> zvz8@nDGgye4@_OiY94jh#X!w~QF7%g`)#xMEUg~qITvevJa>4#(Al#t)66rJ{{J)l z^YgGlGTY%@r7Ajts-SK14FCTx$~f@zWpLX(IW|Q_Hg=mW;v$|6UCi?=F5C#%9k}yE(~OsGAq`hq)=XvfR0}xTI%BCCv!*J$(f9lI zZS(CV#dH#8nDPF8_-zU(MS>jXWS5$fGGSWUiI~h)zo=;4lLrYKHYR%brf%5o9`nC(tKY$?0TKl+ z_C;ZB%SBIdi#Fx*C51d(eBW`)}A{60_V;xSanK3_UwbFtt(O`3QR0MglJb- zaUXG!Sf|r{ye7rL^N~VC!hx#~{KX26^~>|TzGkQscc$Q<&D=CYsKY`ZH86yQvDMbH zm6x|YNqHf;`p0hFA2&}lxol(qsLt1PYh_UJLw}hJt_{;9L8JY9PF%QpwBV=FOzwXt zKtoHQaL#?yz|6|p_8!@-dezrpK z#}L)~`wU}t9odj0dHCJkD>A=SgrNQkew2`qkdT@xAea~soc!TiT0nTZfvIc!^QP6^ z-0VlV9|bx5QD8s#t3%;HaKeH@!^ubY*Mr&$F?&vINMt@d)0ij!Uc&yg?rTu<;vY3I z^YO7UGO|fZvUz*6>Fcwxva<2(H9p#pw+$q%E90p6Id=tiSbCRWb9@1 zml4-D%DI28;ETrLx3^6sZz(~`Ua5v7jXW-{k`)yaKE9Gurb?QaO7ieZ9yy|syHQBl zbqUj-UePtF%wa44uXy})hU?LOd7k)v1~T^N42snbPgW0^r=$qo<0bU~y!@3XCkM3f zYtkehCMF&^Ii6p?c$}Pgf`WLOnlv7(o`~A8$kPM~A_VorW2ihJSvs`nsJkFI8SH zedZji;bYYwjMqdO_V;&Pvv!@?l@N$(9ySsn*jY5fTDg`_s}Y`Q+(Bb=O2zl~9KYABBCVj$gf(l92G@ zQ!8kM-haMC+@1uBiV2|8IK=fO?(LQC;bfA8y7@{cv+#xu4*rpXf{pLG9zRODa3#gS zAYsC^ws*@~=Bzq!u(7yYrS&J|Nl!RbuvpHcr6|l_M$IRPmj&ekgc?o&DE7nP%z`;xl^YOoIQKsG`Fjb zjmB0X=8KuEn_h^<&2XH=`eel!^ZW(s8#in)sQGuM;IGx;mzR0g$AJpt51&*`c0Smt zn|%Q^UkeJ5L!He00zyr4Cz&$$Ib^8@2C^k4zUV!7<_wRH4$qo38Y=AUCAK;)j4X~z zA|fUK{Y|d;KkwmP>$dds67y^lEB?z>ZTP>xo+tml)A^VdnSH-sUQSrA-vL@?2r_+M zC$q4DpbdnO%QA^AWsab}h(45^rtlpw*nn1P*v zfq~)we<{|*4CJIHd9|{-!_I@()a%p#=`C_vAjvcg#_|8tlH1r~;>Fe!?Sv1!A`dl^OZeW_Q zv?X%a@imb%TC;i{?tNbRv+`HrkIzqPpZ`84kvT0*QgH9`SN^>zGed+ z*2w~@UMo^2PVD{vU$K*;_TD{KJAMB7bQ>?if3xp(^XSeKSycXtQBKL4Oe?oLhKZf&tHEuHP{jZ?qcD=S}naYXoX z#FYi2iV~V_4qm_h%g28E|G%DxN8m`qk_YlCNB{i%{Qvv=`u+9)|9^gde{=f(^>Kg8 zpPjj%ef?kUj}QOPO1JULvzcAl2+A-Zn{_&y+uNu1_j?CSi1z9_ej~!eu`xbsmV4B! zV3(Gevwz!jbDQvSOy&J=FvZbDV!~sg>{$oq>J&_qUtxOAe#;Z4hl|t})Mse@|NsB{ z|NsBZ^Z&)}`0)S7$NKQ~MtkbdfB2$#_;^20{ePoBzmI?T|KGt4l)eA|mpXCe;Hgt5 z&Y#!RJEyLxnUSWpXrZEl_GxXsbBZcQ&K&$Wzmrq$1y96cnPaI52@OXWnYX1hsw!O! zl8EX_y8Mq{_vM1DpY5yIbayIf|6^a@kf8NnQS-<5_xJy+`%9dgo4liL?!%M9ZM(}Q zY?$MEhqokzNzvoY!Xqd9xu$$}59A5Fn16TcyXm?B#dGKw~RE>;O zwr`sl&(=KYXpd-#nVsOR@jqbepZ0Lo35iqv=S#%xO{(}a{T>r@#Q_Yq&aHBpTh?1 z*(}Bjj>%th;@Ir>vws%T1uv#2ZCcZ`UjF~Te}f+X72XM78mD>H{ZISxZ|=iKu5DLW zOX|fYRs5Xv@UHbOHyMb1xsL?Io?W>nx6jnL4pI9I9pFsV`C#@xH-#i z1?9*8|NsAg@RFq0tOH?d6|}gw9BFhmoEF65%oxSnw70c!>kWssF&B^CBKg&fPVw>lqgruY?Q=e|$5Kvfpb=%gdQ>UdRB_uSsax!nb z)WjIRMq#<*EU(H|o)YM@hlq;^n2G z+c#gna9vzbAfffEtE5E0uDKtK8W@ca8ALDe=;isN%A&+nplX-H$?YmrI)~YK{o_sI z!Mw~u-}SexYGIrfFyZ#X3{UpzM+ct#`+xq!dwEdi?3XiK6L+@Yi^k!}>OAuHL5hBi z5L=xdJ$RlT93ZfHTiEx{moHtN`s8tP(^n742}~0ix6VpGAlxnC$zr?vaI=Ka!3}O~ zHrFO{^VFQ=aOU!MI+Bt5z-Q%+DpieFhOhcS@tm5Fa3E~$ANP`|fXS@ajxaL*|7ZB; z_wj;HDu-`v<@x>1(5BWa^9-n}|NnoHN#TlBT|0NbuBdqN;#C(n_h#ouoq}0fMas;Z z4>+7Y5|Pa3{k1XV?t@j?AGT;4a!ar+S;gWVmEs`6JmXtZ?vy`=C&oOuv!6$!Ir;wv z!v%TD5jhHzKeT*$DSWt*nJ4?2LCn50A6|(v-<*6HQXVK77aABEa&Q*jxbdQ`T{=4I z6gTHaV`Jw}hRQ`v4!SlEJXz8O4nGpznV~o#yRfHK;PSa2?Ye(mPi3O2lW?cLID zU0u2I^oauJYbOp|V!X04Lz6k&B;lHJgtVkbC!1}e;}vCdafL3<(%cW`$`dwQ>mAMb z%DT)gg>^z@)tcDIHLC0n)7Y=2Fg|y>A#Z08v-i}7a{a@*N_o!DSKcDP3AN3#kb}Fp zy83lW%8!hU4}}E=i`BSYTA7qD2C-FT9SC0dAggQ9RjD7+8aEzIV$9BUU9suFI}Z!- zQWZJXWgC}@rbN1|Y@TNE!QA@8gcCjX^`N50qGm?oEt9s{=8_&!+u6X$MM<|XBg;!w z?bgvF5uZOA8X6iN3SwhcJ|yA6koYb3Vzb=hmknxta=%VIlJMj*=1t6Bz;)aiph_jQ<-Mghrmj0BGl$4N|@!%zsYN^5{7Ms)uJ+4bW zaz3dz8a3^YyRX#`B?jgD$!G8Iw1^0suutFnW9NreEeVetJRasxc=UclLg>%;@`soE z^HhH~(uq1!@Ym|_@%|rCiV)jsqIU{581PKC;9Aj;%w)Epg>{poTC%~kg=}2Mk2x&( zIQOY~|J3y!KTk?6di_e^n8fl*xm(+mGS6%ycj!Rx_i485>o3cN}URGPl z|9G{m`sQWk?_^hm{o9r5Yj06f!vD2m=TBGF%QLjp?<;t4sfYLq?O0pHtb0Lurk+u& zIO{An#Rcy->O_R++G|ytj3<*}&o6XI-%1S!CJWDJsA|xeeCd^B8$;w?Y zMTTd^XSEGX4V-R93ZgtG)D)7sCr-}s$+f{!<KZSq~)Hm2Vt6pv5QN zve>>JRG(>RCG99+{9U34PHahsL>@hR=HdVS)QJt(FHK5FOl(`qtfs;ytgI|NhtZLT z$A^dKjK`(`-4ic4)ebS_Z*-`#?TO%WaZ_nA^-WfaImM=aAkOK4*+aua<&PEl#SVC? zsj*1=a?NZE73+(LkPs1-ymd<=Cr9GO&5ah2-0QSH%*CZ5I5>9FWL*`NA2Sy4@;tFf zbJB7Z*c-WIQnTR75C4y!;K+A#m~^;)Mxe%{9opUhB{p_1*IU3Mp!X*;Q(3uf`v1eX zB{L-1>$#bIWp?gmT3Ug)!g5=#ZD7+mdapUWp9yf{wmQJsu02r!^Ls@*8%G`AqHEakSH9+Oh_v z-50F%XBhVGKXX7N#X-x7g;jFRT1mV5w2J@p9$wRJf##s5ZI&USu1?Or!a_2vEH`G% zT)0@mV<{7}56dApKNFt~O)Qq03fYat7t`Vl7(X6#4{}KUrvHNTUvj}i4&Q_IA*nAI zvL7?A6>Zx3lrcRax6)xk2S;-^x3dpV^A?jsZjfx6l$4M#VdBH`vTMBj+ow%PNKQ?4 zD&pF-`~P7s<|qF+mdO3=wvzfNAC{^V#Azfx{rSWr4-?rVzcZM!O_%C<;8w_KW_l!J zq3nIQt*uK2T@k{r}i~e}3QD8GnBM|G)qK%BOgEMa^;yof;TC*)imcJvTRV z-fjVgow`k{(;RQ~E;i+w-YUIx@!zFK(ar!$0LYGBJ0DB-kvEf zc@OUI=h@-FWA2V=phEMD-x; z7RG39W?fBz^#=|%KD??8>O$n*OMyB+W{1Jeotca zvnuDjk+DoY;JM|Nj!joLsWdS+%isVPr(4fW>p!O4%*@SOO%MP43`?DkS+Pb3 z7Rle**08kyzsKjLGdAaac=xVeg>9Zrw}S!f&oxNBE)d)yw&qo2q~xN-(1z)psNGyG1_E<#{C@wF%X{{f zf6fmMT=H8t(Zp_+vSjy*jk~z+wdR(YEs<(dG5%CGvw){`lBRU*ZtZE>@7w}Ka#XtgSWyje^GIH^vUn1qS=EJoLXicF{{t1{x^E; zcxmU-Rm%h}O+GL+PI1Ayi>3DU&9A<{;@rvX-!8&>N(j>NN@^4p`Y`E%{hmJ#TE-F& zlw&u1V$R}YKDB}8+M3SD@6N)f-ruk1m)-UK$4B-{Obnbge?cK|{LLGmW5=8xJ$hqf zbLK|Xb)JgH626SKhHa`11oBLzTtFnZXrS6MwjZnAxGzoHwf0?q9)r%>IdHT16(~<$vPL3#VFjC_VZ0 zb^81J_5VLMFFL#b{{H{A5HzL}S$D)hn%q{lmaW>DK4_;ngZhZDl^7bBQzOHDPI5F{kTv9OGCcQI>dTc#HtgH2w`0%AdQgYLF%Ll$(Kl&=zI!@HT zkU3p{rA&i4=x1Y&#xD-o>&wPTK0Z9S5y6pT<#VT<;U?oZk)3k>kXFv`~QFW0)>L7r@W1r88~ZxfBXLIZ2$9fdk>05 zpV-QLP3fV1mT!wrN95)N-#87^i+ij0=iU3$TW`}W@!_jBv$i&~WO`tdtd9G%rp+78 zd?#FmTG&)Ax=r?{e%7XjaKns8^@*ZfX;O#Q#7sE+OMi{sum6vZs+*b$PO9za-BbJf z^P8LH8)iL~cl)!@U9r#NU;B|(2Af^~7O!eIdUyZ-=Vxc*Po3ZM@Be&9mzFt44mK=y za`fiPC|j7>4(dxY{QsZibna14&xymc3MSU`bf|u=pV-A)v8ZveJr7U4!7Q~Itz1qq z&dxJr?k6|QxBnlz=f}Tez5Nms+gI;%xRPhFWx>nT1Ir|g?;6uZyk>oXgVK^h)OKmW_`<6yqDr-481Pr8H84&z1dTpi@4j;>fTQ8MEucOwhS z4N=jbC%@YYB_t(1c#=}};{$h}#bF(W3#+8hv@|_S@QF`Qn^K>D@6X>)PvxzxAJ4a+ zvW!12jx8~fjggV7*HQk!fm3a(U9x9E0!> zPlltW?%B0#b(M6^{QUN|yy#Ca%fCNA&!0a1>(kTnJ(kkpcapd18gvUc_5A1I;mN=E z=jY$w_HXvob~Q{BYhI-E^OodBUD3n$YmdHp2kDHw=j@-kzT3wZ*3-wKoh7-5=4vteE8f(`65)Wmpif3Eg`jOpP zMsOAv*B(YjttoAc%*@Qphv(b>-?Fvx|G&TOQj2#;as4-xY2Iq&Wc*N`k2OF~UcPyo z8TZpzQ8l%;2?A`KoLxMQueS*p7j=PR36vU|Ow1COuU+V(oAX1)Tk^wydzG~bWu6>j ziW-mp{8_VcuhJQTF<|;F7d<1#0}fh7I=Ma5f!ks2Nn0A#JASs-@!>7NnWhST8#p# zH_X`hk3V{rgSJxumrz((+oL39XXik#n$u^_NJwd&J$keykn53SglK}Vq-uv z`4b;C>*V$6&6C!x*=XqO#&+(a&nCu+t{g^?fxe`~gdgnhCmmpOemGgD!0gb{|Njp! zi}I@7F{-h`U znm=n*VuHeysii+YJ^g;9dR~9Os-B;of`e1c9A4$Dk5|+pyv1W&Io2^TU!G9U!}C8i zMdI(Di1{`@x7YmCf8_ruI5bs5C&j}@<=$%-#x<$ThOQjU%+Q)4T>HbS7D2PGOp?66 z+owoPSoUh;70v&J1^*XslAO>a#I)e!!E@&ll9hKQw^#d~z3Dq!Zf~u3#i{6!DY|NI z+cpGvhE0>rk5gZ^?7;k_4fn3bk6%(BzMQE-3#|Ig>dFaG>&`R(od_f&uHZftb)>N+@hT(As4Yy7mdyZxNETGAR^7sGl?)iCtlInlGxRy9xO@#vs79Co! z?%adNhabE-qu>?j-fM7RzSM^$pqV6w|NpndIC3#4a>yL)zF__R|MrCkZaBY5JLolC z)W6K2V1~oomOUx$N50jZ`21VPsonF7Qs-lfXl`Z{yF5YcxP8u9KDM4K45so^8>e=={X3qpc&YTC+@}8)pS1reG3s(j z7KpITP)pd#k^RUa%pigxOD^1W!S>vU|reaInflfvR0g{>zXv!C)6Pv>muu`cU*>%HoMTSPVrMTvg!Vv3SfgU1a6NUU$YQrG{em{7ntaTg=!G zGBNW(tz2VwT!+6Ps>S(<{kCK5>y|Vu6AZU>6nL0l|G-N!qS~ZllHfAk8J%ey3`UnA zqf|+bpXK)a?z&cUG^2e|NYn$R!or6+YeIR7S^`%uIyl+F;IdehmE4|&A5Ki51~tR~ z|4yDVH_rU(k6fgn`|QBvuoHgrA5soB2-?ng$!7e?U};BxDcho}JTsY^8Kccbi#24m zrNO>x>Mu@U{cpI_A}#bn(8>B7r5zqIo-CVn9KBgScpNenaJqDY{a;Gs%T)ee!@xp{QKOrVPQ84d#vyy!3YPq&S#XW-$LaC2P^C z(|6JhTJ$=-KjkF)|M;a3N3MA$9`KNyp|bDFhxhmYPuKr{XV=$Q_jc!q8Iqn{Yt0AgZb^Qs ze`)RVj$F+X(aMS9TK^AkyUQCXbNyW;%gqgW`HpUFj$U1jGiN?L!ucs&LHPcm1EG&0 zW+ojHZ2C~dWVu#BT#IEg>$<|6fF8p_Igig|j&Chjqrt2~1AfaI0JW zD9@r4M#i2Vo;PoJ=FQ`Y-)HdVe*fQlZcz+pf*@m$Nl6a*)BiS4{dwcFWLj*q;I9+E zlRvsX|M~eopZvdL$Bt#BA6L}x*PfZQR_)(o&w?9L@)5$97kG;mD5xAs$U1f4@L|xj z)!ErguIi|i#urXi5`dWN^d(F}Qc_^LlA@c@1uL}zwsn1+dZ{mZ^6&rCjr;TQ%}x2T zG8sX^7#^PbiC_LX2lXtQuF&-QZ~H}O$Fc(p*nE6={QP*LqIiz?8^5`Ec)@}L2hRF7 zm94qTBAE*+fI->zNqA4&(fSI@#wullb-QHzJDbk^`2YX?mdy9z>;LI!pPn@Tyt>Mf zhQ&Mw;!PJ^Rm*rAyhioDfQO`jg5tpmGfo^hbjYAs?eJ3XCoZ~0)ssEGWJooPLG&b^V&oeA3G1yb(TDQIR$3AT-h{=b7E+6jybg=os_AKGpia)JO zdcME^?=$bu%4N?E?=Js;>GICSi!5f&EL2rnqslmO>6HuGS_X{#{LX=b&Aq&bpPl8g zwB&hzU-)+0fmJ;{Um*2aQlq|Y`+6bQT$2Uc?E{W`e%xYtA({s?%lP&6{WCM?tNZ`^ z^yz85zCK@VZ5SgX8(Ukw$F6^g60J8s{b+CQURiO1xwMomGqdggf5~fW5)+b>SFBGD z2vRag^D+<=m~RJhxs$Y=d**~g(^~$x3m?C><8Y6k$hUs^|LfxZ-o5eTgg&|FyoAI(p?qgYq@bZEo%SKmN>ocz{u(`g0nanOR#C6I*TVqA!p9 zS&m$Rb+enyEhHrW8!ouFyV)e=-+acWe=KZdqkO*n|NVXc?d|pF&)4rSea-K2Dj^}E z;{U(<6P4}b_xxy1Zt0z&#k_f?^_>HIDtYAX9B$6HH`a+e6A*Q3L%luo$MQb{K1w_m zRpy8R(1T&K4HB*hzIZg@5sRntx1`7K@BbIq`!mn%?de6X_YK&K{pS6dJMrMTh83$e zh)HC`JgH(oB-fV7#1ywb?ZyViiF`V`l6Ex-H+D}44UzQlv^6k%IlR8GH2&nf4bZTP ze#0EzvMqzz+nPC9heg%u|KH#LKR(_+*Sh}9oS$ERe2hPHhG*HbHU9Jdw9eo7{jD^! zwe{g^Yxf%&-vq@ytI5QA4<+Z_8$AU;bHZOV4zG{z5Da2wW)l~0Q+AiQwkGk!$wZ5? z9z~-`2M;tjIxy5$%F7%GXL+QrIDv6f6~qxudfy!53c0L>ni(@^t1t)6Yz%17={xh| z^Yj0!!|QL~-oGa9@7#$K6>a7mzA@wJnVI&-`xnd1-v&!GUe3UfZX95Sf;AL1e-59(MkJI+34h ze|=eRU-xJG#zbG=ry_!hH6I?lKRJ2-lP5FeY@Mz!&6?%2J@0R7Qc_ydp*JjvYJK&} zioOm9?(gTBZ*Oc<*CX)oQIdgi@`I#4nK>)cOQj z_SL1{*v@}&Z+X9C=+uX&#b4ZXm^O`viHXO_iRboq|LE_k6+Hg}>^TSa&Qu#B-rEsl(?FD2?j>n`isG`tdRO>hJ%m zetlWL-8?!=t9`PtHaj~r^Wp#6;r}=|3}kG6%x-Rec$~jQw5F!Vz5n0WkBPs2ezv!^ zW_~xtP|1ti;sA+{nfB#7At8<;gD3@j`RX3S9Vo93jab>W-pgD4jcP=tb-Fb89Pvg^-`g8GYgWag#Wxe#cm1vZ^+jG^|M0`Z^&%oO=Gpx{edGv( zz4)Kc&;LJq)Z;(zPxaql_VV(~s~##FMjkl$_BK!ceIuJ{wx^#y7<~Agyfe|+vr|#K z`{4Qh#IsUuywVbW^V2H6^?=qk6+f5g>XP{KwR3I81W|?c0-(VB|3AsGuc&9~;{O+S z2(T^D`T6(v|L5obKR^3>U*6q*Vf8v`34`t1i;o}K(A{;&pEdQz*Vpq~SPtLYTYu%s zjBjs$pFMl_!&mJ?noUhS_xJz*|NHy>`}=z~FcmLR=VoJTv#*!fSC_h@kU6mE&;Rou zewmhpN<6zIpPAV%_u zT5~l!2-Lo|QqhRMaU=Rv!o%L2J+tQ>a^~R9e#jVhFw618agl5X6-D+FSB$UP+_Q=M zcx?VuS+RWIs(WV@AEde;v#^_$mU}z&b@epX(%QAFxQiEv?3wv&d52l@yug-qyIU7p z*eOjEx~6pA@civf3ciIw2KCYB5+w|L6`K5ym+_@?W=~X+|JCXs(=6$=P$Pk7OWTwe z)dtNkv_thIt)eHzx`kvD-_jhaR>GMmy>usvP`3Fq6cyFGm#f-Cu zn?wEj1k3bnnVFfFZ2a{9>+Apab$@Q}{A_M)e0X#E|B0`s?5~%ssF2uK^Yi~VT~Kj1 z-~NC7-(UW(WOpCt|MT# zV`gsq{$6s+R>^&}$q$~puTZ>k%R$=-l8vv#D)NDb>dmg3gnVG+`M~tA;fY|4Nb|vo z4?H5?-xu8f?(!RpqU*W7o3CvRJSDX8 z+N9@+p{}WtqOH~OwZ$yvfv3)G{$i`n+4gYP1rx>74l37nO}a5-`iT$sLj)2c9a^>> zo%m+P!5)*=Yx4?!d%LT2K6bBtAwO#YM;X{Xcjj*7YEY19@#o-dZ<{7}YgL}=oys=_m-47>B$c04XPcEX~| z@XOxY->sSMt8+wzPhwfC$+hNal*_*_6&(viZXerpp;W(ZN|MA0qeU;e*Hm2J?_+;X zom;d5ywt)e@;sxY+Wx}F=XaIve|GNg!6Qc&tUM_rcP=aI)S5+ymaI9WqtH-O$r^Q@ zDRAMRzo$QZe7t{s{QuZ}fBqkAu77_oaid(&gpO|GJvAp6tUdeTpY0YuQDNn4;fHw) z3=M1koB%BXeR-KDCx_?5vETdac}|_Wz*wrNSfeH|$0YH<>D+h!<<2PrNxqRgY72H*Xo#Q4>^{N+imfwq4MEG3UE^oD@N~5H zGUgpP_Hiepw0S}j1E-;)hm+L>rKy)LZIt_;5P400-+rCwx7~402EK9G@gm#bvZhVz zR1rL{ykxg}*+HJ?9M196UT<0x`tzB%QHQKclBu;l4xSk+$gqm-PfP} zg(s4E&MB$}{bS{x>#TWf;=bkU;#rQ{B)?7W`+KRk+4`Sn${ywAvr3%bfs^%?xxQS7 z3p>KXMW6n z;Cy!1**2A#Pb(kj_Md8hfA-Tmt%shUb}TB0Ew`4rBk(Vr!%$8lv+G)lWN2&99KSc; z=bT>?=Fur|z{Go>u!#|<;key5;(g-p2267D4@Rm*cInTSsh+o!bW7(heQ~)n z@qmQOjyTKtmU18Gty-j^;k{*<#-`Q-87`MEnB;6<6L#c8$*H1~`%jn5Z+K_ra^wBS zdB5k*&u30PvXf_GeAA!1U&XlcXHSQWHLRJno9mE+fNTFVM<=bDBAP90FW+GKZ&RO; zD%#Q8t7tc&fm2iGh{^`BbwXjHL`6=%=$U!ITqUXyZ@5YJXvhR ze|nL|hgz9-o9mu;-rsy{VlY*8Hm}ozU3c=XY@NAMeWF>4(ZNkCv@VGxY*g^M;ku&SF^5mB^sO-W8qlH{xV*H``M8?EyyC=DRXT><(k`k zr=}L)gqv@3*F0j}-qhr*S4obFYt-nbico!-PbwN$W-(Gvxeo5Y&bN$Yvhk_oOWTGhv&3HyU8jZnTMB~ zZl8Vian;k3Me8)VmTZ|5*0t}r1RuLi|MSV$PpAFaAFX$+urMdtpzZNQ0~gIlGc+C* zB}xc1DQsx_AO6-zeZlLZ{lTG2G@~Xz=WdM=SA1brwmUxJ=QDxm>KXfg{q|jC4$fI~ zW^Dv{$Yp+lfRCU^a`LR-e5?Ot{yTClPPgTV(~+Pl++u;}Ln%)Eb^<2({TTVEk7!D8%;Zs-*n^onG2V$ zT7$i_X0|PNvjb0?{2z(DmkVV0WaqZ7yZh(#U%{iUf(yI5O5P+~^`1fuiXOo&26rFr;IG*B{l8bMYGBu8&fCj!HMtgbC?srG?&Eac z)bmzsX+f+_f1*L%t!ST;rsr&9C_6PE#-Yh6zWVjwpz2THDdu>p9W2 zIl*aF%!5$=XMfKaL_7WRYuY~P+iR1X3#PYT?$mMSxFx1&@$2}XiiF6X#n&GC9{%bT z`Ka=Yu~*`(E2fSP^Rl;UEM=>?Ip5Ucr+4i3C{E3lS~c@k9+o{WQuOUimTCVyQ)AI< z4(B$;(}zrEs^vt_{`Ol|afQ~ed5au06VCG}a2W8!eE)Fa-(kNOd3}GTR5Czzo6MPO z2ufV8_jegKMQ|uJ&E4B;{^5$*yaP@w{R=pW(1bzx|)ge~(YB zmTL0nmybZmG%DRqgalYr|H!&J{n7fN^8m&Jaxc_np!xkiN8s;<)G`Gn)LZcTx;0n9p}o( ze6g{$nxm_f#C>*-Zv7;E$w}%n&PzSM8zAR?Yr}k}^>+Dl&a#OHa4Ej?yCl*4+Q`>} zFY5ivr01d9;#Zj{-F;ar<+SFx<^Hyfp{Fl0TNkr4|*>0mev3UDU*H6BM zxO$c$SE~U}+y1q;mMT0-apiyTaBpj|`&=Chjiw9SKia-^uK2>MAG}a<)+fP9e|Dzx zy)R~LeQlYvPis#49A}O-Pq+E~`SRRa@=Go&i^91jfB6NTv#WAODW?jQoSL+2*{T!8 zQ=C>F*f@V?Rcy)a8EVG_lOv`n{Muu-*WbENpw&?0sKEsj4#)e8O=I0}C+WH#+{Z3< zeX7aPozp9(I(co<+4fR6@zL|($Kj9?6colS1_CbglNPs@bvZG8tK0Kmu|r;A+4hME zQo$|SldSnZTJ+z}+U=WQbo0!FCvQ&eQ+}{qxm2dzJ0~t9OKQq5yG_%X%d@)waqd62 zX6d?@R;#*H6weDP)d=aa{61I6aMt4hc=PM@%{zFN`Gvw!MORl>>lvEKliKD#F<05M;85)UBR#P#$ z&gXxR&-(waY~JM5Gsa#Qx0&!}zG^$aYuPG>HL08H$A%(N!P7)q-wRg+E!DrPz5L8Qu*0^@-pkeE zz|&%{tYKDYW%_g!imVrC)4I&u$A8G2xBeCaxWNC;EZbgwh~q1y2dKV-quz z=0%3@Nss)N5X&DR=QMkrmY2%5lywGd$r;h7J}rLmBe#D~>coy+E*0RS>QJ6~nvmkM zKRec)dhpgPNB{oD>C+uuS`J>A z;OO1`@Hl_l{~KXXCRu&s=@jyq;Al2EcJHYVuSA=Bc@J+)zHqSgL=J~!LqgA{O-3(X z8fs`7dU!uJVT{}I_+{IJ_3j%s{r{8r;6VbUjlE_zWS5(K9hYKQSlO*?g{dpQ+_`t? zp4wK8ldLS=9T6)cQuH?HaOananeCkZ@UGXtAKo&LWkQeID6^FI8a5U$PInMdRL%4X zyzt%QmqtXkR#96hw zjdQBzD~Rt^>fe-de`WTPl*k)qYyKYFr1G&a{JJ4eiezs0H4)Zx7fs|8=KS898WC#8 zChb1uki@mA`_gB1DD^3qtgj51r_2(ySVZ90b4`I)&VI6mXKaeWy`MF+?71BRM2^@ur@A$D>wttvejGtTWMJ zWpui`pTL9PTXain_4M3-OtAA?|H+6g`35+ZY=Op2yx<1Iq{r(|2nOBcXi}=k%YVQc zH0#QwYv+{0nliYabDTc3AwTWt+3oX+=Dv46U;JuTZTh^8(>AGiE}AJcAyu4-(_~ks zg!>dHjxB1wmVMLh=kFI~GSie~TjQF2Yg+2T>Yl_{O|CiJ2G=f_tl0glv{D06I9~wnQI8jNv`vGEHYBn;~a91o;t-`;h3M4`8?#CnVf@wH@nCj zT{p3<5o^t^<_k~ybAZ2d@z>U{eX|t5v*(L{IJ{<#S)YdM{mUji%%=~T*v|jBZPk}u zLdPYXB_^f*uq*TlOyHU4E+T99XSwGd^RtD|F^# z4m`3+fJLcPqxJbh)6A}GB1cPq{lD1#yr$s!0bOgpV-r^`()kH?;FdWM2i{*K=C`(a@gxv*8Z7UJ$c9L=SQ=2k8OCb zanxdwNN%?wACpj1Wtqh3U6WGtw#J4&v2;>c`g?!mvr`>fzBDCOkz)>Hl!%L9kkIk|v{&t(1>*6h1@mIWuU-0n#nkt(G$^vS%(3Na zFc4|pzw=*=gT~&5+2&uW&NW@w*er7J+}j1t++4D2I`YrROcFS>`-I~&HOq6SkBI-D zd-UYA;xopD5xdrH$<0z;qQdfO);@QRS#60PoA1q6I(m1Z_lN%Hwu%9vI^0ePK0*@N zfnB+aUk5Gg^UdhAnik>KXFutg#f|c1orNyhe&BFw3e!Fz8}&zpxigVH(&_t8EMQ1!s&+O3G+>t+O=b6CN4@XC+x`iDsShuO55K?9lazE}qkzY|g;n0d+08Q3MI)B7 zg`O5)n~JYqTA&d&oWY@J@7O6r^P-~H{Qf9k0YJWLO? z6nAq;NGuS3%EQwU#k_OQf+U{>s@!KnoC3t}T<2+4=uG&^S7h0^hHdSK16h36CKY{7 zYg6}^*jJza8(WF`LDq{`92n}@7r#98 zMRSg?|NF(RTFcB0MJLSC`S(Bl$KTnY4W85W1-UOl%y61t(Y$%%HRTygPaF_a%m{4N z@lcuW@pSu?ed4AWk7fk%@bIwn8N}>3@Zp_o(jv&V z*+X$_6LbwGyp_1|iTQFe%Vdi=-I0ufTUuvKVvcOla%_y$GB8?HV0hJFft&rT%!toI zGBLZ46}%QZyg6N?Jr(MNgFcKqF0r#LSSrM}Ys>Ww#~8y*f9&k3cz)o*k_nUTVpuNA zm>3&geenK%BAfNH>JKh9^QJVeZgEZq4R}B0K74jIXe(OHpN=F*EtaHsW7~ny90P*| z%e73*LK(vD3xzaydk4s@mf8~1B-t8a%#(J-=z&Mv46TXWoF7C9lX z=s}TOn()*aLDLq*|4V+Yq}csfEB{||$w7st^7lqDI}Q|lR{Qnypa3K)oL;Vd;3W{j z&9h~LLvr#jne5r~6L=3b2);YA;>cpTehazO9gl?0mh-f{(qH3x;{X2}r4Re_*{*aP zH<`ewoN8dCbmRYjm= z_?Z3h-fEuo^PpgDs1(xxP4I(ff1V24?3G~aIpDN9%uso%M1$$kgj0)HS?X9%^esH% zS&^mM#2XZ|X$P0YAG@PH)h4MCsT05cYxg}n-|FawV)aL1;yw`XA2Q-N*0A`1!m_vn zPZQ7BaW9LDVaSd;xQb!5)T3bLWnO>I^8NYAa#rRH3uC`WlW({Qf6`gk8JSF*|1@mg zU!QJKIVtflTbsK7hqqRcGVIX4Uw`b~OdWl^xi07b*j;d<=?AxTlUCtU&MisI(|?}W zG-IzZn@p-=T0??p%k?i2(V`KPxo3&an3g0aQ88uV(w_bGl6>-MH@0*>{Fck~+Yp+F zW^Ot@-%di}!eqgJe?Xzbnjv>MbhmNB5xuM3yj!01sdcPga^#lR(f37bnkr;?_U+M)QmKk8TB99Z@2e|=m1f6(6k8{7LIUeR1s zCIoR|(!mn*G>rr?iHb=~>kcTX9Tz@RQ*mQP%o?Q~4D0^%ZvAi|bb>(w%Zd6&S)omO z$7eAWbUyZyyuQxx&CSCFKMW3ke(o}NAuNsaaBtah{FcA->69HxqL(N0?C?(W`CP~< z`L>JKr*{Vz{~AlVbv*uD#~w5MS=er0`@mCg!j!`Ia)-aY<@x>H_|AU+H$RnN@w=Ug zea2d?KYzK`^z!UD8DZOLDk~tliDl}Byd<3`jmI7yCM{eFH zQy5?FSQi1Fiagw|^5!$NRCs?%DZ2m7&;L7SEAKePA9GYQtsvoQANK=);RtSqq#(|9 zER9!vY>vt;GpfjE^|dWpcff=_e3xTzgSX6~wl~&)zV#-)3TX>oE+J=|^y5=&i*PW+ zkxnzC4?Z5)XK=S8(yLvFYTn3@G z&STE5Y>141;HA35XgD8dNGH>pkE%;$@@;lQ3s98qZ-7}YWo;m9>P5uOfzo>J{)#jHfO1=u0tZaD2 z*Q{g-e=at?cSqN>{}Q=AKF9w5e-Xp+_)jG$$ok~dEox`YnH=~45@yT{nzKFPPMUgU Og7}`UelF{r5}E*j-wS^L diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png deleted file mode 100644 index df3cf2004312ed0ed0ebf1f0340cbfec7fd9ac46..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19251 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3`sL~37*a7ODM5mDF#|gT z0|UeV|5B`r8Po?o^It<}XJcpP0$D>7MzFJw^gD932JkS?zW(ZXYsYu{r3xDR%zkp) zHa(D0n0)X$`@NLNwnCl-o^G4Eu7`ep^ZM89WsC*i#Tq-<7!1l&qo&>wOi@QXwg132UAc6Ux5Yq!<$z@JHJND@5}kiO zZ`NVq*Yp2w*U==gYTtC_mS_ju~2tR+Q`-HAmCD-Z(5{uf>ZJ3yZ`@BI%yv~9+C1;*FpG|CfyB-nvOxXV1~d+1&!3 zIg{5uGYx*Tr+!MaWq_)~X`^+kOit$n+Ptzl_jyU~d|zQH<#Q7a7j6}CJ=Jr1%l&V4 z?(5HWuCJN+Y_tEW^lvBLSkFDYDPfjDAA`c8!wu`4?`Y$zGed+w~P92T!LC2IxL->oxR`XE5&ErXckC0kj8N9OZtmzRo@>~v;7zE)43e= z+GOdjZN9Jg)T@0yE9U z9ba6?{_-z*VMY6+3$6u5g)5V~&&|?wHrjjqfBnS65^J|^+a`Vdprx^|lZ)5Z+I}O6 ztPmH;V_R)beq83E=q{;}zx43b`lpP>>GAn%r^LHd++Ug*(UN?`COL9~&>nY=i0~yW zf;qRZ8=SKQhl13JBL|Ni`}6(1y{#>0X{n{KaH6KBA~SQKq-5Lu{r@*^_>-QLl#uX) z`~2CSp8o;HRz*CHlQ_QJVl-Cb+-ehW#?e^J`0v6_hgA|Dlf~xQJ?Lb1IuvvI!+ZI| z%l&!Q#~J?lb+q88(cyP@dF<UI-qZ zXUlVay>ZQt10P-rGyk{fI#R9|wu+!`2_aLxJzCsSaBf~Ip=XV^{$W_IJ+KmX5vcqx2% zdp^%}ePbDm0|j5T4mY#&yuWAo=a1)hiDamK?vEZkOZ)qOzM;~gJ6A(}V`D=+JEu<- zjWbNv{J(K4=fA{mr=(TVZ%%T@RX*&fI5UYMJhye8MS!ncAdkP;iq1)mjHZzalmGuW z{`2?rhu7kVR|fN>pEH=zEnNeAJno>(+;KNsKUnN0ELahTOm`&TldO8a?YzPPndX$>f)ULy9W3GH%?Sv_l^5b~6 zDH`tn!XA&=BaTc2`D2OSv01Y(u1PpBc|!s38b#jl1xs@7bTaRCjbDDneJ+SM*UU8JJ7mD)dVhOD;>xJbox#(J`tB`j zf4#7(i;uIp)3aM*!ij{VeSd{-$A6k|y8hv=cV_4NQ)dS6`uij9g(R2(*0U5sx#rhe*wXMvM7>nTR&_#zYQWuM!$mL7RK zFJ$V&HP3A49cYwayVuqqs^URLvBGri7gGhd zDTe4Q6+Gs_)-`)#+M5Zfik%WtlXm>tenRO)kA1zwy}ijdHn%4}^=exkE@@YPY~HpL zpyUlobbgN>JW08BaUwV8#-2ViagmhtloqvOHa51LlT3S+4jgpI&PrG&aVCvL$tsZj z%Em3y6J#2~#2>6t_;5x4$7_)*EL~>~%(U3BjMF6b1n<@R`z8BilOwivJv_qM_Wb-x z;}xKm*#G~FoVFf1d^jy}p^#YG)5nX|6%L%^@D?|a3aU6XnSN}Es=U|e@AgeCBx3PB$rDk8Uha~himw3&~U%Tu`=?3 z=#hFEl_LT!vRsJ{t0dN#avL?6TTJ*Q-NZdHDJdx-IeEk8WC5Wh0rB)3Te}|~WY+qU z1oHg<|4U{b!g9p7kS{wnM3R>lp2)3wg{EP30fsFXBO>sVIV<{%@*xAW|Ne+=VZH+lJs37kSl z*EF+T^R|vk->pR}!Px15&Ee`LEpz_YeVE4pP5~N) z8#Wt9M{268Sw%%_fBvkzoG0VN;aSQ56ICsRbrwvN&il~euI(i7lT+!_e|wvhi(C>O z_8fAdKa@D*?j4X}D`#f*;oYOko$sAA(bahV#Et#Kg1kwwzp{i9CW zl*5cMT>e6dDJcN~2^~Et4;~~$Z0~#cDYa|X3P`vt(kRTxKAn zV)l5fv77;Mhmw7vhIaS#>Fm}P8pVYh4<72^j4L{Dhf81g;Z?3BTn!8!=Av>pKg4hd zwrp7`@buLG|2`YqRAdfxGfq)(^D5@?n0Tr|N}DGmlhxGM!08m9O6rSOigv9l6I%)e zpPsFM;Z)|PQrOZT;H@?%myazclQr9!g>S|Ti5oX17cG{&aYJI6Z}N^x;r^RHpzbjS zxo7J1-<1_BHg0m_;!$&RQ(Mx*C!f>McUplr*+9WmUqP|^j-In#vgE``Qp!PG4U#82 z#9nI@c{H7PYR^3BVL|GLZ(N5MGS)VA_^rw87S!D7(9N@?O?Q@*&teW0ZVyJ6tY{X# zhzJQLX32<1Nj0B@9i`nH6k!3PUbtdK$CBmNtt}FpHW?kdCHa9P&8f?b?ZykSTM~lT zp0O4QH8MQmdTPH$u`}l4f+LbeO%1Eu`1Q{i$?U!1pr>dcrTSte`^ubzhRF6gc@yUL zX4EWLmDr;0Sg~0o+b7w~!hoCKr&mUXXWl%XGiP}I{^couH(}2QS#bJPvMfAt>XWNW z#PK5$MFj_L&->t!nv~#>S7~q{OV~wOWb1?m^LH0&TIc8_eCp_84rW=a^q*h4G1VcI z;d)=#Y=Z@>jx6Q5$--9gMLC-Hu~L@ml{W5K!59CWX83M&OMyM*4u{dI#;wk$8>WO^ zV46I8_Q9)DA0FjyJK8NNXQ#5Zo*Uwb+{Fs~{Hr%_t_zr8Xm8KGN3ccasRxgmM^xh! zRf$w)uZNAx=1vq06I&Q=&QtJJqLWj2fvD4rR}()QsvR=n-5(esaAdB)dxt|@o-C(Q zlbn8tN*=tD{P5AlBp0c=mg33^2_Yc~85xO|){U*gdC+)Y_#`1I>B^-^{9Hz1Ay3?S znL2_6o-zha{%o&P|G)if*o3VH3nZIVe#mFrIh;K{BT_)`oI^}tBPWN5=!!=!QXKkc zBqSvyCM*kASjzqFm|9)KR*r*9KmM<0B~9)3&|yU45_E!w9!4dOX^1e+$H)bYz*hJ}h+Vl!{+>H%4gvRzLnC zC~cJ3oh&R7Cr%U?8X6u5VvFj3VE^oZK~bFJRHtv8%0�Jxz)+ zH>D&KbiE}yv_BV0cC^jdV96Yw*nHJC!drHWPm_kFM2Cjl1kXRYf;+tI?AX-R+2Z5b z?CcgXf!b65|0`(~c61e~s@{x<@;V^aZG515#s&)q$u~{sgN%$8i5v9v@MJWxybcd( z*dVw_x?Re|!AC6pfPmr*CC*hx*2EiZU0~h3#EEI=9n~3j5|WZjmPp*WE9v7aIb&vF zH6I()BU*(wZmeKsf4zB=(sBk1huIU`mrML;v1XnmQY2t-XqBQ*!}5Y-s~E!5j>bO; zV4Vh*g?as=^#?2E2n_F3%CkQ0&hqP3b=2%Rb>iXwzZ%Rsp z!bU!S`4gE;$2UE!KfyIYTXsg^|BX+J)-bF#@;UgwBDIrc$0cn&wmQ8cD}`k{oF*NZ z_?wd}DfO^tSy|f-3%2ZRwz9G{*OZ{{onWzJN5q@Arfb%!CNljG6bw69-_ui{m>uzN z(YZDSb^VxS51Ck$=e@b&pr{aGktkL)RV8&)NzqzH(leE6O| zVS++}DI`x$uvoIBMMg%XwdKZzQ*29iF-QK9Uvt1gVT-Pc%M`JSN!)sZNnAXiCh>W0 z6-vuxN>AQ4!NXu?0(WyGqw3uSOBLDJN{;gI@Z{z3+N)(%z=UQ=F0?u!rdQNf<7Y z4;O6?ZDBmA{A5eZhwUbXQ&o2eiD_#)7Z^0=>4F`7XuidbGiRqRS!x+DTYSROi5a|n zUznJK(iBRg|2_0$c5 z|MU0v|LOYw_t&Q@9G<&m8UGGFww@A)(~Rg z@L05{LF!J{5>1m6hwPqM20gjbuyxLIKYv67x(9#Q66KL~;lSLk`MoP8e8qwdU0Oc=|Nq~+|ma z`}^VHeSZ0Wh6aDagOa{{PM$JZ`JsnT#hS-a(rkgv=}~-#o<2)6FipL&W8%Zh>Tjy! zpbnjBQBiT@&D*c3DG`YiPjRRxh8=nFxNw+rt5K?V33Szuwa^%1P zhMNsv{y#lk-_G~<&YqvgCo1osXY-T2lc%hVEibQao!+q?o@EOZ+*DPCj%~X*iS?F^ z$TJn)XEF9EXRchF`0!_OTmJnG5s*A`Xr{%NFH4%5p3I#2KlzI0iOeR!m46!a+0q&& zf2h;%ak(AWAQXCje*HYt*GIm*RFB{Hr%7x_{=J&}TVEf1dHFoQOvQ}3Yx6TT4<0?r zBDwnLMWux@0+VNOs3{+-|Nrm*zq18DKfRCKyf5eGr^i2wr^x*Jtp3EK^?z?~z5l$F zGw0H_Y)*dgQ1Ok0+nv*P5)v1VPUPmlH98sw zU%pKf7fd{SS6gja$bqA3LIz4x#WKD*_5S)l|KaKB|9^k~Kf~nZ<(0wv_thNCJi5Q0 z=l1sh|G&Nc|NlRao?eT)g2aSN?AJo=GqfZPR>|!UoX=3o*l%ymbO{?ySu z8WE{@@qELUvNpl3azFW3yGu-v_`t-x@7f$es^zamE_DDKs@R&@ExLI(ZBTGsyHX};l!P(jSkN5x2$op6E_SXCN z_c>Me{r@l7CzmQ9kRTwOY+!gWNGpNq%$I`=%9;iXrpXk%<>BF3wTfrgE}r!BNt!m$ z6nTjyDJ^Zvbn&2|N6Cpl`)_6cJ>O7O!T8Fs|3aYt4UQZ2SJvrBIeBWFpI;xh``f3x zyYv75R~KmF;o;d||G)av6YuorJ8i4w_Sf@UK4;TiwLwg=LW}i`N^9hwgw*6^mMlMC zUfzFsdHvPv^$KdK2lgD>P;Gv=PnPHXec{D(8X4W~TN#fxNQ9aC`0&)#@hB-p@PNjP z|NlQU$s!_h=GLvPOPB7HxbS_m@-c%6?7KH?3Uohg5X^b!%>V!Y|KH#L|J=>Z{`G&A z%DC_EujeU$R}-`G(YJSZ`vcp3E_kp#QkqxMkjZrJOX0US=TlGbi`n<5`@|GR#`yh4 zId@JJJY+igJN9Ewzu|&qiYBH!JUmubJexK}oMC}P-6V^D|I;sAnIa&P(lS*tqw=qQ zQBfm*gOSl8;e+u@9u}`jWs7-Q&(>By-@bl-+1sa!-Q_*FdDz z{<;<^hBc^1Ca5W|;h4TLLxsh_uIkJ02M6E3zE-Q6eE!u`ZJz0RhBXIoemTUqfv+|s^Gs=-K}yC`f%{z-s<_`Yj>5uzqfq8 z{r?2*4~DZvcqBIQ%n#T~NJU!eVUtq#<;etI+Bpc7v{|09chBQY`b8y{P z^RxWbmG9Nx>uP_w_-$ll{Qlnf&kx6_&CIVSSgMH|FLZDb5dpQRA_A<}!(GtT)%5Gv zn+czv8_t@Rq4eJ{n4!;#b*GM_w_g4I{r|r`<^H+5w*KFr@>f@0OPkvz95G3gN%l4T zZxF=5+@^P?h$+&V>&Ks;_dhXG8lY`E3L*ZMVnFZ%gY^2g7M zypZ(gG*Vgdk;Rgkb;+&b{9Xt_b4O%fbxtb zM$L@QV&R2NX+Qq+%m0(J{`UE*wimzu&%d)*EaVGfTsNtC;zUP({@MaDaN>6IlsR*z zCn@PoQ`M6v~Waa+)|6Cje0goP<=hxla_xI)N>-Q_P z1=*CcB-vCx)^7-UXy10Fo~N+oVf_Ao)n8tG-=6>Ex~71`w~L$^y}oQRqFibQhKT|q zE`pHC#K~L6M@!3Q(V{aOcDQqEx?DMAwVK0h62~(Ii8Loi5yhAook^4Kq_nXxGcz+E zetv%c`qg)6|%~f<|~F0~;nO{P_I*{@dI0C)`@X1 zkn&C!SIHwsPhRX;UU49&Mj<;fCFRGrUPpiarM{4H7$;Aem>3-|FD8$JT``@1k{u#t zxcv3%4JUET^I}QOm1t2^JGOASx|g4a!k)7y4j=yX=coCd)Wi0B0{h~{vMpP-*8Mzw z;KTpp{r~su@ezp&)6zbzpz2zdAt3S9NzDD-V*^9O6|0Rud{VjsO_(2BriiScC&!*P z>p@ z*RR#5{YY|xM8>WE4?P45iWfA!dBYPFbY_Z-+L?m~A3Sq=v&^K4VeR`aOLJ~^A0M9j ze+#yh!Y#G2c~k%IPpZS5c|XO&$`zWVFU7kCG4#It|KHHv>Ejl&f6Cq)HyNd*yx6#T z@_n}CCoLp)fq+#Hs!l$PvR#=#(r}v{s?#a(z`y~W>H0%{TSt`TC+&6w~Z+ys~ zwXxB_H#&O5k|PuL9CCJO2o0YO8gjC>R^Dd9sQBM#feOcy5GS8q2PZ!eX!`&E|MV#- zAz^9PuBjQ9c61x7s83!lDw`lC#Nw@-nJE$r4Nw(6H#fJuJh8mI75Njh#qLaAH^X^; z#BnnY^N^V{{;{&0SiU@ak|3A7{lBtuaZ%x9XQ>&A5y}r%CVdFE+Hp&M!hxUXPn>x0 z_3O&BcxZYS;GMuhGzKx-$|wK{{H`l1{*eP zR9wWnQp-`WrMD_^u4kjUQ{htPlrQYMs;1_%ckS|;EG6dY`AJLb&Y?p(8KI)@zOfXQ zHbmAbi02$p5iGfEVk#*s6LIE@hqpWv6AvpZPhH)TUvkz5td=zL7>G;%|9}3&KU=SG zXyK;P7r+1CrcFf&B0?F8Kc+M8{mbY&VH<~8%htb*r6OPT<>fE$sodu1_(@0S$hPA- zIW4iVu3x|YG)NNHJ``;5z$+p`a)z6t$e;iJ?KL{C-ks~}*B23AP-G+`YM7EPymiYA z1qVH?4_h1zxu?q82UK}NgEn(QSJ$RQ6OISG{uP@X5*I96V$bv9D*HM!&8ntmWl71t zo153JT2;c?r)+3w_~PY9RrS+9e|K-*w8&CAdoq)+KsICZ)`|nc2@W@SPI$IY{5ZdX z!MV_|IznRh4vU9h4jwsTP^#q|DS36*`htczMjbN~6H`(iJaoLG1GQhIR!67j&>&Q z{r~-|w89b}{`a2tXJ74a`)i#hX=!Y=wQcj{*xcP?KDr7X&~f?@Za8NSPfkupZ#mS3 zDtv)~WqI%H|Nr}&+Sb?nCpjUZA-?#_e|ur&$MH87ZtgMYURW(?Fp1+1XGAdDr!s!F zGe-^{_%MIs$NjTs@0dCB@5jgMudl1!Q~!VU@&5gP|NWic+36TG>GCnR6Zhi}9dwL{ znKO0bL`L_Y{r0~)JFAmK{u~x?>EPh{r>~W?ZNrcKxwr2tyZ^hphjZ`Rc{67^#>_c* zWs0J~goCHM9j7ch$(+QvXKI%wAA3y=Py9ZGbDJPpb<12^&@5}~{G{yTJd45$i{JfS zugEpm#pJx>k)+T91Lw9iKPyg7m@mxtyk{%7XOV5*Ud4M^Dv#O4d;YAgh}qzw+4<9< ziG#6AkP{?c@yt7UrUSsRK8l$zv*M=?C*Upj8cnZr*F-bV>%Z2TW*zrW2hs` zxxYqRCf*Vjo4zY}nnc^@iRalsL)b}F=#5B}Fbxx9Mf_ale8 z1=yC9G?-3Zuv?)1SA#Mi9~&QESWM*EW4gue<{qNq#as_xCwUmH7Ps|35h;#ldRsOwLZ;l}Qo--M#-;oa9K9 zWou^CG(E8M#L^S>A%a^X5*sG&Ffu$Sk$T`Ri(_7_&}6j_D%>f0o356y9tlxB@!-0? zh$hr*mA?A_|LT8#;}=Vrz%4&xL#X5jr547QPx9Gvc-Z!=4Rn)Wp6TK(#v@_q5_Hp{ zMrLw~#k!EzYabocE=xb~664u0rP1>Fr~mecSBGn)8^R6UU;qE_kAn>UG8@9wmEXJ) zR@?IB;DLr3MKZooZ>(GPq{hs+(>F&5~04#LQy|7QH?KU(mjU%O^j&zTjdonRQNgWKKfY1gW!6mh!MYaVbT-lp`KWAGT?IQebCZ=%(SfJZS^31q>-75(PKS^8-Yn?ujh^ z@R+?sfM@3)2R-8pzcdBdZrz<}5SjG1>C~~Qb_HAAI{TxZ%<-+UY>bzTl9JV7xOb~z zIoqF;Oicg&I@|w*tN=JP%i>>hPga7j%c) zoco4N-@#92hHr?)@Bj5}+w&#&Es{G48qoaz-^o+PzW!h9UFCioo35>_U3adhHk`d< zw1BU}aKWZRR;9E@Kl*#S=XiDWcn4iIm@uz_P18Z!?89j%iTry8HUIk-@j%)t+Lr$g z+?H9P8(gtoG;XTFg3{E7lYe_WGc+>zqrU4&!T}9q34=w9{tp{hA2?wDr*(gEcl_A| z7M9)XBd$6#q&Z{ru%@(grUKi7*}tCiAAWwG z=g*%b^QQT$9Ef`P=l}T+?_>|pwSI9G-X8byskg8Bp{~T#pQCnTh2=Fi%{xMFds5kU zc~7u>fA8;1wsH08_t|NsAQkj?*}_}KgZg>7mIy*hsn z@AYOC|1QsbI9_ep|IO+9Z|?f~`1Ex9jW1nq*L0WOwp(_ho=5oKLaEkFyEz}_ug+;? z)Q>k>6LWGy3g_Wwc9rdV8vPiW5_{Py zyqfZN{%e=ow?j!tO1@wIf5n#<;>O0!`|PYQ`7PhSLR08+){|WQ8J!I0)3nxjid}AH z=CQDl`0+zR&i+{4Oh!kCtW!V!&wu!@b_#2g0L)`D_5Y3bRDKqJtKw(kl6JMx^}ejTMua>P#Y{=GN5kJbJ|AV%R%k=#J@7!H-W^zWND$6t%XSY*7K0ZD_Mf1I!{lB?WkLv56 z?^%1`N4uBO>3{BC<}BWVYtjzgwfAV;X676sa(Ht(&+4#+I(H9qb>vS^xccLZ=Hb`Z zBl13hyzu|Olef(On>YXe{mstlSi#wt;i>Sp$z?UGz)F_#$fl0v|Ns9#H}`kp+gtit zT5B>h@7!3#8?0{=nc&MiLzVs76$6$@tSo>2pBIonf8f}$4KdXEA#oC|qgzug`+YQqO33mra+Ah?{{R2~{blzw>FN2u@!UmY0|TYliyb!K zzscX`VBd84-~XSV>pwnP-`gv1U-RSojT7%*ojP`=$7UA;`-zPmHerr`0@$BLToYhE zbcn~$5VTGu=gx@(M~)ce%l))o`|t;c@=9mBAb5aJZH=GcWpnTD+}aexo!r7UgHxetRP? zEL>#$*nUkiQ)+_9Ck{b3H?|vdbu@Ud{{R2K_V>5%7Z%DdT`I5U_vg>2r~Ci^mpnKB z|ItHD`)#{#wjEgGXrL{Xa@5&6)xf~u&;RoUk622!Bp>Hvv$t;xT^$hi>EKjB`)!SD zx*HoGo|A10U%w$6T6Ah!Mnp(NME?Kx?{9xEuTj}Q{;(p4O*(t#IB4qrbIzaN%(ue1 z&(106{D;TK|35vw-+$((uaA!27jQQ)GCFa}Xm8C=_b91(Ru9iAo=IwEbf4I;+DX#x z!Eye!^7jiC->Ux)S~qv$!hsF-_Ra0=&HL<{m#CaGUew{E{r|u59IK-r*p8Q)L!%os zAjA_B^WU!iAHVgAX2!)U8S`@u4rNKQxw(1FW?q{6@xA=v>+5;=<^EU}J-Pnq=>79^ ze(GwRSh4>6rza=xo9F-g|Nnf!&rkFH=l}EaJjh;oA&c?MD#o3epX8@@$L!?ME_lRp z_~qq*GA}M>dHKW7&hm7N8^`QFmymQQA>+h?B}WdNKY!rt*#oCfA2{6n{)c|d($imG zM|3HH`sR>UQ~iIVn&034UtV7CU6YmXU_G@tYe5LCX$LPUhKPZ>*tZ)X{Cs!DW<@Wwc~@^8Vxe;-C`pws{-B ze1RBfnhWZ*e+D+Srw$zY#J_UR5B7L_9(|{P@ETBM{Qq6a~zrXui`|ZvB!|nC@ z@&Ae+9eK~r|BsWyKuW@ZoqhA}of|nh4VNwdpOD;~_w?Dbw3=^k);BR7k$Ixj$e5p9 zVo~DU!SesV@td2>vR9`~OMCDz@xryV3)j*L*{?9i5Ur`|;D;v3Z(G6y%8!j#qk`fYmcYhr|+&n|1?&~Xg>D%=; za-YxWY+zItoZ2Q;Rqul!lQxRHO&y9RNU3ClDd zzr9^=ZM}be#K+g)^Vt{~-8ebjyuHJurNfGg#cFHQ^z_&~YU=;}d4El}MZ2VQ=8T#D z|Ns2l-^%(f<;fbl{)=pP$zY3H5MtzEM)@;o|bywrx#Umyd_L+P~z4gde}Z|DUX0AHT0<8rPgT zAWI60|EDJ%nx!D&R<< z>V5N5gpI9D-Ctr|+|lqNNZIftW;fRX1)egmy0k!oPXc1>>4;+yFYPt0_Fk?gZPsDN*2kLu-}oBjTX zi80KWD70t(`A;q@zAn_QnHh5TN5`by3m7tZ4fnaug@m}g;@=+=AO8IO|HDW3C8E+7 zRyav%wJMzmkvXRO?f=BBHNU>R|MYafyq!bt!^g=F9{kYP;@Zyfdc|uFUr7N_jK05r zVaD%&a}OS2Iqj9aMU0J^*(>TG*MpZYzvpLcZ34|PffAj*<-h;w6`!8Sui5g0UH)a$ zRGA3}d0tFlYhir-T4}e?Ba@7kp#IH|kN2g`{~eA!cKGm$b>|M8`}h97eaHNb-(ubM z5>iqsev4gvFkxESgJ)?!{z`jqX5OjN&|+UxB)|4~6WhOksXNMgA6{0kIR^>NH**d7 z8VopGz2+@%Fzi^R^ z3}`-Y`h*|+yT7NJL@sFf|6j81Z&JlCk&FF5KD9df^{qI%&aZ#dHMhUj-}6MtmS2E``^>^KW_iO@*f}Se=Iy{UuWRq zVbIlSSWuwwzD|NQ02H=gwGYp=?qId}^oeK98WG>K$xX?}Ij(3TQoTN?lH9RF;@=;~ z)%X4#K7Me*v)d995+CG`{GZ?0`0#yvTetZCzdt_8&*PslQ&L1k;@{uj|DT_)_gc4G zYTbkD`k>{idG}m8Hf#z?yf1z0!}Yn=EyAD#@c+M4zl@q%+kAV;ef7sLeM@XOd(PNE z;g$>!56_9mFFYQL1-SqDaqz?2+xxSx3vi{RO5V8f&)X}ga`F8S3=@7-e`HF{>FVuu zbZFRO%Qb^xr|zeh!iR5f?_mB=2r4!}>C0bcf4%XY{pSzx?zWRzE7>Ua{e)kWwqx;y z>+ApX$^GenbH~13LPb?FB64Lx{M8T46Re(IRA#FX?wvZ-F;Fo6l!G_diCQre7JH9( zkKJ`<%Nv3W{{P?U{GR{+jrUZu=dMaHF_`$3i(Pu_vs<3B$R z9ysc{{I-h0!TI)}^@STY7#J8juD%$)USeNetBy*7fUxtOwOudEts2DjCGPEaeVd#**nd!1GmvrRSL z-4>*z?=_XF*RAC$N9Dgo0`~>YCZBz$GI^(f`}Mb5uPU{r$*XOZ+{?bjOlX&qkMO?p zHquXz+h5pb!h5)Z!_{%#V$)R5(1*KL2k3C$R^#YUdNf1DQS+GK`Q2M3_bq+DGJ8`F zzq%D);*NbB)4S(Pb~z`r?xIQU)Y1rWakOQ&Enl+%PpiEulbGIt1LqVYA}lqgpA_0? zXuLr{lUaDd1;v0Xr&jIy!1qA@W#qKKIf{=h7{jl=edEm}rm6I8LGIz5{RLB%d^No% zwPf4=5?NIH{$=$t&i9gRYmQ1xV_kY-mOxAW!onjGyZQt&q;5^yrFL4QE84l9>G!!^ zNpC%`RWwh2p1$_q&gTLj)y@_goVRGdw*SkG4$wlL|NpnlT+7w!Akb=WIEz(K$F8C5 z;2Y`udykkongS1Zom_NK;Sg{2M|O9e@AC{251o^|VQYPO;?%`DCqH|ioI7)cfKvYK z7_l;M)`HkW0?8dp2R(mrUYlBZ?DLB3Ez2@zsO{=owW}xLTa?XQhWp!Bi+Bkh7h#Qb z6PU2<$G`YGnd1v{CE1RBe6TH7rE`bPz6W{tZ)fn-{=e~CQXU*|Yvyj`Y%maUk#CAT zESRGcqq2Et`_xwD9SyEJtg0MhW}1sy7iEYjS3JEs|M&Nu&Gp7IP6@ilWb$?NFP@ux zx$tM79@_o7LPWToFMAy*a7Ag0?! zzqk)OTo0VK{8+w>@%cEd*HuB^Cv2Ryubwlx95hT0o=>m)n`BYWr@C^{RkecXi&Z;k zylm^`P0irnq%ya&^Wpb=HcrmA{q+m4+NeIbZJxoovt0l1-Q7I;aRzfN4iry97OS33yN&H*YcEW?r+4;pMW-dk*EBl%Ri_)&fR#w5JGiP{g zYV7PbiBy&Yt~8jJdrfMoo=T!$_h+Bm_dedA zz0gJTrIk(prX?CPmgSa7I6VLSTwjgF>4Hg$WTw}o2fvSeR1>jVviZjDn$U~iuYA22 zzP3}T>40pLYV+qS1&`kIc};wt-@TcMf#LuEDKl;PTmuAJ?WgV#?42fhqlq_SdFl!c z7sZR&d5%tt1V3JmIjQYYQ8}+**GZG$nwgf9i+PV5C_I(>7W1hv@}HNc39lkwi0|Z8 zi{3|yE#o}B%XnvcZgpww%ygb+lcnokS}p3>2dMR5 zxbo|<9XWV%;o|7O4A1MD@4r%87aPl>*`dc*re&ql(a4vX>zZBrSUT^)#0kI8{jId* zS$O-7)Tv#w6;HJit2RkprlB7soi#Y|1zdJajfE6yYs$h@UFgHYNrL5 zH2Xhoxw+`J*>hvZ120>i?=>o18`yQcdE3)6<@5hnNUmJAFq6Z1Qj3LJS3&Hej>0Fh z*6*i2oO=1lSFb<=9`4l{4G)){vzisZzw7rDqn#I79y<6Jg*n-S8lPHb+Kcc>t&0%0!=pR zo;&W|cpI@+qW`4P$z$;xnk_yDmnrgmw22YFeas|&*OBMD*XKSh$e*;~zVPum=U@D4 zU+dxQ9n8~ww8b_uE;PnvE>)NN6{#m|c%GUSBvsdn$ zGU5BRc*FY|LDyyY>fcCKgJY^??q04#1|qHY>Ki>2SA>3GcQ=oX;j(iqzkhyu#=>ar zA52`YUOZaV5!IEs@X>{UtzVz>D5mWGAehJZeZu*IB^%GY7FGPT_SoxF*5ZM!o^&HT4wnvptJzPH{KITN2L z)#kX=X>ntw(M$t5ZGqtET`}kG-w14(QnH(e&1bpJ^c{MAhu<#ylz3l+Wr|mY+-lLe zeA{|cA6E9bt%^AOplD&(;}WOqmNFg5OOAYxTtD;dr;SI-WBs*>Z9$q4;<&us_i>paVDwpv4U#QrT4{l$+LFo zIc>N(VcDANEr~BH<}KCWIGEA0EOfbszW%O#pD%csbNss?TEGr2CFa<2Hx&pRtM}B+ z>=sxU{osniLlMm@cf{XHTd+KQB<8Es!F%>%(xLl~N2I32>Au|dc*pdI59jTcey45G z)ueEsQ10hE&v%=1YgSE-5Zf%d`|fF{YO6qzHP?NXzp$EjyuxRa0?S{!M342ypB}2~ zUsYsxecxK~y{nue)9j8LTqyMszP(P}Wx?&gkGb?bO>a(D?Pfi#x~}_qp^eaS7RT=D zX%}B^xN9Wy=y!zJb?wE1&5TY9GF=3_4n1rTSn|@^-b-k;=wwglC;J%+&7WG%_C37$ zqI?l}q4uHcOU%sEBlgZ}oGfhklA+LK$-5Tg3pX-4nIA2iBJ$;6jhh>roLq)=j9=fw zlfrC*f}oye+@6F7kCQ8YPI~yieoW&04qJyR($HTB1znT{?kj7IT)|61?j)Y0L2_Kauh zRGzrF8_$CkbdA@1{v^<%_VvB|;kD5`l9D{(>n1#igtW-l%vR)Uao}-{_iA!-x)AYZ zhGD`74wmE2#gak>9C?lgEQh-!%WiI%AQ*jh$tvI9zh`grFE!!$VjE?>v3GObx8vUf zyLK(h+;=YW``lE=meN$2k2cS4_Sxr`l%C`fT-1@Pd|Jc%(i8;+&$_1becWuLM#fZCVS zE&lyUw5XhP@DNiL2o$!s^g;i+}%8Eh;7`noez;EnRy;X6MJo z&M4^lB`EY=V@Uel&d(#RXHfI&$c9Ye5|wA) za)oaB@bK{H=zw^O-=x3=)|QzY zIa?e=T;%n1BqTYw#5VYHh$SQ_vXvz(iYcgN<*gB&F=K~XQ%u3EHPz)3cU~vh9X2@g zzxImGVx9B8hYFel19=zUxoP%l)~ZFXXDlnu|Izn)(|Z>|FGUtlr&&s>`}`&B!hYvJ zlRUmK_ht38)@6>_Thd|$4++hDe`o3>g#({8xE@I?eZ&9D|L)~ioa=T~$@C~!o-?qS z_8DAFE}34g$(g+0%0>z3Y=I5ycLPIc3|E2gQQ^CmB|R?3N1;dD%x^(}u^;gM~E zt|F}WkDCOBHYKzd)%%IMyUu&Zw0x<4vh(~}|Im%=QY3v38w7~FunKL}QRX;fJj?C+ zLyHKpb>0$-Uaxqi+PqW#M&I6c@8>rropzce;JRP1PZzYq<^TUBvvs)|90XeTCkdA& z&OW-y{DaHAgUS`UyU!+Vh~$~ZdRXd+t1g@1|621I4?pg0oVhc<;kn@PB`*ut%t<}P zYTuA}Kw^#R`An%}6BjvnwH%OWYA`rrxkat(MfDV?Q`fh?E%9FI{FhTF`(y2@u62hk zPYEfm$X>>|^xF3qm2R)UCVkIYx!aVHUtrT$uiKMUQf^KU%x45;!zHkGr{jxv3VKVE z`ClxoYQ1-yo1^!_tAOQQKf({@pPS%tsG8;8@{3!KS*}@IZ_!uud7r$Tb=N63~^oC^Mi-AR~I((s}21SY3GkXp-?gW1g>1 z9_vbjQ<>7Wxl5>^HdT&2F%aeY7(U-XwE@|pH#@On`G<>>ne2eVm+!?Pn z&T4qcI_Zy-qQmBi2?~o>{`@^1v^jHg`bp?o6eY95n*Zl6Ts-*UIsdt5{B=hh3cnp? zzTni$7+tbxsuIg&(`6RRes1|Jvh49mhw^s@YhpZ|;-!S4mM50J*;&kEoSx(<`Qz^a zhtms=$md4PZNI;D$0Sxg?Fln>G%KtJkQS? z=iGF9`6~&Fnmr)eLKXwaDG9*weDdj5FKmpzyLEfBkFjDQTJ-FIZ+{qse@^ zX3edO5*hmrKdt+-?ScFLdgCu&4Q;C3(!5$h>*qo4(JlP*`*^_zg~NL)BecYoq?Kl5 zvg*$0;8b|zEX>qVD3NA#=+w~?v%1fmRg8LlcTOMR@o(DaV77ek+=q88g~DncLObnB zmCP@u>+^7n8@+gCWMOG&uwdQ4q$Atdq$-{I9?W2uYnaKoA+7trMUmDVZ=ZaQNPXp3 zLh2mSmzU2_%3$bcN+~?!a`-@lDr>?ro{CAgwsa<5R%81yas3pKk3sHCFaNMPooDOT z3wMv5ko*0;zkucO%-6^H|1DmpoS1MxD4pj9lhkFINep4p9~j)0wCcM=^}eh)qtdim zrfGHik{<1FiD&22E539{%ok(>nfd>}QgKLQXQyNQ{KR`UY?+xg$}f0$UgVfeV*LKC z#)d&x;9?R}_{_!}&3PHeq}Km9esIEHrwb8lo@$4&%}Z)#T+H$Fdp~IZ+^?#(x3@j# z%{mTU;jt*=#L3g$3)lBMM$BkzWULlSOgJ#fXv4Xafio;5K1`E5amr&W({yV|!>}ul zwln!ZN^tPy<*}H=%AB>a(d_)Er`*o*@`wNYRLSvHgwB2}(kL`AHa>F1z{JF$p!l(j z#c``Xzm#7GLMI8wS5D^LAeJ`cDf=cPC1Lp})s8uvI48|G%BIr!=jY)K35?F^>Wx>s zmOvwE(F+%$6AW+n*Ylh`%M%mBBDH)*P==znn`&>oO!ixcnnefK^mA-0Du`@fz4eGg z$)*GcEwc&F-zhfDHZ$~@;qY|I!2{3fnzY|BhedIsPi`%qiCJM+psAb^iZ1)`>e~P^k2% z!BPw6ymKnb?Ci~be23>)@=W0V%VB0MQLselQS`DMLhQ`j*7$UrvCq1iqTcs^MXYp; zE{}z{CCHlwnL>v@J>}t-pY*`=5oCyL(KDVc`|O;HjhlB`PWzg0M4gZAQIeo;Scj+l z5;eZ0TQU_s$0jQjT=jC9+3FQ<&2x?o+;piH9jEx&dfuJ9yBm}TbRrzK z#%YH^&H8ss)X%SNgF)M?tQQM+pK#@1-nLHXW^L4;PG(=0=^HD&L~L#qW;*(@L(9@_1&`Sd&obqS z-ezD^=d+_|3Cul-j}j6R5)u<9Oi1wXO5)&66%ccqQ`>FYnQ-k6OY)QxM;vt;c!F9ol!_93<5tEZ${kei428Ul>=4t0Q-c!^0^b6lEP^g1W1So$5ZeZ%^ zu~k;G#m2H(T7q^F%F4dz^AI_nc*P<@S}9{;|MDyaW`Ue^f#AP?kDKwFYIt>Mf4!vN z{InZ;XFt4W+je>R!sB{;pg}Fr;Yu@(Fm9bKxNB>SPmiNlm!ntL!DDR)kFZqTX5;5i z+QhbB(d1Obj$P6fQt~|(X}2!;G&IET2Q8oY@xA}y-|DtrX~{mB#EO4&9zHzmSR%{| zuA`64-OJhLAmB1z@6p`l5e*ZiZq>_&C9wZ%T$J>Qb$d-?T9XB9j>$>BGqd&27Eb4I zZQL3n>gzmDxGhC8)id_7%;hC(bhAB!djgMLx5@6jB(2w1*r#zx>uKSJbve?%&sDy< z*X3^4mEm~jFl(=(=ej%R{~JrP_+MLEvqN)H`LB8Fk|QPDZ-uSgc=$urqOT72U^kt) zyOFEKfalo9{T;1gQ2~Nq|K2}&_NcEoyLa@2L%e4X+BO_c<()Zc&HnfM`yOxKBKCdZ znzcG>Eb1R?lnX0=_vO%N-73)>^r(5M$~Q-jB`Sv6HTAnTG^x5SU9}`SZL3@MEwSvv zzIBV=OR`N_w2re;;^fR_2Q8n?%oGy4SY32XVp{9-hc=dcd&FlH+>Z?JbiQG>&U?;| h^3reRufaV=W`?;7RzxxUV7w1X)Sj+>F6*2UngG!74J-fv diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png deleted file mode 100644 index e1799a87c8542d7e515b6185d7e8f6f75fe73f3e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19132 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3YW8$-45^rtlpw*nn1P*v zfq~)we<{|*4C?rpM<#3(WKiH~dGLL{Wq#sgmWfTX4&P^7t--ZIMJwyj-w6z7wy;_` zd*428|M9$>H$!w~n)n&p2D2IVF*GEok%MqUM`by^;t(Alh+?!#1H{S9SGsty!E;;hGD)6u#Nb>&AZ|HVY z;(fjO-4}->d|2eZ%AJ#Zw%Kx-XZu6>L+js{%9y&%OW&^}lq~MF{FTdz>f=6V($db& z>=2ARXJEW2Ogk%g>4GWKjMs7h?khdI?V&}%ydCeq*?4ej)^0w{4Yn)Pk*mc(V7A!l z5?v#us*kV!|9`TPqxF@e$a3TA7j}t>iOzbQmaPu|(q%f#Siaobx>h-Bn(CR)tKZvB zly^7xJ!>GL^w!E%B>QzGM}w5|DyD$crA#58g@rGEo-#*O_2xDq<>g0?AN+mJ@YAko zbH46-TKQ$ymMQV&A654-g6x<2)Y&|D?#$NKnNz2F22BbM4fP0_67SaLA3ohZaAIW8 zq{hjg^_3sjFX?bkR$e6U$)eXcFCgK~e;%It|NsAg|Nj2}`T76v?*0A#)YSdY&;PIc z`|Er=fBih0pP&EP9=^Z-|NpEg&q+!@WJ;x!K>V%iXOl zIAqGiW@fqL9~Lo%KT=TWzWKvJ>|of%1uA?h2kYhS{_L;+|Gt_1f83rQ1=LqpE`ZIci_bH2OPZI-H%-gity{}oGL1%Izfb? zQ}Ig1v8zn31^@rIt1GP0Bnh|M#DlVo@>S;X~)P-`^$U z_9XoHnjH{s4mE!XDAK3Sog3=ze&c|{(c|uSPjrZ0+Le;jq{6y}mAPtW3jycO`JFxH785ngJ7PxfWZ^g!g9UDi z*H*FU{xf(ja%K}_Fk?|?)6A*~r`zf+rYu}CyZ-+LEh#oOwi9c3LH;^F-}uhHvm4TQ z58vI*<3FEooe~Qq9q4pAhfE3f_P%z^?c|*pr^rYLr>42RZ|!gW5OvFKS}omVdhKF2 zOUzV`O-WB2gVu0ztPF7oZd)4AIFW~EgAmg>=Sr%o~St6W*9dG1K#&0Q)Xeh;+F0#5O; z<@oYmnbJ7NMImbfx9E<{`iWhVY;0GKFe={Po4li__2D7bw&ddyda+4AJ|sTaZ49;H z!I6clw6gNn zeAm`94l7+Alqqg?3%s>TVzI*ihq>HaR6hN;Km7YU&vHLQ8QW6@pHvS2{mmn9&lhsW zz`(!&YT1S(ikc@EFHm^;e0gI2`jv|hpFPqN!(gCr)MVW$ChsGUSIJ(OwZdaleW8i> ze?`^4{Sn$~SM+$Qwl)QRQ23sj!0k3+5=WF)<>$7<*I{j&($N{UcFh9p00lKAR~Vjn@uLu)&mD7c^R~An4m5DW^Sm=-bfMC6{0LQ zYaNbpbh(-|m>l3;OyE6Q;>$cs_X<+*bKnVqILSMg6>ohuMF*i!g(ueNpzK zgrp=9@#ld-lX&>ezkRwiX-Z1lQqD6hALXN0Hmpwk$e;X5G~MXXGKPK*FVmJVg9N|R ziPG8bFHb3TE>K}j(UVHPZ`Z~vEx{*~@Z(eK!`IG|#q>s#|Mmg#Wjbo&dz}&+`sORCy^@gp zpS;aea>G>l`u|2Xe@__{s~zqX=9#X4c`FB`@LJ^lC?PHF$>YaYE=}s|+VtVmB{89- zw8X@e2EN!o?psv)Yv1CGPECe))|oB(N$UB_t9Wui`-Bx%G#VMXI?Rx}@2j@bq3${n;;;Yr!Sj!nvZ8Ig+tR zv+16UXvpeLAql{Q{TJ7Dc0`vq3(3dpLf6Zhv^&yiL|x!Yn6YM@i?yui0#~^wdDD zsQ>>LB|duaFwxv}s=rT3SwVo9Ktgh2V#`v_j`jjmqYLSm3QaivJq-BCzw>P4#0?jG z4o@!GaQ?w#KP!z)*0o)Za*D3Lwk_FWY**S&?62p!zu!1!_puE{s)zsmEfJ2hN{6$I@NxVR?`tKk>k4$!*VAmE4uCikv8AoY%#6?(gXX2O3HxL{=&@YaY8G z-gU_6dwq!J998zL#q8()aYsLCTv2Xt>}*Hwv_m%qSZ3b3yJu$NL#MXg<(WpZko>S_ z_D0?Y1p(Ll*N+_YlaglNSjZWBaP6Gau5WC)TNIL>hdyztvncxSSZqIeaptPTg3mVV zUSIj18>{_o-*-a+y(ukXbv$d0tQ{A&SNsa?JhV1_nzYrPV7o;Xl3Fhxd2%?Uc}O0s zc-yndLu)R(T}0BfuqtNd?xaZuyRMh&aG%?l`^NhC$Is$Vm_QA(MQM*7JV*!(7Ut*Q z-rK!dKal)=e;s=-xy(WBr0NM6;o?g9~R+%fY1^R=95yk3M(!%&8NPF1-2o|9`!YX2^F1Yvy?x ziBmP!Bs44)-BGks&B+h1M6X@~brk;p zU*rcWsIFa|+S^^s!)5g3d3t(EO3PB7430nl|I3TDet7YxJ!biV&n7_+8-;jJs2aEk zggr}E<96%W;ga^p{c4lY!4+~7iOFjGPx^l*JfT1tz> zOitaDiAf@=ZvwazQyh1cZDC?t#HO2IU~KCoWBM#0Wumh5SzaEGr!12!=J0+LD!Sx; z&3nNL1&!9NuM(4z49wcN-rYa{;hn5#+X=Xp0YQ^Gx;F8083_qINJvRZN=RsO(aKuP zcxgwn(<@&CLnEc_VuFe7Yd@bn(6F@WKuG^Jivz2;^VHVmsYgj3YY62Hn%MA6ZpFzD zZQQfGB*f!ZI4te(RGPt)a^T33grq~DHq`m~JiosMCQjpn!b@R##x_F=-6jm5^4R(qQ{u>EyrTZb=he zb>9lC4b0gkVE604{4CW4S9P)s8PL?Xx?&KPPxgo(N^LIro5-jXCIO;d!j+7dwJ@q2J4FkhK4g{8`u0l_u;kp;f={6YfT}o zCnfvB51)%!*rnawW}Q6AX<&Hpvz1D!f#UPJh@L~*B2T2+8Fd>9MAv52`t_`0iTox% zNu;Rc-|>ZVavuwX{7ZY)?zbiG;8cy1-JfiH3%>kwh2}2 zHc$!;IuUfjmv_xy{~fVMnAdc&S;{7eDQ4(778(RaHf>&^5Hw-NmxCuyCZr$#@QnBH z@qQljya|u;Rp2(gc*X6bJ%B_v6I~TZ`>9HBUY@R>=;N7_o4>GrH z&zFqbuXa2cVo{QkGrzfcwNFoTYpaa7xbd162a^j4n#>bj5;$2cbrvwJj68neL`P7_ z0fr@#zvWAPWO);=iJUm~FL^?RNHUYnQ+A`yXQn1*lL|Zbuzb}oTH?fVamfVMiz$Y@ zj$%`$q;zyA8yF=eq#b)}e+Octl4W5*(WWEEqksL%;^x?}u(F3SY^Q?Zj08{iz!eU+ z6?;B>X_=!VVAooMIxi5;kS>iNH-tMDpQ zMxmR)gH@3QD^m_k)48yt?GSVJDTirCLqiii*?A-c`r`O!U=^@&z@BYV$fmxC#qFFJ^ur=W!WN<4z1Drw^%8qVdV_Q*-}1^YqSbAo0*$>17da=WDCDp z%Nz5uErIvYfd;*fQ1MNTH8nhmi9D-T@#q}k&4+qItMJ6>?y9OR4(`oL?&?9_Oo#rp z&q7UT^D=_uuyr$2yWSTsWMBupw-P#39 zg?uzUeuwz>9=tj8#xzZE>8v!z;>XXMu`ySAdR=GElr&f*78iWL{rJUH!)t$&mhsrA zvdru0TYcjImxq^O1f!>K$QsU8p5g*CSIG^k+-yBY%)WCP8yy`Q4jyeicwt zxuT_|t`QLu8$=~;TxQvJrC}#)BV%+bqw9yxBlh+GYJYwC{P?(h{l7oXvtm`2xi;x| za69-&HpH#{^ZPicbUohBbASKhl`3yM{-m-oyBRWzr#Scq)zt8qnei-J)^hv`Gy*Od z35q@2ye&*bG_|>sVIJRvofE8D1v3|lGe5gx^G=!JV)C#1`~Tb5|Jzsa@aDh2)8MQ z|Ns9u_~F^v{q6k!@9h5m{N&{Q@%#TJr~O;M=+J}Lr&Saj-e~a8GCCtFCtw<}{OJVW zfbjGS*V8S^c#L}&fdYl$|NleNEg~Xk-nk>v+j}%Y{fzOVzBq$JNv)h=(;U|P=Z`%9#^&hA$IZR+LjiAn zeQI1>+7c^1@dH;o4n{dAhB^xvJPK=@Wc2S(;*P4X=VzJTH_!c5`1x77}4G$WH^)53>GINOqsf4IpCPJ4%@TU1n9`uI*g zawH-!yRqKD{(xkLV9%Xm=X1g{I+=U_{(pYn{&>Iri&s_^kB&U%m*1!S{!7*OcgMHo ze)pSKQ}*G(;(d13a&lr7%rgUO9y75lmN&M2Xe6`wgF{FC|9|zDm&xC~JOBJ#=bKx9 z{4_p%b@l)Jdw=R*U;FPpT_QMGQdPBcmR;(R84KLCB`zGf=*8C|e6R(66{3&wqda z_nuz&{>`77mqOn>etd3!`0efg%gg@Gw)uJg^71D+ng?#BJ5F;7W1jx>Ny>+hi4&$D zPX~>2{{Meyrp1jLGuEuVnwl~rFgzq-8PA!u?>fvhgHDTV_V1pTBxh3*9=`qcmzV1H z^^fkD|NC!$`1W>tKG~{ES4ti|J}$rF`pgHK_7WR3orC$;vKojyHs|F}`1ASs|7&aS zi|hO-dV4GXPqyX#J&7~y|NpCd@y zL=>*iOH|?hQp4{rzrp0=bjHOQj!%`icseZa@Bcr``umkm;r_b6N(GA!?63bHzwb}k zhXZE^Ss9zXJ)5KBoHj^@;?PWsoSZABrjZl8Ikq%%dDydVzR@sU z>_!)FjO&U2-qYvlMwdN2ExzcTz>m+*|1Wl*FRr_*@c+N#`tfy%N5qO|IWYeZl>Be_ zXdYwvm&Z&elqESnot$jX&R27P^Yh=|-@8bD{cnHx?QI_IFaw(!k1aN4uHPG;wXVKDPk&`QW^TviWyDdx%dv6?&d39i#PJq_3{q_I#;%eUg{Oo_E zjf?qkzkI#C{l85)H{UMz-~Z_B@uG;w0%-5)e#=(Xw{L%5blYXX&D?XP zVJ6=OF$n>oz^^vd-wMyqyY0d2v*?EneEDU1cwH<{{(Yg31-_ys?q}{QP<~Gn<45-`|6Vr>#m}+-zo_ zpv1oO)B&B(7Z?oB3Z7ZT6)fg$dE~^2f*&9Ht;_4`|M}G1WnumQ&+yKUg9XocIHNXf zGygb6EW=mW*LRLKbR6i=B#R?QcdlJ?#Nd$ly$1)PoD`JgG*kbyF05Z68}+5>$(NVv z_xDdOF1olrzJ7a7<(CHs({J=fu?Vs(II7L8XnG{v6IAY=KXRnt(UJb_>nCiwU%riQ z3tsMVxvA7aHaxR&*~FPM4_=$rI9;3)zvHSmhKGy~3)H7<&)>hN^7GsK_C-SM%*>!Uk@t6g);#CilcqR>uV5<6 z4llu^E@74*A0Pj3<+iu4QTXq6bAP?0oUPKrn~aPVv!r(H(k)-!8=rqaC`$XmeEp-$ z4<_eE?BwytWU_NTe)#Z$wPz0;Jow?QbXOqMxB8YYE)^{;Hx#zdPtaD}ASS)zmEa-) z;mh@Wd|g7vkN5ANXZQEp-{10g1kX2^%OBgte%P>bWg$0*!6I3;{#&OV z_DQfV-I9>v@_I+o+av#6jVH{XAL`iH|73y#6DYoZeYri`+&^sit3|Bs)N_B9EA{?7h0%hkWXWvx34W6{ILnJJ1VTU%Qj zXG_=ql7skfb=Zt0ODrDL9kZAubMeiAYugT-a?M_sl)wGd$wF&r>jSUYU-;sCLiv$$TU(nK6@E(>G zRQS@;+(JX;3=P#P794ngw6R^}*wQ;Q&UXKoaDFx;IX>gf{xfHIK7NdP^X5tW$`%%O zdwXp+t^@JD3!c_(xx&Eca?OIn`Nt0l6O)L#ze@XW7?`C6gsVrJGcyb723R<)OA-;4 zOi2xdj{Q4nTC%WOva()$^j@Di>Ek(jw`2c%dOnDJ;NSVZ@#4gv-3Q&Y?E|8g9XxpI z*RLnrZ6YEir%v7Z@6S(f7ZIiDtt?5i0zNjY-u3V@I(fn%_|gLpFQs^W=6817jtaWH z`({F#cTOsO|Nl$w*lBs=hJf+Q71v)L;^z1G)v%mx&0N7Vr&ui=7d*XE&%^V7%NCZ) zm$ybnp0sT9^68n|-|rebS32{gVbqmHiaw4pp`2+_Ne2|UO~c~mc}C6hjG86WZ(v}s zVue9V`{d=KvJS^~82na?U{qFS78kE-7J*3pRt#v?FFEO#xY)8MPgULBG(5bEEv&oi{{4}@-CB2IBL}a^qHeJ>Q$F$^_9;p2 z>G>ZTG9xCY<3?@=V@s>#s#PZ>A0FVk5X79O&N{E^!K;gY@1cp&Q)|+sH#dqzKkY3m zI2Lcwxp=|>AF&UoUhtdg{;#N*k$z@_sGwrfd^sTjk(8-ZpPHD9KX{gQsKX}4lxbbj zQ~NfBgSVc@>)cd)V`PwEW|A^-y87A=32A8^{mcC}Hb#0Ryxo`NxAnw{1szcPDjMd_ z_4Mju-7N6$zx-NT#{H8d?>+d@E%JIt+nx>60-TlxCS_1y{lb^P$*2QN=PdhC3NGuOw5M^KQbwbdg0%|w<0 zRo0kLo;jY^U8=Qk;iOHQ-t0)`Gv1l- zUE!S*lW}6ARJZG?v%Qwe&1X+{v;Hp-Gd6b4)I7YWl4sJSC*>v^+W6y+P?l;*y2b>+@oT?GXONiO{JR;aptlV@gTez%Inzsu26 z{>+*GckW1tibg&=pB}OE)A`58BOW<>b|zj_V$;_4`5(ygfLS8qwRlW-`oI6_51zP8 zgyk~6w6taO?f?J#&%b~3d-_T1U~`{>%}_S)y<@)fYvF)F-DyxmK_6g&H4=POl zi6>q%pL+Hv>B}df@1KNze|~=d{r!4L$^V--8X6fIKIoJCwBv{1L{0u|;pg(okL{Bm z|37xD=j_@4imNks?8vaQ@(ViE(ER>>pb%tKf6d&DTn7|*T;@->`Tu`V;U?LZf{Rz2 zGTAs8UEj?+;(1uEdVW_m`^EkhU7r|Lm7G#qb8K4Xd$;r`a;9Cc`5#iEZu~f*K1*v8 z(ZNAqZ!TTo|qPPn{1Bw6tJE`>5|NnPTn4S4>JsVq^o7i%{Wy%mRMj=uQ)|LT6TeODI_I>?Zj z_{aU#?n5_wcrIL?_uzQ_LoKI(7;fVh#*-#bcBCAb>r${(=+mNU3q%a3@?E>cWZB5a zaA2y!1BTUazQ5O)y%HJ(dY1ocezWBl8yFZktTXzz(X=ZGJ|35 zhXYY92}*21x8y616sn3(3X;3_>}>+e|Np`ofp)b{nxKVc4FCT-y<7XI`a7R-p#m5G zl7svnKL1^{{1o2uu{qgBm_QK+s)b~p|M)-u;bZqH&dZ^WlVqv>|Ia)}k^hsyi~18Q*mP3=xXPNP9=P_&;oi2! z`bg$IR}MxMCHRV+C_HwcLG@avr%45u+$Qe+gG>sZjFN(!7tCHj3w%$R`F4MQ{rRb{ zuP+>wFLo=vu{84G$Htg6hd!tF_^!^d%?m7f!<(5`b6or4$fGpl@kbFGma7V<{~a#) zpTIc7lSR9!x&FVzzB*S!X!F!lX1@LZy6^A!_4Qx>|NsBr|MZsk!A-GmPjz%jOz;x_ zp}Uf2iHXI6eGal>zxxlW+0{Nc|CGnY#q!#gqYbOLckbs^XXjSfx}~ANt@hWE56q?i zq#>!$Qzm}jpSthw`uErv?@4A_>GjX~G)qM_Q}%b!nWr00Da^2t2xykD^fg%UER8K^ zV#}F?1F<;*KD;x!Bn`eSS}7CLuxGQ5!Y;cy7k?ROIC{$5-~Yez@2~Uw>(`t)qfr}v z=5$kFesJ1>i=K|sHx9fsb7Gdu_}HT3?V~?sfzg?VAxRes7(N(1yKmnXzW&1YZ;-JG zCr_FEb$@IB{P^B4pL6F<$3E>fiH!2M9p0TgAlbvC5z0Kzk*~(@s9RF%0#}=57KTfi zZf008Y+3dEIDcF9_Y3EbLreSBVgK&!{QUd#^ZGk?{wJ>9AF%A%!@XfWQY@GB5)%>< z9C9lYa_fu|CMP-gGR{!i@u>6154B~2R*xpJx;*)u-WI)G!fvtLOAbhAtq%Klf6vd? zXJ^+xdURnUZ_eu~344?OP+b#DBGFQ1;yU+(`uE7LGR zg`16y&Er4o+GM8G1J7C?s@orAR9<)B@BjG^pPr7NI`#aMClODd&fL1?L3Z@TU+o#2 z3zvvVv&@#|*&%%SKtuX@iBqQ~=h-Aqn4WH6s95&UaAItjPb?1)kAA$7O>LJR)JlJu z_xJzTetKe_*2>hMd?oeRgk5cojg5^H1$Tb!V%rd|*VDsufB*m5e}A^OvCWGXz1dUBUWUjKG*4PNm zFY>qleSQ7^_V)UJe;Uu+{h!<*ySrhFppc@g$)P%(<7=bs!`A*fa_pFd`0>+cdR9J_ zzZ51O^XNnF*BM&>6IcA{j=q^NQ?ijkOpJ}6zwQ2h$!)pH4~1Ai9r*vB=l;ILm&c7@ zYnz<>W$OR^ss8xr`|0Ts!Uy9+4*vJp_*Fhq)sXYW@&5gj)%RC_dg31=vwoePc2-H! z-S=O_XDckTSQpZ2dh~#;(Tj$?w#~DpK?}@gn+xx2Y`qc~n9|F`!*hO~;ho*>OP@gV zl)udWdgD90zCORG{3YS<|Nr_qetdOd?6a6WUj6J=PF;|4=l}ozdNDr|uddQ>YVz^* zW!g~t>&If@nIGjZ`z+a8a%4&8!Asi?uv%D3PMRdKuRc9uOUJ`kp-GSDEm@`^b7$}A zg6DkGR3NGRkWuQdhllrDm;W0_m7B} z(b&P#7#Wh_%UEx4CBe6>*(%b3L0P$Zs;F~-z~Pz3JagyrxWv706@B1kSn%nd?VIJ9 zki30p)Anb_`~S<^|7*?hYb=ijm1c9fjyD)SV9cNI^etWK{8^&~u7~H_|DUe+_tu3E zAD^Dy-`mSHZ{CF5bv!G&F3xJt*gLh};G2DjqYbY=TS#;pLqbZ*gb4{AJ}DN(ZHY&@ z+UoyHEb~be5K?;e%OiwC^Xt-mVo|2I8^K*{hrE+f{D|pJ~ zTx;v>!qS**<018LSFQBv8ycYDd62&+S%4-Y3LoE}ZT^4BQppF0B|Nw*A27%Fcs!Q* zANYFue;yvt8cwHkRr|``oW1t zhxX0?{@A^umrwHdFLr6+tdvEXl*Vq20BqSuHr3nZn{rT~6eFw*5Be@I z|L-&B$KPjXL4o!E*Vq5Y`~S1DN}fF%$;5m!Cido&r!#NdkeH_T_uSdDAHHfIzQ3QR zrl#l38y=6qZqG%kef29IWo$iT!1DjUam;?c*z1})XAT@YwqO|uCFGyqP_BRYU^9<- zp242l)1WEA=jVCm+Z*56aqz(#o~s@k&N2$te`t94DfLTs*aL`T@60jeYH{Fko&Va5 zi`gvrcBEzn_j&{NFD^;R+8j|Fw>UOQ=}uw1d6PTmyCO@FjmLxfM{UXXSFX}_)ADD3 zx8bJz>w|H+zMo%O1#+$3tMTdakEzS*=I!5nCj3xfYeobIjoU71P|)CL41_Se5$o{mW`C`1ulKB-`dQ&jD9Zhbr4n zOxORvuQvI~v$Q||2#C4u_iJr#Zk#JyeY3$PvR`yh?dgI) z7W+1aL!#=?JPU;9+hsFj`%nur4x zLBVScHf(fU%w=}qEC0(iegAoQd?sYECNEW(CI8RZz@X;enGIE6_4)YP^y4KYBqZ!= zf90nf;V_raS5E%1U3Wv%{(4EhxE49tZ;<8ZO-&I~9GiaKx1ZGR{{L@s#ot*EAG_CN zLA|px{NwNPir}a6%wIyHT-Nd@D*K)|7~P#wTokbK`T75LRbRv>8O@j>@#f8pgmRC? zyc-uAx&?4E^BNc%*Zl43u*uDxqI)ib;pdgWHD9^YW;aiq`0#`v)As9-mj0oc7IrlW z7DX(H*Z%*XaD|~a@JB0`$HpK0(=sOH?{Z%tYrWtrleu~G4vWPN4F6aRA4N%IJUGtZ zmVJGJvF4gJJUKZx-m^M;b7in!UbK|+iOA>Y{D*IDX3^7tw9O99w3sndG9n^iS>?a~ z?)#d5*efefTm9DXR1atza9{oZ?U|S56>l_Ny}#c$BlGX{DS!HP9Tl?WFEVU0;Vx=E z^!`4NjLeJa3lo^OgfzKJGBY1OJ)KA1P9S0dXoJxI|A%fqOGrtn_}=&My!?^QnHv@c zas0N=Y1NFGAUJ)tUCodGFD}|wtd0ns>KG%lcCt~?YW|o9_wC!Z=P&%6IA;z|P>_YP z&}&ZrAIJAeNJyNUlUVV&tpU`r0LAi5a79zv-6;C<$^ZWy&M*J{caORHQNGeac3B&z z<+7VM|L5QN!EfiLt#n93Bsl?_xoh)A9`Pih?Kl>m$&`(|E2IDA6r{D$Ak4A*BAFNl^i&6;=^?}FabycJRyYU3;EgE;a#s43^ zd+_-21IzhPXAY~a70w3zaeh{C@bmMF{uPHni2)R4;YBrnP89qy>DoEtauv@CPS&fw z|Ns9_|M7*>@Y1LE@}P?1(xsAJf#Uo>zVlnHvdB1iu({)iqQ(gYy}r%A5)OpzRAAmB z*jN8wf=|Zben7t{)V1Q?Yj)^sHZKX@@%w{F())XgQCo9zc$%6b^6ZvLSNxv#@U%Fa zn;R&*>%}I4_ADIZ1+VO`Yw;^HHZ<($GSbjCe(~Da!oom8!eO<&S&?3~iZEye<3F2{ zE>WRvO-&JQ2NXW`{$G;x#EF&f-JQfAe`bCVJkG-d4X`bPZ5B(`joeDYLtulF2<2A`M%uSL|<*pibMnIx_K@{l1Yq~`ar z4=)7IJ=2wf#EnwG&;S3 z&Q9pc`S2Fpv(b-NT6WXI;==U82jyDNYPgt#Hb4Gd-qtP7S)QZ_amJdt)?5t^JTCHD zC#K0AxpnDu#Ua~cT^wfLZogz*^q{G=DM`@Yg55(jj)~LQ{OrTOjlHKQeLP~W)M6`{ zwR(e&MDvOB$*J=?m43}zmMD=Cy+*Z<)3Lhvm%{UcKAYoFrzWjZ@;tGrTuf@$r749= z*74uz3J`fAIN7T6yv0nlU4GUd=4}Np^;Vhy%BL2EO$X2MCGy_-?-a&(G!L z!9#~W{ILu%Y`w|1=-*wFa!S83bz^^$&b7gf$J{b_Xg>};O!^+r0e zrzXrgm5_2oLHF!}H9iXx{Zw{TO?~*lvCV(JKx10cAqAbDJc~~$ILwQ261!yIJGfC6o7) zuVudhx00sJRpf3C5NWkHYd4ED(u@i}^639fdy@#C!+ISvOms7a&%UbW_AkgWxSzMD z;Jj{oG5?$b{u7EUxw{wd*T1|`VcXun8$&;>lX++IvF1@_#56to5DS~WFSgH(J)I(r zc%J-RHoVACC{&wB2SLc3TXtJsogj4EjpJMOs;;YC z$v56+{5IVy)&J3kjrq}xpL1^ZC@fgFQAeiz;f9T7+oin<_ZICeuq(Ei^gPFG&Gj96 zrGGz9uD6<)`ok{sRFS8IkgJZE$H|PAYqo_O3^UF?beI$%lE0hj^2^CD*ME;cAOG=V zX7rY2I{&@SM&AV&C_Qs@`I`y^j@8$jNwoShNvAE5kP-T-EU==E$$r}X0}tM?G#f_> zZ1!#9l-b32m|^k*?wDN#yScx=xw-f4F88UPhyI>RxS;e{-0KJ1#E%lU^U6IUk9|Fr z*8E=CbA{F{x5%K-x4D6H*<2NQP8Od_FSEWhMdMMCW5>QD8lDz>hZWlzUmMLgo@@3+ z!R2_&Y7LKIo$jMePbYnyrJ!)W^YQ%pvOM#agsn*?hfHqfeY8oLd9&xaaBTjM_pv!w zy{0W()uPzW{M&YU@aurU89p1{>TmoPEmEl+}-Tx-7J65|5C?b5AMEmBO9%!2RR{C`n2tG-TY&Gj{5Z@zbx+;e8Rd}Ldp zj1KpC=gSiPFG~!3f7QO471wXG{C4X0#{~*~ok~aZczK!)j(nG&yM5-O+NeLL?wtrd zn&dd`ufT6nknfhv-pJLIAkbhhvS^{etMUyOWA8f^lxM#Tlv0hVk($3SPGN3|P@#ah zU{qh5impn}zLz`R_T^i?%L`k*sN-*)LGq`$Auj=oX#D2t=_o)dFJNS!0Q!q zN)~*lRof1xIfrhHDA;|mG-kaB>-XOY` zvjtRAt?t+`QL1C(PTq)yjkXaDwi$-k5(K9yXdId~Q$ljZ!Mq~w z-y>`}t8L%$?{7Ij<`zXfnUVha_ZcR=(*jMORn7-?y;kD6S>94G^@GW;K$k!T)l|>e z$;*!Q{|J_0zuXekqkOpG`QNbMlKHnP`;+IjD>!&fYgI|S-O#_)Z}p;B_t2Y5V>l9@ zzMoyXH23V$9o2il4pq`BjEFE;6M6W+xxPsv1xxukq$U6SJ-wmQ*f~gOT5`ZcQ&wT& zZ&$DGV33yHqAMwIV8QzH3aUpFGEXXKd++$_uKVGW>fz_-d8X?dpP6^|!xzoNmzT3l z`?;90qkd!9l%5`*OP6?-F6Hs`e32agL50j2R0y$4{EXqo>DHQ*+_utb+{m>{53Ww?BLt z+_uykG`w8#Z_dN7*^|1R#l+Y$GuzJiwEd~kY0ix3=BoI3pkb{=qlk1npJ7V+WMNP} z0$L|;RwyB9m~*RTbK1eFE*B0Q@8>z*Z|pPYM8OLIP#bf9eKNnb2j{`_{CCeJryf0U z{(M3PC=fuCr%Hzooc7*iZ^&45?yyynGuw|ZosCncCSKQTn{6)n?r!ppEuD?CXFohA zTk}_^LquA-d5=x=OiAY|tLC-3e(aCESat|;rJOOho}GAJE@^flbeqo_(D;ObfNOke zvf@h?R_TLIEPES!kAG5-bJ^RLAT7POfy3#|Hjf*J-gGqu`x#H*IsE)@;yb%$0TbTT z%!-Sixd-069aNIP)g{e#Yg(n7*ODXOZWT<|G(+WJ;fdm|rpF4W<2EG!$Pc)A z;MlqQ`U^dK(jsfmZ!_%E>Au-7*p#Wn6UcR5nWG?fRhP-$UAMpd{TeJBlM!TVwySTK z*{tuGptX|!|0^B*e()gA-@iODF*mdYpWI?u`{97p5zuy?zo$SQoay>J=jS_%Ti5@PufvbJgPsg}OCPUEdM*V+@x<3gOKPEJ`w?90_ z%ciN>R{mb1qGG}Gu2ZLtH*9n)meo7{SX1kR$`tc_iLNe*vUe(L%bG!hnV@()c=^yF zp3~C|B_st`gl$l&V%jpzC`MqqkxxSy_kpQ_?XXcGFugl~UR@(}EHXgxMsBs%W^TJUTY3 zYQkK1PKEQK8J_>|+qYR3OMH8?@QN+eh@D^SKPoxb+beC7oZ!SBZZEyI;ovOM3C(Pi z8`Z4}Q*~^1Jz$t_Y;aY0<_rmgNqo$Lnr%Ns8r1sse0^qr$>h|#yFA?Dlgt>qMWNS4+_VKD5D!U9Og2;T=?gB#Gmi-rM`*>r@gGdaX@?CmL|%7BE zcx`WOWV|>v)xox#V>07bxoBoy{~eDf9g$^j^yYvk6%#ZT<{e1p>$7b<8xGCZ* z6DiJo@5kXSeE(;#>k0n(mvkU7Dq&K^D{;0ZQ$BuBbdHrh+{nzrSF8kDECxzR`=6gX zc1%FvfI%V?v$i(pNv6pz7OjHSTORt$@U)zgws|U0WZ9q2(Dfig;@XkMXJ$5E#hJ6R z5*&QRcz7a$nA1*Gd}vs>Z{94;X4njnQoZ}<_3=EfuN(jQef+}h(+5s=<|jypvmKgz zbmK9Nbw?WAxg9l|%qHF1b0AA$-7(>?$TgbGlN;}x+fg@n;jUQ^pLiyjFA;$Hc~Qxk z{)rP0-kh2E$mNJ6N#|=>jC)+)Eaaz1>Yqn%n)d$%;f0(OoZE0^|adu;K zPEsmK*4=aXP{TyE2P!tRI#+J)OcCbXaIK_b7V8c!%O+3B>2D);9@*~s|8Me+>gkD( zTwbiv?}a44MbG9)XlpmOvN~sKHqVqy>J!X9b>NATj7v0Vyi`mAD&ZUCForQ@Kfu>HnsA`Wh-Fg(0A`pU~qk6?cM*RpbgK=WQ(=Tg(=9-c-m zwpUD-vc=ePRx)n>a_qi9->Tt6)5w0Qj3y{-7UMB14Ii(09wE*oD+ zZMmrKQ3HdaA!rJ2f&iPc@{yIrGjjRZx`H{ERhdjKT*@Exn#rg4h#3-QfvyQs^Vl9;BqC2al64TuAuvEv=w1F(61O;HbhKmld2& zHU}o#dFakGFio=J^lj-W(@^{Q-~RBiUY_FTMsw_ZpUslp1a(v5BL}t#Z;wu#$YW*I zvG+8O#Vm!2NlczkEz`Cjclsj4u`2j z6a1p!M-9x3jBJL6Y?YO4?(S^a*=&rAY;tmJa&mK0IS(qb*f0c(g|o9vw|t_?XX2h@ z%I>fH_+M??+Gt5WIq;z&Z=<#PvnPUPR6xn9{E-7&Qxi{44v&@=kCPKmRTa;*YdkeI zJbU)=JbI+@cx}eUj+nFwvn)J98|qgyH+EF9thRV&aP0PWp6&UdQQHlb#-O7c*2nXA zIoCk^Ds|#WBU?pKdNPZ zF#Zeb`r*bV#az_#@cw?D+uMzE?wtXh3bHkuXMg?Vf=yhYqX8KH|Ce&n zsG!KHKO;3c`NE~74V#+g9aK^~rl7_qTabL<8jE5k3+sNV7+wV%*~}+P8f;d5{Aql6 zZ8T5$dn21#@KFMRB zd9IdH-LXKSd4U46y}i;!2Zo79IoCz}`7K^0bm9O13rcT)7Po1KORS4&T-N#ey5u}t zP$l*7>+6H_L<}GSe+9Z6OMaV>$g4;H>*KeJiLkS8Y~H@Dx_`~hzLctRd$Z&1ea!D~ zmhVw_3sOq|clY?H6-DUs;~f8>Q(Q zo_OxM#De%2;GUz@yiR6e10$!)jLKi4QrN`A*v!l{n3b3;C;t6i{-QJBMEdJP`Fno< zk=zyd{(lj}tpy)Ivz3%)XEV=%j*Y8&8yTNQEKB5v6e$D`lVWDr?eejJ SmEWitq`}kG&t;ucLK6V`eLJDEwk^kok&p`cK54y=As{TBNmmvDU87 zGV_FIPCThq=B}osk$BzAgMm$_^4RWsS9{X8yq2sDI<$M~?TPiDz;SqG(nhWp0|D0Z z-il)aEgox*{Qtj6M0$f@(~8~Itw$TTDtxavdgbt+a+zZf3`{=1S$4ATvHCH!7OR$B z@3r6eexF)=qclTzi>{JZ!wM~i7(s?RKgzF4A7)jG>Uws@#p#Gt|M`#DguDV(M&eZjN+madv13_UMR?nd2Tf(K~FKbJVO*pPtBo2@_|} zZft&RudU5&{JH*5;J^R&yA+g_^`_2<`j;5Y>6`Wc|9|IeKc1ieZ=V0}-rnEezrEdm zef|HP#n0bgUS4ls`|J1L-}~p=|NsBb*5IHA8#JkwbT-eO8|l#C95Tf{Xi~hV=ZO;@ z4zY86otir5|F^fcW){9|pEL1PgVa-=86SI>u|>-4oN!<%_cAui&OgqVIT(3({s#!( zv}bGk|NsC0-~a#rU+(|^-Mzo%&(6%hy!`*Y9UtGnyqZ2(|df zk%K2r95{ViP3h3;1qvw%3h&rW#sbytrzka2)Y{7v;Cp$IXDt`R+-`=Lyz`)>7dPW)^ndiWKi0Lg z9^SQX`~F_C?(d@fDG4CIG5r6(sQl4`=g$KI1PslmfBibu)bQccXKqIX(oz|{RW&9a zIdP&vM)E{dfw~minnx!z`E{)%CuGT7dG%P{Z1F$E%*Jr@{{g2ZrX)2p?!EMr{qm3Z z|NsB@pKl~%b7Die{^6&mdDh1p*Zg*S9?=B#;i6!$ePLq71p!?A;US@^>4}LAuEte8 zNzY2x1omllERtKf=iodJ18-KFjtNFnZm}4fE-;JwuXx?`geQw;>YwJRCOrTD8P@zb zQ1DUd@V~!2%l(aG_W8Wu!~#y6i*g@5c#tq{vT$K(C^u)|)$7mGQd1pH{rzuv^Twlj zjg1fGwZ!FAC;WOUzi<;{B=SzGitt07Ig7r@pv0qQS`GtrbuCB%W{4; zHc)z5_~Nv9+w1F+d~zu_c1?YF&(_M+7Zia1|1a`9}17 zR?9_PJanL8mfeJ#=J`fC5eGIDs~=t+&SN5F%?Js-s7DFO$zef4Wo6g=y)RFnoSdAR zns8vQ+5=Z6W#vUr`vsr=mz11ny;P&;|Ns9F_4zj_O%i#$GT;Ks#8sRrkw-Y#-);HC zzw%yVxWT`OOkeWJas_2(* zW?o)KVF3xLsi`eTZJAYM7fk#Ag4gYi>rwljIcFPYHUyL|oGx=>nc#e#2d_GFdW{y8 zId4#vI#Z{!=iyKOP#N8+4$P9_KMx)_@Zr1u;lu4budf^D+&WV5n2lLmIF+}8!_Ok0{*R{lTKFvo5~m`TCM>+2=ed=hSK>wWmt`&G{_NK`4= z7k>D(DKbh+Mcvxk!sF8i17oF`Ew`Se9PnU&ku2&_crL_vhA$?Bp=CJv?(Fs2Y=?$q%WC@yb*6Qne>TTy8l8xeb+y{)SJgR-6-e2 z-#-Rzhy}WZ9$s7_VO4A_FSW;g`1zAmQ?dis`xo+&Jjo& zS28Qyu*s;h;>EM4J0qh`85tg&e6&8|k;A&+3Dp}uXt?`{{WqA!myybrrgVTOHRfXT zwUPtzjuQgc2?TFmV7NhT;{O#ZpU5*;{T7MaxnMb`4_n%)hT{G8={L4^FWfov;X~(1 z!k~uM|Nm>kt@#=hcxp}cUZmaF(KUPV@`A@Y_j}%oH>NZW*gT*-$ST`8XIsEyKGiyfIn^TpK;x6>QtO%MJsvKsM zsK175RaN$%raOMq+qY?jgg!Z8$g8!r;=WZe$3vNClJ}BhCtlhvn!*lh@hQa=X=ry( zomSP_B2ipuXl!6$WMt%&G-cvMM#~oyqojF0%xSq5*1#YA_wb9VKh9^r>^POUMpC4H zYX546154YMxUm`L#Kax`qP*u|W7uf{Ju{AL`v3nM+ti&2h(2BL!{G4q^GDS9ph<9o zMNf~2w)WpUcT6H8AD(Oy42v=_Qc$hjax-KJ8@o-U!gaL*vui=({UY;ZyU)dZ6+ z3AA=T*5{M{pZG(NTRMBIQ;n9R^L%?_5iz4T_x*EzK}aD^02?|B0iTA= zk^x(d7jzjU1h;V(T$NpNP4oZ9BF^mgDHlFyw6_>X9c^6q+`dSaHI!e{?CHbA6>C!y z($a41p8oK%`k`yg;PiQDo<&7PL`>{gZtmKi-la2VEXZt{IFVCG_{Jsns?0{4@N*|P zUVc=%#Lj#;e#sO@e}NA#(p|cWnm#i6i_I`(bf2LVAuPO1%!j#?kwx|o|KY?RUzC<} zvh{>~dM|G!E^J_E_~-ZW14oW1=${XWI|nMv&duequNV3NY6kuP-!jvdtHFS$HJ+)p zse3Yef!O+sCwEt{=|3-&>GtSt4hiBd&Gq)W|9jTIBMpmlYdId<_#eJtD11PYJ1nK3 zbNRa7iAL5d-FwgR&SG}%tNHm$vdlZ&S9%%a(U7ne%TKGkkzRQ2>}G!jrQdp*o|hk3 zs2rQ@v8TQGe)F0bmAx|^!a8?0JbUJ_d+}|?FSgTIZ?~2H{h7KS94=1z#T*<36%{Y0 zPGw!SlGD)G_;4_rN?DmA!4zaP|~`F?J0 zytG-({oUW6?=JtpulD!%hllG=Wbp_JvibW@yAY5jF7C$1_e9|Eyxa-FY$;b{^!TP9 z)#7vi|Ig5->WF~!>4L{>`=Diklc$W0%^E+ysL)Wys;WsnJw7}lpQIrHa_ z2*ap<|2GyID$CWkvt|b!xZI#R$7NmI-<_G4uUCHFXH)Uv`F;C2o0pwA!y_ftW1P0( z#tnt+<9r{??F*N3rhNIV4r9Wt!20 zn6!wkOqw(~cT0miKLG+;NC)x|q#L?xh~PtG{3W_}E@b>Yjbom-Z81Efy`}k(BgFP35t% zu@DmEdEn2!8xon(E3T zsT29f_0d1)SEmy-jq~o;#m4MOJw5I9j$8L47>$jU&CR`oCb<=D+SSFy&(5y=?n=XI zl>nd3bqwY;0IU7OAp& z%xszIwIR$<;NboJl19doW$%yKwQ_@#Vbi%enp#~~uKkLR*50(iz`$Yl@tzA8dE8vA zIh3#cKNQ8I5qR!I$4(WAb8Zda-|yFtC`h}$E?T8kmYMnR{r&&#Yk%!cICyr(jK9&_ z?S6e`ue4-kp5&UbZShg1S-j+Whuc^3{QUnb*Vk{a`g(o7y{ES% z4-e1%{l;rzPj4txewb=KVMXI{K4D=e7T+$xy}6vCDQRgd)}&0Bcrecn?9`;BgoFuG zMOUo&P*{4kVJ%;KwWzW6A{B18WA+|7Tf`37_dG~*;QMG_|Ie=GN8YC=$@cY=ELZ+{ zczAuc_b(sOkKE;{|Ce z|MR?<%XenVM{(1s3EP}6dMAj99pjZ=S5P2ey0e*mj?K@iUteDDF0*aqOz>riILfIe zo6;ASZV_VW{b|R3w z|F^wCi-wtmY?mxqwD_wVXi2xpZ}kqx7XF(=s%x> z_n{EuzrV=|X=ye!Kd#QVuQz-&i7`K(*;sRdD)*mOmT5&#UtYF9Kkx6-8qOcz-}5Y5 zr1B=`aQw~$Q5Qd@J$raouH@n3(ny0eRg;oZ|MIVP|F@VoPVuYPg-1X5b#^BBiq&si zs<{!W?3gmK!Iby!;e-FqANcV8 z>}>m*njRmYDZe}-W;jO7XzcE0(cNIlt|4w5`Cw9NLPA1Xnt@T0fqD9aCoU2kkUZdY zSE^;2TEHWTc?u8C`d5TD1#VfODy!po^4PHh$B(~ywaUfsL}J7F`TtLxD%xB7oB6QP znxAr;-40x7-TL8xPO`*>NqowW>qSMyPHkWC@`b^NPm`9hW*%YO`@3tuC{v)0%8>_8 zjwmQL3P2;;>D<{9);berOJz*_``?~ReCO(>iL3r4N;GM>Ub)&TEBn;N)$^q6j|26t zuIuFO|0Rg9ReZAgC&dyTbU?&?hK~C2C2P+3>`c$em7Frw^IGd1JBbh1Oe3P@!oql* zoH#B+)3cNB*9j_3L3Igb{t}Y^ldk9#eB$30(xmyLzwx2Gy87a&Q%hW0)L0WaSe=}5 z)~xCI^GD~+n;waaZF~P5fADB!+k;K@6K_Oht^WW2e`cnkx3_U;hrr#l2iSJ(l>GBo zQbkphUqXi={326kWlNh;M~4F|G$kcD=v@BAzB(*m7T^E>&dK5{4;-*M)OhWm!!xDn z@*1r>XGzsZ&g@*a%;00K*_sE(`|TSWC)wEeY)HEt-&4}CT&$N3Jo>1m4#FxQJCZqwdF*EQ44Et_!T9V zs0j;H6&49b{rxn(^X2{?p7Q$rdzY%Z-c-@@3Uh9DkC?$4z3g~Y%Y&@sB;O-DPANW_ zCBE?3jTtA8wuX9lpE-5(bhn~vYh&xCi;jP0@@B^}GnSR{bah>5Gk{o@nDB!=*8bmo z#(fK9@;LHN|1(~|H|rql#2qJ84bt8-Omx3{H?_4TA}g!!{q65h&d>kPqrzluov5#0 zsH|*vWljIo#ux*Ggk)vyVf+6aPW)8Q)>hxyxyFCqpPwHd-rtm+n)3JW(S&5h{SySVwY8a_ z&GVAE*twkNfc#$<%Sk&nJ&|WV{Qvv=|8MV~FZgM6c(FUroH;z-zVUQ*fzY8tSA4`y zykxG^WBzwFDdSY#pM)FR`xh>0fo9G>|Mqfe{+c-XXuZbSAM(BerQXUm{>i?QJRXcK zC)?P~4@x+7r~L_RTE5aFrDxLI9)rosMx7mIT)W>LVfd5E{4b>Fm58$=Bh+t=KUr84 z#3Xn+Oc=LHxh+!Vdt%b4_CsFt!Gy!ymexiI*@{L627ivKwa(nbVa8g(X(X_G?hKKh zmEyK7FDDp1V9*sz*ebiPZ-U^qmyONMj&5yGS2ng<%P4HCKH&3{e>>Z)=YP*L%wFmo z)-cV;fYp$BO5+rrZfzzF9h(p83R_%!YO9qOzO#%#mkN@Sa1NE zFde9- zUaN*_A_l4d|0XxQ;fGXFP19xmC*GbcA-N!xd6R+>Gt&fVnHeXW8X0Y0Y+S~}w#TvG z{vGEjp-A@!UMw@h3eG z6YmfuArbTBQtOW?2ODN>oN$@t<6PDzQ;us(oNHz`&v?=Pn{~6RQ43?y=EtwYL7g^+ z|Nos#JOYIonVF}})0)HR>2N~uOoPiKPs#tC4o6&sil!tSvr5ZS-Y4ypW#p8`@8(eO z*GhTY;wA|Ri78W|4sd#R>R%42R0vE3;|J41QihR5=P ziH*C=XEC~)7(Yl5o^RygrL^Rlsf$+HfwhGmOfjA;O6N{=T>8|IvrAAU1rlLSdBqDR zXI#!P5Ylb>=ppDM%yUDDVYPys;G|n(N0tb+Dc;?dkZ|DowdYwq4v#-{@H)! zNlTZ4yM0tPbEx4#5r-ays2sxu*Eo12xC|aoO2|CepzQh3U(%U>!j*3>!Y$$|5c`_S zQ~xji>Z{O{@sfERPqd8j0;7M48V9D96dYBWcysQXhdO^aneH8yuVVQBhSP;BSj_hc zB6XTFG~oG2+XjodVmUAIV?RTWo)0%YlmtKA>*Z@jgQrcjGg&Tqi0XDfclbmohLi z+OPrCBmmXFhorWy=w#S*`n`PCg2hYaVix))|_>lBqcT@jW6!Np+gIn z9Qp8C{P5r3JYFI)L5@#2tY%4Bh=Ut1PI{LA4xi#Y<9xqB^ku|nrFllodpoNB|NDMw zs=S1R1Q!=eWQ0MnW!l+K;$c;Pj~4J8RAPU3_8^~&tK^)ylJjhnEvlz+ElX4rRC*o* zjX@_aql(rhVa3=Pzu*6_Ke4ac<|Ie9iSnXX*VlvQm~QO*yLzc=dnc!~^zC|?uOc>% z2af)KU?%x`-5JHnItmT_{Ra;Ha^Vu=GW`i#d1j_Cvd^2xla(kEt zDR_71{DTBNc6PI{us3q2Caq-)R$Lni9h%sD9O1``6eDSlIkbs1hYoE58x^hl4uUK>7 zign0jRu%&zqn1{~7q7ux_G0zGC=cBPj#ZGjZ<_z6d22+QxXOu5KTa|C`1DMB`QOOk z&)@(5?>{>mUsYvSP-NscM`OXNQ!LguAKI_ttnlJqQ+qvelFkDMmzIUQW<9*F-?q)H zOyHi~4yFujzuO z&+7k!29Iv+p1z{mYy#uIrO#(*O@w;j<}(MYB!Q&^53)&2g)ALOL6@82)$J(F3PnGfII&ZDZ@ zA-pbrPXY&DTEsq?=WF|p^iO}RPBIehB?fkTCOAx(5z1Ml&0_wm{^O(dD^_g3 zdw2f-zsU}Ex4wVRZD==S0CT6Q`K4X^yU~ zj-gW(jVCvDb}n4hl=v!y&CINAfdX4z-WOw)i##rD8Cn;w{a#QA4GRAG&R(of_}iPNOV}9Vp_Li3KLP9>l%RacdY$z@^HZV3mal$~c{M!Hj z=?Q6RJ4#-@Kid6&p55Q__xI-K-*?N%WMrS9XWBy`!6^a5EW5uX5QI1c|n!#!zh@9*ypO3wk-PtggCn^TdlsN8>lA)kg3B z#?E~BzvtwNMT>4IsMY`ck@I@%h<* z`+A8dPbGKk)U=hXJJI<6zhvFtq>67n56{WARexVlzQ?hlhv)G5`8;#xXas7g90`ay zqo8ysAoOU#Un}Q){ljyuc`jb;n40_{LOm_kFbtm5x2i}OB+E`q`j@zbhwX{ji|zUU z`DA`rrk~q>cX$2G+nYAu0_z787<+nn?(Z|)Q+aqpt#wI=2-A)o694`r zUbve2;8@w?$QqmGIWo@4%7;6JFKn4Q=}dcoe!PEsd;gj>|Mg;iDC?g6@aLyJ zBcl#zPXF+1a~^ShBb(Y&1s@a+UtJAavGM1J&a_Wk|E?aC`w z8y6HjkdfHBEp3LyL`8*$hu?DB_Et-(`8e!cD`9!fuz!lnYTpZHcOk9mGjnwLnjJ)1 z>^%f$^Ex^>Sum`7{y?naT&rWE$A>wLosMgnd=+(fi!R~a61w)~oChm^o;-c#<>a6Z z-b&S}iT)C=vv$3U5}9r@f9VpPyDEh(4gE*Ojudkl+Wv^IM;)8Ey75jg#IlEM! zJF(;1obx`yfi2f=eDk-RZ{Kcs#`v+v?L{J9vT_pPu38W7Cdcw-^Pt6R;1IPNExrSWL4k9kc@2=>4 zac!HKlH7sdn-x6yj}+hN94U)d+}pZlkyzKeWgZ>|8GI8g4;$G0`!MtLKhDDPmsY2H zwoOg7T>kRNPpRYERd()L=l1fPq?hL50N>5Mi}&2$zRx{&btaP{``$;Fmj^j{74V&B zPL^QfV7%@s(IjBNcKVRbVUzgCPx6;cN@QCc0vD`ucv+smN#{dtKv(Hs)|Y+F8cpjC zZ=3l1gb~ZZ^tHV1HC1kF@@>J+zB1F0ugO8cb-tob22bnOEAtw!SyyO8^*xN_U}eAH zlBD6I&~PB{k)D{;;zb*`&CPmt{>2Z^uXe8 z{^_Ta7A4pn-?lQFhxzi8D%Z;sp30fJO746)<%fKg72~_|G~-i~9#6Dbrjapg`ah4L z{?^1lP03b&_w}!voEmYh`c?EvbM8v8kG9O&%hePha=czFD%@xmQ(<->M~y`MQuP;e zXEkSJW(7paHZ2I~RNAHRB=>_*t!MD&5(d6g9-^gh<6 z88{_~$IeV#%kvbXE@e)<=FVM>c}pUvC}lm5z`Pu_j>^Nzb6k2lq7iRyG~ICb1# zW;g5GrAa?4Oq6(vAL(>X75Kq;Wau8_Se|pxF4k;DEB$wH%Yn1OhZ20D? zQ!C&dot>?%E0iPTa#GbP;(J%DQ^4%oS!Jsl=coRwPn-60(x>Gw*S~U=d>f*)^wj6) zX~FBR?=Q^J6}$g=r?J$>dF~EZw=zv#=6^o;_S!Q>TW_zld0wP=x}m=3#H$}BIbIJd zZRFfuTl4MnpJy!bVAm4O$A;T{Cq4Mh!uZ|#y7u9P3)MWEQU%+-RFzm=F?BuoHtop5 z*HZ5;y-m6uA?9@9y)cW<@*{?mIRjhXOWR4bok`QT)yjT`SRWh~}e$k{(oVF^5vb+oAS@t&r;cD>g!q=RO$e|VJ7E&TP{(_K6N zF@o}T&n!c(LjgQ(`!6j|&}CJB8DbS89?qVwuKz%HPmB6TrM$gvnj&8EyEHVWKKg9= zC2+~othNb?x8*14D^3eaysxoRD^?_H|9)A%r&+3HyDz3G+pJ%26z4P{HOt^ZMM&kk z#}V_-dt}~wwLj<8{|H`f-ZSQn0?!IQc^d0(`pNWU>$GX99!IyG^<6r%Fv@tgVA~J7 zb*Dd{OmPx1oF6<*!&N|l#p7hbi4w296MK)}dA~z1^yuTp#|1kdRL=2h+p$!|_oUII z8Ly|R7@pmf>a-zd)7?#Rn(^Kt}B1_ zCO?sE+kW-c#)yummu8%2Z#yV{_=16uQ|in$TB65`_J`kIYc=0;u7%vg`79JRa~64MvF195&cZV(2_5(ha)_Hq4W!@T}t}O8AX#FJRF4`+Yc=~cYT7( z1efCXRo4!Gp7U8s>Gk%r0vDo+_j*m@*?o6u=DR|h`+3hD6QacCyB|BPmaFZ$@O03( zYD>A3Mz^NzdR8Vae`Wv4VCTaP99|#S)J{EA`m$Pv{ruwB5jqiiE49M3&!y)`-+oqd z$mC>kgNbW4U;9Mu$%Z_KCY){4IP`ty*}A6VODtw29sYii85ART=GbyII|#JyH%Vg_ z(h=WysXMZAR?LCj9;&S!mcg_H2d+j`@kx0J-rvNSi{0Us*tTG$}LpuT|!ya8c1`M_l?kIKnhF=g3aJzyE*z&rk2)-~T^d|NrlAZ}0#6 z`+vS&>W;$ZhnLmc%=6h6oaXw@mZErfeLT;mO*~9YJofcQd#a8uSbu)OnllP2EV5Hl z5*zt?xj76zd@|aw(J^^$W8=f;{%!L13l>kauQRCmdFaDC*_4~cC2Bq0+*Vt-?kBmJ z^E*9DR@4-=^YY%7nNiBMjLAe3(#hWwV-4C5Qp+N~g~LtIW!vVj4ELOD+R8urY>PYM z)zLbkL*>}bLsL#a+O**^-(_35J^aF7_O5%q;Ps(pl`5R~4jr;oQqnwcu%yst`&${d zT}?-7kMH^yJtHRXS)l`mi{KWv=PR`CGd+~a?cUTPU~o)*Z)BT|d!Rz!LXjz--YqZQ zAa<;xfH{FDDtVrw>avs9YkNQ5JGQCj`@y%<6`Qtt1RFfN`u=m}_iuLHH4%Hk8DY&V zTabfWEHEW8fZkcoZabz`bcS7RA)-tYxN*9xyx_EczJWQT7(^F7_<6-UL zmWr)!qdCM}e|$=qp>V+FwTbKXm$mzU{fi3s=zP54{(1ZINO{4F`bz_*J&?KGw#+eB zxi67t6HjRB-rPmsTnY~hF!VHDn|4m6bFZ0bY(SLA0~xPG8AF~4LYG@UyUa_DT$A`p z|Cx=RlWR|;%J1g26YeO~IRAY-Bfezi>w92l@0qn1RA9FpKV%&sl2hBcSy-A|K1N5U zhCy1|Rfl!!1mTZ$K@}{DYAc0qU0(2hV{SzF7Tr}`UX20;|Fm8FL?oQ2Df&)S`LIX7_fNU}rRbMtaXY<0 zL;3&z@0n%K-IO44q&{oCl`2PLvjxXa(Q|oqKeqj1XzYH>&864I^=pfW=3nWiydC}p zcRt#buh!W-<&meDu*dbz#~y(`x!T7chfFK7@(f*QT6)&vQtAJIw$!`-njH@qJox>l z%z|&5@2pMtm79WG1U-wqGtO5VJ~8iM-`|}1Xx9$CJHHh!r)*X{-E)7hb>}#BvnH1RXLWj@{_wxbdT!&qRiW9t-jYYk`~Ux* z*_PbR0Rk=dDitM7WpZ=HxXk|jzjiKYgNR6U_wq-K`$Q^PngxP9mfcI;om!UmtuWMR z$&}Mdtrl&L*S=mXb&ou?ZE5C=^snFc=l)la+;whW@~j_rmt7`JNR1HdbDk%B{M1{X zrv7Qwh4)ORxz4)uox@S$Q6;GLw=3zk2#;t5U8{H@LwaF7@$o zYPHz2`>CMeKTzx&aw z=lt+9uiJT^H=Rmd`+irlO#im8mtF2ZmN_jX_K!J*wS%IkiZ4ykNtRflwPZ==1R=#4&%L9+`TdC&yfjNiu}gp{JMfkN$H@)9 zlDm(z@X z{8(`Mu8*%|Pme@JrKF3iq>ip63yVbE|1<%ywr`Fv_<#IdDE;ZmRmmAMCH3OdethY4 zjG5Ee+xzfMSE;E+tzxe%P&N&aEXEPTmoaba(vs5Q^rA6J(%eN#%RZbVg{+efU zb=M~Yfi@G~ef`hx-ZVOvp{LDpe5srBXG|7U0j8Zi8R zef|IHZ-1))|NDQi`Tzevi4|WscHI%bv~{t+_2cXMY;kdHmX>Yj=S%A7NSrw%@#BZY zks}go)+`8J-@xE2);gIhB4WY9l}lJ+Ltni)bKuOGf`?3p*T?hd=zxa%K74j&zZntP z8KuD)&&w$}SyVK2!nCx6#6(a){SFMj{f2wdJ!Y{L85XV$96 zmC18mZ>>qazcQOwL5+KR@rUkN=;4@6Y#VXa65=|G%&9 z@BcHBpel6o`~Uv}6VG*eFhuemJ$Ft){rHEk+RYOMn`cTkFHmS+q0u~5lv!N-@csQE zua{k7>gWJ9QWO*pB%~cH_^MUH^Y%sHi$zb{`6Z7Wk(g)Kx~!r8|AK$5DUm;}*?O$z z+cZORlfS!1hoetVV}F0+RMDmWjobof4p)dQm?{sckk8D4R224ob6PEJK1exk*eYoK z@vn9KfiM9U-M^0O0}NZ8f?7-(Csej1&i+|eo_Dw)Wk)!uY-6M^BuYcSpuzz;X zWm&}zrD?4us;58H_9&j;dH?3N@2sN#l{NLZ%vV{X{kY?r40~y>;Thv~-BY&c&U3&2 z*dqL$_4H$2iMmnAx``cASQxzok6Z8^OL+Ua@L6-5@XP5p9)o+lXXY4kwL0*)$o~*b zR^HUw+dB0HX}7+BDpXc=<2sa%YmVg*ykcq?++h%k9?BcfXsVX3ukrb=LFK zA&;dwy02BQ>dIZ5yHPCpMDdzX-EW0yEJrr-t<98h-}|>~#j_XHT~2$A1h0uEn?&x= zaS@Dc<4E|;YI0aNVgHi_oJ%{B*DPH%BmG$57NP9Gu5GG3FRL>z>T*XV^F*rYvRqg7 z{b=(^M&RRvkFU#~Tza~becI{D1aLR!%aVrjYjhqHMl zck+>0$ttQ_ragIR8+vrtBGbz)-+Pp=EfTrrWzNa?d`0%p3hgfOO$VEp_cXU(5L?J5+q7cag7T*Nio@q0*RJPIJU>%n z;-;y|`%B#Rt_fQC_{63J9@E_i=&))a%b zDFuDGhpSr><~2_1nm2iM`rdz%;rH*ngj;?#(QH{B^A){-wssPDt;`AxF(7!8BCqAp8) z@7=0#bXy|N^;W|<>c7u@elVvbd~KxIp7dEPdw7p~U3b{A2W)FnN^o zLP7QDhc}YU($dWh3?+A8&$kDyM6js{-4CtI90XOCAV9dWuOcbEXY!wTbE2ba-zf0zu(cJ!O^?>;9XfOC6}Pz-4YF@KRCku``s9VQjHA_Ykn}? zmH+*iy?Kes;kUPW{`}#YZ*Sbu>3Dsi<0=V{9|zc9cCzg7;xpnBf|@)D zhUPv#W^wW67MA8t&Jtc0(dB*``92r62(rDba`+ZuVBp~X^TtiV_LBwyd&)Q~WlgVL z%Xle1(|1&5k3d)@*zpkRyeCq^eZhu>+b~h`P zsSeYw8yf^HlymeJII?B+hE*vk(4}FEvL7{w%(JgIl8`jy;4=E~(eTAfLkUU8*{?%h z&PzCEvM0S?(kCOgdd)+R?S3**Ef-hB_T=cy6BO*w0efaq{G*0B<=_5VgFGPO5k33h z9hqHvZCbv(F_*cH9RGD-8NU{j(Vd=|GLXu1QShULw6uiO)E^(49mA)CmP@s^I>yRY z9k5hZeptHs2t(FU?YC9hUl^BI$(U6DB4mBqRicr5PAE%k6V} zxGR!F!PJFgm3wf5aGD$N2dN zZ%9;CO8*NE?_@c0L?i9UWRq2fTe$d*(z-vKfF^*@M+r$ydE3GQ69t5le*B&7=-z(t zhJ@8+4NXs0?T*ZlNAkB7F8L@;esG}a{|&RNJ{m{F`6EP= zuYxvU{{O!y?NNh|;s4+4cPD&)%H6!t@XIN~vNASFNv$WCzfxqH3hlZ)78&qol`;+09#ZQZ~y=R diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png deleted file mode 100644 index 6f8b65451cc08a463e4305ddc4be0dbe2879fae9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18058 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4qj3-okx45^rtlpw*nn1P*v zfq~)we<{|*4C>$({aM~`q#Tr)_v9Y?N1wZD=^(Q%StVGiFhw8xVyb0B}#MZOl6J& z!M*F=SDunsTdt;+mO0IVVV%?}z9XA2)xTi}Ma7v*o_S1$0<0HSZM&Vx%)LP*rBn91 zz0`_KHvZJzb!I!AmG&y{*?&^7>3_|WDqcmIZ@)E)U)B1p+os&w_Vs!2{d`|e&A@X4 zj+#-cyLcRW9kaC-S@z#!Z17*Dq7DNOGx=cy;(kVsQg5>QN! z4KMAR-nv#bc}3V1Cc#hhp3SUw_i}MLcTvKGP4J@0#DAxE*@F|;kx2*h4jBlz953Wh z>=NGbBtlQ&}3Nvy0NSuTDp*-*G@Q?0{mFS)!r ze@?8rr76g8t>{X*Nw4QE5-7TT;`5Tn7i=zDc%9SiT9J97)I(^C?kg#m6;VnOtIx^b`WUYuY1Tz;b_RpoS>y*U;Zt>z^w6x(Pg87vF4xX zSe?0_KBiTCuzTia;W97s+o{j_fqPDWQf@kNxH^o(&u2;3!S@_Te)_xAEV1g`v2|Tc z`TnXMn_MS$Jht4fomRDH!{Zlsn3mq;`Mak{@?4_Z{l~Vk?zb0t#R@a|I)@tav>dQl zbjsvz_+6uVu!|OXK6>yl(cEZ~kwHLV`Sq~Chl0Y7Q<9sXRlB*l9Q^SAzrEYB`VT@( z?9!?`4UCjr9Mtr#>T;?&z5f782Ip6n*_&O^Fxmw=&vbk2rG2Bc|w=d z&8?!FC9oo)By@sw$&00|Zo;D3D;t(eHm){Mh&ZsolKG(9s;YkrRRx#aVh`Mzpldnd zX2wdEro4M87KKfThuPYs%_Z#WmN@NagIK9ixM70JHiFozeyQsjx*y!K? z|MMA*JQ(u(4rYnzsIss*t@btW7N63?;cvL%%4hk|SN1vwA9`x@NG#=J&MG=M)o#Ny z!;UYFAvYdqz54g}e}BxS1tEGF4B49!9JHJyBqaX*Oa1Y={oxtOw%pqie6mRuzotC= zobI(~IwQ;(D^_%{u|>JKN3UANWo&eaol8<;!B-BOJr}fCx43)|*Zlg&)l2NZVTe=v z@(DX7rgB8R_@90tOk6Hj13G751ryYQKP9-|KGjq`hK1ZhyLs5tYBWNb#Q9lzsF*Z$qGrV zrm}yM9Wt36yBY8J3ZMDXs4l5E-MJ&BWy%ymzo!2Ci$$e*I?{9&r6nGirncc*%!M5v zLO#8hKYX~IXLY!-PVA`-AaI-h-n1R9hFSf5wZTtHt<{P+nnKEOq zV1oirYy5{B2dv-Pwl63VlbT*+RAwmlvDHIe)?TsY`@}mRrTgYA58C$G=3DJHVWwm4 z0dMAI>UMu}^Qn96;5zBJ+}Y0VRccwu&z#~D6yLAeFsJmEkK#VRg~~OGZVOI{KL{1G zlP-3C%<~*RlW5cTXWxa7p(?84R2mhQSUa&2bpC!KZ!mpVQ9iH#>ewxeN-gc$x z$)4pYAI-Itnwj<`AAMaX=%gN0!!6HqH}MO*f)L}2w#hlNs!ToCA|_4H3`{ZJ?ri1D zvR>r!i_Lv|Hd#CljyGXwm2g}mA7FMVkKgR+*}GEe>*6LAI)DCh%iBHXbyrY9YzFW1 z&i?mopti!4u#LP26nKggUNYyzTwvDv$Xc!c);;zYQ;EaE!o-Z33PCKMJ#Xb7e25E* zD!%6w@k_h1sQ0t9UDvnO4CgM{Wlu1kyY)%q^lLp25*#9KZJ5p1YSp{BMr7^kbNh69 z<1cGp5ZSzHbLG0^*UDM$Ete0nPEy<&wpiYt>CiD(0p)M9&pRIpxnnh8h@DP zHKl)Z*R5Rr`hQH#1n;_NFD=)tuXbIT<$byroQzt+9r+F@aAcnj-*$KJ?7tcR_rH6f zaN>*`hav9`kIulq%XiFue(krOZA!CLX2|3lO1&bPFLe(UUjD(pyg|85vr4?=&z6j3 z?n}9L7<0B={U9oL=4pkZO6a6o?g>)_WM>PCeX!gn@w52ht;Ss*uddWGY-xLasPMp> z_>a<{U{EqE)X?r`Ws5p-sz{;VY>`-AM?~XN&WbMz+dTwaHFSamMR|?HCNRKR2%8v%pK;yUsI6FDvfF*7so;t11=F}yOh2UPJ! zMe$s__Ci!R3{uT2nHAo+p&=o`VQge{U=piMr-1@*pmPiZzx9+4PN&YExGKOLb;`l( zN5o~uS@SOlF{Uvk9SG>pR9JVVEtD}V^+b#h8}qWy7bbBtzdO;EUfW^O;u}=ul zV`s+`6~(h=&5p;j8yg!NA+?N>Rv`yx;q4nQIJk=!@EREz7#JA+YxdcEFzd_z4S|6! zlHD>98zwa{nx-yj)m(StFi-6tPsW?GD~jy@nY4uRUOUpIE|~nvHBou0#tB!oho6`u zdFON+$S%IoV9?VeQBxzq!cwq%B9eV`EFvN#a&tod{F!p|rldrHPDVqoyzzp*4rMEj z4^1MS-Rj~-55!UqOl&SpjcE!qNJ@UZQT%e{jxO^Uy(1=!t=-z14?GPcSU;{vOilgp zJ>9@KIU)V{^F(Oq&9V6R?{9vN&f!C!#KmrYU^{LfVwM!go4JX1(=mBXh6$G}w(U@i zka(bfd!yE2o%Rh{#~!n-c`6ph{83&1kKfS=3fjLWZdIO~(bmyn@Zz;`LD8dPNpP8= zG{?fkwDs(nCpT|OPIz`IkcZQI%J^%#DojmK6K7+qS;?-~%~nwC|QD`!3Ide45E_w1HttQb2>r+omJ2YRX;MGwd z_K4d{B1DZ7yv=!dc&w~=`ucuE$w7l8rYIt!W8>!k?y;$##aEtf49YPu_~W`Z=0Xr# zQLInUPx7HMwjCUtkF<{Nw3gLZ8|MAw9@m-T0OKK!?5b~`a)noGgPjjP=KJb8F{goJo@?Xp<@ z37S3)YSt}MO3l*Z@s{k_u(N2zPYIEQmmO@HcBBaA{r>s+{^R5S zxw($EJCX z470fSH(Opv034cbF=K{=sR@gv#lM5g{2xwU@&DoEr7OI-9t5lOY*=!*A*A!bYXvqH zHj~af`~T;io|b)i`TRYVpLvy)58vN^fo;vEO%40^XT{8!Hf@>$H#hT{vNJ*)kzC1)|GG?|FS>)`uhKEIX~^Hzisap|L-s@Y{N#wf+C|o|Ia^ocu++0WJcaO z1x?R6sXY;C4<3jqY9}Wgu*>}Z-=0}pySbhH@G{>U!tCHwd}zLfL`o{_jG2-XCb73zh|NsA&zu%`9SyXqdNBI1_g*wy!{h$Bv++6;> zdzVkw+bboybmEjHYuBASf9Ozy;NJ=^PPUwrtl3-&G4mc9%+dJp@$vto-TQrJe){^% zcgk0h{dJDd4}`7%SN@F0IYuTqORKqsg;`ke@Z=LOna>~5Va{r7ToxNR(b1*l;ZbfK z>tB#~KlGfXuux7|xH2s4$t#9pUG+uDpZ@=kS9uqobK2>@^285~0&U61<-UEBbMuxw z@!)88{$%y|j^4Y5_5Zf+um7)jcB|0?4K0psY#lkd3A$Y$%+&%e{Qu9hzs~T@y}!z8 z&tgNbs&ARnm~^DRt@^t}Rn(Ix?=z?3$K&Jj zKb#+ae_#Ln{C+(ho08w(e$SDSwY1Dz;?uKrZa14jGM}%^>Z&yjEvueCJ-t7Ad)?cY zKP#S}lTVtKQ2X^Zw%Eeiv3|=EK+5 z*YDbWzPxsnHL4Oj(R8GXO&5&C2lvheYqi@>Tnr?CANQ;Y0L_tngs|wSU zciL;%yCoD|edRNh{!2*w`}_Cz_v8Kd`Q`ViE2>CHi0SQ#jI4Zgb@luDe+yJCEFAhg z*09=19Z={_I@-TzO^oK1m6QvNs7kO1o2Hc`ROuH%;x^ zXZOuh3>pG6K^ak0gk|T>i_g3SW-m-k%b(n2ZpB(9V&QwsylwahxOeT zpP%24-fowbX<7bOs%VvyAMXsMBg;AeXfjOa)X3Ub|9^ksiJ+mA_N>oaZ#-t9;cOe#2TZjsv&k59YHf z=VWTh%Qvsk5PJ&^h0LG*^7S)jL`X^+^7|eLGWhfVzj*ZAPvV}2QQoY(n7|h)70*Hd2;suwq{?y z{`z`-?O(6H{Sp!q`|8{CDpffjtPCj7Y%_3i-S|Eonqy~Lu&~?^6=9iZwm#{Y$*mf$ zs7pVaSN+ppWt7pfaiihM)1M~?YC9iT8!bOwZ*TQOrWC`k^;b$7EYr?6NKW)Gg{Z~<|NnQ5y}>yBZ}Td-iIZl`{O{@gd4d?5$AkO!ZI_qde{=6| z&x;o>-t7MaE(?U2X1rv`-r00XZ(s5A?>me4*G-CC>cA*3&nzyU8TCMwy^eK8y92Zt)+QJnx^e!Jqnnf6l+YK1EJJOQl71 z8wb-9$DlQ`!ak85)8y?p%(()!aHd7w-(N@1oaw5P<1JY?b)w+J--;($w{C6Rd!0+z zo$q@+56}N8Q-AvUcFy)Yz*hI~Px;$h$CcgV61%_FugHmUfri$k#e)01S$lcePi!jCJ#&WV{e9yEu~N9LDylzIQX&kJzT3abkVvyS zkdyf5s9LK=duqy#yLV4=aY{URkYH8*PWRp?hH#(cWfC!uKA1FWo^Cky(8&R{f&){lP51a zl9+l_LEU{>+@HysJUlOyUkE6uwJjBcW>ZPk`}_Y_{rn{UWQSyy*c+UDN)E`B@HXM&hU z%>j|k>$@Wp6u6ZZP3U}CU$tx9ym=gz2S5J(9bZ=`=I57|bFsg>SDp7Dt7L&Xx6R78 z^LJWmtE#rI)!k@)X#-P?E9<&X%t~Kh2u?GDR^&4+R8)4but)?p^H1WKpgp~whv$PF zR}|NQsDuYu6aQSkD43IBpsMe}!W#Mi&(Gy+&o);T7|sh~37_?&dm&FY*N^tIH7ly( z;@FIh!}Rpr{zN+!Fp7z_wXw0q#T9)~Vm`BoDN-QU*^HU_@N)l%ZDG)my!MU3=_^OA(WxczSt(z|Su)&5u3fj4|afY~`Al z(sG364Pk4m-;l(dJ-5m9>z~AgAE(d!S-<_dk&@HQC7+rnPmc8LjCOCI zK7YRC;yKRep1yEZSZKshfAh2aVT~3eN5^9m1jt;Bq$kH^}28m}KT>0f^KUQ@H}oSz>fqnnbj zm9TL0YHeqK{^kt^6aO9KoG`uPkYvR#9UdMY`#OU^e>ziHIU#k*WQ#fnHRD8Iu8apx zQVRtfuY6;l7s$J&^?3Xhj^`bN19*_dhDiCwO_$$`uBJn-^$nbR^y~VT+ADq}9nXBm3|LmWhnU#)lst=dm}q z_6VA8XIiwh-rTW6V&Wz#rKZ5^xqqB>1OF*r)y!DRDkW~YP(fVfzomslcJ|kAZ;MS$ zMUxT_&U34%jEsqOy?x6hDQU@sb=AMRrHV6J|HtpS%CgRiBbduywngCKi=lPwYvG zWq~A=DY1L`S`0Y;Jt&&Dc$UQ%*&06f`4^@aoAdRqJ1Z))M=CeyvNDeY2Tu(%v%Tk< zYhTl4cJEt1V`j(U!o}AMyPq+*2XH2&wUn~#`cOQLL&ngKVb1*OJ=aT1&(+R;7AsP6 zf6;0Edz%f9D^At8IdMhT_f5H*bd1ht>7EkG z1dj?HdbuV4{y&@AU)-kmjSUU{xLSSZJ(SvF>MM~@eOKax8s8JwNCAfJOuDsyk{i~g z{y%=;z=1P={3B*amK{*`{5j*ti~sD!{856R_)jMO-=KMHmdgSa<;V77)9Mc$`rq36 zKRfGDZ>SbyQz{$Ff$*lgdrp4%sx4Lw36VoHE$aUL`TgZ(KHH7RRU0PPEIGln z=|k32`)x_hjJNG3{+Gz9s4)=e3S`?;%0BTb*FT}w#vMxR+P~$^gz|LGGEUTFmb|m` z@Q0_|V(=<=#on5q{M)LgG&1fw{nlPt*_ipujPe(P6aW8LJ~y#-?i3H7txePHCY}JN& zmzHAs3r;01H8GM0$BpTu|0J;<@wjVZge>E^SBT>_@H3CnocdL}h9Mo*r=sJwjA zETa>z_aun<{Wo|c%g|7Ig?louV*K7imA6bGrOb4TeKkM-zPcK(sCc4qGtY{*JU`}2 zbUf}z{2NiB>Qu9tdGncs1A?AMj%fVrP}wAMFiVPAPV!Ay1mukzyGgKt7vTkrK*I6E1EnWj}v%qM9FFB9Y1z7v?#%ijd`0**H&fcASNZ# zm4*r025mv75)u-26th3KZG@Pl6i~FM_P4m$i=GXCbN^3B`fGp5f_;x)qC&{*gpb@= zSqTTEqcbKRY0$kMG(+Ko_A%2ph7prkKWa8NHa>h8J4Y2-d)!y}%j_xh|G)DoHV?%{ zi(-juEQw8XKQ)(qVv0J|P(0CDpto0M--(00(>NqPq-AV*#FCw)q`dRN#*6?JZnmBm z>S-*0pZU(=jexl7&~#Ay>-)dI@=Mx8Yt&fl+9icA9ni3;HAo22s5r&{B*9hNSi+-U z-A0M0MMzBUvn0n%`-mVmsU~hGw^J9dI55oH%?L?zN(P($KRhfSvHFqypK10>q@LJc z%CJ(Nq{F<;F-W98VZ%gW7OCk!Bo0L-2wWCUJdnuBY<9F+FEWAY$v=);KGH>&AOBkK z>+XaaHpk-M|NqAsUUwz<%vE4JC?RnC?}6jTB?7iS;Br5cavnccJ+8Ej56iHmRMI4Gl^eqf@`hn`uF z{q>Ga5pK4PG)Qo>Xdzt4Yv+snySnEZFn@;7igX7lfV z`Vq6$hZ@SOSX+O8er~U(_T#AIjEzm)4<%n8QEk=i%RCyOrS+g8!LpI_-96QUUQ2MM zQQE-s?|A?I_xJxd^D-UUuJ)(B`@zljrXxp195=-8{dM#1-#-T${db7ZTx59Rjnayz z@|TiAeRS&$1@4qH&os)rlW=1L<7qvUB!+`6OoBV><{o(H2zTYZ{r^Az`@8@D|B2r< z7A)b3d358DtoNxO|Nr0b5`AA)6(?)+gVS06+L8wC+6R%3`=(WWSQ3@8pGW1#&&dmS z&Rn>%^WibxMOxfj40+htzP!`kBxUjap|j5K`{BK934MX;?16B%biU0qd{qVz((HLsQ1dACnCHL1f-q$P-y{d3kJwg>CWs zCC<%FHZW1SFB9|i^#A`n>*EA73JeSkpkcM+Yv0HJ^-r=S0-GJKwI<)L?cbW)7`M>C z;Lq>R&)=V%eBW>GFFAn&i`Sg#Stt>n+qB1@FPQh77Eg4nu|zN%GxHalB0s;j(A6Fm z!4rPm<6>q$yfwQ8IzFZ}0W^$IQL*6R@Bj9H)bum%I0Uiz%rW`$|Ns8w{`F^Pe|L0h zI(ojpyStk)f6}3=OAag*GIr!yq{eD^;L;>V-`Lfa;*bINFUn1wg#^m*GdGaufV z%pxVj!vmUL1Z9_qqCd}{y5{XY+|4ybP3w}JMa9ZK^`n3O{{Mgf`}_K5XMPsEy3)VF zATK#NOy(%_IyU9BN`~kE+NWwnnBV4YOkrgF^M_~VOrH66hHIjafB0wnFo<#IsRKUR zGmN*0>j{)W8tO_DEdKrZ`Te!Ha?uoLKQ-4^m;brj{$LOk_IRYR@!$XdpaIXkJ3mt2 z-SNM6?M-Lroi}ePl;px)o3EK3z4g-d-CRD~2XD?KWSrPgs(pB&v&+ikK8!Q|igWyY z&VR0S@d;>DOt84O@9+0VtV)k2o?FwZCh{ct;>T%XCypNY@7%s`>$ZK*pQr0+Kb|@{ z`OrkE4X=*&RIg#>EZUv2g2~XZZHGnM`}>k={>QezV$wdaN@AW}+@6FVKPGNagd2Y2 zhQ$4Se@`D^xS9Q*zwGmu`Xy$XTNHl&m#@<|Ra(1XTgrc9gFnB&zrX+R@cp8qd;6-t zZ{IcR{HFES@AE@69&K*2;K9tfBye(f4Ez` ze!BjDyV6(Fe}Dh~@BjIN|8^xyIe7jwzdU4muF>Ye(S|8}Y+pNATR|fmv&|*<)g=D- z-k*3?t8II}WK@*Im#>l^=DcjMRO!0uqWWnTEc<{H#_=;}7`i3gmP}#1+ug9m^^twe zd#*ck3qBogub(@2{&Jt6T$&P+l9D2#Gv)06fpU__O`aP<>`_ezO#hhL9Q^*CN8a8k z{c8Q+B#V*`MfLWBw`U(b+Un@u{-JP6#BUvW`=tA3y`cUKD8WwvwFv+J`DtHU>%sX^ zK91MW$Z2|?)4NB>DS!G)w5$)g+SeJ}+4nd7c;9^Ew3_NKFV;^I`uzU>e|ei9-~Zbm zKG?iJD*9rZkz|0fv@z=f)#(rC=qNn+nB4aD^};tjJZJ&>))B8!ae`Y*9%DpHD(l$_9oKw!AzxOUpK9cF-!08ygxOqh>YEpWirpwxY!hMN`qJ2?w+V-FjYED%@G_ zFL`gT@~JXtD!B11;lib+o)m{Szx($rl9*>8At9k~^3lV?_0ik@RQ>qyey(+Wzx@Bd z|Nhns3c7iFUwe^JCM|vK^z{EVzgSFPotJOBzW)Ep6$XditxY{J&Ei2T`zEF*(@qtz z)&Dm-Glw(R^3-YLj!wgjOv4v14J|ATbmGnwJYqTg{5;S4I70(N!W2^=~kE{Q3ERNePL#y}!PHeQlqXrk2C&zs5JPZDMeO z!+~aYp5^||H>091ELOOALqbMY^2`~DdwUXh6f{0ODcqKSU&6$6;ckt0KrZ|L|H+(RC)30xS{UlM-|vpV%ejD)IUCbNB0a!|vSs!UCEX zelmA0Z-WDmTl~W#je9<`oMg~B;J|ZI$wOuui-*P|y+2%R4~k7qYdwE2_597~J}q$C z^+P!gSuQS*j2#%KxvuuQbfw+AOC&W!WFSPcKb8S%Pky-H=HewIQ3ys_m+Qj2vxQSQ{>-0#pkA`CanFpGJ6FZbeCYI`Z$aC4k)|uNdK8x$ z%w|qIH*>D=@+Ko^uT5nRp*q_h_gfi-zxUwW%G6-kQTX}mX4_*)Ler;KZfOhJ_pJcZ z44Pn(k|I&}@6Y+epXW~$EZlSX|9>7Ho)5|r3wT4#%-mR6J))o3t9(tc_%@*>XpegtEw=|K|&yaxFT1^7!!&zs;F1UD9G>%h6~zuWxlG|dA26O<9AyA)uhei@zp=Yt zzv6S&1heDy7ShVE|2seZ_xwr9jyl<>IR%A=9-e>ye|`P``udDugT$9*i)t~e6KBtyS+J%j z|Fxjefv7nL7?qU|pP%pIyA#?}JT%?n-{0gPzos^}vRY~MZJGANKIl}#t*$WHMLU1o z-Cdu5?@xRF=|8`wI{Nf9wm-MGwr0+fHQ@rSHmmqK>ETiCjHZSo=JRHnSsb{<;^?h? zeci*`|2sfSuNc5{IRE~qKX~kHCN)p(L8|bU0}WfJN<5UBxct6^q~wg5|KH#E$Uo1* z#YLi`LZa^Pum7K)*MG`0kvhgONlh=VrRe|PWC4*B0U@O|g0CePXFPd)yk&adAxLl^ z3TG`WGFq`J>7E%!tRYus`T;+)wr6!63PHk8~6-tm`Oo@hHFWm@{a}2lfB9?3SFLrFZS>W8L#4ap8l|QrVE6dgE z#Lq7myd4&qcAWXKSfpp8;hz0Q+fs`sxxAmPSAB#Tl-HFmRERdl-r(?`UGRIuw*Uh@ z;~(cw{CIzTeZ9EepZ4zyA|fT9JeAzBQ?l-_(!vgD#Ruo*+1%XPgw+LHS&faGcUYWT zDm~+4lTK^c@(-Vk?(Fw>dITwH4}~Auu-W*-XXl$9r%e~|+)&Y-(yi0_#)*xsEqePu z`?@m+&h&gR;O9Slem+l3%#}TLZ5|(=&*S66BO}9e=8TK!mlvFMe-bPzCOrI{ z?y^l#AVEMl`NwzuYW*n_1*LBOcrC*5`=qc?5F0ZyB$e)&yPK=QfX9`8QS;>P$sG^g z{r}(Zyx?uZ+^**Jju%&Hlo)0ku@njK;Pl*g;NhLuyRIIbvG?Yim0xt1b3VWD>(Sn8 zB1f;e@P9B3`BRe>y(U$nLurZ=i(t>f*WVULcsk9Rl={Rn)id_8Nr>p@GZsR}>kni^ zzNlW~`lRQ@H;rGq-Sh5TU9W0>L_^dev_TXUxLfAfa~pP#F4 zwyl_f`#;A`rRLXK8ISFjc}PA}?f>ch?!?A0i%rM zJoj*Qk7LU=*Oo$;mc#|`Us`SBP2MtDQs}5f7dty;A1lQbMSHrkG2+@nTIW}eRcbn{rN{?U+wZ-=LHqk(uX${ z$h<#(s#2-RkcYX{_xi$9HkZHmJZ^n?=d$!s!&N@Qp{|nxrirjdwryRad2L_KwstMw zW{(>y7j@XUt4y))U;kwL$(FK0GbnW?L=6CzJ5wQ|rUm;cdtJJ+-|RAH0uu zX{!4*CGlfY+tzH!H*Y0-dL^Gckq8Nul##hm%`&Cgf4)TA9*6ls>1k;X9v&28jd&g) zv0yj?_&xYCq6u8Z=NC| zx=PgJ^S_tDZQj!*?CR5Y6f`E@HfsxCuW)$Nkv=P7=6$uvH}=hac!ZNpS((k#vn~0! z#Gbv9CMFVf|56@2UVNp%<22i*FR!m^w`~6ZKmEeRgLT!G2VLVo{9--SwuUh|Svm2C z3^-Wk%+lp*0IgqF+P0Q$V|9mZ+ji0Bz|9Ajs7>A=<`{6nplN@{YwH6BJo0Cnzu$Xc z(6;XO-siSP`|^t~ZZhCm&eSL}^T<*&UdIpZ{5Ml(Nw~*Mx46gj&PM%ofy}d?*}~_- zj!jVcVdv^NOUO0D_qI^m|8%d=<*PKjU;Sd)@_gZy&xx+?E^+_5EjH}ie&Tf_I9<<~ zwUM{MfQKcYVMarsKygZt8^hAYOFcX%a=B(|csM@vU}%Q22?%CRtT+^$0D(1h+lFd3i%w88u zC7f>`6VrDxcxl_HwpM=|qlNqB4;6_gwl7*4I^pbeff>s-odJ!Y{{O#bt~FPK1CNV* zhunc%+8kB9{x4O($TW5IUTr$j)jD?(hnV)i{SQP|P8RSm;yL;EZE?$i(&>9Mwrw_l z&0#6+c&kjmWqtOnwnYaF%*=PnzjjXbz1*SH)~KS$F=3L8`ssVEzvX|;Tjg~9W9=o8 z!|kr`(s>^?;`)?GsB$iKzYa+4$1S3HsCZP$|0v^NoH6$It}DRXVPS{!&< z?OR+^1vVCXN%ueS+_PGI!v#OVY)1j^?v)`a8*V10u-=R{3BB}$J??>^$0vT7evZfM z73c2EN{v)s(YoOM&GJ=UDhCVsrd56XVA#7!=Z4v3m)OOou8z|ZBY6(r{kyiiLN2l6 z{o`HkRZ+(0HA=Hj-ss3YvMEKTQ)yP4iL2KVmA;#1EkEL>H}+L3trw~EyMA%op1Aa4 zYraGe0UN!yheCail-AbF{s37RIoFoEDM8?P{f`d{8XhesCYj9_glZ?+ZPRmV>Q0v8 zeW+k6;vaVFLi5Gh7wc`_S?qWm_phM9-s-_|{cHM}yC0{%C~-P|G4+b0W{X1K$J#EY z%PA&{*2!O9-hKD0*JTCO*}Tn+flI!>HBj+1=s9lbW8ZQ?TqXB(ij%;D%AVsq?Ga)@ zS^dj}k1Ow%y{)@iH1gY5<9pWEvR8H4sQX6h6vU<(KPx^ZbpLt5fo(fu1e&%jT_@P| zS59;5$s$jQB0Kx7wM%FC)n9&Ar*->f#I^bm)*ayDZpth}?xp~N(CAPQmbudLz*NIjLnJlK{O&>DV z)+C!Wlce28H(AI?@OT7^u=*zNUw->&>BLDldo))4^fBl0nOwJj*@^S#)K`8KI`_PO zj@a}v@G{aVbL_dB0z{71pYT2+<8*SVp^Q*gm)icO+IaPY3s$x{aL5?VD&kOz`r~Z! zMMKzg*XPH2?+e~pKDW(dFIMs7cDyPWkS>@Z>AYBQn(O}mrPKS_fA77cvms)_^AyR; znwh5(6uvG?6+E6I8R{xhXi>0#bKGK$qNvhonO-qfa`TSY=o$ZxXpJZ+yEBsJfW*w$|Xu{`6wH*7J3*Mr!>sc5d8wMUJ>Rh+^uF>p`Hw|VM*d$5Z z$(!RBqi6KhYf@UYq6A-ItmHlOReas@>$Tn|ujhAh`jh@>9=I%=GS`r+IY7WA{-g4Z zl^F`T0++Qa80#l?KjULlX`LFrLEwtfjTIU*)D&CiI|wzWtZIEb?dIIn#2cbTCcN8k z`#*eA+g0-+?O0m#^V)d}-<2Pm_*qg@l8ui&X1Y!K{_y`Va%QV6%$=!rDI}M0$Zl!Y|mPiJ!?~+?iDA6mQ!|erzF)>k1Kl~dg?SS@pjC5?YPT3=XNp0 zJ}*(?@Cc3(Kd#t!^X-akpUIn+ynku6ic3-KiXuW+*aYt?0T;Iywv93!}YBWp${cD#jbY|O!Sy{`TG~!+}->}z8sFBiLLS5n6lBPHBR{<1x3Jy5za zcT$UvvM;~ETh^ELMaQs-`5XVIqCRiU%D?Y-DE_Y*kKl_ppyyfq38Tp@cRl=IUWrhkE4mX>U~x^0`&H=ZeU zNOQNZPuo#7wXwPR;n(ao`+A8TI~VR%Ik7;0qC@2@$%f2?-Uy zrp%cMooiK^U=a~1nRh>J!?t6Vd~Mehj^29n;89XXXQD;joQGetb^Kbo4@9r|z}o84 z_qD6bXvQp~6Q_+=tTw)J%Sb|Up+(OthuFEGWro*u*?fK3Dl6NjYD>oLP5SYj-^$M0 zG9WN=ab0%olNU7pJ7ifFZ#*Htwnigp~ZMb>x&>;n# zGXiqw3jW(2zPwz;TF~CWFwwv;F(4%6!j+T_8y$HhFKVPZG;AIrL z$9uZbo*GVBg@r5z#>OX37)VGOrlc=EfAKNnhai^gHmAS8?}#-}0d;LagJ(9p1%Is$ zvGPc`vZk>e6-i7? zs96VciX9~K?;F`vIBcqxfKBfy&9R8v3)+(A=*;|(?@XP}9n*?Wt9My7PZmDByPU_m zEMSV=jvW&J{w80zoc!Ti+J(!>0-~uGuBc>1v+Fi2Pyp?;J9myJD2Ruh-^ix+l!Er@ z4e9*M;^O|?`kF>dwhL({{70cU+iE}?Dwdi&|ew_T^aWV&Q zs)4ENO38~1>*REp-A+05E%^Um^4c23;$NWQ%>Vzl%vI!WP7r95H|gM7)bUbV?5%u~ zsE*qIR{Luy7j-n!3};-~v|vYXt_9ofs)rWuo<4XOw^Cs@J@GaQ^Y*|#`OzV zrATh;eXPJzde$ON=b>%(tQN&tZSN;PXJ5SQUA^qw`^WFCc|K8vhdD?|HTC4C3npJ| z+Z?aUK3iFNY*R#diL7FP$f6DtRgR9w+}WpgKy&nF)jtTNzvHgli5mrrr*`meRiIujK5T;io`b%z6vH%&zBx$9KHD z*rK|9wTju=*}8wZ!{`6@czWGGcJlMzOt1WF!0ix_%UcZuT;dajIb1eu40rjUb??~k z3RZui3)h0y0n47Yl|q}%{X`v(U65JFa*wG^`CUzt)KvD6)n@)!5+8O$^453RO8`Hf|_V{iS9f z1ak8K|4NF54q{sz!;&6(ip`KaJU@OVQ^y*n0@b85Px|hs%vvV;sF^Q&C0k<6j{_U3 z%`Kzop(4(WS+;-68?1x$3pt=yRA_Y z`Nf(obcDymC*8on;LqRF8wwOpp6)b++Foh#@?C73WwFG+KZ;sYHfkj(Xm>u)RJbOa7!kb5l2X5}L@i3VB!14V( z!!_~#tx3)BjX&uMzurc-T?=cA-`~*{7~?57JzO%NS!i|U4j;7)?yv^UR{MzyE&262 zPAaJRCYPGcGjYG5yt~YBj%9N!<4UMoil;p~A=tJxyVL7Lg#dG3#}QRO8)-fRWr@|s z2DvW6F3*xoc~}3OVRtA?{3E051^xALhG%9t9(wug2Q*kd`>eaaU((82QbbfzN9V$0 z=1rEet0nm;b1FzTZS!jteD7(M(JlGG`kDQ<*4he>Ce4?(&D-APy*OpN z3gi?}P5D1%Zu)tNTel?Q_8gdWOtQdSx#Nts%QKZTdpMpiVl+B)B-Kdo@Wx+K{s9T4 zqRg6_%s0Q){|8UmZ0S64Lm6}s2E+gVi(cGVI$2%v)-8#Bb*?+6_JnD%2xxkwI3#DO zb4<9JaJA!j;*Vf9W@bC~WTOwuV`E;;w@I8~IeF1c5or7@(kT4%AH0$J(o&6T^M8+L zII3AoKQv_27VmAmn$lF3`0D5rp~*%cRK&dmW^um1Z|pPUK*2X1)rpvpITN>UPhkD;IcM836Lwz~`{@fZS*9kMSxXoQ$uF~z>z)wIIV))k z%aI183f8VYo~5Se|1k6XBd^O9lbB}x`2U}${=d6GyJqqA}jSANwl>*8KR;k*vmT!j_g>^iAjR z5>KAz=O2F;bA_7nuP;bavTct|+l2tOygV1><|Ut) ^%n!tEat8-5hoBu<{Ak~g# ztj4MjGs_O#{QuAJ&+p>}U$va8tq)IB-eMar3@+amoh(asbZpyV(sn22$jci?wpebs z_B!HqKF0#D)~5RoMq-N}>-$M?nA#Ow)j84m_>^edx3V_p_7w*2yX2uRxf)keR|jfy z|EqgZ=6XV@l#OkYFgx?hPz%FU9?6Coj~J)V<_t4DqSE*OzhvG2G>e)U3-``__{_Iz z&QWfNBc3|4^6;!$#q;tdPhujEj7&!g8l`P_XicPBY)AlP%~sv`J5MYpKt@$#nBn zhqSguXe~Kk`?`;aUnep#nb%)Hc zj75Dfp3YHyb?l{3p*%7jF6{cdvVE0M}YsFURbTEx+9b|7b82xp@gC z-~GNT*VOIY%=4R<9C;np@9ZU*wsW1>GW9oXAb(!DwveyIK!D}K+q|QkCt8bDc)1S$ z|Ia4XuXNCCMOm3&!NVV{cYajPJhQoQ2J2M?p;>Cnt}OfV^7nU(ZI{1DwOuIfnW$tU z96D>K*4cBv^x5?eN%z(7=vI`qXm43sp~=VapfDk}HqzjH>HX|;rMI)r?H4}W&VNcA zWVO_-PUpah{y{+=US09Qlifq7#zxGT+TA^~zrS_<{Lc3F#<_pxm6aE@IA}{u@Zxy0 zDMOPjXoAA#|NsC0|NVVGzx=Ta*%=%(Y2xhP^1{MZZyOG} z`e}cd(zx8>XLs0j1$C*EZ~N>2@2ma&{l&%or>Fn7tNZi*aQpu6@Bi;Ee}BK7|Ns7) z#2^1^7clpgKvTIgHwzhg7pa1vw|1W=dXuq=izkB=se!sfYtxC z-cx`6!xv3vVd2C7|2r>m1?BPo|E1n^Iy*FkIy(nBJ0H6gXTBG^cKI_qe&?p5V-k@EMf&4FD-r4#2{n^?7`{j+_+&*6L-|q13 z?L71C40%3%G=chPPp7kAUwB;Hr87QG{{8_$K@PEVy&WFdtGFee^@%yk(CEY@)ZNoF z)o`esg@I^)U*`JMSp-i$LenNPXuS#IbnULdNT z@s<19HO@)1_IpVESDLE*Us6J%?tj{j%E=EuB(_PLOXS^8tN76H@U(cFK!fwak%OmB9XWPv)k4LLymgP?uV1v_fTG5U zb0<$8IPjr=;zUNvTN}0-Z2CG}W)A$6gvfTUB@S~+^^B)J-~cTx#{Yw*XXF|oaqTE zG*DpI-nPX_A#39Dh(PPr8XNn3589sz7i=+Pq-9!Gt7{R~{Xo1ELpbJY}DEr193jqd_|ry8rw={o#w| z;f2mT>i$MDHYYwjVx9ZA3lbxX;vWeJJ~S~7E-bsYX>)K;*t691^n`>4y)BKV`VV%> ztLhm_$!kPP>Z<S+(JXtwc6OU$!LtoS`G&+r)3SHX`00tLT*H!ms(FgKg_5MQ<9wV=yg zM$Wu5DUGJu%3qZy?7R`omh+5f9oy=Y9Zsy7HoSS<(~fkR@*cc-V)bbWiGPL$3(QRw z7_y_E{I7587MJv!pBAx4!s?x0HpDMmq71nj40!gujwvV>`OUC!$1jG1$G;iYPAGh3 z`s$8q%afv(33sM!nWa{3AuQ|eOr9We z`)$zOOoqAbdvBiETzlf`Y{~z&`zNv_xJ|m}W%k4Ry#IyzDNlQ*7@XH&-O4w80z0U} zU6goo;bLV;$;`)(7Oh>Yd*sLwhVu(HT-42&*;uV5sw#Z=*Txx3S$+%U|D;XQQz_7RQU{EK@aDzS2Ww2*(J^L({|L(E)zTf@#3P; zlZmNICrx{nk-b_^^W;H>+zKNFQRW^V9s#{2e;Qhj*GEiC`on+n_KTxz%52ja&$Xt; zOnJmOllOGrhezx?B^r2a7QA%QoF5?DA7i7yWfQTo;lZIfEFblkENRJ3Z{aT&(6N`8 z&?S(T#>UOfR#?cUs@i5*ETI>pAR_q$R8ukh|G%i{Q9?>eLRwl;;ls8Cn-bO}S*26ao3YY!=|9{~zb0@|ly&v9&ewflY$K}GL?ny$;69k$&I1XP~ z$>ZLCDdnpGIJYlKdz6rt_T>5VFQ1M5y*8N{Oh`=p*+1uNV&D(?opT!}%1->O8Y6a- z<%vY9LzgsLj~Z)Q4CkgYCw`B?+^qSC~Fe-bM`wIrSrdEp=_nqC4e6#PyeJauZxA|)}Q zBw68PJ#B4E5PYm2Lz8%S^U!Zw7u+~6f zs@sHV0%_kIR;s}yT2$AWqVtwYvwnVF+aixW$iS-6Co(zV#uL&%6&3kN| zD=nGz^$*|M%fl}hkb4&z-cF$K4ha?Q?%LGfw@pC!aa)C4l&VAUv3fQ(H8wW3h}J_a z+=X9mwFt_Eb$pWHO8DzG$FcLuQ|G*i4^$Mx6qZI=aGE~+|NsArP$8?%H!&ga4Ga!k zVpREfP$FY)fy;I=<41*C%{GJ?`uOlDDeGu%r-N;v;X{ux6;!TltiK7Zc=Dq zWOU+mcS~E;(IXKZT}8%*h6jV$bS^x2$mh>ELpPw}Kw@8_fl{kZ(b|5Q;{Sn?JRX|@ z^ftL&31ZPZ$KWqq(7kbMpg@zyQ3mF1M~-s@H(Z*~q^iDU9{02~O=h>C1CzaG%#hfz zL!ziyG9ps)+S=qDh0PDo%g0P~(}6poqvJzsYvj=*5nY{y#zsa5_wcR`7pVMbuj9MG z)bjt~MJ$mM1a+?jBnx^6^J(r>Na6?;U$@LgIqFiQdG$o8*&1?=FH%~}JtQaWw7 z(H6pS@aq5LQEUlJKB+q<@d*nvD=RyvsWD4SA8umhDSszmCSwn^Tcfa`cyoJu^s!@6 zp5C7iFY4voDv{8z<&A`-q=IJi#Eiz|7>V|NuTUutQsYGaNa2OaE7#JBXoF>x|sJY}X3y+5=yYiYhfBVI1Rzyz-v}(!V zYdESG)^JI%HB{^bD}!>@oyT5rC&CnV%Xk-yW@gM0^tD}csv%tOe_|y2JHa^(OTMu) zGc$8@JC~Ro-e1pidAYGoz2CngXNb*nV!+)`V}nA6{DilhwvCO_72V%08g*qgPE5Jf zcva9TVp?)#La@v+jrZpz^q=#s6`39)r2{BQkmv9jo2w!7xN}#srL{BbP6D*pRZ=h1(Bw={VUd*b+O)~&pc0R6>H-zPD=B)F9<3p=_#xwu>J+;O6~&`_D3G3!)Wy zk57PTkB^6A=ZRk{_Bp-tOsbODJjXr6)+4Cm6lYAXf6S|mY6?E)35tQiHaa{L7^J4p zIK(`&d;Zx22M(M$vtaF60r~R_)}3oGOY?=c4V2cNJ$U~7ySHoS&&%WUl#~cO-Ekzr zN$5Ab^p9WMf{wEmx?b!!<(#dTz~t65r7@KE+CL7*10B|p3naU{U5wdP#RYF}Q#he! zA$$1Pv4q^SA6^K88ujJxCyB8@{i0j=;?=36$D%%eGBQ+n7Ou!}^Z$#PZHy++>mXk_Z9ebL6Ujfo+C$(K6rAZ;4$0b#qLYao`DqDN(-YVP7~wdF)=d{as2eK z{>nzRCxPB>71_-bKiWs#WLO_I;jq+7)sOO~*Cv)K&TiIzBxxw};B|=r4oE;dN7b<4nV*^)#N@j%xg&V6XGAgT9ZQ5+C zsLqr!-@w4&z;D41H;zvGJ@M*+y^aRYHcS)aS#i;!=5FJ1sYA^Y1xL(h85@KeC2SCE zI`NjDt;eKM$7zzz9PY#!vy58Wj82>okagnWfd-ygp@%1DW%X+h&qC#;Y%VzmkC{(g z{r|s5_kV$gi68B=n2!61Rwzkqwm7(od$VYhCLZqbC^kUyt3-0Da(H4<-3c-h?8 z7;vz`bm{{y`2s;RGd6Q`Hd|YvsAW(qv_PI{YnPUi_TI3`=s*{5MOFiUkfJ;1uUi~F zK`fJ8IyCe-7;FrCP1BP%eatwREX!=hHcKaL>WOU$VN-Z`ZUnI%JjEJmC?ziG>0uxu zI`Kj=G!nE5B_uqmD>Aq_Hy&hp@bZB@oA|~fY?+4-)-UPR3Rs?OqO)O}@dU;&%`;9- zKNhu0I2cqebk)6fq)T7(!4`?da}o}CnC}Ql%B^&`F>4l&o*qw4jmu7`*>fy@{J2q5 z%WGm%F>^*hnbN_V9)kT5s;yfJUh7}lrJvHvbE2sw#?ePfLI2lE)(v8@JQAABn{OPr zwW(3EaWO~YfsWD`Q@%}sJv}^Y*6AfGjEX~l6LPBT*j zL3Va$7nbG)3W=+s?ww%a;<|Is-rm&InFh1OBLrSe3*Qp*Y5qjPTz?&wgk{WY+7@d% zBpW;kXmD?_Y1kikz(;k8*-C*l!HISf1ydw>Bsh~xHuU+@rappUQU-n~x~-W`~y zbKzW$OIC5hy05Bij*L0xno+;o7>}t+Pgp1(9@(`2gTv#bTi#KZ9R1{ZEN-nm(e+V9 z#LI=~9G)x2iiQq|`V6^}AVe{xzYB4N1gNRsGQYr2>C^Z)<<`|El1hj?zp5&u)^=knYvYbCp-*B* zVwUVJR z|59%3_;`P=HBbIM12;D#4o<$OZ44v>-`6Lmqy&VfUbvoaQ7e`H2RE>7Grf)fr|a3~XWgoP%vN=(p*jEl zKN;(9Yj*s2FirQnUG1;dUCiI?*$xS17!*#L)p#V_Gh((Q&;S3%H9tP&AMca*^nC9> z?@w#??u-Bb^MtQ6@R|9k{&3rWmwCb6TW>aM_ATFi`uOpLv||d|-It+*s)y!TsHn2$ z~It7 zk4;9qzTS|U$K%n;6;&)clh;vAjc3*@i?g74hX4N$O|qCVLn1Y`b?Vfg5fK+I9jV_^ zaa2R`-$M_L9aj1QB_AH_Jv)2-|KIKwv;Y15{r}?P^Rlw({4y1v-b$zBG9BEJt{`&u zhqhGA$&P}h-#$P8&&*!uGvmY0cX#cNd~yHxH`$_YPU7#fHuHQA?KXxrk0odD#55}% zI>eKe#gmi669Fl44ow0TxpU{Ls;JxuY! zf8NJG;x1_0oF2bErtZHJ*&CAAn+^{N0|NH8Y1cTU|SlNnRUA%H+=Lu$T1@Gi9 zyO9urGMT1R5OyFY%qGP?-u$=e44_)BsGpF(~jTV?5`jjKIuR^|NcIi z%8iAOo6pa$PhP^nwugr;=VRCEl0*m5<01#Q%xY|GY@9gpobyTAN>-7K4*uMW4*|NlSzK%kStamk{;K!=Dm_0i@%{b(ar^${-QTy|e_qYO zSObHE?pU@QPNtn6heJIRQxCf?cyW7sefYXRI|?3N?-oyqv#)v1XIXvaLx|Fkokm7H zmo7!{Kx$nle;FgAJ5f1u`04w5c{Q~*`}&TIQ;!*r z_wC(R@=}(WnfYE4bGB{M&AbCk+4`^8hp_VS@cg&0{Z;<^MlFzuzxc`}M^|@tljD`wb49`8z{u;?~D2O9H;^tN;J})z$d;eF0qn_y7O?>gxUY{U`iooSZD!mvyj2 zbaOp<8DpXu7RDAD`lad!vKXiXTC@N}SUaD5&VqWuzXUAn;=50)m<@uNeALZZs^Y_C;`O4*T4h=0V%{we+ zR&D$izHybKp6iYs5NX_p5XQOu-M1{9U8_LT~+UP%|16IZ-9tTK-hb>7pQ$^(E zf2y+U>o+e~Z)C0c!FX#I?;nPFGk*M(Y-tT#0*xpYK2A=*)YNPB|Nk96#koa^Me6AU zZiWf$`zA~}Vc+wmvAa=*u|6_FqN}T?qT>k~&jG#lxPLneEIOiJqR0SvH5c9;BaqsHJ{EN~VA{DT(LPC!RGP6XEv8#qk*$ zx;?qZ5XIZn_F&TlBj&9|KfBjXI54YZ!PDT7DYG^kze`oGU<8e|Jg$$7YD%m z?_^+UapT5~e~Zi97xw)B|Nr2-YPXq*ic7crIDX&*`}d5O^{lM(u7wqUI%f0a=}uOb zh^0#{4IT+Cb#n3T%V=epx1>4J&@jQ2r|18c zt*p0hpFR1(Uifl-eSK&C{kr6y;}6ycbC@X?T-C1+6kUAd#EBpOkN5vqRFrx2NJX8m zw6txT8JpOxd-GcP9vtNq7WUeB@W276l^-lDC1=i5^iEk=o}D~5;qZz`gKE_sv*!Qb z$hm07%xTL4BQqT?HCU;uPu{-0WrKnCCDX(V-oE-9J7#Qd$k?^v+vW#fMXo8aXGy6) zug}g-iTdjvlk`x$*1t!Gi(8i|^maaH;v5l&1FNrRKT?2RbJ{-ni)h z4JC=SXAa~9CCus!TldHL>ev3>xAmT!yO*gMPdxsku-JIZHX|3;iN$NA)eiWuXRHm0 zaaOo=Nke@Zr0#7};|mK@b97Ym^JCk=ADDmQFZ*h*4Q#e)+#;))o~Sgf{`j%KyL;n? zfa23)>WPU=b1l=-+?197Z85177f&=ZW4j`8$cHyVm@8~~lCN-t>x7#;C%A6h_@}L5 z;5YBg%2lUaHk6(`eR@NUrLz;0>)U`o491!Xx*jPh0acqJ;hof^{q#Y?qeo8N8}>_x zUt74<_``JhdP#|Y2ci_~CrbREkg~BX?w_Sq*L;hIfiY_owNCY!_HuJ?Zf)_{wl$ND zt?RIuQ?jn&g-4(4%~CT|86AraN)tWHa&rEgno5f4KKk$}>Dkkxa_MW<@Z7t{!_IH? zel00cvh;YHDgHzL~1s zcp2*BaXiBCK%Y%|gYH%LxDEZ!thPLjIKlJe57*OwKR>@e+Wr3acKhgUZyw&<{NBF) z-{BJ{6x5Erd!QgBto-cqmr+~1Sc={6Yj!~JtaSU{e6OAC&xh_`G6xwzRaILUvk@Bq54w}-CArxT>NY* zY<00Gvi>D{Bzo$dIrE>DMS_LpL|1MQ!?kZL|Mv0;-1MIxkOdvLb5iMpbd{&ip3SHk z^pNKXH^0L7MUHw52@R?f_pf-xIZfvOM3##}EP7^9cMJqh9bxHaj43?t;K8_VOVi1} z+{NlPGx->P_|-QA2uBBpDKezNNu1k4usFPR|j=Z-mH#)W%2qM2xo)7ad1K#vrvuCVH98?>jFX;a z-QN2D{f!UxnVE&N!dY)^Xy`OPBqXrpTVexGk;2_`XB)Pg9ek;&a?D=EY^7kE_N7=s!UZ4R$$I63##;XWcbfU<&xePgC`nFig|*%g=X<3 z1o23?axk+=-xXhx^CYKzrufEV0{^-iPJGj3ewSxp_%dyS3b#j-f^y`a|K~rv6#nI> z1&wV_nfLeqSN;3LZ*R}Mq_WZQ(7^^1p;JdroM5n?!EJT!V1v=&CY}$|W&Q_BX1r`* z)cn-ZuluLz#Kq2d$&X^ZVhxsheQF({s?5xw1v44$)1YDJEn`35=I7dV=Z-X7y5*qf zq;QjaXHlcL77q{4hlK`gGeo4Acb5kUGcsotB)h3hEIi+kJf&$#i9=6sv+M#@4mP$o zQ_sD={z4i$edpvUGvEGy-Jc)g%*@)%hyVZoFL+q`!^D7?WN8V_<`XH5r3PBe*(V)S zwiqUKJH=!&MGE-pGiP-jNj{Xo{=-}FNyw*Prd21Qj`Ng>-}k5T#|QJ8U%X+P53FCV zoEajb&Aj_};S|Y?Uf(SdKl^XR9GHKab6d2hYqRWxTmC=)&wu#T`^yoy0r&U+-&_9v z`2GDh6LyN`_y)2?h<=vOR~MVyUBD(Mo+Kj2BT>r5eD8*8s7&@syE z!faPcobx|?*FXICH_!inh8o(z=_d{47&O!5>{52rO8=cu1G8$!k6&NsZ%(hT{r``D z(*cQXKR?>%JYs12XJqi_@Bjb$Ifhm-ALRQ?U(7h|C#WQCzOTQpE+F{xo6F1Z&)Al* zVWU)QX|D0CEAc$V23r@eZIat?>Cvmuwl_I#|LTPJ=BV%=&|WaxTymbR^ZI^>jfXa^ z`g&^W{P*|&|NhQDOYOmJ!Gx1f<+GX`ShRIi74tg|^%#gu{Qv)d-S2PT54Z1s^yo~* zVtM~xzuN^5Mg*%bvy8ZFcHkrv6KH<;V^iYiG@&rzN0I;ASMmhEU+NvfHw)4dJv7PU z-TnW6KRwN7)7dTi(6CEx(+8tx|LbF{b2g^G`~JRuzV-LsnKK>z`+M8l8Ql#VRwo>o za!5yX!!ov*r)Lu~BQmm#Hf%E5Q+KxDkAIx2@IS=`(I|L-&2 znrtSo8wXGEoba5Tv6SQ5h2ygV{vG7qU;qF4w%qx>y_P+_H)qT!nAxyZ;y@Hbf^WqC zgsV#HrkHn3Vr3~PGP1BX-muB&&7DpoUoW8p!P6EjEPpRyS9c@{?u&hOe}8{^I)8ut zL~qt9E)2I>JJla?F4_-Tn)v7E`9rPp_xAmL{r>#gB}bZ+6BE}4ZP>wuQlGJpqk_SOHd{{HSgzx;!P?UPPk zI3?6`r*lrjzs1M&x-9Ge|GT^4;pb0J=jY^D{P}Bn^Tq}-`yE|MHe31sUHL42a%;1c zhqv*M9|m)*k8j8p|8+w|&*5Ip?d|zI>F161)b+ZGfy>TAQAL0D*Ze$xdism6tZBSW z+b&B5EpnK(PJ)v|(EtDc|9^jff4|WAepJ-G&JKg;Pd=PFVX#n* zW2>Ikz#JFH79Y=MXV*5jjeQU`%f4tDcF64Tr@O^FV`*r{RtY6V_{o1tNnUd94nU()1XJ;MQSzmBP zKjZ0<37%pC2hN;H$UXbu{rQ6bcFx(_hu_}*kvHMUe5L|bagN82-P_vvH)cadN)GLt z`S3JMKNN(@e%NU+@7 zyyq6nBJQG2&2<+=pqVJH=+CdO|4&cfzrXHaw(uFf`}H|FE~-tXD;^(iw+~)c z;+V^uu9GuDfBu>I@Kwgs#aGCpv~t|9N+}{f5mGF9^2e^?nA3YNgq0c?7}WebQ}EyJoC!4QD;v(;-u~ah zQt|n5!$aa_3Txjb{^6gyu2p@?{}r1z@z~cJ*;ITezrQcOTda0}$;<64G$t=qef{@$ z{e0Ws-~ZbmetCI+qAELcWM}e?X<{cHoRe)6R$nktT>kulakV@^)OpAT>|G&SyT+hh( zgHN)8_Yg~DHP7tz?o*WF_y5~n_xJtgbWp?o|EsI}rOp4{+4c4Ny}kSO5^^%lzm6D=Kn|+NGk(cV^j(4XpM5jqdDXO^uE;jEFMQ&@_~> zIZ^P_=`)98ylWGcX;&Uja=NF`M)YHd&(&Ja7WeDhlkncWIdvdeV&5h0N*#3;OH~wGHp73AZ`djs*Bk%L?|NH;v=l|E&K~B|IJ(`kw^wHzP zX~~DR)Q*M3{+&8iG^%;i27`ivKgXvTeOTU~cu821%^^>ywM6yDPe}_)$&ykKVq%gI z(@(P~Xn6S0xoy6^b8`;bnQ?)_4^$w|w3snd zQqJ}ld$x~5Uv6T<7W0X(q}YBiT`GS5@7tFe>#8sO#>ULj(#rb!i+5UfZ#K>j5SYEk z##s3B|M&O*|Nrw-KGX8#e}4IYoE!#^Jh*ryIGY%!+Wt9sem+n1Hiw%|P94@Vsi~4( zT@pn_67y`6cT`M#cviYi-d^I$)xx+>9o!CKy$@iHnQkE=@$c`?!v{{C`tjfX^8f!I z^0-r8>l8IJKl=4QEbNS$n$Dj;JVixkmMzmUGV;n`W2a%zf!XFf9}aW8zn>lu+PZ1xOvxQP!Dxp>ORHqv-=rVk`K#90ddA*d z?*H+sCJ$`(`^g;8azX(Xb@q({N7Ov*9{Ja-?f=NwSQ55qUaL@`iQNI+^|z8Nb|l{n zw^-I$cqKJ3TQo?+;3$i}yan;Tc=%I{+9Zd%58A!q63b+_j-eX&)1!jl8Z^)oF%)mHIyd+FQu5%N#$-(2AP z@hPYxdrITPiGdy+fe|w}uSK=p(dlH8v-|V=m1xL>nKK(_&TO13>pwer#`i$ZruzR9 z@9re-C~Srd{5!g`F85M;o-c{h@NPH?z%oHrVF=f1oUrA^ldd zVd6x_vR&MV>%S;7NJvUZ+}r!R{?`}zu&|8je-F3+U$(60&6~w1nbbi`m%rs^WPbc_ zfB5zF8}C`$7>wL5%(qSc@wNLv;7@4cFaMz`FQ1Yix^#9!NsoNWEZ(ZWZ~p)1FH_d8anztNRCrw6wPT`vTboDk>5M zvT0sg4NI0tn@h;qs}*botxo#?|Il=cd3LSpC;xw#(pa{Z^Nkqy7H_8ii`5p?hp+!< zSt}J)_wkd_i4z7NKK^x|)ziAXbHB$MLEe3}$vbML3%_nypm0)rhBEV$TP>hb0DI8# z<$L?p1D-%VFx{f=Ps6gX8G`>O=$Sa4IhX7!aH46x-CygPANn;4{+>1$0ZuV`($a^=VI6C9C@M;bze|JHwdbN^uTg=MP=shrle|$Vkb5e-E2a>13O*JbZ)S`J!UuIkw&vFO49H z@p|;nZ@rFAOo@`rUj9$)iwc`=6(_pLt^M_Tdg9|_^&f&AfBckmagkWERPx4+2bmcn z3!3-WOWxb>erNWVzt)Gpzvuap|NMZC?uVGOC7;!%^>WOBhD3PLpFbxHp7ZUh_2~Kk zq11N6YO@R0i~j%r4|4Ei_5Tl4Z%o(c`SpvZrsn_58E&)1Ee`(w&!eNmv)oV7MMQ)n zLop)ifNxqt!j3ZD`zbmyP-CZC{QK7`_woFJhV_x%SAID7%lGvB-(UN?{GH4pW-BY6 zUAzA8+~GEv)$YS<@xxoQc_vK?(36)xyxjkVbmT(@%dC!_Zjax`>pb6*0P4Vk3dr!a z8#XBT6&_Djz0_hihr3+o@&5Y%{BnQlzw1pof9%+Tb>}|3m){p?Y1L5wU*g}t*0{{J zdXbC;3@Z2cS291X`^Pia^B|YPRrY5uES4F0Y^eX;R(DTB58~WCbFH}=3DKzs zdY9_u)v`dxUT$9J6_bZDr<;(z zz?W^$s^McEJmvG$O z?AkP=^4Rb2X^;|d%Z!B}=eWerICxIaG4Y4c6}y7@CXLcU%v?wLdr~?CIe0jj&TV^i zDNcW+*kHob^Ehsf z+LAqKKlIkS&D(g;vZr$E6NWT!ia9jh0#uNFVA%W2Sx;qx=bBsdZ5&1J`uKpnyn&x^2$w5>C#N zKE9Iw{;3++xC$jCB~^TEdiWyn$OqLW|NCdo{m`Fcuwa@@0cbtSA=aeTqLA$TQE~eJe^6hLUk=oWRnYNiO*`H#4q8{dI?V9R{qr9_ zsUCiLndkjI$GN`2A099sZf55>b&6+qx$%+X#w~3|28Mteq;D0 zw^ovERoAvunY)&E&E=+=b$JLqlDMB9J2Sm!;Blwfd5BsIjr#B_= z_!iqvQ{Xviq<;6wJ-^+0PGzRbN5DG}=ghI?b14uxQs3gIDYQsntwkHN#bWIX){LE9 zVIhkqPWa)Kv+;kD_|4er#_MNNp6xyPq_!#Jex}s3NR#mMn?6>pjk)A{d4tZxgUnq@ zQeR@**Dcog{v|nfV(O(S%M{OdDmm_pmw7T%L6^I&@%`gnGt}A|mvyeXV`A9%#WvSF zHh5ad)#)=2H>e067h!dDm?qeE!)(>B^Sk!uo-p!snzr}hbBl_58y%`X{PI3%(Jph^ z>Gr(}(J$b%)iY-!SF;08n?0YOIr~F4_T_r9bD~~7{*h+aa>4V(rG?I{)}`-NSlROf z9=hCG7Ob&B$i-}BZTmgrgdddK~pF(-`p z*k`dR?sNVi`sH@7McVGC6)Ig3Yd_db`Q@7C-(w?}aklRK%OhW{o?YE^BWIudgmWJk z&j0&#ZDi=2^Z)lXG0uZ{XYPLPrT~HC_6q#$(M1L&9O8;CYVA@|f5ne%tl#*Hokjd$ z%$-?L(sQR&B$)kJ5Fd57;&gSv{O@~@7vA~X@VIJe<_sS>(`n{^E5!KiT&`{_jO}xG zjksR8v%qTOZ9&%%(YD5Q+DsKn>+Kht>$+Soef{6QAW< zbZ43US+vB3<6Qct9u-C3_$Kq&f^CaiKRX)jtN3Ns@c%6zsD$j9wV$ggK;&4x*fCia zLB*VmH>`QhhhuA)r?=f(<6cnwic`s{<%6VCL4lE|Y>d>dfG_9I{m^Y^wzs(d(?Yr6 zis|(3fA#fw-wIFf@wc1*__xbN=apK1>MaX0Z{+QaE0odcUgykF`{G=h2<;;{|zc%IuBYO$h==>*pt3eDdUph^47!sY$>0o;CF^Ih5J{G9tw))#zbG&P*G14h^m)M@kKS zf7Ry9wr~r*XmUwJ(L;!jy(MA0bI1F3rO>6T3S!fY-IUv$W!Mi_y!@%J;kDOjy_Oia zP5;%dvuv6ZC+Sqjf1P6&*s^Z({mplcGKYiA_OBC&V@OzHm z*SzhSH#SA+-Hu5W*2f$B z%szJEW@jC@K+3gw+;)$!6&5a8_wlqiTXuHa_I$}J zS0(LglM)gWLA{7esu^hx-Dz%aprNiSA#Ja+<|xYuuxMufOU}u1dggTF`8mUs6a$+I zho*E;Kk@(nIdcs8Tml5z>`yAIIEnl!=S&aC3eo#;?its*h8LbDi>`LD+)Rq!IPJ}? zzrSaGn*GPP{@Lz@GHlaWk6W&>vP*cJyQxjl(n<|!dC%f=3^7^vBgNisnJ?dYTIhCK?9B9* z9cAa71deO8zK`Etd-T{$`Sgk$Y2VM%W(|;mQ>8f;3Ubcx^o$QgEt_FG{r`=$2PUSH zixx}9?N75PX0z2%xHI2=;mfuq7xO%f<6`F?yg%PDdiKNr^;vy;AF->c@n~uBSXuGp z6aWm^%Gh@j*){`&2?|YC&P*iZv+{CLa!xe&oef5hcgN!xO*X zXpV*B6%$)fGoIo9e^XDiDk?NQJkH-%`B|b*UOj6j zr`~~+oO5q|{hAsOmUiJvih;TMeG{*S28QMahQr7EdD7B&q|J?TZXWvZ%k=OhK+me>73q<8+XidE7Ki_DMmFuTO==L?GRdd4v4+ey^+kNbIXUY$wVz z(?(b*>A|zK2TxK8ijxBZ6DLef{qQaA$M5NlQ>UJ}SkB>aI^D%jRFGNDHYp(`<;L#m ziT~@`&d-rJ#EC8Hs6GDu zq*08xrY6_6vrXsre-)nfu`W_xefyU+DcQcaA6Z=4T3dA4q($*Cg8<7Js^G1j1jQO(;%)cdJI4v)i5F2A^w#UaeTJxQsX>{V>Len4pw_1}O-kwh6 zjY(mfWm&3Sx4asy4bYFmIdG=H{wgcU&OY!+FZzzo#$U^b9Gi4>dVws+g{wFe~#zyZ)?=5l1BhvIUy>4y3Y9WT=;T!IO!-UaQLd{!JOg; zS$~pcJCDm$ID|FFvjhwDdMD0i4xT2MIaBE41N%$A)Xq$4QR~odHB?yIBdjj5YPICL z*u{sOpxbzrDi?hGoDM3Eo;;PTsg;y4m{#}qxY@_HWetp(J2-WH7ic<*9hn?f!1+k< zxN7T5xtM86>Z~T^Li`tkn8O^;m>g`-t#t@GwlbKfx0mPo`rvdAEI%-Us?B9V`*S#?1GQG#*zzk#~4an1sign7Y47H@5XIT-*Eb zqH@-|XONkPMJu-JsU5p;<;a8Arw<%!+kEz5gL0~Z@rGw+F;C587F-ROA!R74y!s%s zf$xE-9fr0|%NTrB_1YIs1-X-bozdgZ>7Z@9%X}9*af^TsG+_Aue^H4>dwaX0@nl8Q zsfB9Nk`+?8XVa6QBhs6 zAxxX6rK|U7w`AH`r#si2OCce+=$68UEhf$D^+D$BuxOmc zI=Ne?1K6HntD8RrTM=P0R?RGojnSYI*SQD)!4aC2p2h4zZo-Al4q{bzoB%noXkXorLA zuz$9`enPNvc+tr(6MJ}^0|cB444S*Rl1?i|X*EXmFOKYr+&n>Cdk2H#<$|U5XWTgD z7ITP6@K~JUVe4Ufd|kgS`MAWjHHj8gQy%^-wwk*GQt2=9d<0q-nwIw9adJR-`iCzm z1!ZXgp+~R(JK!T$A=NV@(8=wRVuaB_-(!mfS9^cRR$nRm>%aZs`};wscf7fKdP9X_ zQn=<_Xefm~a$uXlyZYTb9yd2od!VT3!qJ@ z!JFI1KYUXC`MP8UEEcjs4n2ENMuta8iAP9?M@@}q&Kw>oDIPbs3wu3lYF@C%F4}0w zE&6CO$3&@*>U?Z_UNUFho@@uOxHKwQ+xWu-`@=LA9I6>8qg^- z8;&%xJ^1PA(9k${uA@(nW5kTc=4MCF&VzU6G`6=lPMqi%C%21j?nJ?=6^xt8)+|f( zO5}Q!ANc4}t7f;izS`sa_Mlo?FFtKY{d~|V6^Gj;?dsKjo^S@uw1Fx(DVityTu8WZxp`gLv!|&OCMQprl=R@4TPCNFrCpBFz!JP0?MmyuT2Y6zwE`RRIxu-S1G@? z_^{ya$LrGcx7Ocw&|KoOw#V_@mSc0i{OdfqBasp8g;kx*{0S*30>a4&$;k=n=?@+y z35cdnn3xE1`sCyZ)6xV)Qy)BPnpeSTFT$Fg%oO?IzI~g0{eosc9i0n1+D#Pa+oxB2 zYI*oExlP`Ffw<6}U97fJ??0!%@^8B+3`vZ4<{EMxP!MsJ=W1F}@n86q{J#eef?Tab ztV&;o&C+D&(2xFfxWMLARFINe&rO}3dUKLhSmr3F2i>~;{NCd`&(7xdHAe-ywgd$% zHNBSP=klUf+~s}G$GgiamI{OYpxMdH-=M-FCiddv51tLeArH@Tb0npt{P;Qf;cfG( jeivQ^h&u?LfW^#kCBJ6FvfZ0pK^i<={an^LB{Ts5;0bTK diff --git a/tensorflow/contrib/image/python/ops/dense_image_warp.py b/tensorflow/contrib/image/python/ops/dense_image_warp.py deleted file mode 100644 index 9403003be1..0000000000 --- a/tensorflow/contrib/image/python/ops/dense_image_warp.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Image warping using per-pixel flow vectors.""" - -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops - -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops - - -def _interpolate_bilinear(grid, - query_points, - name='interpolate_bilinear', - indexing='ij'): - """Similar to Matlab's interp2 function. - - Finds values for query points on a grid using bilinear interpolation. - - Args: - grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`. - query_points: a 3-D float `Tensor` of N points with shape `[batch, N, 2]`. - name: a name for the operation (optional). - indexing: whether the query points are specified as row and column (ij), - or Cartesian coordinates (xy). - - Returns: - values: a 3-D `Tensor` with shape `[batch, N, channels]` - - Raises: - ValueError: if the indexing mode is invalid, or if the shape of the inputs - invalid. - """ - if indexing != 'ij' and indexing != 'xy': - raise ValueError('Indexing mode must be \'ij\' or \'xy\'') - - with ops.name_scope(name): - shape = grid.get_shape().as_list() - if len(shape) != 4: - msg = 'Grid must be 4 dimensional. Received size: ' - raise ValueError(msg + str(grid.get_shape())) - - batch_size, height, width, channels = shape - query_type = query_points.dtype - grid_type = grid.dtype - - if (len(query_points.get_shape()) != 3 or - query_points.get_shape()[2].value != 2): - msg = ('Query points must be 3 dimensional and size 2 in dim 2. Received ' - 'size: ') - raise ValueError(msg + str(query_points.get_shape())) - - _, num_queries, _ = query_points.get_shape().as_list() - - if height < 2 or width < 2: - msg = 'Grid must be at least batch_size x 2 x 2 in size. Received size: ' - raise ValueError(msg + str(grid.get_shape())) - - alphas = [] - floors = [] - ceils = [] - - index_order = [0, 1] if indexing == 'ij' else [1, 0] - unstacked_query_points = array_ops.unstack(query_points, axis=2) - - for dim in index_order: - with ops.name_scope('dim-' + str(dim)): - queries = unstacked_query_points[dim] - - size_in_indexing_dimension = shape[dim + 1] - - # max_floor is size_in_indexing_dimension - 2 so that max_floor + 1 - # is still a valid index into the grid. - max_floor = math_ops.cast(size_in_indexing_dimension - 2, query_type) - min_floor = constant_op.constant(0.0, dtype=query_type) - floor = math_ops.minimum( - math_ops.maximum(min_floor, math_ops.floor(queries)), max_floor) - int_floor = math_ops.cast(floor, dtypes.int32) - floors.append(int_floor) - ceil = int_floor + 1 - ceils.append(ceil) - - # alpha has the same type as the grid, as we will directly use alpha - # when taking linear combinations of pixel values from the image. - alpha = math_ops.cast(queries - floor, grid_type) - min_alpha = constant_op.constant(0.0, dtype=grid_type) - max_alpha = constant_op.constant(1.0, dtype=grid_type) - alpha = math_ops.minimum(math_ops.maximum(min_alpha, alpha), max_alpha) - - # Expand alpha to [b, n, 1] so we can use broadcasting - # (since the alpha values don't depend on the channel). - alpha = array_ops.expand_dims(alpha, 2) - alphas.append(alpha) - - if batch_size * height * width > np.iinfo(np.int32).max / 8: - error_msg = """The image size or batch size is sufficiently large - that the linearized addresses used by array_ops.gather - may exceed the int32 limit.""" - raise ValueError(error_msg) - - flattened_grid = array_ops.reshape(grid, - [batch_size * height * width, channels]) - batch_offsets = array_ops.reshape( - math_ops.range(batch_size) * height * width, [batch_size, 1]) - - # This wraps array_ops.gather. We reshape the image data such that the - # batch, y, and x coordinates are pulled into the first dimension. - # Then we gather. Finally, we reshape the output back. It's possible this - # code would be made simpler by using array_ops.gather_nd. - def gather(y_coords, x_coords, name): - with ops.name_scope('gather-' + name): - linear_coordinates = batch_offsets + y_coords * width + x_coords - gathered_values = array_ops.gather(flattened_grid, linear_coordinates) - return array_ops.reshape(gathered_values, - [batch_size, num_queries, channels]) - - # grab the pixel values in the 4 corners around each query point - top_left = gather(floors[0], floors[1], 'top_left') - top_right = gather(floors[0], ceils[1], 'top_right') - bottom_left = gather(ceils[0], floors[1], 'bottom_left') - bottom_right = gather(ceils[0], ceils[1], 'bottom_right') - - # now, do the actual interpolation - with ops.name_scope('interpolate'): - interp_top = alphas[1] * (top_right - top_left) + top_left - interp_bottom = alphas[1] * (bottom_right - bottom_left) + bottom_left - interp = alphas[0] * (interp_bottom - interp_top) + interp_top - - return interp - - -def dense_image_warp(image, flow, name='dense_image_warp'): - """Image warping using per-pixel flow vectors. - - Apply a non-linear warp to the image, where the warp is specified by a dense - flow field of offset vectors that define the correspondences of pixel values - in the output image back to locations in the source image. Specifically, the - pixel value at output[b, j, i, c] is - images[b, j - flow[b, j, i, 0], i - flow[b, j, i, 1], c]. - - The locations specified by this formula do not necessarily map to an int - index. Therefore, the pixel value is obtained by bilinear - interpolation of the 4 nearest pixels around - (b, j - flow[b, j, i, 0], i - flow[b, j, i, 1]). For locations outside - of the image, we use the nearest pixel values at the image boundary. - - - Args: - image: 4-D float `Tensor` with shape `[batch, height, width, channels]`. - flow: A 4-D float `Tensor` with shape `[batch, height, width, 2]`. - name: A name for the operation (optional). - - Note that image and flow can be of type tf.half, tf.float32, or tf.float64, - and do not necessarily have to be the same type. - - Returns: - A 4-D float `Tensor` with shape`[batch, height, width, channels]` - and same type as input image. - - Raises: - ValueError: if height < 2 or width < 2 or the inputs have the wrong number - of dimensions. - """ - with ops.name_scope(name): - batch_size, height, width, channels = image.get_shape().as_list() - # The flow is defined on the image grid. Turn the flow into a list of query - # points in the grid space. - grid_x, grid_y = array_ops.meshgrid( - math_ops.range(width), math_ops.range(height)) - stacked_grid = math_ops.cast( - array_ops.stack([grid_y, grid_x], axis=2), flow.dtype) - batched_grid = array_ops.expand_dims(stacked_grid, axis=0) - query_points_on_grid = batched_grid - flow - query_points_flattened = array_ops.reshape(query_points_on_grid, - [batch_size, height * width, 2]) - # Compute values at the query points, then reshape the result back to the - # image grid. - interpolated = _interpolate_bilinear(image, query_points_flattened) - interpolated = array_ops.reshape(interpolated, - [batch_size, height, width, channels]) - return interpolated diff --git a/tensorflow/contrib/image/python/ops/interpolate_spline.py b/tensorflow/contrib/image/python/ops/interpolate_spline.py deleted file mode 100644 index ad17921991..0000000000 --- a/tensorflow/contrib/image/python/ops/interpolate_spline.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Polyharmonic spline interpolation.""" - -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops - -EPSILON = 0.0000000001 - - -def _cross_squared_distance_matrix(x, y): - """Pairwise squared distance between two (batch) matrices' rows (2nd dim). - - Computes the pairwise distances between rows of x and rows of y - Args: - x: [batch_size, n, d] float `Tensor` - y: [batch_size, m, d] float `Tensor` - - Returns: - squared_dists: [batch_size, n, m] float `Tensor`, where - squared_dists[b,i,j] = ||x[b,i,:] - y[b,j,:]||^2 - """ - x_norm_squared = math_ops.reduce_sum(math_ops.square(x), 2) - y_norm_squared = math_ops.reduce_sum(math_ops.square(y), 2) - - # Expand so that we can broadcast. - x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) - y_norm_squared_tile = array_ops.expand_dims(y_norm_squared, 1) - - x_y_transpose = math_ops.matmul(x, y, adjoint_b=True) - - # squared_dists[b,i,j] = ||x_bi - y_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj - squared_dists = x_norm_squared_tile - 2 * x_y_transpose + y_norm_squared_tile - - return squared_dists - - -def _pairwise_squared_distance_matrix(x): - """Pairwise squared distance among a (batch) matrix's rows (2nd dim). - - This saves a bit of computation vs. using _cross_squared_distance_matrix(x,x) - - Args: - x: `[batch_size, n, d]` float `Tensor` - - Returns: - squared_dists: `[batch_size, n, n]` float `Tensor`, where - squared_dists[b,i,j] = ||x[b,i,:] - x[b,j,:]||^2 - """ - - x_x_transpose = math_ops.matmul(x, x, adjoint_b=True) - x_norm_squared = array_ops.matrix_diag_part(x_x_transpose) - x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) - - # squared_dists[b,i,j] = ||x_bi - x_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj - squared_dists = x_norm_squared_tile - 2 * x_x_transpose + array_ops.transpose( - x_norm_squared_tile, [0, 2, 1]) - - return squared_dists - - -def _solve_interpolation(train_points, train_values, order, - regularization_weight): - """Solve for interpolation coefficients. - - Computes the coefficients of the polyharmonic interpolant for the 'training' - data defined by (train_points, train_values) using the kernel phi. - - Args: - train_points: `[b, n, d]` interpolation centers - train_values: `[b, n, k]` function values - order: order of the interpolation - regularization_weight: weight to place on smoothness regularization term - - Returns: - w: `[b, n, k]` weights on each interpolation center - v: `[b, d, k]` weights on each input dimension - """ - - b, n, d = train_points.get_shape().as_list() - _, _, k = train_values.get_shape().as_list() - - # First, rename variables so that the notation (c, f, w, v, A, B, etc.) - # follows https://en.wikipedia.org/wiki/Polyharmonic_spline. - # To account for python style guidelines we use - # matrix_a for A and matrix_b for B. - - c = train_points - f = train_values - - # Next, construct the linear system. - with ops.name_scope('construct_linear_system'): - - matrix_a = _phi(_pairwise_squared_distance_matrix(c), order) # [b, n, n] - if regularization_weight > 0: - batch_identity_matrix = np.expand_dims(np.eye(n), 0) - batch_identity_matrix = constant_op.constant( - batch_identity_matrix, dtype=train_points.dtype) - - matrix_a += regularization_weight * batch_identity_matrix - - # Append ones to the feature values for the bias term in the linear model. - ones = array_ops.ones([b, n, 1], train_points.dtype) - matrix_b = array_ops.concat([c, ones], 2) # [b, n, d + 1] - - # [b, n + d + 1, n] - left_block = array_ops.concat( - [matrix_a, array_ops.transpose(matrix_b, [0, 2, 1])], 1) - - num_b_cols = matrix_b.get_shape()[2] # d + 1 - lhs_zeros = array_ops.zeros([b, num_b_cols, num_b_cols], train_points.dtype) - right_block = array_ops.concat([matrix_b, lhs_zeros], - 1) # [b, n + d + 1, d + 1] - lhs = array_ops.concat([left_block, right_block], - 2) # [b, n + d + 1, n + d + 1] - - rhs_zeros = array_ops.zeros([b, d + 1, k], train_points.dtype) - rhs = array_ops.concat([f, rhs_zeros], 1) # [b, n + d + 1, k] - - # Then, solve the linear system and unpack the results. - with ops.name_scope('solve_linear_system'): - w_v = linalg_ops.matrix_solve(lhs, rhs) - w = w_v[:, :n, :] - v = w_v[:, n:, :] - - return w, v - - -def _apply_interpolation(query_points, train_points, w, v, order): - """Apply polyharmonic interpolation model to data. - - Given coefficients w and v for the interpolation model, we evaluate - interpolated function values at query_points. - - Args: - query_points: `[b, m, d]` x values to evaluate the interpolation at - train_points: `[b, n, d]` x values that act as the interpolation centers - ( the c variables in the wikipedia article) - w: `[b, n, k]` weights on each interpolation center - v: `[b, d, k]` weights on each input dimension - order: order of the interpolation - - Returns: - Polyharmonic interpolation evaluated at points defined in query_points. - """ - - batch_size = train_points.get_shape()[0].value - num_query_points = query_points.get_shape()[1].value - - # First, compute the contribution from the rbf term. - pairwise_dists = _cross_squared_distance_matrix(query_points, train_points) - phi_pairwise_dists = _phi(pairwise_dists, order) - - rbf_term = math_ops.matmul(phi_pairwise_dists, w) - - # Then, compute the contribution from the linear term. - # Pad query_points with ones, for the bias term in the linear model. - query_points_pad = array_ops.concat([ - query_points, - array_ops.ones([batch_size, num_query_points, 1], train_points.dtype) - ], 2) - linear_term = math_ops.matmul(query_points_pad, v) - - return rbf_term + linear_term - - -def _phi(r, order): - """Coordinate-wise nonlinearity used to define the order of the interpolation. - - See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition. - - Args: - r: input op - order: interpolation order - - Returns: - phi_k evaluated coordinate-wise on r, for k = r - """ - - # using EPSILON prevents log(0), sqrt0), etc. - # sqrt(0) is well-defined, but its gradient is not - with ops.name_scope('phi'): - if order == 1: - r = math_ops.maximum(r, EPSILON) - r = math_ops.sqrt(r) - return r - elif order == 2: - return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON)) - elif order == 4: - return 0.5 * math_ops.square(r) * math_ops.log( - math_ops.maximum(r, EPSILON)) - elif order % 2 == 0: - r = math_ops.maximum(r, EPSILON) - return 0.5 * math_ops.pow(r, 0.5 * order) * math_ops.log(r) - else: - r = math_ops.maximum(r, EPSILON) - return math_ops.pow(r, 0.5 * order) - - -def interpolate_spline(train_points, - train_values, - query_points, - order, - regularization_weight=0.0, - name='interpolate_spline'): - r"""Interpolate signal using polyharmonic interpolation. - - The interpolant has the form - $$f(x) = \sum_{i = 1}^n w_i \phi(||x - c_i||) + v^T x + b.$$ - - This is a sum of two terms: (1) a weighted sum of radial basis function (RBF) - terms, with the centers \\(c_1, ... c_n\\), and (2) a linear term with a bias. - The \\(c_i\\) vectors are 'training' points. In the code, b is absorbed into v - by appending 1 as a final dimension to x. The coefficients w and v are - estimated such that the interpolant exactly fits the value of the function at - the \\(c_i\\) points, the vector w is orthogonal to each \\(c_i\\), and the - vector w sums to 0. With these constraints, the coefficients can be obtained - by solving a linear system. - - \\(\phi\\) is an RBF, parametrized by an interpolation - order. Using order=2 produces the well-known thin-plate spline. - - We also provide the option to perform regularized interpolation. Here, the - interpolant is selected to trade off between the squared loss on the training - data and a certain measure of its curvature - ([details](https://en.wikipedia.org/wiki/Polyharmonic_spline)). - Using a regularization weight greater than zero has the effect that the - interpolant will no longer exactly fit the training data. However, it may be - less vulnerable to overfitting, particularly for high-order interpolation. - - Note the interpolation procedure is differentiable with respect to all inputs - besides the order parameter. - - Args: - train_points: `[batch_size, n, d]` float `Tensor` of n d-dimensional - locations. These do not need to be regularly-spaced. - train_values: `[batch_size, n, k]` float `Tensor` of n c-dimensional values - evaluated at train_points. - query_points: `[batch_size, m, d]` `Tensor` of m d-dimensional locations - where we will output the interpolant's values. - order: order of the interpolation. Common values are 1 for - \\(\phi(r) = r\\), 2 for \\(\phi(r) = r^2 * log(r)\\) (thin-plate spline), - or 3 for \\(\phi(r) = r^3\\). - regularization_weight: weight placed on the regularization term. - This will depend substantially on the problem, and it should always be - tuned. For many problems, it is reasonable to use no regularization. - If using a non-zero value, we recommend a small value like 0.001. - name: name prefix for ops created by this function - - Returns: - `[b, m, k]` float `Tensor` of query values. We use train_points and - train_values to perform polyharmonic interpolation. The query values are - the values of the interpolant evaluated at the locations specified in - query_points. - """ - with ops.name_scope(name): - - # First, fit the spline to the observed data. - with ops.name_scope('solve'): - w, v = _solve_interpolation(train_points, train_values, order, - regularization_weight) - - # Then, evaluate the spline at the query locations. - with ops.name_scope('predict'): - query_values = _apply_interpolation(query_points, train_points, w, v, - order) - - return query_values diff --git a/tensorflow/contrib/image/python/ops/sparse_image_warp.py b/tensorflow/contrib/image/python/ops/sparse_image_warp.py deleted file mode 100644 index 9f50503d8f..0000000000 --- a/tensorflow/contrib/image/python/ops/sparse_image_warp.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Image warping using sparse flow defined at control points.""" - -import numpy as np - -from tensorflow.contrib.image.python.ops import dense_image_warp -from tensorflow.contrib.image.python.ops import interpolate_spline - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops - - -def _get_grid_locations(image_height, image_width): - """Wrapper for np.meshgrid.""" - - y_range = np.linspace(0, image_height - 1, image_height) - x_range = np.linspace(0, image_width - 1, image_width) - y_grid, x_grid = np.meshgrid(y_range, x_range, indexing='ij') - return np.stack((y_grid, x_grid), -1) - - -def _expand_to_minibatch(np_array, batch_size): - """Tile arbitrarily-sized np_array to include new batch dimension.""" - tiles = [batch_size] + [1] * np_array.ndim - return np.tile(np.expand_dims(np_array, 0), tiles) - - -def _get_boundary_locations(image_height, image_width, num_points_per_edge): - """Compute evenly-spaced indices along edge of image.""" - y_range = np.linspace(0, image_height - 1, num_points_per_edge + 2) - x_range = np.linspace(0, image_width - 1, num_points_per_edge + 2) - ys, xs = np.meshgrid(y_range, x_range, indexing='ij') - is_boundary = np.logical_or( - np.logical_or(xs == 0, xs == image_width - 1), - np.logical_or(ys == 0, ys == image_height - 1)) - return np.stack([ys[is_boundary], xs[is_boundary]], axis=-1) - - -def _add_zero_flow_controls_at_boundary(control_point_locations, - control_point_flows, image_height, - image_width, boundary_points_per_edge): - """Add control points for zero-flow boundary conditions. - - Augment the set of control points with extra points on the - boundary of the image that have zero flow. - - Args: - control_point_locations: input control points - control_point_flows: their flows - image_height: image height - image_width: image width - boundary_points_per_edge: number of points to add in the middle of each - edge (not including the corners). - The total number of points added is - 4 + 4*(boundary_points_per_edge). - - Returns: - merged_control_point_locations: augmented set of control point locations - merged_control_point_flows: augmented set of control point flows - """ - - batch_size = control_point_locations.get_shape()[0].value - - boundary_point_locations = _get_boundary_locations(image_height, image_width, - boundary_points_per_edge) - - boundary_point_flows = np.zeros([boundary_point_locations.shape[0], 2]) - - type_to_use = control_point_locations.dtype - boundary_point_locations = constant_op.constant( - _expand_to_minibatch(boundary_point_locations, batch_size), - dtype=type_to_use) - - boundary_point_flows = constant_op.constant( - _expand_to_minibatch(boundary_point_flows, batch_size), dtype=type_to_use) - - merged_control_point_locations = array_ops.concat( - [control_point_locations, boundary_point_locations], 1) - - merged_control_point_flows = array_ops.concat( - [control_point_flows, boundary_point_flows], 1) - - return merged_control_point_locations, merged_control_point_flows - - -def sparse_image_warp(image, - source_control_point_locations, - dest_control_point_locations, - interpolation_order=2, - regularization_weight=0.0, - num_boundary_points=0, - name='sparse_image_warp'): - """Image warping using correspondences between sparse control points. - - Apply a non-linear warp to the image, where the warp is specified by - the source and destination locations of a (potentially small) number of - control points. First, we use a polyharmonic spline - (@{tf.contrib.image.interpolate_spline}) to interpolate the displacements - between the corresponding control points to a dense flow field. - Then, we warp the image using this dense flow field - (@{tf.contrib.image.dense_image_warp}). - - Let t index our control points. For regularization_weight=0, we have: - warped_image[b, dest_control_point_locations[b, t, 0], - dest_control_point_locations[b, t, 1], :] = - image[b, source_control_point_locations[b, t, 0], - source_control_point_locations[b, t, 1], :]. - - For regularization_weight > 0, this condition is met approximately, since - regularized interpolation trades off smoothness of the interpolant vs. - reconstruction of the interpolant at the control points. - See @{tf.contrib.image.interpolate_spline} for further documentation of the - interpolation_order and regularization_weight arguments. - - - Args: - image: `[batch, height, width, channels]` float `Tensor` - source_control_point_locations: `[batch, num_control_points, 2]` float - `Tensor` - dest_control_point_locations: `[batch, num_control_points, 2]` float - `Tensor` - interpolation_order: polynomial order used by the spline interpolation - regularization_weight: weight on smoothness regularizer in interpolation - num_boundary_points: How many zero-flow boundary points to include at - each image edge.Usage: - num_boundary_points=0: don't add zero-flow points - num_boundary_points=1: 4 corners of the image - num_boundary_points=2: 4 corners and one in the middle of each edge - (8 points total) - num_boundary_points=n: 4 corners and n-1 along each edge - name: A name for the operation (optional). - - Note that image and offsets can be of type tf.half, tf.float32, or - tf.float64, and do not necessarily have to be the same type. - - Returns: - warped_image: `[batch, height, width, channels]` float `Tensor` with same - type as input image. - flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense - flow field produced by the interpolation. - """ - - control_point_flows = ( - dest_control_point_locations - source_control_point_locations) - - clamp_boundaries = num_boundary_points > 0 - boundary_points_per_edge = num_boundary_points - 1 - - with ops.name_scope(name): - - batch_size, image_height, image_width, _ = image.get_shape().as_list() - - # This generates the dense locations where the interpolant - # will be evaluated. - grid_locations = _get_grid_locations(image_height, image_width) - - flattened_grid_locations = np.reshape(grid_locations, - [image_height * image_width, 2]) - - flattened_grid_locations = constant_op.constant( - _expand_to_minibatch(flattened_grid_locations, batch_size), image.dtype) - - if clamp_boundaries: - (dest_control_point_locations, - control_point_flows) = _add_zero_flow_controls_at_boundary( - dest_control_point_locations, control_point_flows, image_height, - image_width, boundary_points_per_edge) - - flattened_flows = interpolate_spline.interpolate_spline( - dest_control_point_locations, control_point_flows, - flattened_grid_locations, interpolation_order, regularization_weight) - - dense_flows = array_ops.reshape(flattened_flows, - [batch_size, image_height, image_width, 2]) - - warped_image = dense_image_warp.dense_image_warp(image, dense_flows) - - return warped_image, dense_flows -- GitLab From cbce7fd25c5f1bd109692ac1d5250fe1942d5dc0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 14 Mar 2018 17:05:57 -0700 Subject: [PATCH 040/960] Check for the value of the _XlaCompile boolean since xla often creates it with the default value of false. PiperOrigin-RevId: 189110514 --- tensorflow/core/grappler/optimizers/function_optimizer.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 87160f6b83..b17715e742 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -138,7 +138,8 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } // Don't touch anything marked XLA to prevent XLA failures further down the // road. - if (func.attr().count("_XlaCompile") != 0) { + if (func.attr().count("_XlaCompile") > 0 && + func.attr().at("_XlaCompile").b()) { continue; } // Can't create IdentityN nodes with no input or output: skip these -- GitLab From 53bad5a6eee61f7ac5f627640ff096266532f041 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 17:08:35 -0700 Subject: [PATCH 041/960] Automated g4 rollback of changelist 189060958 PiperOrigin-RevId: 189110935 --- tensorflow/cc/framework/cc_op_gen.cc | 1 - tensorflow/core/lib/core/stringpiece.cc | 5 ++ tensorflow/core/lib/core/stringpiece.h | 6 ++ tensorflow/core/lib/core/stringpiece_test.cc | 70 +++++++++++++++++++ tensorflow/core/lib/hash/hash.h | 1 - tensorflow/core/lib/hash/hash_test.cc | 73 -------------------- 6 files changed, 81 insertions(+), 75 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index d73121c7b7..39893f5ccd 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/core/framework/types.pb_text.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" -#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 5bd79778a6..29b727fc44 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -17,9 +17,14 @@ limitations under the License. #include #include +#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { +size_t StringPieceHasher::operator()(StringPiece s) const { + return Hash64(s.data(), s.size()); +} + std::ostream& operator<<(std::ostream& o, StringPiece piece) { o.write(piece.data(), piece.size()); return o; diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 910e4d9e2a..caa9642774 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -35,6 +35,8 @@ limitations under the License. namespace tensorflow { +struct StringPieceHasher; + class StringPiece { public: typedef size_t size_type; @@ -129,6 +131,10 @@ class StringPiece { // Intentionally copyable }; +struct StringPieceHasher { + size_t operator()(StringPiece s) const; +}; + inline bool operator==(StringPiece x, StringPiece y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index d0dbeb6072..8f17b85b6d 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -65,4 +65,74 @@ TEST(StringPiece, Contains) { EXPECT_TRUE(!a.contains(d)); } +TEST(StringPieceHasher, Equality) { + StringPieceHasher hasher; + + StringPiece s1("foo"); + StringPiece s2("bar"); + StringPiece s3("baz"); + StringPiece s4("zot"); + + EXPECT_TRUE(hasher(s1) != hasher(s2)); + EXPECT_TRUE(hasher(s1) != hasher(s3)); + EXPECT_TRUE(hasher(s1) != hasher(s4)); + EXPECT_TRUE(hasher(s2) != hasher(s3)); + EXPECT_TRUE(hasher(s2) != hasher(s4)); + EXPECT_TRUE(hasher(s3) != hasher(s4)); + + EXPECT_TRUE(hasher(s1) == hasher(s1)); + EXPECT_TRUE(hasher(s2) == hasher(s2)); + EXPECT_TRUE(hasher(s3) == hasher(s3)); + EXPECT_TRUE(hasher(s4) == hasher(s4)); +} + +TEST(StringPieceHasher, HashMap) { + string s1("foo"); + string s2("bar"); + string s3("baz"); + + StringPiece p1(s1); + StringPiece p2(s2); + StringPiece p3(s3); + + std::unordered_map map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + bool found[3] = {false, false, false}; + for (auto const& val : map) { + int x = val.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], true); + EXPECT_EQ(found[2], true); + + auto new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + found[0] = false; + found[1] = false; + found[2] = false; + for (const auto& iter : map) { + int x = iter.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], false); + EXPECT_EQ(found[2], true); +} } // namespace tensorflow diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h index ca05e6346e..77b8031598 100644 --- a/tensorflow/core/lib/hash/hash.h +++ b/tensorflow/core/lib/hash/hash.h @@ -76,7 +76,6 @@ struct hash { return static_cast(Hash64(sp.data(), sp.size())); } }; -using StringPieceHasher = ::tensorflow::hash; template struct hash> { diff --git a/tensorflow/core/lib/hash/hash_test.cc b/tensorflow/core/lib/hash/hash_test.cc index 7d58313132..0e5f6c6803 100644 --- a/tensorflow/core/lib/hash/hash_test.cc +++ b/tensorflow/core/lib/hash/hash_test.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include #include #include "tensorflow/core/lib/hash/hash.h" @@ -83,75 +81,4 @@ static void BM_Hash32(int iters, int len) { } BENCHMARK(BM_Hash32)->Range(1, 1024); -TEST(StringPieceHasher, Equality) { - StringPieceHasher hasher; - - StringPiece s1("foo"); - StringPiece s2("bar"); - StringPiece s3("baz"); - StringPiece s4("zot"); - - EXPECT_TRUE(hasher(s1) != hasher(s2)); - EXPECT_TRUE(hasher(s1) != hasher(s3)); - EXPECT_TRUE(hasher(s1) != hasher(s4)); - EXPECT_TRUE(hasher(s2) != hasher(s3)); - EXPECT_TRUE(hasher(s2) != hasher(s4)); - EXPECT_TRUE(hasher(s3) != hasher(s4)); - - EXPECT_TRUE(hasher(s1) == hasher(s1)); - EXPECT_TRUE(hasher(s2) == hasher(s2)); - EXPECT_TRUE(hasher(s3) == hasher(s3)); - EXPECT_TRUE(hasher(s4) == hasher(s4)); -} - -TEST(StringPieceHasher, HashMap) { - string s1("foo"); - string s2("bar"); - string s3("baz"); - - StringPiece p1(s1); - StringPiece p2(s2); - StringPiece p3(s3); - - std::unordered_map map; - - map.insert(std::make_pair(p1, 0)); - map.insert(std::make_pair(p2, 1)); - map.insert(std::make_pair(p3, 2)); - EXPECT_EQ(map.size(), 3); - - bool found[3] = {false, false, false}; - for (auto const& val : map) { - int x = val.second; - EXPECT_TRUE(x >= 0 && x < 3); - EXPECT_TRUE(!found[x]); - found[x] = true; - } - EXPECT_EQ(found[0], true); - EXPECT_EQ(found[1], true); - EXPECT_EQ(found[2], true); - - auto new_iter = map.find("zot"); - EXPECT_TRUE(new_iter == map.end()); - - new_iter = map.find("bar"); - EXPECT_TRUE(new_iter != map.end()); - - map.erase(new_iter); - EXPECT_EQ(map.size(), 2); - - found[0] = false; - found[1] = false; - found[2] = false; - for (const auto& iter : map) { - int x = iter.second; - EXPECT_TRUE(x >= 0 && x < 3); - EXPECT_TRUE(!found[x]); - found[x] = true; - } - EXPECT_EQ(found[0], true); - EXPECT_EQ(found[1], false); - EXPECT_EQ(found[2], true); -} - } // namespace tensorflow -- GitLab From 6fdd2d6d912e1b918d0a89ec9a5e42991182bf69 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 18:06:25 -0700 Subject: [PATCH 042/960] Turn on strip_default_attrs in custom export strategy. Validated that the change WAI on a local pipeline. PiperOrigin-RevId: 189118106 --- .../boosted_trees/estimator_batch/custom_export_strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index 23ba76210b..d9b0d89a03 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -54,7 +54,7 @@ def make_custom_export_strategy(name, An `ExportStrategy`. """ base_strategy = saved_model_export_utils.make_export_strategy( - serving_input_fn=export_input_fn) + serving_input_fn=export_input_fn, strip_default_attrs=True) input_fn = export_input_fn() (sorted_feature_names, dense_floats, sparse_float_indices, _, _, sparse_int_indices, _, _) = gbdt_batch.extract_features( -- GitLab From 041cf60da44f4c423c8f1ef603ce74b3377eb2d3 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 14 Mar 2018 18:39:25 -0700 Subject: [PATCH 043/960] [tf.data] Fix Python shape inference for `tf.contrib.data.map_and_batch()`. Previously, it would incorrectly report that all batches have the same size, not accounting for the possibility of the last batch being partial. Fixes #17720. PiperOrigin-RevId: 189121488 --- .../python/kernel_tests/batch_dataset_op_test.py | 14 ++++++++++++++ tensorflow/contrib/data/python/ops/batching.py | 3 +-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 71dc1c1172..a2da953c7b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -387,6 +387,20 @@ class BatchDatasetTest(test.TestCase): def testBatchAndMapDatasetWithParallelBatching(self): return self._testBatchAndMapDatasetHelper(num_parallel_batches=10) + def testMapAndBatchYieldsPartialBatch(self): + iterator = (dataset_ops.Dataset.range(10) + .apply(batching.map_and_batch( + lambda x: array_ops.reshape(x * x, [1]), 4)) + .make_one_shot_iterator()) + self.assertEqual([None, 1], iterator.output_shapes.as_list()) + next_element = iterator.get_next() + with self.test_session() as sess: + self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) + self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) + self.assertAllEqual([[64], [81]], sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testMapAndBatchSparse(self): def _sparse(i): diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 6eb512dec6..6463d75750 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -374,8 +374,7 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): @property def output_shapes(self): return nest.pack_sequence_as(self._output_shapes, [ - tensor_shape.vector(tensor_util.constant_value( - self._batch_size)).concatenate(s) + tensor_shape.vector(None).concatenate(s) for s in nest.flatten(self._output_shapes) ]) -- GitLab From 6f0c663f4c125a87519541cd6f11932e2cf4c039 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 14 Mar 2018 19:02:29 -0700 Subject: [PATCH 044/960] [XLA] Add a test for F32 -> U32 conversion PiperOrigin-RevId: 189124355 --- tensorflow/compiler/xla/tests/convert_test.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/compiler/xla/tests/convert_test.cc b/tensorflow/compiler/xla/tests/convert_test.cc index 59d6d7a415..7926767a4f 100644 --- a/tensorflow/compiler/xla/tests/convert_test.cc +++ b/tensorflow/compiler/xla/tests/convert_test.cc @@ -177,6 +177,24 @@ XLA_TEST_F(ConvertTest, ConvertR1U32ToR1F32) { ComputeAndCompareR1(&builder, expected, {arg_data.get()}); } +XLA_TEST_F(ConvertTest, ConvertR1F32ToR1U32) { + ComputationBuilder builder(client_, TestName()); + std::vector arg{0.0f, 1.0f, 16777216.0f, + 16777218.0f, 2147483647.0f, 4294967040.0f}; + std::unique_ptr arg_literal = Literal::CreateR1({arg}); + auto arg_param = builder.Parameter(0, arg_literal->shape(), "arg_param"); + std::unique_ptr arg_data = + client_->TransferToServer(*arg_literal).ConsumeValueOrDie(); + + builder.ConvertElementType(arg_param, U32); + + std::vector expected(arg.size()); + for (int64 i = 0; i < arg.size(); ++i) { + expected[i] = static_cast(arg[i]); + } + ComputeAndCompareR1(&builder, expected, {arg_data.get()}); +} + XLA_TEST_F(ConvertTest, ConvertR1U32ToR1S64) { ComputationBuilder builder(client_, TestName()); std::vector arg{0, 1, 0x1000, 0x7fffffff, 0x80000082, 0xFFFFFFFF}; -- GitLab From 6b2e5a4a1a11f3bed1260bf63cf78af3e7109a9d Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 14 Mar 2018 19:03:28 -0700 Subject: [PATCH 045/960] Checkpointable: Add logic for late-naming of SaveableObjects Previously SaveableObjects returned by _gather_saveables_for_checkpoint ran into a sanity check for naming, and would otherwise have been saved under the wrong key. This change has _gather_saveables_for_checkpoint returning factories for SaveableObjects instead, which are either passed the object-based saving key or are called with no arguments when using the global name. Variables didn't run into this, since the Saver was generating their names. PiperOrigin-RevId: 189124512 --- .../eager/python/checkpointable_utils.py | 6 +- .../eager/python/checkpointable_utils_test.py | 71 +++++++++++++++++++ tensorflow/python/training/checkpointable.py | 30 ++++++-- tensorflow/python/training/saver.py | 6 +- tensorflow/python/training/saver_test.py | 14 ++-- 5 files changed, 113 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index d07121df63..677b56b7b8 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -220,12 +220,16 @@ def _serialize_checkpointables( object_proto = object_graph_proto.nodes.add() object_proto.slot_variables.extend(slot_variables.get(checkpointable, ())) object_name = object_names[checkpointable] - for name, saveable in ( + for name, saveable_factory in ( checkpointable._gather_saveables_for_checkpoint().items()): # pylint: disable=protected-access attribute = object_proto.attributes.add() attribute.name = name attribute.checkpoint_key = "%s/%s/%s" % ( object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name)) + if callable(saveable_factory): + saveable = saveable_factory(name=attribute.checkpoint_key) + else: + saveable = saveable_factory # Figure out the name-based Saver's name for this variable. saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( [saveable], convert_variable_to_tensor=False) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 2054878bf8..31f661e634 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras._impl.keras.engine import training from tensorflow.python.layers import core +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops @@ -152,6 +153,46 @@ class InterfaceTests(test.TestCase): self.assertAllEqual([1., 1., 1.], self.evaluate(v2)) +class _MirroringSaveable( + core_saver.BaseSaverBuilder.ResourceVariableSaveable): + + def __init__(self, primary_variable, mirrored_variable, name): + self._primary_variable = primary_variable + self._mirrored_variable = mirrored_variable + super(_MirroringSaveable, self).__init__( + self._primary_variable, "", name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into both variables.""" + tensor, = restored_tensors + return control_flow_ops.group( + self._primary_variable.assign(tensor), + self._mirrored_variable.assign(tensor)) + + +class _OwnsMirroredVariables(checkpointable.CheckpointableBase): + """A Checkpointable object which returns a more complex SaveableObject.""" + + def __init__(self): + self.non_dep_variable = variable_scope.get_variable( + name="non_dep_variable", initializer=6., use_resource=True) + self.mirrored = variable_scope.get_variable( + name="mirrored", initializer=15., use_resource=True) + + def _gather_saveables_for_checkpoint(self): + def _saveable_factory(name=self.non_dep_variable.name): + return _MirroringSaveable( + primary_variable=self.non_dep_variable, + mirrored_variable=self.mirrored, + name=name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} + + # The Saver sorts by name before parsing, so we need a name property. + @property + def name(self): + return self.non_dep_variable.name + + class CheckpointingTests(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) @@ -261,6 +302,36 @@ class CheckpointingTests(test.TestCase): optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testMoreComplexSaveableReturned(self): + v = _OwnsMirroredVariables() + checkpoint = checkpointable_utils.Checkpoint(v=v) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + self.evaluate(v.non_dep_variable.assign(42.)) + save_path = checkpoint.save(prefix) + self.evaluate(v.non_dep_variable.assign(43.)) + self.evaluate(v.mirrored.assign(44.)) + checkpoint.restore(save_path).assert_consumed().initialize_or_restore() + self.assertEqual(42., self.evaluate(v.non_dep_variable)) + self.assertEqual(42., self.evaluate(v.mirrored)) + + @test_util.run_in_graph_and_eager_modes() + def testMoreComplexSaveableReturnedWithGlobalName(self): + # The same object can also be saved using the name-based saver. + v = _OwnsMirroredVariables() + saver = core_saver.Saver(var_list=[v]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + self.evaluate(v.non_dep_variable.assign(42.)) + with self.test_session() as sess: + save_path = saver.save(sess, prefix) + self.evaluate(v.non_dep_variable.assign(43.)) + self.evaluate(v.mirrored.assign(44.)) + saver.restore(sess, save_path) + self.assertEqual(42., self.evaluate(v.non_dep_variable)) + self.assertEqual(42., self.evaluate(v.mirrored)) + @test_util.run_in_graph_and_eager_modes() def testSaveRestore(self): model = MyModel() diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index e49965703e..96e3c4821c 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -210,8 +210,8 @@ class _CheckpointPosition(object): restore_ops = [] building_graph = not context.executing_eagerly() for serialized_tensor in self.object_proto.attributes: - saveable_object = saveables.get(serialized_tensor.name, None) - if saveable_object is None: + saveable_factory = saveables.get(serialized_tensor.name, None) + if saveable_factory is None: # Purposefully does not throw an exception if attributes have been added # or deleted. Stores unused attributes so an exception can be raised if # the user decides to check that everything in the checkpoint was @@ -225,7 +225,11 @@ class _CheckpointPosition(object): else: existing_ops = None if existing_ops is None: - named_saveables[serialized_tensor.checkpoint_key] = saveable_object + if callable(saveable_factory): + saveable = saveable_factory(name=serialized_tensor.checkpoint_key) + else: + saveable = saveable_factory + named_saveables[serialized_tensor.checkpoint_key] = saveable if named_saveables: validated_saveables = ( self._checkpoint.builder._ValidateAndSliceInputs(named_saveables)) # pylint: disable=protected-access @@ -600,14 +604,30 @@ class CheckpointableBase(object): """Returns a dictionary of values to checkpoint with this object. Keys in the returned dictionary are local to this object and in a separate - namespace from dependencies. Values may either be `SaveableObject`s or - variables easily converted to `SaveableObject`s (as in `tf.train.Saver`'s + namespace from dependencies. Values may either be `SaveableObject` factories + or variables easily converted to `SaveableObject`s (as in `tf.train.Saver`'s `var_list` constructor argument). + `SaveableObjects` have a name set, which Checkpointable needs to generate + itself. So rather than returning `SaveableObjects` directly, this method + should return a dictionary of callables which take `name` arguments and + return `SaveableObjects` with that name. + + If this object may also be passed to the global-name-based `tf.train.Saver`, + the returned callables should have a default value for their name argument + (i.e. be callable with no arguments). + Returned values must be saved only by this object; if any value may be shared, it should instead be a dependency. For example, variable objects save their own values with the key `VARIABLE_VALUE_KEY`, but objects which reference variables simply add a dependency. + + Returns: + The dictionary mapping attribute names to `SaveableObject` factories + described above. For example: + {VARIABLE_VALUE_KEY: + lambda name="global_name_for_this_object": + SaveableObject(name=name, ...)} """ return {} diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 2ce57c4432..5f68eec6ce 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -578,9 +578,11 @@ class BaseSaverBuilder(object): names_to_saveables[name] = [var] elif (isinstance(var, checkpointable.CheckpointableBase) and not isinstance(var, variables.Variable)): + checkpointable_saveables = [ + (factory() if callable(factory) else factory) + for factory in var._gather_saveables_for_checkpoint().values()] names_to_saveables.update( - BaseSaverBuilder.OpListToDict( - list(var._gather_saveables_for_checkpoint().values()))) + BaseSaverBuilder.OpListToDict(checkpointable_saveables)) else: if context.executing_eagerly(): if not isinstance(var, resource_variable_ops.ResourceVariable): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 46fe2735be..4e80fab9bd 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2873,11 +2873,11 @@ class _OwnsAVariableSimple(checkpointable.CheckpointableBase): class _MirroringSaveable( saver_module.BaseSaverBuilder.ResourceVariableSaveable): - def __init__(self, primary_variable, mirrored_variable): + def __init__(self, primary_variable, mirrored_variable, name): self._primary_variable = primary_variable self._mirrored_variable = mirrored_variable super(_MirroringSaveable, self).__init__( - self._primary_variable, "", self._primary_variable.name) + self._primary_variable, "", name) def restore(self, restored_tensors, restored_shapes): """Restore the same value into both variables.""" @@ -2897,10 +2897,12 @@ class _OwnsMirroredVariables(checkpointable.CheckpointableBase): name="mirrored", initializer=15., use_resource=True) def _gather_saveables_for_checkpoint(self): - saveable = _MirroringSaveable( - primary_variable=self.non_dep_variable, - mirrored_variable=self.mirrored) - return {checkpointable.VARIABLE_VALUE_KEY: saveable} + def _saveable_factory(name=self.non_dep_variable.name): + return _MirroringSaveable( + primary_variable=self.non_dep_variable, + mirrored_variable=self.mirrored, + name=name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} # The Saver sorts by name before parsing, so we need a name property. @property -- GitLab From 8b305c509353815e2086200b47d0af898fa9df35 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 14 Mar 2018 20:07:38 -0700 Subject: [PATCH 046/960] Add int64 to randomized tests. PiperOrigin-RevId: 189129641 --- tensorflow/compiler/tests/randomized_tests.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index e72dd4eea9..e53efc3091 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -83,8 +83,8 @@ string LocalDeviceToFullDeviceName(const string& device) { return strings::StrCat("/job:localhost/replica:0/task:0/device:", device); } -constexpr std::array kAllXlaTypes = { - {DT_INT32, DT_FLOAT, DT_BOOL, DT_COMPLEX64}}; +constexpr std::array kAllXlaTypes = { + {DT_INT32, DT_FLOAT, DT_BOOL, DT_COMPLEX64, DT_INT64}}; // An OpTestBuilder is a graph builder class that takes as input an operator to // test, its inputs and attributes, and builds a graph that executes the -- GitLab From d832e0e1b15302aa76ce6db121bdeb15d2dfd0f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 20:23:36 -0700 Subject: [PATCH 047/960] Make hash.h a public header. PiperOrigin-RevId: 189130768 --- tensorflow/core/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 213315f40e..352e183104 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -313,6 +313,7 @@ cc_library( "lib/gtl/optional.h", "lib/gtl/priority_queue_util.h", "lib/hash/crc32c.h", + "lib/hash/hash.h", "lib/histogram/histogram.h", "lib/io/buffered_inputstream.h", "lib/io/compression.h", -- GitLab From 357cd4b8b2f960520fc57b6cfbf41117a2a20fc7 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Wed, 14 Mar 2018 20:35:42 -0700 Subject: [PATCH 048/960] Internal change. PiperOrigin-RevId: 189131526 --- tensorflow/c/BUILD | 3 + tensorflow/c/c_api_experimental.cc | 346 +++++++++++++++++++++++++++++ tensorflow/c/c_api_experimental.h | 26 +++ 3 files changed, 375 insertions(+) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 29ed957c9a..c178d7f81f 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -116,6 +116,9 @@ tf_cuda_library( ":c_api", ":c_api_internal", "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index be7f85a5bb..f6d8949bb0 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -17,8 +17,297 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/protobuf/config.pb.h" +using tensorflow::Node; +using tensorflow::NodeBuilder; +using tensorflow::NodeDef; +using tensorflow::Status; + +namespace { + +const char* const DEVICE_TPU_REPLICATED_CORE = "TPU_REPLICATED_CORE"; +const char* const DEVICE_TPU_SYSTEM = "TPU_SYSTEM"; + +TF_Operation* ToTF_Operation(Node* node) { + return static_cast(static_cast(node)); +} + +// Graph rewrite algorithm (modeled after the python TPU graph rewrite path): +// +// 1. For each input node I, feed it to a new TPUReplicatedInput node, which in +// turn feeds a new Identity node N, and store the mapping I->N. +// +// 2. Rewrite all existing graph nodes by adding a attribute on TPU cluster. For +// each node reading some input node I, rewire it to read from N instead based +// on the I->N mapping in step #1. +// +// 3. For each output node O, feed it to a new Identity node, which in turn +// feeds a new TPUReplicatedOutput node, which in turn feeds a new Identity node +// M. Return the set of new output nodes (the "M" nodes) for caller to fetch +// from. +// +// Limitations compared to the python TPU rewrite path: +// - # replicas is always 1. +// - Less error checking. +// +// TODO(hongm): Simplify the graph rewrite to generating fewer TPUReplicate +// related nodes. +class GraphRewriter { + public: + GraphRewriter(TF_Graph* graph, int num_input_nodes, + const TF_Output* input_nodes, int num_output_nodes, + const TF_Output* output_nodes) + EXCLUSIVE_LOCKS_REQUIRED(graph->mu) + : graph_(graph), input_nodes_(input_nodes) { + for (int i = 0; i < num_input_nodes; ++i) { + // Will fill in the value part later when we create the associated new + // input node. + input_node_map_[input_nodes[i].oper->node.name()] = nullptr; + } + + // Grab all existing nodes for the upcoming rewrite, before mutating the + // graph. + for (Node* n : graph->graph.nodes()) { + nodes_to_rewrite_.push_back(n); + } + + for (int i = 0; i < num_output_nodes; ++i) { + output_node_map_.emplace(output_nodes[i].oper->node.name(), + PortIndexPair{output_nodes[i].index, i}); + } + } + + // On success, sets `config_op` and `shutdown_op` to the corresponding + // "ConfigureDistributedTPU" and "ShutdownDistributedTPU" nodes added to the + // graph. + tensorflow::Status Rewrite(TF_Output* new_output_nodes, TF_Output* config_op, + TF_Output* shutdown_op) + EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { + TF_RETURN_IF_ERROR(ProcessInputNodes()); + + return RewriteGraphAndAddOutputNodes(new_output_nodes, config_op, + shutdown_op); + } + + private: + // Synthensizes new nodes for the input nodes, and creates a replicated + // metadata node. + tensorflow::Status ProcessInputNodes() EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { + Node* metadata_node; + TF_RETURN_IF_ERROR( + NodeBuilder(metadata_node_name_.c_str(), "TPUReplicateMetadata") + .Attr("num_replicas", 1) + .Attr("_tpu_replicate", cluster_name_.c_str()) + .Finalize(&graph_->graph, &metadata_node)); + + for (int i = 0; i < input_node_map_.size(); ++i) { + VLOG(1) << "Handling input node " << input_nodes_[i].oper->node.name(); + Node* replicated_input_node; + { + std::string replicated_input_name("TPUReplicate/input" + + std::to_string(i)); + NodeBuilder::NodeOut input(&input_nodes_[i].oper->node, + input_nodes_[i].index); + std::vector input_list; + input_list.push_back(input); + TF_RETURN_IF_ERROR( + NodeBuilder(replicated_input_name.c_str(), "TPUReplicatedInput") + // This op requires an input list. + .Input(input_list) + .Finalize(&graph_->graph, &replicated_input_node)); + } + + { + Node* new_input_node; + const std::string new_input_name("TPUReplicate/replicated_input_" + + std::to_string(i)); + TF_RETURN_IF_ERROR(NodeBuilder(new_input_name.c_str(), "Identity") + .Input(replicated_input_node, 0) + .ControlInput(metadata_node) + .Attr("_tpu_replicate", cluster_name_.c_str()) + .Finalize(&graph_->graph, &new_input_node)); + DCHECK_GT(input_node_map_.count(input_nodes_[i].oper->node.name()), 0); + input_node_map_[input_nodes_[i].oper->node.name()] = new_input_node; + } + } + return Status::OK(); + } + + // On success, sets `config_op` and `shutdown_op` to the corresponding + // "ConfigureDistributedTPU" and "ShutdownDistributedTPU" nodes added to the + // graph. + tensorflow::Status RewriteGraphAndAddOutputNodes(TF_Output* new_output_nodes, + TF_Output* config_op, + TF_Output* shutdown_op) + EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { + tensorflow::Status s; + // For each non-input node in the input graph, place the node in a "TPU + // replicate cluster" via an attribute, and with the above metadata node + // as a control dependency. + // + // Although we have handled the input nodes in ProcessInputNodes(), some + // of those nodes may also serve as output nodes, which we will handle + // below. + for (Node* n : nodes_to_rewrite_) { + if (n->IsSource()) continue; + VLOG(1) << "Rewriting node " << n->name(); + + if (n->IsSink()) { + // TODO(hongm): Rewire SINK to be control dependent on the new input + // nodes created above? + continue; + } + + const NodeDef& old_def = n->def(); + Node* new_node; + if (input_node_map_.count(n->name())) { + new_node = input_node_map_[n->name()]; + } else { + // This node is to replace `n` in the graph. + NodeDef new_def = n->def(); + const std::string new_node_name = "TPUReplicate/" + n->name(); + new_def.set_name(new_node_name); + new_def.clear_input(); + for (int i = 0; i < old_def.input_size(); ++i) { + const std::string& old_input_name = old_def.input(i); + const std::string new_input_name = + input_node_map_.count(old_input_name) > 0 + ? std::string(input_node_map_[old_input_name]->name()) + : "TPUReplicate/" + old_input_name; + new_def.add_input(new_input_name); + } + if (old_def.input_size() == 0) { + // It is sufficient to only set control dependency of nodes without + // input. Other nodes with input(s) with inherit such control + // dependency. + // e.g. say the graph computes add(x, y). Once we make nodes x and y + // control-dependent on the metadata node, node add will inherit + // such control dependency indirectly. + new_def.add_input( + tensorflow::strings::StrCat("^", metadata_node_name_.c_str())); + } + tensorflow::AddNodeAttr("_tpu_replicate", cluster_name_.c_str(), + &new_def); + new_node = graph_->graph.AddNode(new_def, &s); + if (!s.ok()) { + return s; + } + VLOG(1) << "The rewritten node node is " << new_node->DebugString(); + } + + if (output_node_map_.count(n->name()) > 0) { + VLOG(1) << "Handling output node " << n->name(); + auto range_it = output_node_map_.equal_range(n->name()); + for (auto it = range_it.first; it != range_it.second; ++it) { + const PortIndexPair& pair = it->second; + Node* out_identity_node; + { + VLOG(1) << "Handling its output port " << pair.port + << " at output index " << pair.index; + std::string output_node_name = "TPUReplicate/Identity"; + if (pair.index > 0) { + output_node_name += "_" + std::to_string(pair.index); + } + TF_RETURN_IF_ERROR( + NodeBuilder(output_node_name.c_str(), "Identity") + .Input(new_node, pair.port) + .Device(!old_def.device().empty() + ? old_def.device() + : tensorflow::strings::StrCat( + "/device:", DEVICE_TPU_REPLICATED_CORE)) + .Attr("_tpu_replicate", cluster_name_.c_str()) + .Finalize(&graph_->graph, &out_identity_node)); + VLOG(1) << "out_identity_node: " + << out_identity_node->DebugString(); + } + + Node* replicated_output_node; + { + const std::string replicated_output_node_name = + "TPUReplicate/output" + std::to_string(pair.index); + TF_RETURN_IF_ERROR( + NodeBuilder(replicated_output_node_name.c_str(), + "TPUReplicatedOutput") + .Input(out_identity_node, 0) + .Attr("num_replicas", 1) + .Finalize(&graph_->graph, &replicated_output_node)); + VLOG(1) << "replicated_output_node: " + << replicated_output_node->DebugString(); + } + + Node* final_output_node; + const std::string final_output_node_name = + "TPUReplicate/output_" + std::to_string(pair.index) + "_shard_" + + std::to_string(0); + TF_RETURN_IF_ERROR( + NodeBuilder(final_output_node_name.c_str(), "Identity") + .Input(replicated_output_node, 0) + .Finalize(&graph_->graph, &final_output_node)); + VLOG(1) << "new_output_node: " << final_output_node->DebugString(); + auto oper = ToTF_Operation(final_output_node); + new_output_nodes[pair.index] = {oper, 0}; + } + } + + if (input_node_map_.count(n->name()) == 0) { + graph_->graph.RemoveNode(n); + } + } + + { + Node* config_node; + TF_RETURN_IF_ERROR( + NodeBuilder("ConfigureDistributedTPU", "ConfigureDistributedTPU") + .Device(DEVICE_TPU_SYSTEM) + .Finalize(&graph_->graph, &config_node)); + *config_op = {ToTF_Operation(config_node), 0}; + } + + { + Node* shutdown_node; + TF_RETURN_IF_ERROR( + NodeBuilder("ShutdownDistributedTPU", "ShutdownDistributedTPU") + .Device(DEVICE_TPU_SYSTEM) + .Finalize(&graph_->graph, &shutdown_node)); + *shutdown_op = {ToTF_Operation(shutdown_node), 0}; + } + + return Status::OK(); + } + + TF_Graph* const graph_; + + const TF_Output* const input_nodes_; + + const std::string cluster_name_ = "TPUReplicate/cluster"; + const std::string metadata_node_name_ = "TPUReplicate/TPUReplicateMetadata"; + + // Keep mappings from the current input nodes to newly created input nodes, + // which we will use to rewrite existing nodes that read these + // inputs. e.g. A node that reads input node PlaceHolder could be rewired to + // read the created TPUReplicate/replicated_input_0 node. + std::unordered_map input_node_map_; + + std::vector nodes_to_rewrite_; + + // Map from name to set{(output port, output tensor idx)}. + // e.g. Say ther are 3 output tensors, respectively produced by (node 0, + // port 0), (node 0, port 1), (node 1, port 0). Then the mapping entries + // are: node 0 -> {(port 0, idx 0), (port 1, idx 1)} node 1 -> {(port 0, idx + // 2)} Based on these mappings, we will generated 3 new output nodes. + struct PortIndexPair { + int port; + int index; + }; + std::multimap output_node_map_; +}; + +} // namespace + void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) { tensorflow::ConfigProto& config = options->options.config; auto* optimizer_options = @@ -37,3 +326,60 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) { optimizer_options->set_global_jit_level(tensorflow::OptimizerOptions::OFF); } } + +TF_Output TF_SetupTPUExecution(TF_Session* session, int num_input_nodes, + const TF_Output* input_nodes, + int num_output_nodes, + const TF_Output* output_nodes, + TF_Output* new_output_nodes, TF_Status* status) { + TF_Output config_op, shutdown_op; + { + auto graph = session->graph; + tensorflow::mutex_lock c(graph->mu); + + VLOG(1) << "Graph before TPU rewrite: " + << graph->graph.ToGraphDefDebug().DebugString(); + GraphRewriter rewriter(graph, num_input_nodes, input_nodes, + num_output_nodes, output_nodes); + status->status = + rewriter.Rewrite(new_output_nodes, &config_op, &shutdown_op); + if (!status->status.ok()) { + return shutdown_op; + } + VLOG(1) << "Graph after TPU rewrite: " + << graph->graph.ToGraphDefDebug().DebugString(); + } + + VLOG(1) << "Initializing TPU"; + TF_Tensor* dummy_output; + TF_SessionRun(session, /*run_options*/ nullptr, + // input related parameters + /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, + // output related parameters + /*outputs*/ &config_op, /*output_values*/ &dummy_output, + /*noutputs*/ 1, + /*targets*/ nullptr, /*ntargets*/ 0, + /*run_metadata*/ nullptr, status); + if (status->status.ok()) { + TF_DeleteTensor(dummy_output); + } + return shutdown_op; +} + +void TF_ShutdownTPUExecution(TF_Session* session, TF_Output shutdown_node, + TF_Status* status) { + { + tensorflow::mutex_lock c(session->graph->mu); + VLOG(1) << "Shutting down TPU, with input graph: " + << session->graph->graph.ToGraphDefDebug().DebugString(); + } + + TF_SessionRun(session, /*run_options*/ nullptr, + // input related parameters + /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, + // output related parameters + /*outputs*/ nullptr, /*output_values*/ nullptr, + /*noutputs*/ 0, + /*targets*/ &shutdown_node.oper, /*ntargets*/ 1, + /*run_metadata*/ nullptr, status); +} diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 5a7b007e40..af65123131 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -25,6 +25,7 @@ limitations under the License. // Experimental C API for TensorFlow. // // The API here is subject to changes in the future. +// -------------------------------------------------------------------------- // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). @@ -59,6 +60,31 @@ extern "C" { TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable); +// Sets up TPU execution, by rewriting the graph accordingly, and initializing +// TPU system. +// +// On success, returns a shutdown node to be used in a subsequent +// TF_ShutdownTPUExecution(), and sets the new output nodes in +// `new_output_nodes` for caller to fetch from. Must be called exactly once +// before TF_SessionRun(). +// +// The API and logic is modeled after the python counterparts +// tpu.{initialize_system(), rewrite(), shutdown_system()}. +// +// TODO(b/74774824): Create separate APIs for initializing TPU system and graph +// rewrite. +TF_CAPI_EXPORT extern TF_Output TF_SetupTPUExecution( + TF_Session* session, int num_input_nodes, const TF_Output* input_nodes, + int num_output_nodes, const TF_Output* output_nodes, + TF_Output* new_output_nodes, TF_Status* status); + +// Shuts down TPU system. For any `session` where TF_SetupTPUExecution() has +// been successfully called, this call must be made exactly once before the +// session is closed. +TF_CAPI_EXPORT extern void TF_ShutdownTPUExecution(TF_Session* session, + TF_Output shutdown_node, + TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif -- GitLab From 9037e241de1e64044ff55ab539ccc1fb013c178a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Mar 2018 20:39:10 -0700 Subject: [PATCH 049/960] Enable Add/AddN tree rewrite for symbolically equal shapes. 1) Rewrite a tree of Add/AddN ops with a single AddN, if all shapes are symbolically equal 2) Lookup shape properties using GraphProperties instead of direct access to Node attributes PiperOrigin-RevId: 189131726 --- .../optimizers/arithmetic_optimizer.cc | 173 ++++++++++-------- .../optimizers/arithmetic_optimizer.h | 3 + .../optimizers/arithmetic_optimizer_test.cc | 61 +++++- tensorflow/core/grappler/utils.cc | 26 +++ tensorflow/core/grappler/utils.h | 4 + tensorflow/core/grappler/utils_test.cc | 41 +++++ 6 files changed, 231 insertions(+), 77 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index c0fcfaf428..675cd8f072 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -197,35 +197,39 @@ bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); } const char kOutputShapesAttr[] = "_output_shapes"; -PartialTensorShape GetInputShape(const string& input, const NodeMap& node_map) { - int output_pos; - string node_name = ParseNodeName(input, &output_pos); - const NodeDef* input_node = node_map.GetNode(node_name); - auto attr = input_node->attr(); - if (attr.find(kOutputShapesAttr) == attr.end()) { - return PartialTensorShape(); // unknown shape - } else { - return attr.at(kOutputShapesAttr).list().shape(output_pos); - } +// Shape is symbolically defined if it has a known rank, and each dimension is +// defined, or is an unknown symbol (dim.size <= -2). +bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape) { + return !shape.unknown_rank() && + std::all_of( + shape.dim().begin(), shape.dim().end(), + [](const TensorShapeProto::Dim& dim) { return dim.size() != -1; }); +} + +bool ShapeIsSymbolicallyDefined(const OpInfo::TensorProperties& properties) { + return ShapeIsSymbolicallyDefined(properties.shape()); } -bool ShapesEqual(const string& input_x, const string& input_y, - const NodeMap& node_map) { - PartialTensorShape x_shape = GetInputShape(input_x, node_map); - PartialTensorShape y_shape = GetInputShape(input_y, node_map); - if (x_shape.unknown_rank() || y_shape.unknown_rank() || - x_shape.dims() != y_shape.dims()) { +bool ShapesSymbolicallyEqual(const TensorShapeProto& left, + const TensorShapeProto& right) { + if (left.unknown_rank() || right.unknown_rank() || + left.dim_size() != right.dim_size()) { return false; } - for (int i = 0; i < x_shape.dims(); ++i) { - if (x_shape.dim_size(i) == -1 || y_shape.dim_size(i) == -1 || - x_shape.dim_size(i) != y_shape.dim_size(i)) { + for (int i = 0; i < left.dim_size(); ++i) { + if (left.dim(i).size() == -1 || right.dim(i).size() == -1 || + left.dim(i).size() != right.dim(i).size()) { return false; } } return true; } +bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right) { + return ShapesSymbolicallyEqual(left.shape(), right.shape()); +} + // Returns whether `reshape` is an identity op. The tensor that `reshape` // reshapes is the `output_pos`-th output of node `input`. bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, @@ -290,16 +294,19 @@ NodeDef* GetTailOfValuePreservingChain( struct ArithmeticOptimizerContext { ArithmeticOptimizerContext( const std::unordered_set* nodes_to_preserve, - GraphDef* optimized_graph, NodeMap* node_map, FrameMap* frame_map, + GraphDef* optimized_graph, GraphProperties* graph_properties, + NodeMap* node_map, FrameMap* frame_map, SetVector* nodes_to_simplify) : nodes_to_preserve(nodes_to_preserve), optimized_graph(optimized_graph), + graph_properties(graph_properties), node_map(node_map), frame_map(frame_map), nodes_to_simplify(nodes_to_simplify) {} const std::unordered_set* nodes_to_preserve; GraphDef* optimized_graph; + GraphProperties* graph_properties; NodeMap* node_map; FrameMap* frame_map; SetVector* nodes_to_simplify; @@ -388,7 +395,7 @@ class ArithmeticOptimizerStage { ctx_.nodes_to_simplify->PushBack(node); } - // Get a node by input name from a node map. Return a error if node was not + // Get a node by input name from a node map. Return an error if node was not // found. Status GetInputNode(const string& input, NodeDef** node) const { string node_name = NodeName(input); @@ -401,22 +408,31 @@ class ArithmeticOptimizerStage { return Status::OK(); } - // Get input shape from a node map. If node doesn't exists return unknown - // shape. - PartialTensorShape GetInputShape(const string& input) const { - int position; - string node_name = ParseNodeName(input, &position); - NodeDef* node; - Status node_status = GetInputNode(node_name, &node); - if (!node_status.ok()) { - return PartialTensorShape(); // unknown shape + // Lookup tensor properties by name. Tensor name might have non-zero port + // number. Return an error if tensor node doesn't exists in a graph, or it + // doesn't have properties defined for requested port. + Status GetTensorProperties(const string& tensor, + OpInfo::TensorProperties* properties) const { + int port; + string tensor_node_name = ParseNodeName(tensor, &port); + if (port < 0) { + return errors::InvalidArgument( + "Can't get tensor properties of control dependency ", tensor); } - auto attr = node->attr(); - if (attr.find(kOutputShapesAttr) == attr.end()) { - return PartialTensorShape(); // unknown shape - } else { - return attr.at(kOutputShapesAttr).list().shape(position); + + const auto& output_properties = + ctx_.graph_properties->GetOutputProperties(tensor_node_name); + auto num_outputs = output_properties.size(); + + if (num_outputs == 0 || port > num_outputs - 1) { + return errors::InvalidArgument( + "Node ", tensor_node_name, + " is missing output properties at position :", port, + " (num_outputs=", num_outputs, ")"); } + + properties->CopyFrom(output_properties[port]); + return Status::OK(); } NodeDef* AddCopyNode(const string& name, const NodeDef* node_to_copy) { @@ -509,8 +525,8 @@ class ArithmeticOptimizerStage { // Rewrite a tree of Add/AddN with a single AddN operation, consuming all the // original inputs of absorbed nodes. // -// All nodes in a Add/AddN subgraph must have fully specified and identical -// shape. All nodes must have the same device placement. +// All nodes in a Add/AddN subgraph must have symbolically equal shape. All +// nodes must have the same device placement. // // Example: // AddN_1 @@ -533,16 +549,12 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { if (!IsRewritable(node)) { return false; } - // and must have fully defined shape - // TODO(ezhulenev): support partially defined shapes, when we can prove that - // unknown dimensions in the rewritten subgraph are the same. - PartialTensorShape shape = GetInputShape(node->name()); - if (!shape.IsFullyDefined()) { - return false; - } - // and must have inputs of fully defined shape identical to the output - // TODO(ezhulenev): relax this condition to support equal unknown dimensions - return HasAllInputsOfIdenticalShape(*node, shape); + + // shape must be symbolically defined and all inputs compatible with it + OpInfo::TensorProperties properties; + Status has_properties = GetTensorProperties(node->name(), &properties); + return has_properties.ok() && ShapeIsSymbolicallyDefined(properties) && + HasAllInputsOfSymbolicallyEqualShape(*node, properties); } Status TrySimplify(const NodeDef* node, @@ -567,23 +579,26 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { // input_nodes: [x, y, z, w, q, e] struct AddOpsGroup { const NodeDef* root_node; - PartialTensorShape root_shape; + TensorShapeProto root_shape; // Add/AddN operations below the root level that were absorbed by this group std::vector absorbed_nodes; // Inputs of absorbed nodes that will be forwarded to rewritten AddN node std::vector inputs; }; - // Check if all inputs are fully defined and identical to expected shape - bool HasAllInputsOfIdenticalShape(const NodeDef& node, - const PartialTensorShape& shape) const { + // Check if all inputs have symbolically equal shapes + bool HasAllInputsOfSymbolicallyEqualShape( + const NodeDef& node, const OpInfo::TensorProperties& properties) const { const AddOpsRewriteStage* self = this; - return std::all_of(node.input().begin(), node.input().end(), - [self, &shape](const string& input) { - auto input_shape = self->GetInputShape(input); - return input_shape.IsFullyDefined() && - input_shape.IsIdenticalTo(shape); - }); + return std::all_of( + node.input().begin(), node.input().end(), + [self, &properties](const string& input) { + OpInfo::TensorProperties input_properties; + Status has_input_properties = + self->GetTensorProperties(input, &input_properties); + return has_input_properties.ok() && + ShapesSymbolicallyEqual(properties, input_properties); + }); } // TODO(ezhulenev): use GraphRewriter? @@ -614,27 +629,25 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { if (!node_status.ok()) { return false; } - - PartialTensorShape shape = GetInputShape(name); - CHECK(shape.IsIdenticalTo(group.root_shape)) - << "Cannot absorb a node of incompatible shape"; - // check basic preconditions if (!IsRewritable(node)) { return false; } - // with a single output consumer (presumably if we reach this node from + // with a single output data consumer (presumably if we reach this node from // previously absorbed or a root node, it means that this node is not used // as an input to any other op, outside of the group) - if (ctx_.node_map->GetOutputs(node->name()).size() != 1) { + if (NumNonControlDataOutputs(*node, *ctx_.node_map) != 1) { return false; } // must be on the same device as a root node if (node->device() != group.root_node->device()) { return false; } - // All input shapes must be fully defined and equal to the node shape - return HasAllInputsOfIdenticalShape(*node, shape); + // All input shapes must be symbolically defined and equal to the node shape + OpInfo::TensorProperties properties; + Status has_properties = GetTensorProperties(name, &properties); + return has_properties.ok() && + HasAllInputsOfSymbolicallyEqualShape(*node, properties); } // Node requirements both for a root node and an absorbed node @@ -660,15 +673,19 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { } // Check that optimized group node name doesn't exists. It might happen if - // graph optimized multiple times without pruning beween invocations. + // graph optimized multiple times without pruning between invocations. bool IsRewritten(const AddOpsGroup& group) const { return ctx_.node_map->NodeExists(AddOpsGroupName(group)); } // Create an AddOpsGroup with a root in a given node Status CreateAddOpsGroup(const NodeDef* root_node, AddOpsGroup* group) { + OpInfo::TensorProperties root_node_output_properties; + TF_RETURN_IF_ERROR( + GetTensorProperties(root_node->name(), &root_node_output_properties)); + group->root_node = root_node; - group->root_shape = GetInputShape(root_node->name()); + group->root_shape = root_node_output_properties.shape(); group->absorbed_nodes.reserve(root_node->input_size()); for (int i = 0; i < root_node->input_size(); ++i) { @@ -737,6 +754,9 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { added_node->add_input(input); } + // Add frame dependencies that the original node might have had. + AddFrameControlDeps(group.root_node, {added_node}, "", {}); + VLOG(1) << "Absorbed " << group.absorbed_nodes.size() << " Add/AddN nodes from the graph"; @@ -891,8 +911,11 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { mul_node->input(0) == common_factor ? 1 : 0; unique_factors->push_back(mul_node->input(unique_factor_index)); if (i > 0 && !IsAdd(*node)) { - *shapes_match = ShapesEqual(unique_factors->front(), - unique_factors->back(), *ctx_.node_map); + OpInfo::TensorProperties lhs; + OpInfo::TensorProperties rhs; + TF_RETURN_IF_ERROR(GetTensorProperties(unique_factors->front(), &lhs)); + TF_RETURN_IF_ERROR(GetTensorProperties(unique_factors->back(), &rhs)); + *shapes_match = ShapesSymbolicallyEqual(lhs, rhs); } } return Status::OK(); @@ -1627,8 +1650,8 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { } const ArithmeticOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, - node_map_.get(), &frame_map_, - &nodes_to_simplify); + graph_properties_.get(), node_map_.get(), + &frame_map_, &nodes_to_simplify); std::vector> stages; @@ -1660,8 +1683,10 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { const NodeDef* node = nodes_to_simplify.PopBack(); // TODO(ezhulenev): move all rewrites into separate stages - string simplified_tensor = - TrySimplifyAndReplaceUses(node, &nodes_to_simplify); + string simplified_tensor = ""; + if (options_.enable_try_simplify_and_replace) { + simplified_tensor = TrySimplifyAndReplaceUses(node, &nodes_to_simplify); + } // if it was not simplified try to run it through all configured stages if (simplified_tensor.empty()) { diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index d5a7af5ba6..2c6b52c072 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -55,6 +55,9 @@ class ArithmeticOptimizer : public GraphOptimizer { // Granular control for arithmetic optimizer stages struct ArithmeticOptimizerOptions { + // TODO(ezhulenev): flag do disable TrySimplifyAndReplaceUses in tests. + // Remove when all optimizers will be migrated to separate stages. + bool enable_try_simplify_and_replace = true; bool combine_add_to_addn = true; bool hoist_common_factor_out_of_aggregation = true; bool remove_inverse_transpose = true; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index e1f47625c1..d677aee589 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -89,6 +89,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { // should explicitly enable required optimization for tests isolation void DisableAllStages(ArithmeticOptimizer* optimizer) { ArithmeticOptimizer::ArithmeticOptimizerOptions options; + options.enable_try_simplify_and_replace = false; options.combine_add_to_addn = false; options.hoist_common_factor_out_of_aggregation = false; options.remove_inverse_transpose = false; @@ -1270,7 +1271,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveRedundantCast) { EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "inputs", "outputs")); } -TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { +TEST_F(ArithmeticOptimizerTest, AddOpsRewrite_AddOpsOfIdenticalShape) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); tensorflow::Scope sx = s.NewSubScope("x"); tensorflow::Scope sy = s.NewSubScope("y"); @@ -1322,7 +1323,7 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { EXPECT_EQ(collapsed_add->name(), updated_outputs->input(0)); } -TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { +TEST_F(ArithmeticOptimizerTest, AddOpsRewrite_MultiplePasses) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); @@ -1395,7 +1396,7 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { EXPECT_EQ(collapsed_right->name(), updated_mul->input(1)); } -TEST_F(ArithmeticOptimizerTest, AddOpsRewriteAddInputThroughMultiplePaths) { +TEST_F(ArithmeticOptimizerTest, AddOpsRewrite_AddInputMultipleTimes) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); @@ -1440,5 +1441,59 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteAddInputThroughMultiplePaths) { EXPECT_EQ("c", collapsed_add->input(3)); } +TEST_F(ArithmeticOptimizerTest, AddOpsRewrite_AddOpsOfSymbolicallyEqualShape) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + // unknown input shape propagated symbolically through the graph + auto input = ops::Variable(s.WithOpName("input"), {-1, 2}, DT_FLOAT); + + // [a, b, c] have symbolically equal shapes + auto a = ops::Sqrt(s.WithOpName("a"), input); + auto b = ops::Square(s.WithOpName("b"), input); + auto c = ops::Round(s.WithOpName("c"), input); + + // [add_ab, add_abc] shape must be inferred from inputs + auto add_ab = ops::Add(s.WithOpName("Add_ab"), a, b); + auto add_abc = ops::Add(s.WithOpName("Add_abc"), add_ab, c); + + auto outputs = ops::Identity(s.WithOpName("outputs"), add_abc); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyAddToAddNCombining(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + + // We expect the following rewrite(s) to occur: + // + // + + // / \ + // + c --> AddN(a, b, c) + // / \ + // a b + EXPECT_EQ(6, output.node_size()); + + NodeMap node_map(&output); + + // check add tree was replaced with AddN + const NodeDef* collapsed_add = + node_map.GetNode("ArithmeticOptimizer/AddOpsRewrite_Add_abc_Add_ab"); + ASSERT_TRUE(collapsed_add != nullptr); + EXPECT_EQ("AddN", collapsed_add->op()); + EXPECT_EQ(3, collapsed_add->input_size()); + EXPECT_EQ("a", collapsed_add->input(0)); + EXPECT_EQ("b", collapsed_add->input(1)); + EXPECT_EQ("c", collapsed_add->input(2)); + + // check output was re-wired to new node + const NodeDef* updated_outputs = node_map.GetNode("outputs"); + ASSERT_TRUE(updated_outputs != nullptr); + EXPECT_EQ(collapsed_add->name(), updated_outputs->input(0)); +} + } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index eb1f882ff1..829bfe9e31 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -40,6 +40,16 @@ bool SafeSetScalarTensorValue(double value, Tensor* tensor) { tensor->flat()(0) = static_cast(value); return true; } + +// Is 'node' an operator that consumes only the shape of its input, not the +// data itself? +// TODO(ezhulenev): move to op_types.h. Requires to break circular dependency. +// TODO(ezhulenev): what about Identity passing tensor to Shape consumer? +bool IsShapeConsumer(const NodeDef& node) { + const string& op = node.op(); + return op == "Shape" || op == "ShapeN" || op == "Rank" || op == "Size"; +} + } // namespace NodeMap::NodeMap(GraphDef* graph) { @@ -270,6 +280,22 @@ int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map) { return num_outputs; } +int NumNonControlDataOutputs(const NodeDef& node, const NodeMap& node_map) { + int num_data_outputs = 0; + for (const NodeDef* output : node_map.GetOutputs(node.name())) { + if (IsShapeConsumer(*output)) continue; + + for (int i = 0; i < output->input_size(); ++i) { + const string& input = output->input(i); + if (!IsControlInput(input) && NodeName(input) == node.name()) { + ++num_data_outputs; + break; + } + } + } + return num_data_outputs; +} + // Returns the data type in attribute `attr_name` of `node`. If that attribute // doesn't exist, returns DT_INVALID. DataType GetDataTypeFromAttr(const NodeDef& node, const string& attr_name) { diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index fbd38c1531..7aa31939f5 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -144,6 +144,10 @@ int NumNonControlInputs(const NodeDef& node); // Number of connected non-control outputs. int NumNonControlOutputs(const NodeDef& node, const NodeMap& node_map); +// Number of connected non-control data outputs (Ops that consume output tensor +// data, not just it's shape). +int NumNonControlDataOutputs(const NodeDef& node, const NodeMap& node_map); + // Removes redundant control inputs from node. void DedupControlInputs(NodeDef* node); diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index eabce5b5ee..49a1996d25 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -292,6 +292,47 @@ TEST_F(UtilsTest, DedupControlInputs) { EXPECT_EQ("gnu", foo.input(1)); } +TEST_F(UtilsTest, NumNonControlOutputs) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + // *) Round node has control dependency edge from Add, which + // is not on this scheme (ASCII graphics limitation). + // + // *Round [Sqrt, Shape] + // | | + // | ctrl | + // Mul ------> Add + // / \ / \ + // x y a b + auto x = ops::Variable(s.WithOpName("x"), {1, 2}, DT_FLOAT); + auto y = ops::Variable(s.WithOpName("y"), {1, 2}, DT_FLOAT); + auto a = ops::Variable(s.WithOpName("a"), {1, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {1, 2}, DT_FLOAT); + + auto mul = ops::Multiply(s.WithOpName("mul"), x, y); + auto add = ops::Add(s.WithOpName("add").WithControlDependencies(mul), a, b); + + auto shape = ops::Shape(s.WithOpName("shape"), add); + auto sqrt = ops::Sqrt(s.WithOpName("sqrt"), add); + + auto round = + ops::Round(s.WithOpName("round").WithControlDependencies(add), mul); + + GraphDef graph; + TF_CHECK_OK(s.ToGraphDef(&graph)); + NodeMap node_map(&graph); + + const NodeDef* add_node = node_map.GetNode("add"); + ASSERT_TRUE(add_node != nullptr); + + // [a, b] are only non-control inputs + EXPECT_EQ(2, NumNonControlInputs(*add_node)); + // [sqrt, shape] are non control outputs + EXPECT_EQ(2, NumNonControlOutputs(*add_node, node_map)); + // sqrt is the only data output + EXPECT_EQ(1, NumNonControlDataOutputs(*add_node, node_map)); +} + TEST_F(UtilsTest, DeleteNodes) {} } // namespace -- GitLab From b08c54271084b05ea822b3348a3a448a9fe3b898 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 15 Mar 2018 02:22:17 -0700 Subject: [PATCH 050/960] [SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth. Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile. PiperOrigin-RevId: 189157407 --- tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 2 ++ tensorflow/stream_executor/cuda/cuda_driver.cc | 13 +++++++++++++ tensorflow/stream_executor/cuda/cuda_driver.h | 10 +++++++++- .../stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++++++++ tensorflow/stream_executor/device_description.cc | 3 +++ tensorflow/stream_executor/device_description.h | 9 +++++++++ 6 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index c67b552abb..07be2a0cf9 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -671,6 +671,8 @@ StatusOr> GpuCompiler::RunBackend( if (module->config().hlo_profiling_enabled()) { HloCostAnalysis cost_analysis(ShapeSizeBytesFunction()); + cost_analysis.set_bytes_per_second( + stream_exec->GetDeviceDescription().memory_bandwidth()); TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&cost_analysis)); profile_index_map = MakeUnique(*module); profile_printer = diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index a017ff64d4..58e1e58c59 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -1503,6 +1503,19 @@ static port::StatusOr GetSimpleAttribute(CUdevice device, return true; } +/* static */ port::StatusOr CUDADriver::GetDeviceAttribute( + CUdevice_attribute attribute, CUdevice device) { + int val; + CUresult res = cuDeviceGetAttribute(&val, attribute, device); + if (res != CUDA_SUCCESS) { + return port::Status{ + port::error::INTERNAL, + port::Printf("failed to get device attribute %d for device %d: %s", + attribute, device, ToString(res).c_str())}; + } + return val; +} + /* static */ bool CUDADriver::IsEccEnabled(CUdevice device, bool *result) { int value = -1; CUresult res = diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h index 4002ba2021..fa9172b3f0 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.h +++ b/tensorflow/stream_executor/cuda/cuda_driver.h @@ -400,12 +400,20 @@ class CUDADriver { // Returns a grab-bag of device properties in a caller-owned device_properties // structure for device_ordinal via cuDeviceGetProperties. - // This call is deprecated in the NVIDIA driver API. + // + // This call is deprecated in the NVIDIA driver API; its replacement is + // GetDeviceAttribute // // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html#group__CUDA__DEVICE__DEPRECATED_1g65a5b4e25186bd257df80b98c98cffe6 static bool GetDeviceProperties(CUdevprop *device_properties, int device_ordinal); + // Gets a specific integer-valued property about the given device. + // + // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266 + static port::StatusOr GetDeviceAttribute(CUdevice_attribute attribute, + CUdevice device); + // Returns whether ECC is enabled for the given CUdevice via // cuDeviceGetattribute with CU_DEVICE_ATTRIBUTE_ECC_ENABLED. // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266 diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 4bbd531e14..5ecaf46b8c 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -1103,6 +1103,18 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_device_memory_size(device_memory_size); } + port::StatusOr mem_clock_khz = CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device_ordinal_); + port::StatusOr mem_bus_width_bits = CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, device_ordinal_); + if (mem_clock_khz.ok() && mem_bus_width_bits.ok()) { + // Times 2 because HBM is DDR memory; it gets two data bits per each data + // lane. + builder.set_memory_bandwidth(2 * int64_t{mem_clock_khz.ValueOrDie()} * + 1000 * + int64_t{mem_bus_width_bits.ValueOrDie()} / 8); + } + { BlockDim block_dim_limit; FillBlockDimLimit(&block_dim_limit); diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index a98143e34b..52f5319a3b 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -50,6 +50,7 @@ DeviceDescription::DeviceDescription() shared_memory_alloc_granularity_(1), device_address_bits_(kUninitializedUint64), device_memory_size_(kUninitializedUint64), + memory_bandwidth_(kUninitializedUint64), shared_memory_per_core_(kUninitializedUint64), shared_memory_per_block_(kUninitializedUint64), clock_rate_ghz_(-1.0), @@ -85,6 +86,8 @@ std::unique_ptr> DeviceDescription::ToMap() const { result["Device Address Bits"] = port::StrCat(device_address_bits()); result["Device Memory Size"] = port::HumanReadableNumBytes::ToString(device_memory_size()); + result["Memory Bandwidth"] = port::StrCat( + port::HumanReadableNumBytes::ToString(memory_bandwidth_), "/s"); result["Shared Memory Per Core"] = port::HumanReadableNumBytes::ToString(shared_memory_per_core_); diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index f2b35bcb43..fcf0928096 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -140,6 +140,11 @@ class DeviceDescription { // Returns the device memory size in bytes. uint64 device_memory_size() const { return device_memory_size_; } + // Returns the device's memory bandwidth in bytes/sec. (This is for + // reads/writes to/from the device's own memory, not for transfers between the + // host and device.) + uint64 memory_bandwidth() const { return memory_bandwidth_; } + // Returns the device's core clock rate in GHz. float clock_rate_ghz() const { return clock_rate_ghz_; } @@ -212,6 +217,7 @@ class DeviceDescription { uint64 device_address_bits_; uint64 device_memory_size_; + uint64 memory_bandwidth_; // Shared memory limits on a given device. uint64 shared_memory_per_core_; @@ -305,6 +311,9 @@ class DeviceDescriptionBuilder { void set_device_memory_size(uint64 value) { device_description_->device_memory_size_ = value; } + void set_memory_bandwidth(uint64 value) { + device_description_->memory_bandwidth_ = value; + } void set_shared_memory_per_core(int64 value) { device_description_->shared_memory_per_core_ = value; -- GitLab From 10927e9f77d0bfebb597b5cc64fa3908db23361c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 06:56:23 -0700 Subject: [PATCH 051/960] Add ability to use feature_columns in KMeans Estimator. PiperOrigin-RevId: 189179304 --- tensorflow/contrib/factorization/BUILD | 2 + .../factorization/python/ops/kmeans.py | 49 +++++++++++++------ .../factorization/python/ops/kmeans_test.py | 37 ++++++++++---- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 180f1b68f3..90f10f1fa8 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -66,6 +66,7 @@ tf_custom_op_py_library( "//tensorflow/python:variables", "//tensorflow/python/estimator", "//tensorflow/python/estimator:model_fn", + "//tensorflow/python/feature_column:feature_column_py", "//third_party/py/numpy", ], ) @@ -238,6 +239,7 @@ py_test( "//tensorflow/python:random_ops", "//tensorflow/python:training", "//tensorflow/python/estimator:run_config", + "//tensorflow/python/feature_column:feature_column_py", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index c092f85d35..38faca119d 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -26,6 +26,7 @@ from tensorflow.contrib.factorization.python.ops import clustering_ops from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.export import export_output +from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -105,24 +106,32 @@ class _InitializeClustersHook(session_run_hook.SessionRunHook): logging.info(e) -def _parse_features_if_necessary(features): +def _parse_features_if_necessary(features, feature_columns): """Helper function to convert the input points into a usable format. Args: - features: The input points. + features: The input features. + feature_columns: An optionable iterable containing all the feature columns + used by the model. All items in the set should be feature column instances + that can be passed to `tf.feature_column.input_layer`. If this is None, + all features will be used. Returns: - If `features` is a dict of `k` features, each of which is a vector of `n` - scalars, the return value is a Tensor of shape `(n, k)` representing `n` - input points, where the items in the `k` dimension are sorted - lexicographically by `features` key. If `features` is not a dict, it is - returned unmodified. + If `features` is a dict of `k` features (optionally filtered by + `feature_columns`), each of which is a vector of `n` scalars, the return + value is a Tensor of shape `(n, k)` representing `n` input points, where the + items in the `k` dimension are sorted lexicographically by `features` key. + If `features` is not a dict, it is returned unmodified. """ - if isinstance(features, dict): - keys = sorted(features.keys()) - with ops.colocate_with(features[keys[0]]): - features = array_ops.concat([features[k] for k in keys], axis=1) - return features + if not isinstance(features, dict): + return features + + if feature_columns: + return fc.input_layer(features, feature_columns) + + keys = sorted(features.keys()) + with ops.colocate_with(features[keys[0]]): + return array_ops.concat([features[k] for k in keys], axis=1) class _ModelFn(object): @@ -130,7 +139,8 @@ class _ModelFn(object): def __init__(self, num_clusters, initial_clusters, distance_metric, random_seed, use_mini_batch, mini_batch_steps_per_iteration, - kmeans_plus_plus_num_retries, relative_tolerance): + kmeans_plus_plus_num_retries, relative_tolerance, + feature_columns): self._num_clusters = num_clusters self._initial_clusters = initial_clusters self._distance_metric = distance_metric @@ -139,6 +149,7 @@ class _ModelFn(object): self._mini_batch_steps_per_iteration = mini_batch_steps_per_iteration self._kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries self._relative_tolerance = relative_tolerance + self._feature_columns = feature_columns def model_fn(self, features, mode, config): """Model function for the estimator. @@ -166,7 +177,7 @@ class _ModelFn(object): # input_points is a single Tensor. Therefore, the sharding functionality # in clustering_ops is unused, and some of the values below are lists of a # single item. - input_points = _parse_features_if_necessary(features) + input_points = _parse_features_if_necessary(features, self._feature_columns) # Let N = the number of input_points. # all_distances: A list of one matrix of shape (N, num_clusters). Each value @@ -316,7 +327,8 @@ class KMeansClustering(estimator.Estimator): mini_batch_steps_per_iteration=1, kmeans_plus_plus_num_retries=2, relative_tolerance=None, - config=None): + config=None, + feature_columns=None): """Creates an Estimator for running KMeans training and inference. This Estimator implements the following variants of the K-means algorithm: @@ -383,6 +395,10 @@ class KMeansClustering(estimator.Estimator): iterations. Stops learning if the loss changes less than this amount. This may not work correctly if `use_mini_batch=True`. config: See @{tf.estimator.Estimator}. + feature_columns: An optionable iterable containing all the feature columns + used by the model. All items in the set should be feature column + instances that can be passed to `tf.feature_column.input_layer`. If this + is None, all features will be used. Raises: ValueError: An invalid argument was passed to `initial_clusters` or @@ -402,7 +418,8 @@ class KMeansClustering(estimator.Estimator): model_fn=_ModelFn( num_clusters, initial_clusters, distance_metric, random_seed, use_mini_batch, mini_batch_steps_per_iteration, - kmeans_plus_plus_num_retries, relative_tolerance).model_fn, + kmeans_plus_plus_num_retries, relative_tolerance, + feature_columns).model_fn, model_dir=model_dir, config=config) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py index 06a2c52c11..0103cc4439 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -27,6 +27,7 @@ from sklearn.cluster import KMeans as SklearnKMeans # pylint: disable=g-import-not-at-top from tensorflow.contrib.factorization.python.ops import kmeans as kmeans_lib from tensorflow.python.estimator import run_config +from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -226,27 +227,43 @@ class KMeansTest(KMeansTestBase): self._infer_helper(kmeans, clusters, 10) self._infer_helper(kmeans, clusters, 1) + def _parse_feature_dict_helper(self, features, parsed_feature_dict): + # Perform a sanity check. + self.assertEqual(features.shape, parsed_feature_dict.shape) + self.assertEqual(features.dtype, parsed_feature_dict.dtype) + # Then check that running the tensor yields the original list of points. + with self.test_session() as sess: + parsed_points = sess.run(parsed_feature_dict) + self.assertAllEqual(self.points, parsed_points) + def test_parse_features(self): """Tests the various behaviours of kmeans._parse_features_if_necessary.""" # No-op if a tensor is passed in. features = constant_op.constant(self.points) - parsed_features = kmeans_lib._parse_features_if_necessary(features) + parsed_features = kmeans_lib._parse_features_if_necessary(features, None) self.assertAllEqual(features, parsed_features) - # A dict is transformed into a tensor. + # All values from a feature dict are transformed into a tensor. feature_dict = { 'x': [[point[0]] for point in self.points], 'y': [[point[1]] for point in self.points] } - parsed_feature_dict = kmeans_lib._parse_features_if_necessary(feature_dict) - # Perform a sanity check. - self.assertEqual(features.shape, parsed_feature_dict.shape) - self.assertEqual(features.dtype, parsed_feature_dict.dtype) - # Then check that running the tensor yields the original list of points. - with self.test_session() as sess: - parsed_points = sess.run(parsed_feature_dict) - self.assertAllEqual(self.points, parsed_points) + parsed_feature_dict = kmeans_lib._parse_features_if_necessary( + feature_dict, None) + self._parse_feature_dict_helper(features, parsed_feature_dict) + + # Only the feature_columns of a feature dict are transformed into a tensor. + feature_dict_with_extras = { + 'foo': 'bar', + 'x': [[point[0]] for point in self.points], + 'baz': {'fizz': 'buzz'}, + 'y': [[point[1]] for point in self.points] + } + feature_columns = [fc.numeric_column(key='x'), fc.numeric_column(key='y')] + parsed_feature_dict = kmeans_lib._parse_features_if_necessary( + feature_dict_with_extras, feature_columns) + self._parse_feature_dict_helper(features, parsed_feature_dict) class KMeansTestMultiStageInit(KMeansTestBase): -- GitLab From 1de48e49000c80d762052abb8173ee040e116293 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 08:09:08 -0700 Subject: [PATCH 052/960] Removed the pointer to model from interpreter. PiperOrigin-RevId: 189186901 --- tensorflow/contrib/lite/interpreter.h | 12 ------------ tensorflow/contrib/lite/model.cc | 2 -- 2 files changed, 14 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 7c5a195815..af143370ee 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/memory_planner.h" -#include "tensorflow/contrib/lite/schema/schema_generated.h" namespace tflite { @@ -275,12 +274,6 @@ class Interpreter { // contain new nodes that replace 1 more nodes. TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate); - // WARNING: This is a deprecated interface and will be removed as soon as - // possible. Please do not use it. - // TODO(impjdi): Remove this interface after resolving dependencies. - void set_model(const Model* model) { model_ = const_cast(model); } - Model* model() const { return model_; } - // Ensure the data in `tensor.data` is readable. In case delegate is used, // it might require to copy the data from delegate buffer to raw memory. TfLiteStatus EnsureTensorDataIsReadable(int tensor_index) { @@ -509,11 +502,6 @@ class Interpreter { std::unique_ptr nnapi_delegate_; std::unique_ptr memory_planner_; - - // WARNING: This is a deprecated interface and will be removed as soon as - // possible. Please do not use it. - // TODO(impjdi): Remove this interface after resolving dependencies. - Model* model_ = nullptr; }; } // namespace tflite diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 8c456e70da..3cf6bcbfcd 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -794,8 +794,6 @@ TfLiteStatus InterpreterBuilder::operator()( return cleanup_and_error(); } - (**interpreter).set_model(model_); - // Parse inputs/outputs (**interpreter).SetInputs(FlatBufferIntArrayToVector(subgraph->inputs())); (**interpreter).SetOutputs(FlatBufferIntArrayToVector(subgraph->outputs())); -- GitLab From 1e1f1b8cec776bdfa85840a044e14a6461910dee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 08:37:21 -0700 Subject: [PATCH 053/960] Disable Add/AddN rewrite (temp). PiperOrigin-RevId: 189189997 --- tensorflow/core/grappler/optimizers/arithmetic_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 2c6b52c072..bd2f42ee8c 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -58,7 +58,7 @@ class ArithmeticOptimizer : public GraphOptimizer { // TODO(ezhulenev): flag do disable TrySimplifyAndReplaceUses in tests. // Remove when all optimizers will be migrated to separate stages. bool enable_try_simplify_and_replace = true; - bool combine_add_to_addn = true; + bool combine_add_to_addn = false; bool hoist_common_factor_out_of_aggregation = true; bool remove_inverse_transpose = true; bool remove_redundant_bitcast = true; -- GitLab From de32f18ef3de465bc5391c4951a53f2a25b6ee3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 09:07:41 -0700 Subject: [PATCH 054/960] Expose setNumThreads in the Java API. PiperOrigin-RevId: 189193847 --- .../tensorflow/lite/NativeInterpreterWrapper.java | 6 ++++++ .../main/native/nativeinterpreterwrapper_jni.cc | 10 ++++++++++ .../src/main/native/nativeinterpreterwrapper_jni.h | 11 +++++++++++ .../java/org/tensorflow/lite/TestHelper.java | 14 ++++++++++++++ 4 files changed, 41 insertions(+) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 014636ffff..518e8b3a96 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -140,6 +140,10 @@ final class NativeInterpreterWrapper implements AutoCloseable { useNNAPI(interpreterHandle, useNNAPI); } + void setNumThreads(int numRecommendedThreads) { + numThreads(interpreterHandle, numRecommendedThreads); + } + /** Gets index of an input given its name. */ int getInputIndex(String name) { if (inputsIndexes == null) { @@ -308,6 +312,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private static native void useNNAPI(long interpreterHandle, boolean state); + private static native void numThreads(long interpreterHandle, int numRecommendedThreads); + private static native long createErrorReporter(int size); private static native long createModel(String modelPathOrBuffer, long errorHandle); diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 2870ffe8eb..21bcff40bd 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -316,6 +316,16 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, interpreter->UseNNAPI(static_cast(state)); } +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads) { + tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); + if (interpreter == nullptr) return; + interpreter->SetNumThreads(static_cast(num_threads)); +} + JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( JNIEnv* env, jclass clazz, jint size) { diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index d611ec7f38..fb76125471 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -69,6 +69,17 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, jlong handle, jboolean state); +/* + * Class: org_tensorflow_lite_NativeInterpreterWrapper + * Method: + * Signature: (JI) + */ +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, + jclass clazz, + jlong handle, + jint num_threads); + /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java index 3aef0c3bb6..3722e51b3b 100644 --- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java @@ -33,6 +33,20 @@ public class TestHelper { } } + /** + * Sets the number of threads for an {@code Interpreter}. + * + * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code + * IllegalArgumentException} will be thrown. + * @param numRecommendedThreads an integer value indicating the number of recommended threads. + */ + public static void setNumThreads(Interpreter interpreter, int numRecommendedThreads) { + if (interpreter != null && interpreter.wrapper != null) { + interpreter.wrapper.setNumThreads(numRecommendedThreads); + } else { + throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI."); + } + } /** * Gets the last inference duration in nanoseconds. It returns null if there is no previous * inference run or the last inference run failed. -- GitLab From 85297e839dbe4422b6f6e5ef3aff37af397773f9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 09:33:03 -0700 Subject: [PATCH 055/960] Enable constant folding optimizations in loop bodies by copying constants across Enter nodes. PiperOrigin-RevId: 189197514 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/constant_folding.cc | 135 +++++--- .../optimizers/constant_folding_test.cc | 323 ++++++++++-------- .../core/grappler/utils/grappler_test.cc | 21 +- .../core/grappler/utils/grappler_test.h | 15 +- tensorflow/python/training/saver_test.py | 11 +- 6 files changed, 315 insertions(+), 191 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index fe095a725a..ffeaa3835e 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -554,5 +554,6 @@ tf_cc_test( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + "//tensorflow/core/grappler/utils:grappler_test", ], ) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 77ccd4ffc8..263983584c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1535,12 +1535,12 @@ Status ConstantFolding::ReplaceOperationWithConstant( return Status::OK(); } -Status ConstantFolding::SimplifyGraph(GraphDef* output, +Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, GraphProperties* properties, bool use_shape_info) { const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; - for (int i = 0; i < output->node_size(); ++i) { - NodeDef* node = output->mutable_node(i); + for (int i = 0; i < optimized_graph->node_size(); ++i) { + NodeDef* node = optimized_graph->mutable_node(i); // Remove Shuffle or Reverse op over scalar values. if (use_shape_info && @@ -1554,7 +1554,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, replaceable &= shape.dim(j).size() == 1; } if (replaceable) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } } @@ -1595,7 +1595,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } } if (replaceable) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } } @@ -1624,7 +1624,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } } if (replaceable) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } } @@ -1648,7 +1648,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, replaceable &= flatten(j) == 0; } if (replaceable) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } } @@ -1668,7 +1668,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, replaceable &= shape.dim(j).size() > 1; } if (replaceable) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } } @@ -1677,7 +1677,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, !OptimizedNodeExists(*node, "_const_axis")) { // Create constant axis node. Tensor axis_t(DT_INT32, TensorShape({})); - NodeDef* axis_node = output->add_node(); + NodeDef* axis_node = optimized_graph->add_node(); axis_node->set_name(OptimizedNodeName(*node, "_const_axis")); const int axis = node->attr().at("axis").i(); if (!SetTensorValue(DT_INT32, axis, &axis_t).ok() || @@ -1685,7 +1685,6 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, .ok()) { continue; } - VLOG(1) << "*** Rewriting trivial Pack node: " << node->DebugString(); // Add a control dependency to make sure axis_node is in the right frame. const string ctrl_dep = ConstantFolding::AddControlDependency( node->input(0), graph_, node_map_.get()); @@ -1703,6 +1702,50 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (node->input_size() > 2) { node->mutable_input()->SwapElements(1, node->input_size() - 1); } + graph_modified_ = true; + continue; + } + + // Move constants past Enter. + if (IsEnter(*node) && node->input_size() > 0) { + const string& node_name = node->name(); + const NodeDef* input = node_map_->GetNode(node->input(0)); + if (input != nullptr && IsReallyConstant(*input) && + !OptimizedNodeExists(*input, "_enter")) { + auto fanouts = node_map_->GetOutputs(node_name); + // Find non-constant nodes that consume the output of *node. + std::vector consumers; + for (NodeDef* fanout : fanouts) { + if (!IsConstant(*fanout)) { + for (int i = 0; i < fanout->input_size(); ++i) { + if (fanout->input(i) == node_name) { + consumers.push_back(fanout); + break; + } + } + } + } + if (!consumers.empty()) { + NodeDef* new_node = optimized_graph->add_node(); + *new_node = *input; + new_node->set_name(OptimizedNodeName(*input, "_enter")); + new_node->clear_input(); + new_node->add_input(AsControlDependency(node_name)); + node_map_->AddNode(new_node->name(), new_node); + node_map_->AddOutput(node_name, new_node->name()); + for (NodeDef* consumer : consumers) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (consumer->input(i) == node_name) { + node_map_->UpdateInput(consumer->name(), node_name, + new_node->name()); + consumer->set_input(i, new_node->name()); + } + } + } + graph_modified_ = true; + continue; + } + } } // Switch(x, x) will always feed false to its false branch and true to @@ -1754,7 +1797,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, return n1->name() < n2->name(); }); // Create constant false & true nodes. - NodeDef* false_node = output->add_node(); + NodeDef* false_node = optimized_graph->add_node(); false_node->set_name(OptimizedNodeName(*node, "_const_false")); if (!CreateNodeDef(false_node->name(), TensorValue(&false_t), false_node) @@ -1763,7 +1806,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } false_node->set_device(node->device()); - NodeDef* true_node = output->add_node(); + NodeDef* true_node = optimized_graph->add_node(); true_node->set_name(OptimizedNodeName(*node, "_const_true")); if (!CreateNodeDef(true_node->name(), TensorValue(&true_t), true_node) .ok()) { @@ -1776,10 +1819,10 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const string false_port = node->name(); const string true_port = strings::StrCat(node->name(), ":1"); const string false_ctrl_dep = - AddControlDependency(false_port, output, node_map_.get()); + AddControlDependency(false_port, optimized_graph, node_map_.get()); false_node->add_input(false_ctrl_dep); const string true_ctrl_dep = - AddControlDependency(true_port, output, node_map_.get()); + AddControlDependency(true_port, optimized_graph, node_map_.get()); true_node->add_input(true_ctrl_dep); node_map_->AddNode(false_node->name(), false_node); @@ -1861,13 +1904,13 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { // 1 * y = y or 0 + y = y. - ReplaceOperationWithSnapshot(1, node, output); + ReplaceOperationWithSnapshot(1, node, optimized_graph); continue; } if (y_matches_output_shape && (is_sub && x_is_zero)) { // Replace 0 - y with Neg(y). - ReplaceSubtractionFromZeroByNegation(node, output); + ReplaceSubtractionFromZeroByNegation(node, optimized_graph); continue; } @@ -1875,7 +1918,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (y_matches_output_shape && is_any_div && x_is_one) { DataType type = node->attr().at("T").type(); if (DataTypeIsFloating(type) || DataTypeIsComplex(type)) { - ReplaceDivisionOfOnesByReciprocal(node, output); + ReplaceDivisionOfOnesByReciprocal(node, optimized_graph); continue; } } @@ -1888,7 +1931,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) || ((is_add || is_sub) && y_is_zero))) { // x * 1 = x or x / 1 = x or x +/- 0 = x - ReplaceOperationWithSnapshot(0, node, output); + ReplaceOperationWithSnapshot(0, node, optimized_graph); continue; } @@ -1901,18 +1944,18 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, (is_mul || is_matmul || optimize_zeros_divided_by_y)) { const PartialTensorShape shp(output_shape); if (shp.IsFullyDefined()) { - TF_RETURN_IF_ERROR( - ReplaceOperationWithConstant(0, output_shape, node, output)); + TF_RETURN_IF_ERROR(ReplaceOperationWithConstant(0, output_shape, node, + optimized_graph)); continue; } // Even if an input shape is only partially known, we may known that it // matches the output shape and thus forward the corresponding zero // input. if ((is_mul || is_any_div) && x_is_zero && x_matches_output_shape) { - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithIdentity(0, node, optimized_graph); continue; } else if (is_mul && y_is_zero && y_matches_output_shape) { - ReplaceOperationWithIdentity(1, node, output); + ReplaceOperationWithIdentity(1, node, optimized_graph); continue; } } @@ -1937,7 +1980,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, continue; } // Insert new reciprocal op and change node from Div to Mul. - NodeDef* reciprocal_node = output->add_node(); + NodeDef* reciprocal_node = optimized_graph->add_node(); reciprocal_node->set_name(OptimizedNodeName(*node, "_recip")); reciprocal_node->set_op("Reciprocal"); reciprocal_node->set_device(node->device()); @@ -1950,6 +1993,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, node_map_->UpdateOutput(node->name(), const_input, reciprocal_node->name()); graph_modified_ = true; + continue; } // Consider the transformation @@ -2042,6 +2086,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (IsIdentityN(*node) && NumNonControlInputs(*node) > 0) { const std::set& tmp = node_map_->GetOutputs(node->name()); const std::vector consumers(tmp.begin(), tmp.end()); + bool updated_graph = false; for (int input_idx = 0; input_idx < node->input_size(); ++input_idx) { const string& input = node->input(input_idx); if (IsControlInput(input)) { @@ -2072,7 +2117,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (input_node_name == node->name() && output_idx == input_idx) { consumer->set_input(consumer_input_idx, input); // We will keep the input from IdentityN through a control - // dependendy, so we only need to add the consumer as an output + // dependency, so we only need to add the consumer as an output // for the constant input node. node_map_->AddOutput(NodeName(input), consumer->name()); add_dep = true; @@ -2080,12 +2125,18 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (add_dep) { consumer->add_input(AsControlDependency(node->name())); + updated_graph = true; } } } } - for (NodeDef* consumer : consumers) { - DedupControlInputs(consumer); + + if (updated_graph) { + for (NodeDef* consumer : consumers) { + DedupControlInputs(consumer); + } + graph_modified_ = true; + continue; } } @@ -2126,7 +2177,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (1 < const_inputs.size() && const_inputs.size() < num_non_control_inputs && !node_map_->NodeExists(new_node_name)) { - NodeDef* added_node = output->add_node(); + NodeDef* added_node = optimized_graph->add_node(); *added_node = *node; // Always use AddN for the constant node, since AccumulateNV2 is a fake // node that cannot be constant folded, since it does not have a kernel. @@ -2230,7 +2281,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (node_map_->NodeExists(new_node_name)) { break; } - NodeDef* added_node = output->add_node(); + NodeDef* added_node = optimized_graph->add_node(); *added_node = *node; added_node->set_name(new_node_name); node_map_->AddNode(added_node->name(), added_node); @@ -2278,7 +2329,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, Status ConstantFolding::RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, - GraphDef* output) { + GraphDef* optimized_graph) { node_map_.reset(new NodeMap(graph_)); nodes_whitelist_.clear(); // Fold fetch nodes iff it has a single fanout. Note that if a fetch node @@ -2307,20 +2358,20 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, TF_RETURN_IF_ERROR(MaterializeShapes(properties)); TF_RETURN_IF_ERROR(MaterializeConstants(properties)); } - TF_RETURN_IF_ERROR(FoldGraph(output)); - node_map_.reset(new NodeMap(output)); - TF_RETURN_IF_ERROR(SimplifyGraph(output, &properties, can_use_shape_info)); + TF_RETURN_IF_ERROR(FoldGraph(optimized_graph)); + node_map_.reset(new NodeMap(optimized_graph)); + TF_RETURN_IF_ERROR( + SimplifyGraph(optimized_graph, &properties, can_use_shape_info)); return Status::OK(); } Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* output) { + GraphDef* optimized_graph) { // TensorFlow flushes denormals to zero and rounds to nearest, so we do // the same here. port::ScopedFlushDenormal flush; port::ScopedSetRound round(FE_TONEAREST); - nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -2332,20 +2383,20 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, } has_fetch_ = !item.fetch.empty(); - GrapplerItem item_to_optimize = item; - *output = item.graph; + *optimized_graph = item.graph; int64 node_count; do { graph_modified_ = false; - item_to_optimize.graph.Swap(output); + item_to_optimize.graph.Swap(optimized_graph); graph_ = &item_to_optimize.graph; - *output = GraphDef(); + *optimized_graph = GraphDef(); node_count = graph_->node_size(); - TF_RETURN_IF_ERROR(RunOptimizationPass(cluster, item_to_optimize, output)); - } while (graph_modified_ || output->node_size() != node_count); - *output->mutable_library() = item.graph.library(); - *output->mutable_versions() = item.graph.versions(); + TF_RETURN_IF_ERROR( + RunOptimizationPass(cluster, item_to_optimize, optimized_graph)); + } while (graph_modified_ || optimized_graph->node_size() != node_count); + *optimized_graph->mutable_library() = item.graph.library(); + *optimized_graph->mutable_versions() = item.graph.versions(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 9050ccb053..aeb430b384 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -43,9 +43,9 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { item.fetch.push_back("d"); TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); EXPECT_EQ(1, output.node_size()); @@ -89,9 +89,9 @@ TEST_F(ConstantFoldingTest, AddTree) { item.fetch = {"add_parent", "mul_parent", "addmul_parent"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // We expect the following rewrite(s) to occur: @@ -525,9 +525,9 @@ TEST_F(ConstantFoldingTest, CreateConstNodes) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); EXPECT_EQ(24, output.node_size()); @@ -574,9 +574,9 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { item.fetch.push_back("f"); TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); EXPECT_EQ(2, output.node_size()); @@ -615,9 +615,9 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { item.fetch.push_back("e"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); std::vector expected_nodes = {"dflt", "p1", "p2", "e"}; @@ -658,9 +658,9 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); std::vector expected_nodes = {"dflt", "p1", "p2", "c", @@ -715,9 +715,9 @@ TEST_F(ConstantFoldingTest, ControlDependenciesDeduplicate) { item.fetch.push_back("i2"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); std::vector expected_nodes = {"dflt", "p1", "p2", "i2"}; @@ -789,9 +789,9 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { } item.fetch = outputs; - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int constant_folded = 0; @@ -827,9 +827,9 @@ TEST_F(ConstantFoldingTest, ShapeMaterialization) { item.fetch.push_back("p2"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -866,9 +866,9 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationEmptyFetch) { GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -920,9 +920,9 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) { GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; for (const auto& node : output.node()) { @@ -978,9 +978,9 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN_MultipleOutputs) { item.fetch.push_back("ia"); item.fetch.push_back("ib"); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -1039,9 +1039,9 @@ TEST_F(ConstantFoldingTest, SwitchNodesEmptyFetch) { GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); std::set present_nodes = {"v_in", "v_ctrl", @@ -1117,9 +1117,9 @@ TEST_F(ConstantFoldingTest, SwitchNodes) { TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); std::set present_nodes = {"v_in", "v_ctrl", "switch", "i", @@ -1185,9 +1185,9 @@ TEST_F(ConstantFoldingTest, MergeNodes) { item.fetch = {"out1", "idx1", "out2", "idx2", "out3", "idx3"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found_nodes = 0; @@ -1262,18 +1262,18 @@ TEST_F(ConstantFoldingTest, ShuffleReverseOnScalarRemoval) { item.fetch = {"out1", "out2"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("s1", "Identity", {"in1"}, &want); - AddNode("s2", "Identity", {"in2", AsControlDependency("in1")}, &want); - AddNode("out1", "Add", {"s1", "s2"}, &want); - AddNode("out2", "Identity", {"s2"}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("s1", "Identity", {"in1"}, {}, &want); + AddNode("s2", "Identity", {"in2", AsControlDependency("in1")}, {}, &want); + AddNode("out1", "Add", {"s1", "s2"}, {}, &want); + AddNode("out2", "Identity", {"s2"}, {}, &want); CompareGraphs(want, got); } @@ -1295,21 +1295,21 @@ TEST_F(ConstantFoldingTest, SliceWithSameDimensionRemoval) { item.fetch = {"out"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("begin", "Const", {}, &want); - AddNode("size", "Const", {}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("begin", "Const", {}, {}, &want); + AddNode("size", "Const", {}, {}, &want); AddNode("s1", "Identity", {"in1", AsControlDependency("begin"), AsControlDependency("size")}, - &want); - AddNode("s2", "Slice", {"in2", "begin", "size"}, &want); - AddNode("out", "Add", {"s1", "s2"}, &want); + {}, &want); + AddNode("s2", "Slice", {"in2", "begin", "size"}, {}, &want); + AddNode("out", "Add", {"s1", "s2"}, {}, &want); CompareGraphs(want, got); } @@ -1332,22 +1332,22 @@ TEST_F(ConstantFoldingTest, SliceWithSameDimensionRemoval) { item.fetch = {"out"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("begin1", "Const", {}, &want); - AddNode("begin2", "Const", {}, &want); - AddNode("size", "Const", {}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("begin1", "Const", {}, {}, &want); + AddNode("begin2", "Const", {}, {}, &want); + AddNode("size", "Const", {}, {}, &want); AddNode("s1", "Identity", {"in1", AsControlDependency("begin1"), AsControlDependency("size")}, - &want); - AddNode("s2", "Slice", {"in2", "begin2", "size"}, &want); - AddNode("out", "Add", {"s1", "s2"}, &want); + {}, &want); + AddNode("s2", "Slice", {"in2", "begin2", "size"}, {}, &want); + AddNode("out", "Add", {"s1", "s2"}, {}, &want); CompareGraphs(want, got); } @@ -1370,19 +1370,20 @@ TEST_F(ConstantFoldingTest, TileWithMultipliesBeingOne) { item.fetch = {"out"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("multiplies1", "Const", {}, &want); - AddNode("multiplies2", "Const", {}, &want); - AddNode("t1", "Identity", {"in1", AsControlDependency("multiplies1")}, &want); - AddNode("t2", "Tile", {"in2", "multiplies2"}, &want); - AddNode("out", "Add", {"t1", "t2"}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("multiplies1", "Const", {}, {}, &want); + AddNode("multiplies2", "Const", {}, {}, &want); + AddNode("t1", "Identity", {"in1", AsControlDependency("multiplies1")}, {}, + &want); + AddNode("t2", "Tile", {"in2", "multiplies2"}, {}, &want); + AddNode("out", "Add", {"t1", "t2"}, {}, &want); CompareGraphs(want, got); } @@ -1408,23 +1409,23 @@ TEST_F(ConstantFoldingTest, PaddingWithZeroSize) { item.fetch = {"out"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("paddings1", "Const", {}, &want); - AddNode("paddings2", "Const", {}, &want); - AddNode("c1", "Const", {}, &want); - AddNode("c2", "Const", {}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("paddings1", "Const", {}, {}, &want); + AddNode("paddings2", "Const", {}, {}, &want); + AddNode("c1", "Const", {}, {}, &want); + AddNode("c2", "Const", {}, {}, &want); AddNode("p1", "Identity", {"in1", AsControlDependency("paddings1"), AsControlDependency("c1")}, - &want); - AddNode("p2", "PadV2", {"in2", "paddings2", "c2"}, &want); - AddNode("out", "Add", {"p1", "p2"}, &want); + {}, &want); + AddNode("p2", "PadV2", {"in2", "paddings2", "c2"}, {}, &want); + AddNode("out", "Add", {"p1", "p2"}, {}, &want); CompareGraphs(want, got); } @@ -1444,17 +1445,17 @@ TEST_F(ConstantFoldingTest, SqueezeWithAllDimesionsGreaterThanOne) { item.fetch = {"out"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef got; - Status status = fold.Optimize(nullptr, item, &got); + Status status = optimizer.Optimize(nullptr, item, &got); TF_EXPECT_OK(status); GraphDef want; - AddNode("in1", "VariableV2", {}, &want); - AddNode("in2", "VariableV2", {}, &want); - AddNode("s1", "Identity", {"in1"}, &want); - AddNode("s2", "Squeeze", {"in2"}, &want); - AddNode("out", "Add", {"s1", "s2"}, &want); + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("s1", "Identity", {"in1"}, {}, &want); + AddNode("s2", "Squeeze", {"in2"}, {}, &want); + AddNode("out", "Add", {"s1", "s2"}, {}, &want); CompareGraphs(want, got); } @@ -1475,9 +1476,9 @@ TEST_F(ConstantFoldingTest, NoOpReduction) { item.fetch.push_back("s"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); bool found = false; @@ -1534,9 +1535,9 @@ TEST_F(ConstantFoldingTest, NoOpReshape) { item.fetch = {"s1", "s2", "s3", "s4"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -1581,9 +1582,9 @@ TEST_F(ConstantFoldingTest, Packing) { GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // Make sure that the representation of the folded constant is space @@ -1616,14 +1617,14 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // Run a second time to make sure the optimization is idempotent. item.graph.Swap(&output); - status = fold.Optimize(nullptr, item, &output); + status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -1677,14 +1678,14 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch.push_back("reshape"); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // Run a second time to make sure the optimization is idempotent. item.graph.Swap(&output); - status = fold.Optimize(nullptr, item, &output); + status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); int found = 0; @@ -1717,9 +1718,9 @@ TEST_F(ConstantFoldingTest, LargeConstant) { TF_CHECK_OK(scope.ToGraphDef(&item.graph)); item.fetch.push_back("out"); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // Make sure the diag node hasn't been folded, since it would use too much @@ -1756,9 +1757,9 @@ TEST_F(ConstantFoldingTest, SwitchIdenticalInputs) { item.fetch.push_back("id_true"); TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); EXPECT_EQ(6, output.node_size()); @@ -1984,8 +1985,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_IdentityN) { Output c1 = ops::Const(scope.WithOpName("c1"), 1.0f, {2, 2}); Output c2 = ops::Const(scope.WithOpName("c2"), 2.0f, {2, 2}); auto id_n = ops::IdentityN(scope.WithOpName("id_n"), {c1, x, c2}); - auto id0 = ops::Identity(scope.WithOpName("id0"), id_n[1]); - auto id1 = ops::Identity(scope.WithOpName("id1"), id_n[0]); + auto id0 = ops::Identity(scope.WithOpName("id0"), id_n[0]); + auto id1 = ops::Identity(scope.WithOpName("id1"), id_n[1]); auto add0 = ops::Add(scope.WithOpName("add0"), id_n[0], id_n[1]); auto add1 = ops::Add(scope.WithOpName("add1"), id_n[0], id_n[2]); @@ -1996,38 +1997,44 @@ TEST_F(ConstantFoldingTest, PartialFolding_IdentityN) { item.fetch.push_back("add0"); item.fetch.push_back("add1"); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); - + Status status = optimizer.Optimize(nullptr, item, &output); + LOG(INFO) << output.DebugString(); TF_EXPECT_OK(status); EXPECT_EQ(8, output.node_size()); - // id_n should remain unchanged. - EXPECT_EQ("id_n", output.node(3).name()); - EXPECT_EQ(3, output.node(3).input_size()); - EXPECT_EQ("c1", output.node(3).input(0)); - EXPECT_EQ("x", output.node(3).input(1)); - EXPECT_EQ("c2", output.node(3).input(2)); - // id0 is unchanged. - EXPECT_EQ("id0", output.node(4).name()); - EXPECT_EQ(1, output.node(4).input_size()); - // id1 should have the constant input forwarded to it, - // and a control dependency from id_n. - EXPECT_EQ("id1", output.node(5).name()); - EXPECT_EQ(2, output.node(5).input_size()); - EXPECT_EQ("c1", output.node(5).input(0)); - EXPECT_EQ("^id_n", output.node(5).input(1)); - - EXPECT_EQ("add0", output.node(6).name()); - EXPECT_EQ(2, output.node(6).input_size()); - EXPECT_EQ("c1", output.node(6).input(0)); - EXPECT_EQ("id_n:1", output.node(6).input(1)); - - EXPECT_EQ("add1", output.node(7).name()); - EXPECT_EQ(3, output.node(7).input_size()); - EXPECT_EQ("c1", output.node(7).input(0)); - EXPECT_EQ("c2", output.node(7).input(1)); - EXPECT_EQ("^id_n", output.node(7).input(2)); + for (const auto& node : output.node()) { + // id_n should remain unchanged. + if (node.name() == "id_n") { + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("c1", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("c2", node.input(2)); + } + // id0 should be constant folded, and a control dependency from id_n. + if (node.name() == "id0") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^id_n", node.input(0)); + } + // id1 is unchanged. + if ("id1" == node.name()) { + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("id_n:1", node.input(0)); + } + + if ("add0" == node.name()) { + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c1", node.input(0)); + EXPECT_EQ("id_n:1", node.input(1)); + } + // add1 should bo constant folded and have a control dependency from id_n. + if ("add1" == node.name()) { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^id_n", node.input(0)); + } + } } TEST_F(ConstantFoldingTest, TrivialPack) { @@ -2043,11 +2050,10 @@ TEST_F(ConstantFoldingTest, TrivialPack) { TF_CHECK_OK(scope.ToGraphDef(&item.graph)); item.fetch.push_back("stack"); - ConstantFolding fold(nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; - Status status = fold.Optimize(nullptr, item, &output); + Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - LOG(INFO) << output.DebugString(); EXPECT_EQ(5, output.node_size()); for (const auto& node : output.node()) { if (node.name() == "stack") { @@ -2072,6 +2078,55 @@ TEST_F(ConstantFoldingTest, TrivialPack) { EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape()); } +TEST_F(ConstantFoldingTest, Enter) { + GrapplerItem item; + AttrValue frame_name; + frame_name.set_s("foo"); + AttrValue type; + type.set_type(DT_FLOAT); + AttrValue value; + Tensor value_tensor(DT_FLOAT, TensorShape({})); + value_tensor.flat()(0) = 1; + value_tensor.AsProtoTensorContent(value.mutable_tensor()); + + GraphDef& graph = item.graph; + AddNode("x", "Placeholder", {}, {{"T", type}}, &graph); + AddNode("c1", "Const", {"^x"}, {{"value", value}, {"dtype", type}}, &graph); + AddNode("enter1", "Enter", {"x"}, {{"T", type}, {"frame_name", frame_name}}, + &graph); + AddNode("enter2", "Enter", {"c1"}, {{"T", type}, {"frame_name", frame_name}}, + &graph); + AddNode("id1", "Identity", {"enter1"}, {{"T", type}}, &graph); + AddNode("id2", "Identity", {"enter2"}, {{"T", type}}, &graph); + AddNode("id3", "Identity", {"enter2"}, {{"T", type}}, &graph); + item.fetch.push_back("id1"); + item.fetch.push_back("id2"); + item.fetch.push_back("id3"); + + ConstantFolding optimizer(nullptr /* cpu_device */); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + // Run the optimizer twice to make sure the rewrite is idempotent. + item.graph.Swap(&output); + status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(7, output.node_size()); + for (const NodeDef& node : output.node()) { + if (node.name() == "id1") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("enter1", node.input(0)); + } + if (node.name() == "id2" || node.name() == "id3") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^enter2", node.input(0)); + } + } +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index 89c3aa82bf..6b6cecebe1 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -23,7 +23,7 @@ namespace tensorflow { namespace grappler { std::vector GrapplerTest::EvaluateNodes( - const GraphDef& graph, const std::vector& node_names) { + const GraphDef& graph, const std::vector& node_names) const { SessionOptions options; std::unique_ptr session(NewSession(options)); TF_CHECK_OK(session->Create(graph)); @@ -35,7 +35,8 @@ std::vector GrapplerTest::EvaluateNodes( return output_tensors; } -std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { +std::vector GrapplerTest::EvaluateFetchNodes( + const GrapplerItem& item) const { SessionOptions options; std::unique_ptr session(NewSession(options)); TF_CHECK_OK(session->Create(item.graph)); @@ -52,17 +53,23 @@ std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { return output_tensors; } -void GrapplerTest::AddNode(const string& name, const string& op, - const std::vector& inputs, GraphDef* graph) { - auto* node = graph->add_node(); +NodeDef* GrapplerTest::AddNode( + const string& name, const string& op, const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph) const { + NodeDef* node = graph->add_node(); node->set_name(name); node->set_op(op); - for (const auto& input : inputs) { + for (const string& input : inputs) { node->add_input(input); } + for (auto attr : attributes) { + (*node->mutable_attr())[attr.first] = attr.second; + } + return node; } -void GrapplerTest::CompareGraphs(GraphDef want, GraphDef got) { +void GrapplerTest::CompareGraphs(GraphDef want, GraphDef got) const { auto comparator = [](const NodeDef& n1, const NodeDef& n2) -> bool { return n1.name() < n2.name(); }; diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 3df6625d5c..c7f06557e7 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -29,15 +30,17 @@ namespace grappler { class GrapplerTest : public ::testing::Test { protected: - std::vector EvaluateNodes(const GraphDef& graph, - const std::vector& node_names); + std::vector EvaluateNodes( + const GraphDef& graph, const std::vector& node_names) const; - std::vector EvaluateFetchNodes(const GrapplerItem& item); + std::vector EvaluateFetchNodes(const GrapplerItem& item) const; - void AddNode(const string& name, const string& op, - const std::vector& inputs, GraphDef* graph); + NodeDef* AddNode(const string& name, const string& op, + const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph) const; - void CompareGraphs(GraphDef want, GraphDef got); + void CompareGraphs(GraphDef want, GraphDef got) const; // Check if node 'src' is directly connected to the input($position) of 'dst'. bool IsNodesDirectlyConnected(const NodeMap& node_map, const string& src, diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 4e80fab9bd..787582ae70 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -35,6 +35,7 @@ from google.protobuf import text_format from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import queue_runner_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.core.protobuf import saver_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.client import session @@ -2165,7 +2166,13 @@ class MetaGraphTest(test.TestCase): # Build and run the gradients of the while loop. We use this below to # verify that the gradients are correct with an imported MetaGraphDef. grad = gradients_impl.gradients([output], [var]) - with session.Session() as sess: + # Turn off constant folding to avoid breaking testNestedControlFlowSerDes. + # It appears that a missing control dependency in the gradient graph + # causes the fetch node to not be triggered. + no_constfold_config = config_pb2.ConfigProto() + no_constfold_config.graph_options.rewrite_options.constant_folding = ( + rewriter_config_pb2.RewriterConfig.OFF) + with session.Session(config=no_constfold_config) as sess: sess.run(init_op) expected_grad_value = sess.run(grad) @@ -2182,7 +2189,7 @@ class MetaGraphTest(test.TestCase): init_op = variables.global_variables_initializer() - with session.Session() as sess: + with session.Session(config=no_constfold_config) as sess: sess.run(init_op) actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) -- GitLab From 9637651f144a04225279fa3f6ffe944ee35086d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 09:40:28 -0700 Subject: [PATCH 056/960] add grpc service stub for TPUProfilerAnalysis PiperOrigin-RevId: 189198495 --- tensorflow/contrib/tpu/profiler/BUILD | 26 +++- .../tpu/profiler/tpu_profiler_analysis.proto | 73 +++++++++ .../tpu_profiler_analysis_pb2_grpc.py | 138 ++++++++++++++++++ 3 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto create mode 100644 tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 198da0203a..0a52d0b13b 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -18,7 +18,7 @@ filegroup( visibility = ["//tensorflow:__subpackages__"], ) -tf_proto_library_cc( +tf_proto_library( name = "tpu_profiler_proto", srcs = ["tpu_profiler.proto"], has_services = 1, @@ -98,16 +98,36 @@ tf_cc_test( ], ) -tf_proto_library_cc( +tf_proto_library( name = "op_profile_proto", srcs = ["op_profile.proto"], cc_api_version = 2, visibility = ["//visibility:public"], ) -tf_proto_library_cc( +tf_proto_library( name = "tf_op_stats_proto", srcs = ["tf_op_stats.proto"], cc_api_version = 2, visibility = ["//visibility:public"], ) + +tf_proto_library( + name = "tpu_profiler_analysis_proto", + srcs = ["tpu_profiler_analysis.proto"], + has_services = 1, + cc_api_version = 2, + cc_grpc_version = 1, + protodeps = [":tpu_profiler_proto"] + tf_additional_all_protos(), + visibility = ["//visibility:public"], +) + +py_library( + name = "tpu_profiler_analysis_pb2_grpc", + srcs = ["tpu_profiler_analysis_pb2_grpc.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + ":tpu_profiler_analysis_proto_py", + ], +) diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto new file mode 100644 index 0000000000..a4fc8d4e87 --- /dev/null +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.proto @@ -0,0 +1,73 @@ +syntax = "proto3"; +package tensorflow; + +import "tensorflow/contrib/tpu/profiler/tpu_profiler.proto"; + +message NewProfileSessionRequest { + ProfileRequest request = 1; + string repository_root = 2; + repeated string hosts = 3; +} + +message NewProfileSessionResponse { + // Auxiliary error_message. + string error_message = 1; + // If success, return session identifier for future reference. + string session_id = 2; +} + +message EnumProfileSessionsAndToolsRequest { + string repository_root = 1; +} + +message ProfileSessionInfo { + string session_id = 1; + // Which tool data is available for consumption. + repeated string available_tools = 2; +} + +message EnumProfileSessionsAndToolsResponse { + // Auxiliary error_message. + string error_message = 1; + // If success, the returned sessions information are stored here. + repeated ProfileSessionInfo sessions = 2; +} + +message ProfileSessionDataRequest { + string repository_root = 1; + string session_id = 2; + // Which tool + string tool_name = 3; + // Tool's specific parameters. e.g. TraceViewer's viewport etc + map parameters = 4; +} + +message ProfileSessionDataResponse { + // Auxiliary error_message. + string error_message = 1; + + // Output format. e.g. "json" or "proto" or "blob" + string output_format = 2; + + // TODO(jiesun): figure out whether to put bytes or oneof tool specific proto. + bytes output = 3; +} +//////////////////////////////////////////////////////////////////////////////// +// TPUProfileAnalysis service provide entry point for profiling TPU and for +// serving profiled data to Tensorboard through GRPC +//////////////////////////////////////////////////////////////////////////////// +service TPUProfileAnalysis { + // Starts a profiling session, blocks until it completes. + // TPUProfileAnalysis service delegate this to TPUProfiler service. + // Populate the profiled data in repository, then return status to caller. + rpc NewSession(NewProfileSessionRequest) returns (NewProfileSessionResponse) { + } + // Enumerate existing sessions and return available profile tools. + rpc EnumSessions(EnumProfileSessionsAndToolsRequest) + returns (EnumProfileSessionsAndToolsResponse) { + } + // Retrieve specific tool's data for specific session. + rpc GetSessionToolData(ProfileSessionDataRequest) + returns (ProfileSessionDataResponse) { + } +} diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py new file mode 100644 index 0000000000..c28fef22a9 --- /dev/null +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py @@ -0,0 +1,138 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +# +# Do not use pylint on generated code. +# pylint: disable=missing-docstring,g-short-docstring-punctuation,g-no-space-after-docstring-summary,invalid-name,line-too-long,unused-argument,g-doc-args +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import grpc + +from third_party.tensorflow.contrib.tpu.profiler import tpu_profiler_analysis_pb2 as third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2 + + +class TPUProfileAnalysisStub(object): + """////////////////////////////////////////////////////////////////////////////// + + TPUProfileAnalysis service provide entry point for profiling TPU and for + serving profiled data to Tensorboard through GRPC + ////////////////////////////////////////////////////////////////////////////// + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.NewSession = channel.unary_unary( + '/tensorflow.TPUProfileAnalysis/NewSession', + request_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + NewProfileSessionRequest.SerializeToString, + response_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + NewProfileSessionResponse.FromString, + ) + self.EnumSessions = channel.unary_unary( + '/tensorflow.TPUProfileAnalysis/EnumSessions', + request_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + EnumProfileSessionsAndToolsRequest.SerializeToString, + response_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + EnumProfileSessionsAndToolsResponse.FromString, + ) + self.GetSessionToolData = channel.unary_unary( + '/tensorflow.TPUProfileAnalysis/GetSessionToolData', + request_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + ProfileSessionDataRequest.SerializeToString, + response_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + ProfileSessionDataResponse.FromString, + ) + + +class TPUProfileAnalysisServicer(object): + """////////////////////////////////////////////////////////////////////////////// + + TPUProfileAnalysis service provide entry point for profiling TPU and for + serving profiled data to Tensorboard through GRPC + ////////////////////////////////////////////////////////////////////////////// + """ + + def NewSession(self, request, context): + """Starts a profiling session, blocks until it completes. + TPUProfileAnalysis service delegate this to TPUProfiler service. + Populate the profiled data in repository, then return status to caller. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def EnumSessions(self, request, context): + """Enumerate existing sessions and return available profile tools. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetSessionToolData(self, request, context): + """Retrieve specific tool's data for specific session. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_TPUProfileAnalysisServicer_to_server(servicer, server): + rpc_method_handlers = { + 'NewSession': + grpc.unary_unary_rpc_method_handler( + servicer.NewSession, + request_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + NewProfileSessionRequest.FromString, + response_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + NewProfileSessionResponse.SerializeToString, + ), + 'EnumSessions': + grpc.unary_unary_rpc_method_handler( + servicer.EnumSessions, + request_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + EnumProfileSessionsAndToolsRequest.FromString, + response_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + EnumProfileSessionsAndToolsResponse.SerializeToString, + ), + 'GetSessionToolData': + grpc.unary_unary_rpc_method_handler( + servicer.GetSessionToolData, + request_deserializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + ProfileSessionDataRequest.FromString, + response_serializer= + third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2. + ProfileSessionDataResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'tensorflow.TPUProfileAnalysis', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) -- GitLab From 2acc5b6c465832fc8c1fba2454d3dfd8f3aa2eb5 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 15 Mar 2018 10:26:13 -0700 Subject: [PATCH 057/960] TFE: Modify initialization of `ContextStack` to ensure eager context is kept. When eager execution is enabled in the main thread, the fact that it was enabled is propagated to subsequently created threads. This change ... (1) ensures that the fact that eager was enabled is also propagated to the `ContextStack`, which is renamed to `_ContextSwitchStack`, for clarity; (2) adds a `_ContextSwitchStack` object to `Context` as a member, removing the global `context_stack`. PiperOrigin-RevId: 189206207 --- tensorflow/python/eager/context.py | 40 +++++++++++++++++++--------- tensorflow/python/eager/core_test.py | 15 +++++------ tensorflow/python/eager/ops_test.py | 17 ++++++++++++ tensorflow/python/framework/ops.py | 26 +++++++----------- 4 files changed, 62 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 7953d10a89..6c9a14730c 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -94,22 +94,32 @@ class _EagerContext(threading.local): self.execution_mode = None -ContextStackEntry = collections.namedtuple( - "ContextStackEntry", ["is_building_function", "enter_context_fn"]) +ContextSwitch = collections.namedtuple( + "ContextSwitch", ["is_building_function", "enter_context_fn"]) -class ContextStack(threading.local): +# `_ContextSwitchStack` is a `threading.local` to match the semantics of +# ``DefaultGraphStack`, which is also a `threading.local`. +class _ContextSwitchStack(threading.local): """A thread-local stack of context switches.""" - def __init__(self): - super(ContextStack, self).__init__() + def __init__(self, eager): + super(_ContextSwitchStack, self).__init__() self.stack = [] + if eager: + # Initialize the stack with a pointer to enter the eager context; this + # ensures that the fact that eager execution was enabled is propagated + # across threads, since (1) `enable_eager_execution` modifies a + # process-level flag (`_default_mode`) and (2) `__init__` is called each + # time a threading.local object is used in a separate thread. + self.push(is_building_function=False, enter_context_fn=eager_mode) def push(self, is_building_function, enter_context_fn): """Push metadata about a context switch onto the stack. A context switch can take one of two forms: installing a graph as the - default graph, or entering the eager context. + default graph, or entering the eager context. For each context switch, + we record whether or not the entered context is building a function. Args: is_building_function: (bool.) Whether the context is building a function. @@ -118,7 +128,7 @@ class ContextStack(threading.local): """ self.stack.append( - ContextStackEntry(is_building_function, enter_context_fn)) + ContextSwitch(is_building_function, enter_context_fn)) def pop(self): """Pop the stack.""" @@ -126,9 +136,6 @@ class ContextStack(threading.local): self.stack.pop() -context_stack = ContextStack() - - # TODO(agarwal): rename to EagerContext / EagerRuntime ? # TODO(agarwal): consider keeping the corresponding Graph here. class Context(object): @@ -171,6 +178,7 @@ class Context(object): ValueError: If execution_mode is not valid. """ self._eager_context = _EagerContext() + self._context_switches = _ContextSwitchStack(self.executing_eagerly()) self._context_handle = None self._context_devices = None self._post_execution_callbacks = [] @@ -283,13 +291,16 @@ class Context(object): old_mode = ctx.mode ctx.mode = mode if mode == EAGER_MODE: - context_stack.push(False, eager_mode) + # Entering graph mode does not provide us with sufficient information to + # record a context switch; graph-based context switches are only logged + # when a graph is registered as the default graph. + self.context_switches.push(False, eager_mode) try: yield finally: ctx.mode = old_mode if mode == EAGER_MODE: - context_stack.pop() + self.context_switches.pop() def executing_eagerly(self): """Returns True if current thread has eager executing enabled.""" @@ -545,6 +556,11 @@ class Context(object): run_metadata.ParseFromString(compat.as_bytes(proto_data)) return run_metadata + @property + def context_switches(self): + """Returns a stack of context switches.""" + return self._context_switches + _context = None _context_lock = threading.Lock() diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 6dfd8d1afa..6ebf5b2481 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -123,19 +123,18 @@ class TFETest(test_util.TensorFlowTestCase): # available, when no device is explicitly provided) self.assertEqual(y.device, '/job:localhost/replica:0/task:0/device:CPU:0') - def testContextStackContainsEagerMode(self): - # Eager execution has been enabled, and no other context - # switch has occurred, so `context_stack` should contain - # exactly one entry. - self.assertEqual(len(context.context_stack.stack), 1) - stack_entry = context.context_stack.stack[0] + def testContextSwitchStackContainsEagerMode(self): + # Eager execution has been enabled, and no other context switch has + # occurred, so `context_switches` should contain exactly one entry. + self.assertEqual(len(context.context().context_switches.stack), 1) + switch = context.context().context_switches.stack[0] # The entry should log that eager mode was entered. - self.assertIs(stack_entry.enter_context_fn, context.eager_mode) + self.assertIs(switch.enter_context_fn, context.eager_mode) # It is not possible to build a graph function when eager execution # is enabled; the stack entry should reflect this fact. - self.assertFalse(stack_entry.is_building_function) + self.assertFalse(switch.is_building_function) def testInt32GPU(self): if not context.context().num_gpus(): diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index f70c7544d6..fc76ede4c5 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import threading import numpy as np from tensorflow.core.protobuf import config_pb2 @@ -376,6 +377,22 @@ class OpsTest(test_util.TensorFlowTestCase): def testNoOpIsNone(self): self.assertTrue(control_flow_ops.no_op() is None) + def testEagerContextPreservedAcrossThreads(self): + def init_fn(): + self.assertTrue(context.executing_eagerly()) + with ops.init_scope(): + self.assertTrue(context.executing_eagerly()) + context_switches = context.context().context_switches + self.assertEqual(len(context_switches.stack), 1) + self.assertFalse(context_switches.stack[0].is_building_function) + self.assertEqual(context_switches.stack[0].enter_context_fn, + context.eager_mode) + + self.assertTrue(context.executing_eagerly()) + t1 = threading.Thread(target=init_fn) + t1.start() + t1.join() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b2f43773fe..01a0e03be2 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5095,11 +5095,12 @@ class _DefaultGraphStack(_DefaultStack): # pylint: disable=protected-access @tf_contextlib.contextmanager def get_controller(self, default): try: - context.context_stack.push(default.building_function, default.as_default) + context.context().context_switches.push(default.building_function, + default.as_default) with super(_DefaultGraphStack, self).get_controller(default) as g: yield g finally: - context.context_stack.pop() + context.context().context_switches.pop() _default_graph_stack = _DefaultGraphStack() @@ -5125,13 +5126,13 @@ def init_scope(): graph function. Here, a context is defined as either a graph or an eager context. Every context switch, i.e., every installation of a graph as the default graph and every switch into eager mode, is logged in a - thread-local stack called the `context_stack`; the log entry for a + thread-local stack called `context_switches`; the log entry for a context switch is popped from the stack when the context is exited. - Entering an `init_scope` is equivalent to crawling up the - `context_stack`, finding the first context that is not building a graph - function, and entering it. A caveat is that if graph mode is enabled - but the default graph stack is empty, then entering an `init_scope` - will simply install a fresh graph as the default one. + Entering an `init_scope` is equivalent to crawling up + `context_switches`, finding the first context that is not building a + graph function, and entering it. A caveat is that if graph mode is + enabled but the default graph stack is empty, then entering an + `init_scope` will simply install a fresh graph as the default one. (3) The gradient tape is paused while the scope is active. """ @@ -5161,7 +5162,7 @@ def init_scope(): outer_context = default_graph.as_default else: # Find a context that is not building a function. - for stack_entry in reversed(context.context_stack.stack): + for stack_entry in reversed(context.context().context_switches.stack): if not stack_entry.is_building_function: outer_context = stack_entry.enter_context_fn break @@ -5278,13 +5279,6 @@ def enable_eager_execution(config=None, device_policy=None, config=config, device_policy=device_policy, execution_mode=execution_mode) - if context.context_stack.stack: - raise AssertionError("Invariant violated: The context stack must " - "be empty when eager execution is enabled.") - # Log that eager execution has been enabled by pushing an entry onto the - # context stack; this entry won't ever be popped, as it's impossible to - # disable eager execution - context.context_stack.push(False, context.eager_mode) elif ((config is not None and config is not context._context._config) or (device_policy is not None and device_policy is not context._context._device_policy) or -- GitLab From 4b0687d70c4bcab5ec2837345bd0115a0b356946 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 15 Mar 2018 10:39:55 -0700 Subject: [PATCH 058/960] [XLA] Add --xla_hlo_profile_last_run flag to replay_computation. When using replay_computation for profiling, you usually only want to do one or two warmup runs and then profile the last run of your model. This flag makes that possible. PiperOrigin-RevId: 189208924 --- tensorflow/compiler/xla/tools/BUILD | 1 + .../compiler/xla/tools/replay_computation.cc | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 091fa0c3ec..2e55f609d1 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -75,6 +75,7 @@ cc_library( name = "replay_computation_library", srcs = ["replay_computation.cc"], deps = [ + "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index eda5effbb9..62a353ad09 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -40,6 +40,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/lib/testing.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/session.pb.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -66,6 +67,7 @@ struct Options { bool use_fake_data = false; bool print_result = true; int num_runs = 1; + bool xla_hlo_profile_last_run = false; }; // Invokes the given computation passing arbitrary data for every (unbound) @@ -122,16 +124,21 @@ StatusOr> ReplayComputation( std::unique_ptr result; for (int i = 0; i < opts.num_runs; ++i) { ExecutionProfile profile; + ExecutionOptions execution_options = CreateDefaultExecutionOptions(); + if (opts.xla_hlo_profile_last_run && i == opts.num_runs - 1) { + execution_options.mutable_debug_options()->set_xla_hlo_profile(true); + } + if (opts.print_result) { - TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer( - computation, execute_arguments, - /*execution_options=*/nullptr, &profile)); + TF_ASSIGN_OR_RETURN( + result, client->ExecuteAndTransfer(computation, execute_arguments, + &execution_options, &profile)); } else { // If we're not printing the result, execute the computation but don't // bother retrieving the result. This can be a significant speedup. TF_RETURN_IF_ERROR(client ->Execute(computation, execute_arguments, - /*execution_options=*/nullptr, &profile) + &execution_options, &profile) .status()); } LOG(INFO) << "Execution took " @@ -191,6 +198,9 @@ int main(int argc, char** argv) { "Number of times to run each computation"), tensorflow::Flag("fake_infeed_shape", &opts.fake_infeed_shape, "Shape of fake data to construct for (infinite) infeed"), + tensorflow::Flag( + "xla_hlo_profile_last_run", &opts.xla_hlo_profile_last_run, + "Pass --xla_hlo_profile the last time we run the computation."), }; xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list); bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); -- GitLab From 39c5b0470e7e6b6f79c8f55d89ea46585168f2a8 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 15 Mar 2018 10:54:42 -0700 Subject: [PATCH 059/960] Really delete old checkpoints this time. Follows up on cl/188187349, which fixed checkpoint management for tf.train.Saver when executing eagerly. Except I was recreating the tf.train.Saver objects each save, so tfe.Checkpoint and friends did not benefit from that change. Keeps the same tf.train.Saver around when executing eagerly. This limits object graph mutations just like when graph building; if there are complaints I can assign to Saver._var_list instead, since eager tf.train.Saver is not specialized to its var_list argument. PiperOrigin-RevId: 189211552 --- .../eager/python/checkpointable_utils.py | 17 ++++++++-------- .../eager/python/checkpointable_utils_test.py | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 677b56b7b8..389d4a03c8 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -602,8 +602,7 @@ class CheckpointableSaver(object): """ named_variables, graph_proto = _serialize_object_graph( self._root_checkpointable) - in_graph_mode = not context.executing_eagerly() - if in_graph_mode: + if not context.executing_eagerly(): if session is None: session = ops.get_default_session() if self._object_graph_feed_tensor is None: @@ -622,17 +621,17 @@ class CheckpointableSaver(object): named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( tensor=object_graph_tensor, name=_OBJECT_GRAPH_PROTO_KEY) - if not in_graph_mode or self._last_save_object_graph != graph_proto: - if self._last_save_object_graph is not None and in_graph_mode: + if self._last_save_object_graph != graph_proto: + if self._last_save_object_graph is not None: raise NotImplementedError( "Using a single Saver to save a mutated object graph is not " "currently supported when graph building. Use a different Saver " - "when the object graph changes (save ops will be duplicated), or " - "file a feature request if this limitation bothers you.") + "when the object graph changes (save ops will be duplicated when " + "graph building), or file a feature request if this limitation " + "bothers you.") saver = saver_lib.Saver(var_list=named_variables) - if in_graph_mode: - self._last_save_saver = saver - self._last_save_object_graph = graph_proto + self._last_save_saver = saver + self._last_save_object_graph = graph_proto else: saver = self._last_save_saver with ops.device("/cpu:0"): diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 31f661e634..1ab94b88bd 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -849,6 +849,26 @@ class CheckpointingTests(test.TestCase): saver.save(checkpoint_prefix) self.assertEqual(before_ops, graph.get_operations()) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testCheckpointCleanup(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + self.evaluate(checkpointable_utils.gather_initializers(obj)) + saver = checkpointable_utils.Checkpoint(obj=obj) + for _ in range(10): + saver.save(checkpoint_prefix) + expected_filenames = ["checkpoint"] + for checkpoint_number in range(6, 11): + expected_filenames.append("ckpt-%d.index" % (checkpoint_number,)) + expected_filenames.append( + "ckpt-%d.data-00000-of-00001" % (checkpoint_number,)) + six.assertCountEqual( + self, + expected_filenames, + os.listdir(checkpoint_directory)) + def testManyRestoresGraph(self): """Restores after the first should not modify the graph.""" with context.graph_mode(): -- GitLab From 5e610ebf7f13b443dbaeef261f6c1b0429fb2592 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Mar 2018 11:03:42 -0700 Subject: [PATCH 060/960] Internal change. PiperOrigin-RevId: 189213372 --- tensorflow/contrib/lite/python/BUILD | 5 ++++- tensorflow/contrib/opt/BUILD | 3 +++ tensorflow/examples/tutorials/word2vec/BUILD | 3 +++ tensorflow/python/keras/BUILD | 3 +++ tensorflow/python/tools/BUILD | 5 ++++- 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 76607af079..d6f39219ed 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -69,7 +69,10 @@ py_test( name = "lite_test", srcs = ["lite_test.py"], srcs_version = "PY2AND3", - tags = ["no_oss"], + tags = [ + "no-internal-py3", + "no_oss", + ], deps = [ ":lite", ":op_hint", diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 827279bd47..bacf15bbd6 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -52,6 +52,9 @@ py_test( name = "external_optimizer_test", srcs = ["python/training/external_optimizer_test.py"], srcs_version = "PY2AND3", + tags = [ + "no-internal-py3", + ], deps = [ ":opt_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/examples/tutorials/word2vec/BUILD b/tensorflow/examples/tutorials/word2vec/BUILD index 42d6355b4f..bfcf459269 100644 --- a/tensorflow/examples/tutorials/word2vec/BUILD +++ b/tensorflow/examples/tutorials/word2vec/BUILD @@ -13,6 +13,9 @@ py_binary( "word2vec_basic.py", ], srcs_version = "PY2AND3", + tags = [ + "no-internal-py3", + ], deps = [ "//tensorflow:tensorflow_py", "//third_party/py/numpy", diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 8ace3e0968..eef91e9c5b 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -774,6 +774,9 @@ py_test( size = "small", srcs = ["_impl/keras/engine/topology_test.py"], srcs_version = "PY2AND3", + tags = [ + "no-internal-py3", + ], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD index 5415881cae..1de1adcfbc 100644 --- a/tensorflow/python/tools/BUILD +++ b/tensorflow/python/tools/BUILD @@ -249,7 +249,10 @@ py_test( "//tensorflow/cc/saved_model:saved_model_half_plus_two", ], srcs_version = "PY2AND3", - tags = ["manual"], + tags = [ + "manual", + "no-internal-py3", + ], deps = [ ":saved_model_cli", "//tensorflow/core:protos_all_py", -- GitLab From 4ce5beb9c5dab5d49c00f4a5f5dbd124b700cd56 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 11:16:21 -0700 Subject: [PATCH 061/960] Refactor and enable loop optimizer tests. PiperOrigin-RevId: 189215781 --- tensorflow/core/grappler/optimizers/BUILD | 6 - .../optimizers/loop_optimizer_test.cc | 550 ++++++++---------- 2 files changed, 230 insertions(+), 326 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index ffeaa3835e..0df5307d9c 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -538,13 +538,7 @@ cc_library( tf_cc_test( name = "loop_optimizer_test", - size = "small", srcs = ["loop_optimizer_test.cc"], - tags = [ - "manual", - "no_oss", # b/74111495 - "notap", - ], deps = [ ":loop_optimizer", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index 0bd202a2ab..0d45ba9b56 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" @@ -26,72 +27,56 @@ namespace tensorflow { namespace grappler { namespace { -class LoopOptimizerTest : public ::testing::Test { +class LoopOptimizerTest : public GrapplerTest { protected: - static NodeDef CreateNode(const string& name, - const std::vector& inputs) { - return CreateNode(name, "Identity", "", false, 0, inputs); - } - static NodeDef CreateNode(const string& name, const string& op, - const std::vector& inputs) { - return CreateNode(name, op, "", false, 0, inputs); + // These helpers always sets T=DT_FLOAT. + void AddEnterNode(const string& name, const string& frame, + const bool is_constant, const int piterations, + const std::vector& inputs, GraphDef* graph) const { + std::vector> attributes; + AttrValue type; + type.set_type(DT_FLOAT); + attributes.emplace_back("T", type); + AttrValue frame_name; + frame_name.set_s(frame); + attributes.emplace_back("frame_name", frame_name); + AttrValue is_const; + is_const.set_b(is_constant); + attributes.emplace_back("is_constant", is_const); + AttrValue parallel_iterations; + parallel_iterations.set_i(piterations); + attributes.emplace_back("parallel_iterations", parallel_iterations); + AddNode(name, "Enter", inputs, attributes, graph); } - static NodeDef CreateNode(const string& name, const string& op, - const string& frame, - const bool is_constant, - const int piterations, - const std::vector& inputs) { - NodeDef node; - node.set_name(name); - if (!op.empty()) { - node.set_op(op); - } - if (!frame.empty()) { - AttrValue frame_name; - frame_name.set_s(frame); - node.mutable_attr()->insert({"frame_name", frame_name}); - } - if (op == "Enter") { - AttrValue is_const; - is_const.set_b(is_constant); - node.mutable_attr()->insert({"is_constant", is_const}); - AttrValue parallel_iterations; - parallel_iterations.set_i(piterations); - node.mutable_attr()->insert( - {"parallel_iterations", parallel_iterations}); - } + + void AddSimpleNode(const string& name, const string& op, + const std::vector& inputs, GraphDef* graph) const { + std::vector> attributes; AttrValue type; type.set_type(DT_FLOAT); - node.mutable_attr()->insert({"T", type}); - for (const string& input : inputs) { - node.add_input(input); - } - return node; + attributes.emplace_back("T", type); + AddNode(name, op, inputs, attributes, graph); } }; TEST_F(LoopOptimizerTest, Basic) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"VariantAdd"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"VariantAdd", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"VariantAdd"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); GrapplerItem item; item.graph = graph; @@ -123,27 +108,22 @@ TEST_F(LoopOptimizerTest, Basic) { TEST_F(LoopOptimizerTest, Const) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode("Const", "Const", {"^Identity"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "Const"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"VariantAdd"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("Const", "Const", {"^Identity"}, &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "Const"}, &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"VariantAdd", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"VariantAdd"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); GrapplerItem item; item.graph = graph; @@ -174,27 +154,23 @@ TEST_F(LoopOptimizerTest, Const) { TEST_F(LoopOptimizerTest, ControlOutput) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode( - "Less", "Less", {"VariantAdd", "less/y", "^InvariantAdd"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"VariantAdd"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"VariantAdd", "Less/y", "^InvariantAdd"}, + &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"VariantAdd"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); GrapplerItem item; item.graph = graph; @@ -223,47 +199,38 @@ TEST_F(LoopOptimizerTest, ControlOutput) { TEST_F(LoopOptimizerTest, NestedLoop1) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"Exit2"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); - - *graph.add_node() = CreateNode( - "InvariantEnter2", "Enter", "while/while/while_context", true, 1, - {"VariantAdd"}); - *graph.add_node() = CreateNode( - "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); - *graph.add_node() = CreateNode( - "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); - *graph.add_node() = CreateNode( - "VariantEnter2", "Enter", "while/while/while_context", false, 1, - {"VariantEnter"}); - *graph.add_node() = CreateNode( - "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); - *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); - *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); - *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); - *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); - *graph.add_node() = CreateNode( - "NextIteration2", "NextIteration", {"VariantAdd2"}); - *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"Exit2", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"Exit2"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); + + AddEnterNode("InvariantEnter2", "while/while/while_context", true, 1, + {"VariantAdd"}, &graph); + AddSimpleNode("InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}, + &graph); + AddSimpleNode("VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}, &graph); + AddEnterNode("VariantEnter2", "while/while/while_context", false, 1, + {"VariantEnter"}, &graph); + AddSimpleNode("Merge2", "Merge", {"VariantEnter2", "NextIteration2"}, &graph); + AddSimpleNode("Less2/y", "Const", {"^Identity2"}, &graph); + AddSimpleNode("Less2", "Less", {"VariantAdd2", "Less2/y"}, &graph); + AddSimpleNode("LoopCond2", "LoopCond", {"Less2"}, &graph); + AddSimpleNode("Switch2", "Switch", {"Merge2", "LoopCond2"}, &graph); + AddSimpleNode("Identity2", "Identity", {"Switch2:1"}, &graph); + AddSimpleNode("NextIteration2", "NextIteration", {"VariantAdd2"}, &graph); + AddSimpleNode("Exit2", "Exit", {"Switch2"}, &graph); GrapplerItem item; item.graph = graph; @@ -299,47 +266,38 @@ TEST_F(LoopOptimizerTest, NestedLoop1) { TEST_F(LoopOptimizerTest, NestedLoop2) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"Exit2"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); - - *graph.add_node() = CreateNode( - "InvariantEnter2", "Enter", "while/while/while_context", true, 1, - {"InvariantAdd"}); - *graph.add_node() = CreateNode( - "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); - *graph.add_node() = CreateNode( - "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); - *graph.add_node() = CreateNode( - "VariantEnter2", "Enter", "while/while/while_context", false, 1, - {"VariantEnter"}); - *graph.add_node() = CreateNode( - "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); - *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); - *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); - *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); - *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); - *graph.add_node() = CreateNode( - "NextIteration2", "NextIteration", {"VariantAdd2"}); - *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"Exit2", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"Exit2"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); + + AddEnterNode("InvariantEnter2", "while/while/while_context", true, 1, + {"InvariantAdd"}, &graph); + AddSimpleNode("InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}, + &graph); + AddSimpleNode("VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}, &graph); + AddEnterNode("VariantEnter2", "while/while/while_context", false, 1, + {"VariantEnter"}, &graph); + AddSimpleNode("Merge2", "Merge", {"VariantEnter2", "NextIteration2"}, &graph); + AddSimpleNode("Less2/y", "Const", {"^Identity2"}, &graph); + AddSimpleNode("Less2", "Less", {"VariantAdd2", "Less2/y"}, &graph); + AddSimpleNode("LoopCond2", "LoopCond", {"Less2"}, &graph); + AddSimpleNode("Switch2", "Switch", {"Merge2", "LoopCond2"}, &graph); + AddSimpleNode("Identity2", "Identity", {"Switch2:1"}, &graph); + AddSimpleNode("NextIteration2", "NextIteration", {"VariantAdd2"}, &graph); + AddSimpleNode("Exit2", "Exit", {"Switch2"}, &graph); GrapplerItem item; item.graph = graph; @@ -371,48 +329,38 @@ TEST_F(LoopOptimizerTest, NestedLoop2) { TEST_F(LoopOptimizerTest, NestedLoopConst1) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"Exit2"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); - - *graph.add_node() = CreateNode( - "InvariantEnter2", "Enter", "while/while/while_context", true, 1, - {"VariantAdd"}); - *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode( - "InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}); - *graph.add_node() = CreateNode( - "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); - *graph.add_node() = CreateNode( - "VariantEnter2", "Enter", "while/while/while_context", false, 1, - {"VariantEnter"}); - *graph.add_node() = CreateNode( - "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); - *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); - *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); - *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); - *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); - *graph.add_node() = CreateNode( - "NextIteration2", "NextIteration", {"VariantAdd2"}); - *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"Exit2", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"Exit2"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); + + AddEnterNode("InvariantEnter2", "while/while/while_context", true, 1, + {"VariantAdd"}, &graph); + AddSimpleNode("Const2", "Const", {"^Identity2"}, &graph); + AddSimpleNode("InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}, &graph); + AddSimpleNode("VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}, &graph); + AddEnterNode("VariantEnter2", "while/while/while_context", false, 1, + {"VariantEnter"}, &graph); + AddSimpleNode("Merge2", "Merge", {"VariantEnter2", "NextIteration2"}, &graph); + AddSimpleNode("Less2/y", "Const", {"^Identity2"}, &graph); + AddSimpleNode("Less2", "Less", {"VariantAdd2", "Less2/y"}, &graph); + AddSimpleNode("LoopCond2", "LoopCond", {"Less2"}, &graph); + AddSimpleNode("Switch2", "Switch", {"Merge2", "LoopCond2"}, &graph); + AddSimpleNode("Identity2", "Identity", {"Switch2:1"}, &graph); + AddSimpleNode("NextIteration2", "NextIteration", {"VariantAdd2"}, &graph); + AddSimpleNode("Exit2", "Exit", {"Switch2"}, &graph); GrapplerItem item; item.graph = graph; @@ -445,48 +393,38 @@ TEST_F(LoopOptimizerTest, NestedLoopConst1) { TEST_F(LoopOptimizerTest, NestedLoopConst2) { GraphDef graph; - *graph.add_node() = CreateNode("0", {}); - *graph.add_node() = CreateNode( - "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); - *graph.add_node() = CreateNode( - "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); - *graph.add_node() = CreateNode( - "VariantAdd", "Add", {"InvariantAdd", "Identity"}); - *graph.add_node() = CreateNode( - "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); - *graph.add_node() = CreateNode( - "Merge", "Merge", {"VariantEnter", "NextIteration"}); - *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); - *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); - *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); - *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); - *graph.add_node() = CreateNode("Identity", {"Switch:1"}); - *graph.add_node() = CreateNode( - "NextIteration", "NextIteration", {"Exit2"}); - *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); - *graph.add_node() = CreateNode("1", {"Exit"}); - - *graph.add_node() = CreateNode( - "InvariantEnter2", "Enter", "while/while/while_context", true, 1, - {"InvariantAdd"}); - *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode( - "InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}); - *graph.add_node() = CreateNode( - "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); - *graph.add_node() = CreateNode( - "VariantEnter2", "Enter", "while/while/while_context", false, 1, - {"VariantEnter"}); - *graph.add_node() = CreateNode( - "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); - *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); - *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); - *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); - *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); - *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); - *graph.add_node() = CreateNode( - "NextIteration2", "NextIteration", {"VariantAdd2"}); - *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + AddSimpleNode("In", "Identity", {}, &graph); + AddEnterNode("InvariantEnter", "while/while_context", true, 1, {"In"}, + &graph); + AddSimpleNode("InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}, + &graph); + AddSimpleNode("VariantAdd", "Add", {"InvariantAdd", "Identity"}, &graph); + AddEnterNode("VariantEnter", "while/while_context", false, 1, {"In"}, &graph); + AddSimpleNode("Merge", "Merge", {"VariantEnter", "NextIteration"}, &graph); + AddSimpleNode("Less/y", "Const", {"^Identity"}, &graph); + AddSimpleNode("Less", "Less", {"Exit2", "Less/y"}, &graph); + AddSimpleNode("LoopCond", "LoopCond", {"Less"}, &graph); + AddSimpleNode("Switch", "Switch", {"Merge", "LoopCond"}, &graph); + AddSimpleNode("Identity", "Identity", {"Switch:1"}, &graph); + AddSimpleNode("NextIteration", "NextIteration", {"Exit2"}, &graph); + AddSimpleNode("Exit", "Exit", {"Switch"}, &graph); + AddSimpleNode("Out", "Identity", {"Exit"}, &graph); + + AddEnterNode("InvariantEnter2", "while/while/while_context", true, 1, + {"InvariantAdd"}, &graph); + AddSimpleNode("Const2", "Const", {"^Identity2"}, &graph); + AddSimpleNode("InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}, &graph); + AddSimpleNode("VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}, &graph); + AddEnterNode("VariantEnter2", "while/while/while_context", false, 1, + {"VariantEnter"}, &graph); + AddSimpleNode("Merge2", "Merge", {"VariantEnter2", "NextIteration2"}, &graph); + AddSimpleNode("Less2/y", "Const", {"^Identity2"}, &graph); + AddSimpleNode("Less2", "Less", {"VariantAdd2", "Less2/y"}, &graph); + AddSimpleNode("LoopCond2", "LoopCond", {"Less2"}, &graph); + AddSimpleNode("Switch2", "Switch", {"Merge2", "LoopCond2"}, &graph); + AddSimpleNode("Identity2", "Identity", {"Switch2:1"}, &graph); + AddSimpleNode("NextIteration2", "NextIteration", {"VariantAdd2"}, &graph); + AddSimpleNode("Exit2", "Exit", {"Switch2"}, &graph); GrapplerItem item; item.graph = graph; @@ -544,50 +482,26 @@ TEST_F(LoopOptimizerTest, NoOp) { VerifyGraphsEqual(item.graph, output, __FUNCTION__); } -namespace { -NodeDef* AddNode(const string& name, const string& op, - const std::vector& inputs, - const std::vector>& attributes, - GraphDef* graph) { - NodeDef* node = graph->add_node(); - node->set_name(name); - node->set_op(op); - for (const string& input : inputs) { - node->add_input(input); - } - for (auto attr : attributes) { - (*node->mutable_attr())[attr.first] = attr.second; - } - return node; -} -} // namespace - TEST_F(LoopOptimizerTest, RemovePush_NoOp) { GrapplerItem item; - AttrValue frame_name; - frame_name.set_s("foo"); - AttrValue type; - type.set_type(DT_RESOURCE); GraphDef& graph = item.graph; - AddNode("c", "Const", {}, {}, &graph); + AddSimpleNode("c", "Const", {}, &graph); // Stack with corresponding push/pop. - AddNode("stack1", "StackV2", {}, {}, &graph); - AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); - AddNode("pop1", "StackPopV2", {"stack1"}, {}, &graph); - AddNode("id1", "Identity", {"pop1"}, {}, &graph); + AddSimpleNode("stack1", "StackV2", {}, &graph); + AddSimpleNode("push1", "StackPushV2", {"stack1", "c"}, &graph); + AddSimpleNode("pop1", "StackPopV2", {"stack1"}, &graph); + AddSimpleNode("id1", "Identity", {"pop1"}, &graph); // Stack with corresponding push/pop behind Enter. - AddNode("stack2", "StackV2", {}, {}, &graph); - AddNode("push_enter", "Enter", {"stack2"}, - {{"T", type}, {"frame_name", frame_name}}, &graph); - AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); - AddNode("pop_enter", "Enter", {"stack2"}, - {{"T", type}, {"frame_name", frame_name}}, &graph); - AddNode("pop2", "StackPopV2", {"pop_enter"}, {}, &graph); - AddNode("id2", "Identity", {"pop2"}, {}, &graph); + AddSimpleNode("stack2", "StackV2", {}, &graph); + AddEnterNode("enter2_c", "frame_name", false, 1, {"c"}, &graph); + AddEnterNode("enter2_stack2", "frame_name", false, 1, {"stack2"}, &graph); + AddSimpleNode("push2", "StackPushV2", {"enter2_stack2", "enter2_c"}, &graph); + AddSimpleNode("pop2", "StackPopV2", {"enter2_stack2"}, &graph); + AddSimpleNode("id2", "Identity", {"pop2"}, &graph); // Stack with unexpected op type in fanout of Stack. - AddNode("stack3", "StackV2", {}, {}, &graph); - AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); - AddNode("stop", "StopGradient", {"stack3"}, {}, &graph); + AddSimpleNode("stack3", "StackV2", {}, &graph); + AddSimpleNode("push3", "StackPushV2", {"stack3", "c"}, &graph); + AddSimpleNode("stop", "StopGradient", {"stack3"}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -598,29 +512,25 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { GrapplerItem item; GraphDef& graph = item.graph; - AttrValue frame_name; - frame_name.set_s("foo"); - AttrValue type; - type.set_type(DT_RESOURCE); - AddNode("c", "Const", {}, {}, &graph); + AddSimpleNode("c", "Const", {}, &graph); // Push without Pop. - AddNode("stack1", "StackV2", {}, {}, &graph); - AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + AddSimpleNode("stack1", "StackV2", {}, &graph); + AddSimpleNode("push1", "StackPushV2", {"stack1", "c"}, &graph); // Push without Pop behind Enter. - AddNode("stack2", "StackV2", {}, {}, &graph); - AddNode("push_enter", "Enter", {"stack2"}, - {{"T", type}, {"frame_name", frame_name}}, &graph); - AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); + AddSimpleNode("stack2", "StackV2", {}, &graph); + AddEnterNode("enter_c", "frame_name", false, 1, {"c"}, &graph); + AddEnterNode("enter_stack2", "frame_name", false, 1, {"stack2"}, &graph); + AddSimpleNode("push2", "StackPushV2", {"enter_stack2", "enter_c"}, &graph); // Pop without consumer. - AddNode("stack3", "StackV2", {}, {}, &graph); - AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); - AddNode("pop3", "StackPopV2", {"stack3"}, {}, &graph); + AddSimpleNode("stack3", "StackV2", {}, &graph); + AddSimpleNode("push3", "StackPushV2", {"stack3", "c"}, &graph); + AddSimpleNode("pop3", "StackPopV2", {"stack3"}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(9, output.node_size()); + EXPECT_EQ(10, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); if (node.name() == "push1") { @@ -631,8 +541,8 @@ TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { } else if (node.name() == "push2") { EXPECT_EQ("Identity", node.op()); EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("c", node.input(0)); - EXPECT_EQ("^push_enter", node.input(1)); + EXPECT_EQ("enter_c", node.input(0)); + EXPECT_EQ("^enter_stack2", node.input(1)); } else if (node.name() == "push3") { EXPECT_EQ("Identity", node.op()); EXPECT_EQ(2, node.input_size()); -- GitLab From 143f3585ca78380891cb862880c14c4ca9d7b9fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 11:19:32 -0700 Subject: [PATCH 062/960] Broadcast Sub and Div from #17123 except for quantization. PiperOrigin-RevId: 189216312 --- tensorflow/contrib/lite/kernels/BUILD | 32 +++++ tensorflow/contrib/lite/kernels/div.cc | 73 +++++++---- tensorflow/contrib/lite/kernels/div_test.cc | 118 ++++++++++++++++++ .../internal/optimized/optimized_ops.h | 91 ++++++++++++++ .../internal/reference/reference_ops.h | 82 ++++++++++++ tensorflow/contrib/lite/kernels/sub.cc | 77 ++++++++---- tensorflow/contrib/lite/kernels/sub_test.cc | 117 +++++++++++++++++ .../testing/generated_examples_zip_test.cc | 18 ++- 8 files changed, 551 insertions(+), 57 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/div_test.cc create mode 100644 tensorflow/contrib/lite/kernels/sub_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 9c63269324..200cb3075b 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -269,6 +269,38 @@ tf_cc_test( ], ) +tf_cc_test( + name = "div_test", + size = "small", + srcs = ["div_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "sub_test", + size = "small", + srcs = ["sub_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "transpose_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc index 44bd0dc85d..6dd243ad62 100644 --- a/tensorflow/contrib/lite/kernels/div.cc +++ b/tensorflow/contrib/lite/kernels/div.cc @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,35 +61,47 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; + + data->requires_broadcast = !HaveSameShapes(input1, input2); - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template -void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDivParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_DIV(type) \ - type::Div(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_DIV(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { - TF_LITE_DIV(reference_ops); + if (data->requires_broadcast) { + TF_LITE_DIV(reference_ops, BroadcastDiv); + } else { + TF_LITE_DIV(reference_ops, Div); + } } else { - TF_LITE_DIV(optimized_ops); + if (data->requires_broadcast) { + TF_LITE_DIV(optimized_ops, BroadcastDiv); + } else { + TF_LITE_DIV(optimized_ops, Div); + } } #undef TF_LITE_DIV } @@ -81,13 +109,14 @@ void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalDivFloat(context, node, params, input1, input2, output); + EvalFloat(context, node, params, data, input1, input2, output); } else { context->ReportError(context, "Inputs and outputs not all float types."); return kTfLiteError; @@ -99,19 +128,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace div TfLiteRegistration* Register_DIV_REF() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc new file mode 100644 index 0000000000..276b8289fb --- /dev/null +++ b/tensorflow/contrib/lite/kernels/div_test.cc @@ -0,0 +1,118 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseDivOpModel : public SingleOpModel { + public: + BaseDivOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions, + CreateDivOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatDivOpModel : public BaseDivOpModel { + public: + using BaseDivOpModel::BaseDivOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +TEST(FloatDivOpTest, NoActivation) { + FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.5, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6}))); +} + +TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) { + FloatDivOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0}))); +} + +TEST(FloatDivOpTest, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0}))) + << "With shape number " << i; + } +} + +TEST(FloatDivOpTest, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.PopulateTensor(m.input2(), {0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 6bbc213cc6..edd65c9170 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2157,6 +2157,51 @@ inline void Div(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastDiv is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + // TODO(aselle): This is not actually optimized yet. inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, @@ -2184,6 +2229,52 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } } + +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastSub is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index d3d15edf4c..527276f7bd 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1325,6 +1325,47 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, output_data, output_dims); } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + inline void Div(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, @@ -1379,6 +1420,47 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index ddaf498d5b..c15a7a50a4 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -26,7 +26,7 @@ namespace ops { namespace builtin { namespace sub { -// This file has three implementation of Div. +// This file has three implementation of Sub. enum KernelType { kReference, kGenericOptimized, // Neon-free @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,49 +61,62 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; + + data->requires_broadcast = !HaveSameShapes(input1, input2); - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template -void EvalSubFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteSubParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_Sub(type) \ - type::Sub(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_SUB(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) if (kernel_type == kReference) { - TF_LITE_Sub(reference_ops); + if (data->requires_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastSub); + } else { + TF_LITE_SUB(reference_ops, Sub); + } } else { - TF_LITE_Sub(optimized_ops); + if (data->requires_broadcast) { + TF_LITE_SUB(optimized_ops, BroadcastSub); + } else { + TF_LITE_SUB(optimized_ops, Sub); + } } -#undef TF_LITE_Sub +#undef TF_LITE_SUB } template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalSubFloat(context, node, params, input1, input2, output); + EvalFloat(context, node, params, data, input1, input2, output); } else { context->ReportError(context, "Inputs and outputs not all float types."); return kTfLiteError; @@ -99,19 +128,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace sub TfLiteRegistration* Register_SUB_REF() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc new file mode 100644 index 0000000000..fdbb4243bb --- /dev/null +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -0,0 +1,117 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseSubOpModel : public SingleOpModel { + public: + BaseSubOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions, + CreateSubOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +TEST(FloatSubOpModel, NoActivation) { + FloatSubOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.1, 0.0, 1.4, -0.3}))); +} + +TEST(FloatSubOpModel, ActivationRELU_N1_TO_1) { + FloatSubOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-1.0, 0.0, 1.0, -0.3}))); +} + +TEST(FloatSubOpModel, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8, -1.1, 0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.1, 0.0, 1.4, -0.3, 0.0, 1.9}))) + << "With shape number " << i; + } +} + +TEST(FloatSubOpModel, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.5}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.5, -0.3, 1.2, 0.0, -1.6, 1.5}))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 86606d1239..88c5aaa099 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -47,10 +47,6 @@ tensorflow::Env* env = tensorflow::Env::Default(); // Key is a substring of the test name and value is a bug number. // TODO(ahentz): make sure we clean this list up frequently. std::map kBrokenTests = { - // Sub and Div don't support broadcasting. - {R"(^\/diva.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, - {R"(^\/suba.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, - // Add only supports float32. (and "constant" tests use Add) {R"(^\/adda.*int32)", "68808744"}, {R"(^\/constant.*int32)", "68808744"}, @@ -238,23 +234,25 @@ TEST_P(OpsTest, RunStuff) { INSTANTIATE_TESTS(add) INSTANTIATE_TESTS(avg_pool) -INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(batch_to_space_nd) INSTANTIATE_TESTS(concat) INSTANTIATE_TESTS(constant) INSTANTIATE_TESTS(control_dep) INSTANTIATE_TESTS(conv) INSTANTIATE_TESTS(depthwiseconv) +INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) INSTANTIATE_TESTS(gather) INSTANTIATE_TESTS(global_batch_norm) -INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(l2_pool) +INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(log_softmax) +INSTANTIATE_TESTS(lstm) INSTANTIATE_TESTS(max_pool) +INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) INSTANTIATE_TESTS(pad) INSTANTIATE_TESTS(relu) @@ -264,15 +262,13 @@ INSTANTIATE_TESTS(reshape) INSTANTIATE_TESTS(resize_bilinear) INSTANTIATE_TESTS(sigmoid) INSTANTIATE_TESTS(softmax) +INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(space_to_depth) -INSTANTIATE_TESTS(sub) INSTANTIATE_TESTS(split) -INSTANTIATE_TESTS(div) -INSTANTIATE_TESTS(transpose) -INSTANTIATE_TESTS(lstm) -INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(squeeze) INSTANTIATE_TESTS(strided_slice) +INSTANTIATE_TESTS(sub) +INSTANTIATE_TESTS(transpose) } // namespace testing } // namespace tflite -- GitLab From f465d7467ef5488aca1b4e620c27d4016c774ef8 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 15 Mar 2018 11:22:58 -0700 Subject: [PATCH 063/960] [XLA:CPU] Fix the parallel task assignment to not parallelize dot operations. The IR emitter currently generates incorrect code for parallelized dot operations. Add test cases to check for dot operation parallelization. PiperOrigin-RevId: 189216963 --- tensorflow/compiler/xla/service/cpu/BUILD | 25 ++++++ .../xla/service/cpu/dot_op_emitter.cc | 5 ++ .../service/cpu/parallel_task_assignment.cc | 19 ++--- .../cpu/parallel_task_assignment_test.cc | 84 +++++++++++++++++++ 4 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 91ae66ece1..093db020c0 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -772,6 +772,31 @@ cc_library( ], ) +tf_cc_test( + name = "parallel_task_assignment_test", + srcs = ["parallel_task_assignment_test.cc"], + deps = [ + ":cpu_executable", + ":parallel_task_assignment", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_layout", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:algebraic_simplifier", + "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "cpu_options", srcs = ["cpu_options.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 6f06256e08..8b1e20d79e 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -715,6 +715,11 @@ tensorflow::Status DotOpEmitter::Emit() { // which performs the sum-of-products (the reduction loop) before storing // the result in the output buffer. + // This routine assumes that the dot operation is not in a parallelized + // enclosing computation. + CHECK( + dot_.parent()->root_instruction()->outer_dimension_partitions().empty()); + const Shape& lhs_shape = lhs_array_.GetShape(); const Shape& rhs_shape = rhs_array_.GetShape(); diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index deb21bf4ef..38f1668159 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -130,22 +130,21 @@ int64 ParallelTaskAssignment::GetTargetParallelTaskCount( // *) Emit custom loops (kSelectAndScatter, FusionKind::kTransposeDot). // *) Tuple-shaped. // TODO(b/27458679) Parallelize instructions which are skipped here. - if (instruction->opcode() == HloOpcode::kParameter || - instruction->opcode() == HloOpcode::kConstant || - instruction->opcode() == HloOpcode::kCall || - instruction->opcode() == HloOpcode::kCustomCall || - instruction->opcode() == HloOpcode::kSelectAndScatter || - instruction->opcode() == HloOpcode::kGetTupleElement || - instruction->opcode() == HloOpcode::kBitcast || - instruction->opcode() == HloOpcode::kFft || - (instruction->opcode() == HloOpcode::kConvolution && + auto opcode = instruction->opcode(); + if (opcode == HloOpcode::kParameter || opcode == HloOpcode::kConstant || + opcode == HloOpcode::kCall || opcode == HloOpcode::kCustomCall || + opcode == HloOpcode::kDot || opcode == HloOpcode::kSelectAndScatter || + opcode == HloOpcode::kGetTupleElement || opcode == HloOpcode::kBitcast || + opcode == HloOpcode::kFft || + (opcode == HloOpcode::kConvolution && PotentiallyImplementedAsEigenConvolution(*instruction)) || PotentiallyImplementedAsEigenDot(*instruction) || - (instruction->opcode() == HloOpcode::kFusion && + (opcode == HloOpcode::kFusion && instruction->fusion_kind() != HloInstruction::FusionKind::kLoop) || ShapeUtil::IsTuple(instruction->shape())) { return 1; } + // Consult 'cost_model_' to compute target parallel task count. return cost_model_->GetParallelTaskCount(instruction); } diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc new file mode 100644 index 0000000000..79b00135c6 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/parallel_task_assignment.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_executable.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" + +namespace xla { +namespace { + +class ParallelTaskAssignmentTest : public HloVerifiedTestBase { + protected: + const HloCostAnalysis::ShapeSizeFunction shape_size_func_ = + cpu::CpuExecutable::ShapeSizeBytes; + + // Use any value larger than 2 since we only test whether a module is + // parallelized or not + const int max_parallelism_ = 10; +}; + +TEST_F(ParallelTaskAssignmentTest, DotOperationNotParallelized) { + const string hlo_string = R"( + HloModule TestTaskParallel_Dot + ENTRY Dot { + dot_lhs = f32[196614,2]{1,0} parameter(0) + dot_rhs = f32[2,1]{1,0} parameter(1) + ROOT dot = f32[196614,1]{1,0} dot(dot_lhs, dot_rhs), + lhs_contracting_dims={1}, rhs_contracting_dims={0} + } + )"; + + ParseAndVerifyModule(hlo_string.c_str()); + TF_ASSERT_OK_AND_ASSIGN(bool changed, cpu::ParallelTaskAssigner( + max_parallelism_, shape_size_func_) + .Run(&module())); + EXPECT_FALSE(changed); +} + +TEST_F(ParallelTaskAssignmentTest, + FusedComputationWithDotOperationNotParallelized) { + const string hlo_string = R"( + HloModule TestTaskParallel_DotNestedInFusedComp + fused_computation.0 { + parameter.0 = f32[196614,2]{1,0} parameter(0) + parameter.0.1 = f32[2,1]{1,0} parameter(1) + parameter.0.2 = f32[196614,1]{1,0} parameter(2) + dot.0 = f32[196614,1]{1,0} dot(parameter.0, parameter.0.1), + lhs_contracting_dims={1}, rhs_contracting_dims={0} + ROOT add.0 = f32[196614,1]{1,0} add(dot.0, parameter.0.2) + + } + ENTRY DotNestedInFusedComp { + parameter = f32[196614,2]{1,0} parameter(0) + parameter.1 = f32[2,1]{1,0} parameter(1) + parameter.2 = f32[196614,1]{1,0} parameter(2) + ROOT fusion = f32[196614,1]{1,0} fusion(parameter, parameter.1, + parameter.2), kind=kOutput, calls=fused_computation.0 + } + )"; + + ParseAndVerifyModule(hlo_string.c_str()); + TF_ASSERT_OK_AND_ASSIGN(bool changed, cpu::ParallelTaskAssigner( + max_parallelism_, shape_size_func_) + .Run(&module())); + EXPECT_FALSE(changed); +} + +} // namespace +} // namespace xla -- GitLab From b4633736a9a2acd37cc6ec59644d40cb1a2b541b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 11:24:46 -0700 Subject: [PATCH 064/960] Prevent fusing activation functions that would remove non-discardable arrays. PiperOrigin-RevId: 189217360 --- .../fuse_activation_functions.cc | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc index ab943f72d1..c5ce3fcd95 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc @@ -42,9 +42,9 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { if (CountTrueOutputs(*model, *op) > 1) { AddMessageF( - "Not fusing activation function into %s because it has more than one " - " consumed output", - LogName(*op)); + "Not fusing activation function %s into %s because it has more than " + "one consumed output", + LogName(*ac_op), LogName(*op)); return false; } @@ -56,22 +56,31 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing activation function into %s because it is consumed by more " "than 1 other operator", - LogName(*op)); + LogName(*ac_op), LogName(*op)); + return false; + } + + if (!IsDiscardableArray(*model, op->outputs[0])) { + AddMessageF( + "Not fusing activation function %s into %s because output %s it is not " + "discardable", + LogName(*ac_op), LogName(*op), op->outputs[0]); return false; } if (op->fused_activation_function != FusedActivationFunctionType::kNone) { AddMessageF( - "Not fusing activation function into %s because it already has a fused " - "activation function", - LogName(*op)); + "Not fusing activation function %s into %s because it already has a " + "fused activation function", + LogName(*ac_op), LogName(*op)); return false; } if (!OperatorSupportsFusedActivation(op->type)) { AddMessageF( - "Not fusing activation function because the %s op doesn't support it", - LogName(*op)); + "Not fusing activation function %s because the %s op doesn't support " + "it", + LogName(*ac_op), LogName(*op)); return false; } -- GitLab From 7e277054400e913af05672a2f6e3519f5b00873a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 15 Mar 2018 11:53:00 -0700 Subject: [PATCH 065/960] [TF:XLA] Don't follow control dependencies for const analysis. Those inputs are not actually required to be constants. PiperOrigin-RevId: 189222308 --- tensorflow/compiler/tf2xla/const_analysis.cc | 6 ++++-- .../compiler/tf2xla/const_analysis_test.cc | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 6f46532419..de1008803d 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -55,8 +55,10 @@ Status BackwardsConstAnalysis(const Graph& g, compile_time_const_args->at(index) = true; return; } - for (const Node* pred : node->in_nodes()) { - must_be_const.insert(pred); + for (const Edge* pred : node->in_edges()) { + if (!pred->IsControlEdge()) { + must_be_const.insert(pred->src()); + } } return; } diff --git a/tensorflow/compiler/tf2xla/const_analysis_test.cc b/tensorflow/compiler/tf2xla/const_analysis_test.cc index 9d125f8d49..992b12c06d 100644 --- a/tensorflow/compiler/tf2xla/const_analysis_test.cc +++ b/tensorflow/compiler/tf2xla/const_analysis_test.cc @@ -79,5 +79,24 @@ TEST(ConstAnalysisTest, TopologicalOrder) { } } +TEST(ConstAnalysisTest, DontFollowControlDependencies) { + Scope root = Scope::NewRootScope(); + + Output arg0 = ops::_Arg(root.WithOpName("Arg0"), DT_INT32, 0); + Output arg1 = ops::_Arg(root.WithOpName("Arg1"), DT_INT32, 1); + Output c1 = + ops::Const(root.WithOpName("c1").WithControlDependencies(arg0), 1, {1}); + Output add = ops::Add(root, arg1, c1); + Output reshape = ops::Reshape(root, arg1, add); + + Graph graph(OpRegistry::Global()); + TF_ASSERT_OK(root.ToGraph(&graph)); + + std::vector const_args(2, false); + TF_ASSERT_OK(BackwardsConstAnalysis(graph, &const_args)); + + EXPECT_EQ(const_args, std::vector({false, true})); +} + } // namespace } // namespace tensorflow -- GitLab From 9d0640e68e07a65eb315e75e6aa73eb84d60dcf4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 12:06:24 -0700 Subject: [PATCH 066/960] Automated g4 rollback of changelist 189110935 PiperOrigin-RevId: 189224522 --- tensorflow/cc/framework/cc_op_gen.cc | 1 + tensorflow/core/lib/core/stringpiece.cc | 5 -- tensorflow/core/lib/core/stringpiece.h | 6 -- tensorflow/core/lib/core/stringpiece_test.cc | 70 ------------------- tensorflow/core/lib/hash/hash.h | 1 + tensorflow/core/lib/hash/hash_test.cc | 73 ++++++++++++++++++++ 6 files changed, 75 insertions(+), 81 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index 39893f5ccd..d73121c7b7 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/types.pb_text.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 29b727fc44..5bd79778a6 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -17,14 +17,9 @@ limitations under the License. #include #include -#include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { -size_t StringPieceHasher::operator()(StringPiece s) const { - return Hash64(s.data(), s.size()); -} - std::ostream& operator<<(std::ostream& o, StringPiece piece) { o.write(piece.data(), piece.size()); return o; diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index caa9642774..910e4d9e2a 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -35,8 +35,6 @@ limitations under the License. namespace tensorflow { -struct StringPieceHasher; - class StringPiece { public: typedef size_t size_type; @@ -131,10 +129,6 @@ class StringPiece { // Intentionally copyable }; -struct StringPieceHasher { - size_t operator()(StringPiece s) const; -}; - inline bool operator==(StringPiece x, StringPiece y) { return ((x.size() == y.size()) && (memcmp(x.data(), y.data(), x.size()) == 0)); diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index 8f17b85b6d..d0dbeb6072 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -65,74 +65,4 @@ TEST(StringPiece, Contains) { EXPECT_TRUE(!a.contains(d)); } -TEST(StringPieceHasher, Equality) { - StringPieceHasher hasher; - - StringPiece s1("foo"); - StringPiece s2("bar"); - StringPiece s3("baz"); - StringPiece s4("zot"); - - EXPECT_TRUE(hasher(s1) != hasher(s2)); - EXPECT_TRUE(hasher(s1) != hasher(s3)); - EXPECT_TRUE(hasher(s1) != hasher(s4)); - EXPECT_TRUE(hasher(s2) != hasher(s3)); - EXPECT_TRUE(hasher(s2) != hasher(s4)); - EXPECT_TRUE(hasher(s3) != hasher(s4)); - - EXPECT_TRUE(hasher(s1) == hasher(s1)); - EXPECT_TRUE(hasher(s2) == hasher(s2)); - EXPECT_TRUE(hasher(s3) == hasher(s3)); - EXPECT_TRUE(hasher(s4) == hasher(s4)); -} - -TEST(StringPieceHasher, HashMap) { - string s1("foo"); - string s2("bar"); - string s3("baz"); - - StringPiece p1(s1); - StringPiece p2(s2); - StringPiece p3(s3); - - std::unordered_map map; - - map.insert(std::make_pair(p1, 0)); - map.insert(std::make_pair(p2, 1)); - map.insert(std::make_pair(p3, 2)); - EXPECT_EQ(map.size(), 3); - - bool found[3] = {false, false, false}; - for (auto const& val : map) { - int x = val.second; - EXPECT_TRUE(x >= 0 && x < 3); - EXPECT_TRUE(!found[x]); - found[x] = true; - } - EXPECT_EQ(found[0], true); - EXPECT_EQ(found[1], true); - EXPECT_EQ(found[2], true); - - auto new_iter = map.find("zot"); - EXPECT_TRUE(new_iter == map.end()); - - new_iter = map.find("bar"); - EXPECT_TRUE(new_iter != map.end()); - - map.erase(new_iter); - EXPECT_EQ(map.size(), 2); - - found[0] = false; - found[1] = false; - found[2] = false; - for (const auto& iter : map) { - int x = iter.second; - EXPECT_TRUE(x >= 0 && x < 3); - EXPECT_TRUE(!found[x]); - found[x] = true; - } - EXPECT_EQ(found[0], true); - EXPECT_EQ(found[1], false); - EXPECT_EQ(found[2], true); -} } // namespace tensorflow diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h index 77b8031598..ca05e6346e 100644 --- a/tensorflow/core/lib/hash/hash.h +++ b/tensorflow/core/lib/hash/hash.h @@ -76,6 +76,7 @@ struct hash { return static_cast(Hash64(sp.data(), sp.size())); } }; +using StringPieceHasher = ::tensorflow::hash; template struct hash> { diff --git a/tensorflow/core/lib/hash/hash_test.cc b/tensorflow/core/lib/hash/hash_test.cc index 0e5f6c6803..7d58313132 100644 --- a/tensorflow/core/lib/hash/hash_test.cc +++ b/tensorflow/core/lib/hash/hash_test.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include +#include #include #include "tensorflow/core/lib/hash/hash.h" @@ -81,4 +83,75 @@ static void BM_Hash32(int iters, int len) { } BENCHMARK(BM_Hash32)->Range(1, 1024); +TEST(StringPieceHasher, Equality) { + StringPieceHasher hasher; + + StringPiece s1("foo"); + StringPiece s2("bar"); + StringPiece s3("baz"); + StringPiece s4("zot"); + + EXPECT_TRUE(hasher(s1) != hasher(s2)); + EXPECT_TRUE(hasher(s1) != hasher(s3)); + EXPECT_TRUE(hasher(s1) != hasher(s4)); + EXPECT_TRUE(hasher(s2) != hasher(s3)); + EXPECT_TRUE(hasher(s2) != hasher(s4)); + EXPECT_TRUE(hasher(s3) != hasher(s4)); + + EXPECT_TRUE(hasher(s1) == hasher(s1)); + EXPECT_TRUE(hasher(s2) == hasher(s2)); + EXPECT_TRUE(hasher(s3) == hasher(s3)); + EXPECT_TRUE(hasher(s4) == hasher(s4)); +} + +TEST(StringPieceHasher, HashMap) { + string s1("foo"); + string s2("bar"); + string s3("baz"); + + StringPiece p1(s1); + StringPiece p2(s2); + StringPiece p3(s3); + + std::unordered_map map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + bool found[3] = {false, false, false}; + for (auto const& val : map) { + int x = val.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], true); + EXPECT_EQ(found[2], true); + + auto new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + found[0] = false; + found[1] = false; + found[2] = false; + for (const auto& iter : map) { + int x = iter.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], false); + EXPECT_EQ(found[2], true); +} + } // namespace tensorflow -- GitLab From 5d624aa437d3541d35e42482ee649c005f7fd5b5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 15 Mar 2018 12:15:07 -0700 Subject: [PATCH 067/960] Clarify that in_nodes and in_edges includes control edges. PiperOrigin-RevId: 189225717 --- tensorflow/core/graph/graph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index cbd58b051a..f7ca7d0620 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -124,7 +124,8 @@ class Node { // Inputs requested by the NodeDef. For the actual inputs, use in_edges. const protobuf::RepeatedPtrField& requested_inputs() const; - // Get the neighboring nodes via edges either in or out of this node. + // Get the neighboring nodes via edges either in or out of this node. This + // includes control edges. gtl::iterator_range in_nodes() const; gtl::iterator_range out_nodes() const; const EdgeSet& in_edges() const { return in_edges_; } -- GitLab From 0f508d4de379e800ad7f990de08959bbd6fcabb5 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 15 Mar 2018 12:33:10 -0700 Subject: [PATCH 068/960] Warn when creating a `tf.InteractiveSession` if another is active. Fixes #13202 (as far as possible without breaking backwards compatibility). PiperOrigin-RevId: 189228094 --- tensorflow/python/client/session.py | 19 +++++++++++++++ tensorflow/python/client/session_test.py | 31 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 924d62992a..29f06c8f22 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -21,6 +21,7 @@ from __future__ import print_function import functools import re import threading +import warnings import numpy as np @@ -1624,6 +1625,9 @@ class InteractiveSession(BaseSession): ``` """ + _count_lock = threading.Lock() + _active_session_count = 0 # GUARDED_BY(_count_lock) + def __init__(self, target='', graph=None, config=None): """Creates a new interactive TensorFlow session. @@ -1652,6 +1656,15 @@ class InteractiveSession(BaseSession): config.graph_options.place_pruned_graph = True super(InteractiveSession, self).__init__(target, graph, config) + with InteractiveSession._count_lock: + if InteractiveSession._active_session_count > 0: + warnings.warn('An interactive session is already active. This can ' + 'cause out-of-memory errors in some cases. You must ' + 'explicitly call `InteractiveSession.close()` to release ' + 'resources held by the other session(s).') + InteractiveSession._active_session_count += 1 + self._closed = False + self._default_session = self.as_default() self._default_session.enforce_nesting = False self._default_session.__enter__() @@ -1664,6 +1677,12 @@ class InteractiveSession(BaseSession): def close(self): """Closes an `InteractiveSession`.""" super(InteractiveSession, self).close() + with InteractiveSession._count_lock: + if not self._closed: + InteractiveSession._active_session_count -= 1 + self._closed = True + else: + return if self._explicit_graph is not None: self._default_graph.__exit__(None, None, None) self._default_session.__exit__(None, None, None) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 781725d63b..6c7339f3d8 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -22,6 +22,7 @@ import os import sys import threading import time +import warnings import numpy as np import six @@ -66,6 +67,10 @@ ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape) # @test_util.with_c_api class SessionTest(test_util.TensorFlowTestCase): + def setUp(self): + super(SessionTest, self).setUp() + warnings.simplefilter('always') + def testUseExistingGraph(self): with ops.Graph().as_default() as g, ops.device('/cpu:0'): a = constant_op.constant(6.0, shape=[1, 1]) @@ -1191,6 +1196,32 @@ class SessionTest(test_util.TensorFlowTestCase): self.assertAllEqual([[24.0]], e.eval()) sess.close() + def testMultipleInteractiveSessionsWarning(self): + # Reinitialize the global state to ensure that the expected warnings will + # be emitted. + session.InteractiveSession._active_session_count = 0 # pylint: disable=protected-access + + sess = session.InteractiveSession() + sess.close() + # Opening and closing interactive sessions serially should not warn. + with warnings.catch_warnings(record=True) as w: + sess = session.InteractiveSession() + sess.close() + self.assertEqual(0, len(w)) + + with warnings.catch_warnings(record=True) as w: + sess = session.InteractiveSession() + self.assertEqual(0, len(w)) + with warnings.catch_warnings(record=True) as w: + sess2 = session.InteractiveSession() + self.assertEqual(1, len(w)) + self.assertTrue('An interactive session is already active. This can cause ' + 'out-of-memory errors in some cases. You must explicitly ' + 'call `InteractiveSession.close()` to release resources ' + 'held by the other session(s).' in str(w[0].message)) + sess2.close() + sess.close() + def testInteractivePlacePrunedGraph(self): sess = session.InteractiveSession() -- GitLab From d696214ab7d1565850702959983fa189d0bfe50c Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 15 Mar 2018 12:43:01 -0700 Subject: [PATCH 069/960] Remove underscore prefix from broadcast_gradient_args op. PiperOrigin-RevId: 189229472 --- .../contrib/nn/python/ops/scaled_softplus.py | 4 +- tensorflow/python/framework/python_op_gen.cc | 4 +- .../python/kernel_tests/basic_gpu_test.py | 4 +- .../python/kernel_tests/bcast_ops_test.py | 4 +- tensorflow/python/ops/math_grad.py | 50 ++++++------------- 5 files changed, 20 insertions(+), 46 deletions(-) diff --git a/tensorflow/contrib/nn/python/ops/scaled_softplus.py b/tensorflow/contrib/nn/python/ops/scaled_softplus.py index fcbfbc239c..7184ef2b66 100644 --- a/tensorflow/contrib/nn/python/ops/scaled_softplus.py +++ b/tensorflow/contrib/nn/python/ops/scaled_softplus.py @@ -30,9 +30,7 @@ def _reduce_and_reshape_grad(g, t): """Returns the gradient, sum-reduced and reshaped to `t`'s shape.""" shape = array_ops.shape(t) g_shape = array_ops.shape(g) - # pylint: disable=protected-access - bcast_dims, _ = gen_array_ops._broadcast_gradient_args(shape, g_shape) - # pylint: enable=protected-access + bcast_dims, _ = gen_array_ops.broadcast_gradient_args(shape, g_shape) return array_ops.reshape(math_ops.reduce_sum(g, bcast_dims), shape) diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 846e0c356c..03721c9a68 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -98,9 +98,7 @@ bool IsOpWithUnderscorePrefix(const string& s) { // TODO(annarev): reduce usage of '*' imports and remove these from the // list. "fused_batch_norm", "histogram_fixed_width", "stack", - "batch_norm_with_global_normalization", - // TODO(annarev): replace these ops in the next change. - "broadcast_gradient_args"}); + "batch_norm_with_global_normalization"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index 405651e8ae..987a6ffcd4 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -33,7 +33,7 @@ from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables -from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args +from tensorflow.python.ops.gen_array_ops import broadcast_gradient_args from tensorflow.python.platform import test @@ -157,7 +157,7 @@ class BroadcastSimpleTest(test.TestCase): def _GetGradientArgs(self, xs, ys): with self.test_session(use_gpu=True) as sess: - return sess.run(_broadcast_gradient_args(xs, ys)) + return sess.run(broadcast_gradient_args(xs, ys)) def testBroadcast(self): r0, r1 = self._GetGradientArgs([2, 3, 5], [1]) diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py index cb46fcb007..3305e55c05 100644 --- a/tensorflow/python/kernel_tests/bcast_ops_test.py +++ b/tensorflow/python/kernel_tests/bcast_ops_test.py @@ -20,8 +20,8 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args from tensorflow.python.ops.gen_array_ops import broadcast_args +from tensorflow.python.ops.gen_array_ops import broadcast_gradient_args from tensorflow.python.platform import test @@ -33,7 +33,7 @@ class BcastOpsTest(test.TestCase): def _GetGradientArgs(self, xs, ys): with self.test_session() as sess: - return sess.run(_broadcast_gradient_args(xs, ys)) + return sess.run(broadcast_gradient_args(xs, ys)) def testBasic(self): r = self._GetBroadcastShape([2, 3, 5], [1]) diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index d220fe3cce..eb33687cb5 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -620,9 +620,7 @@ def _IgammaGrad(op, grad): x = op.inputs[1] sa = array_ops.shape(a) sx = array_ops.shape(x) - # pylint: disable=protected-access - unused_ra, rx = gen_array_ops._broadcast_gradient_args(sa, sx) - # pylint: enable=protected-access + unused_ra, rx = gen_array_ops.broadcast_gradient_args(sa, sx) # Perform operations in log space before summing, because Gamma(a) # and Gamma'(a) can grow large. @@ -649,9 +647,7 @@ def _BetaincGrad(op, grad): # versa; so its sufficient to check against shape(a). sa = array_ops.shape(a) sx = array_ops.shape(x) - # pylint: disable=protected-access - _, rx = gen_array_ops._broadcast_gradient_args(sa, sx) - # pylint: enable=protected-access + _, rx = gen_array_ops.broadcast_gradient_args(sa, sx) # Perform operations in log space before summing, because terms # can grow large. @@ -677,9 +673,7 @@ def _ZetaGrad(op, grad): # Broadcast gradients sx = array_ops.shape(x) sq = array_ops.shape(q) - # pylint: disable=protected-access - unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq) - # pylint: enable=protected-access + unused_rx, rq = gen_array_ops.broadcast_gradient_args(sx, sq) # Evaluate gradient with ops.control_dependencies([grad]): x = math_ops.conj(x) @@ -699,9 +693,7 @@ def _PolygammaGrad(op, grad): # Broadcast gradients sn = array_ops.shape(n) sx = array_ops.shape(x) - # pylint: disable=protected-access - unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx) - # pylint: enable=protected-access + unused_rn, rx = gen_array_ops.broadcast_gradient_args(sn, sx) # Evaluate gradient with ops.control_dependencies([grad]): n = math_ops.conj(n) @@ -841,9 +833,7 @@ def _AddGrad(op, grad): return grad, grad sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx), array_ops.reshape(math_ops.reduce_sum(grad, ry), sy)) @@ -858,9 +848,7 @@ def _SubGrad(op, grad): return grad, -grad sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) return (array_ops.reshape(math_ops.reduce_sum(grad, rx), sx), array_ops.reshape(-math_ops.reduce_sum(grad, ry), sy)) @@ -870,7 +858,6 @@ def _MulGrad(op, grad): """The gradient of scalar multiplication.""" x = op.inputs[0] y = op.inputs[1] - # pylint: disable=protected-access if (isinstance(grad, ops.Tensor) and _ShapesFullySpecifiedAndEqual(x, y, grad) and grad.dtype in (dtypes.int32, dtypes.float32)): @@ -878,14 +865,13 @@ def _MulGrad(op, grad): assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype) sx = array_ops.shape(x) sy = array_ops.shape(y) - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) x = math_ops.conj(x) y = math_ops.conj(y) return (array_ops.reshape( math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx), sx), array_ops.reshape( math_ops.reduce_sum(gen_math_ops.mul(x, grad), ry), sy)) - # pylint: enable=protected-access @ops.RegisterGradient("Div") @@ -895,9 +881,7 @@ def _DivGrad(op, grad): y = op.inputs[1] sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) x = math_ops.conj(x) y = math_ops.conj(y) return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx), @@ -920,9 +904,7 @@ def _FloorModGrad(op, grad): sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) floor_xy = math_ops.floor_div(x, y) gx = array_ops.reshape(math_ops.reduce_sum(grad, rx), sx) gy = array_ops.reshape( @@ -942,9 +924,7 @@ def _RealDivGrad(op, grad): y = op.inputs[1] sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) x = math_ops.conj(x) y = math_ops.conj(y) return (array_ops.reshape( @@ -962,7 +942,7 @@ def _PowGrad(op, grad): z = op.outputs[0] sx = array_ops.shape(x) sy = array_ops.shape(y) - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) x = math_ops.conj(x) y = math_ops.conj(y) z = math_ops.conj(z) @@ -990,7 +970,7 @@ def _MaximumMinimumGrad(op, grad, selector_op): gradshape = array_ops.shape(grad) zeros = array_ops.zeros(gradshape, gdtype) xmask = selector_op(x, y) - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) xgrad = array_ops.where(xmask, grad, zeros) ygrad = array_ops.where(xmask, zeros, grad) gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) @@ -1017,9 +997,7 @@ def _SquaredDifferenceGrad(op, grad): y = op.inputs[1] sx = array_ops.shape(x) sy = array_ops.shape(y) - # pylint: disable=protected-access - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) with ops.control_dependencies([grad]): # The parens ensure that if grad is IndexedSlices, it'll get multiplied by # Tensor (not a number like 2.0) which causes it to convert to Tensor. @@ -1183,7 +1161,7 @@ def _ComplexGrad(op, grad): y = op.inputs[1] sx = array_ops.shape(x) sy = array_ops.shape(y) - rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy) return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx), array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy)) -- GitLab From 61032e9ca7bf9849cb65db9b646381d124080856 Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Thu, 15 Mar 2018 12:54:42 -0700 Subject: [PATCH 070/960] Small code readability improvement. PiperOrigin-RevId: 189231130 --- tensorflow/python/feature_column/feature_column.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 381153c66a..7d99fcb3e7 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -1889,12 +1889,12 @@ class _LazyBuilder(object): self._feature_tensors[key] = feature_tensor return feature_tensor - if not isinstance(key, (str, _FeatureColumn)): - raise TypeError('"key" must be either a "str" or "_FeatureColumn". ' - 'Provided: {}'.format(key)) + if isinstance(key, str): + raise ValueError('Feature {} is not in features dictionary.'.format(key)) if not isinstance(key, _FeatureColumn): - raise ValueError('Feature {} is not in features dictionary.'.format(key)) + raise TypeError('"key" must be either a "str" or "_FeatureColumn". ' + 'Provided: {}'.format(key)) column = key logging.debug('Transforming feature_column %s.', column) -- GitLab From ccd8079e579604547f4b4d8a6b061cfdc6ce49bf Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 15 Mar 2018 12:58:08 -0700 Subject: [PATCH 071/960] Merge changes from github. PiperOrigin-RevId: 189231636 --- README.md | 4 + SECURITY.md | 14 +- configure.py | 7 +- .../xla/service/generic_transfer_manager.cc | 9 +- .../compiler/xla/tests/convolution_test.cc | 2 +- tensorflow/contrib/BUILD | 10 +- tensorflow/contrib/__init__.py | 6 +- .../boosted_trees/lib/utils/batch_features.h | 6 +- tensorflow/contrib/cmake/README.md | 12 +- tensorflow/contrib/cmake/external/grpc.cmake | 1 + .../contrib/cmake/external/protobuf.cmake | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 4 + tensorflow/contrib/data/__init__.py | 4 + .../contrib/data/python/kernel_tests/BUILD | 17 + .../kernel_tests/slide_dataset_op_test.py | 242 +++ tensorflow/contrib/data/python/ops/BUILD | 1 + tensorflow/contrib/data/python/ops/sliding.py | 102 ++ tensorflow/contrib/distributions/BUILD | 2 + tensorflow/contrib/eager/python/BUILD | 5 +- .../python/examples/linear_regression/BUILD | 1 + tensorflow/contrib/factorization/BUILD | 5 +- .../contrib/ffmpeg/default/ffmpeg_lib.cc | 9 +- tensorflow/contrib/gan/BUILD | 1 + tensorflow/contrib/kafka/BUILD | 108 +- .../kafka/kernels/kafka_dataset_ops.cc | 4 +- tensorflow/contrib/kafka/ops/dataset_ops.cc | 44 + .../kafka/python/ops/kafka_dataset_ops.py | 9 +- .../kafka/python/ops/kafka_op_loader.py | 24 + .../contrib/kfac/python/kernel_tests/BUILD | 1 + tensorflow/contrib/labeled_tensor/BUILD | 1 + tensorflow/contrib/layers/BUILD | 2 + .../layers/python/layers/embedding_ops.py | 2 +- tensorflow/contrib/learn/BUILD | 12 +- .../learn/python/learn/ops/embeddings_ops.py | 2 +- tensorflow/contrib/lite/Makefile | 9 +- tensorflow/contrib/lite/arena_planner.h | 2 +- tensorflow/contrib/lite/build_rpi_lib.sh | 22 + tensorflow/contrib/lite/builtin_ops.h | 2 +- tensorflow/contrib/lite/error_reporter.h | 2 +- tensorflow/contrib/lite/g3doc/rpi.md | 50 + tensorflow/contrib/lite/interpreter.h | 2 +- tensorflow/contrib/lite/interpreter_test.cc | 2 +- tensorflow/contrib/lite/kernels/conv.cc | 2 +- .../contrib/lite/kernels/depthwise_conv.cc | 2 +- .../contrib/lite/kernels/fully_connected.cc | 2 +- tensorflow/contrib/lite/kernels/kernel_util.h | 2 +- .../contrib/lite/kernels/lsh_projection.cc | 2 +- tensorflow/contrib/lite/kernels/lstm.cc | 6 +- tensorflow/contrib/lite/kernels/reshape.cc | 12 +- .../contrib/lite/kernels/reshape_test.cc | 2 +- tensorflow/contrib/lite/kernels/test_util.cc | 4 +- .../kernels/unidirectional_sequence_lstm.cc | 2 +- tensorflow/contrib/lite/memory_planner.h | 4 +- tensorflow/contrib/lite/model.h | 2 +- .../contrib/lite/nnapi/NeuralNetworksShim.h | 2 +- tensorflow/contrib/lite/rpi_makefile.inc | 33 + .../schema/builtin_ops_header/generator.cc | 2 +- .../contrib/lite/simple_memory_arena.cc | 6 +- tensorflow/contrib/lite/simple_memory_arena.h | 6 +- tensorflow/contrib/lookup/BUILD | 1 + tensorflow/contrib/makefile/README.md | 2 + tensorflow/contrib/makefile/build_all_ios.sh | 3 +- tensorflow/contrib/mpi/mpi_utils.h | 2 + .../contrib/predictor/predictor_factories.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 2 + .../contrib/py2tf/converters/single_return.py | 2 +- tensorflow/contrib/py2tf/utils/BUILD | 1 + .../quantize/python/fold_batch_norms.py | 4 +- .../contrib/quantize/python/quant_ops.py | 4 +- .../contrib/quantize/python/quantize.py | 2 +- .../contrib/quantize/python/quantize_graph.py | 2 +- .../python/quantize_parameterized_test.py | 8 +- .../contrib/quantize/python/quantize_test.py | 2 +- .../contrib/remote_fused_graph/pylib/BUILD | 1 - tensorflow/contrib/rnn/python/ops/rnn_cell.py | 6 +- tensorflow/contrib/saved_model/BUILD | 1 + .../seq2seq/python/ops/beam_search_decoder.py | 7 +- tensorflow/contrib/session_bundle/BUILD | 1 + .../contrib/slim/python/slim/data/BUILD | 1 + tensorflow/contrib/tensor_forest/BUILD | 1 - tensorflow/contrib/tensorboard/BUILD | 1 + tensorflow/contrib/tensorrt/BUILD | 2 + tensorflow/contrib/tensorrt/README.md | 23 +- tensorflow/contrib/tensorrt/__init__.py | 18 +- .../contrib/tensorrt/convert/convert_graph.cc | 256 ++- .../contrib/tensorrt/convert/convert_graph.h | 8 +- .../contrib/tensorrt/convert/convert_nodes.cc | 1481 ++++++++++++++--- .../contrib/tensorrt/convert/convert_nodes.h | 53 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 11 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 39 +- tensorflow/contrib/tensorrt/log/trt_logger.cc | 8 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 4 +- .../contrib/tensorrt/python/__init__.py | 1 + .../contrib/tensorrt/python/trt_convert.py | 68 +- .../tensorrt/resources/trt_int8_calibrator.cc | 56 +- .../tensorrt/resources/trt_int8_calibrator.h | 15 +- .../contrib/tensorrt/test/test_tftrt.py | 57 +- tensorflow/contrib/tensorrt/trt_conversion.i | 63 +- tensorflow/contrib/timeseries/examples/BUILD | 5 +- .../timeseries/python/timeseries/BUILD | 5 +- .../timeseries/state_space_models/BUILD | 1 + tensorflow/contrib/tpu/BUILD | 1 + tensorflow/contrib/util/loader.py | 7 +- tensorflow/core/BUILD | 4 + .../base_api/api_def_SlideDataset.pbtxt | 18 + .../core/distributed_runtime/tensor_coding.cc | 4 +- tensorflow/core/framework/dataset.h | 4 +- tensorflow/core/graph/mkl_layout_pass.cc | 31 +- .../grappler/optimizers/loop_optimizer.cc | 8 +- tensorflow/core/kernels/BUILD | 12 +- tensorflow/core/kernels/data/BUILD | 14 + .../core/kernels/data/slide_dataset_op.cc | 252 +++ tensorflow/core/kernels/depthtospace_op.cc | 3 + .../core/kernels/depthtospace_op_gpu.cu.cc | 6 + tensorflow/core/kernels/hexagon/BUILD | 1 + .../core/kernels/mkl_conv_grad_filter_ops.cc | 81 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 18 +- tensorflow/core/kernels/mkl_conv_ops.cc | 146 +- tensorflow/core/kernels/mkl_conv_ops.h | 117 +- .../core/kernels/mkl_input_conversion_op.cc | 7 +- tensorflow/core/kernels/mkl_relu_op.cc | 23 +- .../core/kernels/segment_reduction_ops.h | 14 +- tensorflow/core/kernels/spacetodepth_op.cc | 3 + .../core/kernels/spacetodepth_op_gpu.cu.cc | 6 + tensorflow/core/lib/core/stringpiece.cc | 2 - tensorflow/core/lib/core/stringpiece.h | 2 +- tensorflow/core/lib/io/record_reader.cc | 2 + tensorflow/core/lib/io/record_reader.h | 4 +- tensorflow/core/ops/dataset_ops.cc | 12 +- tensorflow/core/ops/nn_ops.cc | 8 + tensorflow/core/platform/tracing.h | 2 +- .../platform/windows/windows_file_system.cc | 3 +- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/community/welcome.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 56 +- tensorflow/docs_src/install/install_mac.md | 23 +- .../docs_src/install/install_sources.md | 4 +- .../docs_src/install/install_windows.md | 5 +- tensorflow/docs_src/performance/xla/jit.md | 2 +- .../docs_src/programmers_guide/debugger.md | 3 +- tensorflow/docs_src/programmers_guide/faq.md | 4 +- .../summaries_and_tensorboard.md | 2 +- .../docs_src/programmers_guide/using_tpu.md | 7 +- tensorflow/docs_src/tutorials/layers.md | 12 +- .../docs_src/tutorials/recurrent_quickdraw.md | 3 +- tensorflow/docs_src/tutorials/wide.md | 16 +- .../examples/android/AndroidManifest.xml | 4 + .../org/tensorflow/demo/CameraActivity.java | 7 +- .../org/tensorflow/demo/StylizeActivity.java | 60 + tensorflow/python/BUILD | 84 +- tensorflow/python/debug/BUILD | 1 + tensorflow/python/estimator/estimator.py | 32 +- tensorflow/python/estimator/training.py | 26 +- tensorflow/python/keras/BUILD | 5 +- tensorflow/python/kernel_tests/BUILD | 4 - .../python/kernel_tests/conv_ops_test.py | 20 +- .../kernel_tests/depthtospace_op_test.py | 10 +- .../kernel_tests/spacetodepth_op_test.py | 10 +- tensorflow/python/lib/io/file_io_test.py | 5 + tensorflow/python/ops/nn_ops.py | 2 +- tensorflow/python/ops/rnn.py | 17 +- tensorflow/python/ops/special_math_ops.py | 4 +- .../python/ops/special_math_ops_test.py | 5 + tensorflow/python/tools/freeze_graph.py | 36 +- tensorflow/python/tools/saved_model_cli.py | 60 + .../python/tools/saved_model_cli_test.py | 22 + tensorflow/stream_executor/cuda/cuda_dnn.cc | 33 +- tensorflow/tensorflow.bzl | 18 +- .../tools/api/tests/api_compatibility_test.py | 2 + tensorflow/tools/ci_build/Dockerfile.cmake | 5 +- .../windows/cpu/pip/build_tf_windows.sh | 3 +- tensorflow/tools/def_file_filter/BUILD | 0 tensorflow/tools/def_file_filter/BUILD.tpl | 15 + .../def_file_filter/def_file_filter.py.tpl | 168 ++ .../def_file_filter_configure.bzl | 56 + tensorflow/tools/dist_test/README.md | 8 + tensorflow/tools/dist_test/local_test.sh | 22 +- tensorflow/tools/git/gen_git_source.py | 7 + tensorflow/tools/graph_transforms/BUILD | 1 + .../graph_transforms/fold_old_batch_norms.cc | 67 + .../fold_old_batch_norms_test.cc | 95 ++ tensorflow/tools/pip_package/BUILD | 129 +- tensorflow/tools/pip_package/setup.py | 4 +- .../tools/test/upload_test_benchmarks.py | 9 +- tensorflow/workspace.bzl | 8 +- third_party/jpeg/jpeg.BUILD | 4 +- third_party/kafka/BUILD | 13 +- third_party/py/BUILD.tpl | 22 +- 191 files changed, 4250 insertions(+), 926 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py create mode 100644 tensorflow/contrib/data/python/ops/sliding.py create mode 100644 tensorflow/contrib/kafka/ops/dataset_ops.cc create mode 100644 tensorflow/contrib/kafka/python/ops/kafka_op_loader.py create mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh create mode 100644 tensorflow/contrib/lite/g3doc/rpi.md create mode 100644 tensorflow/contrib/lite/rpi_makefile.inc create mode 100644 tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt create mode 100644 tensorflow/core/kernels/data/slide_dataset_op.cc create mode 100644 tensorflow/tools/def_file_filter/BUILD create mode 100644 tensorflow/tools/def_file_filter/BUILD.tpl create mode 100644 tensorflow/tools/def_file_filter/def_file_filter.py.tpl create mode 100644 tensorflow/tools/def_file_filter/def_file_filter_configure.bzl diff --git a/README.md b/README.md index ef5bdc66ef..3cdb6e478d 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. +Keep up to date with release announcements and security updates by +subscribing to +[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). + ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* diff --git a/SECURITY.md b/SECURITY.md index fea24b2739..2aaa9202d5 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ report vulnerabilities in TensorFlow. ## TensorFlow models are programs -TensorFlow's runtime system interprets and executes programs. What machine +TensorFlow's runtime system interprets and executes programs. What machine learning practitioners term [**models**](https://developers.google.com/machine-learning/glossary/#model) are expressed as programs that TensorFlow executes. TensorFlow programs are encoded @@ -28,12 +28,12 @@ data you supply to TensorFlow to train a model, or to use a model to run inference on the data. **TensorFlow models are programs, and need to be treated as such from a security -perspective.** +perspective.** ## Running untrusted models As a general rule: **Always** execute untrusted models inside a sandbox (e.g., -[nsjail](https://github.com/google/nsjail)). +[nsjail](https://github.com/google/nsjail)). There are several ways in which a model could become untrusted. Obviously, if an untrusted party supplies TensorFlow kernels, arbitrary code may be executed. @@ -109,11 +109,11 @@ graphs known to the `ModelServer`. This means that an attacker may run graphs using untrusted inputs as described above, but they would not be able to execute arbitrary graphs. It is possible to safely expose a `ModelServer` directly to an untrusted network, **but only if the graphs it is configured to -use have been carefully audited to be safe**. +use have been carefully audited to be safe**. Similar to best practices for other servers, we recommend running any `ModelServer` with appropriate privileges (i.e., using a separate user with -reduced permisisons). In the spirit of defense in depth, we recommend +reduced permissions). In the spirit of defense in depth, we recommend authenticating requests to any TensorFlow server connected to an untrusted network, as well as sandboxing the server to minimize the adverse effects of any breach. @@ -133,7 +133,7 @@ which exhibit unexpected or unwanted behaviors. The fact that TensorFlow models can perform arbitrary computations means that they may read and write files, communicate via the network, produce deadlocks and infinite loops, or run out of memory. It is only when these behaviors are outside the specifications of the -operations involved that such behavior is a vulnerability. +operations involved that such behavior is a vulnerability. A `FileWriter` writing a file is not unexpected behavior and therefore is not a vulnerability in TensorFlow. A `MatMul` allowing arbitrary binary code execution @@ -168,7 +168,7 @@ below). Please use a descriptive subject line for your report email. After the initial reply to your report, the security team will endeavor to keep you informed of -the progress being made towards a fix and announcement. +the progress being made towards a fix and announcement. If you believe that an existing (public) issue is security-related, please send an email to `security@tensorflow.org`. The email should include the issue ID and diff --git a/configure.py b/configure.py index 97f46757ee..d14edef1be 100644 --- a/configure.py +++ b/configure.py @@ -1048,7 +1048,10 @@ def set_tf_tensorrt_install_path(environ_cp): for lib_file in possible_files: if is_compatible(lib_file, cuda_ver, cudnn_ver): - ver_str = nvinfer_pattern.search(lib_file).group(1) + matches = nvinfer_pattern.search(lib_file) + if len(matches.groups()) == 0: + continue + ver_str = matches.group(1) ver = convert_version_to_int(ver_str) if len(ver_str) else 0 if ver > highest_ver[0]: highest_ver = [ver, ver_str, lib_file] @@ -1377,7 +1380,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.5.4') + check_bazel_version('0.10.0') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index 78dc0ad4fc..a99e2b7794 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -38,14 +38,7 @@ namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size) - : platform_id_(platform_id), pointer_size_(pointer_size) { - // We currently only support kHostPlatformId for CPU, kCudaPlatformId for - // GPU and kInterpreterPlatformId for Interpreter. Before supporting other - // platforms, we need to test this transfer manager on them. - CHECK(platform_id_ == se::host::kHostPlatformId || - platform_id_ == se::interpreter::kInterpreterPlatformId || - platform_id_ == se::cuda::kCudaPlatformId); -} + : platform_id_(platform_id), pointer_size_(pointer_size) {} se::Platform::Id GenericTransferManager::PlatformId() const { return platform_id_; diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 99640f5bb5..72715398de 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -723,7 +723,7 @@ INSTANTIATE_TEST_CASE_P( ); #endif -TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { +XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index bab37e8906..986b61b3ea 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,6 +8,7 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") +load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -51,7 +52,6 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", - "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -63,7 +63,6 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", - "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -110,6 +109,10 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", + ]) + if_not_windows([ + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code + "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code + "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) @@ -121,6 +124,7 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -147,7 +151,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:kafka_ops_op_lib", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 4f6f539027..669d611b01 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -83,7 +85,8 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -from tensorflow.contrib.lite.python import lite +if os.name != 'nt': + from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs @@ -92,6 +95,7 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") +del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index da5e744851..7815fa049a 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, nullptr); - QCHECK_NE(num_sparse_float_features, nullptr); - QCHECK_NE(num_sparse_int_features, nullptr); + QCHECK_NE(num_dense_float_features, (int64*) nullptr); + QCHECK_NE(num_sparse_float_features, (int64*) nullptr); + QCHECK_NE(num_sparse_int_features, (int64*) nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 8f85a75ee4..fe83bb3204 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Pre-requisites +### Prerequisites * CMake version 3.5 or later. @@ -34,14 +34,16 @@ bindings. * [SWIG](http://www.swig.org/download.html) -* Additional pre-requisites for Microsoft Windows: +* Additional prerequisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 - - NumPy 1.11.0 or later -* Additional pre-requisites for Linux: +* Additional prerequisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) + +* Python dependencies: + - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -102,7 +104,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the pre-requisites detailed above, and set up your environment. +1. Install the prerequisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). You will need to set up your environment to use the diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index a9f43a3ecb..17f65999fa 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,6 +35,7 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index aba8a5244e..ab464bc99a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) +set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 9f96a4b797..b3e5b30826 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -476,6 +476,10 @@ if (tensorflow_BUILD_CC_TESTS) "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc" ) + list(REMOVE_ITEM tf_test_src_simple + ${tf_core_profiler_test_srcs} + ) + set(tf_test_lib tf_test_lib) add_library(${tf_test_lib} STATIC ${tf_src_testlib}) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index f09d156832..9212b69700 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -40,6 +40,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@rejection_resample @@scan @@shuffle_and_repeat +@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -72,6 +73,9 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat +from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch +from tensorflow.python.data.ops.iterator_ops import Iterator +from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 22418b38e3..2c4d4adfda 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -498,6 +498,23 @@ py_test( ], ) +tf_py_test( + name = "slide_dataset_op_test", + size = "small", + srcs = ["slide_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py new file mode 100644 index 0000000000..33c48e20be --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py @@ -0,0 +1,242 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.ops import sliding +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class SlideDatasetTest(test.TestCase): + + def testSlideDataset(self): + """Test an dataset that maps a TF function across its input elements.""" + components = (np.arange(7), + np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], + np.array(37.0) * np.arange(7)) + + count = array_ops.placeholder(dtypes.int64, shape=[]) + window_size = array_ops.placeholder(dtypes.int64, shape=[]) + stride = array_ops.placeholder(dtypes.int64, shape=[]) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), math_ops.square(z) + + # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> + # RepeatDataset(count) -> _SlideDataset(window_size, stride). + iterator = (dataset_ops.Dataset.from_tensor_slices(components) + .map(_map_fn) + .repeat(count) + .apply(sliding.sliding_window_batch(window_size, stride)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([[None] + list(c.shape[1:]) for c in components], + [t.shape.as_list() for t in get_next]) + + with self.test_session() as sess: + # Slide over a finite input, where the window_size divides the + # total number of elements. + sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) + # Same formula with convolution layer. + num_batches = (20 * 7 - 14) // 7 + 1 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(14): + self.assertAllEqual(component[(i*7 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over a finite input, where the window_size does not + # divide the total number of elements. + sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) + + num_batches = (20 * 7 - 17) // 9 + 1 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(17): + self.assertAllEqual(component[(i*9 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over a finite input, which is less than window_size, + # should fail straight away. + sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over an empty input should fail straight away. + sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Empty window_size should be an initialization time error. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0}) + + # Invalid stride should be an initialization time error. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) + + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSlideSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( + sliding.sliding_window_batch(5, 3)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], + dense_shape=[5, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSlideSparseWithDifferentDenseShapes(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=array_ops.expand_dims( + math_ops.range(i, dtype=dtypes.int64), 1), + values=array_ops.fill([math_ops.to_int32(i)], i), + dense_shape=[i]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( + sliding.sliding_window_batch(5, 3)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected_indices = [] + expected_values = [] + for j in range(5): + for k in range(i * 3 + j): + expected_indices.append([j, k]) + expected_values.append(i * 3 + j) + expected = sparse_tensor.SparseTensorValue( + indices=expected_indices, + values=expected_values, + dense_shape=[5, i * 3 + 5 - 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testNestedSlideSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse) + .apply(sliding.sliding_window_batch(4, 2)) + .apply(sliding.sliding_window_batch(3, 1)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + # Slide: 1st batch. + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], + [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], + values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + # Slide: 2nd batch. + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], + [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], + values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSlideShapeError(self): + + def generator(): + yield [1.0, 2.0, 3.0] + yield [4.0, 5.0, 6.0] + yield [7.0, 8.0, 9.0, 10.0] + + iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32, + output_shapes=[None]) + .apply(sliding.sliding_window_batch(3, 1)) + .make_initializable_iterator()) + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r"Cannot batch tensors with different shapes in component 0. " + r"First element had shape \[3\] and element 2 had shape \[4\]."): + sess.run(next_element) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index f03430c5c5..c3331e9636 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -106,6 +106,7 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", + "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py new file mode 100644 index 0000000000..19cc3cb89f --- /dev/null +++ b/tensorflow/contrib/data/python/ops/sliding.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Sliding dataset transformations.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops + + +class _SlideDataset(dataset_ops.Dataset): + """A `Dataset` that passes a sliding window over its input.""" + + def __init__(self, input_dataset, window_size, stride=1): + """See `sliding_window_batch` for details.""" + super(_SlideDataset, self).__init__() + self._input_dataset = input_dataset + self._window_size = ops.convert_to_tensor( + window_size, dtype=dtypes.int64, name="window_size") + self._stride = ops.convert_to_tensor( + stride, dtype=dtypes.int64, name="stride") + + def _as_variant_tensor(self): + return gen_dataset_ops.slide_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + window_size=self._window_size, + stride=self._stride, + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + input_shapes = self._input_dataset.output_shapes + return nest.pack_sequence_as(input_shapes, [ + tensor_shape.vector(None).concatenate(s) + for s in nest.flatten(self._input_dataset.output_shapes) + ]) + + @property + def output_types(self): + return self._input_dataset.output_types + + +def sliding_window_batch(window_size, stride=1): + """A sliding window with size of `window_size` and step of `stride`. + + This transformation passes a sliding window over this dataset. The + window size is `window_size` and step size is `stride`. If the left + elements cannot fill up the sliding window, this transformation will + drop the final smaller element. For example: + + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. + a = { [1], [2], [3], [4], [5], [6] } + + a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == + { + [[1], [2], [3]], + [[3], [4], [5]], + } + ``` + + Args: + window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + elements in the sliding window. + stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + steps moving the sliding window forward for one iteration. The default + is `1`. It must be in `[1, window_size)`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + def _apply_fn(dataset): + return _SlideDataset(dataset, window_size, stride) + + return _apply_fn diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 6bd3f5f09b..1bd73ee704 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,6 +454,7 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1143,6 +1144,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 2f9bc68aaa..384ef7f963 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -266,7 +266,10 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = ["notsan"], + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index f86331af6f..2f6cfdf31e 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,6 +22,7 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 90f10f1fa8..ad8568ad44 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -224,7 +224,10 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = ["notsan"], # b/67512932 + tags = [ + "nomac", # b/73741358 + "notsan", # b/67512932 + ], deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index e61221a6b0..35341406a0 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -256,6 +256,9 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, if (p != std::string::npos) { string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); rgb24 = rgb24.substr(0, rgb24.find(",")); + // Strip anything after " ", in case the format is + // `640x360 [SAR 1:1 DAR 16:9]` + rgb24 = rgb24.substr(0, rgb24.find(" ")); string rgb24_width = rgb24.substr(0, rgb24.find("x")); string rgb24_height = rgb24.substr(rgb24_width.length() + 1); if (strings::safe_strtou32(rgb24_width, &width_value) && @@ -270,8 +273,10 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, // We only look for the first stream mapping to have the number of the // frames. // Once processed we will not further process stream mapping section. - if (line.find("frame= ") == 0) { - string number = line.substr(8, line.find(" ", 8)); + if (line.find("frame=") == 0) { + // The format might be `frame= 166 ` or `frame=12488 ` + string number = line.substr(6); + number = number.substr(number.find_first_not_of(" ")); number = number.substr(0, number.find(" ")); if (strings::safe_strtou32(number, &frames_value)) { in_mapping = false; diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 0eb0e3cbe2..ff6f3b7441 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,6 +354,7 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index efb403462a..1c3974871c 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,66 +1,93 @@ -package( - default_visibility = ["//visibility:private"], -) +package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") -load("//tensorflow:tensorflow.bzl", "tf_py_test") +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_py_test", +) -tf_kernel_library( - name = "kafka_kernels", +py_library( + name = "kafka", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], - visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/kernels:bounds_check_lib", - "//tensorflow/core/kernels:dataset", + "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", "@kafka", + "@protobuf_archive//:protobuf_headers", ], + alwayslink = 1, ) -tf_gen_op_libs( - op_lib_names = ["kafka_ops"], +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/kafka_dataset_ops.py", + ], + srcs_version = "PY2AND3", deps = [ - "//tensorflow/core:lib", + ":kafka_op_loader", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", ], ) tf_gen_op_wrapper_py( - name = "gen_kafka_ops", - out = "python/ops/gen_kafka_ops.py", - require_shape_functions = True, - deps = [":kafka_ops_op_lib"], + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], ) -py_library( - name = "kafka", - srcs = [ - "__init__.py", - "python/ops/kafka_dataset_ops.py", +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "kafka_op_loader", + srcs = ["python/ops/kafka_op_loader.py"], + dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", ], srcs_version = "PY2AND3", - visibility = ["//visibility:public"], deps = [ - ":gen_kafka_ops", + ":gen_dataset_ops", "//tensorflow/contrib/util:util_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:iterator_ops", - "//tensorflow/python/data/ops:readers", ], ) @@ -88,6 +115,7 @@ tf_py_test( ], tags = [ "manual", + "no_windows", "notap", ], ) @@ -95,7 +123,9 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - ["**/*"], + include = [ + "**/*", + ], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index 88ef5f3571..a4cd4a2cc4 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,9 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/dataset.h" - -#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/dataset.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc new file mode 100644 index 0000000000..8cdf16103b --- /dev/null +++ b/tensorflow/contrib/kafka/ops/dataset_ops.cc @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("KafkaDataset") + .Input("topics: string") + .Input("servers: string") + .Input("group: string") + .Input("eof: bool") + .Input("timeout: int64") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that emits the messages of one or more Kafka topics. + +topics: A `tf.string` tensor containing one or more subscriptions, + in the format of [topic:partition:offset:length], + by default length is -1 for unlimited. +servers: A list of bootstrap servers. +group: The consumer group id. +eof: If True, the kafka reader will stop on EOF. +timeout: The timeout value for the Kafka Consumer to wait + (in millisecond). +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index 8e51d27a34..a1624614d1 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,8 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import gen_kafka_ops -from tensorflow.python.data.ops.readers import Dataset +from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import +from tensorflow.contrib.kafka.python.ops import gen_dataset_ops +from tensorflow.python.data.ops.dataset_ops import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -58,8 +59,8 @@ class KafkaDataset(Dataset): timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, - self._eof, self._timeout) + return gen_dataset_ops.kafka_dataset(self._topics, self._servers, + self._group, self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py new file mode 100644 index 0000000000..ec2fdea962 --- /dev/null +++ b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py @@ -0,0 +1,24 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python helper for loading kafka ops and kernels.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.util import loader +from tensorflow.python.platform import resource_loader + +_dataset_ops = loader.load_op_library( + resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index 146ae8b7e2..d1c449402a 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,6 +114,7 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 894e6f6946..544065dac6 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,6 +70,7 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index 852d06e1e3..cc7bbabf21 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,6 +188,7 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -353,6 +354,7 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index b62e3050cd..ffa208540d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None): ids = ops.convert_to_tensor(ids) shape = array_ops.shape(ids) ids_flat = array_ops.reshape( - ids, math_ops.reduce_prod(shape, keep_dims=True)) + ids, math_ops.reduce_prod(shape, keepdims=True)) unique_ids, idx = array_ops.unique(ids_flat) unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids) embeds_flat = array_ops.gather(unique_embeddings, idx) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index f837ca3265..b05f5eeaee 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,6 +5,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +load("//tensorflow:tensorflow.bzl", "py_test") + package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -115,6 +117,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -170,6 +173,7 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -188,6 +192,7 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -426,7 +431,10 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = ["noasan"], + tags = [ + "noasan", # b/73741358 + "nomac", + ], deps = [ ":learn", "//tensorflow/python:array_ops", @@ -585,6 +593,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -814,6 +823,7 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index b3b067b8e1..8f9811cf25 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'): ids = ops.convert_to_tensor(ids) shape = array_ops_.shape(ids) ids_flat = array_ops_.reshape( - ids, math_ops.reduce_prod(shape, keep_dims=True)) + ids, math_ops.reduce_prod(shape, keepdims=True)) embeds_flat = nn.embedding_lookup(params, ids_flat, name) embed_shape = array_ops_.concat([shape, [-1]], 0) embeds = array_ops_.reshape(embeds_flat, embed_shape) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 7f31629272..b4504f246a 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX) gcc +CXX := $(CC_PREFIX)gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX) gcc -CFLAGS := +CC := $(CC_PREFIX)gcc +CFLAGS := -O3 -DNDEBUG LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,10 +57,11 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl -lpthread + LIBS += -ldl endif include $(MAKEFILE_DIR)/ios_makefile.inc +include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index 58bc164619..f84b3dad95 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -33,7 +33,7 @@ class AllocationInfo; // each tensor needs to be allocated and deallocated, and preallocates all the // necessary memory (the PlanAllocations phase). It then assigns portions of // this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may -// share some of the bufer if a tensor B is to be allocated after another tensor +// share some of the buffer if a tensor B is to be allocated after another tensor // A has been deallocated. // // If dynamic tensors are used the planning steps can be repeated during model diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh new file mode 100755 index 0000000000..3824b16412 --- /dev/null +++ b/tensorflow/contrib/lite/build_rpi_lib.sh @@ -0,0 +1,22 @@ +#!/bin/bash -x +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR/../../.." + +CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 2218ea8eac..ea3ae3489e 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -24,7 +24,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin // ops. typedef enum { kTfLiteBuiltinAdd = 0, diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h index da193d2586..3c5f805f12 100644 --- a/tensorflow/contrib/lite/error_reporter.h +++ b/tensorflow/contrib/lite/error_reporter.h @@ -30,7 +30,7 @@ namespace tflite { // va_list args; // foo.Report("test %d", args); // where args is va_list // -// Sublclass ErrorReporter to provide another reporting destination. +// Subclass ErrorReporter to provide another reporting destination. // For example, if you have a GUI program, you might redirect to a buffer // that drives a GUI error log box. class ErrorReporter { diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md new file mode 100644 index 0000000000..7a3a231626 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -0,0 +1,50 @@ +# TensorFlow Lite for Raspberry Pi + +## Cross compiling +### Installing toolchian +This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). + +To cross compiling TensorFlow Lite. First you should install the toolchain and libs. +```bash +sudo apt-get update +sudo apt-get install crossbuild-essential-armhf +``` +> If you are using docker, you may not use `sudo` + +### Building +Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies: +> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note than you only need to to this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. + +## Native compiling +This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). + +Log in to you RPI, install the toolchain. +```bash +sudo apt-get instal build-essential +``` + +First, clone this TensorFlow repository. Run this at the root of the repository: +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note than you only need to to this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index af143370ee..3749869f58 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -481,7 +481,7 @@ class Interpreter { // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. - // Intepreter will stop allocating tensors, set the value of next allocate + // Interpreter will stop allocating tensors, set the value of next allocate // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 7a029c7df8..72d4acedbe 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -40,7 +40,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) { ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); } -// Test size accesser functions. +// Test size accessor functions. TEST(BasicInterpreter, TestSizeFunctions) { Interpreter interpreter; int base_index; diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index b91ba1a03d..e0cd12f1b4 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -64,7 +64,7 @@ struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index 15dbfe08c8..cad9ce114c 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -52,7 +52,7 @@ enum KernelType { struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index a77fe94e49..888e67966c 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -48,7 +48,7 @@ enum KernelType { struct OpData { // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 28f53b9fbb..21da1daff7 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) { } // Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specificially checks for a dynamic tensor. +// not dynamic. This function specifically checks for a dynamic tensor. inline bool IsDynamicTensor(TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc index 5f73b56ed9..0ee35775d5 100644 --- a/tensorflow/contrib/lite/kernels/lsh_projection.cc +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// LSH Projection projects an input to a bit vector via locality senstive +// LSH Projection projects an input to a bit vector via locality sensitive // hashing. // // Options: diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index b9255b23a5..8cf1165135 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // present. // 2) If projection weight is present, then projection bias is optional. // TODO(ghodrat): make sure this is correct. - const bool projecton_tensors_consistent = + const bool projection_tensors_consistent = ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); + TF_LITE_ENSURE(context, projection_tensors_consistent == true); return kTfLiteOk; } @@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existense of only one to the get the condition. + // check the existence of only one to get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc index f3e6ddc9f4..438f70d311 100644 --- a/tensorflow/contrib/lite/kernels/reshape.cc +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); int num_output_elements = 1; - int strech_dim = -1; + int stretch_dim = -1; for (int i = 0; i < params->num_dimensions; ++i) { int value = params->shape[i]; if (value == -1) { - TF_LITE_ENSURE_EQ(context, strech_dim, -1); - strech_dim = i; + TF_LITE_ENSURE_EQ(context, stretch_dim, -1); + stretch_dim = i; } else { num_output_elements *= value; output_size->data[i] = value; } } - if (strech_dim != -1) { - output_size->data[strech_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_size->data[strech_dim]; + if (stretch_dim != -1) { + output_size->data[stretch_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[stretch_dim]; } TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc index 0fbcf6e6aa..aecbd0399f 100644 --- a/tensorflow/contrib/lite/kernels/reshape_test.cc +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) { TEST(ReshapeOpTest, TooManySpecialDimensions) { EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), - "strech_dim != -1"); + "stretch_dim != -1"); } TEST(ReshapeOpTest, SimpleTest) { diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 373310bd87..0bb28b50b2 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type, void SingleOpModel::SetCustomOp( const string& name, const std::vector& custom_option, - const std::function& registeration) { - custom_registrations_[name] = registeration; + const std::function& registration) { + custom_registrations_[name] = registration; opcodes_.push_back( CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); operators_.push_back(CreateOperator( diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 508a570e2e..42941a97db 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existense of only one to the get the condition. + // check the existence of only one to get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h index 5cd6c20850..0294ec815c 100644 --- a/tensorflow/contrib/lite/memory_planner.h +++ b/tensorflow/contrib/lite/memory_planner.h @@ -34,8 +34,8 @@ class MemoryPlanner { // [first_node, last_node]. virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; - // Invalidates allocations made earliers. This is called when tensors sizes - // have change. All planned allocations remain, but can't be used until + // Invalidates allocations made earlier. This is called when tensors sizes + // have changed. All planned allocations remain, but can't be used until // ExecuteAllocations() is called. virtual TfLiteStatus ResetAllocations() = 0; }; diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index a467df5bb4..8dc1c794dc 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -64,7 +64,7 @@ class FlatBufferModel { const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); - // Releases memory or unmaps mmaped meory. + // Releases memory or unmaps mmaped memory. ~FlatBufferModel(); // Copying or assignment is disallowed to simplify ownership semantics. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 76032771af..bd49d327c9 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -569,7 +569,7 @@ enum { ANEURALNETWORKS_LOGISTIC = 14, /** - * Projects an input to a bit vector via locality senstive hashing. + * Projects an input to a bit vector via locality sensitive hashing. * * Inputs: * * 0: Hash functions. Dim.size == 2, DataType: Float. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc new file mode 100644 index 0000000000..832ef5824b --- /dev/null +++ b/tensorflow/contrib/lite/rpi_makefile.inc @@ -0,0 +1,33 @@ +# Settings for Raspberry Pi. +ifeq ($(TARGET), RPI) + ifeq ($(TARGET_ARCH), armv7) + CXXFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + CCFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + LDFLAGS := \ + -Wl,--no-export-dynamic \ + -Wl,--exclude-libs,ALL \ + -Wl,--gc-sections \ + -Wl,--as-needed + endif + + LIBS := \ + -lstdc++ \ + -lpthread \ + -lm \ + -ldl + + OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ + LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ + BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ + DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ +endif diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index 08bcfe4516..640972de77 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin // ops. typedef enum { )"; diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc index 4aab244989..2f2004f56b 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.cc +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { underlying_buffer_size_ = required_size; underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; } - commited_ = true; + committed_ = true; return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; } TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, char** output_ptr) { - TF_LITE_ENSURE(context, commited_); + TF_LITE_ENSURE(context, committed_); TF_LITE_ENSURE(context, output_ptr != nullptr); *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; return kTfLiteOk; } TfLiteStatus SimpleMemoryArena::Clear() { - commited_ = false; + committed_ = false; high_water_mark_ = 0; allocs_.clear(); return kTfLiteOk; diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 0535522374..5faf78b59e 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -22,7 +22,7 @@ limitations under the License. namespace tflite { // This little structure holds the offset and the size for a dynamic memory -// allocation in the memory arena. When the arena is commited and the +// allocation in the memory arena. When the arena is committed and the // underlying buffer is set, the alloc can be resolved into an actual memory // pointer. struct ArenaAlloc { @@ -43,7 +43,7 @@ struct ArenaAlloc { class SimpleMemoryArena { public: explicit SimpleMemoryArena(size_t arena_alignment) - : commited_(false), + : committed_(false), arena_alignment_(arena_alignment), high_water_mark_(0), underlying_buffer_size_(0), @@ -73,7 +73,7 @@ class SimpleMemoryArena { } private: - bool commited_; + bool committed_; size_t arena_alignment_; size_t high_water_mark_; std::unique_ptr underlying_buffer_; diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 8ca03f4193..0a6edc33c5 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,6 +46,7 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, + tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 995230dfa8..6c3b02e12b 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -194,6 +194,8 @@ with: srcs = glob(["libs/arm64-v8a/*.so"]), ``` +If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml + Then run: ```bash # Create dir for native libs diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 2d99791839..9b148688c4 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -80,10 +80,9 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then fi else echo "${PRNT_SLCTV_BIN} found. Using it" - ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h - fi + ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h fi if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index fa297c28cb..df055ff567 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,6 +24,8 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" +// Skip MPI C++ bindings support, this matches the usage in other places +#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 04b5d5bdf1..6e77e934fe 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -53,7 +53,7 @@ def from_contrib_estimator(estimator, `Estimator`. """ if isinstance(estimator, core_estimator.Estimator): - raise TypeError('Espected estimator to be of type ' + raise TypeError('Expected estimator to be of type ' 'tf.contrib.learn.Estimator, but got type ' 'tf.python.estimator.Estimator. You likely want to call ' 'from_estimator.') @@ -88,7 +88,7 @@ def from_estimator(estimator, `Estimator`. """ if isinstance(estimator, contrib_estimator.Estimator): - raise TypeError('Espected estimator to be of type ' + raise TypeError('Expected estimator to be of type ' 'tf.python.estimator.Estimator, but got type ' 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index f624c42686..4bb6f76019 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -81,6 +81,7 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -91,6 +92,7 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 90bc22008f..1194b98f5e 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(gast.NodeVisitor, self).__init__() + super(DetectReturnInUnsupportedControlFlow, self).__init__() def visit_While(self, node): self.cant_return = True diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index d029289f5a..8bc338e801 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -83,6 +83,7 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index b278265639..1afcbb8504 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has bessel's + # correction). The variance tensor read from FuseBatchNorm has Bessel's # correction applied, so we undo it here. scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containg required batch norm tensors for correction + match: Object containing required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index 0a8e35080c..a4f7b1b221 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range lupper end(s). - per_channel: a boolean specifying whether to use per-channel quantizatioh. + max_var: a variable containing quantization range upper end(s). + per_channel: a boolean specifying whether to use per-channel quantization. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 0608ab9302..ec721afbc8 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -267,7 +267,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context w,here producer and consumer operations are nested. + context: Context where producer and consumer operations are nested. name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5a3a74cec4..5abdcd2475 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed,if None then defaults to the + input_graph: The tf.Graph to be transformed, if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 0624cc878b..db745aa562 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initialized that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index ef59475167..b2e5707a6d 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -144,7 +144,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initialized that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 27f0a7f58f..54c66271cd 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,7 +38,6 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 73f2607d84..358b2eb02b 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index 245fe07f2b..b10757df47 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,6 +53,7 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 03fe31abf7..6adbb8be40 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -299,12 +299,13 @@ class BeamSearchDecoder(decoder.Decoder): """ finished, start_inputs = self._finished, self._start_inputs + dtype = nest.flatten(self._initial_cell_state)[0].dtype log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, - on_value=0.0, - off_value=-np.Inf, - dtype=nest.flatten(self._initial_cell_state)[0].dtype) + on_value=ops.convert_to_tensor(0.0, dtype=dtype), + off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), + dtype=dtype) initial_state = BeamSearchDecoderState( cell_state=self._initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 67011c8fef..3ad88a8a22 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -165,6 +165,7 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 5daabbd62e..7aa1684839 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,6 +61,7 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 1e4cc3f095..07b6b1f142 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,7 +553,6 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ - "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index d833744d0c..db2e000ef8 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,6 +9,7 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") +load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index c832c6f2e0..906cc3f034 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -83,6 +83,7 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), + visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -154,6 +155,7 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", + "//tensorflow/python:errors", ], ) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index dfcce0fd00..461e627e99 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,7 +2,8 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. +operator that wraps a subgraph in TensorRT. This is still a work in progress +but should be useable with most common graphs. Compilation ----------- @@ -15,26 +16,10 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. - -``` +```shell bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use is shown below. - -```python -import tensorflow as tf -import tensorflow.contrib.tensorrt as trt -#... create and train or load model -gdef = sess.graph.as_graph_def() -trt_gdef = trt.create_inference_graph( - gdef, #original graph_def - ["output"], #name of output node(s) - max_batch_size, #maximum batch size to run the inference - max_workspace_size_bytes) # max memory for TensorRT to use -tf.reset_default_graph() -tf.import_graph_def(graph_def=trt_gdef) -#...... run inference -``` +will be available. An example use can be found in test/test_tftrt.py directory diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index fd551d70b4..140ad48282 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,6 +18,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import -from tensorflow.contrib.tensorrt.python import * -# pylint: enable=unused-import,wildcard-import +from tensorflow.python.framework import errors + +# pylint: disable=unused-import,wildcard-import,g-import-not-at-top +try: + from tensorflow.contrib.tensorrt.python import * +except errors.NotFoundError as e: + no_trt_message = ( + '**** Failed to initialize TensorRT. This is either because the TensorRT' + ' installation path is not in LD_LIBRARY_PATH, or because you do not have' + ' it installed. If not installed, please go to' + ' https://developer.nvidia.com/tensorrt to download and install' + ' TensorRT ****') + print(no_trt_message) + raise e +# pylint: enable=unused-import,wildcard-import,g-import-not-at-top diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 970f810473..eea8c8efa2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include #include #include #include @@ -48,13 +49,29 @@ namespace tensorrt { namespace convert { namespace { -static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { - "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" + "Identity", + "Const", + "Conv2D", + "MaxPool", + "BiasAdd", + "Relu", + "Add", + "Mul", + "Sub", + "Rsqrt", + "Pad", + "Mean", + "AvgPool", + "ConcatV2", + "DepthwiseConv2dNative", + "FusedBatchNorm", + "FusedBatchNormV2", + // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -69,6 +86,8 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); + } else { + VLOG(2) << edge->src()->name() << " N, "; } } } @@ -82,7 +101,10 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { + VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); + } else { + VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -109,74 +131,150 @@ std::unordered_map> BuildTensorNameMap( } return result; } - -tensorflow::Status ConvertSubGraphToTensorRT( - const std::vector& output_names, - const std::set& subgraph_node_ids, - size_t max_batch_size, // Max batch size that engine will be created for - // Max amount of memory that engine will be allowed to consume, in bytes - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::Graph* graph) { - tensorflow::EdgeSet subgraph_incoming_edges; - GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); - +// TODO(sami): convert references to pointers +struct ConvertGraphParams { + ConvertGraphParams( + tensorflow::Graph& inp_graph, + const std::vector& output_node_names, + const std::set& subgraph_node_id_numbers, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + int engine_precision_mode) + : graph(inp_graph), + output_names(output_node_names), + subgraph_node_ids(subgraph_node_id_numbers), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + precision_mode(engine_precision_mode) {} + tensorflow::Graph& graph; + const std::vector& output_names; + const std::set& subgraph_node_ids; + size_t max_batch_size; + size_t max_workspace_size_bytes; + const tensorflow::grappler::GraphProperties& graph_properties; + std::unordered_map>* output_edge_map; + int precision_mode; std::vector> subgraph_inputs; + std::vector> subgraph_outputs; + tensorflow::EdgeSet subgraph_incoming_edges; + tensorflow::EdgeSet subgraph_outgoing_edges; +}; - // Collect inputs by looking for incoming edges - for (const tensorflow::Edge* edge : subgraph_incoming_edges) { - subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { + GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_incoming_edges); + for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { + p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set> subgraph_outputs_set; // Collect outputs referenced from output_names - auto output_name_to_index_map = BuildTensorNameMap(output_names); - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : p->subgraph_node_ids) { + tensorflow::Node* node = p->graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - // Collect outputs referenced from outgoing edges - tensorflow::EdgeSet subgraph_outgoing_edges; - GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - // Impose an ordering on the outputs - std::vector> subgraph_outputs( - subgraph_outputs_set.begin(), subgraph_outputs_set.end()); - // Build TensorRT node and add it to the graph + p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.insert(p->subgraph_outputs.begin(), + subgraph_outputs_set.begin(), + subgraph_outputs_set.end()); + return tensorflow::Status::OK(); +}; + +tensorflow::Status GetCalibNode(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); tensorflow::NodeDef trt_node_def; - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( - *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size_bytes, graph_properties, - &trt_node_def)); + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; - tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + TF_RETURN_IF_ERROR(status); + + for (auto in_edge : + params->subgraph_incoming_edges) { // loop over incoming edges and + // attach them to calib node + // tensorflow::Node* src_node = in_edge->src(); + auto src_output = in_edge->src_output(); + auto dst_node = in_edge->dst(); + auto dst_input = in_edge->dst_input(); + VLOG(1) << " update edge " << trt_node->name() << ":" << src_output + << " -> " << dst_node->name() << ":" << dst_input; + TF_RETURN_IF_ERROR( + params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); + } + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + tensorflow::NodeDef trt_node_def; + + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); + tensorflow::Status status; + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + // AddNode does not wire edges. + // Re-map incoming edges to use the new TRT node instead of the orig subgraph + std::map, int> subgraph_edge_to_input_map; + for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { + subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); + } + for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { + std::pair old_src = {edge->src()->id(), edge->src_output()}; + int new_src_output = subgraph_edge_to_input_map.at(old_src); + params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, + new_src_output); + params->graph.RemoveEdge(edge); + } + + VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); + for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), - edge->dst_input())); + TF_RETURN_IF_ERROR(params->graph.UpdateEdge( + trt_node, new_src_output, edge->dst(), edge->dst_input())); } // Remove the original subgraph - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : params->subgraph_node_ids) { + tensorflow::Node* node = params->graph.FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - graph->RemoveNode(node); + params->graph.RemoveNode(node); } return tensorflow::Status::OK(); } @@ -194,12 +292,39 @@ tensorflow::Status BuildNodeMap( } } // namespace +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { + VLOG(0) << "Starting Calib Conversion"; + tensorflow::Graph graph(tensorflow::OpRegistry::Global()); + TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( + tensorflow::GraphConstructorOptions(), graph_def, &graph)); + // get calib nodes + std::vector calib_nodes; + for (auto node : graph.op_nodes()) { + if (node->type_string() == "TRTCalibOp") { + VLOG(1) << "Found Calib Node"; + calib_nodes.push_back(node); + } + } + VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); + if (calib_nodes.size() == 0) + return tensorflow::errors::FailedPrecondition( + "Graph doesn't contain any calibration nodes!." + " Please generate calibration graph and run calibration first"); + for (auto n : calib_nodes) { + TF_RETURN_IF_ERROR( + tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); + } + graph.ToGraphDef(infer_graph); + return tensorflow::Status::OK(); +} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { - // Optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, + int precision_mode = FP32MODE, int minimum_segment_size = 3) { + // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -209,16 +334,23 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // Virtual cluster + // virtual cluster tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + // single machine + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); + VLOG(2) << "cpu_cores: " << num_cpu_cores; + VLOG(2) << "gpus: " << num_gpus; + TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // Constant folding + // constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -226,7 +358,6 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); - // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -243,7 +374,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -252,14 +383,37 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); + std::unordered_map> output_edge_map; + int count = 0; + float total_num_nodes_in_segments = 0.; + for (auto s : segments) { + total_num_nodes_in_segments += s.size(); + } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; + size_t max_mem_per_engine = + max_workspace_size_bytes * + ((float)subgraph_node_names.size() / total_num_nodes_in_segments); + std::stringstream oss; for (const string& node_name : subgraph_node_names) { + oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - output_names, subgraph_node_ids, max_batch_size, - max_workspace_size_bytes, static_graph_properties, &graph)); + VLOG(2) << "Subgraph nodes" << oss.str(); + ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, + max_mem_per_engine, static_graph_properties, + &output_edge_map, precision_mode); + if (precision_mode == INT8MODE) { + TF_RETURN_IF_ERROR(GetCalibNode(&p)); + } else { + tensorflow::Status status = ConvertSubGraphToTensorRT(&p); + if (status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \n" + << status.ToString() << " SKIPPING......"; + } + count++; + } } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 154ad3f2e8..e1596e89e2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -28,6 +28,11 @@ namespace tensorflow { namespace tensorrt { namespace convert { +// This method converts an already generated calibration graph which was used in +// calibration runs to an inference graph +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); + // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. // max_workspace_size_bytes: The upper bound of memory allowence for @@ -35,7 +40,8 @@ namespace convert { tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def); + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, + int precision_mode, int minimum_segment_size); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 9ee717dd7f..75a3c3d034 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -24,6 +24,10 @@ limitations under the License. #include #include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -32,6 +36,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tensor_coding.h" @@ -39,7 +44,6 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorrt/include/NvInfer.h" // Check if the types are equal. Cast to int first so that failure log message @@ -49,6 +53,7 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrCat; namespace { @@ -65,7 +70,8 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, *trt_dtype = nvinfer1::DataType::kHALF; break; default: - return tensorflow::errors::InvalidArgument("Unsupported data type"); + return tensorflow::errors::InvalidArgument( + "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); } return tensorflow::Status::OK(); } @@ -112,6 +118,18 @@ static std::vector> CreateSamePadding( return padding; } +string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { + size_t last_scope_separator = 0; + for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { + if (op_name_a[i] != op_name_b[i]) { + break; + } else if (op_name_a[i] == '/') { + last_scope_separator = i + 1; + } + } + return op_name_a.substr(0, last_scope_separator); +} + class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -244,6 +262,11 @@ std::vector TFAttrs::get>(string key) const { return std::vector(attr.begin(), attr.end()); } +template <> +std::vector TFAttrs::get>(string key) const { + auto attr = this->at(key)->list().s(); + return std::vector(attr.begin(), attr.end()); +} template <> nvinfer1::Dims TFAttrs::get(string key) const { auto values = this->get>(key); @@ -266,6 +289,17 @@ tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } +template <> +float TFAttrs::get(string key) const { + return this->at(key)->f(); +} + +template <> +bool TFAttrs::get(string key) const { + return this->at(key)->b(); +} + +// TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -283,29 +317,87 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } } +template +void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, + T* odata, nvinfer1::DimsHW ostrides) { + for (int h = 0; h < shape.h(); ++h) { + for (int w = 0; w < shape.w(); ++w) { + odata[h * ostrides.h() + w * ostrides.w()] = + idata[h * ostrides.h() + w * ostrides.w()]; + } + } +} + +// TODO(jie): fallback to tensorflow!! +void ReorderCKtoKC(const TRT_ShapedWeights& iweights, + TRT_ShapedWeights* oweights) { + int c = iweights.shape_.d[0]; + int k = iweights.shape_.d[1]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; + nvinfer1::DimsHW istrides = {1, k}; + nvinfer1::DimsHW ostrides = {c, 1}; + switch (iweights.type_) { + case tensorflow::DataType::DT_FLOAT: { + Reorder2({k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + case tensorflow::DataType::DT_HALF: { + Reorder2( + {k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + default: + LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); + } +} + void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights) { + TRT_ShapedWeights* oweights, int num_groups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - int c = iweights.shape_.d[2]; - int k = iweights.shape_.d[3]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; + // TRT requires GKcRS, while TF depthwise has RSCK + // where c=1, C=G + VLOG(2) << "num_groups: " << num_groups; + int c = iweights.shape_.d[2] / num_groups; + VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; + int k = iweights.shape_.d[3] * num_groups; + VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; + oweights->shape_.d[0] = k / num_groups; + oweights->shape_.d[1] = c * num_groups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: + case tensorflow::DataType::DT_FLOAT: { Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; + } + case tensorflow::DataType::DT_HALF: { + Reorder4( + {k, c, r, s}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + default: - LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); } } @@ -323,12 +415,11 @@ inline std::shared_ptr infer_object(T* obj) { return std::shared_ptr(obj, InferDeleter()); } -// Logger for GIE info/warning/errors class Converter; using OpConverter = std::function const&, + const std::vector&, std::vector*)>; class Converter { @@ -336,34 +427,57 @@ class Converter { std::unordered_map op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - + tensorflow::tensorrt::TRTWeightStore* weight_store_; + bool fp16_; void register_op_converters(); - std::vector get_inputs( const tensorflow::NodeDef& node_def) { std::vector inputs; - for (const auto& input_name : node_def.input()) { - VLOG(2) << "Retrieve input: " << input_name; - inputs.push_back(trt_tensors_.at(input_name)); + for (auto const& input_name : node_def.input()) { + /************************************************************************* + * TODO(jie) handle case 1) here + * Normalizes the inputs and extracts associated metadata: + * 1) Inputs can contain a colon followed by a suffix of characters. + * That suffix may be a single number (e.g. inputName:1) or several + * word characters separated from a number by a colon + * (e.g. inputName:foo:1). The + * latter case is used to denote inputs and outputs of functions. + * 2) Control dependency inputs contain caret at the beginning and we + * remove this and annotate the edge as a control dependency. + ************************************************************************/ + string name = input_name[0] == '^' ? input_name.substr(1) : input_name; + auto first = name.find_first_of(':'); + if (first != string::npos && first + 2 == name.size() && + name[first + 1] == '0') + name.erase(first); + + VLOG(2) << "retrieve input: " << name; + if (trt_tensors_.count(name)) { + inputs.push_back(trt_tensors_.at(name)); + } else { + LOG(FATAL) << "input: " << name << " not availabled for node at, " + << node_def.name(); + } } return inputs; } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network) - : trt_network_(trt_network) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network, + tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) + : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { this->register_op_converters(); } - + tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error - temp_bufs_.push_back(std::vector(weights.size_bytes())); - weights.SetValues(temp_bufs_.back().data()); + weight_store_->store_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(weight_store_->store_.back().data()); return weights; } - + bool isFP16() { return fp16_; }; TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -382,7 +496,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = output_name + ":" + std::to_string(i); + if (i != 0) output_name = StrCat(output_name, ":", i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -448,7 +562,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / std::sqrt(t); }; + return [](T t) -> T { return 1.0 / sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -534,6 +648,22 @@ struct LambdaFactory { } }; +template <> +std::function LambdaFactory::unary() { + switch (op) { + case OP_CATEGORY::RSQRT: { + VLOG(2) << "RSQRT GETS DONE"; + return [](Eigen::half t) -> Eigen::half { + return Eigen::half(1.0 / sqrt(float(t))); + }; + } + case OP_CATEGORY::NEG: + return [](Eigen::half t) -> Eigen::half { return -t; }; + default: + VLOG(2) << "Not supported op for unary: " << static_cast(op); + return nullptr; + } +} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -545,6 +675,14 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } + case tensorflow::DataType::DT_HALF: { + auto inp = static_cast(iweights.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + std::transform(inp, inp + iweights.count(), oup, + unary_op.unary()); + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -588,6 +726,32 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } + case tensorflow::DataType::DT_HALF: { + auto inp_l = static_cast(iweights_l.GetValues()); + auto inp_r = static_cast(iweights_r.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + + if (iweights_l.count() != iweights_r.count()) { + // We only supports broadcast of RankZero + if (iweights_l.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_l); + std::transform(inp_r, inp_r + iweights_r.count(), oup, + binary_op.broadcast_l(*inp_l)); + } else if (iweights_r.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_r); + std::transform(inp_l, inp_l + iweights_l.count(), oup, + binary_op.broadcast_r(*inp_r)); + } else { + return tensorflow::errors::Unimplemented( + "Binary op with non-rankZero broadcast not supported"); + } + } else { + std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, + binary_op.binary()); + } + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -599,7 +763,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -613,13 +777,12 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get("T")); - // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // PAss the output + // Pass the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -631,11 +794,11 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall bakc to TF +// Let's get the simple stuff working first. Maybe we should fall back to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -648,12 +811,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. - int nb_dims = weights_input_l.shape_.nbDims; + int num_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = nb_dims; - VLOG(2) << "nb_dims: " << nb_dims + output_shape.nbDims = num_dims; + VLOG(2) << "nb_dims: " << num_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < nb_dims; i++) { + for (int i = 0; i < num_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -678,7 +841,6 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); - // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -712,48 +874,90 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency - auto dtype = TFAttrs(node_def).get("T"); - CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); - CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // Default to channel-wise + // default to element-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + // TODO(jie): maybe use a permuatation instead to support more cases; + bool permutation_flag = false; + if (weights.count() == 1) { VLOG(2) << "UNIFORM"; scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { - // No broadcasting on Batch dimension; - assert(dims_w.d[0] == 1); - - // Broadcasting on Channel dimension only allowed in kUNIFORM - assert(dims_w.d[1] == dims_t.d[0]); - assert(dims_w.nbDims == dims_t.nbDims); - - // Default is element; - for (int i = 2; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != dims_t.d[i - 1]) { - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - break; + // no broadcasting on Batch dimension; + VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims + << " tensor DIM: " << dims_t.nbDims; + if (dims_w.nbDims == dims_t.nbDims + 1) { + if (dims_w.d[0] == 1) { + for (int i = 1; i < dims_w.nbDims; i++) { + dims_w.d[i - 1] = dims_w.d[i]; + } + dims_w.nbDims--; + } else { + return tensorflow::errors::InvalidArgument( + "Binary op cannot operate on batch, " + node_def.name()); } } - if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) { + + if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - for (int i = 2; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != 1) - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); + // default is element; + for (int i = 1; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != dims_t.d[i]) { + // if dimension does not match, switch back to channel; + VLOG(2) << "channel"; + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; + } + } + // if channel as candidate, validate it + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { + for (int i = 1; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != 1) + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); + } + } else { + VLOG(2) << "elementwise"; + } + } else if (dims_w.nbDims == 1 && + dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { + // channel wise and broadcast required; + permutation_flag = true; + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + } else { + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); + } + } + + // transpose last dimension + std::vector permutation(dims_t.nbDims + 1); + if (permutation_flag) { + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + // we swap the last dimension into channel for trt. + // because of tensorflow default broadcasting rules. + for (int i = 0; i < static_cast(permutation.size()); i++) { + permutation[i] = i; } + permutation[1] = dims_t.nbDims; + permutation[dims_t.nbDims] = 1; + tensor = ctx.TransposeTensor(const_cast(tensor), + permutation); + } else { + return tensorflow::errors::InvalidArgument( + "Transpose cannot be applied, " + node_def.name()); } } - // Prepare weights + // prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); TRT_ShapedWeights power_weights(weights.type_); @@ -779,88 +983,26 @@ tensorflow::Status BinaryTensorOpWeight( scale_weights, power_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); + // transpose back dimension + if (permutation_flag) { + output_tensor = ctx.TransposeTensor(output_tensor, permutation); + } // Pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, const tensorflow::NodeDef& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", nvinfer1::ElementWiseOperation::kMIN}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume type matches input weights - // Get trt type & shape - TFAttrs attrs(node_def); - // Maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // Check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // Pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} +enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; -tensorflow::Status ConvertPlaceholder( +tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, - std::vector* outputs) { - VLOG(2) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented(", cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} + const std::vector& inputs, + std::vector* outputs, + int group // group ==0 specifies depthwise conv +) { + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); -tensorflow::Status ConvertConv2D(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0]; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; TFAttrs attrs(node_def); int h_index = 2; @@ -874,11 +1016,31 @@ tensorflow::Status ConvertConv2D(Converter& ctx, // TODO(jie): transpose it } + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + int num_groups = group; + if (num_groups == 0) // depthwise convolution + num_groups = tensor_dim.d[0]; + VLOG(2) << "groups count: " << num_groups; + + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0] * num_groups; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; + VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + // TODO(jie): stride. (NHWC/NCHW) auto tf_stride = attrs.get>("strides"); + VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; + VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] + << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); - auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. if (attrs.get("padding") == "SAME") { @@ -919,10 +1081,11 @@ tensorflow::Status ConvertConv2D(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); + layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -935,11 +1098,101 @@ tensorflow::Status ConvertConv2D(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs, ConvolutionType type) { + switch (type) { + case ConvolutionType::DEFAULT: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); + case ConvolutionType::DEPTHWISE_CONV: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); + } + return tensorflow::errors::Unimplemented("unsupported convolution type at, " + + node_def.name()); +} + +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, const tensorflow::NodeDef& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // get trt type & shape + TFAttrs attrs(node_def); + // maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertPlaceholder( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + VLOG(2) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertConv2D(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEFAULT); +} + +tensorflow::Status ConvertConv2DDepthwise( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEPTHWISE_CONV); +} + tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -957,6 +1210,8 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; + else if (node_def.op() == "AvgPool") + type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("Only supports Max pool"); @@ -1019,9 +1274,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1031,14 +1286,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1055,16 +1310,33 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { VLOG(2) << "NCHW !!!!"; } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - weights, empty_weights, empty_weights); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - if (data_format == "NHWC") { - // TODO(jie): transpose it back! - output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); - } else { - VLOG(2) << "NCHW !!!!"; + auto dims = tensor->getDimensions(); + VLOG(2) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + VLOG(2) << "i: " << dims.d[i]; + } + dims = weights.shape_; + VLOG(2) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + VLOG(2) << "i: " << dims.d[i]; + } + + nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; + if (weights.shape_.d[0] == 1) { + mode = nvinfer1::ScaleMode::kUNIFORM; + } + + nvinfer1::IScaleLayer* layer = + ctx.network()->addScale(*const_cast(tensor), mode, + weights, empty_weights, empty_weights); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + if (data_format == "NHWC") { + // TODO(jie): transpose it back! + output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); + } else { + VLOG(2) << "NCHW !!!!"; } outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -1072,7 +1344,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1091,20 +1363,144 @@ tensorflow::Status ConvertConst(Converter& ctx, VLOG(2) << "SCALAR!!!" << node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(2) << "Dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - GetTensorShape(tensor)); + VLOG(2) << "dimensions: " << tensor.dims(); + VLOG(2) << "size: " << weights_tensor.float_val_size(); + scalar_shape = GetTensorShape(tensor); + for (int i = 0; i < scalar_shape.nbDims; i++) + VLOG(2) << scalar_shape.d[i]; + if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) { + if (weights_tensor.float_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.float_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + } + } } else { VLOG(2) << "Dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - scalar_shape); + } + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + auto half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + half_tensor.device(defd) = + tensor.flat().template cast(); + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } + } else if (!weights_tensor.int_val().empty()) { + VLOG(2) << "int!!!" << node_def.name(); + nvinfer1::Dims scalar_shape; + if (tensor.dims() > 0) { + VLOG(2) << "dimensions: " << tensor.dims(); + scalar_shape = GetTensorShape(tensor); + if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) { + if (weights_tensor.int_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.int_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + } + } + } else { + VLOG(2) << "dimensions: " << tensor.dims(); + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { + scalar_shape.d[i] = 0; + scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; + } + } + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + TTypes::Flat half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + switch (dtype) { + case (tensorflow::DT_INT32): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT16): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_UINT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + default: + return tensorflow::errors::InvalidArgument( + "Datatype " + tensorflow::DataTypeString(dtype) + + " for FP16 conversion"); + break; + }; + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); + len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val() + .end()); // make a local copy first to flatten + // doesn't have to be contigous + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" << node_def.name(); @@ -1130,7 +1526,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1138,7 +1534,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1165,7 +1561,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1183,7 +1579,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1191,7 +1587,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1229,6 +1625,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); if (index_list_data[i] == 1) permuted_index = 1; + idx_set.emplace(index_list_data[i]); } @@ -1236,7 +1633,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i)) { + if (idx_set.count(i) == 0) { permuted_index = i; break; } @@ -1271,12 +1668,13 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.TransposeTensor( const_cast(output_tensor), permutation_order); } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1284,7 +1682,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1371,19 +1769,287 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConcat(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + // not including the last input (axis) here + int input_size = static_cast(inputs.size()) - 1; + + if (!inputs.at(0).is_tensor()) + return tensorflow::errors::InvalidArgument( + "Concat in TRT support only Tensor input, at " + node_def.name()); + + // We are retrieving the axis + TRT_ShapedWeights axis = inputs.at(input_size).weights(); + + TFAttrs attrs(node_def); + // auto attr_size = attrs.at("N")->i(); + // auto data_type = attrs.get("T"); + auto index_type = attrs.get("Tidx"); + + // TODO(jie): handle data type + // Only expect to handle INT32 as index attributes for now + if (index_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "Tidx supports only DT_INT32, at " + node_def.name()); + + int index = *(static_cast(const_cast(axis.GetValues()))); + + // TODO(jie): early termination with no-op (attr_size==1) + + auto dim = inputs.at(0).tensor()->getDimensions(); + // dimension check + if (index > dim.nbDims + 1) + return tensorflow::errors::InvalidArgument( + "Concatenate on axis out of dimension range, at " + node_def.name()); + + if (index == 0) + return tensorflow::errors::InvalidArgument( + "Concatenate on batch dimension not supported, at " + node_def.name()); + + // incase we need permutation; + std::vector permutation_order(dim.nbDims + 1); + + for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; + + if (index != 1) { + permutation_order[1] = index - 1; + permutation_order[index - 1] = 1; + } + + std::vector inputs_vec; + // Shap chack (all input tensor should have same shape) + // starting from 0 since we are probably also doing transpose here; + for (int i = 0; i < input_size; i++) { + auto tensor_i = inputs.at(i).tensor(); + auto dim_i = tensor_i->getDimensions(); + if (dim_i.nbDims != dim.nbDims) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent dimensions, at " + + node_def.name()); + + for (int j = 0; j < dim.nbDims; j++) { + // check dimension consistency on non-concatenate axis + if (j != index - 1 && dim_i.d[j] != dim.d[j]) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent shape, at" + + node_def.name()); + } + + // TRT does concatenation only on channel! + if (index != 1) + tensor_i = ctx.TransposeTensor(const_cast(tensor_i), + permutation_order); + + inputs_vec.push_back(tensor_i); + } + + // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( + const_cast(inputs_vec.data()), + inputs_vec.size()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + if (index != 1) { + output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); + } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertFusedBatchNorm( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + TFAttrs attrs(node_def); + float epsilon = attrs.get("epsilon"); + auto data_format = attrs.get("data_format"); + if (data_format != "NCHW") { + return tensorflow::errors::Unimplemented( + "only data_format=NCHW is supported, at " + node_def.name()); + } + bool is_training = attrs.get("is_training"); + if (is_training) { + return tensorflow::errors::Unimplemented( + "only is_training=false is supported, at " + node_def.name()); + } + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + TRT_ShapedWeights variance_weights = inputs.at(4).weights(); + TRT_ShapedWeights dummy_power_weights(scale_weights.type_); + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(scale_weights); + TRT_ShapedWeights combined_offset_weights = + ctx.get_temp_weights_like(offset_weights); + size_t nweight = scale_weights.count(); + if ((scale_weights.type_ == offset_weights.type_) && + (mean_weights.type_ == variance_weights.type_) && + (scale_weights.type_ == variance_weights.type_)) { + if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && + (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + + tensorflow::DataTypeString(scale_weights.type_)); + } + if (scale_weights.type_ == tensorflow::DT_FLOAT) { + for (size_t i = 0; i < nweight; ++i) { + float scale = (static_cast(scale_weights.GetValues()))[i]; + float offset = + (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = + (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + } else { + const Eigen::half* scale_vals = + (static_cast(scale_weights.GetValues())); + const Eigen::half* off_vals = + (static_cast(offset_weights.GetValues())); + const Eigen::half* mean_vals = + (static_cast(mean_weights.GetValues())); + const Eigen::half* variance_vals = + (static_cast(variance_weights.GetValues())); + Eigen::half* comb_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* comb_off_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + for (size_t i = 0; i < nweight; ++i) { + float scale(scale_vals[i]); + float offset(off_vals[i]); + float mean(mean_vals[i]); + float variance(variance_vals[i]); + float combined_scale_ref = scale / sqrtf(variance + epsilon); + comb_scale_vals[i] = Eigen::half(combined_scale_ref); + float combined_offset_ref = offset - mean * combined_scale_ref; + comb_off_vals[i] = Eigen::half(combined_offset_ref); + } + } + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertMatMul(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + + // TODO(jie): transpose! + TFAttrs attrs(node_def); + + TRT_ShapedWeights weights_ck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); + ReorderCKtoKC(weights_ck, &weights); + TRT_ShapedWeights biases(weights.type_); + + int noutput = weights.shape_.d[0]; + + nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( + *const_cast(tensor), noutput, weights, biases); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertReshape( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + if (inputs.size() != 2 || !inputs.at(0).is_tensor() || + !inputs.at(1).is_weights()) + return tensorflow::errors::InvalidArgument( + "Input expects tensor and weights, at" + node_def.name()); + + // implement tensor binaryOp weight [channel wise] for now; + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + auto dims = tensor->getDimensions(); + // restore implicit batch dimension + + TRT_ShapedWeights shape = inputs.at(1).weights(); + + TFAttrs attrs(node_def); + + auto padding_type = attrs.get("Tshape"); + + if (shape.shape_.nbDims != 1) + return tensorflow::errors::InvalidArgument( + "reshape new shape is not 1 dimensional, at " + node_def.name()); + + // Only expect to handle INT32 as attributes for now + if (padding_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "reshape new shape supports only DT_INT32, at " + node_def.name()); + + auto shape_data = static_cast(const_cast(shape.GetValues())); + + if (shape_data[0] != -1) + return tensorflow::errors::InvalidArgument( + "reshape new shape first dimension is not -1, at " + node_def.name()); + + auto shape_num_dims = shape.shape_.d[0]; + VLOG(2) << "shape dimensions: " << shape_num_dims; + int volume_w = 1; + for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i]; + + int volume_t = 1; + for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i]; + + VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w; + if (volume_w != volume_t) + return tensorflow::errors::InvalidArgument( + "volume does not agree between tensor and new shape, at " + + node_def.name()); + + nvinfer1::IShuffleLayer* layer = + ctx.network()->addShuffle(*const_cast(tensor)); + + nvinfer1::Dims reshape_dims; + VLOG(2) << "new dimension: " << shape_num_dims - 1; + reshape_dims.nbDims = shape_num_dims - 1; + for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { + reshape_dims.d[i] = shape_data[i + 1]; + } + layer->setReshapeDimensions(reshape_dims); + VLOG(2) << "new dimension: " << shape_num_dims - 1; + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + auto dims_output = output_tensor->getDimensions(); + VLOG(2) << "output tensor dimension:" << dims_output.nbDims; + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation op_registry_["Placeholder"] = ConvertPlaceholder; op_registry_["Conv2D"] = ConvertConv2D; + op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; op_registry_["Relu"] = ConvertActivation; op_registry_["MaxPool"] = ConvertPool; + op_registry_["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; - // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed - // op_registry_["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -1393,26 +2059,364 @@ void Converter::register_op_converters() { op_registry_["Mean"] = ConvertReduce; op_registry_["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops + + op_registry_["ConcatV2"] = ConvertConcat; + op_registry_["MatMul"] = ConvertMatMul; + op_registry_["Reshape"] = ConvertReshape; + op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; + op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace +tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { + return tensorflow::errors::Unimplemented("Not implemented yet"); +} +tensorflow::Status ConvertCalibrationNodeToEngineNode( + tensorflow::Graph& graph, tensorflow::Node* c_node) { + const auto ndef = c_node->def(); + + TFAttrs attrs(ndef); + std::vector segment_nodes( + attrs.get>("segment_nodes")); + std::vector output_nodes( + attrs.get>("segment_output_names")); + std::vector input_names( + attrs.get>("input_names")); + string res_name = attrs.get("resource_name"); + VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; + string engine_name = "my_trt_op"; + { + const auto node_id = tensorflow::str_util::Split(res_name, "_"); + engine_name += node_id.back(); + } + std::map node_maps; + + for (auto n : graph.op_nodes()) { + node_maps.insert({n->name(), n}); + } + VLOG(1) << "Output Nodes:"; + std::vector out_types; + std::vector out_edges; + for (auto& i : output_nodes) { + auto node_port = tensorflow::str_util::Split(i, ":"); + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + auto out_node_name = node_port.at(0); + if (node_port.size() > 1) { + VLOG(1) << "Multi port output" << node_port.at(0) << " " + << node_port.at(1) << " size=" << node_port.size(); + } + auto node_it = node_maps.find(out_node_name); + if (node_it != node_maps.end()) { + tensorflow::Node* out_node = node_it->second; + int port = 0; + if (node_port.size() == 2) { + port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); + out_types.push_back(out_node->output_type(port)); + } else { + out_types.push_back(out_node->output_type(0)); + } + for (auto out_edge : out_node->out_edges()) { + if (out_edge->src_output() == port) { + out_edges.push_back(out_edge); + break; + } + } + } else { + LOG(WARNING) << " couldn't find output node " << out_node_name; + } + } + VLOG(1) << "Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + } + auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); + auto resmgr = trt_rm->getManager("TRTCalibOps"); + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = resmgr->Lookup(res_name, res_name, &calib_res); + if (!status.ok() || !calib_res->calibrator_) { + return tensorflow::errors::FailedPrecondition( + "You must run calibration" + " and inference conversion in the same proces"); + } + + calib_res->calibrator_->setDone(); + calib_res->thr_->join(); + delete calib_res->thr_; + if (!calib_res->engine_) { + LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " + "calibration graph?"; + } + auto weight_rmgr = trt_rm->getManager("WeightStore"); + TF_CHECK_OK(weight_rmgr->Delete( + res_name, res_name)); + auto engine_plan = calib_res->engine_->serialize(); + calib_res->engine_->destroy(); + calib_res->network_->destroy(); + calib_res->builder_->destroy(); + calib_res->thr_ = nullptr; + calib_res->engine_ = nullptr; + calib_res->builder_ = nullptr; + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + std::vector income_edges; + for (const auto in_edge : c_node->in_edges()) { + auto src = in_edge->src(); + int dest_port = in_edge->dst_input(); + income_edges.emplace_back(src->name(), in_edge->src_output(), + c_node->input_type(dest_port)); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + tensorflow::NodeDef engine_node; + const char* engine_plan_data = static_cast(engine_plan->data()); + string engine_plan_string(engine_plan_data, + engine_plan_data + engine_plan->size()); + status = op_builder.Attr("serialized_engine", engine_plan_string) + .Attr("input_nodes", input_names) + .Attr("output_nodes", output_nodes) + .Attr("OutT", out_types) + .Finalize(&engine_node); + if (!status.ok()) { + LOG(ERROR) << "Engine Node creation failed"; + return status; + } + auto trt_engine_node = graph.AddNode(engine_node, &status); + TF_CHECK_OK(status); + for (size_t i = 0; i < out_edges.size(); i++) { + VLOG(1) << "Connecting trt_engine_node output " << i << " with " + << out_edges.at(i)->dst()->name() << " port " + << out_edges.at(i)->dst_input(); + TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, + out_edges.at(i)->dst(), + out_edges.at(i)->dst_input())); + } + VLOG(1) << "Segment nodes:"; + for (auto& i : segment_nodes) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + auto it = node_maps.find(i); + if (it != node_maps.end()) { + graph.RemoveNode(it->second); + } + } + graph.RemoveNode(c_node); + return tensorflow::Status::OK(); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { + // Visit nodes in reverse topological order and construct the TRT network. + + // Toposort + std::vector order_vec; + tensorflow::GetPostOrder(s.graph, &order_vec); + // Select just the subgraph + std::list order; + for (tensorflow::Node* node : order_vec) { + if (s.subgraph_node_ids.count(node->id())) { + order.push_front(node); // we want topological order to contstruct the + // network layer by layer + } + } + // topological order is needed to build TRT network + static int static_id = 0; + string subgraph_name_scope; + if (!order.empty()) { + subgraph_name_scope = order.front()->name(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); + } + // TODO(sami,ben,jie): proper naming! + string calib_op_name = + StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); + static_id++; + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); + auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); + op_res->logger_ = new tensorflow::tensorrt::Logger(); + op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); + + if (!op_res->builder_) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + op_res->network_ = op_res->builder_->createNetwork(); + if (!op_res->network_) { + return tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + // Build the network + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); + Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + std::vector input_names; + std::vector input_dtypes; + for (const std::pair& input : s.input_inds) { + VLOG(2) << "parsing input. Node id= " << input.first; + int node_id = input.first; + int output_idx = input.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + auto node_name = node->name(); + input_names.push_back(node_name); // insert original node name without port + // TODO(jie): alternative :) + if (!s.graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + node_name); + + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) + return tensorflow::errors::Internal( + "accessing output index of: ", output_idx, ", at node: ", node_name, + "with output entry from shape_map: ", op_info_vec.size()); + + auto op_info = op_info_vec.at(output_idx); + + tensorflow::DataType tf_dtype = op_info.dtype(); + input_dtypes.push_back(tf_dtype); + + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); + + VLOG(2) << "accessing output index of: " << output_idx + << ", at node: " << node_name + << "with output entry from shape_map: " << op_info_vec.size(); + + // TODO(ben,jie): update TRT input format/dimension + nvinfer1::DimsCHW input_dim_psuedo_chw; + for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + + for (int i = 1; i < op_info.shape().dim_size(); i++) { + VLOG(2) << "dimension: " << i + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + } + + // TODO(ben,jie): proper way to restore input tensor name? + auto input_tensor_name = node_name; + if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); + + nvinfer1::ITensor* input_tensor = converter.network()->addInput( + input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + + if (!input_tensor) + return tensorflow::errors::InvalidArgument( + "Failed to create Input layer"); + VLOG(2) << "input tensor name :" << input_tensor_name; + + if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) + return tensorflow::errors::AlreadyExists( + "output tensor already exists for op: " + input_tensor_name); + } + + VLOG(2) << "finished sorting"; + + for (const tensorflow::Node* node : order) { + const tensorflow::NodeDef& node_def = node->def(); + VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); + TF_RETURN_IF_ERROR(converter.convert_node(node_def)); + } + + VLOG(2) << "finished conversion"; + + // Gather output metadata + std::vector output_names; + std::vector output_dtypes; + int trt_engine_op_output_idx = 0; + for (const std::pair& output : s.output_inds) { + int node_id = output.first; + int output_idx = output.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + string op_name = node->name(); + string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : StrCat(engine_name, ":", trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } + VLOG(1) << "output tensor name: " << tensor_name; + output_names.push_back(tensor_name); + auto tensor_or_weights = converter.get_tensor(tensor_name); + if (!tensor_or_weights.is_tensor()) { + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); + } + nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); + if (!tensor) { + return tensorflow::errors::NotFound("Output tensor not found: " + + tensor_name); + } + converter.network()->markOutput(*tensor); + tensorflow::DataType tf_dtype = node->output_type(output_idx); + output_dtypes.push_back(tf_dtype); + nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; + TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); + tensor->setType(trt_dtype); + } + + VLOG(2) << "finished output"; + + // Build the engine + op_res->builder_->setMaxBatchSize(s.max_batch_size); + op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); + + // Build the TRT op + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); + std::vector income_edges; + for (size_t i = 0; i < input_names.size(); ++i) { + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( + input_names.at(i), output_idx, input_dtypes.at(i)); + VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) + << ":" << output_idx + << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + std::vector segment_names; + segment_names.reserve(s.subgraph_node_ids.size()); + for (int i : s.subgraph_node_ids) { + auto node = s.graph.FindNodeId(i); + segment_names.push_back(node->name()); + } + LOG(INFO) << "finished op preparation"; + + auto status = op_builder.Attr("segment_nodes", segment_names) + .Attr("input_names", input_names) + .Attr("segment_output_names", output_names) + .Attr("resource_name", calib_op_name) + .Finalize(s.trt_node); + + LOG(INFO) << status.ToString(); + LOG(INFO) << "finished op building"; + + return tensorflow::Status::OK(); +} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& input_inds, - const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::NodeDef* trt_node) { + tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. // Toposort std::vector order_vec; - tensorflow::GetPostOrder(graph, &order_vec); + tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph std::list order; for (tensorflow::Node* node : order_vec) { - if (subgraph_node_ids.count(node->id())) { + if (s.subgraph_node_ids.count(node->id())) { // We want topological order to contstruct the // network layer by layer order.push_front(node); @@ -1434,46 +2438,86 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "Failed to create TensorRT network object"); } + string subgraph_name_scope; + if (!order.empty()) { + subgraph_name_scope = order.front()->name(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); + } + static int static_id = 0; + // TODO(sami,ben,jie): proper naming! + string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); + engine_name = StrCat(engine_name, static_id++); + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); + // Build the network - Converter converter(trt_network.get()); + Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); std::vector input_names; std::vector input_dtypes; - for (std::pair const& input : input_inds) { + for (const std::pair& input : s.input_inds) { + VLOG(2) << "parsing input!!!!!"; int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); auto node_name = node->name(); - input_names.push_back(node_name); // Insert original node name without port - // TODO(jie): alternative :) - if (!graph_properties.HasOutputProperties(node_name)) - return tensorflow::errors::Internal("Failed to find input node: " + - node_name); + // input_names should use the node name in the graph + // here it should be the input tensor name -> matching the binding + // insert original node name without port + auto tensor_name = node_name; + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } - auto op_info_vec = graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) - return tensorflow::errors::Internal( - "Accessing output index of: " + std::to_string(output_idx) + - ", at node: " + node_name + " with output entry from shape_map: " + - std::to_string(op_info_vec.size())); + VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name + << " idx: " << output_idx; - auto op_info = op_info_vec.at(output_idx); + auto shape_inference_node_name = node_name; + auto shape_inference_output_idx = output_idx; + // rewire the shape inference to original node in the graph + if (s.output_edge_map->count(tensor_name)) { + shape_inference_node_name = s.output_edge_map->at(tensor_name).second; + shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; + } + if (shape_inference_output_idx < 0) continue; + VLOG(2) << "shapeinference name: " << shape_inference_node_name + << " idx: " << shape_inference_output_idx; + + if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + shape_inference_node_name); + + auto op_info_vec = + s.graph_properties.GetOutputProperties(shape_inference_node_name); + if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) + return tensorflow::errors::Internal( + "accessing output index of: ", shape_inference_output_idx, + ", at node: ", shape_inference_node_name, + " with output entry from shape_map: ", op_info_vec.size()); + auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "Accessing output index of: " << std::to_string(output_idx) + VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name - << " with output entry from shape_map: " - << std::to_string(op_info_vec.size()); - + << " with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + // TODO(jie): TRT 3.x only support 4 dimensional input tensor. + // update the code once TRT 4.0 comes out. + if (op_info.shape().dim_size() != 4) + return tensorflow::errors::Unimplemented("require 4 dimensional input"); + for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); @@ -1482,9 +2526,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) - input_tensor_name = node_name + ":" + std::to_string(output_idx); + if (output_idx != 0) { + input_tensor_name = StrCat(node_name, ":", output_idx); + } + input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -1511,14 +2557,22 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector output_names; std::vector output_dtypes; - for (std::pair const& output : output_inds) { + int trt_engine_op_output_idx = 0; + for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); string op_name = node->name(); string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : StrCat(engine_name, ":", trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; if (output_idx != 0) - tensor_name = tensor_name + ":" + std::to_string(output_idx); + tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -1540,19 +2594,25 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << "Finished output"; - // TODO(jie): static_id is not thread safe. - static int static_id = 0; // Build the engine - trt_builder->setMaxBatchSize(max_batch_size); - trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes); - VLOG(0) << "Starting build engine " << static_id; - // TODO(ben,jie): half2 and int8 mode support + trt_builder->setMaxBatchSize(s.max_batch_size); + trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); + VLOG(0) << "Max batch size= " << s.max_batch_size + << " max workspace size= " << s.max_workspace_size_bytes; + if (s.precision_mode == FP16MODE) { + trt_builder->setHalf2Mode(true); + VLOG(0) << "Using FP16 precision mode"; + } + LOG(INFO) << "starting build engine"; string engine_plan_string; { auto trt_engine = infer_object(trt_builder->buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; + if (trt_engine.get() == nullptr) { + return tensorflow::errors::Internal("Engine building failure"); + } auto engine_plan = infer_object(trt_engine->serialize()); VLOG(0) << "Serialized engine"; const char* engine_plan_data = @@ -1560,18 +2620,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - - VLOG(0) << "Finished engine"; + TF_RETURN_IF_ERROR(weight_rmgr->Delete( + engine_name, engine_name)); + LOG(INFO) << "finished engine " << engine_name; // Build the TRT op - // TODO(sami,ben,jie): proper naming! - tensorflow::NodeDefBuilder op_builder( - tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp"); + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; + VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = input_inds.at(i).second; - // We wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) + VLOG(2) << "input edges: " << i << " " << input_names.at(i); + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); income_edges.push_back(incoming_edge); @@ -1586,7 +2647,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(trt_node); + .Finalize(s.trt_node); VLOG(0) << status.ToString() << " finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2e7fd19566..954a1e72f8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -17,6 +17,8 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_ #include +#include +#include #include #include @@ -32,16 +34,49 @@ namespace tensorflow { namespace tensorrt { namespace convert { -tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& - input_inds, // {node_id, output_idx} - const std::vector>& - output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_prop, - tensorflow::NodeDef* trt_node); +const int FP32MODE = 0; +const int FP16MODE = 1; +const int INT8MODE = 2; +struct SubGraphParams { + SubGraphParams( + tensorflow::Graph& inp_graph, + const std::set& subgraph_node_id_numbers, + const std::vector>& input_indices, + const std::vector>& output_indices, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + tensorflow::NodeDef* constructed_trt_node, + int engine_precision_mode = FP32MODE) + : graph(inp_graph), + subgraph_node_ids(subgraph_node_id_numbers), + input_inds(input_indices), + output_inds(output_indices), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + trt_node(constructed_trt_node), + precision_mode(engine_precision_mode) {} + + tensorflow::Graph& graph; + const std::set& subgraph_node_ids; + const std::vector>& input_inds; // {node_id, output_idx} + const std::vector>& output_inds; // {node_id, output_idx} + size_t max_batch_size; + size_t max_workspace_size_bytes; + const tensorflow::grappler::GraphProperties& graph_properties; + std::unordered_map>* output_edge_map; + tensorflow::NodeDef* trt_node; + const int precision_mode; +}; + +// TODO(sami): Replace references with const reference or pointers +tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); +tensorflow::Status InjectCalibrationNode(SubGraphParams& params); +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, + tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 1dcb87e768..aea44fd8a2 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -21,10 +21,11 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -113,7 +114,13 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - calib_res->calibrator_->setBatch(input_data); + // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(ctx->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + calib_res->calibrator_->setBatch(input_data, *stream); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 8efdf63ebe..b32371b642 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,8 +24,12 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -namespace tensorrt { static ::tensorflow::tensorrt::Logger logger; +namespace gpu = ::perftools::gputools; +using IRuntime = nvinfer1::IRuntime; +using Dims = nvinfer1::Dims; + +namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine @@ -40,10 +44,21 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); + // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same + // gpu where the input/output is also located. + int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; + cudaSetDevice(gpu_id); + int device; + cudaGetDevice(&device); + if (gpu_id != device) LOG(FATAL) << "set device failed!"; + + // TODO(samikama) runtime should be taken from a resourcemanager as well. + // Only engine should be in the op and context and runtime should be taken + // from resourcemanager + + IRuntime* infer = nvinfer1::createInferRuntime(logger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); - trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); // Runtime is safe to delete after engine creation infer->destroy(); @@ -55,7 +70,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { size_t binding_index; int num_batch = 0; - bool valid = true; for (int i = 0; i < context->num_inputs(); i++) { // Grab the input tensor binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); @@ -64,8 +78,12 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { num_batch = input_shape.dim_size(0); + if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { + LOG(FATAL) << "input tensor batch larger than max_batch_size: " + << trt_engine_ptr_->getMaxBatchSize(); + } } else if (num_batch != input_shape.dim_size(0)) { - valid = false; + LOG(FATAL) << "input data inconsistent batch size"; break; } switch (trt_engine_ptr_->getBindingDataType(binding_index)) { @@ -81,9 +99,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } - // Might want a different way to inform the user of batch size inconsistency - if (!valid) LOG(WARNING) << "input data inconsistent batch size"; - for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. // Create an output tensor @@ -126,9 +141,11 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - // execution handled by TF since we are getting stream from TF. - // it is safe for CPU pointer array (buffers) to go out of scope after enqueue - trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr); + // TODO(jie): trt enqueue does not return error + auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], + *stream, nullptr); + VLOG(2) << "enqueue returns: " << ret; + // sync should be done by TF. } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index 7add8cb8b3..dda0dc9e71 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << msg; + VLOG(2) << name_ << " " << msg; break; } case Severity::kWARNING: { - LOG(WARNING) << msg; + LOG(WARNING) << name_ << " " << msg; break; } case Severity::kERROR: { - LOG(ERROR) << msg; + LOG(ERROR) << name_ << " " << msg; break; } case Severity::kINTERNAL_ERROR: { - LOG(FATAL) << msg; + LOG(FATAL) << name_ << " " << msg; break; } // This is useless for now. But would catch it in future if enum changes. It diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index d71f66b933..7f3544f8cf 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -27,9 +27,11 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { - private: + public: + Logger(string name = "DefaultLogger") : name_(name){}; void log(nvinfer1::ILogger::Severity severity, const char* msg) override; + private: string name_; }; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 7e050a768c..0b2321b5fc 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -20,5 +20,6 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.tensorrt.python.ops import trt_engine_op +from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 9454862f85..666220d78c 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -20,11 +20,17 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six +from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl +from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.util import compat +# pylint: enable=unused-import,line-too-long # TODO(skama): get outputs from session when implemented as c++ @@ -32,22 +38,33 @@ from tensorflow.python.framework import ops def create_inference_graph(input_graph_def, outputs, max_batch_size=1, - max_workspace_size_bytes=2 << 20): + max_workspace_size_bytes=2 << 20, + precision_mode="FP32", + minimum_segment_size=3): """Python wrapper for the TRT transormation. - Args: input_graph_def: GraphDef object containing a model to be transformed. - outputs: List of tensors or node names for the model outputs. + outputs: list of tensors or node names for the model outputs. max_batch_size: max size for the input batch max_workspace_size_bytes: parameter to control memory allocation (in Bytes) + precision_mode: one of 'FP32', 'FP16' and 'INT8' + minimum_segment_size: the minimum number of nodes required for a subgraph to + be replaced by TRTEngineOp. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. Raises: + ValueError: if the provided precision mode is invalid. RuntimeError: if the returned status message is malformed. """ + supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2} + if precision_mode.upper() not in supported_precision_modes: + raise ValueError(("precision mode '{}' is not supported." + "It should be one of {}").format( + precision_mode, "{'FP32', 'FP16', 'INT8'}")) + mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -83,7 +100,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes) + max_workspace_size_bytes, mode, minimum_segment_size) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -101,3 +118,46 @@ def create_inference_graph(input_graph_def, output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string # Save some memory return output_graph_def + + +def calib_graph_to_infer_graph(calibration_graph_def): + """Convert an existing calibration graph to inference graph. + + Args: + calibration_graph_def: the calibration GraphDef object with calibration data + Returns: + New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. + Raises: + RuntimeError: if the returned status message is malformed. + """ + + def py2string(inp): + return inp + + def py3string(inp): + return inp.decode("utf-8") + + if _six.PY2: + to_string = py2string + else: + to_string = py3string + + graph_str = calibration_graph_def.SerializeToString() + out = calib_convert(graph_str) + status = to_string(out[0]) + output_graph_def_string = out[1] + del graph_str # Save some memory + if len(status) < 2: + raise _impl.UnknownError(None, None, status) + if status[:2] != "OK": + msg = status.split(";") + if len(msg) == 1: + raise RuntimeError("Status message is malformed {}".format(status)) + # pylint: disable=protected-access + raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), + int(msg[0])) + # pylint: enable=protected-access + output_graph_def = graph_pb2.GraphDef() + output_graph_def.ParseFromString(output_graph_def_string) + del output_graph_def_string # Save some memory + return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 3d5cc76c42..74df75902e 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -38,22 +38,18 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), + batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch( - const std::unordered_map& data) { - // TODO(aaroey): make sure that in future PR: - // 1. the mutex_lock is outside of the loop - // 2. wait() is used instead of wait_for() - // 3. done_ is to be protected by the mutex - // 4. the first batch is not missed - if (done_) return false; - while (calib_running_.load( - std::memory_order_acquire)) { // wait while calibration is running - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; +bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, + const cudaStream_t stream) { + tensorflow::mutex_lock lock(cond_mtx_); + while ((calib_running_ || batch_is_set_) && + !done_) { // wait while calibration is running + cond_.wait(lock); } + if (done_) return false; + CHECK(!calib_running_ && !batch_is_set_); VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { auto devptr = dev_buffers_.find(it.first); @@ -65,27 +61,32 @@ bool TRTInt8Calibrator::setBatch( // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - auto status = - cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + // TODO(sami,aaroey): Need to figureout a way to ensure synchronization + // between stream, perhaps using a tensor? + auto status = cudaMemcpyAsync(d.first, it.second, d.second, + cudaMemcpyDeviceToDevice, stream); if (status != cudaSuccess) { LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first << "' failed with " << status; } } - calib_running_.store(true, std::memory_order_release); // release builder + + // TODO(Sami, aaorey): Find an alternative way! + cudaStreamSynchronize( + stream); // we have to wait for the stream before returning! + batch_is_set_ = true; cond_.notify_all(); return true; } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - calib_running_.store(false, std::memory_order_release); // wait for new batch + tensorflow::mutex_lock lock(cond_mtx_); + calib_running_ = false; cond_.notify_all(); - while (!calib_running_.load( - std::memory_order_acquire)) { // wait until new batch arrives - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; + while ((!batch_is_set_ && !done_)) { // wait until new batch arrives + cond_.wait(lock); + } if (done_) { return false; @@ -100,6 +101,8 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, bindings[i] = it->second.first; } + batch_is_set_ = false; + calib_running_ = true; return true; } @@ -107,6 +110,12 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } +void TRTInt8Calibrator::setDone() { + tensorflow::mutex_lock lock(cond_mtx_); + done_ = true; + cond_.notify_all(); +} + void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, std::size_t length) {} TRTInt8Calibrator::~TRTInt8Calibrator() { @@ -115,5 +124,6 @@ TRTInt8Calibrator::~TRTInt8Calibrator() { } // namespace tensorrt } // namespace tensorflow + #endif #endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index 8830f7efe7..d77aa2c5ab 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,7 +24,10 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT + +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" + namespace tensorflow { namespace tensorrt { // This class provides a 1 element queue to match TFs push model to @@ -39,8 +42,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map& data); - void setDone() { done_ = true; } + bool setBatch(const std::unordered_map& data, + const cudaStream_t stream); + void setDone(); const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); @@ -55,11 +59,14 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { const std::unordered_map> dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with // buffer names - std::atomic_bool calib_running_; + bool calib_running_; + bool batch_is_set_; string engine_name_; }; + } // namespace tensorrt } // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + #endif #endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index c78f6f2224..0b661bd536 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -60,6 +60,7 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): + """Run given graphdef once.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -74,15 +75,65 @@ def run_graph(gdef, dumm_inp): return val +# Use real data that is representatitive of the inference dataset +# for calibration. For this test script it is random data. +def run_calibration(gdef, dumm_inp): + """Run given calibration graph multiple times.""" + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with csess.Session( + config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: + # run over real calibration data here, we are mimicking a calibration set of + # 30 different batches. Use as much calibration data as you want + for _ in range(30): + val = sess.run(out, {inp: dumm_inp}) + return val + + if "__main__" in __name__: inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) - gdef = get_simple_graph_def() + orig_graph = get_simple_graph_def() # use a frozen graph for inference # Get optimized graph - trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0]) - o1 = run_graph(gdef, dummy_input) + trt_graph = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o1 = run_graph(orig_graph, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) assert np.array_equal(o1, o2) assert np.array_equal(o3, o2) # sanity check + fp16_graph = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + int8_calib_gdef = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o4 = run_graph(fp16_graph, dummy_input) + _ = run_calibration(int8_calib_gdef, dummy_input) + int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) + o5 = run_graph(int8_graph, dummy_input) + assert np.allclose(o1, o4) + assert np.allclose(o1, o5) print("Pass") diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index d679945d56..46480e99a1 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -64,13 +64,17 @@ PyObject* pair_helper(std::pair* in) { %ignoreall %unignore tensorflow; %unignore trt_convert; +%unignore calib_convert; %{ + std::pair trt_convert( string graph_def_string, // The serialized GraphDef string. std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes + size_t max_workspace_size_bytes, + int precision_mode, + int minimum_segment_size // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. These ops are included @@ -90,16 +94,64 @@ std::pair trt_convert( return std::pair{out_status, ""}; } + if(precision_mode < 0 || precision_mode > 2){ + out_status = "InvalidArgument;Invalid precision_mode"; + return std::pair{out_status, ""}; + } if (!output_names.size()) { out_status = "InvalidArgument;Size of the output_names vector is 0"; return std::pair{out_status, ""}; - // return ""; } tensorflow::GraphDef outGraph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph); + &outGraph, precision_mode, minimum_segment_size); + if (!conversion_status.ok()) { + auto retCode = (int)conversion_status.code(); + char buff[2000]; + snprintf(buff, 2000, "%d;%s", retCode, + conversion_status.error_message().c_str()); + out_status = buff; + return std::pair{out_status, ""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status, ""}; + } + out_status = "OK;All good!"; + return std::pair{out_status, result}; +#else + // Returns FAILED_PRECONDITION. + return std::pair{"9;TensorRT is not enabled!", ""}; +#endif // GOOGLE_CUDA && GOOGLE_TENSORRT +} + +std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { +#if GOOGLE_CUDA && GOOGLE_TENSORRT + string out_status; + + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status, ""}; + } + + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = + tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, + &outGraph); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -122,10 +174,13 @@ std::pair trt_convert( } %} +std::pair calib_convert(string graph_def_string); + std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes); + size_t max_workspace_size_bytes, + int precision_mode, int minimum_segment_size); %unignoreall diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index bb86ecb220..70bf67c779 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,7 +25,10 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = ["notsan"], # b/67513579 + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", # b/67513579 + ], deps = [ ":predict", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index ed3ed4c0e1..64f5cd8357 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -156,9 +156,7 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = [ - "no_pip_gpu", # b/63391119 - ], + tags = ["no_pip_gpu"], # b/63391119 deps = [ ":feature_keys", ":head", @@ -427,6 +425,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 + "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index c86d06e923..07df7bc9a5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,6 +40,7 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index ed930e44e8..f9d433a45b 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -225,6 +225,7 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index f4283cd9ed..dca01d26f4 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,9 +42,10 @@ def load_op_library(path): plugin. """ if os.name == 'nt': - # To avoid makeing every user_ops aware of windows, re-write - # the file extension from .so to .dll. - path = re.sub(r'\.so$', '.dll', path) + # To avoid making every user_ops aware of windows, re-write + # the file extension from .so to .dll if .so file doesn't exist. + if not os.path.exists(path): + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 352e183104..5c9fd2f406 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3327,6 +3327,10 @@ tf_cc_test( size = "small", srcs = ["common_runtime/function_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), + tags = [ + "manual", + "no_oss", + ], deps = [ ":core", ":core_cpu", diff --git a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt new file mode 100644 index 0000000000..9fabe7863e --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt @@ -0,0 +1,18 @@ +op { + graph_op_name: "SlideDataset" + in_arg { + name: "window_size" + description: <contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index beaf0adbc5..cfe23d1ffe 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -474,11 +474,11 @@ class GraphDatasetBase : public DatasetBase { } // Key for storing the Dataset graph in the serialized format. - static const char kDatasetGraphKey[]; + TF_EXPORT static const char kDatasetGraphKey[]; // Key for storing the output node of the Dataset graph in the serialized // format. - static const char kDatasetGraphOutputNodeKey[]; + TF_EXPORT static const char kDatasetGraphOutputNodeKey[]; private: Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 02038c5d77..1507b6eae2 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2492,10 +2492,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { mkl_op_registry::GetMklOpName(csinfo_.identity), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool), CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); @@ -2865,6 +2865,28 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // path is taken, i.e., eigen node is rewritten by MKl DNN node. + static bool LrnRewrite(const Node* n) { + CHECK_NOTNULL(n); + + int depth_radius; + CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); + + // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN + // and use eigen node instead + if (depth_radius == 2) { + return true; + } + VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" + << "case is not optimized by Intel MKL, thus using Eigen op" + << "for LRN " ; + + return false; + } + static bool AddNRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -3528,11 +3550,13 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, string data_format; string padding; std::vector strides; + std::vector dilations; bool use_cudnn_on_gpu; // Get all attributes from old node. TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3541,6 +3565,7 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, // Add attributes to new node. nb->Attr("T", T); nb->Attr("strides", strides); + nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); @@ -3778,12 +3803,14 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, DataType T_pred, T_succ; string padding; std::vector strides; + std::vector dilations; string data_format_pred, data_format_succ; bool use_cudnn_on_gnu; TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(pred->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 244653504d..8f13c4a702 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -408,7 +409,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { frame_children_[frame_ids[0]].insert(frame_ids[1]); frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; } - if (!frame_ids.empty()) { + if (frame_ids.size() >= 1) { frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); if (node->op() == "LoopCond") { if (loop_cond_.count(frame_ids.back())) { @@ -427,7 +428,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { } for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { - if (it->second.empty()) { + if (it->second.size() == 0) { worklist.push_back(it->first); } } @@ -440,7 +441,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { if (parent_it != frame_parent_.end()) { int parent_id = parent_it->second; frame_children_[parent_id].erase(frame_id); - if (frame_children_[parent_id].empty()) { + if (frame_children_[parent_id].size() == 0) { worklist.push_back(parent_id); } } @@ -464,7 +465,6 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { TF_RETURN_IF_ERROR(RemoveStackOps(item.graph, optimized_graph)); - optimized_graph_ = optimized_graph; // Set up helper data structures. diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 48d5955ad1..2e39f25fc1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5157,7 +5157,6 @@ tf_kernel_library( ], hdrs = [ "meta_support.h", - "quantization_utils.h", "reference_gemm.h", ], deps = [ @@ -5232,6 +5231,7 @@ tf_cc_test( name = "quantization_utils_test", srcs = ["quantization_utils_test.cc"], deps = [ + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5294,6 +5294,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5355,6 +5356,7 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5377,6 +5379,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5441,6 +5444,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5461,6 +5465,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5500,6 +5505,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5556,6 +5562,7 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5578,6 +5585,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5614,6 +5622,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5635,6 +5644,7 @@ tf_cc_test( deps = [ ":batch_norm_op", ":ops_testutil", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 484d4f88d6..01754ec21a 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -113,6 +113,19 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "slide_dataset_op", + srcs = ["slide_dataset_op.cc"], + deps = [ + ":dataset", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:batch_util", + ], +) + tf_kernel_library( name = "padded_batch_dataset_op", srcs = ["padded_batch_dataset_op.cc"], @@ -538,6 +551,7 @@ tf_kernel_library( ":scan_dataset_op", ":shuffle_dataset_op", ":skip_dataset_op", + ":slide_dataset_op", ":sparse_tensor_slice_dataset_op", ":sql_dataset_ops", ":stats_aggregator_ops", diff --git a/tensorflow/core/kernels/data/slide_dataset_op.cc b/tensorflow/core/kernels/data/slide_dataset_op.cc new file mode 100644 index 0000000000..4f3537b691 --- /dev/null +++ b/tensorflow/core/kernels/data/slide_dataset_op.cc @@ -0,0 +1,252 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/batch_util.h" +#include "tensorflow/core/kernels/data/dataset.h" + +namespace tensorflow { + +namespace { + +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following op. + +class SlideDatasetOp : public UnaryDatasetOpKernel { + public: + explicit SlideDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + int64 window_size = 0; + int64 stride = 1; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "window_size", &window_size)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "stride", &stride)); + OP_REQUIRES( + ctx, window_size > 0, + errors::InvalidArgument("Window size must be greater than zero.")); + OP_REQUIRES( + ctx, stride > 0 && stride < window_size, + errors::InvalidArgument("Stride must be in [1, window_size).")); + + *output = new Dataset(ctx, window_size, stride, input); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input) + : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) { + input_->Ref(); + + const auto& input_shapes = input_->output_shapes(); + output_shapes_.reserve(input_shapes.size()); + for (const auto& input_shape : input_shapes) { + output_shapes_.emplace_back( + PartialTensorShape({-1}).Concatenate(input_shape)); + } + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + Iterator::Params{this, strings::StrCat(prefix, "::Slide")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { + return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset"); + } + + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* window_size = nullptr; + Node* stride = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); + TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, window_size, stride}, output)); + return Status::OK(); + } + + private: + + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + const int64 window_size = dataset()->window_size_; + const int64 stride = dataset()->stride_; + std::vector> batch_elements; + { + mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + batch_elements.reserve(window_size); + const bool first_call = cache_.empty(); + if (first_call) { + cache_.reserve(window_size); + } else { + // Reuse cache in the previous iteration. + cache_.swap(batch_elements); + } + // Fill up with new elements. + *end_of_sequence = false; + for (size_t i = batch_elements.size(); i < window_size && !*end_of_sequence; + ++i) { + std::vector batch_element_tuple; + TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, + end_of_sequence)); + if (!*end_of_sequence) { + batch_elements.push_back(std::move(batch_element_tuple)); + } else { + input_impl_.reset(); + } + } + // Drop the final smaller blocks. + if (batch_elements.size() < window_size) { + DCHECK(*end_of_sequence); + return Status::OK(); + } + // Cache the data used for the next iteration. + for (size_t i = stride; i < window_size; ++i) { + cache_.emplace_back(batch_elements[i]); + } + } + + // Construct output tensors. + // Those codes below are copied from batch_dataset_op.cc. + const size_t num_tuple_components = batch_elements[0].size(); + const int64 num_batch_elements = batch_elements.size(); + for (size_t component_index = 0; component_index < num_tuple_components; + ++component_index) { + const Tensor& first_element = batch_elements[0][component_index]; + TensorShape batch_component_shape({num_batch_elements}); + batch_component_shape.AppendShape(first_element.shape()); + Tensor batch_component(cpu_allocator(), first_element.dtype(), + batch_component_shape); + // Build the output tuple component by copying one slice + // from each input element in the batch. + for (size_t i = 0; i < num_batch_elements; ++i) { + if (batch_elements[i][component_index].shape() != + first_element.shape()) { + return errors::InvalidArgument( + "Cannot batch tensors with different shapes in component ", + component_index, ". First element had shape ", + first_element.shape().DebugString(), " and element ", i, + " had shape ", + batch_elements[i][component_index].shape().DebugString(), + "."); + } + TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice( + std::move(batch_elements[i][component_index]), &batch_component, + i)); + } + out_tensors->emplace_back(std::move(batch_component)); + } + *end_of_sequence = false; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } + // Save cache. + TF_RETURN_IF_ERROR( + writer->WriteScalar(strings::StrCat("cache_size"), cache_.size())); + for (int64 i = 0; i < cache_.size(); i++) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat("cache[", i, "]_size"), cache_[i].size())); + for (int64 j = 0; j < cache_[i].size(); j++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + strings::StrCat("cache[", i, "][", j, "]"), cache_[i][j])); + } + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } + // Restore cache. + int64 cache_size; + TF_RETURN_IF_ERROR( + reader->ReadScalar(strings::StrCat("cache_size"), &cache_size)); + cache_.resize(cache_size); + for (int64 i = 0; i < cache_size; i++) { + int64 vector_size; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat("cache[", i, "]_size"), &vector_size)); + cache_[i].resize(vector_size); + for (int64 j = 0; j < vector_size; j++) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + strings::StrCat("cache[", i, "][", j, "]"), &cache_[i][j])); + } + } + return Status::OK(); + } + + private: + mutex mu_; + std::vector> cache_ GUARDED_BY(mu_); + std::unique_ptr input_impl_ GUARDED_BY(mu_); + }; + + const int64 window_size_; + const int64 stride_; + const DatasetBase* const input_; + std::vector output_shapes_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU), + SlideDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index 39aa3e9eb0..b74a09e2cb 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); +REGISTER_KERNEL_BUILDER( + Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), + DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 184c703599..0656081177 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -238,6 +238,12 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 108d59db2c..7688305019 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ b/tensorflow/core/kernels/hexagon/BUILD @@ -45,6 +45,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:reduction_ops", "//tensorflow/core/kernels:remote_fused_graph_execute_utils", diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 1401bc65a4..e0706568b1 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -444,6 +444,7 @@ class MklConv2DCustomBackpropFilterOp ~MklConv2DCustomBackpropFilterOp() {} private: + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -492,7 +493,9 @@ class MklConv2DCustomBackpropFilterOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -518,31 +521,32 @@ class MklConv2DCustomBackpropFilterOp bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x); } - // Create convolution backward weights primitive. - auto bwd_desc = - (biasEnabled && (bias_grad != nullptr)) - ? convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, - padding) - : convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), outbackprop->GetOpMemDesc(), strides, - padding_l, padding_r, padding); - - auto bwd_pd = convolution_backward_weights::primitive_desc( - bwd_desc, cpu_engine, conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format, - output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - if (biasEnabled && (bias_grad != nullptr)) { + // Create convolution backward weights with bias primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. + output->SetUsrMemDataHandle(*output_tensor); + // Allocate bias_grad tensor TensorShape bias_grad_shape({depth}); Tensor* bias_grad_tensor = nullptr; @@ -553,11 +557,32 @@ class MklConv2DCustomBackpropFilterOp memory::desc({bias_grad_dims}, MklDnnType(), memory::format::x); bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor); bias_grad->SetUsrMemDataHandle(bias_grad_tensor); - } - if (biasEnabled && (bias_grad != nullptr)) { - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad); + PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, + bias_grad); } else { + // Create convolution backward weights primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. + output->SetUsrMemDataHandle(*output_tensor); PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output); } } diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index eeed009531..d203c04934 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -369,6 +369,7 @@ class MklConv2DCustomBackpropInputOp private: const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0, kInputIndex_OutBackProp = 2; + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -419,7 +420,9 @@ class MklConv2DCustomBackpropInputOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -432,9 +435,16 @@ class MklConv2DCustomBackpropInputOp CHECK_NOTNULL(output_tensor); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc( - convolution_direct, output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, padding); + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding): + convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); auto bwd_pd = convolution_backward_data::primitive_desc( bwd_desc, cpu_engine, conv_fwd_pd); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 1440da8f82..f0818eb96d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -493,6 +493,7 @@ class MklConv2DOp : public OpKernel { ~MklConv2DOp() {} explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); @@ -509,6 +510,20 @@ class MklConv2DOp : public OpKernel { errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); } void Compute(OpKernelContext* context) override { @@ -530,17 +545,19 @@ class MklConv2DOp : public OpKernel { MklDnnData filter(&cpu_engine); MklDnnData output(&cpu_engine); - memory::dims src_dims, filter_dims, padding_l, padding_r, strides; + memory::dims src_dims, filter_dims, padding_l, padding_r, + dilations, strides; memory::dims output_dims_tf_order, output_dims_mkl_order; // Get shapes of input tensors in MKL-DNN order - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); auto src_tf_shape = GetTfShape(context, kInputIndex_Src); auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter); conv_utl.GetConvFwdSizesInMklOrder( src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, - &output_dims_tf_order, &output_dims_mkl_order, &padding_l, - &padding_r); + &dilations, &output_dims_tf_order, &output_dims_mkl_order, + &padding_l, &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -553,6 +570,7 @@ class MklConv2DOp : public OpKernel { // Need semantics for Null MKL tensor MklDnnShape output_mkl_shape; output_mkl_shape.SetMklTensor(false); + AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor, src_tf_shape, output_mkl_shape); @@ -596,55 +614,79 @@ class MklConv2DOp : public OpKernel { filter.SetOpMemDesc(filter_dims, memory::format::any); output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - // If bias is enabled, then do the same steps as above for bias. + // MKLDNN dilation starts from 0. + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + if (biasEnabled) { - MklDnnData bias(&cpu_engine); - memory::dims bias_size; - conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); - const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); - bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); - bias.SetOpMemDesc(bias_size, memory::format::any); - - // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, - filter_out_tensor); + // Create convolution primitive with Bias. + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); + bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, dilations, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, + output_dims_mkl_order, tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, + filter_out_tensor); } else { - // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output, - filter_out_tensor); + // Create convolution primitive without Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, + nullptr, &output, filter_out_tensor); } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -658,10 +700,12 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; + const int kDilationH = 0, kDilationW = 1; // Allocate output tensor. void AllocateOutputTensor( diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 9dd88221a8..7ca10db895 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -58,13 +58,16 @@ class MklDnnConvUtil { protected: OpKernelContext* context_; // We don't own this. std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm) - : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + Padding pad, TensorFormat fm, + const std::vector& dilations) : + context_(context), strides_(strides), padding_(pad), + data_format_(fm), dilations_(dilations) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -78,6 +81,16 @@ class MklDnnConvUtil { *strides = {stride_rows, stride_cols}; } + // Calculate Convolution dilations + virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { + // For now we take the dilation from the second and third dimensions only + // (we do not support dilation on the batch or depth dimension). + CHECK_NOTNULL(dilations); + int dilations_rows = GetTensorDim(dilations_, data_format_, 'H'); + int dilations_cols = GetTensorDim(dilations_, data_format_, 'W'); + *dilations = {dilations_rows, dilations_cols}; + } + // Calculate Convolution input size in MKL-DNN order. MKL-DNN // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's @@ -213,7 +226,8 @@ class MklDnnConvUtil { // TODO(nhasabni): Add similar function for input and filter in MklShape. virtual inline void GetOutputAndPadSizeInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, - const memory::dims& strides, memory::dims* output_dims_tf_order, + const memory::dims& strides, const memory::dims& dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -232,6 +246,8 @@ class MklDnnConvUtil { // Stride is vector of 2 elements: {s_r, s_c} int stride_rows = strides[0]; int stride_cols = strides[1]; + int dilation_rows = dilations[0]; + int dilation_cols = dilations[1]; // Output batch is same as input batch. int out_batch = GetTensorDim(input_shape, data_format_, 'N'); @@ -241,11 +257,13 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_rows, filter_rows, + dilation_rows, stride_rows, padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, + dilation_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) @@ -271,7 +289,8 @@ class MklDnnConvUtil { // // Function does not return anything, but sets error in context status. inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, const memory::dims& strides, + size_t src_index, size_t filter_index, + const memory::dims& strides, const memory::dims& dilations, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -286,9 +305,9 @@ class MklDnnConvUtil { errors::InvalidArgument("input must be 4-dimensional", input_tf_shape.DebugString())); - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, + strides, dilations, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -300,12 +319,14 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims* output_dims_tf_order, + memory::dims* strides, memory::dims *dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); + CHECK_NOTNULL(dilations); CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -316,7 +337,9 @@ class MklDnnConvUtil { GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); if (!context_->status().ok()) return; GetStridesInMklOrder(strides); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, + GetDilationsInMklOrder(dilations); + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, + *strides, *dilations, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; @@ -344,7 +367,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -406,15 +443,16 @@ class MklConv2DBackpropCommonOp : public OpKernel { // By default, all dims are in MKL order. Only dims in TF order // are those with prefix tf_order. memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, strides, fwd_output_dims; + memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; memory::dims fwd_output_dims_tf_order; // Get forward convolution parameters. - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); conv_utl.GetConvFwdSizesInMklOrder( input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward @@ -437,10 +475,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { memory::format::hwio); // Tensorflow Output of Conv2D is in data_format order. auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_input_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + + const int kDilationH = 0, kDilationW = 1; + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)) : + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Create memory for user data. Describe how the inputs and outputs of @@ -485,8 +534,9 @@ class MklConv2DBackpropCommonOp : public OpKernel { // Operator-specific call to create and execute primitive. CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_), + &outbackprop, &output, &output_tensor, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_), bwd_output_dims, bwd_output_format); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -535,20 +585,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { virtual memory::format GetOutputFormat(const memory::format data_format) = 0; /// Create and execute the primitive storing output in the output_tensor. - virtual void CreatePrimitive( - OpKernelContext* context, const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, - const memory::dims& strides, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; + virtual void CreatePrimitive(OpKernelContext* context, + const engine& cpu_engine, + const convolution_forward::primitive_desc& conv_fwd_pd, + MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, + MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, + const memory::dims& padding_r, padding_kind padding, + const memory::dims& bwd_output_dims, + memory::format bwd_output_format) = 0; // Get the data_format {NCHW, NHWC} TensorFormat GetTFDataFormat() { return data_format_; } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index e9a2376b54..d91f7107c5 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -442,12 +442,11 @@ class MklInputConversionOp : public OpKernel { auto input_tf_md = mkl_output_mkl_shape.GetTfLayout(); tf_input.SetUsrMem(input_tf_md, tf_tensor); - // Create reorder between tensorflow layout and Mkl layout. + // Create reorder between tensorflow layout and Mkl layout if necessary std::vector net; - CHECK_EQ(tf_input.CheckReorderToOpMem( + tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), - tensor_out, &net), - true); + tensor_out, &net); stream(stream::kind::eager).submit(net).wait(); // -- The tensor in MKL format passes through -- diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 267f4f8d12..0a0f69522f 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -392,7 +392,7 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -437,11 +437,15 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst, - dnn_shape_dst); + + // Allocate output and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {src_index}, dst_index, tf_shape_dst, &dst_tensor)); + AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); // Destination memory descriptor is same as source memory descriptor. - auto dst_md = src_md; + auto &dst_md = src_md; dst.SetUsrMem(dst_md, dst_tensor); // execute net @@ -492,7 +496,7 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -603,8 +607,13 @@ class MklReluGradOpBase : public OpKernel { // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, - tf_shape_diff_src, dnn_shape_diff_src); + + // Allocate diff_src and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {diff_dst_index}, diff_src_index, tf_shape_diff_src, + &diff_src_tensor)); + AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); // diff_src memory descriptor is same as memory descriptor for both // inputs. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 4abfbfb1a6..d0703d7576 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -13,8 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ -#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ + + +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -130,4 +138,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index 23df1c35e5..e59adfc6ac 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); +REGISTER_KERNEL_BUILDER( + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index db05ca1ed2..f38459724a 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -234,6 +234,12 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::SpaceToDepthOpFunctor; diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 5bd79778a6..0b006fa2b4 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -55,6 +55,4 @@ StringPiece StringPiece::substr(size_t pos, size_t n) const { return StringPiece(data_ + pos, n); } -const StringPiece::size_type StringPiece::npos = size_type(-1); - } // namespace tensorflow diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 910e4d9e2a..2d00f717dc 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -65,7 +65,7 @@ class StringPiece { iterator begin() const { return data_; } iterator end() const { return data_ + size_; } - static const size_t npos; + static const size_t npos = size_type(-1); // Return the ith byte in the referenced data. // REQUIRES: n < size() diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 254fdf115d..6de850bb20 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -205,7 +205,9 @@ Status RecordReader::SkipNBytes(uint64 offset) { if (options_.buffer_size > 0) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); } +#if !defined(IS_SLIM_BUILD) } +#endif return Status::OK(); } // namespace io diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 62dd2efb79..26278e0328 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_RECORD_READER_H_ #define TENSORFLOW_LIB_IO_RECORD_READER_H_ -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" -#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/inputstream_interface.h" +#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_inputstream.h" #endif // IS_SLIM_BUILD diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index bdbbf6d7c3..9a4b616e5d 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -265,6 +265,16 @@ REGISTER_OP("BatchDataset") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape); +// TODO(mrry): move SlideDataset to contrib in the future. +REGISTER_OP("SlideDataset") + .Input("input_dataset: variant") + .Input("window_size: int64") + .Input("stride: int64") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") .Input("batch_size: int64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 910fbaca9e..d6a0f38033 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1498,6 +1498,7 @@ REGISTER_OP("_MklConv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Conv2D operator. Uses MKL DNN APIs to perform 2D convolution. @@ -1516,6 +1517,7 @@ REGISTER_OP("__MklDummyConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node does not perform anything. It is just created as an intermediate output of @@ -1541,6 +1543,7 @@ REGISTER_OP("_MklConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2D and BiasAdd operator. Uses MKL DNN APIs to perform 2D convolution and add Bias to the output of convolution. @@ -1563,6 +1566,7 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1589,6 +1593,7 @@ REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1633,6 +1638,7 @@ REGISTER_OP("_MklConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1668,6 +1674,7 @@ REGISTER_OP("_MklConv2DWithBiasBackpropBias") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2DBackpropBias. Uses MKL DNN APIs to compute the gradients of convolution with respect to the bias. @@ -1690,6 +1697,7 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index 8f7bff1bb0..eebbeaeba6 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -103,7 +103,7 @@ class Tracing { friend class ScopedAnnotation; friend class TraceMe; - static std::atomic tracing_engine_; + TF_EXPORT static std::atomic tracing_engine_; static Tracing::Engine* engine() { return tracing_engine_.load(std::memory_order_acquire); } diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index b6b3722caa..682e46e0fc 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -382,7 +382,8 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( Status WindowsFileSystem::FileExists(const string& fname) { constexpr int kOk = 0; - if (_access(TranslateName(fname).c_str(), kOk) == 0) { + std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); + if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { return Status::OK(); } return errors::NotFound(fname, " not found"); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 7405e01e14..22f2c02b78 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 9f6fe91b14..6d0458e678 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -51,6 +51,8 @@ Europe: TensorFlow provides multiple communication paths. To pick the right path, please read the following list carefully: + * For new release announcements and security updates, subscribe to + [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). * To ask or answer technical questions about TensorFlow, use [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, ask or search Stack Overflow about a particular error message @@ -65,5 +67,5 @@ please read the following list carefully: on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. * To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md). + [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 818798555a..0481c97885 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 4c6dfa8daf..8f89898c92 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 527884863e..0ee9c849e1 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0-rc1 + 1.6.0 org.tensorflow libtensorflow_jni_gpu - 1.6.0-rc1 + 1.6.0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
d +
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index e3e115d9f6..3e8744bf9d 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -41,7 +41,8 @@ must be installed on your system: [NVIDIA's documentation](https://developer.nvidia.com/cudnn). Ensure that you create the `CUDA_HOME` environment variable as described in the NVIDIA documentation. - * GPU card with CUDA Compute Capability 3.0 or higher. See + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. @@ -188,7 +189,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +294,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -356,24 +357,23 @@ where: to 6006. * TensorFlowCPUImage is required. It identifies the Docker container. Specify one of the following values: - * gcr.io/tensorflow/tensorflow, which is the TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel, which is the latest + * tensorflow/tensorflow, which is the TensorFlow CPU binary image. + * tensorflow/tensorflow:latest-devel, which is the latest TensorFlow CPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version, which is the + * tensorflow/tensorflow:version, which is the specified version (for example, 1.1.0rc1) of TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel, which is + * tensorflow/tensorflow:version-devel, which is the specified version (for example, 1.1.0rc1) of the TensorFlow GPU binary image plus source code. - gcr.io is the Google Container Registry. Note that some - TensorFlow images are also available at + TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches the latest TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
The following command also launches the latest TensorFlow CPU binary image in a @@ -381,7 +381,7 @@ Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+$ docker run -it -p 8888:8888 tensorflow/tensorflow
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -405,14 +405,14 @@ where: hostPort and containerPort to `8888`. * TensorFlowGPUImage specifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow:latest-gpu, which is the latest + * tensorflow/tensorflow:latest-gpu, which is the latest TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel-gpu, which is + * tensorflow/tensorflow:latest-devel-gpu, which is the latest TensorFlow GPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version-gpu, which is the + * tensorflow/tensorflow:version-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel-gpu, which is + * tensorflow/tensorflow:version-devel-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image plus source code. @@ -421,7 +421,7 @@ following command launches the latest TensorFlow GPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ nvidia-docker run -it gcr.io/tensorflow/tensorflow:latest-gpu bash
+$ nvidia-docker run -it tensorflow/tensorflow:latest-gpu bash
 
The following command also launches the latest TensorFlow GPU binary image @@ -429,13 +429,13 @@ in a Docker container. In this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu
 
The following command installs an older TensorFlow version (0.12.1):
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:0.12.1-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:0.12.1-gpu
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -505,7 +505,7 @@ If you installed through Docker, start a Docker container from which you can run bash. For example:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
@@ -647,14 +647,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -666,14 +666,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -685,14 +685,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
 
@@ -704,14 +704,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 623ca6bb79..205db8e6bd 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -292,24 +292,23 @@ where: to 6006. * TensorFlowImage is required. It identifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow: TensorFlow binary image. - * gcr.io/tensorflow/tensorflow:latest-devel: TensorFlow + * tensorflow/tensorflow: TensorFlow binary image. + * tensorflow/tensorflow:latest-devel: TensorFlow Binary image plus source code. -gcr.io is the Google Container Registry. Note that some -TensorFlow images are also available at +The TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches a TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
The following command also launches a TensorFlow CPU binary image in a Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook: -
$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+
$ docker run -it -p 8888:8888 tensorflow/tensorflow
Docker will download the TensorFlow binary image the first time you launch it. @@ -351,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl @@ -376,7 +375,7 @@ do the following: If you installed through Docker, start a Docker container that runs bash. For example: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
@@ -524,7 +523,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
 
@@ -532,5 +531,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index acf0af0d9d..c09c9c2c0c 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0rc1 on Linux: +for TensorFlow 1.6.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index f0a30ee394..2413bc9cfb 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install: NVIDIA® GPU, you must install this version. Note that this version of TensorFlow is typically much easier to install (typically, in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. + installing this version first. Prebuilt binaries will use AVX instructions. * **TensorFlow with GPU support**. TensorFlow programs typically run significantly faster on a GPU than on a CPU. Therefore, if your system has a NVIDIA® GPU meeting the prerequisites shown below @@ -41,7 +41,8 @@ installed on your system: Note that cuDNN is typically installed in a different location from the other CUDA DLLs. Ensure that you add the directory where you installed the cuDNN DLL to your `%PATH%` environment variable. - * GPU card with CUDA Compute Capability 3.0 or higher. See + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. diff --git a/tensorflow/docs_src/performance/xla/jit.md b/tensorflow/docs_src/performance/xla/jit.md index d4dc3e57c8..d9a979ccbd 100644 --- a/tensorflow/docs_src/performance/xla/jit.md +++ b/tensorflow/docs_src/performance/xla/jit.md @@ -157,7 +157,7 @@ to fuse Ops is visible by starting at `hlo_graph_0.dot` and viewing each diagram in succession. To Render the .dot file into a png, install -[GraphViz](http://www.graphviz.org/Download..php) and run: +[GraphViz](https://www.graphviz.org/download/) and run: ```shell dot -Tpng hlo_graph_80.dot -o hlo_graph_80.png diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 5fb1c2da88..d1399814ee 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -459,7 +459,7 @@ accuracy_score = classifier.evaluate(x=test_set.data, [debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py), -based on {$tflearn$tf-learn's iris tutorial}, contains a full example of how to +based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.2/get_started/tflearn), contains a full example of how to use the tfdbg with `Estimator`s. To run this example, do: ```none @@ -753,6 +753,7 @@ There are three possible workarounds or solutions: # For LocalCLIDebugHook hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")] ``` + Make sure that the directory pointed to by dump_root is empty or nonexistent. tfdbg cleans up the dump directories before exiting. * Reduce the batch size used during the runs. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 1548d43877..392ac6f7f1 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -159,7 +159,7 @@ available. These operations allow you to build sophisticated @{$reading_data$input pipelines}, at the cost of making the TensorFlow computation somewhat more complicated. See the how-to documentation for -@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers} +@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers} for more information on how to use them. ## Variables @@ -272,7 +272,7 @@ Prefer predefined TensorFlow operations such as @{tf.decode_raw}, If your data is not easily parsable with the built-in TensorFlow operations, consider converting it, offline, to a format that is easily parsable, such -as ${tf.python_io.TFRecordWriter$`TFRecord`} format. +as @{tf.python_io.TFRecordWriter$`TFRecord`} format. The more efficient method to customize the parsing behavior is to @{$adding_an_op$add a new op written in C++} that parses your diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md index 79280d246a..fadfa03e78 100644 --- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md +++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md @@ -83,7 +83,7 @@ data than you need, though. Instead, consider running the merged summary op every `n` steps. The code example below is a modification of the -@{$layers$simple MNIST tutorial}, +[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py), in which we have added some summary ops, and run them every ten steps. If you run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able to visualize statistics, such as how the weights or accuracy varied during diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index d74d7f3181..a9c2cb3e33 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -129,10 +129,9 @@ my_tpu_estimator = tf.contrib.tpu.TPUEstimator( Typically the `FLAGS` would be set by command line arguments. To switch from training locally to training on a cloud TPU you would need to: - 1) Set `FLAGS.use_tpu` to `True` - 1) Set `FLAGS.tpu_name` so the - `tf.contrib.cluster_resolver.TPUClusterResolver` can find it - 1) Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). +* Set `FLAGS.use_tpu` to `True` +* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it +* Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). ## Optimizer diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ee03f440c9..9b17d0d4d5 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -193,7 +193,7 @@ to calculate loss, configure the training op, and generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training-and-evaluating-the-cnn-mnist-classifier). +Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). ### Input Layer @@ -446,7 +446,7 @@ tf.nn.softmax(logits, name="softmax_tensor") > Note: We use the `name` argument to explicitly name this operation > `softmax_tensor`, so we can reference it later. (We'll set up logging for the -> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook). +> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook)). We compile our predictions in a dict, and return an `EstimatorSpec` object: @@ -534,9 +534,8 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining -> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in -> tf.estimator"} tutorial. +> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial. ### Add evaluation metrics @@ -625,7 +624,8 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > Note: If you don't explicitly assign a name to an operation via the `name` > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on -> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}. +> @{$graph_viz$TensorBoard}) or to enable the +> @{$programmers_guide/debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index e22536adb6..7584a76ba5 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -109,7 +109,8 @@ This download will take a while and download a bit more than 23GB of data. To convert the `ndjson` files to @{$python/python_io#tfrecords_format_details$TFRecord} files containing -${tf.train.Example} protos run the following command. +[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +protos run the following command. ```shell python create_dataset.py --ndjson_path rnn_tutorial_data \ diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index 005dc020f9..27ce75a30d 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -74,8 +74,8 @@ Here's a list of columns available in the Census Income dataset: | relationship | Categorical | Wife, Own-child, Husband, | : : : Not-in-family, Other-relative, : : : : Unmarried. : -| race | Categorical | White, Asian-Pac-Islander, | -: : : Amer-Indian-Eskimo, Other, Black. : +| race | Categorical | Amer-Indian-Eskimo, Asian-Pac- | +: : : Islander, Black, White, Other. : | gender | Categorical | Female, Male. | | capital_gain | Continuous | Capital gains recorded. | | capital_loss | Continuous | Capital Losses recorded. | @@ -247,7 +247,7 @@ hours_per_week = tf.feature_column.numeric_column('hours_per_week') ### Making Continuous Features Categorical through Bucketization Sometimes the relationship between a continuous feature and the label is not -linear. As an hypothetical example, a person's income may grow with age in the +linear. As a hypothetical example, a person's income may grow with age in the early stage of one's career, then the growth may slow at some point, and finally the income decreases after retirement. In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can @@ -361,6 +361,16 @@ The first line of the final output should be something like `accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try more features and transformations and see if you can do even better! +After the model is evaluated, we can use the model to predict whether an individual has an annual income of over +50,000 dollars given an individual's information input. +```python + pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1)) + for pred in pred_iter: + print(pred['classes']) +``` + +The model prediction output would be like `[b'1']` or `[b'0']` which means whether corresponding individual has an annual income of over 50,000 dollars or not. + If you'd like to see a working end-to-end example, you can download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) and set the `model_type` flag to `wide`. diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index bb75431a1f..5c47ce6b67 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -40,6 +40,7 @@ + @@ -49,6 +50,7 @@ + @@ -58,6 +60,7 @@ + @@ -67,6 +70,7 @@ + diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java index 8bd4abb154..429138abe5 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java @@ -351,6 +351,10 @@ public abstract class CameraActivity extends Activity protected void setFragment() { String cameraId = chooseCamera(); + if (cameraId == null) { + Toast.makeText(this, "No Camera Detected", Toast.LENGTH_SHORT).show(); + finish(); + } Fragment fragment; if (useCamera2API) { @@ -416,7 +420,8 @@ public abstract class CameraActivity extends Activity @Override public boolean onKeyDown(final int keyCode, final KeyEvent event) { - if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP) { + if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP + || keyCode == KeyEvent.KEYCODE_BUTTON_L1 || keyCode == KeyEvent.KEYCODE_DPAD_CENTER) { debug = !debug; requestRender(); onSetDebug(debug); diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java index 6a66ec3927..33ec65e9f7 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java @@ -16,8 +16,10 @@ package org.tensorflow.demo; +import android.app.UiModeManager; import android.content.Context; import android.content.res.AssetManager; +import android.content.res.Configuration; import android.graphics.Bitmap; import android.graphics.Bitmap.Config; import android.graphics.BitmapFactory; @@ -31,9 +33,11 @@ import android.graphics.Typeface; import android.media.ImageReader.OnImageAvailableListener; import android.os.Bundle; import android.os.SystemClock; +import android.util.DisplayMetrics; import android.util.Size; import android.util.TypedValue; import android.view.Display; +import android.view.KeyEvent; import android.view.MotionEvent; import android.view.View; import android.view.View.OnClickListener; @@ -43,6 +47,7 @@ import android.widget.BaseAdapter; import android.widget.Button; import android.widget.GridView; import android.widget.ImageView; +import android.widget.RelativeLayout; import android.widget.Toast; import java.io.IOException; import java.io.InputStream; @@ -381,6 +386,27 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL grid = (GridView) findViewById(R.id.grid_layout); grid.setAdapter(adapter); grid.setOnTouchListener(gridTouchAdapter); + + // Change UI on Android TV + UiModeManager uiModeManager = (UiModeManager) getSystemService(UI_MODE_SERVICE); + if (uiModeManager.getCurrentModeType() == Configuration.UI_MODE_TYPE_TELEVISION) { + DisplayMetrics displayMetrics = new DisplayMetrics(); + getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); + int styleSelectorHeight = displayMetrics.heightPixels; + int styleSelectorWidth = displayMetrics.widthPixels - styleSelectorHeight; + RelativeLayout.LayoutParams layoutParams = new RelativeLayout.LayoutParams(styleSelectorWidth, ViewGroup.LayoutParams.MATCH_PARENT); + + // Calculate number of style in a row, so all the style can show up without scrolling + int numOfStylePerRow = 3; + while (styleSelectorWidth / numOfStylePerRow * Math.ceil((float) (adapter.getCount() - 2) / numOfStylePerRow) > styleSelectorHeight) { + numOfStylePerRow++; + } + grid.setNumColumns(numOfStylePerRow); + layoutParams.addRule(RelativeLayout.ALIGN_PARENT_RIGHT); + grid.setLayoutParams(layoutParams); + adapter.buttons.clear(); + } + setStyle(adapter.items[0], 1.0f); } @@ -602,4 +628,38 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines); } + + @Override + public boolean onKeyDown(int keyCode, KeyEvent event) { + int moveOffset = 0; + switch (keyCode) { + case KeyEvent.KEYCODE_DPAD_LEFT: + moveOffset = -1; + break; + case KeyEvent.KEYCODE_DPAD_RIGHT: + moveOffset = 1; + break; + case KeyEvent.KEYCODE_DPAD_UP: + moveOffset = -1 * grid.getNumColumns(); + break; + case KeyEvent.KEYCODE_DPAD_DOWN: + moveOffset = grid.getNumColumns(); + break; + default: + return super.onKeyDown(keyCode, event); + } + + // get the highest selected style + int currentSelect = 0; + float highestValue = 0; + for (int i = 0; i < adapter.getCount(); i++) { + if (adapter.items[i].value > highestValue) { + currentSelect = i; + highestValue = adapter.items[i].value; + } + } + setStyle(adapter.items[(currentSelect + moveOffset + adapter.getCount()) % adapter.getCount()], 1); + + return true; + } } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a206685af6..3cbeb34c54 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -28,6 +28,7 @@ load("//tensorflow:tensorflow.bzl", "py_tests") load("//tensorflow:tensorflow.bzl", "tf_py_build_info_genrule") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") @@ -106,20 +107,19 @@ py_library( ":training", ":util", ":weights_broadcast_ops", - "//third_party/py/numpy", + "//tensorflow/contrib:contrib_py", "//tensorflow/core:protos_all_py", "//tensorflow/python/data", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", - "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", + "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - ] + if_not_windows([ - "//tensorflow/contrib:contrib_py", - ]), + "//third_party/py/numpy", + ], ) tf_py_build_info_genrule() @@ -947,7 +947,6 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1312,7 +1311,6 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1654,7 +1652,6 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -2721,7 +2718,6 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, - tags = ["no_windows"], ) cuda_py_test( @@ -3305,6 +3301,65 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) +# ** Targets for Windows build (start) ** +# We need the following targets to expose symbols from _pywrap_tensorflow.dll + +# Build a cc_binary from tf_custom_op_library_additional_deps_impl, +# it contains all object code from its dependencies. +cc_binary( + name = "tf_custom_op_library_additional_deps.so", + linkshared = 1, + linkstatic = 1, + deps = tf_custom_op_library_additional_deps_impl(), +) + +# Get a DEF file generated by parsing all object files +# of tf_custom_op_library_additional_deps.so +filegroup( + name = "pywrap_tensorflow_def_file", + srcs = [":tf_custom_op_library_additional_deps.so"], + output_group = "def_file", +) + +# Filter the DEF file to reduce the number of symbols to 64K or less. +# Note that we also write the name of the pyd file into DEF file so that +# the dynamic libraries of custom ops can find it at runtime. +genrule( + name = "pywrap_tensorflow_filtered_def_file", + srcs = [":pywrap_tensorflow_def_file"], + outs = ["pywrap_tensorflow_filtered_def_file.def"], + cmd = select({ + "//tensorflow:windows": """ + $(location @local_config_def_file_filter//:def_file_filter) \\ + --input $(location :pywrap_tensorflow_def_file) \\ + --output $@ \\ + --target _pywrap_tensorflow_internal.pyd + """, + "//conditions:default": "touch $@", # Just a placeholder for Unix platforms + }), + tools = ["@local_config_def_file_filter//:def_file_filter"], +) + +# Get the import library of _pywrap_tensorflow_internal.dll +filegroup( + name = "pywrap_tensorflow_import_lib_file", + srcs = [":_pywrap_tensorflow_internal.so"], + output_group = "interface_library", +) + +# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll +# so that custom ops' dynamic libraries can link against it. +cc_import( + name = "pywrap_tensorflow_import_lib", + interface_library = select({ + "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file", + "//conditions:default": "not_exsiting_on_unix.lib", # Just a placeholder for Unix platforms + }), + system_provided = 1, +) + +# ** Targets for Windows build (end) ** + py_library( name = "lib", srcs = [ @@ -3677,7 +3732,6 @@ py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":lib", @@ -3958,7 +4012,11 @@ py_test( srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", tags = [ + "manual", + "no_cuda_on_cpu_tap", + "no_oss", "no_windows", + "notap", ], deps = [ ":client", @@ -3981,7 +4039,6 @@ py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":checkpoint_ops_gen", ":client", @@ -4022,10 +4079,7 @@ py_test( size = "medium", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", - "notsan", # b/67945581 - ], + tags = ["notsan"], # b/67945581 deps = [ ":array_ops", ":client_testlib", diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 512d292ee2..c60f692390 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -913,6 +913,7 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 9fcbd4ff77..5245a050a1 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -139,8 +139,8 @@ class Estimator(object): to configure Estimators from hyper parameter tuning. * `config`: Optional configuration object. Will receive what is passed to Estimator in `config` parameter, or the default `config`. - Allows updating things in your model_fn based on configuration - such as `num_ps_replicas`, or `model_dir`. + Allows updating things in your `model_fn` based on + configuration such as `num_ps_replicas`, or `model_dir`. * Returns: `EstimatorSpec` @@ -301,11 +301,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. hooks: List of `SessionRunHook` subclass instances. Used for callbacks inside the training loop. @@ -381,11 +381,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. steps: Number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. @@ -457,17 +457,17 @@ class Estimator(object): checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. yield_single_examples: If False, yield the whole batch as returned by the - model_fn instead of decomposing the batch into individual elements. This - is useful if model_fn return some tensor with first dimension not - equal to the batch size + `model_fn` instead of decomposing the batch into individual elements. + This is useful if `model_fn` returns some tensors whose first dimension + is not equal to the batch size. Yields: Evaluated values of `predictions` tensors. Raises: - ValueError: Could not find a trained model in model_dir. - ValueError: if batch length of predictions are not same and - yield_single_examples is True. + ValueError: Could not find a trained model in `model_dir`. + ValueError: If batch length of predictions is not the same and + `yield_single_examples` is True. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 2cc3331a15..e38b765da5 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -128,9 +128,16 @@ class TrainSpec( """Creates a validated `TrainSpec` instance. Args: - input_fn: Training input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that provides input data for training as minibatches. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the @@ -185,9 +192,16 @@ class EvalSpec( """Creates a validated `EvalSpec` instance. Args: - input_fn: Evaluation input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that constructs the input data for evaluation. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + steps: Int. Positive number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. See `Estimator.evaluate` for details. diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index eef91e9c5b..f74881f179 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -636,7 +636,10 @@ py_test( size = "small", srcs = ["_impl/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5b0c38fa5d..f27ca5c205 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -295,7 +295,6 @@ tf_py_test( "//tensorflow/python:nn_grad", ], data = ["//tensorflow/core:image_testdata"], - tags = ["no_windows"], ) tf_py_test( @@ -1139,7 +1138,6 @@ tf_py_test( "//tensorflow/python:variables", ], data = ["//tensorflow/core:lmdb_testdata"], - tags = ["no_windows"], ) cuda_py_test( @@ -2329,7 +2327,6 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 4, - tags = ["no_windows"], ) cuda_py_test( @@ -2460,7 +2457,6 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, - tags = ["no_windows"], ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index f4fe01f868..25525cc128 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -970,7 +970,7 @@ class Conv2DTest(test.TestCase): self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 6, 1], @@ -984,7 +984,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -998,7 +998,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -1012,7 +1012,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 4, 3], @@ -1026,7 +1026,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 3, 1], @@ -1040,7 +1040,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 6, 1], @@ -1054,7 +1054,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 2, 3, 1], @@ -1068,7 +1068,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[0, 2, 3, 1], @@ -1082,7 +1082,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): # The GPU version of this test is not very stable. So adjusting the # error threshold to 1e-4. @@ -1098,7 +1098,7 @@ class Conv2DTest(test.TestCase): err=1e-4) def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 3, 1], diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index 96c9718b83..f0beabb4e2 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -35,8 +35,8 @@ from tensorflow.python.platform import tf_logging class DepthToSpaceTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.depth_to_space(input_nhwc, block_size) @@ -59,6 +59,12 @@ class DepthToSpaceTest(test.TestCase): x_out = [[[[1], [2]], [[3], [4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1, 2, 3, 4]]]] + block_size = 2 + x_out = [[[[1], [2]], [[3], [4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testBlockSize2(self): diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index b76135764f..cd90d16aac 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -34,8 +34,8 @@ from tensorflow.python.platform import tf_logging class SpaceToDepthTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.space_to_depth(input_nhwc, block_size) @@ -58,6 +58,12 @@ class SpaceToDepthTest(test.TestCase): x_out = [[[[1, 2, 3, 4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1], [2]], [[3], [4]]]] + block_size = 2 + x_out = [[[[1, 2, 3, 4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testLargerInput2x2(self): diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index a751607aaa..223858edfa 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -485,6 +485,11 @@ class FileIoTest(test.TestCase): f.flush() self.assertEqual(content, f.read(len(content) + 1)) + def testUTF8StringPathExists(self): + file_path = os.path.join(self._base_dir, "UTF8测试_file_exist") + file_io.write_string_to_file(file_path, "testing") + v = file_io.file_exists(file_path) + self.assertEqual(v, True) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index fb3fe77b4d..87fe253f18 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -699,7 +699,7 @@ def convolution( `padded_input` is obtained by zero padding the input using an effective spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and output striding `strides` as described in the - @{tf.nn.convolution$comment here}. + @{$python/nn#Convolution$comment here}. In the case that `data_format` does start with `"NC"`, the `input` and output (but not the `filter`) are simply transposed as follows: diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index c59eccc174..42af7f8b27 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -867,7 +867,7 @@ def raw_rnn(cell, loop_fn, ```python time = tf.constant(0, dtype=tf.int32) - (finished, next_input, initial_state, _, loop_state) = loop_fn( + (finished, next_input, initial_state, emit_structure, loop_state) = loop_fn( time=time, cell_output=None, cell_state=None, loop_state=None) emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) state = initial_state @@ -878,7 +878,7 @@ def raw_rnn(cell, loop_fn, loop_state=loop_state) # Emit zeros and copy forward state for minibatch entries that are finished. state = tf.where(finished, state, next_state) - emit = tf.where(finished, tf.zeros_like(emit), emit) + emit = tf.where(finished, tf.zeros_like(emit_structure), emit) emit_ta = emit_ta.write(time, emit) # If any new minibatch entries are marked as finished, mark these. finished = tf.logical_or(finished, next_finished) @@ -938,10 +938,15 @@ def raw_rnn(cell, loop_fn, and `emit_output`: the output to store for this iteration. Note that `emit_output` should be a `Tensor` or (possibly nested) - tuple of tensors with shapes and structure matching `cell.output_size` - and `cell_output` above. The parameter `cell_state` and output - `next_cell_state` may be either a single or (possibly nested) tuple - of tensors. The parameter `loop_state` and + tuple of tensors which is aggregated in the `emit_ta` inside the + `while_loop`. For the first call to `loop_fn`, the `emit_output` + corresponds to the `emit_structure` which is then used to determine the + size of the `zero_tensor` for the `emit_ta` (defaults to + `cell.output_size`). For the subsequent calls to the `loop_fn`, the + `emit_output` corresponds to the actual output tensor + that is to be aggregated in the `emit_ta`. The parameter `cell_state` + and output `next_cell_state` may be either a single or (possibly nested) + tuple of tensors. The parameter `loop_state` and output `next_loop_state` may be either a single or (possibly nested) tuple of `Tensor` and `TensorArray` objects. This last parameter may be ignored by `loop_fn` and the return value may be `None`. If it diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py index 6d7eaababc..5e2146b79f 100644 --- a/tensorflow/python/ops/special_math_ops.py +++ b/tensorflow/python/ops/special_math_ops.py @@ -163,7 +163,7 @@ def einsum(equation, *inputs, **kwargs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) @@ -402,7 +402,7 @@ def _exponential_space_einsum(equation, *inputs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py index 2c212f4548..d7c3a7e8dc 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -192,6 +192,9 @@ class EinsumTest(test.TestCase): 'abc,cba', 'dba,ead,cad->bce', 'aef,fbc,dca->bde', + 'iJ,Jk->ik', + 'iJ,Ki->JK', + 'iJk,Jklm->Jk' ] long_cases = [ @@ -208,6 +211,8 @@ class EinsumTest(test.TestCase): 'ijk ijk', 'ij.jk->ik', 'ij...,jk...->ik...', + 'ij,k ->kji', + 'ij,k-> kji', # axis in output that does not exist 'ij,jk->im', diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index a52f325ddb..e9f1def48c 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -56,8 +56,6 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as saver_lib -FLAGS = None - def freeze_graph_with_def_protos(input_graph_def, input_saver_def, @@ -256,25 +254,24 @@ def freeze_graph(input_graph, checkpoint_version=checkpoint_version) -def main(unused_args): - if FLAGS.checkpoint_version == 1: +def main(unused_args, flags): + if flags.checkpoint_version == 1: checkpoint_version = saver_pb2.SaverDef.V1 - elif FLAGS.checkpoint_version == 2: + elif flags.checkpoint_version == 2: checkpoint_version = saver_pb2.SaverDef.V2 else: print("Invalid checkpoint version (must be '1' or '2'): %d" % - FLAGS.checkpoint_version) + flags.checkpoint_version) return -1 - freeze_graph(FLAGS.input_graph, FLAGS.input_saver, FLAGS.input_binary, - FLAGS.input_checkpoint, FLAGS.output_node_names, - FLAGS.restore_op_name, FLAGS.filename_tensor_name, - FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes, - FLAGS.variable_names_whitelist, FLAGS.variable_names_blacklist, - FLAGS.input_meta_graph, FLAGS.input_saved_model_dir, - FLAGS.saved_model_tags, checkpoint_version) - + freeze_graph(flags.input_graph, flags.input_saver, flags.input_binary, + flags.input_checkpoint, flags.output_node_names, + flags.restore_op_name, flags.filename_tensor_name, + flags.output_graph, flags.clear_devices, flags.initializer_nodes, + flags.variable_names_whitelist, flags.variable_names_blacklist, + flags.input_meta_graph, flags.input_saved_model_dir, + flags.saved_model_tags, checkpoint_version) -if __name__ == "__main__": +def run_main(): parser = argparse.ArgumentParser() parser.register("type", "bool", lambda v: v.lower() == "true") parser.add_argument( @@ -376,5 +373,10 @@ if __name__ == "__main__": separated by \',\'. For tag-set contains multiple tags, all tags \ must be passed in.\ """) - FLAGS, unparsed = parser.parse_known_args() - app.run(main=main, argv=[sys.argv[0]] + unparsed) + flags, unparsed = parser.parse_known_args() + + my_main = lambda unused_args: main(unused_args, flags) + app.run(main=my_main, argv=[sys.argv[0]] + unparsed) + +if __name__ == '__main__': + run_main() diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b0e9e3e5ed..b88be4ae04 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -38,11 +38,15 @@ from tensorflow.core.example import example_pb2 from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.debug.wrappers import local_cli_wrapper +from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils +# Set of ops to blacklist. +_OP_BLACKLIST = set(['WriteFile', 'ReadFile']) + def _show_tag_sets(saved_model_dir): """Prints the tag-sets stored in SavedModel directory. @@ -242,6 +246,27 @@ def get_signature_def_map(saved_model_dir, tag_set): return meta_graph.signature_def +def scan_meta_graph_def(meta_graph_def): + """Scans meta_graph_def and reports if there are ops on blacklist. + + Print ops if they are on black list, or print success if no blacklisted ops + found. + + Args: + meta_graph_def: MetaGraphDef protocol buffer. + """ + all_ops_set = set( + meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) + blacklisted_ops = _OP_BLACKLIST & all_ops_set + if blacklisted_ops: + # TODO(yifeif): print more warnings + print('MetaGraph with tag set %s contains the following blacklisted ops:' % + meta_graph_def.meta_info_def.tags, blacklisted_ops) + else: + print('MetaGraph with tag set %s does not contain blacklisted ops.' % + meta_graph_def.meta_info_def.tags) + + def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key, input_tensor_key_feed_dict, outdir, overwrite_flag, tf_debug=False): @@ -609,6 +634,21 @@ def run(args): args.overwrite, tf_debug=args.tf_debug) +def scan(args): + """Function triggered by scan command. + + Args: + args: A namespace parsed from command line. + """ + if args.tag_set: + scan_meta_graph_def( + saved_model_utils.get_meta_graph_def(args.dir, args.tag_set)) + else: + saved_model = reader.read_saved_model(args.dir) + for meta_graph_def in saved_model.meta_graphs: + scan_meta_graph_def(meta_graph_def) + + def create_parser(): """Creates a parser that parse the command line arguments. @@ -730,6 +770,26 @@ def create_parser(): 'SavedModel.') parser_run.set_defaults(func=run) + # scan command + scan_msg = ('Usage example:\n' + 'To scan for blacklisted ops in SavedModel:\n' + '$saved_model_cli scan --dir /tmp/saved_model\n' + 'To scan a specific MetaGraph, pass in --tag_set\n') + parser_scan = subparsers.add_parser( + 'scan', + description=scan_msg, + formatter_class=argparse.RawTextHelpFormatter) + parser_scan.add_argument( + '--dir', + type=str, + required=True, + help='directory containing the SavedModel to execute') + parser_scan.add_argument( + '--tag_set', + type=str, + help='tag-set of graph in SavedModel to scan, separated by \',\'') + parser_scan.set_defaults(func=scan) + return parser diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index f99c844845..eedc893a38 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -525,6 +525,28 @@ signature_def['serving_default']: y_expected = np.array([[2.5], [3.0]]) self.assertAllClose(y_expected, y_actual) + def testScanCommand(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = self.parser.parse_args(['scan', '--dir', base_path]) + with captured_output() as (out, _): + saved_model_cli.scan(args) + output = out.getvalue().strip() + self.assertTrue('does not contain blacklisted ops' in output) + + def testScanCommandFoundBlacklistedOp(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = self.parser.parse_args( + ['scan', '--dir', base_path, '--tag_set', 'serve']) + op_blacklist = saved_model_cli._OP_BLACKLIST + saved_model_cli._OP_BLACKLIST = set(['VariableV2']) + with captured_output() as (out, _): + saved_model_cli.scan(args) + saved_model_cli._OP_BLACKLIST = op_blacklist + output = out.getvalue().strip() + self.assertTrue('\'VariableV2\'' in output) + if __name__ == '__main__': test.main() diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 0b3b060fe7..03e3e0857f 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -274,7 +274,8 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 7000 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \ - __macro(cudnnSetConvolutionMathType) + __macro(cudnnSetConvolutionMathType) \ + __macro(cudnnSetRNNMatrixMathType) // clang-format on CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -586,6 +587,19 @@ static bool TensorOpMathEnabled() { return is_enabled; } +// A helper function to decide whether to enable the TENSOR_OP_MATH math type +// for RNNs. +static bool RnnTensorOpMathEnabled() { + static bool is_enabled = [] { + bool is_disabled = false; + TF_CHECK_OK( + tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH", + /*default_val=*/false, &is_disabled)); + return !is_disabled; + }(); + return is_enabled; +} + // A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT // in batchnorm. This mode can be faster in some tasks because an optimized path // may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute @@ -1124,6 +1138,9 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { SetFailure(cudnn_params_desc_->Status()); return; } + if (data_type == CUDNN_DATA_HALF) { + set_use_tensor_op_math(true); + } } ~CudnnRnnDescriptor() override { if (rnn_desc_) { @@ -1132,6 +1149,20 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { CUDNN_RETURN_IF_FAIL(status, "Unable to destroy RNN descriptor"); } } + void set_use_tensor_op_math(bool use_tensor_op_math) { +#if CUDNN_VERSION >= 7000 + cudnnMathType_t math_type = + (use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH); + if (RnnTensorOpMathEnabled()) { + cudnnStatus_t status = + wrap::cudnnSetRNNMatrixMathType(parent_, rnn_desc_, math_type); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "could not set cudnn RNN math type: " + << ToString(status); + } + } +#endif + } cudnnRNNDescriptor_t handle() const { if (!ok()) return nullptr; return rnn_desc_; diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index ae305a28e0..a0ec1708d5 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1168,6 +1168,20 @@ def tf_custom_op_library_additional_deps(): "@protobuf_archive//:protobuf_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), + ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"]) + +# A list of targets that contains the implemenation of +# tf_custom_op_library_additional_deps. It's used to generate a DEF file for +# exporting symbols from _pywrap_tensorflow.dll on Windows. +def tf_custom_op_library_additional_deps_impl(): + return [ + # for @nsync//:nsync_headers + "//third_party/nsync:nsync_cpp", + # for //third_party/eigen3 + clean_dep("//third_party/eigen3"), + # for //tensorflow/core:framework_headers_lib + clean_dep("//tensorflow/core:framework"), + clean_dep("//tensorflow/core:reader_base"), ] # Traverse the dependency graph along the "deps" attribute of the @@ -1254,6 +1268,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(is_external=True), + features = ["windows_export_all_symbols"], linkopts=linkopts + select({ "//conditions:default": [ "-lm", @@ -1400,7 +1415,8 @@ def tf_py_wrap_cc(name, ]) + tf_extension_copts()), linkopts=tf_extension_linkopts() + extra_linkopts, linkstatic=1, - deps=deps + extra_deps) + deps=deps + extra_deps, + **kwargs) native.genrule( name="gen_" + cc_library_pyd_name, srcs=[":" + cc_library_name], diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 5268bba3cc..baa7a0889d 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -247,6 +247,8 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor = public_api.PublicAPIVisitor(visitor) public_api_visitor.do_not_descend_map['tf'].append('contrib') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] + # TODO(annarev): Make slide_dataset available in API. + public_api_visitor.private_map['tf'] = ['slide_dataset'] traverse.traverse(api, public_api_visitor) proto_dict = visitor.GetProtos() diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake index ec90c83aac..d5dea4f3e4 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cmake +++ b/tensorflow/tools/ci_build/Dockerfile.cmake @@ -23,11 +23,12 @@ RUN /install/install_deb_packages.sh RUN apt-get update RUN apt-get install -y --no-install-recommends python-pip +RUN pip install --upgrade wheel RUN pip install --upgrade astor RUN pip install --upgrade gast RUN pip install --upgrade numpy RUN pip install --upgrade termcolor # Install golang -RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable -RUN apt-get install -y golang +RUN apt-get install -t xenial-backports -y golang-1.9 +ENV PATH=${PATH}:/usr/lib/go-1.9/bin diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 8b8ba31a0d..40189a6d1b 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -65,4 +65,5 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ - //${PY_TEST_DIR}/tensorflow/python/... + //${PY_TEST_DIR}/tensorflow/python/... \ + //${PY_TEST_DIR}/tensorflow/contrib/... diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl new file mode 100644 index 0000000000..3cb72f4979 --- /dev/null +++ b/tensorflow/tools/def_file_filter/BUILD.tpl @@ -0,0 +1,15 @@ +# Description: +# Tools for filtering DEF file for TensorFlow on Windows +# +# On Windows, we use a DEF file generated by Bazel to export +# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). +# The maximum number of symbols that can be exported per DLL is 64K, +# so we have to filter some useless symbols through this python script. + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "def_file_filter", + srcs = ["def_file_filter.py"], + srcs_version = "PY2AND3", +) diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl new file mode 100644 index 0000000000..8bdc03eb0f --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl @@ -0,0 +1,168 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""def_file_filter.py - tool to filter a windows def file. + +The def file can be used to export symbols from the tensorflow dll to enable +tf.load_library(). + +Because the linker allows only 64K symbols to be exported per dll +we filter the symbols down to the essentials. The regular expressions +we use for this are specific to tensorflow. + +TODO: this works fine but there is an issue with exporting +'const char * const' and importing it from a user_ops. The problem is +on the importing end and using __declspec(dllimport) works around it. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import io +import os +import re +import subprocess +import sys +import tempfile + +# External tools we use that come with visual studio sdk +UNDNAME = "%{undname_bin_path}" + +# Exclude if matched +EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") + +# Include if matched before exclude +INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" + r"google::protobuf::internal::ArenaImpl::AllocateAligned|" # for contrib/data/_prefetching_ops + r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops + r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops + r"tensorflow::internal::LogMessage|" + r"tensorflow::internal::LogString|" + r"tensorflow::internal::CheckOpMessageBuilder|" + r"tensorflow::internal::MakeCheckOpValueString|" + r"tensorflow::internal::PickUnusedPortOrDie|" + r"tensorflow::internal::ValidateDevice|" + r"tensorflow::ops::internal::Enter|" + r"tensorflow::strings::internal::AppendPieces|" + r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::io::internal::JoinPathImpl") + +# Include if matched after exclude +INCLUDE_RE = re.compile(r"^(TF_\w*)$|" + r"^(TFE_\w*)$|" + r"nsync::|" + r"tensorflow::|" + r"functor::|" + r"perftools::gputools") + +# We want to identify data members explicitly in the DEF file, so that no one +# can implicitly link against the DLL if they use one of the variables exported +# from the DLL and the header they use does not decorate the symbol with +# __declspec(dllimport). It is easier to detect what a data symbol does +# NOT look like, so doing it with the below regex. +DATA_EXCLUDE_RE = re.compile(r"[)(]|" + r"vftable|" + r"vbtable|" + r"vcall|" + r"RTTI|" + r"protobuf::internal::ExplicitlyConstructed") + +def get_args(): + """Parse command line.""" + filename_list = lambda x: x.split(";") + parser = argparse.ArgumentParser() + parser.add_argument("--input", type=filename_list, + help="paths to input def file", + required=True) + parser.add_argument("--output", help="output deffile", required=True) + parser.add_argument("--target", help="name of the target", required=True) + args = parser.parse_args() + return args + + +def main(): + """main.""" + args = get_args() + + # Pipe dumpbin to extract all linkable symbols from libs. + # Good symbols are collected in candidates and also written to + # a temp file. + candidates = [] + tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False) + for def_file_path in args.input: + def_file = open(def_file_path, 'r') + for line in def_file: + cols = line.split() + sym = cols[0] + tmpfile.file.write(sym + "\n") + candidates.append(sym) + tmpfile.file.close() + + # Run the symbols through undname to get their undecorated name + # so we can filter on something readable. + with open(args.output, "w") as def_fp: + # track dupes + taken = set() + + # Header for the def file. + def_fp.write("LIBRARY " + args.target + "\n") + def_fp.write("EXPORTS\n") + def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") + + # Each symbols returned by undname matches the same position in candidates. + # We compare on undname but use the decorated name from candidates. + dupes = 0 + proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) + for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): + decorated = candidates[idx] + if decorated in taken: + # Symbol is already in output, done. + dupes += 1 + continue + + if not INCLUDEPRE_RE.search(line): + if EXCLUDE_RE.search(line): + continue + if not INCLUDE_RE.search(line): + continue + + if "deleting destructor" in line: + # Some of the symbols convered by INCLUDEPRE_RE export deleting + # destructor symbols, which is a bad idea. + # So we filter out such symbols here. + continue + + if DATA_EXCLUDE_RE.search(line): + def_fp.write("\t" + decorated + "\n") + else: + def_fp.write("\t" + decorated + " DATA\n") + taken.add(decorated) + def_fp.close() + + exit_code = proc.wait() + if exit_code != 0: + print("{} failed, exit={}".format(UNDNAME, exit_code)) + return exit_code + + os.unlink(tmpfile.name) + + print("symbols={}, taken={}, dupes={}" + .format(len(candidates), len(taken), dupes)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl new file mode 100644 index 0000000000..47539b2423 --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -0,0 +1,56 @@ +"""Repository rule for def file filter autoconfiguration. + +This repository reuses Bazel's VC detect mechanism to find undname.exe, +which is a tool used in def_file_filter.py. + +def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. +On Windows, we use a DEF file generated by Bazel to export symbols from the +tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of +symbols that can be exported per DLL is 64K, so we have to filter some useless +symbols through this python script. + +`def_file_filter_config` depends on the following environment variables: + * `BAZEL_VC` + * `BAZEL_VS` + * `VS90COMNTOOLS` + * `VS100COMNTOOLS` + * `VS110COMNTOOLS` + * `VS120COMNTOOLS` + * `VS140COMNTOOLS` +""" + +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path") +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool") +load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail") + +def _def_file_filter_configure_impl(repository_ctx): + if repository_ctx.os.name.lower().find("windows") == -1: + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + repository_ctx.file("def_file_filter.py", "") + return + vc_path = find_vc_path(repository_ctx) + if vc_path == "visual-studio-not-found": + auto_configure_fail("Visual C++ build tools not found on your machine") + undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") + + repository_ctx.template( + "def_file_filter.py", + Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"), + { + "%{undname_bin_path}": undname_bin_path, + }) + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + + +def_file_filter_configure = repository_rule( + implementation = _def_file_filter_configure_impl, + environ = [ + "BAZEL_VC", + "BAZEL_VS", + "VS90COMNTOOLS", + "VS100COMNTOOLS", + "VS110COMNTOOLS", + "VS120COMNTOOLS", + "VS140COMNTOOLS" + ], +) diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index c1b1f79bbd..228d5ee35d 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,6 +17,14 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP +You can test specify version of TensorFlow: + +```shell +./local_test.sh ${whl_file_url} +``` + +For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu. + **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 435f9d0dc9..caae7fd530 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,12 +16,11 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script peforms the following steps: -# 1) Build the docker-in-docker (dind) image capable of running docker and -# Kubernetes (k8s) cluster inside. +# This script performs the following steps: +# 1) Build the docker image capable of running distributed TensorFlow in docker. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh @@ -64,15 +63,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -LOCAL_K8S_CACHE=${HOME}/kubernetes -# Helper function -get_container_id_by_image_name() { - # Get the id of a container by image name - # Usage: get_docker_container_id_by_image_name - - docker ps | grep $1 | awk '{print $1}' -} +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -84,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - die "whl file location is not specified" + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -121,7 +115,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# Build docker-in-docker image for local k8s cluster. +# Build docker image for local distributed TensorFlow cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 3630dbd740..cbcdbf5b80 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -114,6 +114,13 @@ def configure(src_base_path, gen_path, debug=False): for target, src in link_map.items(): if src is None: open(os.path.join(gen_path, target), "w").write("") + elif not os.path.exists(src): + # Git repo is configured in a way we don't support such as having + # packed refs. Even though in a git repo, tf.__git_version__ will not + # be accurate. + # TODO(mikecase): Support grabbing git info when using packed refs. + open(os.path.join(gen_path, target), "w").write("") + spec["git"] = False else: try: # In python 3.5, symlink function exists even on Windows. But requires diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index b7d7fac315..6e21aa2846 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -178,6 +178,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/util/tensor_bundle", ], diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d89afe85c7..d86f65325b 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -182,6 +182,36 @@ Status FuseBatchNormWithConv(const NodeMatch& match, return Status::OK(); } +Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, + std::vector* new_nodes) { + // Calculate the scale and offset values to apply. + std::vector scale_values; + std::vector offset_values; + TF_RETURN_IF_ERROR( + GetScaleAndOffsetValues(match, &scale_values, &offset_values)); + + // Fuse conv weights, and set the final output node name as batch_norm_node. + const NodeDef& batch_norm_node = match.node; + const NodeMatch& batch_to_space_node_match = match.inputs[0]; + const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0]; + const NodeDef& batch_to_space_node = batch_to_space_node_match.node; + const NodeDef& conv_node = conv_node_match.node; + + string biasadd_name = conv_node.name() + "/biasadd"; + TF_RETURN_IF_ERROR( + FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, + biasadd_name , new_nodes)); + + NodeDef new_batch_to_space_node = batch_to_space_node; + // reuse batch_norm node name + new_batch_to_space_node.set_name(batch_norm_node.name()); + new_batch_to_space_node.set_input(0, biasadd_name); + new_nodes->push_back(batch_to_space_node_match.inputs[1].node); + new_nodes->push_back(batch_to_space_node_match.inputs[2].node); + new_nodes->push_back(new_batch_to_space_node); + return Status::OK(); +} + Status FuseBatchNormWithConvConcat(const NodeMatch& match, std::vector* new_nodes) { // Calculate the scale and offset values to apply. @@ -284,6 +314,43 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def, current_graph_def = replaced_graph_def; } while (did_graph_change); + do { + did_graph_change = false; + GraphDef replaced_graph_def; + TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( + current_graph_def, // clang-format off + {"BatchNormWithGlobalNormalization|FusedBatchNorm", // batch_norm_node + { + {"BatchToSpaceND", // batch_to_space_node + { + {"Conv2D", // conv_node + { + {"*"}, // input_node + {"Const"}, // weights_node + } + }, + {"Const"}, // block_shape + {"Const"}, // crops + } + }, + {"Const"}, // mean_node + {"Const"}, // variance_node + {"Const"}, // beta_node + {"Const"}, // gamma_node + } + }, // clang-format on + [&did_graph_change](const NodeMatch& match, + const std::set& input_nodes, + const std::set& output_nodes, + std::vector* new_nodes) { + TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes)); + did_graph_change = true; + return Status::OK(); + }, + {}, &replaced_graph_def)); + current_graph_def = replaced_graph_def; + } while (did_graph_change); + do { did_graph_change = false; GraphDef replaced_graph_def; diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index b30ba9ac8b..272410c693 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -298,6 +299,96 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; +void TestFoldFusedBatchNormsWithBatchToSpace() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, + {1, 1, 1, 1}, "VALID"); + + Tensor block_shape_data(DT_INT32, TensorShape({2})); + test::FillValues(&block_shape_data, {1, 2}); + Output block_shape_op = + Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); + + Tensor crops_data(DT_INT32, TensorShape({2, 2})); + test::FillValues(&crops_data, {0, 0, 0, 1}); + Output crops_op = + Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); + + Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), + conv_op, block_shape_op, crops_data); + + Tensor mean_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&mean_data, {10.0f, 20.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + Tensor variance_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&variance_data, {0.25f, 0.5f}); + Output variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&beta_data, {0.1f, 0.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&gamma_data, {1.0f, 2.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + NodeDef batch_norm_node; + batch_norm_node.set_op("FusedBatchNorm"); + batch_norm_node.set_name("output"); + AddNodeInput("batch_to_space_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("is_training", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + + std::unique_ptr original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op()); + } +} + TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) { TestFoldOldBatchNorms(); } @@ -315,5 +406,9 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { TestFoldFusedBatchNormsWithConcat(/*split=*/false); } +TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { + TestFoldFusedBatchNormsWithBatchToSpace(); +} + } // namespace graph_transforms } // namespace tensorflow diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 1833d67d82..2607b9d704 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -48,36 +48,65 @@ py_binary( deps = ["//tensorflow:tensorflow_py"], ) +COMMON_PIP_DEPS = [ + ":licenses", + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/boosted_trees:boosted_trees_pip", + "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:checkpointable_utils", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/gan:gan", + "//tensorflow/contrib/graph_editor:graph_editor_pip", + "//tensorflow/contrib/keras:keras", + "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/py2tf:py2tf", + "//tensorflow/contrib/py2tf/converters:converters", + "//tensorflow/contrib/py2tf/converters:test_lib", + "//tensorflow/contrib/py2tf/impl:impl", + "//tensorflow/contrib/py2tf/pyct:pyct", + "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", + "//tensorflow/contrib/slim:slim", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + "//tensorflow/contrib/specs:specs", + "//tensorflow/contrib/summary:summary_test_util", + "//tensorflow/contrib/tensor_forest:init_py", + "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + "//tensorflow/contrib/timeseries:timeseries_pip", + "//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/saved_model:saved_model", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", +] + # On Windows, python binary is a zip file of runfiles tree. # Add everything to its data dependency for generating a runfiles tree # for building the pip package on Windows. py_binary( name = "simple_console_for_windows", srcs = ["simple_console_for_windows.py"], - data = [ - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/saved_model", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/contrib/summary:summary_test_util", - # These targets don't build on Windows yet. Exclude them for now. - # "//tensorflow/contrib/slim", - # "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - # "//tensorflow/contrib/specs", - # "//tensorflow/contrib/tensor_forest:init_py", - # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - # "//tensorflow/examples/tutorials/mnist:package", - ], + data = COMMON_PIP_DEPS, srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) @@ -108,6 +137,7 @@ filegroup( "@highwayhash//:LICENSE", "@jemalloc//:COPYING", "@jpeg//:LICENSE.md", + "@kafka//:LICENSE", "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", @@ -137,61 +167,12 @@ sh_binary( data = select({ "//tensorflow:windows": [":simple_console_for_windows"], "//tensorflow:windows_msvc": [":simple_console_for_windows"], - "//conditions:default": [ - ":licenses", - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", + "//conditions:default": COMMON_PIP_DEPS + [ ":simple_console", - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/boosted_trees:boosted_trees_pip", - "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", - "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/eager/python:evaluator", - "//tensorflow/contrib/gan:gan", - "//tensorflow/contrib/graph_editor:graph_editor_pip", - "//tensorflow/contrib/keras:keras", - "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/lite/python:interpreter_test_data", "//tensorflow/contrib/lite/toco:toco", "//tensorflow/contrib/lite/toco/python:toco_wrapper", "//tensorflow/contrib/lite/toco/python:toco_from_protos", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/py2tf:py2tf", - "//tensorflow/contrib/py2tf/converters:converters", - "//tensorflow/contrib/py2tf/converters:test_lib", - "//tensorflow/contrib/py2tf/impl:impl", - "//tensorflow/contrib/py2tf/pyct:pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", - "//tensorflow/contrib/receptive_field:receptive_field_pip", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/signal:test_util", - "//tensorflow/contrib/slim:slim", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - "//tensorflow/contrib/specs:specs", - "//tensorflow/contrib/summary:summary_test_util", - "//tensorflow/contrib/tensor_forest:init_py", - "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - "//tensorflow/contrib/timeseries:timeseries_pip", - "//tensorflow/contrib/tpu", - "//tensorflow/examples/tutorials/mnist:package", - "//tensorflow/python:distributed_framework_test_lib", - "//tensorflow/python:meta_graph_testdata", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/python/saved_model:saved_model", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python:test_ops", - "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e1a5f091ba..e0152da4df 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc1' +_VERSION = '1.6.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -72,7 +72,7 @@ if sys.version_info < (3, 4): # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'freeze_graph = tensorflow.python.tools.freeze_graph:main', + 'freeze_graph = tensorflow.python.tools.freeze_graph:run_main', 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 77cc9f75f7..edd093510e 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -88,6 +88,7 @@ import os import shutil from google.cloud import datastore +from six import text_type def is_real_file(dirpath, fname): @@ -150,7 +151,7 @@ def upload_benchmark_data(client, data): """ test_result = json.loads(data) - test_name = unicode(test_result["name"]) + test_name = text_type(test_result["name"]) start_time = datetime.datetime.utcfromtimestamp( float(test_result["startTime"])) batch = [] @@ -162,7 +163,7 @@ def upload_benchmark_data(client, data): t_val.update({ "test": test_name, "start": start_time, - "info": unicode(data) + "info": text_type(data) }) batch.append(t_val) @@ -170,7 +171,7 @@ def upload_benchmark_data(client, data): # the attribute to be fetched and displayed. The full entry information is # also stored as a non-indexed JSON blob. for ent in test_result["entries"].get("entry", []): - ent_name = unicode(ent["name"]) + ent_name = text_type(ent["name"]) e_key = client.key("Entry") e_val = datastore.Entity(e_key, exclude_from_indexes=["info"]) e_val.update({ @@ -178,7 +179,7 @@ def upload_benchmark_data(client, data): "start": start_time, "entry": ent_name, "timing": ent["wallTime"], - "info": unicode(json.dumps(ent)) + "info": text_type(json.dumps(ent)) }) batch.append(e_val) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index abc9eb9bc1..a922808a70 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -12,6 +12,8 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil load("//third_party:repo.bzl", "tf_http_archive") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", + "def_file_filter_configure") def _extract_version_number(bazel_version): """Extracts the semantic version number from a version string @@ -67,7 +69,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We must check the bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel # version we require here. - check_bazel_version_at_least("0.5.4") + check_bazel_version_at_least("0.10.0") clang6_configure(name="local_config_clang6") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") @@ -75,6 +77,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") + # For windows bazel build + # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. + def_file_filter_configure(name = "local_config_def_file_filter") + # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name="local_config_arm_compiler", diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 87a23925c4..4418ac32fc 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -526,12 +526,12 @@ config_setting( config_setting( name = "armeabi-v7a", - values = {"android_cpu": "armeabi-v7a"}, + values = {"cpu": "armeabi-v7a"}, ) config_setting( name = "arm64-v8a", - values = {"android_cpu": "arm64-v8a"}, + values = {"cpu": "arm64-v8a"}, ) config_setting( diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index a61a9e1f6c..a839ca717e 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -130,12 +130,16 @@ cc_library( ], hdrs = [ "config.h", + "src-cpp/rdkafkacpp.h", + "src-cpp/rdkafkacpp_int.h", + "src/lz4.c", + "src/snappy_compat.h", ], - defines = [ + copts = [ + "-Iexternal/kafka/src", + "-Iexternal/kafka/src-cpp", ], - includes = [ - "src", - "src-cpp", + defines = [ ], linkopts = [ "-lpthread", @@ -143,5 +147,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@boringssl//:ssl", + "@zlib_archive//:zlib", ], ) diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl index de06ad5f27..1dd8ab433a 100644 --- a/third_party/py/BUILD.tpl +++ b/third_party/py/BUILD.tpl @@ -2,20 +2,26 @@ licenses(["restricted"]) package(default_visibility = ["//visibility:public"]) +# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib +# See https://docs.python.org/3/extending/windows.html +cc_import( + name = "python_lib", + interface_library = select({ + ":windows": ":python_import_lib", + # A placeholder for Unix platforms which makes --no_build happy. + "//conditions:default": "not-existing.lib", + }), + system_provided = 1, +) + cc_library( name = "python_headers", hdrs = [":python_include"], - data = select({ - ":windows": [":python_import_lib"], + deps = select({ + ":windows": [":python_lib"], "//conditions:default": [], }), includes = ["python_include"], - linkopts = select({ - # TODO(pcloudy): Ideally, this should just go into deps after resolving - # https://github.com/bazelbuild/bazel/issues/3237, - ":windows": ["$(locations :python_import_lib)"], - "//conditions:default": [], - }), ) cc_library( -- GitLab From 310c32e4f793b591983ec6cf9635d1d0a86602fa Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 15 Mar 2018 13:01:28 -0700 Subject: [PATCH 072/960] TFLite iOS Makefile: Disable SSE4.1 for x86_64 build. PiperOrigin-RevId: 189232136 --- tensorflow/contrib/lite/ios_makefile.inc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/lite/ios_makefile.inc b/tensorflow/contrib/lite/ios_makefile.inc index fc6594c3a0..079320586f 100644 --- a/tensorflow/contrib/lite/ios_makefile.inc +++ b/tensorflow/contrib/lite/ios_makefile.inc @@ -31,9 +31,6 @@ ifeq ($(TARGET), IOS) ${IPHONEOS_SYSROOT} \ -arch $(IOS_ARCH) \ -O3 - ifeq ($(IOS_ARCH), x86_64) - CXXFLAGS += -msse4.1 - endif CCFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ -fembed-bitcode \ -mno-thumb \ -- GitLab From 19e4a6ab8e400c90ccd0c95c6396a0d2cc925324 Mon Sep 17 00:00:00 2001 From: Surya Bhupatiraju Date: Thu, 15 Mar 2018 13:02:23 -0700 Subject: [PATCH 073/960] Add mean-only FID and diagonal-covariance-only FID variants to TFGAN. PiperOrigin-RevId: 189232299 --- .../eval/python/classifier_metrics_impl.py | 190 +++++++++++++++--- .../eval/python/classifier_metrics_test.py | 60 ++++++ 2 files changed, 226 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index fdfabd07c1..323cbe6e76 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -44,11 +44,11 @@ from tensorflow.python.ops import functional_ops from tensorflow.python.ops import image_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_impl from tensorflow.python.ops import nn_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import resource_loader - __all__ = [ 'get_graph_def_from_disk', 'get_graph_def_from_resource', @@ -62,10 +62,11 @@ __all__ = [ 'frechet_inception_distance', 'frechet_classifier_distance', 'frechet_classifier_distance_from_activations', + 'mean_only_frechet_classifier_distance_from_activations', + 'diagonal_only_frechet_classifier_distance_from_activations', 'INCEPTION_DEFAULT_IMAGE_SIZE', ] - INCEPTION_URL = 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz' INCEPTION_FROZEN_GRAPH = 'inceptionv1_for_inception_score.pb' INCEPTION_INPUT = 'Mul:0' @@ -77,8 +78,7 @@ INCEPTION_DEFAULT_IMAGE_SIZE = 299 def _validate_images(images, image_size): images = ops.convert_to_tensor(images) images.shape.with_rank(4) - images.shape.assert_is_compatible_with( - [None, image_size, image_size, None]) + images.shape.assert_is_compatible_with([None, image_size, image_size, None]) return images @@ -109,9 +109,10 @@ def _symmetric_matrix_square_root(mat, eps=1e-10): math_ops.matmul(u, array_ops.diag(si)), v, transpose_b=True) -def preprocess_image( - images, height=INCEPTION_DEFAULT_IMAGE_SIZE, - width=INCEPTION_DEFAULT_IMAGE_SIZE, scope=None): +def preprocess_image(images, + height=INCEPTION_DEFAULT_IMAGE_SIZE, + width=INCEPTION_DEFAULT_IMAGE_SIZE, + scope=None): """Prepare a batch of images for evaluation. This is the preprocessing portion of the graph from @@ -272,8 +273,11 @@ def run_inception(images, return activations -def run_image_classifier(tensor, graph_def, input_tensor, - output_tensor, scope='RunClassifier'): +def run_image_classifier(tensor, + graph_def, + input_tensor, + output_tensor, + scope='RunClassifier'): """Runs a network from a frozen graph. Args: @@ -433,8 +437,8 @@ def trace_sqrt_product(sigma, sigma_v): sqrt_sigma = _symmetric_matrix_square_root(sigma) # This is sqrt(A sigma_v A) above - sqrt_a_sigmav_a = math_ops.matmul( - sqrt_sigma, math_ops.matmul(sigma_v, sqrt_sigma)) + sqrt_a_sigmav_a = math_ops.matmul(sqrt_sigma, + math_ops.matmul(sigma_v, sqrt_sigma)) return math_ops.trace(_symmetric_matrix_square_root(sqrt_a_sigmav_a)) @@ -452,7 +456,7 @@ def frechet_classifier_distance(real_images, Given two Gaussian distribution with means m and m_w and covariance matrices C and C_w, this function calcuates - |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) + |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) which captures how different the distributions of real images and generated images (or more accurately, their visual features) are. Note that unlike the @@ -511,10 +515,140 @@ def frechet_classifier_distance(real_images, return frechet_classifier_distance_from_activations(real_a, gen_a) -def frechet_classifier_distance_from_activations( +def mean_only_frechet_classifier_distance_from_activations( real_activations, generated_activations): """Classifier distance for evaluating a generative model from activations. + Given two Gaussian distribution with means m and m_w and covariance matrices + C and C_w, this function calcuates + + |m - m_w|^2 + + which captures how different the distributions of real images and generated + images (or more accurately, their visual features) are. Note that unlike the + Inception score, this is a true distance and utilizes information about real + world images. + + Note that when computed using sample means and sample covariance matrices, + Frechet distance is biased. It is more biased for small sample sizes. (e.g. + even if the two distributions are the same, for a small sample size, the + expected Frechet distance is large). It is important to use the same + sample size to compute frechet classifier distance when comparing two + generative models. + + In this variant, we only compute the difference between the means of the + fitted Gaussians. The computation leads to O(n) vs. O(n^2) memory usage, yet + still retains much of the same information as FID. + + Args: + real_activations: 2D array of activations of real images of size + [num_images, num_dims] to use to compute Frechet Inception distance. + generated_activations: 2D array of activations of generated images of size + [num_images, num_dims] to use to compute Frechet Inception distance. + + Returns: + The mean-only Frechet Inception distance. A floating-point scalar of the + same type as the output of the activations. + """ + real_activations.shape.assert_has_rank(2) + generated_activations.shape.assert_has_rank(2) + + activations_dtype = real_activations.dtype + if activations_dtype != dtypes.float64: + real_activations = math_ops.to_double(real_activations) + generated_activations = math_ops.to_double(generated_activations) + + # Compute means of activations. + m = math_ops.reduce_mean(real_activations, 0) + m_w = math_ops.reduce_mean(generated_activations, 0) + + # Next the distance between means. + mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. + mofid = mean + if activations_dtype != dtypes.float64: + mofid = math_ops.cast(mofid, activations_dtype) + + return mofid + + +def diagonal_only_frechet_classifier_distance_from_activations( + real_activations, generated_activations): + """Classifier distance for evaluating a generative model. + + This is based on the Frechet Inception distance, but for an arbitrary + classifier. + + This technique is described in detail in https://arxiv.org/abs/1706.08500. + Given two Gaussian distribution with means m and m_w and covariance matrices + C and C_w, this function calcuates + + |m - m_w|^2 + (sigma + sigma_w - 2(sigma x sigma_w)^(1/2)) + + which captures how different the distributions of real images and generated + images (or more accurately, their visual features) are. Note that unlike the + Inception score, this is a true distance and utilizes information about real + world images. In this variant, we compute diagonal-only covariance matrices. + As a result, instead of computing an expensive matrix square root, we can do + something much simpler, and has O(n) vs O(n^2) space complexity. + + Note that when computed using sample means and sample covariance matrices, + Frechet distance is biased. It is more biased for small sample sizes. (e.g. + even if the two distributions are the same, for a small sample size, the + expected Frechet distance is large). It is important to use the same + sample size to compute frechet classifier distance when comparing two + generative models. + + Args: + real_activations: Real images to use to compute Frechet Inception distance. + generated_activations: Generated images to use to compute Frechet Inception + distance. + + Returns: + The diagonal-only Frechet Inception distance. A floating-point scalar of + the same type as the output of the activations. + + Raises: + ValueError: If the shape of the variance and mean vectors are not equal. + """ + real_activations.shape.assert_has_rank(2) + generated_activations.shape.assert_has_rank(2) + + activations_dtype = real_activations.dtype + if activations_dtype != dtypes.float64: + real_activations = math_ops.to_double(real_activations) + generated_activations = math_ops.to_double(generated_activations) + + # Compute mean and covariance matrices of activations. + m, var = nn_impl.moments(real_activations, axes=[0]) + m_w, var_w = nn_impl.moments(generated_activations, axes=[0]) + + actual_shape = var.get_shape() + expected_shape = m.get_shape() + + if actual_shape != expected_shape: + raise ValueError('shape: {} must match expected shape: {}'.format( + actual_shape, expected_shape)) + + # Compute the two components of FID. + + # First the covariance component. + # Here, note that trace(A + B) = trace(A) + trace(B) + trace = math_ops.reduce_sum( + (var + var_w) - 2.0 * math_ops.sqrt(math_ops.multiply(var, var_w))) + + # Next the distance between means. + mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. + dofid = trace + mean + if activations_dtype != dtypes.float64: + dofid = math_ops.cast(dofid, activations_dtype) + + return dofid + + +def frechet_classifier_distance_from_activations(real_activations, + generated_activations): + """Classifier distance for evaluating a generative model. + This methods computes the Frechet classifier distance from activations of real images and generated images. This can be used independently of the frechet_classifier_distance() method, especially in the case of using large @@ -525,13 +659,20 @@ def frechet_classifier_distance_from_activations( Given two Gaussian distribution with means m and m_w and covariance matrices C and C_w, this function calcuates - |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) + |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) which captures how different the distributions of real images and generated images (or more accurately, their visual features) are. Note that unlike the Inception score, this is a true distance and utilizes information about real world images. + Note that when computed using sample means and sample covariance matrices, + Frechet distance is biased. It is more biased for small sample sizes. (e.g. + even if the two distributions are the same, for a small sample size, the + expected Frechet distance is large). It is important to use the same + sample size to compute frechet classifier distance when comparing two + generative models. + Args: real_activations: 2D Tensor containing activations of real data. Shape is [batch_size, activation_size]. @@ -553,36 +694,37 @@ def frechet_classifier_distance_from_activations( # Compute mean and covariance matrices of activations. m = math_ops.reduce_mean(real_activations, 0) - m_v = math_ops.reduce_mean(generated_activations, 0) + m_w = math_ops.reduce_mean(generated_activations, 0) num_examples = math_ops.to_double(array_ops.shape(real_activations)[0]) # sigma = (1 / (n - 1)) * (X - mu) (X - mu)^T real_centered = real_activations - m sigma = math_ops.matmul( - real_centered, real_centered, transpose_a=True) / (num_examples - 1) + real_centered, real_centered, transpose_a=True) / ( + num_examples - 1) - gen_centered = generated_activations - m_v - sigma_v = math_ops.matmul( - gen_centered, gen_centered, transpose_a=True) / (num_examples - 1) + gen_centered = generated_activations - m_w + sigma_w = math_ops.matmul( + gen_centered, gen_centered, transpose_a=True) / ( + num_examples - 1) - # Find the Tr(sqrt(sigma sigma_v)) component of FID - sqrt_trace_component = trace_sqrt_product(sigma, sigma_v) + # Find the Tr(sqrt(sigma sigma_w)) component of FID + sqrt_trace_component = trace_sqrt_product(sigma, sigma_w) # Compute the two components of FID. # First the covariance component. # Here, note that trace(A + B) = trace(A) + trace(B) - trace = math_ops.trace(sigma + sigma_v) - 2.0 * sqrt_trace_component + trace = math_ops.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component # Next the distance between means. - mean = math_ops.square(linalg_ops.norm(m - m_v)) # This uses the L2 norm. + mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. fid = trace + mean if activations_dtype != dtypes.float64: fid = math_ops.cast(fid, activations_dtype) return fid - frechet_inception_distance = functools.partial( frechet_classifier_distance, classifier_fn=functools.partial( diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py index 61dc8646dd..663e49bdca 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py @@ -50,6 +50,26 @@ def _expected_inception_score(logits): return np.exp(np.mean(per_example_logincscore)) +def _expected_mean_only_fid(real_imgs, gen_imgs): + m = np.mean(real_imgs, axis=0) + m_v = np.mean(gen_imgs, axis=0) + mean = np.square(m - m_v).sum() + mofid = mean + return mofid + + +def _expected_diagonal_only_fid(real_imgs, gen_imgs): + m = np.mean(real_imgs, axis=0) + m_v = np.mean(gen_imgs, axis=0) + var = np.var(real_imgs, axis=0) + var_v = np.var(gen_imgs, axis=0) + sqcc = np.sqrt(var * var_v) + mean = (np.square(m - m_v)).sum() + trace = (var + var_v - 2 * sqcc).sum() + dofid = mean + trace + return dofid + + def _expected_fid(real_imgs, gen_imgs): m = np.mean(real_imgs, axis=0) m_v = np.mean(gen_imgs, axis=0) @@ -285,6 +305,46 @@ class ClassifierMetricsTest(test.TestCase): self.assertAllClose(_expected_inception_score(logits), incscore_np) + def test_mean_only_frechet_classifier_distance_value(self): + """Test that `frechet_classifier_distance` gives the correct value.""" + np.random.seed(0) + + pool_real_a = np.float32(np.random.randn(256, 2048)) + pool_gen_a = np.float32(np.random.randn(256, 2048)) + + tf_pool_real_a = array_ops.constant(pool_real_a) + tf_pool_gen_a = array_ops.constant(pool_gen_a) + + mofid_op = classifier_metrics.mean_only_frechet_classifier_distance_from_activations( # pylint: disable=line-too-long + tf_pool_real_a, tf_pool_gen_a) + + with self.test_session() as sess: + actual_mofid = sess.run(mofid_op) + + expected_mofid = _expected_mean_only_fid(pool_real_a, pool_gen_a) + + self.assertAllClose(expected_mofid, actual_mofid, 0.0001) + + def test_diagonal_only_frechet_classifier_distance_value(self): + """Test that `frechet_classifier_distance` gives the correct value.""" + np.random.seed(0) + + pool_real_a = np.float32(np.random.randn(256, 2048)) + pool_gen_a = np.float32(np.random.randn(256, 2048)) + + tf_pool_real_a = array_ops.constant(pool_real_a) + tf_pool_gen_a = array_ops.constant(pool_gen_a) + + dofid_op = classifier_metrics.diagonal_only_frechet_classifier_distance_from_activations( # pylint: disable=line-too-long + tf_pool_real_a, tf_pool_gen_a) + + with self.test_session() as sess: + actual_dofid = sess.run(dofid_op) + + expected_dofid = _expected_diagonal_only_fid(pool_real_a, pool_gen_a) + + self.assertAllClose(expected_dofid, actual_dofid, 0.0001) + def test_frechet_classifier_distance_value(self): """Test that `frechet_classifier_distance` gives the correct value.""" np.random.seed(0) -- GitLab From 5f8fae9e8645be3ed76ba2b23a0d8c388e1e51e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 13:19:16 -0700 Subject: [PATCH 074/960] Update ops-related pbtxt files. PiperOrigin-RevId: 189234789 --- .../core/ops/compat/ops_history.v1.pbtxt | 31 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 31 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 85dd1a423a..1834cc998c 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -51522,6 +51522,37 @@ op { } } } +op { + name: "SlideDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "window_size" + type: DT_INT64 + } + input_arg { + name: "stride" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Snapshot" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 3faa4eeada..763b104305 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -24319,6 +24319,37 @@ op { } } } +op { + name: "SlideDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "window_size" + type: DT_INT64 + } + input_arg { + name: "stride" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Snapshot" input_arg { -- GitLab From c17ba11c799be3ab24b826a0f1bace86de26c055 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 13:46:52 -0700 Subject: [PATCH 075/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 189239161 --- tensorflow/go/op/wrappers.go | 190 +++++++++++++++++------------------ 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 469d1e9adb..0424c12fd9 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -509,6 +509,101 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua return op.Output(0) } +// Scatter `updates` into a new (initially zero) tensor according to `indices`. +// +// Creates a new tensor by applying sparse `updates` to individual +// values or slices within a zero tensor of the given `shape` according to +// indices. This operator is the inverse of the @{tf.gather_nd} operator which +// extracts values or slices from a given tensor. +// +// **WARNING**: The order in which updates are applied is nondeterministic, so the +// output will be nondeterministic if `indices` contains duplicates. +// +// `indices` is an integer tensor containing indices into a new tensor of shape +// `shape`. The last dimension of `indices` can be at most the rank of `shape`: +// +// indices.shape[-1] <= shape.rank +// +// The last dimension of `indices` corresponds to indices into elements +// (if `indices.shape[-1] = shape.rank`) or slices +// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +// `shape`. `updates` is a tensor with shape +// +// indices.shape[:-1] + shape[indices.shape[-1]:] +// +// The simplest form of scatter is to insert individual elements in a tensor by +// index. For example, say we want to insert 4 scattered elements in a rank-1 +// tensor with 8 elements. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// shape = tf.constant([8]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [0, 11, 0, 10, 9, 0, 0, 12] +// +// We can also, insert entire slices of a higher rank tensor all at once. For +// example, if we wanted to insert two slices in the first dimension of a +// rank-3 tensor with two matrices of new values. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[0], [2]]) +// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]], +// [[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]]]) +// shape = tf.constant([4, 4, 4]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], +// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] +// +// Arguments: +// indices: Index tensor. +// updates: Updates to scatter into output. +// shape: 1-D. The shape of the resulting tensor. +// +// Returns A new tensor with the given shape and updates applied according +// to the indices. +func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ScatterNd", + Input: []tf.Input{ + indices, updates, shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -24972,101 +25067,6 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// Scatter `updates` into a new (initially zero) tensor according to `indices`. -// -// Creates a new tensor by applying sparse `updates` to individual -// values or slices within a zero tensor of the given `shape` according to -// indices. This operator is the inverse of the @{tf.gather_nd} operator which -// extracts values or slices from a given tensor. -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// shape = tf.constant([8]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [0, 11, 0, 10, 9, 0, 0, 12] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// shape = tf.constant([4, 4, 4]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], -// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] -// -// Arguments: -// indices: Index tensor. -// updates: Updates to scatter into output. -// shape: 1-D. The shape of the resulting tensor. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNd", - Input: []tf.Input{ - indices, updates, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) -- GitLab From 856438f65e5705b373413bce29758a92194ff9b6 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 15 Mar 2018 13:52:16 -0700 Subject: [PATCH 076/960] Fix a bug which caused slot variables to be shared when executing eagerly Threads the uid() name of a ResourceVariable through to the Optimizer. Tests that slot variables are unique in several ways. Previously ResourceVariable._shared_name was the Optimizer's slot key for a variable, which for tfe.Variable() is just the non-uniquified name. PiperOrigin-RevId: 189240115 --- .../eager/python/checkpointable_utils_test.py | 30 +++++++++++++++++++ .../python/ops/resource_variable_ops.py | 24 +++++++++------ tensorflow/python/training/adam_test.py | 9 ++++++ tensorflow/python/training/optimizer.py | 2 +- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 1ab94b88bd..4e0a9923ff 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -23,6 +23,7 @@ import six from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.python.client import session as session_lib +from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import constant_op @@ -523,6 +524,35 @@ class CheckpointingTests(test.TestCase): name, = named_variables.keys() self.assertEqual(name, "..ATTRIBUTES/a/.ATTRIBUTES/VARIABLE_VALUE") + def testAnonymousVarsInInit(self): + + class Model(training.Model): + + def __init__(self): + super(Model, self).__init__() + self.w = resource_variable_ops.ResourceVariable(0.0) + self.b = resource_variable_ops.ResourceVariable(0.0) + self.vars = [self.w, self.b] + + def call(self, x): + return x * self.w + self.b + + with context.eager_mode(): + model = Model() + optimizer = adam.AdamOptimizer(learning_rate=0.05) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + checkpoint = checkpointable_utils.Checkpoint( + model=model, optimizer=optimizer) + for _ in range(2): + with backprop.GradientTape() as tape: + loss = (constant_op.constant(1.) + - model(constant_op.constant(1.))) ** 2 + grad = tape.gradient(loss, model.vars) + optimizer.apply_gradients( + [(g, v) for g, v in zip(grad, model.vars)]) + checkpoint.save(checkpoint_prefix) + @test_util.run_in_graph_and_eager_modes() def testLateDependencyTracking(self): diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 54191ee765..affa7ae629 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -46,10 +46,6 @@ def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): container = ops.get_default_graph()._container # pylint: disable=protected-access if container is None: container = "" - if not graph_mode: - # When in eager mode use a uid for the shared_name, to prevent accidental - # sharing. - shared_name = str(ops.uid()) handle = gen_resource_variable_ops.var_handle_op(shape=shape, dtype=dtype, shared_name=shared_name, name=name, @@ -368,6 +364,12 @@ class ResourceVariable(variables.Variable): if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access handle_name = ops._name_from_scope_name(name) + if self._in_graph_mode: + shared_name = handle_name + else: + # When in eager mode use a uid for the shared_name, to prevent + # accidental sharing. + shared_name = "%s_%d" % (handle_name, ops.uid()) if init_from_fn: # Use attr_scope and device(None) to simulate the behavior of # colocate_with when the variable we want to colocate with doesn't @@ -383,7 +385,7 @@ class ResourceVariable(variables.Variable): self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, - shared_name=handle_name, + shared_name=shared_name, name=name, graph_mode=self._in_graph_mode) self._shape = initial_value.get_shape() @@ -395,7 +397,7 @@ class ResourceVariable(variables.Variable): self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, - shared_name=handle_name, + shared_name=shared_name, name=name, graph_mode=False) self._shape = initial_value.get_shape() @@ -418,11 +420,12 @@ class ResourceVariable(variables.Variable): self._handle = _eager_safe_variable_handle( shape=initial_value.get_shape(), dtype=initial_value.dtype.base_dtype, - shared_name=handle_name, + shared_name=shared_name, name=name, graph_mode=self._in_graph_mode) self._shape = initial_value.get_shape() + self._unique_id = shared_name self._initial_value = initial_value if self._in_graph_mode else None self._handle_name = handle_name + ":0" self._dtype = initial_value.dtype.base_dtype @@ -503,6 +506,7 @@ class ResourceVariable(variables.Variable): self._shape = tensor_shape.TensorShape( self._handle.op.get_attr("shape")) self._handle_name = self._handle.name + self._unique_id = self._handle_name self._initializer_op = g.as_graph_element( ops.prepend_name_scope( variable_def.initializer_name, import_scope=import_scope)) @@ -851,7 +855,8 @@ class ResourceVariable(variables.Variable): tape.watch_variable(self) return _UnreadVariable( self._handle, self.dtype, self._shape, self._in_graph_mode, - self._handle_deleter if not self._in_graph_mode else None, op) + self._handle_deleter if not self._in_graph_mode else None, op, + self._unique_id) def assign(self, value, use_locking=None, name=None, read_value=True): """Assigns a new value to this variable. @@ -966,7 +971,7 @@ class _UnreadVariable(ResourceVariable): """ def __init__(self, handle, dtype, # pylint: disable=super-init-not-called - shape, in_graph_mode, deleter, parent_op): + shape, in_graph_mode, deleter, parent_op, unique_id): # We do not call super init on purpose. self._trainable = False self._save_slice_info = None @@ -979,6 +984,7 @@ class _UnreadVariable(ResourceVariable): self._handle_name = "" else: self._handle_name = self._handle.name + self._unique_id = unique_id self._dtype = dtype self._constraint = None self._cached_value = None diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py index af87d6f0e5..9be8b6aafe 100644 --- a/tensorflow/python/training/adam_test.py +++ b/tensorflow/python/training/adam_test.py @@ -319,6 +319,15 @@ class AdamOptimizerTest(test.TestCase): # fails. optimizer.apply_gradients([(grads0, var0)]) + def testSlotsUniqueEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(1.) + v2 = resource_variable_ops.ResourceVariable(1.) + opt = adam.AdamOptimizer(1.) + opt.minimize(lambda: v1 + v2) + # There should be two non-slot variables, and two unique slot variables + # for v1 and v2 respectively. + self.assertEqual(6, len(set(opt.variables()))) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 7adaedef5b..af9cc3491c 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -61,7 +61,7 @@ def _deduplicate_indexed_slices(values, indices): def _var_key(var): if context.executing_eagerly(): - return var._shared_name # pylint: disable=protected-access + return var._unique_id # pylint: disable=protected-access return (var.op.graph, var.op.name) -- GitLab From 002b488dcd9c1eaac4f4aba2ac1301c32c6beb06 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 14:27:04 -0700 Subject: [PATCH 077/960] Fix the HLO alias analysis and copy insertion to cope with the new kConditional instruction. PiperOrigin-RevId: 189245979 --- .../compiler/xla/service/copy_insertion.cc | 44 +++++++++--- .../xla/service/hlo_alias_analysis.cc | 70 +++++++++++++++---- .../xla/service/hlo_dataflow_analysis.cc | 10 +-- .../xla/service/hlo_dataflow_analysis_test.cc | 67 +++++++++++------- .../compiler/xla/service/hlo_ordering.cc | 42 +++++++++-- .../compiler/xla/service/hlo_ordering_test.cc | 61 ++++++++++++++++ .../compiler/xla/tests/conditional_test.cc | 51 ++++++++++++++ 7 files changed, 287 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index e9c974a046..40519ecc79 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -78,8 +78,9 @@ SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, policy.copy_root_replicated_buffers = true; } for (const CallSite& site : node.caller_callsites()) { - // The kWhile instruction does not have an handling here, as the - // AddCopiesForWhile() API takes care of adding its own copies. + // The AddCopiesForConditional() already adds copies, but the copy remover + // removes them, so we re-add them by returning the policy here. But really + // the copy remover should not be removing them. if (site.instruction()->opcode() == HloOpcode::kConditional) { policy.copy_parameters_and_constants = true; policy.copy_root_replicated_buffers = true; @@ -321,6 +322,29 @@ Status AddCopiesForWhile(const HloAliasAnalysis& alias_analysis, return Status::OK(); } +// We add copies for all the indices of the true and false computaiton roots, +// in order to resolve interference. We later rely on the CopyRemover to drop +// the unnecessary ones. +Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis, + HloInstruction* conditional) { + VLOG(2) << "Adding copies for kConditional instruction " + << conditional->name(); + TF_RET_CHECK(conditional->opcode() == HloOpcode::kConditional); + + for (HloComputation* computation : + {conditional->true_computation(), conditional->false_computation()}) { + HloInstruction* root = computation->root_instruction(); + std::vector users = root->users(); + TF_ASSIGN_OR_RETURN(HloInstruction * deep_copy, + computation->DeepCopyInstruction(root)); + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(root->ReplaceUseWith(user, deep_copy)); + } + computation->set_root_instruction(deep_copy); + } + return Status::OK(); +} + // Removes any control dependencies to or from the given instruction. Status StripControlDependenciesFrom(HloInstruction* instruction) { while (!instruction->control_successors().empty()) { @@ -348,6 +372,9 @@ Status AddCopiesToResolveInterference(HloModule* module) { for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kWhile) { TF_RETURN_IF_ERROR(AddCopiesForWhile(*alias_analysis, instruction)); + } else if (instruction->opcode() == HloOpcode::kConditional) { + TF_RETURN_IF_ERROR( + AddCopiesForConditional(*alias_analysis, instruction)); } } } @@ -596,6 +623,7 @@ class CopyRemover { auto is_live_range_before = [this](const ValueNode& a, const ValueNode& b) { + VLOG(3) << "Checking live range of " << *a.value << " WRT " << *b.value; if (LiveRangeBefore(a, b)) { VLOG(2) << " Live range of " << a.value->ToShortString() << " is before " << b.value->ToShortString(); @@ -610,7 +638,7 @@ class CopyRemover { VLOG(3) << copy->name() << " copies value " << src->value->ToShortString(); VLOG(3) << "Source buffer values: " << ValueListToString(src); - VLOG(3) << "Dest buffer values: " << ValueListToString(src); + VLOG(3) << "Dest buffer values: " << ValueListToString(dest); // A kCopy instruction copies an HLO value from a source buffer and // defines an HLO value in a destination buffer. Most generally, the @@ -786,16 +814,16 @@ class CopyRemover { // updated as copies are removed. bool LiveRangeBefore(const ValueNode& a, const ValueNode& b) { if (a.uses.empty()) { - VLOG(2) << "Empty uses"; + VLOG(2) << "Empty uses for " << *a.value; return ordering_.IsDefinedBefore(*a.value, *b.value); } for (const HloUse* use : a.uses) { - VLOG(2) << "use: " << *use; - VLOG(2) << "is before:" << *b.value; + VLOG(2) << "Checking use " << *use << " against " << *b.value; if (!ordering_.UseIsBeforeValueDefinition(*use, *b.value, dataflow_)) { - VLOG(2) << "Not before"; + VLOG(2) << "Use " << *use << " is NOT before " << *b.value; return false; } + VLOG(2) << "Use " << *use << " is before " << *b.value; } return true; } @@ -931,7 +959,6 @@ Status RemoveUnnecessaryCopies( CopyRemover copy_remover(*alias_analysis, ordering, module); XLA_VLOG_LINES(3, copy_remover.ToString()); - tensorflow::gtl::FlatSet existing_copies; for (HloComputation* computation : module->computations()) { for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kCopy && @@ -940,7 +967,6 @@ Status RemoveUnnecessaryCopies( } } } - return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 30e32a46d7..a88283ed9a 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -171,24 +171,21 @@ class BufferValueMap { return value_to_buffer_number_.at(&value); } - // Compute and return a vector of buffers that the given value must be - // contained in due to HLO aliasing rules. - std::vector ComputeAliasedBuffers(const HloValue& value) { + void ComputeWhileAliasedBuffers(const HloValue& value, + std::vector* aliased_buffers) { + VLOG(3) << "Compute kWhile aliases"; // Value is init of a while (use is while). - std::vector aliased_buffers; for (const HloUse& use : value.uses()) { - VLOG(2) << "use of value " << value.ToShortString() << ": " << use; if (use.instruction->opcode() == HloOpcode::kWhile) { // Determine the while value that this shares a buffer with. const HloValue& while_value = dataflow_.GetUniqueValueAt(use.instruction, use.operand_index); - aliased_buffers.push_back(GetBufferForValue(while_value)); + aliased_buffers->push_back(GetBufferForValue(while_value)); VLOG(3) << " value is init value to a while; must share buffer with " "while value " << while_value.ToShortString(); } } - // Value is a parameter of a while body/condition. if (value.defining_instruction()->opcode() == HloOpcode::kParameter) { const HloComputation* computation = @@ -205,11 +202,10 @@ class BufferValueMap { VLOG(3) << " value is parameter value of the body or condition of a " "while; must share buffer with while value " << while_value.ToShortString(); - aliased_buffers.push_back(GetBufferForValue(while_value)); + aliased_buffers->push_back(GetBufferForValue(while_value)); } } } - // Value is the root of a while body. for (const HloPosition& position : value.positions()) { const HloComputation* computation = position.instruction->parent(); @@ -224,27 +220,71 @@ class BufferValueMap { const HloValue& while_value = dataflow_.GetUniqueValueAt( callsite.instruction(), position.index); - VLOG(3) << " value is root the body computation of a while; must " - "share buffer with while value " + VLOG(3) << " value @ " << position << " is root of " + << callsite.instruction()->name() + << "; body root and while value root must share buffer " + "among them : " << while_value.ToShortString(); - aliased_buffers.push_back(GetBufferForValue(while_value)); + aliased_buffers->push_back(GetBufferForValue(while_value)); } } } } - // Value is the output of the while instruction itself. if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { VLOG(3) << " value is output of a while instruction"; - aliased_buffers.push_back(GetBufferForValue(value)); + aliased_buffers->push_back(GetBufferForValue(value)); + } + } + + void ComputeConditionalAliasedBuffers( + const HloValue& value, std::vector* aliased_buffers) { + VLOG(3) << "Compute kConditional aliases"; + // Aliases the buffers of the true/false computations roots, with the one of + // the conditional. + for (const HloPosition& position : value.positions()) { + const HloComputation* computation = position.instruction->parent(); + const CallGraphNode& call_graph_node = + dataflow_.call_graph().GetNode(computation); + if (position.instruction == computation->root_instruction()) { + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + if (callsite.instruction()->opcode() == HloOpcode::kConditional) { + // Call graph must have been flattened. + CHECK_EQ(call_graph_node.caller_callsites().size(), 1); + + const HloValue& cond_value = dataflow_.GetUniqueValueAt( + callsite.instruction(), position.index); + VLOG(3) + << " value @ " << position << " is root of " + << callsite.instruction()->name() + << "; true/false branch roots must share buffer among them : " + << cond_value.ToShortString(); + aliased_buffers->push_back(GetBufferForValue(cond_value)); + } + } + } + } + // Value is the output of the conditional instruction itself. + if (value.defining_instruction()->opcode() == HloOpcode::kConditional) { + VLOG(3) << " value is output of a conditional instruction"; + aliased_buffers->push_back(GetBufferForValue(value)); } + } + // Compute and return a vector of buffers that the given value must be + // contained in due to HLO aliasing rules. + std::vector ComputeAliasedBuffers(const HloValue& value) { + for (const HloUse& use : value.uses()) { + VLOG(2) << "Use of value " << value.ToShortString() << ": " << use; + } + std::vector aliased_buffers; + ComputeWhileAliasedBuffers(value, &aliased_buffers); + ComputeConditionalAliasedBuffers(value, &aliased_buffers); // Uniquify aliased buffers. std::sort(aliased_buffers.begin(), aliased_buffers.end()); aliased_buffers.erase( std::unique(aliased_buffers.begin(), aliased_buffers.end()), aliased_buffers.end()); - return aliased_buffers; } diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 934e43ba48..0c37a8d75f 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -368,11 +368,11 @@ bool HloDataflowAnalysis::UpdateConditionalValueSet( conditional->true_computation()->root_instruction()), &GetInstructionValueSet( conditional->false_computation()->root_instruction())}; - // A phi-node is not defined for a kConditional instruction even though it - // represents a join point. This is because the current approach is to define - // a phi-node only for kWhile to account for the dataflow through back-edges - // and deal with the ambiguity in other cases. - return GetInstructionValueSet(conditional).AssignUnionOf(inputs); + if (ssa_form_) { + return Phi(conditional, inputs); + } else { + return GetInstructionValueSet(conditional).AssignUnionOf(inputs); + } } bool HloDataflowAnalysis::UpdateCopyValueSet(HloInstruction* copy) { diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 7bf3a1a060..07f69b8e13 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -1602,11 +1602,17 @@ TEST_P(HloDataflowAnalysisTest, ConditionalWithIdentity) { EXPECT_THAT(analysis.GetValueDefinedAt(constant2).uses(), ElementsAre(HloUse{conditional, 2, {}})); - EXPECT_EQ(analysis.values().size(), 3); - EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); - EXPECT_THAT(HloValuesAt(conditional), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant1), - analysis.GetValueDefinedAt(constant2))); + bool ssa_form = GetParam(); + if (ssa_form) { + EXPECT_EQ(analysis.values().size(), 4); + EXPECT_TRUE(analysis.ValueIsDefinedAt(conditional)); + } else { + EXPECT_EQ(analysis.values().size(), 3); + EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); + EXPECT_THAT(HloValuesAt(conditional), + UnorderedElementsAre(analysis.GetValueDefinedAt(constant1), + analysis.GetValueDefinedAt(constant2))); + } } TEST_P(HloDataflowAnalysisTest, ConditionalTakingTupleOperand) { @@ -1713,11 +1719,17 @@ TEST_P(HloDataflowAnalysisTest, ConditionalTakingTupleOperand) { HloUse{true_x, 0, {}}, HloUse{true_y, 0, {}}, HloUse{false_x, 0, {}}, HloUse{false_y, 0, {}})); - EXPECT_EQ(analysis.values().size(), 6); - EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); - EXPECT_THAT(HloValuesAt(conditional), - UnorderedElementsAre(analysis.GetValueDefinedAt(add), - analysis.GetValueDefinedAt(sub))); + bool ssa_form = GetParam(); + if (ssa_form) { + EXPECT_EQ(analysis.values().size(), 7); + EXPECT_TRUE(analysis.ValueIsDefinedAt(conditional)); + } else { + EXPECT_EQ(analysis.values().size(), 6); + EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); + EXPECT_THAT(HloValuesAt(conditional), + UnorderedElementsAre(analysis.GetValueDefinedAt(add), + analysis.GetValueDefinedAt(sub))); + } } TEST_P(HloDataflowAnalysisTest, NestedConditionals) { @@ -1834,20 +1846,27 @@ TEST_P(HloDataflowAnalysisTest, NestedConditionals) { EXPECT_EQ(analysis.GetUniqueValueAt(false_operand_cond), analysis.GetValueDefinedAt(constant2)); - EXPECT_EQ(analysis.values().size(), 9); - EXPECT_FALSE(analysis.ValueIsDefinedAt(inner_conditional)); - EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); - EXPECT_THAT( - HloValuesAt(inner_conditional), - UnorderedElementsAre( - analysis.GetValueDefinedAt(computation1->root_instruction()), - analysis.GetValueDefinedAt(computation2->root_instruction()))); - EXPECT_THAT( - HloValuesAt(conditional), - UnorderedElementsAre( - analysis.GetValueDefinedAt(computation1->root_instruction()), - analysis.GetValueDefinedAt(computation2->root_instruction()), - analysis.GetValueDefinedAt(computation3->root_instruction()))); + bool ssa_form = GetParam(); + if (ssa_form) { + EXPECT_EQ(analysis.values().size(), 11); + EXPECT_TRUE(analysis.ValueIsDefinedAt(inner_conditional)); + EXPECT_TRUE(analysis.ValueIsDefinedAt(conditional)); + } else { + EXPECT_EQ(analysis.values().size(), 9); + EXPECT_FALSE(analysis.ValueIsDefinedAt(inner_conditional)); + EXPECT_FALSE(analysis.ValueIsDefinedAt(conditional)); + EXPECT_THAT( + HloValuesAt(inner_conditional), + UnorderedElementsAre( + analysis.GetValueDefinedAt(computation1->root_instruction()), + analysis.GetValueDefinedAt(computation2->root_instruction()))); + EXPECT_THAT( + HloValuesAt(conditional), + UnorderedElementsAre( + analysis.GetValueDefinedAt(computation1->root_instruction()), + analysis.GetValueDefinedAt(computation2->root_instruction()), + analysis.GetValueDefinedAt(computation3->root_instruction()))); + } } INSTANTIATE_TEST_CASE_P(HloDataflowAnalysisInstantiation, diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index 1b24d8da9e..e89d94bede 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -66,6 +66,28 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a, } } + // If the common ancestor is a conditional instruction, even though the true + // and false computations are not really ordered per-se, we define the true + // computation to be ordered before the false one. + // This ensures that buffers can still be shared among the two computations + // as they will forcibly have disjoint liveness. + if (a_ancestor == b_ancestor && + a_ancestor->opcode() == HloOpcode::kConditional) { + const HloComputation* true_computation = a_ancestor->true_computation(); + const HloComputation* false_computation = a_ancestor->false_computation(); + if (call_graph_->InstructionIsNestedIn(a, true_computation) && + call_graph_->InstructionIsNestedIn(b, false_computation)) { + return true; + } + // If 'b' is the conditional ancestor, and 'a' is within the true or false + // computations, 'a' executes before 'b'. + if (b == a_ancestor && + (call_graph_->InstructionIsNestedIn(a, true_computation) || + call_graph_->InstructionIsNestedIn(a, false_computation))) { + return true; + } + } + return ExecutesBeforeInSameComputation(a_ancestor, b_ancestor); } @@ -118,7 +140,18 @@ bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const { b.defining_instruction()->while_condition()))) { return true; } - + // If 'b' is a conditional phi and 'a' is in the true or false computation, + // then 'a' executes before 'b'. + if (b.is_phi() && + b.defining_instruction()->opcode() == HloOpcode::kConditional && + (call_graph_->InstructionIsNestedIn( + a.defining_instruction(), + b.defining_instruction()->true_computation()) || + call_graph_->InstructionIsNestedIn( + a.defining_instruction(), + b.defining_instruction()->false_computation()))) { + return true; + } return ExecutesBefore(a.defining_instruction(), b.defining_instruction()); } @@ -212,18 +245,17 @@ bool HloOrdering::LiveRangeStrictlyBefore( VLOG(4) << "LiveRangeStrictlyBefore(a = " << a.ToShortString() << ", b = " << b.ToShortString() << ")"; if (!IsDefinedBefore(a, b)) { - VLOG(4) << "a not defined before b"; + VLOG(4) << a << " not defined before " << b; return false; } - // All uses of 'a' must be before 'b' is defined. for (const HloUse& use : a.uses()) { if (!UseIsBeforeValueDefinition(use, b, dataflow)) { - VLOG(4) << "use of a (" << use << ") not before b is defined"; + VLOG(4) << "use of " << a << " (" << use << ") not before " << b + << " is defined"; return false; } } - return true; } diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index a989fce632..441d790f0e 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -362,5 +362,66 @@ ENTRY while.v11 { ordering.ToString(); // Shouldn't crash. } +TEST_F(HloOrderingTest, ConditionalInstructionOrdering) { + const char* module_str = R"( +HloModule test_conditional_module + +true_branch { + param.1 = (s32[], s32[]) parameter(0) + get-tuple-element.1 = s32[] get-tuple-element(param.1), index=0 + get-tuple-element.2 = s32[] get-tuple-element(param.1), index=1 + add.1 = s32[] add(get-tuple-element.1, get-tuple-element.2) + ROOT tuple.1 = (s32[], s32[]) tuple(add.1, get-tuple-element.1) +} + +false_branch { + param.2 = (s32[], s32[]) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(param.2), index=0 + get-tuple-element.4 = s32[] get-tuple-element(param.2), index=1 + add.2 = s32[] add(get-tuple-element.3, get-tuple-element.4) + ROOT tuple.2 = (s32[], s32[]) tuple(add.2, get-tuple-element.4) +} + +ENTRY root { + param.3 = (pred[], (s32[], s32[])) parameter(0) + pred.1 = pred[] get-tuple-element(param.3), index=0 + cond_arg.1 = (s32[], s32[]) get-tuple-element(param.3), index=1 + conditional = (s32[], s32[]) conditional(pred.1, cond_arg.1, cond_arg.1), true_computation=true_branch, false_computation=false_branch + cond_res.1 = s32[] get-tuple-element(conditional), index=0 + cond_res.2 = s32[] get-tuple-element(conditional), index=1 + add.3 = s32[] add(cond_res.1, cond_res.2) + ROOT result = (s32[], s32[], s32[]) tuple(add.3, cond_res.1, cond_res.2) +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(module_str)); + TF_ASSERT_OK_AND_ASSIGN(auto dataflow, + HloDataflowAnalysis::Run(*module, /*ssa_form=*/true)); + DependencyHloOrdering ordering(module.get()); + + // Even though the true and false branches has no ordering, since they do not + // interfere (as they are mutually exclusive), we define the true computation + // to be before the false one. + // Similarly, any instruction in the true or false branches are considered + // before the conditional instruction. The roots are effectively "at the same + // time" WRT the conditional, but they are Phi-ed anyway. + HloInstruction* add_1 = FindInstruction(module.get(), "add.1"); + HloInstruction* add_2 = FindInstruction(module.get(), "add.2"); + HloInstruction* add_3 = FindInstruction(module.get(), "add.3"); + HloInstruction* conditional = FindInstruction(module.get(), "conditional"); + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(add_1), + dataflow->GetValueDefinedAt(add_2))); + EXPECT_TRUE( + ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(add_2), + dataflow->GetValueDefinedAt(conditional))); + EXPECT_TRUE( + ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(add_1), + dataflow->GetValueDefinedAt(conditional))); + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(add_1), + dataflow->GetValueDefinedAt(add_3))); + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(add_2), + dataflow->GetValueDefinedAt(add_3))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/conditional_test.cc b/tensorflow/compiler/xla/tests/conditional_test.cc index bc82167482..b917dee77b 100644 --- a/tensorflow/compiler/xla/tests/conditional_test.cc +++ b/tensorflow/compiler/xla/tests/conditional_test.cc @@ -571,5 +571,56 @@ XLA_TEST_F(ConditionalOpTest, ShapeMismatch) { "only parameter of true_computation")); } +XLA_TEST_F(ConditionalOpTest, SwappedInputsInSequentialConditionals) { + Shape tuple_shape = ShapeUtil::MakeTupleShape({r0f32_, r0f32_}); + Computation swapper; + { + ComputationBuilder builder(client_, TestName() + ".swapper"); + auto param0 = builder.Parameter(0, tuple_shape, "sp0"); + auto x = builder.GetTupleElement(param0, 0); + auto y = builder.GetTupleElement(param0, 1); + builder.Tuple({y, x}); + swapper = builder.Build().ConsumeValueOrDie(); + } + Computation forwarder; + { + ComputationBuilder builder(client_, TestName() + ".forwarder"); + auto param0 = builder.Parameter(0, tuple_shape, "fp0"); + auto x = builder.GetTupleElement(param0, 0); + auto y = builder.GetTupleElement(param0, 1); + builder.Tuple({x, y}); + forwarder = builder.Build().ConsumeValueOrDie(); + } + Computation main; + { + ComputationBuilder builder(client_, TestName() + ".main"); + auto param0 = builder.Parameter(0, tuple_shape, "mp0"); + auto x = builder.GetTupleElement(param0, 0); + auto y = builder.GetTupleElement(param0, 1); + auto lt_pred = builder.Lt(x, y); + auto res = builder.Conditional(lt_pred, param0, forwarder, param0, swapper); + auto ge_pred = builder.Ge(x, y); + builder.Conditional(ge_pred, res, swapper, res, forwarder); + main = builder.Build().ConsumeValueOrDie(); + } + + auto test_swap = [&](float a, float b) { + ComputationBuilder builder(client_, TestName()); + auto x = builder.ConstantR0(a); + auto y = builder.ConstantR0(b); + auto tuple_operand = builder.Tuple({x, y}); + builder.Call(main, {tuple_operand}); + + ComputeAndCompareTuple( + &builder, + *Literal::MakeTuple({Literal::CreateR0(a).get(), + Literal::CreateR0(b).get()}), + {}, error_spec_); + }; + + test_swap(3.11f, 9.4f); + test_swap(11.24f, 5.55f); +} + } // namespace } // namespace xla -- GitLab From eb9b8aaecedc89f6375167e001a1096281d7191d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 14:36:53 -0700 Subject: [PATCH 078/960] Internal cleanup. PiperOrigin-RevId: 189247461 --- .../contrib/lite/java/demo/app/src/main/BUILD | 2 +- .../java/demo/app/src/main/assets/labels.txt | 1001 ----------------- .../Camera2BasicFragment.java | 12 +- .../ImageClassifierQuantizedMobileNet.java | 3 +- 4 files changed, 8 insertions(+), 1010 deletions(-) delete mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 654fa9d6d2..5eb749aae6 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -6,7 +6,7 @@ android_binary( name = "TfLiteCameraDemo", srcs = glob(["java/**/*.java"]), assets = [ - "@tflite_mobilenet//:labels.txt", + "//tensorflow/contrib/lite/java/demo/app/src/main/assets:labels_mobilenet_quant_v1_224.txt", "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite", ], assets_dir = "", diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt deleted file mode 100644 index fe811239d8..0000000000 --- a/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt +++ /dev/null @@ -1,1001 +0,0 @@ -background -tench -goldfish -great white shark -tiger shark -hammerhead -electric ray -stingray -cock -hen -ostrich -brambling -goldfinch -house finch -junco -indigo bunting -robin -bulbul -jay -magpie -chickadee -water ouzel -kite -bald eagle -vulture -great grey owl -European fire salamander -common newt -eft -spotted salamander -axolotl -bullfrog -tree frog -tailed frog -loggerhead -leatherback turtle -mud turtle -terrapin -box turtle -banded gecko -common iguana -American chameleon -whiptail -agama -frilled lizard -alligator lizard -Gila monster -green lizard -African chameleon -Komodo dragon -African crocodile -American alligator -triceratops -thunder snake -ringneck snake -hognose snake -green snake -king snake -garter snake -water snake -vine snake -night snake -boa constrictor -rock python -Indian cobra -green mamba -sea snake -horned viper -diamondback -sidewinder -trilobite -harvestman -scorpion -black and gold garden spider -barn spider -garden spider -black widow -tarantula -wolf spider -tick -centipede -black grouse -ptarmigan -ruffed grouse -prairie chicken -peacock -quail -partridge -African grey -macaw -sulphur-crested cockatoo -lorikeet -coucal -bee eater -hornbill -hummingbird -jacamar -toucan -drake -red-breasted merganser -goose -black swan -tusker -echidna -platypus -wallaby -koala -wombat -jellyfish -sea anemone -brain coral -flatworm -nematode -conch -snail -slug -sea slug -chiton -chambered nautilus -Dungeness crab -rock crab -fiddler crab -king crab -American lobster -spiny lobster -crayfish -hermit crab -isopod -white stork -black stork -spoonbill -flamingo -little blue heron -American egret -bittern -crane -limpkin -European gallinule -American coot -bustard -ruddy turnstone -red-backed sandpiper -redshank -dowitcher -oystercatcher -pelican -king penguin -albatross -grey whale -killer whale -dugong -sea lion -Chihuahua -Japanese spaniel -Maltese dog -Pekinese -Shih-Tzu -Blenheim spaniel -papillon -toy terrier -Rhodesian ridgeback -Afghan hound -basset -beagle -bloodhound -bluetick -black-and-tan coonhound -Walker hound -English foxhound -redbone -borzoi -Irish wolfhound -Italian greyhound -whippet -Ibizan hound -Norwegian elkhound -otterhound -Saluki -Scottish deerhound -Weimaraner -Staffordshire bullterrier -American Staffordshire terrier -Bedlington terrier -Border terrier -Kerry blue terrier -Irish terrier -Norfolk terrier -Norwich terrier -Yorkshire terrier -wire-haired fox terrier -Lakeland terrier -Sealyham terrier -Airedale -cairn -Australian terrier -Dandie Dinmont -Boston bull -miniature schnauzer -giant schnauzer -standard schnauzer -Scotch terrier -Tibetan terrier -silky terrier -soft-coated wheaten terrier -West Highland white terrier -Lhasa -flat-coated retriever -curly-coated retriever -golden retriever -Labrador retriever -Chesapeake Bay retriever -German short-haired pointer -vizsla -English setter -Irish setter -Gordon setter -Brittany spaniel -clumber -English springer -Welsh springer spaniel -cocker spaniel -Sussex spaniel -Irish water spaniel -kuvasz -schipperke -groenendael -malinois -briard -kelpie -komondor -Old English sheepdog -Shetland sheepdog -collie -Border collie -Bouvier des Flandres -Rottweiler -German shepherd -Doberman -miniature pinscher -Greater Swiss Mountain dog -Bernese mountain dog -Appenzeller -EntleBucher -boxer -bull mastiff -Tibetan mastiff -French bulldog -Great Dane -Saint Bernard -Eskimo dog -malamute -Siberian husky -dalmatian -affenpinscher -basenji -pug -Leonberg -Newfoundland -Great Pyrenees -Samoyed -Pomeranian -chow -keeshond -Brabancon griffon -Pembroke -Cardigan -toy poodle -miniature poodle -standard poodle -Mexican hairless -timber wolf -white wolf -red wolf -coyote -dingo -dhole -African hunting dog -hyena -red fox -kit fox -Arctic fox -grey fox -tabby -tiger cat -Persian cat -Siamese cat -Egyptian cat -cougar -lynx -leopard -snow leopard -jaguar -lion -tiger -cheetah -brown bear -American black bear -ice bear -sloth bear -mongoose -meerkat -tiger beetle -ladybug -ground beetle -long-horned beetle -leaf beetle -dung beetle -rhinoceros beetle -weevil -fly -bee -ant -grasshopper -cricket -walking stick -cockroach -mantis -cicada -leafhopper -lacewing -dragonfly -damselfly -admiral -ringlet -monarch -cabbage butterfly -sulphur butterfly -lycaenid -starfish -sea urchin -sea cucumber -wood rabbit -hare -Angora -hamster -porcupine -fox squirrel -marmot -beaver -guinea pig -sorrel -zebra -hog -wild boar -warthog -hippopotamus -ox -water buffalo -bison -ram -bighorn -ibex -hartebeest -impala -gazelle -Arabian camel -llama -weasel -mink -polecat -black-footed ferret -otter -skunk -badger -armadillo -three-toed sloth -orangutan -gorilla -chimpanzee -gibbon -siamang -guenon -patas -baboon -macaque -langur -colobus -proboscis monkey -marmoset -capuchin -howler monkey -titi -spider monkey -squirrel monkey -Madagascar cat -indri -Indian elephant -African elephant -lesser panda -giant panda -barracouta -eel -coho -rock beauty -anemone fish -sturgeon -gar -lionfish -puffer -abacus -abaya -academic gown -accordion -acoustic guitar -aircraft carrier -airliner -airship -altar -ambulance -amphibian -analog clock -apiary -apron -ashcan -assault rifle -backpack -bakery -balance beam -balloon -ballpoint -Band Aid -banjo -bannister -barbell -barber chair -barbershop -barn -barometer -barrel -barrow -baseball -basketball -bassinet -bassoon -bathing cap -bath towel -bathtub -beach wagon -beacon -beaker -bearskin -beer bottle -beer glass -bell cote -bib -bicycle-built-for-two -bikini -binder -binoculars -birdhouse -boathouse -bobsled -bolo tie -bonnet -bookcase -bookshop -bottlecap -bow -bow tie -brass -brassiere -breakwater -breastplate -broom -bucket -buckle -bulletproof vest -bullet train -butcher shop -cab -caldron -candle -cannon -canoe -can opener -cardigan -car mirror -carousel -carpenter's kit -carton -car wheel -cash machine -cassette -cassette player -castle -catamaran -CD player -cello -cellular telephone -chain -chainlink fence -chain mail -chain saw -chest -chiffonier -chime -china cabinet -Christmas stocking -church -cinema -cleaver -cliff dwelling -cloak -clog -cocktail shaker -coffee mug -coffeepot -coil -combination lock -computer keyboard -confectionery -container ship -convertible -corkscrew -cornet -cowboy boot -cowboy hat -cradle -crane -crash helmet -crate -crib -Crock Pot -croquet ball -crutch -cuirass -dam -desk -desktop computer -dial telephone -diaper -digital clock -digital watch -dining table -dishrag -dishwasher -disk brake -dock -dogsled -dome -doormat -drilling platform -drum -drumstick -dumbbell -Dutch oven -electric fan -electric guitar -electric locomotive -entertainment center -envelope -espresso maker -face powder -feather boa -file -fireboat -fire engine -fire screen -flagpole -flute -folding chair -football helmet -forklift -fountain -fountain pen -four-poster -freight car -French horn -frying pan -fur coat -garbage truck -gasmask -gas pump -goblet -go-kart -golf ball -golfcart -gondola -gong -gown -grand piano -greenhouse -grille -grocery store -guillotine -hair slide -hair spray -half track -hammer -hamper -hand blower -hand-held computer -handkerchief -hard disc -harmonica -harp -harvester -hatchet -holster -home theater -honeycomb -hook -hoopskirt -horizontal bar -horse cart -hourglass -iPod -iron -jack-o'-lantern -jean -jeep -jersey -jigsaw puzzle -jinrikisha -joystick -kimono -knee pad -knot -lab coat -ladle -lampshade -laptop -lawn mower -lens cap -letter opener -library -lifeboat -lighter -limousine -liner -lipstick -Loafer -lotion -loudspeaker -loupe -lumbermill -magnetic compass -mailbag -mailbox -maillot -maillot -manhole cover -maraca -marimba -mask -matchstick -maypole -maze -measuring cup -medicine chest -megalith -microphone -microwave -military uniform -milk can -minibus -miniskirt -minivan -missile -mitten -mixing bowl -mobile home -Model T -modem -monastery -monitor -moped -mortar -mortarboard -mosque -mosquito net -motor scooter -mountain bike -mountain tent -mouse -mousetrap -moving van -muzzle -nail -neck brace -necklace -nipple -notebook -obelisk -oboe -ocarina -odometer -oil filter -organ -oscilloscope -overskirt -oxcart -oxygen mask -packet -paddle -paddlewheel -padlock -paintbrush -pajama -palace -panpipe -paper towel -parachute -parallel bars -park bench -parking meter -passenger car -patio -pay-phone -pedestal -pencil box -pencil sharpener -perfume -Petri dish -photocopier -pick -pickelhaube -picket fence -pickup -pier -piggy bank -pill bottle -pillow -ping-pong ball -pinwheel -pirate -pitcher -plane -planetarium -plastic bag -plate rack -plow -plunger -Polaroid camera -pole -police van -poncho -pool table -pop bottle -pot -potter's wheel -power drill -prayer rug -printer -prison -projectile -projector -puck -punching bag -purse -quill -quilt -racer -racket -radiator -radio -radio telescope -rain barrel -recreational vehicle -reel -reflex camera -refrigerator -remote control -restaurant -revolver -rifle -rocking chair -rotisserie -rubber eraser -rugby ball -rule -running shoe -safe -safety pin -saltshaker -sandal -sarong -sax -scabbard -scale -school bus -schooner -scoreboard -screen -screw -screwdriver -seat belt -sewing machine -shield -shoe shop -shoji -shopping basket -shopping cart -shovel -shower cap -shower curtain -ski -ski mask -sleeping bag -slide rule -sliding door -slot -snorkel -snowmobile -snowplow -soap dispenser -soccer ball -sock -solar dish -sombrero -soup bowl -space bar -space heater -space shuttle -spatula -speedboat -spider web -spindle -sports car -spotlight -stage -steam locomotive -steel arch bridge -steel drum -stethoscope -stole -stone wall -stopwatch -stove -strainer -streetcar -stretcher -studio couch -stupa -submarine -suit -sundial -sunglass -sunglasses -sunscreen -suspension bridge -swab -sweatshirt -swimming trunks -swing -switch -syringe -table lamp -tank -tape player -teapot -teddy -television -tennis ball -thatch -theater curtain -thimble -thresher -throne -tile roof -toaster -tobacco shop -toilet seat -torch -totem pole -tow truck -toyshop -tractor -trailer truck -tray -trench coat -tricycle -trimaran -tripod -triumphal arch -trolleybus -trombone -tub -turnstile -typewriter keyboard -umbrella -unicycle -upright -vacuum -vase -vault -velvet -vending machine -vestment -viaduct -violin -volleyball -waffle iron -wall clock -wallet -wardrobe -warplane -washbasin -washer -water bottle -water jug -water tower -whiskey jug -whistle -wig -window screen -window shade -Windsor tie -wine bottle -wing -wok -wooden spoon -wool -worm fence -wreck -yawl -yurt -web site -comic book -crossword puzzle -street sign -traffic light -book jacket -menu -plate -guacamole -consomme -hot pot -trifle -ice cream -ice lolly -French loaf -bagel -pretzel -cheeseburger -hotdog -mashed potato -head cabbage -broccoli -cauliflower -zucchini -spaghetti squash -acorn squash -butternut squash -cucumber -artichoke -bell pepper -cardoon -mushroom -Granny Smith -strawberry -orange -lemon -fig -pineapple -banana -jackfruit -custard apple -pomegranate -hay -carbonara -chocolate sauce -dough -meat loaf -pizza -potpie -burrito -red wine -espresso -cup -eggnog -alp -bubble -cliff -coral reef -geyser -lakeside -promontory -sandbar -seashore -valley -volcano -ballplayer -groom -scuba diver -rapeseed -daisy -yellow lady's slipper -corn -acorn -hip -buckeye -coral fungus -agaric -gyromitra -stinkhorn -earthstar -hen-of-the-woods -bolete -ear -toilet tissue diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java index 9b9fdffab5..300786c3ca 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -299,7 +299,7 @@ public class Camera2BasicFragment extends Fragment // create either a new ImageClassifierQuantizedMobileNet or an ImageClassifierFloatInception classifier = new ImageClassifierQuantizedMobileNet(getActivity()); } catch (IOException e) { - Log.e(TAG, "Failed to initialize an image classifier."); + Log.e(TAG, "Failed to initialize an image classifier.", e); } startBackgroundThread(); } @@ -433,7 +433,7 @@ public class Camera2BasicFragment extends Fragment return; } } catch (CameraAccessException e) { - e.printStackTrace(); + Log.e(TAG, "Failed to access Camera", e); } catch (NullPointerException e) { // Currently an NPE is thrown when the Camera2API is used but not supported on the // device this code runs. @@ -478,7 +478,7 @@ public class Camera2BasicFragment extends Fragment } manager.openCamera(cameraId, stateCallback, backgroundHandler); } catch (CameraAccessException e) { - e.printStackTrace(); + Log.e(TAG, "Failed to open Camera", e); } catch (InterruptedException e) { throw new RuntimeException("Interrupted while trying to lock camera opening.", e); } @@ -545,7 +545,7 @@ public class Camera2BasicFragment extends Fragment runClassifier = false; } } catch (InterruptedException e) { - e.printStackTrace(); + Log.e(TAG, "Interrupted when stopping background thread", e); } } @@ -604,7 +604,7 @@ public class Camera2BasicFragment extends Fragment captureSession.setRepeatingRequest( previewRequest, captureCallback, backgroundHandler); } catch (CameraAccessException e) { - e.printStackTrace(); + Log.e(TAG, "Failed to set up config to capture Camera", e); } } @@ -615,7 +615,7 @@ public class Camera2BasicFragment extends Fragment }, null); } catch (CameraAccessException e) { - e.printStackTrace(); + Log.e(TAG, "Failed to preview Camera", e); } } diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index c533de7927..e164ac7554 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -16,7 +16,6 @@ limitations under the License. package com.example.android.tflitecamerademo; import android.app.Activity; - import java.io.IOException; /** @@ -49,7 +48,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { @Override protected String getLabelPath() { - return "labels.txt"; + return "labels_mobilenet_quant_v1_224.txt"; } @Override -- GitLab From bb84d4deb1267f24e8105f4b1b40c264674bc134 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Thu, 15 Mar 2018 14:38:25 -0700 Subject: [PATCH 079/960] [Checkpointable] Make EagerIterator checkpointable. Use object-based save/restore to make dataset/iterator checkpointable in eager mode, this could potentially be extended to graph mode as well. PiperOrigin-RevId: 189247720 --- tensorflow/contrib/eager/python/BUILD | 1 + tensorflow/contrib/eager/python/datasets.py | 31 +++++++++- .../contrib/eager/python/datasets_test.py | 58 +++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 384ef7f963..eb810e06dd 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -70,6 +70,7 @@ cuda_py_test( srcs = ["datasets_test.py"], additional_deps = [ ":datasets", + ":checkpointable_utils", "//tensorflow/contrib/data/python/ops:transformation_ops", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/python:dtypes", diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 332bada57b..a4c3283dac 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -31,6 +31,8 @@ from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import checkpointable +from tensorflow.python.training.saver import BaseSaverBuilder _uid_counter = 0 _uid_lock = threading.Lock() @@ -44,7 +46,7 @@ def _generate_shared_name(prefix): return "{}{}".format(prefix, uid) -class Iterator(iterator_ops.EagerIterator): +class Iterator(iterator_ops.EagerIterator, checkpointable.CheckpointableBase): """An iterator producing tf.Tensor objects from a tf.data.Dataset. NOTE: Unlike the iterator created by the @@ -116,3 +118,30 @@ class Iterator(iterator_ops.EagerIterator): self._output_shapes, self._output_classes) else: return super(Iterator, self)._next_internal() + + # TODO(shivaniagrawal): Expose checkpointable stateful objects from dataset + # attributes(potential). + + class _Saveable(BaseSaverBuilder.SaveableObject): + """SaveableObject for saving/restoring iterator state.""" + + def __init__(self, iterator_resource, name): + serialized_iterator = gen_dataset_ops.serialize_iterator( + iterator_resource) + specs = [ + BaseSaverBuilder.SaveSpec(serialized_iterator, "", name + "_STATE") + ] + # pylint: disable=protected-access + super(Iterator._Saveable, self).__init__(iterator_resource, specs, name) + + def restore(self, restored_tensors, restored_shapes): + with ops.colocate_with(self.op): + return gen_dataset_ops.deserialize_iterator(self.op, + restored_tensors[0]) + + def _gather_saveables_for_checkpoint(self): + + def _saveable_factory(name): + return self._Saveable(self._resource, name) + + return {"ITERATOR": _saveable_factory} diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py index 4afadd88f5..c658505de4 100644 --- a/tensorflow/contrib/eager/python/datasets_test.py +++ b/tensorflow/contrib/eager/python/datasets_test.py @@ -16,6 +16,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + import threading import time @@ -24,6 +26,7 @@ import numpy as np from tensorflow.contrib import lookup from tensorflow.contrib.data.python.ops import threadpool from tensorflow.contrib.data.python.ops import unique +from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import datasets from tensorflow.python.data import Dataset from tensorflow.python.eager import test @@ -221,6 +224,61 @@ class IteratorTest(test.TestCase): # perform work. self.assertLessEqual(len(thread_ids), num_threads) + def testSaveRestore(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') + dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + dataset = dataset.map(math_ops.square).batch(2) + iterator = datasets.Iterator(dataset) + checkpoint = checkpointable_utils.Checkpoint(iterator=iterator) + self.assertAllEqual([1, 4], iterator.get_next().numpy()) + save_path = checkpoint.save(checkpoint_prefix) + self.assertAllEqual([9, 16], iterator.get_next().numpy()) + self.assertAllEqual([25, 36], iterator.get_next().numpy()) + checkpoint.restore(save_path) + self.assertAllEqual([9, 16], iterator.get_next().numpy()) + self.assertAllEqual([25, 36], iterator.get_next().numpy()) + + def testSaveRestoreMultipleIterator(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') + dataset = Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + dataset = dataset.map(math_ops.square).batch(2) + iterator_1 = datasets.Iterator(dataset) + iterator_2 = datasets.Iterator(dataset) + dataset_2 = Dataset.range(10) + iterator_3 = datasets.Iterator(dataset_2) + + checkpoint = checkpointable_utils.Checkpoint( + iterator_1=iterator_1, iterator_2=iterator_2, iterator_3=iterator_3) + self.assertAllEqual([1, 4], iterator_1.get_next().numpy()) + self.assertEqual(0, iterator_3.get_next().numpy()) + self.assertEqual(1, iterator_3.get_next().numpy()) + self.assertEqual(2, iterator_3.get_next().numpy()) + + save_path = checkpoint.save(checkpoint_prefix) + self.assertAllEqual([1, 4], iterator_2.get_next().numpy()) + self.assertAllEqual([9, 16], iterator_2.get_next().numpy()) + self.assertEqual(3, iterator_3.get_next().numpy()) + checkpoint.restore(save_path) + self.assertAllEqual([9, 16], iterator_1.get_next().numpy()) + self.assertAllEqual([1, 4], iterator_2.get_next().numpy()) + self.assertEqual(3, iterator_3.get_next().numpy()) + + def testRestoreExhaustedIterator(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') + dataset = Dataset.range(3) + iterator = datasets.Iterator(dataset) + + checkpoint = checkpointable_utils.Checkpoint(iterator=iterator) + self.assertEqual(0, iterator.get_next().numpy()) + self.assertEqual(1, iterator.get_next().numpy()) + save_path = checkpoint.save(checkpoint_prefix) + self.assertEqual(2, iterator.get_next().numpy()) + checkpoint.restore(save_path) + self.assertEqual(2, iterator.get_next().numpy()) + class DatasetConstructorBenchmark(test.Benchmark): -- GitLab From a325b0ae208b3295dca574f93c214f7c924cea11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 14:40:12 -0700 Subject: [PATCH 080/960] Adds a GPU kernel registration for PlaceholderWithDefault, so we can avoid the issue of using it with a registered GPU device without soft placement. PiperOrigin-RevId: 189248024 --- tensorflow/core/kernels/identity_op.cc | 34 ++++++++++++------- .../python/kernel_tests/constant_op_test.py | 9 ++--- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc index a18a72c66d..b5603fecd8 100644 --- a/tensorflow/core/kernels/identity_op.cc +++ b/tensorflow/core/kernels/identity_op.cc @@ -101,6 +101,10 @@ REGISTER_SYCL_HOST_KERNEL(bool); REGISTER_KERNEL_BUILDER(Name("DebugGradientIdentity") \ .Device(DEVICE_GPU) \ .TypeConstraint("T"), \ + IdentityOp); \ + REGISTER_KERNEL_BUILDER(Name("PlaceholderWithDefault") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("dtype"), \ IdentityOp) TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL); @@ -112,18 +116,24 @@ REGISTER_GPU_KERNEL(Variant); // A special GPU kernel for int32 and bool. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. -#define REGISTER_GPU_HOST_KERNEL(type) \ - REGISTER_KERNEL_BUILDER(Name("Identity") \ - .Device(DEVICE_GPU) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ - IdentityOp); \ - REGISTER_KERNEL_BUILDER(Name("RefIdentity") \ - .Device(DEVICE_GPU) \ - .HostMemory("input") \ - .HostMemory("output") \ - .TypeConstraint("T"), \ +#define REGISTER_GPU_HOST_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("Identity") \ + .Device(DEVICE_GPU) \ + .HostMemory("input") \ + .HostMemory("output") \ + .TypeConstraint("T"), \ + IdentityOp); \ + REGISTER_KERNEL_BUILDER(Name("RefIdentity") \ + .Device(DEVICE_GPU) \ + .HostMemory("input") \ + .HostMemory("output") \ + .TypeConstraint("T"), \ + IdentityOp); \ + REGISTER_KERNEL_BUILDER(Name("PlaceholderWithDefault") \ + .Device(DEVICE_GPU) \ + .HostMemory("input") \ + .HostMemory("output") \ + .TypeConstraint("dtype"), \ IdentityOp) REGISTER_GPU_HOST_KERNEL(int32); diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index ffbdb0e61a..18796f7095 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -881,7 +881,7 @@ versions { class PlaceholderWithDefaultTest(test.TestCase): def testFullShape(self): - with self.test_session(): + with self.test_session(force_gpu=test_util.is_gpu_available()): p = array_ops.placeholder_with_default([[2, 2], [2, 2]], shape=[2, 2]) a = array_ops.identity(p) self.assertAllEqual([[2, 2], [2, 2]], a.eval()) @@ -892,7 +892,7 @@ class PlaceholderWithDefaultTest(test.TestCase): a.eval(feed_dict={p: [[6, 6, 6], [6, 6, 6]]}) def testPartialShape(self): - with self.test_session(): + with self.test_session(force_gpu=test_util.is_gpu_available()): p = array_ops.placeholder_with_default([1, 2, 3], shape=[None]) a = array_ops.identity(p) self.assertAllEqual([1, 2, 3], a.eval()) @@ -902,7 +902,7 @@ class PlaceholderWithDefaultTest(test.TestCase): a.eval(feed_dict={p: [[2, 2], [2, 2]]}) def testNoShape(self): - with self.test_session(): + with self.test_session(force_gpu=test_util.is_gpu_available()): p = array_ops.placeholder_with_default([17], shape=None) a = array_ops.identity(p) self.assertAllEqual([17], a.eval()) @@ -911,11 +911,12 @@ class PlaceholderWithDefaultTest(test.TestCase): [[3, 3], [3, 3]], a.eval(feed_dict={p: [[3, 3], [3, 3]]})) def testGradient(self): - with self.test_session(): + with self.test_session(force_gpu=test_util.is_gpu_available()): x = array_ops.placeholder(dtypes_lib.float32, [5, 7]) y = array_ops.placeholder_with_default(x, None) err = gradient_checker.compute_gradient_error(x, [5, 7], y, [5, 7]) self.assertLess(err, 1e-3) + if __name__ == "__main__": test.main() -- GitLab From f9dc34df6d56d2bcb67b563ade81a3f12bbcacd2 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Thu, 15 Mar 2018 14:45:34 -0700 Subject: [PATCH 081/960] Check for very large chunk sizes in WAV decoding Change how chunk sizes larger than 2GB are handled, since they're stored as unsigned int32s, so there are lots of ways for conversions to confuse the decoding logic. The new behavior is to fail with an error, since such large WAV files are not common, and are unsupported by many readers. PiperOrigin-RevId: 189248857 --- tensorflow/core/lib/wav/wav_io.cc | 70 +++--- tensorflow/core/lib/wav/wav_io.h | 33 +++ tensorflow/core/lib/wav/wav_io_test.cc | 319 +++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 32 deletions(-) diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc index 77d3c88998..51b9c6cd82 100644 --- a/tensorflow/core/lib/wav/wav_io.cc +++ b/tensorflow/core/lib/wav/wav_io.cc @@ -81,13 +81,38 @@ inline float Int16SampleToFloat(int16 data) { return data * kMultiplier; } +} // namespace + +// Handles moving the data index forward, validating the arguments, and avoiding +// overflow or underflow. +Status IncrementOffset(int old_offset, size_t increment, size_t max_size, + int* new_offset) { + if (old_offset < 0) { + return errors::InvalidArgument("Negative offsets are not allowed: ", + old_offset); + } + if (old_offset > max_size) { + return errors::InvalidArgument("Initial offset is outside data range: ", + old_offset); + } + *new_offset = old_offset + increment; + if (*new_offset > max_size) { + return errors::InvalidArgument("Data too short when trying to read string"); + } + // See above for the check that the input offset is positive. If it's negative + // here then it means that there's been an overflow in the arithmetic. + if (*new_offset < 0) { + return errors::InvalidArgument("Offset too large, overflowed: ", + *new_offset); + } + return Status::OK(); +} + Status ExpectText(const string& data, const string& expected_text, int* offset) { - const int new_offset = *offset + expected_text.size(); - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read ", - expected_text); - } + int new_offset; + TF_RETURN_IF_ERROR( + IncrementOffset(*offset, expected_text.size(), data.size(), &new_offset)); const string found_text(data.begin() + *offset, data.begin() + new_offset); if (found_text != expected_text) { return errors::InvalidArgument("Header mismatch: Expected ", expected_text, @@ -97,40 +122,16 @@ Status ExpectText(const string& data, const string& expected_text, return Status::OK(); } -template -Status ReadValue(const string& data, T* value, int* offset) { - const int new_offset = *offset + sizeof(T); - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read value"); - } - if (port::kLittleEndian) { - memcpy(value, data.data() + *offset, sizeof(T)); - } else { - *value = 0; - const uint8* data_buf = - reinterpret_cast(data.data() + *offset); - int shift = 0; - for (int i = 0; i < sizeof(T); ++i, shift += 8) { - *value = *value | (data_buf[i] << shift); - } - } - *offset = new_offset; - return Status::OK(); -} - Status ReadString(const string& data, int expected_length, string* value, int* offset) { - const int new_offset = *offset + expected_length; - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read string"); - } + int new_offset; + TF_RETURN_IF_ERROR( + IncrementOffset(*offset, expected_length, data.size(), &new_offset)); *value = string(data.begin() + *offset, data.begin() + new_offset); *offset = new_offset; return Status::OK(); } -} // namespace - Status EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate, size_t num_channels, size_t num_frames, string* wav_string) { @@ -272,6 +273,11 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string, TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset)); uint32 chunk_size; TF_RETURN_IF_ERROR(ReadValue(wav_string, &chunk_size, &offset)); + if (chunk_size > std::numeric_limits::max()) { + return errors::InvalidArgument( + "WAV data chunk '", chunk_id, "' is too large: ", chunk_size, + " bytes, but the limit is ", std::numeric_limits::max()); + } if (chunk_id == kDataChunkId) { if (was_data_found) { return errors::InvalidArgument("More than one data chunk found in WAV"); diff --git a/tensorflow/core/lib/wav/wav_io.h b/tensorflow/core/lib/wav/wav_io.h index adca0ee303..f004524177 100644 --- a/tensorflow/core/lib/wav/wav_io.h +++ b/tensorflow/core/lib/wav/wav_io.h @@ -21,6 +21,9 @@ limitations under the License. #include #include +#include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/core/coding.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -55,6 +58,36 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string, uint32* sample_count, uint16* channel_count, uint32* sample_rate); +// Everything below here is only exposed publicly for testing purposes. + +// Handles moving the data index forward, validating the arguments, and avoiding +// overflow or underflow. +Status IncrementOffset(int old_offset, size_t increment, size_t max_size, + int* new_offset); + +// This function is only exposed in the header for testing purposes, as a +// template that needs to be instantiated. Reads a typed numeric value from a +// stream of data. +template +Status ReadValue(const string& data, T* value, int* offset) { + int new_offset; + TF_RETURN_IF_ERROR( + IncrementOffset(*offset, sizeof(T), data.size(), &new_offset)); + if (port::kLittleEndian) { + memcpy(value, data.data() + *offset, sizeof(T)); + } else { + *value = 0; + const uint8* data_buf = + reinterpret_cast(data.data() + *offset); + int shift = 0; + for (int i = 0; i < sizeof(T); ++i, shift += 8) { + *value = *value | (data_buf[i] << shift); + } + } + *offset = new_offset; + return Status::OK(); +} + } // namespace wav } // namespace tensorflow diff --git a/tensorflow/core/lib/wav/wav_io_test.cc b/tensorflow/core/lib/wav/wav_io_test.cc index 40ddd94abe..d8a83fc464 100644 --- a/tensorflow/core/lib/wav/wav_io_test.cc +++ b/tensorflow/core/lib/wav/wav_io_test.cc @@ -25,6 +25,12 @@ limitations under the License. namespace tensorflow { namespace wav { +// These are defined in wav_io.cc, and the signatures are here so we don't have +// to expose them in the public header. +Status ExpectText(const string& data, const string& expected_text, int* offset); +Status ReadString(const string& data, int expected_length, string* value, + int* offset); + TEST(WavIO, BadArguments) { float audio[] = {0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; string result; @@ -155,5 +161,318 @@ TEST(WavIO, BasicStereo) { EXPECT_EQ(expected, result); } +// Test how chunk sizes larger than 2GB are handled, since they're stored as +// unsigned int32s, so there are lots of ways for conversions to confuse the +// decoding logic. The expected behavior is to fail with an error, since such +// large WAV files are not common, and are unsupported by many readers. +// See b/72655902. +TEST(WavIO, ChunkSizeOverflow) { + std::vector wav_data = { + 'R', 'I', 'F', 'F', // ChunkID + 60, 0, 0, 0, // ChunkSize: 36 + SubChunk2Size + 'W', 'A', 'V', 'E', // Format + 'f', 'm', 't', ' ', // Subchunk1ID + 16, 0, 0, 0, // Subchunk1Size + 1, 0, // AudioFormat: 1=PCM + 1, 0, // NumChannels + 0x44, 0xac, 0, 0, // SampleRate: 44100 + 0x88, 0x58, 0x1, 0, // BytesPerSecond: SampleRate * NumChannels * + // BitsPerSample/8 + 2, 0, // BytesPerSample: NumChannels * BitsPerSample/8 + 16, 0, // BitsPerSample + 'd', 'a', 't', 'a', // Subchunk2ID + 8, 0, 0, 0, // Subchunk2Size: NumSamples * NumChannels * + // BitsPerSample/8 + 0, 0, // Sample 1: 0 + 0xff, 0x7f, // Sample 2: 32767 (saturated) + 0, 0, // Sample 3: 0 + 0x00, 0x80, // Sample 4: -32768 (saturated) + 'f', 'o', 'o', 'o', // Subchunk2ID + 0xff, 0xff, 0xff, 0xf8, // Chunk size that could cause an infinite loop. + 0, 0, // Sample 1: 0 + 0xff, 0x7f, // Sample 2: 32767 (saturated) + 0, 0, // Sample 3: 0 + 0x00, 0x80, // Sample 4: -32768 (saturated) + }; + string wav_data_string(wav_data.begin(), wav_data.end()); + std::vector decoded_audio; + uint32 decoded_sample_count; + uint16 decoded_channel_count; + uint32 decoded_sample_rate; + Status decode_status = DecodeLin16WaveAsFloatVector( + wav_data_string, &decoded_audio, &decoded_sample_count, + &decoded_channel_count, &decoded_sample_rate); + EXPECT_FALSE(decode_status.ok()); + EXPECT_TRUE(StringPiece(decode_status.error_message()).contains("too large")) + << decode_status.error_message(); +} + +TEST(WavIO, IncrementOffset) { + int new_offset = -1; + TF_EXPECT_OK(IncrementOffset(0, 10, 20, &new_offset)); + EXPECT_EQ(10, new_offset); + + new_offset = -1; + TF_EXPECT_OK(IncrementOffset(10, 4, 20, &new_offset)); + EXPECT_EQ(14, new_offset); + + new_offset = -1; + TF_EXPECT_OK(IncrementOffset(99, 1, 100, &new_offset)); + EXPECT_EQ(100, new_offset); + + new_offset = -1; + EXPECT_FALSE(IncrementOffset(-1, 1, 100, &new_offset).ok()); + + new_offset = -1; + EXPECT_FALSE(IncrementOffset(0, -1, 100, &new_offset).ok()); + + new_offset = -1; + EXPECT_FALSE(IncrementOffset(std::numeric_limits::max(), 1, + std::numeric_limits::max(), &new_offset) + .ok()); + + new_offset = -1; + EXPECT_FALSE(IncrementOffset(101, 1, 100, &new_offset).ok()); +} + +TEST(WavIO, ExpectText) { + std::vector test_data = { + 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + TF_EXPECT_OK(ExpectText(test_string, "Expected", &offset)); + EXPECT_EQ(8, offset); + + offset = 0; + Status expect_status = ExpectText(test_string, "Unexpected", &offset); + EXPECT_FALSE(expect_status.ok()); + + offset = 0; + TF_EXPECT_OK(ExpectText(test_string, "Exp", &offset)); + EXPECT_EQ(3, offset); + TF_EXPECT_OK(ExpectText(test_string, "ected", &offset)); + EXPECT_EQ(8, offset); + expect_status = ExpectText(test_string, "foo", &offset); + EXPECT_FALSE(expect_status.ok()); +} + +TEST(WavIO, ReadString) { + std::vector test_data = { + 'E', 'x', 'p', 'e', 'c', 't', 'e', 'd', + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + string read_value; + TF_EXPECT_OK(ReadString(test_string, 2, &read_value, &offset)); + EXPECT_EQ("Ex", read_value); + EXPECT_EQ(2, offset); + + TF_EXPECT_OK(ReadString(test_string, 6, &read_value, &offset)); + EXPECT_EQ("pected", read_value); + EXPECT_EQ(8, offset); + + Status read_status = ReadString(test_string, 3, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueInt8) { + std::vector test_data = {0x00, 0x05, 0xff, 0x80}; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + int8 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(1, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(5, read_value); + EXPECT_EQ(2, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(-1, read_value); + EXPECT_EQ(3, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(-128, read_value); + EXPECT_EQ(4, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueUInt8) { + std::vector test_data = {0x00, 0x05, 0xff, 0x80}; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + uint8 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(1, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(5, read_value); + EXPECT_EQ(2, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(255, read_value); + EXPECT_EQ(3, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(128, read_value); + EXPECT_EQ(4, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueInt16) { + std::vector test_data = { + 0x00, 0x00, // 0 + 0xff, 0x00, // 255 + 0x00, 0x01, // 256 + 0xff, 0xff, // -1 + 0x00, 0x80, // -32768 + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + int16 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(2, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(255, read_value); + EXPECT_EQ(4, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(256, read_value); + EXPECT_EQ(6, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(-1, read_value); + EXPECT_EQ(8, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(-32768, read_value); + EXPECT_EQ(10, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueUInt16) { + std::vector test_data = { + 0x00, 0x00, // 0 + 0xff, 0x00, // 255 + 0x00, 0x01, // 256 + 0xff, 0xff, // 65535 + 0x00, 0x80, // 32768 + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + uint16 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(2, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(255, read_value); + EXPECT_EQ(4, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(256, read_value); + EXPECT_EQ(6, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(65535, read_value); + EXPECT_EQ(8, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(32768, read_value); + EXPECT_EQ(10, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueInt32) { + std::vector test_data = { + 0x00, 0x00, 0x00, 0x00, // 0 + 0xff, 0x00, 0x00, 0x00, // 255 + 0x00, 0xff, 0x00, 0x00, // 65280 + 0x00, 0x00, 0xff, 0x00, // 16,711,680 + 0xff, 0xff, 0xff, 0xff, // -1 + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + int32 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(4, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(255, read_value); + EXPECT_EQ(8, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(65280, read_value); + EXPECT_EQ(12, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(16711680, read_value); + EXPECT_EQ(16, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(-1, read_value); + EXPECT_EQ(20, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + +TEST(WavIO, ReadValueUInt32) { + std::vector test_data = { + 0x00, 0x00, 0x00, 0x00, // 0 + 0xff, 0x00, 0x00, 0x00, // 255 + 0x00, 0xff, 0x00, 0x00, // 65280 + 0x00, 0x00, 0xff, 0x00, // 16,711,680 + 0xff, 0xff, 0xff, 0xff, // 4,294,967,295 + }; + string test_string(test_data.begin(), test_data.end()); + + int offset = 0; + uint32 read_value; + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(0, read_value); + EXPECT_EQ(4, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(255, read_value); + EXPECT_EQ(8, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(65280, read_value); + EXPECT_EQ(12, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(16711680, read_value); + EXPECT_EQ(16, offset); + + TF_EXPECT_OK(ReadValue(test_string, &read_value, &offset)); + EXPECT_EQ(4294967295, read_value); + EXPECT_EQ(20, offset); + + Status read_status = ReadValue(test_string, &read_value, &offset); + EXPECT_FALSE(read_status.ok()); +} + } // namespace wav } // namespace tensorflow -- GitLab From 67fd87c5812f227193145d8436e4bdf9b315dfb7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Mar 2018 14:53:52 -0700 Subject: [PATCH 082/960] Change ParseAndVerifyModule to take a StringPiece; NFC PiperOrigin-RevId: 189250126 --- .../cpu/parallel_task_assignment_test.cc | 4 ++-- .../xla/service/while_loop_simplifier_test.cc | 22 +++++++++---------- .../xla/tests/hlo_verified_test_base.cc | 3 ++- .../xla/tests/hlo_verified_test_base.h | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc index 79b00135c6..90191221eb 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc @@ -44,7 +44,7 @@ TEST_F(ParallelTaskAssignmentTest, DotOperationNotParallelized) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); TF_ASSERT_OK_AND_ASSIGN(bool changed, cpu::ParallelTaskAssigner( max_parallelism_, shape_size_func_) .Run(&module())); @@ -73,7 +73,7 @@ TEST_F(ParallelTaskAssignmentTest, } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); TF_ASSERT_OK_AND_ASSIGN(bool changed, cpu::ParallelTaskAssigner( max_parallelism_, shape_size_func_) .Run(&module())); diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index f1fea6d763..619e87caa5 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -68,7 +68,7 @@ void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { hlo_string_template, "{{LOOP_BOUND}}", tensorflow::strings::StrCat(42 + num_iters), /*replace_all=*/true); - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); } void WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( @@ -107,7 +107,7 @@ void WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( hlo_string_template, "{{LOOP_BOUND}}", tensorflow::strings::StrCat(42 + num_iters), /*replace_all=*/true); - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); } TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { @@ -235,7 +235,7 @@ TEST_F(WhileLoopSimplifierTest, NonTupleShapedLoopNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -267,7 +267,7 @@ TEST_F(WhileLoopSimplifierTest, LoopSwappingTupleElementsNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -296,7 +296,7 @@ TEST_F(WhileLoopSimplifierTest, } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -319,7 +319,7 @@ TEST_F(WhileLoopSimplifierTest, LoopWithEmptyTupleNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -347,7 +347,7 @@ TEST_F(WhileLoopSimplifierTest, LoopWithElemUsedTwiceNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -389,7 +389,7 @@ TEST_F(WhileLoopSimplifierTest, RemoveUnusedLoopOperands) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); HloModule* the_module = &module(); EXPECT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); @@ -439,7 +439,7 @@ TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -472,7 +472,7 @@ TEST_F(WhileLoopSimplifierTest, } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } @@ -504,7 +504,7 @@ TEST_F(WhileLoopSimplifierTest, LoopWithArrayConstantNotSimplified) { } )"; - ParseAndVerifyModule(hlo_string.c_str()); + ParseAndVerifyModule(hlo_string); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index 641907acf2..da4cf4ae0c 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -64,7 +64,8 @@ HloModule& HloVerifiedTestBase::module() { return *module_; } -void HloVerifiedTestBase::ParseAndVerifyModule(const char* hlo_text) { +void HloVerifiedTestBase::ParseAndVerifyModule( + tensorflow::StringPiece hlo_text) { CHECK(!module_) << "Called ParseModule when test already has a module."; TF_ASSERT_OK_AND_ASSIGN(module_, tools::Parse(hlo_text)); VerifyModule(); diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h index c0cb12bc93..e5bb14a883 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -44,7 +44,7 @@ class HloVerifiedTestBase : public HloTestBase { // Returns the default HloModule, lazily creating it if necessary via // HloTestBase::CreateNewModule(). HloModule& module(); - void ParseAndVerifyModule(const char* hlo_text); + void ParseAndVerifyModule(tensorflow::StringPiece hlo_text); // Sets the shape-size function used during hlo verification. If this isn't // called, a default ShapeVerifier is used instead. -- GitLab From 239eb8b652f94b43d51f7c7ffdbbfc02ad094a9c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 15:16:38 -0700 Subject: [PATCH 083/960] PIE binaries that depends on static libraries usually have text relocations in the final executable, which causes link warnings/errors (different linker behaves differently). The optimal way to fix this is to link the binary with shared library, however, the libraries are NVIDIA-proprietary, not all of them have shared version (for example: cuda_9_0/lib64/libculibos.a) PiperOrigin-RevId: 189254317 --- tensorflow/tensorflow.bzl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index a0ec1708d5..bab1e82c86 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -905,6 +905,14 @@ def tf_cuda_library(deps=None, cuda_deps=None, copts=tf_copts(), **kwargs): if not cuda_deps: cuda_deps = [] + if 'linkstatic' not in kwargs or kwargs['linkstatic'] != 1: + enable_text_relocation_linkopt = select({ + clean_dep("//tensorflow:darwin"): [], + "//conditions:default": ['-Wl,-z,notext'],}) + if 'linkopts' in kwargs: + kwargs['linkopts'] += enable_text_relocation_linkopt + else: + kwargs['linkopts'] = enable_text_relocation_linkopt native.cc_library( deps=deps + if_cuda(cuda_deps + [ clean_dep("//tensorflow/core:cuda"), -- GitLab From 6c62e650252ab32f83637a8de6720e73ffeca226 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 15:29:39 -0700 Subject: [PATCH 084/960] Pass error reporter to file copy allocation, and avoid loading model from file twice PiperOrigin-RevId: 189256489 --- .../native/nativeinterpreterwrapper_jni.cc | 27 ++++--- .../lite/NativeInterpreterWrapperTest.java | 16 +++- tensorflow/contrib/lite/model.cc | 74 ++++++++++++------- tensorflow/contrib/lite/model.h | 37 +++++----- tensorflow/contrib/lite/model_test.cc | 32 ++++++++ 5 files changed, 131 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 21bcff40bd..cc448b03c3 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -334,6 +334,19 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( return reinterpret_cast(error_reporter); } +// Verifies whether the model is a flatbuffer file. +class JNIFlatBufferVerifier : public tflite::TfLiteVerifier { + public: + bool Verify(const char* data, int length, + tflite::ErrorReporter* reporter) override { + if (!VerifyModel(data, length)) { + reporter->Report("The model is not a valid Flatbuffer file"); + return false; + } + return true; + } +}; + JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createModel( JNIEnv* env, jclass clazz, jstring model_file, jlong error_handle) { @@ -342,17 +355,11 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModel( if (error_reporter == nullptr) return 0; const char* path = env->GetStringUTFChars(model_file, nullptr); - { - tflite::FileCopyAllocation allocation(path, nullptr); - if (!VerifyModel(allocation.base(), allocation.bytes())) { - throwException(env, kIllegalArgumentException, - "Contents of %s is not a valid flatbuffer model", path); - env->ReleaseStringUTFChars(model_file, path); - return 0; - } - } + std::unique_ptr verifier; + verifier.reset(new JNIFlatBufferVerifier()); - auto model = tflite::FlatBufferModel::BuildFromFile(path, error_reporter); + auto model = tflite::FlatBufferModel::VerifyAndBuildFromFile( + path, verifier.get(), error_reporter); if (!model) { throwException(env, kIllegalArgumentException, "Contents of %s does not encode a valid TensorFlowLite " diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index d6b4e9f438..dbe45e5a05 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -47,6 +47,9 @@ public final class NativeInterpreterWrapperTest { private static final String MODEL_WITH_CUSTOM_OP_PATH = "tensorflow/contrib/lite/java/src/testdata/with_custom_op.lite"; + private static final String NONEXISTING_MODEL_PATH = + "tensorflow/contrib/lite/java/src/testdata/nonexisting_model.bin"; + @Test public void testConstructor() { NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); @@ -60,7 +63,18 @@ public final class NativeInterpreterWrapperTest { NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(INVALID_MODEL_PATH); fail(); } catch (IllegalArgumentException e) { - assertThat(e).hasMessageThat().contains("is not a valid flatbuffer model"); + assertThat(e).hasMessageThat().contains("The model is not a valid Flatbuffer file"); + } + } + + @Test + public void testConstructorWithNonexistingModel() { + try { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(NONEXISTING_MODEL_PATH); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageThat().contains("The model is not a valid Flatbuffer file"); + assertThat(e).hasMessageThat().contains("Could not open"); } } diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 3cf6bcbfcd..f28d56af67 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -32,11 +32,46 @@ namespace tflite { const char* kEmptyTensorName = ""; +// Loads a model from `filename`. If `mmap_file` is true then use mmap, +// otherwise make a copy of the model in a buffer. +std::unique_ptr GetAllocationFromFile(const char* filename, + bool mmap_file, + ErrorReporter* error_reporter, + bool use_nnapi) { + std::unique_ptr allocation; + if (mmap_file) { + if (use_nnapi && NNAPIExists()) + allocation.reset(new NNAPIAllocation(filename, error_reporter)); + else + allocation.reset(new MMAPAllocation(filename, error_reporter)); + } else { + allocation.reset(new FileCopyAllocation(filename, error_reporter)); + } + return allocation; +} + std::unique_ptr FlatBufferModel::BuildFromFile( const char* filename, ErrorReporter* error_reporter) { std::unique_ptr model; - model.reset(new FlatBufferModel(filename, /*mmap_file=*/true, error_reporter, - /*use_nnapi=*/true)); + auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true, + error_reporter, /*use_nnapi=*/true); + model.reset(new FlatBufferModel(allocation.release(), error_reporter)); + if (!model->initialized()) model.reset(); + return model; +} + +std::unique_ptr FlatBufferModel::VerifyAndBuildFromFile( + const char* filename, TfLiteVerifier* verifier, + ErrorReporter* error_reporter) { + std::unique_ptr model; + auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true, + error_reporter, /*use_nnapi=*/true); + if (verifier && + !verifier->Verify(static_cast(allocation->base()), + allocation->bytes(), error_reporter)) { + return model; + } + model.reset(new FlatBufferModel(allocation.release(), error_reporter)); if (!model->initialized()) model.reset(); return model; } @@ -44,7 +79,9 @@ std::unique_ptr FlatBufferModel::BuildFromFile( std::unique_ptr FlatBufferModel::BuildFromBuffer( const char* buffer, size_t buffer_size, ErrorReporter* error_reporter) { std::unique_ptr model; - model.reset(new FlatBufferModel(buffer, buffer_size, error_reporter)); + Allocation* allocation = + new MemoryAllocation(buffer, buffer_size, error_reporter); + model.reset(new FlatBufferModel(allocation, error_reporter)); if (!model->initialized()) model.reset(); return model; } @@ -57,23 +94,6 @@ std::unique_ptr FlatBufferModel::BuildFromModel( return model; } -FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file, - ErrorReporter* error_reporter, bool use_nnapi) - : error_reporter_(error_reporter ? error_reporter - : DefaultErrorReporter()) { - if (mmap_file) { - if (use_nnapi && NNAPIExists()) - allocation_ = new NNAPIAllocation(filename, error_reporter); - else - allocation_ = new MMAPAllocation(filename, error_reporter); - } else { - allocation_ = new FileCopyAllocation(filename, error_reporter); - } - if (!allocation_->valid() || !CheckModelIdentifier()) return; - - model_ = ::tflite::GetModel(allocation_->base()); -} - bool FlatBufferModel::CheckModelIdentifier() const { if (!tflite::ModelBufferHasIdentifier(allocation_->base())) { const char* ident = flatbuffers::GetBufferIdentifier(allocation_->base()); @@ -85,21 +105,21 @@ bool FlatBufferModel::CheckModelIdentifier() const { return true; } -FlatBufferModel::FlatBufferModel(const char* ptr, size_t num_bytes, +FlatBufferModel::FlatBufferModel(const Model* model, ErrorReporter* error_reporter) : error_reporter_(error_reporter ? error_reporter : DefaultErrorReporter()) { - allocation_ = new MemoryAllocation(ptr, num_bytes, error_reporter); - if (!allocation_->valid()) return; - - model_ = ::tflite::GetModel(allocation_->base()); + model_ = model; } -FlatBufferModel::FlatBufferModel(const Model* model, +FlatBufferModel::FlatBufferModel(Allocation* allocation, ErrorReporter* error_reporter) : error_reporter_(error_reporter ? error_reporter : DefaultErrorReporter()) { - model_ = model; + allocation_ = allocation; + if (!allocation_->valid() || !CheckModelIdentifier()) return; + + model_ = ::tflite::GetModel(allocation_->base()); } FlatBufferModel::~FlatBufferModel() { delete allocation_; } diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 8dc1c794dc..38eea0e26b 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -41,6 +41,17 @@ limitations under the License. namespace tflite { +// Abstract interface that verifies whether a given model is legit. +// It facilitates the use-case to verify and build a model without loading it +// twice. +class TfLiteVerifier { + public: + // Returns true if the model is legit. + virtual bool Verify(const char* data, int length, + ErrorReporter* reporter) = 0; + virtual ~TfLiteVerifier() {} +}; + // An RAII object that represents a read-only tflite model, copied from disk, // or mmapped. This uses flatbuffers as the serialization format. class FlatBufferModel { @@ -50,6 +61,12 @@ class FlatBufferModel { const char* filename, ErrorReporter* error_reporter = DefaultErrorReporter()); + // Verifies whether the content of the file is legit, then builds a model + // based on the file. Returns a nullptr in case of failure. + static std::unique_ptr VerifyAndBuildFromFile( + const char* filename, TfLiteVerifier* verifier = nullptr, + ErrorReporter* error_reporter = DefaultErrorReporter()); + // Builds a model based on a pre-loaded flatbuffer. The caller retains // ownership of the buffer and should keep it alive until the returned object // is destroyed. Returns a nullptr in case of failure. @@ -82,23 +99,9 @@ class FlatBufferModel { bool CheckModelIdentifier() const; private: - // Loads a model from `filename`. If `mmap_file` is true then use mmap, - // otherwise make a copy of the model in a buffer. - // - // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be - // used. - explicit FlatBufferModel( - const char* filename, bool mmap_file = true, - ErrorReporter* error_reporter = DefaultErrorReporter(), - bool use_nnapi = false); - - // Loads a model from `ptr` and `num_bytes` of the model file. The `ptr` has - // to remain alive and unchanged until the end of this flatbuffermodel's - // lifetime. - // - // Note, if `error_reporter` is null, then a DefaultErrorReporter() will be - // used. - FlatBufferModel(const char* ptr, size_t num_bytes, + // Loads a model from a given allocation. FlatBufferModel will take over the + // ownership of `allocation`, and delete it in desctructor. + FlatBufferModel(Allocation* allocation, ErrorReporter* error_reporter = DefaultErrorReporter()); // Loads a model from Model flatbuffer. The `model` has to remain alive and diff --git a/tensorflow/contrib/lite/model_test.cc b/tensorflow/contrib/lite/model_test.cc index 66f22fd66a..ae6c1ece18 100644 --- a/tensorflow/contrib/lite/model_test.cc +++ b/tensorflow/contrib/lite/model_test.cc @@ -209,6 +209,38 @@ TEST(BasicFlatBufferModel, TestNullModel) { ASSERT_EQ(interpreter.get(), nullptr); } +// Mocks the verifier by setting the result in ctor. +class FakeVerifier : public tflite::TfLiteVerifier { + public: + explicit FakeVerifier(bool result) : result_(result) {} + bool Verify(const char* data, int length, + tflite::ErrorReporter* reporter) override { + return result_; + } + + private: + bool result_; +}; + +TEST(BasicFlatBufferModel, TestWithTrueVerifier) { + FakeVerifier verifier(true); + ASSERT_TRUE(FlatBufferModel::VerifyAndBuildFromFile( + "tensorflow/contrib/lite/testdata/test_model.bin", + &verifier)); +} + +TEST(BasicFlatBufferModel, TestWithFalseVerifier) { + FakeVerifier verifier(false); + ASSERT_FALSE(FlatBufferModel::VerifyAndBuildFromFile( + "tensorflow/contrib/lite/testdata/test_model.bin", + &verifier)); +} + +TEST(BasicFlatBufferModel, TestWithNullVerifier) { + ASSERT_TRUE(FlatBufferModel::VerifyAndBuildFromFile( + "tensorflow/contrib/lite/testdata/test_model.bin", nullptr)); +} + struct TestErrorReporter : public ErrorReporter { int Report(const char* format, va_list args) override { calls++; -- GitLab From 1e75c69339da2fbf2c5c5fbeb891243badae7ff8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 15:44:56 -0700 Subject: [PATCH 085/960] Automated g4 rollback of changelist 189231636 PiperOrigin-RevId: 189258641 --- README.md | 4 - SECURITY.md | 14 +- configure.py | 7 +- .../xla/service/generic_transfer_manager.cc | 9 +- .../compiler/xla/tests/convolution_test.cc | 2 +- tensorflow/contrib/BUILD | 10 +- tensorflow/contrib/__init__.py | 6 +- .../boosted_trees/lib/utils/batch_features.h | 6 +- tensorflow/contrib/cmake/README.md | 12 +- tensorflow/contrib/cmake/external/grpc.cmake | 1 - .../contrib/cmake/external/protobuf.cmake | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 4 - tensorflow/contrib/data/__init__.py | 4 - .../contrib/data/python/kernel_tests/BUILD | 17 - .../kernel_tests/slide_dataset_op_test.py | 242 --- tensorflow/contrib/data/python/ops/BUILD | 1 - tensorflow/contrib/data/python/ops/sliding.py | 102 -- tensorflow/contrib/distributions/BUILD | 2 - tensorflow/contrib/eager/python/BUILD | 5 +- .../python/examples/linear_regression/BUILD | 1 - tensorflow/contrib/factorization/BUILD | 5 +- .../contrib/ffmpeg/default/ffmpeg_lib.cc | 9 +- tensorflow/contrib/gan/BUILD | 1 - tensorflow/contrib/kafka/BUILD | 108 +- .../kafka/kernels/kafka_dataset_ops.cc | 4 +- tensorflow/contrib/kafka/ops/dataset_ops.cc | 44 - .../kafka/python/ops/kafka_dataset_ops.py | 9 +- .../kafka/python/ops/kafka_op_loader.py | 24 - .../contrib/kfac/python/kernel_tests/BUILD | 1 - tensorflow/contrib/labeled_tensor/BUILD | 1 - tensorflow/contrib/layers/BUILD | 2 - .../layers/python/layers/embedding_ops.py | 2 +- tensorflow/contrib/learn/BUILD | 12 +- .../learn/python/learn/ops/embeddings_ops.py | 2 +- tensorflow/contrib/lite/Makefile | 9 +- tensorflow/contrib/lite/arena_planner.h | 2 +- tensorflow/contrib/lite/build_rpi_lib.sh | 22 - tensorflow/contrib/lite/builtin_ops.h | 2 +- tensorflow/contrib/lite/error_reporter.h | 2 +- tensorflow/contrib/lite/g3doc/rpi.md | 50 - tensorflow/contrib/lite/interpreter.h | 2 +- tensorflow/contrib/lite/interpreter_test.cc | 2 +- tensorflow/contrib/lite/kernels/conv.cc | 2 +- .../contrib/lite/kernels/depthwise_conv.cc | 2 +- .../contrib/lite/kernels/fully_connected.cc | 2 +- tensorflow/contrib/lite/kernels/kernel_util.h | 2 +- .../contrib/lite/kernels/lsh_projection.cc | 2 +- tensorflow/contrib/lite/kernels/lstm.cc | 6 +- tensorflow/contrib/lite/kernels/reshape.cc | 12 +- .../contrib/lite/kernels/reshape_test.cc | 2 +- tensorflow/contrib/lite/kernels/test_util.cc | 4 +- .../kernels/unidirectional_sequence_lstm.cc | 2 +- tensorflow/contrib/lite/memory_planner.h | 4 +- tensorflow/contrib/lite/model.h | 2 +- .../contrib/lite/nnapi/NeuralNetworksShim.h | 2 +- tensorflow/contrib/lite/rpi_makefile.inc | 33 - .../schema/builtin_ops_header/generator.cc | 2 +- .../contrib/lite/simple_memory_arena.cc | 6 +- tensorflow/contrib/lite/simple_memory_arena.h | 6 +- tensorflow/contrib/lookup/BUILD | 1 - tensorflow/contrib/makefile/README.md | 2 - tensorflow/contrib/makefile/build_all_ios.sh | 3 +- tensorflow/contrib/mpi/mpi_utils.h | 2 - .../contrib/predictor/predictor_factories.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 2 - .../contrib/py2tf/converters/single_return.py | 2 +- tensorflow/contrib/py2tf/utils/BUILD | 1 - .../quantize/python/fold_batch_norms.py | 4 +- .../contrib/quantize/python/quant_ops.py | 4 +- .../contrib/quantize/python/quantize.py | 2 +- .../contrib/quantize/python/quantize_graph.py | 2 +- .../python/quantize_parameterized_test.py | 8 +- .../contrib/quantize/python/quantize_test.py | 2 +- .../contrib/remote_fused_graph/pylib/BUILD | 1 + tensorflow/contrib/rnn/python/ops/rnn_cell.py | 6 +- tensorflow/contrib/saved_model/BUILD | 1 - .../seq2seq/python/ops/beam_search_decoder.py | 7 +- tensorflow/contrib/session_bundle/BUILD | 1 - .../contrib/slim/python/slim/data/BUILD | 1 - tensorflow/contrib/tensor_forest/BUILD | 1 + tensorflow/contrib/tensorboard/BUILD | 1 - tensorflow/contrib/tensorrt/BUILD | 2 - tensorflow/contrib/tensorrt/README.md | 23 +- tensorflow/contrib/tensorrt/__init__.py | 18 +- .../contrib/tensorrt/convert/convert_graph.cc | 256 +-- .../contrib/tensorrt/convert/convert_graph.h | 8 +- .../contrib/tensorrt/convert/convert_nodes.cc | 1469 +++-------------- .../contrib/tensorrt/convert/convert_nodes.h | 53 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 11 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 39 +- tensorflow/contrib/tensorrt/log/trt_logger.cc | 8 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 4 +- .../contrib/tensorrt/python/__init__.py | 1 - .../contrib/tensorrt/python/trt_convert.py | 68 +- .../tensorrt/resources/trt_int8_calibrator.cc | 56 +- .../tensorrt/resources/trt_int8_calibrator.h | 15 +- .../contrib/tensorrt/test/test_tftrt.py | 57 +- tensorflow/contrib/tensorrt/trt_conversion.i | 63 +- tensorflow/contrib/timeseries/examples/BUILD | 5 +- .../timeseries/python/timeseries/BUILD | 5 +- .../timeseries/state_space_models/BUILD | 1 - tensorflow/contrib/tpu/BUILD | 1 - tensorflow/contrib/util/loader.py | 7 +- tensorflow/core/BUILD | 4 - .../base_api/api_def_SlideDataset.pbtxt | 18 - .../core/distributed_runtime/tensor_coding.cc | 4 +- tensorflow/core/framework/dataset.h | 4 +- tensorflow/core/graph/mkl_layout_pass.cc | 31 +- .../grappler/optimizers/loop_optimizer.cc | 8 +- tensorflow/core/kernels/BUILD | 12 +- tensorflow/core/kernels/data/BUILD | 14 - .../core/kernels/data/slide_dataset_op.cc | 252 --- tensorflow/core/kernels/depthtospace_op.cc | 3 - .../core/kernels/depthtospace_op_gpu.cu.cc | 6 - tensorflow/core/kernels/hexagon/BUILD | 1 - .../core/kernels/mkl_conv_grad_filter_ops.cc | 81 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 18 +- tensorflow/core/kernels/mkl_conv_ops.cc | 146 +- tensorflow/core/kernels/mkl_conv_ops.h | 117 +- .../core/kernels/mkl_input_conversion_op.cc | 7 +- tensorflow/core/kernels/mkl_relu_op.cc | 23 +- .../core/kernels/segment_reduction_ops.h | 14 +- tensorflow/core/kernels/spacetodepth_op.cc | 3 - .../core/kernels/spacetodepth_op_gpu.cu.cc | 6 - tensorflow/core/lib/core/stringpiece.cc | 2 + tensorflow/core/lib/core/stringpiece.h | 2 +- tensorflow/core/lib/io/record_reader.cc | 2 - tensorflow/core/lib/io/record_reader.h | 4 +- tensorflow/core/ops/dataset_ops.cc | 12 +- tensorflow/core/ops/nn_ops.cc | 8 - tensorflow/core/platform/tracing.h | 2 +- .../platform/windows/windows_file_system.cc | 3 +- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/community/welcome.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 56 +- tensorflow/docs_src/install/install_mac.md | 23 +- .../docs_src/install/install_sources.md | 4 +- .../docs_src/install/install_windows.md | 5 +- tensorflow/docs_src/performance/xla/jit.md | 2 +- .../docs_src/programmers_guide/debugger.md | 3 +- tensorflow/docs_src/programmers_guide/faq.md | 4 +- .../summaries_and_tensorboard.md | 2 +- .../docs_src/programmers_guide/using_tpu.md | 7 +- tensorflow/docs_src/tutorials/layers.md | 12 +- .../docs_src/tutorials/recurrent_quickdraw.md | 3 +- tensorflow/docs_src/tutorials/wide.md | 16 +- .../examples/android/AndroidManifest.xml | 4 - .../org/tensorflow/demo/CameraActivity.java | 7 +- .../org/tensorflow/demo/StylizeActivity.java | 60 - tensorflow/python/BUILD | 84 +- tensorflow/python/debug/BUILD | 1 - tensorflow/python/estimator/estimator.py | 32 +- tensorflow/python/estimator/training.py | 26 +- tensorflow/python/keras/BUILD | 5 +- tensorflow/python/kernel_tests/BUILD | 4 + .../python/kernel_tests/conv_ops_test.py | 20 +- .../kernel_tests/depthtospace_op_test.py | 10 +- .../kernel_tests/spacetodepth_op_test.py | 10 +- tensorflow/python/lib/io/file_io_test.py | 5 - tensorflow/python/ops/nn_ops.py | 2 +- tensorflow/python/ops/rnn.py | 17 +- tensorflow/python/ops/special_math_ops.py | 4 +- .../python/ops/special_math_ops_test.py | 5 - tensorflow/python/tools/freeze_graph.py | 36 +- tensorflow/python/tools/saved_model_cli.py | 60 - .../python/tools/saved_model_cli_test.py | 22 - tensorflow/stream_executor/cuda/cuda_dnn.cc | 33 +- tensorflow/tensorflow.bzl | 18 +- .../tools/api/tests/api_compatibility_test.py | 2 - tensorflow/tools/ci_build/Dockerfile.cmake | 5 +- .../windows/cpu/pip/build_tf_windows.sh | 3 +- tensorflow/tools/def_file_filter/BUILD | 0 tensorflow/tools/def_file_filter/BUILD.tpl | 15 - .../def_file_filter/def_file_filter.py.tpl | 168 -- .../def_file_filter_configure.bzl | 56 - tensorflow/tools/dist_test/README.md | 8 - tensorflow/tools/dist_test/local_test.sh | 22 +- tensorflow/tools/git/gen_git_source.py | 7 - tensorflow/tools/graph_transforms/BUILD | 1 - .../graph_transforms/fold_old_batch_norms.cc | 67 - .../fold_old_batch_norms_test.cc | 95 -- tensorflow/tools/pip_package/BUILD | 129 +- tensorflow/tools/pip_package/setup.py | 4 +- .../tools/test/upload_test_benchmarks.py | 9 +- tensorflow/workspace.bzl | 8 +- third_party/jpeg/jpeg.BUILD | 4 +- third_party/kafka/BUILD | 13 +- third_party/py/BUILD.tpl | 22 +- 191 files changed, 920 insertions(+), 4244 deletions(-) delete mode 100644 tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py delete mode 100644 tensorflow/contrib/data/python/ops/sliding.py delete mode 100644 tensorflow/contrib/kafka/ops/dataset_ops.cc delete mode 100644 tensorflow/contrib/kafka/python/ops/kafka_op_loader.py delete mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh delete mode 100644 tensorflow/contrib/lite/g3doc/rpi.md delete mode 100644 tensorflow/contrib/lite/rpi_makefile.inc delete mode 100644 tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt delete mode 100644 tensorflow/core/kernels/data/slide_dataset_op.cc delete mode 100644 tensorflow/tools/def_file_filter/BUILD delete mode 100644 tensorflow/tools/def_file_filter/BUILD.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter.py.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter_configure.bzl diff --git a/README.md b/README.md index 3cdb6e478d..ef5bdc66ef 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,6 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. -Keep up to date with release announcements and security updates by -subscribing to -[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). - ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* diff --git a/SECURITY.md b/SECURITY.md index 2aaa9202d5..fea24b2739 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ report vulnerabilities in TensorFlow. ## TensorFlow models are programs -TensorFlow's runtime system interprets and executes programs. What machine +TensorFlow's runtime system interprets and executes programs. What machine learning practitioners term [**models**](https://developers.google.com/machine-learning/glossary/#model) are expressed as programs that TensorFlow executes. TensorFlow programs are encoded @@ -28,12 +28,12 @@ data you supply to TensorFlow to train a model, or to use a model to run inference on the data. **TensorFlow models are programs, and need to be treated as such from a security -perspective.** +perspective.** ## Running untrusted models As a general rule: **Always** execute untrusted models inside a sandbox (e.g., -[nsjail](https://github.com/google/nsjail)). +[nsjail](https://github.com/google/nsjail)). There are several ways in which a model could become untrusted. Obviously, if an untrusted party supplies TensorFlow kernels, arbitrary code may be executed. @@ -109,11 +109,11 @@ graphs known to the `ModelServer`. This means that an attacker may run graphs using untrusted inputs as described above, but they would not be able to execute arbitrary graphs. It is possible to safely expose a `ModelServer` directly to an untrusted network, **but only if the graphs it is configured to -use have been carefully audited to be safe**. +use have been carefully audited to be safe**. Similar to best practices for other servers, we recommend running any `ModelServer` with appropriate privileges (i.e., using a separate user with -reduced permissions). In the spirit of defense in depth, we recommend +reduced permisisons). In the spirit of defense in depth, we recommend authenticating requests to any TensorFlow server connected to an untrusted network, as well as sandboxing the server to minimize the adverse effects of any breach. @@ -133,7 +133,7 @@ which exhibit unexpected or unwanted behaviors. The fact that TensorFlow models can perform arbitrary computations means that they may read and write files, communicate via the network, produce deadlocks and infinite loops, or run out of memory. It is only when these behaviors are outside the specifications of the -operations involved that such behavior is a vulnerability. +operations involved that such behavior is a vulnerability. A `FileWriter` writing a file is not unexpected behavior and therefore is not a vulnerability in TensorFlow. A `MatMul` allowing arbitrary binary code execution @@ -168,7 +168,7 @@ below). Please use a descriptive subject line for your report email. After the initial reply to your report, the security team will endeavor to keep you informed of -the progress being made towards a fix and announcement. +the progress being made towards a fix and announcement. If you believe that an existing (public) issue is security-related, please send an email to `security@tensorflow.org`. The email should include the issue ID and diff --git a/configure.py b/configure.py index d14edef1be..97f46757ee 100644 --- a/configure.py +++ b/configure.py @@ -1048,10 +1048,7 @@ def set_tf_tensorrt_install_path(environ_cp): for lib_file in possible_files: if is_compatible(lib_file, cuda_ver, cudnn_ver): - matches = nvinfer_pattern.search(lib_file) - if len(matches.groups()) == 0: - continue - ver_str = matches.group(1) + ver_str = nvinfer_pattern.search(lib_file).group(1) ver = convert_version_to_int(ver_str) if len(ver_str) else 0 if ver > highest_ver[0]: highest_ver = [ver, ver_str, lib_file] @@ -1380,7 +1377,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.10.0') + check_bazel_version('0.5.4') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index a99e2b7794..78dc0ad4fc 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -38,7 +38,14 @@ namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size) - : platform_id_(platform_id), pointer_size_(pointer_size) {} + : platform_id_(platform_id), pointer_size_(pointer_size) { + // We currently only support kHostPlatformId for CPU, kCudaPlatformId for + // GPU and kInterpreterPlatformId for Interpreter. Before supporting other + // platforms, we need to test this transfer manager on them. + CHECK(platform_id_ == se::host::kHostPlatformId || + platform_id_ == se::interpreter::kInterpreterPlatformId || + platform_id_ == se::cuda::kCudaPlatformId); +} se::Platform::Id GenericTransferManager::PlatformId() const { return platform_id_; diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 72715398de..99640f5bb5 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -723,7 +723,7 @@ INSTANTIATE_TEST_CASE_P( ); #endif -XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { +TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 986b61b3ea..bab37e8906 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,7 +8,6 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") -load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -52,6 +51,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -63,6 +63,7 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", + "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -109,10 +110,6 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code - "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code - "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) @@ -124,7 +121,6 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -151,7 +147,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", + "//tensorflow/contrib/kafka:kafka_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 669d611b01..4f6f539027 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -85,8 +83,7 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -if os.name != 'nt': - from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs @@ -95,7 +92,6 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") -del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7815fa049a..da5e744851 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_int_features, (int64*) nullptr); + QCHECK_NE(num_dense_float_features, nullptr); + QCHECK_NE(num_sparse_float_features, nullptr); + QCHECK_NE(num_sparse_int_features, nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index fe83bb3204..8f85a75ee4 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Prerequisites +### Pre-requisites * CMake version 3.5 or later. @@ -34,16 +34,14 @@ bindings. * [SWIG](http://www.swig.org/download.html) -* Additional prerequisites for Microsoft Windows: +* Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 + - NumPy 1.11.0 or later -* Additional prerequisites for Linux: +* Additional pre-requisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) - -* Python dependencies: - - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -104,7 +102,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the prerequisites detailed above, and set up your environment. +1. Install the pre-requisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). You will need to set up your environment to use the diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 17f65999fa..a9f43a3ecb 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,7 +35,6 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index ab464bc99a..aba8a5244e 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) +set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index b3e5b30826..9f96a4b797 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -476,10 +476,6 @@ if (tensorflow_BUILD_CC_TESTS) "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc" ) - list(REMOVE_ITEM tf_test_src_simple - ${tf_core_profiler_test_srcs} - ) - set(tf_test_lib tf_test_lib) add_library(${tf_test_lib} STATIC ${tf_src_testlib}) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 9212b69700..f09d156832 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -40,7 +40,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@rejection_resample @@scan @@shuffle_and_repeat -@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -73,9 +72,6 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat -from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch -from tensorflow.python.data.ops.iterator_ops import Iterator -from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 2c4d4adfda..22418b38e3 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -498,23 +498,6 @@ py_test( ], ) -tf_py_test( - name = "slide_dataset_op_test", - size = "small", - srcs = ["slide_dataset_op_test.py"], - additional_deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:math_ops", - "//tensorflow/python:sparse_tensor", - "//third_party/py/numpy", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py deleted file mode 100644 index 33c48e20be..0000000000 --- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.data.python.ops import sliding -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.platform import test - - -class SlideDatasetTest(test.TestCase): - - def testSlideDataset(self): - """Test an dataset that maps a TF function across its input elements.""" - components = (np.arange(7), - np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], - np.array(37.0) * np.arange(7)) - - count = array_ops.placeholder(dtypes.int64, shape=[]) - window_size = array_ops.placeholder(dtypes.int64, shape=[]) - stride = array_ops.placeholder(dtypes.int64, shape=[]) - - def _map_fn(x, y, z): - return math_ops.square(x), math_ops.square(y), math_ops.square(z) - - # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> - # RepeatDataset(count) -> _SlideDataset(window_size, stride). - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(_map_fn) - .repeat(count) - .apply(sliding.sliding_window_batch(window_size, stride)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - self.assertEqual([[None] + list(c.shape[1:]) for c in components], - [t.shape.as_list() for t in get_next]) - - with self.test_session() as sess: - # Slide over a finite input, where the window_size divides the - # total number of elements. - sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) - # Same formula with convolution layer. - num_batches = (20 * 7 - 14) // 7 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(14): - self.assertAllEqual(component[(i*7 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, where the window_size does not - # divide the total number of elements. - sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) - - num_batches = (20 * 7 - 17) // 9 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(17): - self.assertAllEqual(component[(i*9 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, which is less than window_size, - # should fail straight away. - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over an empty input should fail straight away. - sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Empty window_size should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0}) - - # Invalid stride should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) - - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - - def testSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], - values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], - dense_shape=[5, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideSparseWithDifferentDenseShapes(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=array_ops.expand_dims( - math_ops.range(i, dtype=dtypes.int64), 1), - values=array_ops.fill([math_ops.to_int32(i)], i), - dense_shape=[i]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected_indices = [] - expected_values = [] - for j in range(5): - for k in range(i * 3 + j): - expected_indices.append([j, k]) - expected_values.append(i * 3 + j) - expected = sparse_tensor.SparseTensorValue( - indices=expected_indices, - values=expected_values, - dense_shape=[5, i * 3 + 5 - 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testNestedSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = (dataset_ops.Dataset.range(10) - .map(_sparse) - .apply(sliding.sliding_window_batch(4, 2)) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - # Slide: 1st batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - # Slide: 2nd batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideShapeError(self): - - def generator(): - yield [1.0, 2.0, 3.0] - yield [4.0, 5.0, 6.0] - yield [7.0, 8.0, 9.0, 10.0] - - iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32, - output_shapes=[None]) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - next_element = iterator.get_next() - - with self.test_session() as sess: - sess.run(iterator.initializer) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r"Cannot batch tensors with different shapes in component 0. " - r"First element had shape \[3\] and element 2 had shape \[4\]."): - sess.run(next_element) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index c3331e9636..f03430c5c5 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -106,7 +106,6 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", - "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py deleted file mode 100644 index 19cc3cb89f..0000000000 --- a/tensorflow/contrib/data/python/ops/sliding.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Sliding dataset transformations.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops - - -class _SlideDataset(dataset_ops.Dataset): - """A `Dataset` that passes a sliding window over its input.""" - - def __init__(self, input_dataset, window_size, stride=1): - """See `sliding_window_batch` for details.""" - super(_SlideDataset, self).__init__() - self._input_dataset = input_dataset - self._window_size = ops.convert_to_tensor( - window_size, dtype=dtypes.int64, name="window_size") - self._stride = ops.convert_to_tensor( - stride, dtype=dtypes.int64, name="stride") - - def _as_variant_tensor(self): - return gen_dataset_ops.slide_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - window_size=self._window_size, - stride=self._stride, - output_shapes=nest.flatten( - sparse.as_dense_shapes(self.output_shapes, self.output_classes)), - output_types=nest.flatten( - sparse.as_dense_types(self.output_types, self.output_classes))) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - input_shapes = self._input_dataset.output_shapes - return nest.pack_sequence_as(input_shapes, [ - tensor_shape.vector(None).concatenate(s) - for s in nest.flatten(self._input_dataset.output_shapes) - ]) - - @property - def output_types(self): - return self._input_dataset.output_types - - -def sliding_window_batch(window_size, stride=1): - """A sliding window with size of `window_size` and step of `stride`. - - This transformation passes a sliding window over this dataset. The - window size is `window_size` and step size is `stride`. If the left - elements cannot fill up the sliding window, this transformation will - drop the final smaller element. For example: - - ```python - # NOTE: The following examples use `{ ... }` to represent the - # contents of a dataset. - a = { [1], [2], [3], [4], [5], [6] } - - a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == - { - [[1], [2], [3]], - [[3], [4], [5]], - } - ``` - - Args: - window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - elements in the sliding window. - stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the - steps moving the sliding window forward for one iteration. The default - is `1`. It must be in `[1, window_size)`. - - Returns: - A `Dataset` transformation function, which can be passed to - @{tf.data.Dataset.apply}. - """ - def _apply_fn(dataset): - return _SlideDataset(dataset, window_size, stride) - - return _apply_fn diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1bd73ee704..6bd3f5f09b 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,7 +454,6 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1144,7 +1143,6 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index eb810e06dd..32aa2c0a4a 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -267,10 +267,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index 2f6cfdf31e..f86331af6f 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,7 +22,6 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index ad8568ad44..90f10f1fa8 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -224,10 +224,7 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "nomac", # b/73741358 - "notsan", # b/67512932 - ], + tags = ["notsan"], # b/67512932 deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 35341406a0..e61221a6b0 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -256,9 +256,6 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, if (p != std::string::npos) { string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); rgb24 = rgb24.substr(0, rgb24.find(",")); - // Strip anything after " ", in case the format is - // `640x360 [SAR 1:1 DAR 16:9]` - rgb24 = rgb24.substr(0, rgb24.find(" ")); string rgb24_width = rgb24.substr(0, rgb24.find("x")); string rgb24_height = rgb24.substr(rgb24_width.length() + 1); if (strings::safe_strtou32(rgb24_width, &width_value) && @@ -273,10 +270,8 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, // We only look for the first stream mapping to have the number of the // frames. // Once processed we will not further process stream mapping section. - if (line.find("frame=") == 0) { - // The format might be `frame= 166 ` or `frame=12488 ` - string number = line.substr(6); - number = number.substr(number.find_first_not_of(" ")); + if (line.find("frame= ") == 0) { + string number = line.substr(8, line.find(" ", 8)); number = number.substr(0, number.find(" ")); if (strings::safe_strtou32(number, &frames_value)) { in_mapping = false; diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index ff6f3b7441..0eb0e3cbe2 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,7 +354,6 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index 1c3974871c..efb403462a 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,93 +1,66 @@ -package(default_visibility = ["//tensorflow:internal"]) +package( + default_visibility = ["//visibility:private"], +) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load( - "//tensorflow:tensorflow.bzl", - "tf_gen_op_wrapper_py", - "tf_kernel_library", - "tf_custom_op_library", - "tf_custom_op_py_library", - "tf_gen_op_libs", - "tf_py_test", -) - -py_library( - name = "kafka", - srcs = ["__init__.py"], - srcs_version = "PY2AND3", - deps = [ - ":dataset_ops", - ], -) - -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = ["ops/dataset_ops.cc"], - deps = [":dataset_kernels"], -) - -tf_gen_op_libs( - op_lib_names = ["dataset_ops"], -) +load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("//tensorflow:tensorflow.bzl", "tf_py_test") -cc_library( - name = "dataset_kernels", +tf_kernel_library( + name = "kafka_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], + visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:bounds_check_lib", + "//tensorflow/core/kernels:dataset", "//third_party/eigen3", "@kafka", - "@protobuf_archive//:protobuf_headers", ], - alwayslink = 1, ) -py_library( - name = "dataset_ops", - srcs = [ - "python/ops/kafka_dataset_ops.py", - ], - srcs_version = "PY2AND3", +tf_gen_op_libs( + op_lib_names = ["kafka_ops"], deps = [ - ":kafka_op_loader", - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", + "//tensorflow/core:lib", ], ) tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "python/ops/gen_dataset_ops.py", - deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], -) - -tf_kernel_library( - name = "dataset_ops_kernels", - deps = [ - ":dataset_kernels", - "//tensorflow/core:framework", - ], - alwayslink = 1, + name = "gen_kafka_ops", + out = "python/ops/gen_kafka_ops.py", + require_shape_functions = True, + deps = [":kafka_ops_op_lib"], ) -tf_custom_op_py_library( - name = "kafka_op_loader", - srcs = ["python/ops/kafka_op_loader.py"], - dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], - kernels = [ - ":dataset_ops_kernels", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", +py_library( + name = "kafka", + srcs = [ + "__init__.py", + "python/ops/kafka_dataset_ops.py", ], srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [ - ":gen_dataset_ops", + ":gen_kafka_ops", "//tensorflow/contrib/util:util_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", ], ) @@ -115,7 +88,6 @@ tf_py_test( ], tags = [ "manual", - "no_windows", "notap", ], ) @@ -123,9 +95,7 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - include = [ - "**/*", - ], + ["**/*"], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index a4cd4a2cc4..88ef5f3571 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/kernels/dataset.h" + +#include "tensorflow/core/framework/tensor.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc deleted file mode 100644 index 8cdf16103b..0000000000 --- a/tensorflow/contrib/kafka/ops/dataset_ops.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -REGISTER_OP("KafkaDataset") - .Input("topics: string") - .Input("servers: string") - .Input("group: string") - .Input("eof: bool") - .Input("timeout: int64") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that emits the messages of one or more Kafka topics. - -topics: A `tf.string` tensor containing one or more subscriptions, - in the format of [topic:partition:offset:length], - by default length is -1 for unlimited. -servers: A list of bootstrap servers. -group: The consumer group id. -eof: If True, the kafka reader will stop on EOF. -timeout: The timeout value for the Kafka Consumer to wait - (in millisecond). -)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index a1624614d1..8e51d27a34 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,9 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import -from tensorflow.contrib.kafka.python.ops import gen_dataset_ops -from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.contrib.kafka.python.ops import gen_kafka_ops +from tensorflow.python.data.ops.readers import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -59,8 +58,8 @@ class KafkaDataset(Dataset): timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_dataset_ops.kafka_dataset(self._topics, self._servers, - self._group, self._eof, self._timeout) + return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, + self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py deleted file mode 100644 index ec2fdea962..0000000000 --- a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Python helper for loading kafka ops and kernels.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.util import loader -from tensorflow.python.platform import resource_loader - -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index d1c449402a..146ae8b7e2 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,7 +114,6 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 544065dac6..894e6f6946 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,7 +70,6 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index cc7bbabf21..852d06e1e3 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,7 +188,6 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -354,7 +353,6 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index ffa208540d..b62e3050cd 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None): ids = ops.convert_to_tensor(ids) shape = array_ops.shape(ids) ids_flat = array_ops.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) unique_ids, idx = array_ops.unique(ids_flat) unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids) embeds_flat = array_ops.gather(unique_embeddings, idx) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index b05f5eeaee..f837ca3265 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,8 +5,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") - package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -117,7 +115,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -173,7 +170,6 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -192,7 +188,6 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -431,10 +426,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = [ - "noasan", # b/73741358 - "nomac", - ], + tags = ["noasan"], deps = [ ":learn", "//tensorflow/python:array_ops", @@ -593,7 +585,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -823,7 +814,6 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index 8f9811cf25..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'): ids = ops.convert_to_tensor(ids) shape = array_ops_.shape(ids) ids_flat = array_ops_.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) embeds_flat = nn.embedding_lookup(params, ids_flat, name) embed_shape = array_ops_.concat([shape, [-1]], 0) embeds = array_ops_.reshape(embeds_flat, embed_shape) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index b4504f246a..7f31629272 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX)gcc +CXX := $(CC_PREFIX) gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX)gcc -CFLAGS := -O3 -DNDEBUG +CC := $(CC_PREFIX) gcc +CFLAGS := LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,11 +57,10 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl + LIBS += -ldl -lpthread endif include $(MAKEFILE_DIR)/ios_makefile.inc -include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index f84b3dad95..58bc164619 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -33,7 +33,7 @@ class AllocationInfo; // each tensor needs to be allocated and deallocated, and preallocates all the // necessary memory (the PlanAllocations phase). It then assigns portions of // this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may -// share some of the buffer if a tensor B is to be allocated after another tensor +// share some of the bufer if a tensor B is to be allocated after another tensor // A has been deallocated. // // If dynamic tensors are used the planning steps can be repeated during model diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh deleted file mode 100755 index 3824b16412..0000000000 --- a/tensorflow/contrib/lite/build_rpi_lib.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -x -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR/../../.." - -CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index ea3ae3489e..2218ea8eac 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -24,7 +24,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. typedef enum { kTfLiteBuiltinAdd = 0, diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h index 3c5f805f12..da193d2586 100644 --- a/tensorflow/contrib/lite/error_reporter.h +++ b/tensorflow/contrib/lite/error_reporter.h @@ -30,7 +30,7 @@ namespace tflite { // va_list args; // foo.Report("test %d", args); // where args is va_list // -// Subclass ErrorReporter to provide another reporting destination. +// Sublclass ErrorReporter to provide another reporting destination. // For example, if you have a GUI program, you might redirect to a buffer // that drives a GUI error log box. class ErrorReporter { diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md deleted file mode 100644 index 7a3a231626..0000000000 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ /dev/null @@ -1,50 +0,0 @@ -# TensorFlow Lite for Raspberry Pi - -## Cross compiling -### Installing toolchian -This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). - -To cross compiling TensorFlow Lite. First you should install the toolchain and libs. -```bash -sudo apt-get update -sudo apt-get install crossbuild-essential-armhf -``` -> If you are using docker, you may not use `sudo` - -### Building -Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies: -> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. - -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. - -## Native compiling -This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). - -Log in to you RPI, install the toolchain. -```bash -sudo apt-get instal build-essential -``` - -First, clone this TensorFlow repository. Run this at the root of the repository: -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. - -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 3749869f58..af143370ee 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -481,7 +481,7 @@ class Interpreter { // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. - // Interpreter will stop allocating tensors, set the value of next allocate + // Intepreter will stop allocating tensors, set the value of next allocate // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 72d4acedbe..7a029c7df8 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -40,7 +40,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) { ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); } -// Test size accessor functions. +// Test size accesser functions. TEST(BasicInterpreter, TestSizeFunctions) { Interpreter interpreter; int base_index; diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index e0cd12f1b4..b91ba1a03d 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -64,7 +64,7 @@ struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index cad9ce114c..15dbfe08c8 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -52,7 +52,7 @@ enum KernelType { struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index 888e67966c..a77fe94e49 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -48,7 +48,7 @@ enum KernelType { struct OpData { // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 21da1daff7..28f53b9fbb 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) { } // Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specifically checks for a dynamic tensor. +// not dynamic. This function specificially checks for a dynamic tensor. inline bool IsDynamicTensor(TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc index 0ee35775d5..5f73b56ed9 100644 --- a/tensorflow/contrib/lite/kernels/lsh_projection.cc +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// LSH Projection projects an input to a bit vector via locality sensitive +// LSH Projection projects an input to a bit vector via locality senstive // hashing. // // Options: diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 8cf1165135..b9255b23a5 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // present. // 2) If projection weight is present, then projection bias is optional. // TODO(ghodrat): make sure this is correct. - const bool projection_tensors_consistent = + const bool projecton_tensors_consistent = ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projection_tensors_consistent == true); + TF_LITE_ENSURE(context, projecton_tensors_consistent == true); return kTfLiteOk; } @@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc index 438f70d311..f3e6ddc9f4 100644 --- a/tensorflow/contrib/lite/kernels/reshape.cc +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); int num_output_elements = 1; - int stretch_dim = -1; + int strech_dim = -1; for (int i = 0; i < params->num_dimensions; ++i) { int value = params->shape[i]; if (value == -1) { - TF_LITE_ENSURE_EQ(context, stretch_dim, -1); - stretch_dim = i; + TF_LITE_ENSURE_EQ(context, strech_dim, -1); + strech_dim = i; } else { num_output_elements *= value; output_size->data[i] = value; } } - if (stretch_dim != -1) { - output_size->data[stretch_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_size->data[stretch_dim]; + if (strech_dim != -1) { + output_size->data[strech_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[strech_dim]; } TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc index aecbd0399f..0fbcf6e6aa 100644 --- a/tensorflow/contrib/lite/kernels/reshape_test.cc +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) { TEST(ReshapeOpTest, TooManySpecialDimensions) { EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), - "stretch_dim != -1"); + "strech_dim != -1"); } TEST(ReshapeOpTest, SimpleTest) { diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 0bb28b50b2..373310bd87 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type, void SingleOpModel::SetCustomOp( const string& name, const std::vector& custom_option, - const std::function& registration) { - custom_registrations_[name] = registration; + const std::function& registeration) { + custom_registrations_[name] = registeration; opcodes_.push_back( CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); operators_.push_back(CreateOperator( diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 42941a97db..508a570e2e 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h index 0294ec815c..5cd6c20850 100644 --- a/tensorflow/contrib/lite/memory_planner.h +++ b/tensorflow/contrib/lite/memory_planner.h @@ -34,8 +34,8 @@ class MemoryPlanner { // [first_node, last_node]. virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; - // Invalidates allocations made earlier. This is called when tensors sizes - // have changed. All planned allocations remain, but can't be used until + // Invalidates allocations made earliers. This is called when tensors sizes + // have change. All planned allocations remain, but can't be used until // ExecuteAllocations() is called. virtual TfLiteStatus ResetAllocations() = 0; }; diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 38eea0e26b..51a622a28d 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -81,7 +81,7 @@ class FlatBufferModel { const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); - // Releases memory or unmaps mmaped memory. + // Releases memory or unmaps mmaped meory. ~FlatBufferModel(); // Copying or assignment is disallowed to simplify ownership semantics. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index bd49d327c9..76032771af 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -569,7 +569,7 @@ enum { ANEURALNETWORKS_LOGISTIC = 14, /** - * Projects an input to a bit vector via locality sensitive hashing. + * Projects an input to a bit vector via locality senstive hashing. * * Inputs: * * 0: Hash functions. Dim.size == 2, DataType: Float. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc deleted file mode 100644 index 832ef5824b..0000000000 --- a/tensorflow/contrib/lite/rpi_makefile.inc +++ /dev/null @@ -1,33 +0,0 @@ -# Settings for Raspberry Pi. -ifeq ($(TARGET), RPI) - ifeq ($(TARGET_ARCH), armv7) - CXXFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - CCFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - LDFLAGS := \ - -Wl,--no-export-dynamic \ - -Wl,--exclude-libs,ALL \ - -Wl,--gc-sections \ - -Wl,--as-needed - endif - - LIBS := \ - -lstdc++ \ - -lpthread \ - -lm \ - -ldl - - OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ - LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ - BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ - DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ -endif diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index 640972de77..08bcfe4516 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. typedef enum { )"; diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc index 2f2004f56b..4aab244989 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.cc +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { underlying_buffer_size_ = required_size; underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; } - committed_ = true; + commited_ = true; return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; } TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, char** output_ptr) { - TF_LITE_ENSURE(context, committed_); + TF_LITE_ENSURE(context, commited_); TF_LITE_ENSURE(context, output_ptr != nullptr); *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; return kTfLiteOk; } TfLiteStatus SimpleMemoryArena::Clear() { - committed_ = false; + commited_ = false; high_water_mark_ = 0; allocs_.clear(); return kTfLiteOk; diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 5faf78b59e..0535522374 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -22,7 +22,7 @@ limitations under the License. namespace tflite { // This little structure holds the offset and the size for a dynamic memory -// allocation in the memory arena. When the arena is committed and the +// allocation in the memory arena. When the arena is commited and the // underlying buffer is set, the alloc can be resolved into an actual memory // pointer. struct ArenaAlloc { @@ -43,7 +43,7 @@ struct ArenaAlloc { class SimpleMemoryArena { public: explicit SimpleMemoryArena(size_t arena_alignment) - : committed_(false), + : commited_(false), arena_alignment_(arena_alignment), high_water_mark_(0), underlying_buffer_size_(0), @@ -73,7 +73,7 @@ class SimpleMemoryArena { } private: - bool committed_; + bool commited_; size_t arena_alignment_; size_t high_water_mark_; std::unique_ptr underlying_buffer_; diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 0a6edc33c5..8ca03f4193 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,7 +46,6 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, - tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 6c3b02e12b..995230dfa8 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -194,8 +194,6 @@ with: srcs = glob(["libs/arm64-v8a/*.so"]), ``` -If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml - Then run: ```bash # Create dir for native libs diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 9b148688c4..2d99791839 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -80,9 +80,10 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then fi else echo "${PRNT_SLCTV_BIN} found. Using it" + ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h + fi - ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h fi if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index df055ff567..fa297c28cb 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" -// Skip MPI C++ bindings support, this matches the usage in other places -#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe..04b5d5bdf1 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -53,7 +53,7 @@ def from_contrib_estimator(estimator, `Estimator`. """ if isinstance(estimator, core_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.contrib.learn.Estimator, but got type ' 'tf.python.estimator.Estimator. You likely want to call ' 'from_estimator.') @@ -88,7 +88,7 @@ def from_estimator(estimator, `Estimator`. """ if isinstance(estimator, contrib_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.python.estimator.Estimator, but got type ' 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 4bb6f76019..f624c42686 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -81,7 +81,6 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -92,7 +91,6 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 1194b98f5e..90bc22008f 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(DetectReturnInUnsupportedControlFlow, self).__init__() + super(gast.NodeVisitor, self).__init__() def visit_While(self, node): self.cant_return = True diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 8bc338e801..d029289f5a 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -83,7 +83,6 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 1afcbb8504..b278265639 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has Bessel's + # correction). The variance tensor read from FuseBatchNorm has bessel's # correction applied, so we undo it here. scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containing required batch norm tensors for correction + match: Object containg required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index a4f7b1b221..0a8e35080c 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range upper end(s). - per_channel: a boolean specifying whether to use per-channel quantization. + max_var: a variable containing quantization range lupper end(s). + per_channel: a boolean specifying whether to use per-channel quantizatioh. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index ec721afbc8..0608ab9302 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -267,7 +267,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context where producer and consumer operations are nested. + context: Context w,here producer and consumer operations are nested. name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5abdcd2475..5a3a74cec4 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed, if None then defaults to the + input_graph: The tf.Graph to be transformed,if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index db745aa562..0624cc878b 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index b2e5707a6d..ef59475167 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -144,7 +144,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 54c66271cd..27f0a7f58f 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,6 +38,7 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 358b2eb02b..73f2607d84 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index b10757df47..245fe07f2b 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,7 +53,6 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 6adbb8be40..03fe31abf7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -299,13 +299,12 @@ class BeamSearchDecoder(decoder.Decoder): """ finished, start_inputs = self._finished, self._start_inputs - dtype = nest.flatten(self._initial_cell_state)[0].dtype log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, - on_value=ops.convert_to_tensor(0.0, dtype=dtype), - off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), - dtype=dtype) + on_value=0.0, + off_value=-np.Inf, + dtype=nest.flatten(self._initial_cell_state)[0].dtype) initial_state = BeamSearchDecoderState( cell_state=self._initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 3ad88a8a22..67011c8fef 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -165,7 +165,6 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 7aa1684839..5daabbd62e 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,7 +61,6 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 07b6b1f142..1e4cc3f095 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,6 +553,7 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index db2e000ef8..d833744d0c 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,7 +9,6 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 906cc3f034..c832c6f2e0 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -83,7 +83,6 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), - visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -155,7 +154,6 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", - "//tensorflow/python:errors", ], ) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 461e627e99..dfcce0fd00 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,8 +2,7 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. This is still a work in progress -but should be useable with most common graphs. +operator that wraps a subgraph in TensorRT. Compilation ----------- @@ -16,10 +15,26 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. -```shell + +``` bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py directory +will be available. An example use is shown below. + +```python +import tensorflow as tf +import tensorflow.contrib.tensorrt as trt +#... create and train or load model +gdef = sess.graph.as_graph_def() +trt_gdef = trt.create_inference_graph( + gdef, #original graph_def + ["output"], #name of output node(s) + max_batch_size, #maximum batch size to run the inference + max_workspace_size_bytes) # max memory for TensorRT to use +tf.reset_default_graph() +tf.import_graph_def(graph_def=trt_gdef) +#...... run inference +``` diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index 140ad48282..fd551d70b4 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,18 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import errors - -# pylint: disable=unused-import,wildcard-import,g-import-not-at-top -try: - from tensorflow.contrib.tensorrt.python import * -except errors.NotFoundError as e: - no_trt_message = ( - '**** Failed to initialize TensorRT. This is either because the TensorRT' - ' installation path is not in LD_LIBRARY_PATH, or because you do not have' - ' it installed. If not installed, please go to' - ' https://developer.nvidia.com/tensorrt to download and install' - ' TensorRT ****') - print(no_trt_message) - raise e -# pylint: enable=unused-import,wildcard-import,g-import-not-at-top +# pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.tensorrt.python import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index eea8c8efa2..970f810473 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" -#include #include #include #include @@ -49,29 +48,13 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { - "Identity", - "Const", - "Conv2D", - "MaxPool", - "BiasAdd", - "Relu", - "Add", - "Mul", - "Sub", - "Rsqrt", - "Pad", - "Mean", - "AvgPool", - "ConcatV2", - "DepthwiseConv2dNative", - "FusedBatchNorm", - "FusedBatchNormV2", - // TODO(ben,jie): ... + "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", + "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -86,8 +69,6 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); - } else { - VLOG(2) << edge->src()->name() << " N, "; } } } @@ -101,10 +82,7 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { - VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); - } else { - VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -131,150 +109,74 @@ std::unordered_map> BuildTensorNameMap( } return result; } -// TODO(sami): convert references to pointers -struct ConvertGraphParams { - ConvertGraphParams( - tensorflow::Graph& inp_graph, - const std::vector& output_node_names, - const std::set& subgraph_node_id_numbers, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map>* output_edges, - int engine_precision_mode) - : graph(inp_graph), - output_names(output_node_names), - subgraph_node_ids(subgraph_node_id_numbers), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - precision_mode(engine_precision_mode) {} - tensorflow::Graph& graph; - const std::vector& output_names; - const std::set& subgraph_node_ids; - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map>* output_edge_map; - int precision_mode; - std::vector> subgraph_inputs; - std::vector> subgraph_outputs; + +tensorflow::Status ConvertSubGraphToTensorRT( + const std::vector& output_names, + const std::set& subgraph_node_ids, + size_t max_batch_size, // Max batch size that engine will be created for + // Max amount of memory that engine will be allowed to consume, in bytes + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + tensorflow::Graph* graph) { tensorflow::EdgeSet subgraph_incoming_edges; - tensorflow::EdgeSet subgraph_outgoing_edges; -}; + GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); + + std::vector> subgraph_inputs; -static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { - GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_incoming_edges); - for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + // Collect inputs by looking for incoming edges + for (const tensorflow::Edge* edge : subgraph_incoming_edges) { + subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set> subgraph_outputs_set; // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); + auto output_name_to_index_map = BuildTensorNameMap(output_names); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { + // Collect outputs referenced from outgoing edges + tensorflow::EdgeSet subgraph_outgoing_edges; + GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); - p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); - return tensorflow::Status::OK(); -}; - -tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + // Impose an ordering on the outputs + std::vector> subgraph_outputs( + subgraph_outputs_set.begin(), subgraph_outputs_set.end()); + // Build TensorRT node and add it to the graph tensorflow::NodeDef trt_node_def; - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( + *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, + max_batch_size, max_workspace_size_bytes, graph_properties, + &trt_node_def)); tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - TF_RETURN_IF_ERROR(status); - - for (auto in_edge : - params->subgraph_incoming_edges) { // loop over incoming edges and - // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); - auto src_output = in_edge->src_output(); - auto dst_node = in_edge->dst(); - auto dst_input = in_edge->dst_input(); - VLOG(1) << " update edge " << trt_node->name() << ":" << src_output - << " -> " << dst_node->name() << ":" << dst_input; - TF_RETURN_IF_ERROR( - params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); - } - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); - tensorflow::NodeDef trt_node_def; - - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); - tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - // AddNode does not wire edges. - // Re-map incoming edges to use the new TRT node instead of the orig subgraph - std::map, int> subgraph_edge_to_input_map; - for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { - subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); - } - for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { - std::pair old_src = {edge->src()->id(), edge->src_output()}; - int new_src_output = subgraph_edge_to_input_map.at(old_src); - params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, - new_src_output); - params->graph.RemoveEdge(edge); - } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); - } - + tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); + for (size_t i = 0; i < subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(params->graph.UpdateEdge( - trt_node, new_src_output, edge->dst(), edge->dst_input())); + TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), + edge->dst_input())); } // Remove the original subgraph - for (int node_id : params->subgraph_node_ids) { - tensorflow::Node* node = params->graph.FindNodeId(node_id); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - params->graph.RemoveNode(node); + graph->RemoveNode(node); } return tensorflow::Status::OK(); } @@ -292,39 +194,12 @@ tensorflow::Status BuildNodeMap( } } // namespace -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { - VLOG(0) << "Starting Calib Conversion"; - tensorflow::Graph graph(tensorflow::OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &graph)); - // get calib nodes - std::vector calib_nodes; - for (auto node : graph.op_nodes()) { - if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; - calib_nodes.push_back(node); - } - } - VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); - if (calib_nodes.size() == 0) - return tensorflow::errors::FailedPrecondition( - "Graph doesn't contain any calibration nodes!." - " Please generate calibration graph and run calibration first"); - for (auto n : calib_nodes) { - TF_RETURN_IF_ERROR( - tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); - } - graph.ToGraphDef(infer_graph); - return tensorflow::Status::OK(); -} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = FP32MODE, int minimum_segment_size = 3) { - // optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { + // Optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -334,23 +209,16 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // virtual cluster + // Virtual cluster tensorflow::DeviceProperties device_properties; - device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); - // single machine - int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); - int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); - VLOG(2) << "cpu_cores: " << num_cpu_cores; - VLOG(2) << "gpus: " << num_gpus; - TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // constant folding + // Constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -358,6 +226,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); + // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -374,7 +243,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = minimum_segment_size; + segment_options.minimum_segment_size = 2; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -383,37 +252,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); - std::unordered_map> output_edge_map; - int count = 0; - float total_num_nodes_in_segments = 0.; - for (auto s : segments) { - total_num_nodes_in_segments += s.size(); - } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; - size_t max_mem_per_engine = - max_workspace_size_bytes * - ((float)subgraph_node_names.size() / total_num_nodes_in_segments); - std::stringstream oss; for (const string& node_name : subgraph_node_names) { - oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - VLOG(2) << "Subgraph nodes" << oss.str(); - ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, static_graph_properties, - &output_edge_map, precision_mode); - if (precision_mode == INT8MODE) { - TF_RETURN_IF_ERROR(GetCalibNode(&p)); - } else { - tensorflow::Status status = ConvertSubGraphToTensorRT(&p); - if (status != tensorflow::Status::OK()) { - LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \n" - << status.ToString() << " SKIPPING......"; - } - count++; - } + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( + output_names, subgraph_node_ids, max_batch_size, + max_workspace_size_bytes, static_graph_properties, &graph)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e1596e89e2..154ad3f2e8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -28,11 +28,6 @@ namespace tensorflow { namespace tensorrt { namespace convert { -// This method converts an already generated calibration graph which was used in -// calibration runs to an inference graph -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); - // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. // max_workspace_size_bytes: The upper bound of memory allowence for @@ -40,8 +35,7 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode, int minimum_segment_size); + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 75a3c3d034..9ee717dd7f 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -24,10 +24,6 @@ limitations under the License. #include #include -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" -#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -36,7 +32,6 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tensor_coding.h" @@ -44,6 +39,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorrt/include/NvInfer.h" // Check if the types are equal. Cast to int first so that failure log message @@ -53,7 +49,6 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { -using ::tensorflow::strings::StrCat; namespace { @@ -70,8 +65,7 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, *trt_dtype = nvinfer1::DataType::kHALF; break; default: - return tensorflow::errors::InvalidArgument( - "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); + return tensorflow::errors::InvalidArgument("Unsupported data type"); } return tensorflow::Status::OK(); } @@ -118,18 +112,6 @@ static std::vector> CreateSamePadding( return padding; } -string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { - size_t last_scope_separator = 0; - for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { - if (op_name_a[i] != op_name_b[i]) { - break; - } else if (op_name_a[i] == '/') { - last_scope_separator = i + 1; - } - } - return op_name_a.substr(0, last_scope_separator); -} - class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -262,11 +244,6 @@ std::vector TFAttrs::get>(string key) const { return std::vector(attr.begin(), attr.end()); } -template <> -std::vector TFAttrs::get>(string key) const { - auto attr = this->at(key)->list().s(); - return std::vector(attr.begin(), attr.end()); -} template <> nvinfer1::Dims TFAttrs::get(string key) const { auto values = this->get>(key); @@ -289,17 +266,6 @@ tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } -template <> -float TFAttrs::get(string key) const { - return this->at(key)->f(); -} - -template <> -bool TFAttrs::get(string key) const { - return this->at(key)->b(); -} - -// TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -317,87 +283,29 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } } -template -void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, - T* odata, nvinfer1::DimsHW ostrides) { - for (int h = 0; h < shape.h(); ++h) { - for (int w = 0; w < shape.w(); ++w) { - odata[h * ostrides.h() + w * ostrides.w()] = - idata[h * ostrides.h() + w * ostrides.w()]; - } - } -} - -// TODO(jie): fallback to tensorflow!! -void ReorderCKtoKC(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights) { - int c = iweights.shape_.d[0]; - int k = iweights.shape_.d[1]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; - nvinfer1::DimsHW istrides = {1, k}; - nvinfer1::DimsHW ostrides = {c, 1}; - switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { - Reorder2({k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - case tensorflow::DataType::DT_HALF: { - Reorder2( - {k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); - } -} - void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights, int num_groups) { + TRT_ShapedWeights* oweights) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - // TRT requires GKcRS, while TF depthwise has RSCK - // where c=1, C=G - VLOG(2) << "num_groups: " << num_groups; - int c = iweights.shape_.d[2] / num_groups; - VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; - int k = iweights.shape_.d[3] * num_groups; - VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; - oweights->shape_.d[0] = k / num_groups; - oweights->shape_.d[1] = c * num_groups; + int c = iweights.shape_.d[2]; + int k = iweights.shape_.d[3]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { + case tensorflow::DataType::DT_FLOAT: Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; - } - case tensorflow::DataType::DT_HALF: { - Reorder4( - {k, c, r, s}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); + LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; } } @@ -415,11 +323,12 @@ inline std::shared_ptr infer_object(T* obj) { return std::shared_ptr(obj, InferDeleter()); } +// Logger for GIE info/warning/errors class Converter; using OpConverter = std::function&, + std::vector const&, std::vector*)>; class Converter { @@ -427,57 +336,34 @@ class Converter { std::unordered_map op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - tensorflow::tensorrt::TRTWeightStore* weight_store_; - bool fp16_; + void register_op_converters(); + std::vector get_inputs( const tensorflow::NodeDef& node_def) { std::vector inputs; - for (auto const& input_name : node_def.input()) { - /************************************************************************* - * TODO(jie) handle case 1) here - * Normalizes the inputs and extracts associated metadata: - * 1) Inputs can contain a colon followed by a suffix of characters. - * That suffix may be a single number (e.g. inputName:1) or several - * word characters separated from a number by a colon - * (e.g. inputName:foo:1). The - * latter case is used to denote inputs and outputs of functions. - * 2) Control dependency inputs contain caret at the beginning and we - * remove this and annotate the edge as a control dependency. - ************************************************************************/ - string name = input_name[0] == '^' ? input_name.substr(1) : input_name; - auto first = name.find_first_of(':'); - if (first != string::npos && first + 2 == name.size() && - name[first + 1] == '0') - name.erase(first); - - VLOG(2) << "retrieve input: " << name; - if (trt_tensors_.count(name)) { - inputs.push_back(trt_tensors_.at(name)); - } else { - LOG(FATAL) << "input: " << name << " not availabled for node at, " - << node_def.name(); - } + for (const auto& input_name : node_def.input()) { + VLOG(2) << "Retrieve input: " << input_name; + inputs.push_back(trt_tensors_.at(input_name)); } return inputs; } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) - : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network) + : trt_network_(trt_network) { this->register_op_converters(); } - tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } + TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error - weight_store_->store_.push_back(std::vector(weights.size_bytes())); - weights.SetValues(weight_store_->store_.back().data()); + temp_bufs_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(temp_bufs_.back().data()); return weights; } - bool isFP16() { return fp16_; }; + TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -496,7 +382,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = StrCat(output_name, ":", i); + if (i != 0) output_name = output_name + ":" + std::to_string(i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -562,7 +448,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / sqrt(t); }; + return [](T t) -> T { return 1.0 / std::sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -648,22 +534,6 @@ struct LambdaFactory { } }; -template <> -std::function LambdaFactory::unary() { - switch (op) { - case OP_CATEGORY::RSQRT: { - VLOG(2) << "RSQRT GETS DONE"; - return [](Eigen::half t) -> Eigen::half { - return Eigen::half(1.0 / sqrt(float(t))); - }; - } - case OP_CATEGORY::NEG: - return [](Eigen::half t) -> Eigen::half { return -t; }; - default: - VLOG(2) << "Not supported op for unary: " << static_cast(op); - return nullptr; - } -} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -675,14 +545,6 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } - case tensorflow::DataType::DT_HALF: { - auto inp = static_cast(iweights.GetValues()); - auto oup = - static_cast(const_cast(oweights->GetValues())); - std::transform(inp, inp + iweights.count(), oup, - unary_op.unary()); - break; - } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -726,32 +588,6 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } - case tensorflow::DataType::DT_HALF: { - auto inp_l = static_cast(iweights_l.GetValues()); - auto inp_r = static_cast(iweights_r.GetValues()); - auto oup = - static_cast(const_cast(oweights->GetValues())); - - if (iweights_l.count() != iweights_r.count()) { - // We only supports broadcast of RankZero - if (iweights_l.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_l); - std::transform(inp_r, inp_r + iweights_r.count(), oup, - binary_op.broadcast_l(*inp_l)); - } else if (iweights_r.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_r); - std::transform(inp_l, inp_l + iweights_l.count(), oup, - binary_op.broadcast_r(*inp_r)); - } else { - return tensorflow::errors::Unimplemented( - "Binary op with non-rankZero broadcast not supported"); - } - } else { - std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, - binary_op.binary()); - } - break; - } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -763,7 +599,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -777,12 +613,13 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get("T")); + // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // Pass the output + // PAss the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -794,11 +631,11 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall back to TF +// Let's get the simple stuff working first. Maybe we should fall bakc to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -811,12 +648,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. - int num_dims = weights_input_l.shape_.nbDims; + int nb_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = num_dims; - VLOG(2) << "nb_dims: " << num_dims + output_shape.nbDims = nb_dims; + VLOG(2) << "nb_dims: " << nb_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < num_dims; i++) { + for (int i = 0; i < nb_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -841,6 +678,7 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); + // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -874,90 +712,48 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency + auto dtype = TFAttrs(node_def).get("T"); + CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); + CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // default to element-wise + // Default to channel-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // TODO(jie): maybe use a permuatation instead to support more cases; - bool permutation_flag = false; - if (weights.count() == 1) { VLOG(2) << "UNIFORM"; scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { - // no broadcasting on Batch dimension; - VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims - << " tensor DIM: " << dims_t.nbDims; - if (dims_w.nbDims == dims_t.nbDims + 1) { - if (dims_w.d[0] == 1) { - for (int i = 1; i < dims_w.nbDims; i++) { - dims_w.d[i - 1] = dims_w.d[i]; - } - dims_w.nbDims--; - } else { - return tensorflow::errors::InvalidArgument( - "Binary op cannot operate on batch, " + node_def.name()); - } - } + // No broadcasting on Batch dimension; + assert(dims_w.d[0] == 1); - if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { - scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // default is element; - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != dims_t.d[i]) { - // if dimension does not match, switch back to channel; - VLOG(2) << "channel"; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - break; - } - } - // if channel as candidate, validate it - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != 1) - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); - } - } else { - VLOG(2) << "elementwise"; + // Broadcasting on Channel dimension only allowed in kUNIFORM + assert(dims_w.d[1] == dims_t.d[0]); + assert(dims_w.nbDims == dims_t.nbDims); + + // Default is element; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != dims_t.d[i - 1]) { + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; } - } else if (dims_w.nbDims == 1 && - dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { - // channel wise and broadcast required; - permutation_flag = true; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - } else { - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); } - } - - // transpose last dimension - std::vector permutation(dims_t.nbDims + 1); - if (permutation_flag) { - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { - // we swap the last dimension into channel for trt. - // because of tensorflow default broadcasting rules. - for (int i = 0; i < static_cast(permutation.size()); i++) { - permutation[i] = i; + if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) { + scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != 1) + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); } - permutation[1] = dims_t.nbDims; - permutation[dims_t.nbDims] = 1; - tensor = ctx.TransposeTensor(const_cast(tensor), - permutation); - } else { - return tensorflow::errors::InvalidArgument( - "Transpose cannot be applied, " + node_def.name()); } } - // prepare weights + // Prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); TRT_ShapedWeights power_weights(weights.type_); @@ -983,26 +779,88 @@ tensorflow::Status BinaryTensorOpWeight( scale_weights, power_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); - // transpose back dimension - if (permutation_flag) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation); - } // Pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, const tensorflow::NodeDef& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // Get trt type & shape + TFAttrs attrs(node_def); + // Maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // Check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // Pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} -tensorflow::Status ConvertConv2DHelper( +tensorflow::Status ConvertPlaceholder( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs, - int group // group ==0 specifies depthwise conv -) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + std::vector const& inputs, + std::vector* outputs) { + VLOG(2) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented(", cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} +tensorflow::Status ConvertConv2D(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + // TODO(jie): handle NHWC/NCHW transpose; + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + ReorderRSCKToKCRS(weights_rsck, &weights); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0]; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; TFAttrs attrs(node_def); int h_index = 2; @@ -1016,31 +874,11 @@ tensorflow::Status ConvertConv2DHelper( // TODO(jie): transpose it } - // tensor after transpose (NCHW) - auto tensor_dim = tensor->getDimensions(); - - int num_groups = group; - if (num_groups == 0) // depthwise convolution - num_groups = tensor_dim.d[0]; - VLOG(2) << "groups count: " << num_groups; - - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0] * num_groups; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; - VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); - // TODO(jie): stride. (NHWC/NCHW) auto tf_stride = attrs.get>("strides"); - VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; - VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] - << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); + auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. if (attrs.get("padding") == "SAME") { @@ -1081,11 +919,10 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); - layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -1098,101 +935,11 @@ tensorflow::Status ConvertConv2DHelper( return tensorflow::Status::OK(); } -tensorflow::Status ConvertConv2DHelper( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs, ConvolutionType type) { - switch (type) { - case ConvolutionType::DEFAULT: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); - case ConvolutionType::DEPTHWISE_CONV: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); - } - return tensorflow::errors::Unimplemented("unsupported convolution type at, " + - node_def.name()); -} - -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, const tensorflow::NodeDef& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume type matches input weights - // get trt type & shape - TFAttrs attrs(node_def); - // maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertPlaceholder( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - VLOG(2) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertConv2D(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - ConvolutionType::DEFAULT); -} - -tensorflow::Status ConvertConv2DDepthwise( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - ConvolutionType::DEPTHWISE_CONV); -} - tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -1210,8 +957,6 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; - else if (node_def.op() == "AvgPool") - type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("Only supports Max pool"); @@ -1274,9 +1019,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1286,14 +1031,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1310,26 +1055,9 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { VLOG(2) << "NCHW !!!!"; } - - auto dims = tensor->getDimensions(); - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - dims = weights.shape_; - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - - nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; - if (weights.shape_.d[0] == 1) { - mode = nvinfer1::ScaleMode::kUNIFORM; - } - - nvinfer1::IScaleLayer* layer = - ctx.network()->addScale(*const_cast(tensor), mode, - weights, empty_weights, empty_weights); + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + weights, empty_weights, empty_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -1344,7 +1072,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1363,144 +1091,20 @@ tensorflow::Status ConvertConst(Converter& ctx, VLOG(2) << "SCALAR!!!" << node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - VLOG(2) << "size: " << weights_tensor.float_val_size(); - scalar_shape = GetTensorShape(tensor); - for (int i = 0; i < scalar_shape.nbDims; i++) - VLOG(2) << scalar_shape.d[i]; - if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) { - if (weights_tensor.float_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.float_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } - } else { VLOG(2) << "Dimensions: " << tensor.dims(); - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { - scalar_shape.d[i] = 0; - scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; - } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - auto half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - half_tensor.device(defd) = - tensor.flat().template cast(); - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } - } else if (!weights_tensor.int_val().empty()) { - VLOG(2) << "int!!!" << node_def.name(); - nvinfer1::Dims scalar_shape; - if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - scalar_shape = GetTensorShape(tensor); - if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) { - if (weights_tensor.int_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.int_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + GetTensorShape(tensor)); } else { - VLOG(2) << "dimensions: " << tensor.dims(); + VLOG(2) << "Dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.d[0] = 1; scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - TTypes::Flat half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - switch (dtype) { - case (tensorflow::DT_INT32): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT16): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_UINT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - default: - return tensorflow::errors::InvalidArgument( - "Datatype " + tensorflow::DataTypeString(dtype) + - " for FP16 conversion"); - break; - }; - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val() - .end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" << node_def.name(); @@ -1526,7 +1130,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1534,7 +1138,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1561,7 +1165,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1579,7 +1183,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1587,7 +1191,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1625,7 +1229,6 @@ tensorflow::Status ConvertReduce(Converter& ctx, return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); if (index_list_data[i] == 1) permuted_index = 1; - idx_set.emplace(index_list_data[i]); } @@ -1633,7 +1236,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i) == 0) { + if (idx_set.count(i)) { permuted_index = i; break; } @@ -1668,13 +1271,12 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.TransposeTensor( const_cast(output_tensor), permutation_order); } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1682,7 +1284,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1769,287 +1371,19 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } -tensorflow::Status ConvertConcat(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - // not including the last input (axis) here - int input_size = static_cast(inputs.size()) - 1; - - if (!inputs.at(0).is_tensor()) - return tensorflow::errors::InvalidArgument( - "Concat in TRT support only Tensor input, at " + node_def.name()); - - // We are retrieving the axis - TRT_ShapedWeights axis = inputs.at(input_size).weights(); - - TFAttrs attrs(node_def); - // auto attr_size = attrs.at("N")->i(); - // auto data_type = attrs.get("T"); - auto index_type = attrs.get("Tidx"); - - // TODO(jie): handle data type - // Only expect to handle INT32 as index attributes for now - if (index_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "Tidx supports only DT_INT32, at " + node_def.name()); - - int index = *(static_cast(const_cast(axis.GetValues()))); - - // TODO(jie): early termination with no-op (attr_size==1) - - auto dim = inputs.at(0).tensor()->getDimensions(); - // dimension check - if (index > dim.nbDims + 1) - return tensorflow::errors::InvalidArgument( - "Concatenate on axis out of dimension range, at " + node_def.name()); - - if (index == 0) - return tensorflow::errors::InvalidArgument( - "Concatenate on batch dimension not supported, at " + node_def.name()); - - // incase we need permutation; - std::vector permutation_order(dim.nbDims + 1); - - for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; - - if (index != 1) { - permutation_order[1] = index - 1; - permutation_order[index - 1] = 1; - } - - std::vector inputs_vec; - // Shap chack (all input tensor should have same shape) - // starting from 0 since we are probably also doing transpose here; - for (int i = 0; i < input_size; i++) { - auto tensor_i = inputs.at(i).tensor(); - auto dim_i = tensor_i->getDimensions(); - if (dim_i.nbDims != dim.nbDims) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent dimensions, at " + - node_def.name()); - - for (int j = 0; j < dim.nbDims; j++) { - // check dimension consistency on non-concatenate axis - if (j != index - 1 && dim_i.d[j] != dim.d[j]) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent shape, at" + - node_def.name()); - } - - // TRT does concatenation only on channel! - if (index != 1) - tensor_i = ctx.TransposeTensor(const_cast(tensor_i), - permutation_order); - - inputs_vec.push_back(tensor_i); - } - - // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( - const_cast(inputs_vec.data()), - inputs_vec.size()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - if (index != 1) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); - } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertFusedBatchNorm( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - TFAttrs attrs(node_def); - float epsilon = attrs.get("epsilon"); - auto data_format = attrs.get("data_format"); - if (data_format != "NCHW") { - return tensorflow::errors::Unimplemented( - "only data_format=NCHW is supported, at " + node_def.name()); - } - bool is_training = attrs.get("is_training"); - if (is_training) { - return tensorflow::errors::Unimplemented( - "only is_training=false is supported, at " + node_def.name()); - } - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); - TRT_ShapedWeights variance_weights = inputs.at(4).weights(); - TRT_ShapedWeights dummy_power_weights(scale_weights.type_); - TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(scale_weights); - TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(offset_weights); - size_t nweight = scale_weights.count(); - if ((scale_weights.type_ == offset_weights.type_) && - (mean_weights.type_ == variance_weights.type_) && - (scale_weights.type_ == variance_weights.type_)) { - if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && - (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { - return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + - tensorflow::DataTypeString(scale_weights.type_)); - } - if (scale_weights.type_ == tensorflow::DT_FLOAT) { - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast(scale_weights.GetValues()))[i]; - float offset = - (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = - (static_cast(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast( - static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( - static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; - } - } else { - const Eigen::half* scale_vals = - (static_cast(scale_weights.GetValues())); - const Eigen::half* off_vals = - (static_cast(offset_weights.GetValues())); - const Eigen::half* mean_vals = - (static_cast(mean_weights.GetValues())); - const Eigen::half* variance_vals = - (static_cast(variance_weights.GetValues())); - Eigen::half* comb_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - Eigen::half* comb_off_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - for (size_t i = 0; i < nweight; ++i) { - float scale(scale_vals[i]); - float offset(off_vals[i]); - float mean(mean_vals[i]); - float variance(variance_vals[i]); - float combined_scale_ref = scale / sqrtf(variance + epsilon); - comb_scale_vals[i] = Eigen::half(combined_scale_ref); - float combined_offset_ref = offset - mean * combined_scale_ref; - comb_off_vals[i] = Eigen::half(combined_offset_ref); - } - } - } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertMatMul(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - - // TODO(jie): transpose! - TFAttrs attrs(node_def); - - TRT_ShapedWeights weights_ck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); - ReorderCKtoKC(weights_ck, &weights); - TRT_ShapedWeights biases(weights.type_); - - int noutput = weights.shape_.d[0]; - - nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( - *const_cast(tensor), noutput, weights, biases); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertReshape( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - if (inputs.size() != 2 || !inputs.at(0).is_tensor() || - !inputs.at(1).is_weights()) - return tensorflow::errors::InvalidArgument( - "Input expects tensor and weights, at" + node_def.name()); - - // implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - auto dims = tensor->getDimensions(); - // restore implicit batch dimension - - TRT_ShapedWeights shape = inputs.at(1).weights(); - - TFAttrs attrs(node_def); - - auto padding_type = attrs.get("Tshape"); - - if (shape.shape_.nbDims != 1) - return tensorflow::errors::InvalidArgument( - "reshape new shape is not 1 dimensional, at " + node_def.name()); - - // Only expect to handle INT32 as attributes for now - if (padding_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "reshape new shape supports only DT_INT32, at " + node_def.name()); - - auto shape_data = static_cast(const_cast(shape.GetValues())); - - if (shape_data[0] != -1) - return tensorflow::errors::InvalidArgument( - "reshape new shape first dimension is not -1, at " + node_def.name()); - - auto shape_num_dims = shape.shape_.d[0]; - VLOG(2) << "shape dimensions: " << shape_num_dims; - int volume_w = 1; - for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i]; - - int volume_t = 1; - for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i]; - - VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w; - if (volume_w != volume_t) - return tensorflow::errors::InvalidArgument( - "volume does not agree between tensor and new shape, at " + - node_def.name()); - - nvinfer1::IShuffleLayer* layer = - ctx.network()->addShuffle(*const_cast(tensor)); - - nvinfer1::Dims reshape_dims; - VLOG(2) << "new dimension: " << shape_num_dims - 1; - reshape_dims.nbDims = shape_num_dims - 1; - for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { - reshape_dims.d[i] = shape_data[i + 1]; - } - layer->setReshapeDimensions(reshape_dims); - VLOG(2) << "new dimension: " << shape_num_dims - 1; - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - auto dims_output = output_tensor->getDimensions(); - VLOG(2) << "output tensor dimension:" << dims_output.nbDims; - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - void Converter::register_op_converters() { // vgg_16 slim implementation op_registry_["Placeholder"] = ConvertPlaceholder; op_registry_["Conv2D"] = ConvertConv2D; - op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; op_registry_["Relu"] = ConvertActivation; op_registry_["MaxPool"] = ConvertPool; - op_registry_["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; + // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed + // op_registry_["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -2059,364 +1393,26 @@ void Converter::register_op_converters() { op_registry_["Mean"] = ConvertReduce; op_registry_["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops - - op_registry_["ConcatV2"] = ConvertConcat; - op_registry_["MatMul"] = ConvertMatMul; - op_registry_["Reshape"] = ConvertReshape; - op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; - op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} -tensorflow::Status ConvertCalibrationNodeToEngineNode( - tensorflow::Graph& graph, tensorflow::Node* c_node) { - const auto ndef = c_node->def(); - - TFAttrs attrs(ndef); - std::vector segment_nodes( - attrs.get>("segment_nodes")); - std::vector output_nodes( - attrs.get>("segment_output_names")); - std::vector input_names( - attrs.get>("input_names")); - string res_name = attrs.get("resource_name"); - VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; - string engine_name = "my_trt_op"; - { - const auto node_id = tensorflow::str_util::Split(res_name, "_"); - engine_name += node_id.back(); - } - std::map node_maps; - - for (auto n : graph.op_nodes()) { - node_maps.insert({n->name(), n}); - } - VLOG(1) << "Output Nodes:"; - std::vector out_types; - std::vector out_edges; - for (auto& i : output_nodes) { - auto node_port = tensorflow::str_util::Split(i, ":"); - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto out_node_name = node_port.at(0); - if (node_port.size() > 1) { - VLOG(1) << "Multi port output" << node_port.at(0) << " " - << node_port.at(1) << " size=" << node_port.size(); - } - auto node_it = node_maps.find(out_node_name); - if (node_it != node_maps.end()) { - tensorflow::Node* out_node = node_it->second; - int port = 0; - if (node_port.size() == 2) { - port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); - out_types.push_back(out_node->output_type(port)); - } else { - out_types.push_back(out_node->output_type(0)); - } - for (auto out_edge : out_node->out_edges()) { - if (out_edge->src_output() == port) { - out_edges.push_back(out_edge); - break; - } - } - } else { - LOG(WARNING) << " couldn't find output node " << out_node_name; - } - } - VLOG(1) << "Input Nodes:"; - for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - } - auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); - auto resmgr = trt_rm->getManager("TRTCalibOps"); - tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; - auto status = resmgr->Lookup(res_name, res_name, &calib_res); - if (!status.ok() || !calib_res->calibrator_) { - return tensorflow::errors::FailedPrecondition( - "You must run calibration" - " and inference conversion in the same proces"); - } - - calib_res->calibrator_->setDone(); - calib_res->thr_->join(); - delete calib_res->thr_; - if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " - "calibration graph?"; - } - auto weight_rmgr = trt_rm->getManager("WeightStore"); - TF_CHECK_OK(weight_rmgr->Delete( - res_name, res_name)); - auto engine_plan = calib_res->engine_->serialize(); - calib_res->engine_->destroy(); - calib_res->network_->destroy(); - calib_res->builder_->destroy(); - calib_res->thr_ = nullptr; - calib_res->engine_ = nullptr; - calib_res->builder_ = nullptr; - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector income_edges; - for (const auto in_edge : c_node->in_edges()) { - auto src = in_edge->src(); - int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); - tensorflow::NodeDef engine_node; - const char* engine_plan_data = static_cast(engine_plan->data()); - string engine_plan_string(engine_plan_data, - engine_plan_data + engine_plan->size()); - status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_nodes) - .Attr("OutT", out_types) - .Finalize(&engine_node); - if (!status.ok()) { - LOG(ERROR) << "Engine Node creation failed"; - return status; - } - auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_CHECK_OK(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); - } - VLOG(1) << "Segment nodes:"; - for (auto& i : segment_nodes) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto it = node_maps.find(i); - if (it != node_maps.end()) { - graph.RemoveNode(it->second); - } - } - graph.RemoveNode(c_node); - return tensorflow::Status::OK(); -} - -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. - - // Toposort - std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph - std::list order; - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to contstruct the - // network layer by layer - } - } - // topological order is needed to build TRT network - static int static_id = 0; - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - // TODO(sami,ben,jie): proper naming! - string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); - } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); - std::vector input_names; - std::vector input_dtypes; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port - // TODO(jie): alternative :) - if (!s.graph_properties.HasOutputProperties(node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - node_name); - - auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", output_idx, ", at node: ", node_name, - "with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(output_idx); - - tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - - VLOG(2) << "accessing output index of: " << output_idx - << ", at node: " << node_name - << "with output entry from shape_map: " << op_info_vec.size(); - - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? - auto input_tensor_name = node_name; - if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); - - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "output tensor already exists for op: " + input_tensor_name); - } - - VLOG(2) << "finished sorting"; - - for (const tensorflow::Node* node : order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "finished conversion"; - - // Gather output metadata - std::vector output_names; - std::vector output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - VLOG(1) << "output tensor name: " << tensor_name; - output_names.push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); - if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); - } - nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); - if (!tensor) { - return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); - } - converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); - } - - VLOG(2) << "finished output"; - - // Build the engine - op_res->builder_->setMaxBatchSize(s.max_batch_size); - op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); - - // Build the TRT op - // TODO(sami,ben,jie): proper naming! - tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); - std::vector income_edges; - for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) - auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( - input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) - << ":" << output_idx - << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); - income_edges.push_back(incoming_edge); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); - std::vector segment_names; - segment_names.reserve(s.subgraph_node_ids.size()); - for (int i : s.subgraph_node_ids) { - auto node = s.graph.FindNodeId(i); - segment_names.push_back(node->name()); - } - LOG(INFO) << "finished op preparation"; - - auto status = op_builder.Attr("segment_nodes", segment_names) - .Attr("input_names", input_names) - .Attr("segment_output_names", output_names) - .Attr("resource_name", calib_op_name) - .Finalize(s.trt_node); - - LOG(INFO) << status.ToString(); - LOG(INFO) << "finished op building"; - - return tensorflow::Status::OK(); -} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - tensorrt::convert::SubGraphParams& s) { + const tensorflow::Graph& graph, const std::set& subgraph_node_ids, + const std::vector>& input_inds, + const std::vector>& output_inds, size_t max_batch_size, + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. // Toposort std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); + tensorflow::GetPostOrder(graph, &order_vec); // Select just the subgraph std::list order; for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { + if (subgraph_node_ids.count(node->id())) { // We want topological order to contstruct the // network layer by layer order.push_front(node); @@ -2438,86 +1434,46 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "Failed to create TensorRT network object"); } - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - static int static_id = 0; - // TODO(sami,ben,jie): proper naming! - string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); - engine_name = StrCat(engine_name, static_id++); - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); - // Build the network - Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); + Converter converter(trt_network.get()); std::vector input_names; std::vector input_dtypes; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input!!!!!"; + for (std::pair const& input : input_inds) { int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); + tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); - // input_names should use the node name in the graph - // here it should be the input tensor name -> matching the binding - // insert original node name without port - auto tensor_name = node_name; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - - VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; - - auto shape_inference_node_name = node_name; - auto shape_inference_output_idx = output_idx; - // rewire the shape inference to original node in the graph - if (s.output_edge_map->count(tensor_name)) { - shape_inference_node_name = s.output_edge_map->at(tensor_name).second; - shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; - } - if (shape_inference_output_idx < 0) continue; - VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; - - if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - shape_inference_node_name); + input_names.push_back(node_name); // Insert original node name without port + // TODO(jie): alternative :) + if (!graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("Failed to find input node: " + + node_name); - auto op_info_vec = - s.graph_properties.GetOutputProperties(shape_inference_node_name); - if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( - "accessing output index of: ", shape_inference_output_idx, - ", at node: ", shape_inference_node_name, - " with output entry from shape_map: ", op_info_vec.size()); + "Accessing output index of: " + std::to_string(output_idx) + + ", at node: " + node_name + " with output entry from shape_map: " + + std::to_string(op_info_vec.size())); + + auto op_info = op_info_vec.at(output_idx); - auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "Accessing output index of: " << output_idx + VLOG(2) << "Accessing output index of: " << std::to_string(output_idx) << ", at node: " << node_name - << " with output entry from shape_map: " << op_info_vec.size(); + << " with output entry from shape_map: " + << std::to_string(op_info_vec.size()); + // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - // TODO(jie): TRT 3.x only support 4 dimensional input tensor. - // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) - return tensorflow::errors::Unimplemented("require 4 dimensional input"); - for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); @@ -2526,11 +1482,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) { - input_tensor_name = StrCat(node_name, ":", output_idx); - } + if (output_idx != 0) + input_tensor_name = node_name + ":" + std::to_string(output_idx); - input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -2557,22 +1511,14 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector output_names; std::vector output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair& output : s.output_inds) { + for (std::pair const& output : output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); + tensorflow::Node* node = graph.FindNodeId(node_id); string op_name = node->name(); string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); + tensor_name = tensor_name + ":" + std::to_string(output_idx); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -2594,25 +1540,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << "Finished output"; + // TODO(jie): static_id is not thread safe. + static int static_id = 0; // Build the engine - trt_builder->setMaxBatchSize(s.max_batch_size); - trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); - VLOG(0) << "Max batch size= " << s.max_batch_size - << " max workspace size= " << s.max_workspace_size_bytes; - if (s.precision_mode == FP16MODE) { - trt_builder->setHalf2Mode(true); - VLOG(0) << "Using FP16 precision mode"; - } - LOG(INFO) << "starting build engine"; + trt_builder->setMaxBatchSize(max_batch_size); + trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes); + VLOG(0) << "Starting build engine " << static_id; + // TODO(ben,jie): half2 and int8 mode support string engine_plan_string; { auto trt_engine = infer_object(trt_builder->buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; - if (trt_engine.get() == nullptr) { - return tensorflow::errors::Internal("Engine building failure"); - } auto engine_plan = infer_object(trt_engine->serialize()); VLOG(0) << "Serialized engine"; const char* engine_plan_data = @@ -2620,19 +1560,18 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - TF_RETURN_IF_ERROR(weight_rmgr->Delete( - engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name; + + VLOG(0) << "Finished engine"; // Build the TRT op - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder( + tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp"); std::vector income_edges; - VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << "input edges: " << i << " " << input_names.at(i); - int output_idx = s.input_inds.at(i).second; - // we wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) + int output_idx = input_inds.at(i).second; + // We wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); income_edges.push_back(incoming_edge); @@ -2647,7 +1586,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(s.trt_node); + .Finalize(trt_node); VLOG(0) << status.ToString() << " finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 954a1e72f8..2e7fd19566 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -17,8 +17,6 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_ #include -#include -#include #include #include @@ -34,49 +32,16 @@ namespace tensorflow { namespace tensorrt { namespace convert { -const int FP32MODE = 0; -const int FP16MODE = 1; -const int INT8MODE = 2; +tensorflow::Status ConvertSubGraphToTensorRTNodeDef( + const tensorflow::Graph& graph, const std::set& subgraph_node_ids, + const std::vector>& + input_inds, // {node_id, output_idx} + const std::vector>& + output_inds, // {node_id, output_idx} + size_t max_batch_size, size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_prop, + tensorflow::NodeDef* trt_node); -struct SubGraphParams { - SubGraphParams( - tensorflow::Graph& inp_graph, - const std::set& subgraph_node_id_numbers, - const std::vector>& input_indices, - const std::vector>& output_indices, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map>* output_edges, - tensorflow::NodeDef* constructed_trt_node, - int engine_precision_mode = FP32MODE) - : graph(inp_graph), - subgraph_node_ids(subgraph_node_id_numbers), - input_inds(input_indices), - output_inds(output_indices), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - trt_node(constructed_trt_node), - precision_mode(engine_precision_mode) {} - - tensorflow::Graph& graph; - const std::set& subgraph_node_ids; - const std::vector>& input_inds; // {node_id, output_idx} - const std::vector>& output_inds; // {node_id, output_idx} - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map>* output_edge_map; - tensorflow::NodeDef* trt_node; - const int precision_mode; -}; - -// TODO(sami): Replace references with const reference or pointers -tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); -tensorflow::Status InjectCalibrationNode(SubGraphParams& params); -tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, - tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index aea44fd8a2..1dcb87e768 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -21,11 +21,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -114,13 +113,7 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files - const cudaStream_t* stream = CHECK_NOTNULL( - reinterpret_cast(ctx->op_device_context() - ->stream() - ->implementation() - ->CudaStreamMemberHack())); - calib_res->calibrator_->setBatch(input_data, *stream); + calib_res->calibrator_->setBatch(input_data); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b32371b642..8efdf63ebe 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,12 +24,8 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -static ::tensorflow::tensorrt::Logger logger; -namespace gpu = ::perftools::gputools; -using IRuntime = nvinfer1::IRuntime; -using Dims = nvinfer1::Dims; - namespace tensorrt { +static ::tensorflow::tensorrt::Logger logger; TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine @@ -44,21 +40,10 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. - int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; - - // TODO(samikama) runtime should be taken from a resourcemanager as well. - // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - - IRuntime* infer = nvinfer1::createInferRuntime(logger); + nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); + trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); // Runtime is safe to delete after engine creation infer->destroy(); @@ -70,6 +55,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { size_t binding_index; int num_batch = 0; + bool valid = true; for (int i = 0; i < context->num_inputs(); i++) { // Grab the input tensor binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); @@ -78,12 +64,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { num_batch = input_shape.dim_size(0); - if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { - LOG(FATAL) << "input tensor batch larger than max_batch_size: " - << trt_engine_ptr_->getMaxBatchSize(); - } } else if (num_batch != input_shape.dim_size(0)) { - LOG(FATAL) << "input data inconsistent batch size"; + valid = false; break; } switch (trt_engine_ptr_->getBindingDataType(binding_index)) { @@ -99,6 +81,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } + // Might want a different way to inform the user of batch size inconsistency + if (!valid) LOG(WARNING) << "input data inconsistent batch size"; + for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. // Create an output tensor @@ -141,11 +126,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - // TODO(jie): trt enqueue does not return error - auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], - *stream, nullptr); - VLOG(2) << "enqueue returns: " << ret; - // sync should be done by TF. + // execution handled by TF since we are getting stream from TF. + // it is safe for CPU pointer array (buffers) to go out of scope after enqueue + trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index dda0dc9e71..7add8cb8b3 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << name_ << " " << msg; + VLOG(2) << msg; break; } case Severity::kWARNING: { - LOG(WARNING) << name_ << " " << msg; + LOG(WARNING) << msg; break; } case Severity::kERROR: { - LOG(ERROR) << name_ << " " << msg; + LOG(ERROR) << msg; break; } case Severity::kINTERNAL_ERROR: { - LOG(FATAL) << name_ << " " << msg; + LOG(FATAL) << msg; break; } // This is useless for now. But would catch it in future if enum changes. It diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index 7f3544f8cf..d71f66b933 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -27,11 +27,9 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { - public: - Logger(string name = "DefaultLogger") : name_(name){}; + private: void log(nvinfer1::ILogger::Severity severity, const char* msg) override; - private: string name_; }; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 0b2321b5fc..7e050a768c 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -20,6 +20,5 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.tensorrt.python.ops import trt_engine_op -from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 666220d78c..9454862f85 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -20,17 +20,11 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six -from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 -from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops -from tensorflow.python.grappler import tf_optimizer -from tensorflow.python.util import compat -# pylint: enable=unused-import,line-too-long # TODO(skama): get outputs from session when implemented as c++ @@ -38,33 +32,22 @@ from tensorflow.python.util import compat def create_inference_graph(input_graph_def, outputs, max_batch_size=1, - max_workspace_size_bytes=2 << 20, - precision_mode="FP32", - minimum_segment_size=3): + max_workspace_size_bytes=2 << 20): """Python wrapper for the TRT transormation. + Args: input_graph_def: GraphDef object containing a model to be transformed. - outputs: list of tensors or node names for the model outputs. + outputs: List of tensors or node names for the model outputs. max_batch_size: max size for the input batch max_workspace_size_bytes: parameter to control memory allocation (in Bytes) - precision_mode: one of 'FP32', 'FP16' and 'INT8' - minimum_segment_size: the minimum number of nodes required for a subgraph to - be replaced by TRTEngineOp. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. Raises: - ValueError: if the provided precision mode is invalid. RuntimeError: if the returned status message is malformed. """ - supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2} - if precision_mode.upper() not in supported_precision_modes: - raise ValueError(("precision mode '{}' is not supported." - "It should be one of {}").format( - precision_mode, "{'FP32', 'FP16', 'INT8'}")) - mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -100,7 +83,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes, mode, minimum_segment_size) + max_workspace_size_bytes) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -118,46 +101,3 @@ def create_inference_graph(input_graph_def, output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string # Save some memory return output_graph_def - - -def calib_graph_to_infer_graph(calibration_graph_def): - """Convert an existing calibration graph to inference graph. - - Args: - calibration_graph_def: the calibration GraphDef object with calibration data - Returns: - New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. - Raises: - RuntimeError: if the returned status message is malformed. - """ - - def py2string(inp): - return inp - - def py3string(inp): - return inp.decode("utf-8") - - if _six.PY2: - to_string = py2string - else: - to_string = py3string - - graph_str = calibration_graph_def.SerializeToString() - out = calib_convert(graph_str) - status = to_string(out[0]) - output_graph_def_string = out[1] - del graph_str # Save some memory - if len(status) < 2: - raise _impl.UnknownError(None, None, status) - if status[:2] != "OK": - msg = status.split(";") - if len(msg) == 1: - raise RuntimeError("Status message is malformed {}".format(status)) - # pylint: disable=protected-access - raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), - int(msg[0])) - # pylint: enable=protected-access - output_graph_def = graph_pb2.GraphDef() - output_graph_def.ParseFromString(output_graph_def_string) - del output_graph_def_string # Save some memory - return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 74df75902e..3d5cc76c42 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -38,18 +38,22 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), - batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, - const cudaStream_t stream) { - tensorflow::mutex_lock lock(cond_mtx_); - while ((calib_running_ || batch_is_set_) && - !done_) { // wait while calibration is running - cond_.wait(lock); - } +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. the first batch is not missed if (done_) return false; - CHECK(!calib_running_ && !batch_is_set_); + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { auto devptr = dev_buffers_.find(it.first); @@ -61,32 +65,27 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - // TODO(sami,aaroey): Need to figureout a way to ensure synchronization - // between stream, perhaps using a tensor? - auto status = cudaMemcpyAsync(d.first, it.second, d.second, - cudaMemcpyDeviceToDevice, stream); + auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); if (status != cudaSuccess) { LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first << "' failed with " << status; } } - - // TODO(Sami, aaorey): Find an alternative way! - cudaStreamSynchronize( - stream); // we have to wait for the stream before returning! - batch_is_set_ = true; + calib_running_.store(true, std::memory_order_release); // release builder cond_.notify_all(); return true; } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - tensorflow::mutex_lock lock(cond_mtx_); - calib_running_ = false; + calib_running_.store(false, std::memory_order_release); // wait for new batch cond_.notify_all(); - while ((!batch_is_set_ && !done_)) { // wait until new batch arrives - cond_.wait(lock); - + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; } if (done_) { return false; @@ -101,8 +100,6 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, bindings[i] = it->second.first; } - batch_is_set_ = false; - calib_running_ = true; return true; } @@ -110,12 +107,6 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } -void TRTInt8Calibrator::setDone() { - tensorflow::mutex_lock lock(cond_mtx_); - done_ = true; - cond_.notify_all(); -} - void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, std::size_t length) {} TRTInt8Calibrator::~TRTInt8Calibrator() { @@ -124,6 +115,5 @@ TRTInt8Calibrator::~TRTInt8Calibrator() { } // namespace tensorrt } // namespace tensorflow - #endif #endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index d77aa2c5ab..8830f7efe7 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,10 +24,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT - -#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" - namespace tensorflow { namespace tensorrt { // This class provides a 1 element queue to match TFs push model to @@ -42,9 +39,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map& data, - const cudaStream_t stream); - void setDone(); + bool setBatch(const std::unordered_map& data); + void setDone() { done_ = true; } const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); @@ -59,14 +55,11 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { const std::unordered_map> dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with // buffer names - bool calib_running_; - bool batch_is_set_; + std::atomic_bool calib_running_; string engine_name_; }; - } // namespace tensorrt } // namespace tensorflow - +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ #endif #endif -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 0b661bd536..c78f6f2224 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -60,7 +60,6 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): - """Run given graphdef once.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -75,65 +74,15 @@ def run_graph(gdef, dumm_inp): return val -# Use real data that is representatitive of the inference dataset -# for calibration. For this test script it is random data. -def run_calibration(gdef, dumm_inp): - """Run given calibration graph multiple times.""" - gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) - ops.reset_default_graph() - g = ops.Graph() - with g.as_default(): - inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) - inp = inp.outputs[0] - out = out.outputs[0] - with csess.Session( - config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: - # run over real calibration data here, we are mimicking a calibration set of - # 30 different batches. Use as much calibration data as you want - for _ in range(30): - val = sess.run(out, {inp: dumm_inp}) - return val - - if "__main__" in __name__: inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) - orig_graph = get_simple_graph_def() # use a frozen graph for inference + gdef = get_simple_graph_def() # Get optimized graph - trt_graph = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - o1 = run_graph(orig_graph, dummy_input) + trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0]) + o1 = run_graph(gdef, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) assert np.array_equal(o1, o2) assert np.array_equal(o3, o2) # sanity check - fp16_graph = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - int8_calib_gdef = trt.create_inference_graph( - input_graph_def=orig_graph, - outputs=["output"], - max_batch_size=inp_dims[0], - max_workspace_size_bytes=1 << 25, - precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - o4 = run_graph(fp16_graph, dummy_input) - _ = run_calibration(int8_calib_gdef, dummy_input) - int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) - o5 = run_graph(int8_graph, dummy_input) - assert np.allclose(o1, o4) - assert np.allclose(o1, o5) print("Pass") diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 46480e99a1..d679945d56 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -64,17 +64,13 @@ PyObject* pair_helper(std::pair* in) { %ignoreall %unignore tensorflow; %unignore trt_convert; -%unignore calib_convert; %{ - std::pair trt_convert( string graph_def_string, // The serialized GraphDef string. std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, - int precision_mode, - int minimum_segment_size + size_t max_workspace_size_bytes // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. These ops are included @@ -94,64 +90,16 @@ std::pair trt_convert( return std::pair{out_status, ""}; } - if(precision_mode < 0 || precision_mode > 2){ - out_status = "InvalidArgument;Invalid precision_mode"; - return std::pair{out_status, ""}; - } if (!output_names.size()) { out_status = "InvalidArgument;Size of the output_names vector is 0"; return std::pair{out_status, ""}; + // return ""; } tensorflow::GraphDef outGraph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph, precision_mode, minimum_segment_size); - if (!conversion_status.ok()) { - auto retCode = (int)conversion_status.code(); - char buff[2000]; - snprintf(buff, 2000, "%d;%s", retCode, - conversion_status.error_message().c_str()); - out_status = buff; - return std::pair{out_status, ""}; - } - string result; - if (!outGraph.SerializeToString(&result)) { - out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; - return std::pair{out_status, ""}; - } - out_status = "OK;All good!"; - return std::pair{out_status, result}; -#else - // Returns FAILED_PRECONDITION. - return std::pair{"9;TensorRT is not enabled!", ""}; -#endif // GOOGLE_CUDA && GOOGLE_TENSORRT -} - -std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& - // unfortunately we can't use TF_Status here since it - // is in c/c_api and brings in a lot of other libraries - // which in turn declare ops. These ops are included - // statically in our library and cause an abort when - // module is loaded due to double registration - // until Tensorflow properly exposes these headers - // we have to work around this by returning a string - // and converting it to exception on python side. - //,TF_Status* out_status) { -) { -#if GOOGLE_CUDA && GOOGLE_TENSORRT - string out_status; - - tensorflow::GraphDef graph_def; - if (!graph_def.ParseFromString(graph_def_string)) { - out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; - return std::pair{out_status, ""}; - } - - tensorflow::GraphDef outGraph; - tensorflow::Status conversion_status = - tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, - &outGraph); + &outGraph); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -174,13 +122,10 @@ std::pair calib_convert(string graph_def_string // const tenso } %} -std::pair calib_convert(string graph_def_string); - std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, - int precision_mode, int minimum_segment_size); + size_t max_workspace_size_bytes); %unignoreall diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index 70bf67c779..bb86ecb220 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,10 +25,7 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", # b/67513579 - ], + tags = ["notsan"], # b/67513579 deps = [ ":predict", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 64f5cd8357..ed3ed4c0e1 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -156,7 +156,9 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = ["no_pip_gpu"], # b/63391119 + tags = [ + "no_pip_gpu", # b/63391119 + ], deps = [ ":feature_keys", ":head", @@ -425,7 +427,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 - "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index 07df7bc9a5..c86d06e923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,7 +40,6 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index f9d433a45b..ed930e44e8 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -225,7 +225,6 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index dca01d26f4..f4283cd9ed 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,10 +42,9 @@ def load_op_library(path): plugin. """ if os.name == 'nt': - # To avoid making every user_ops aware of windows, re-write - # the file extension from .so to .dll if .so file doesn't exist. - if not os.path.exists(path): - path = re.sub(r'\.so$', '.dll', path) + # To avoid makeing every user_ops aware of windows, re-write + # the file extension from .so to .dll. + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5c9fd2f406..352e183104 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3327,10 +3327,6 @@ tf_cc_test( size = "small", srcs = ["common_runtime/function_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), - tags = [ - "manual", - "no_oss", - ], deps = [ ":core", ":core_cpu", diff --git a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt deleted file mode 100644 index 9fabe7863e..0000000000 --- a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt +++ /dev/null @@ -1,18 +0,0 @@ -op { - graph_op_name: "SlideDataset" - in_arg { - name: "window_size" - description: <contents()); - input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index cfe23d1ffe..beaf0adbc5 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -474,11 +474,11 @@ class GraphDatasetBase : public DatasetBase { } // Key for storing the Dataset graph in the serialized format. - TF_EXPORT static const char kDatasetGraphKey[]; + static const char kDatasetGraphKey[]; // Key for storing the output node of the Dataset graph in the serialized // format. - TF_EXPORT static const char kDatasetGraphOutputNodeKey[]; + static const char kDatasetGraphOutputNodeKey[]; private: Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 1507b6eae2..02038c5d77 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2492,10 +2492,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { mkl_op_registry::GetMklOpName(csinfo_.identity), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn), - CopyAttrsLRN, LrnRewrite}); + CopyAttrsLRN, AlwaysRewrite}); rinfo_.push_back({csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), - CopyAttrsLRN, LrnRewrite}); + CopyAttrsLRN, AlwaysRewrite}); rinfo_.push_back({csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool), CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); @@ -2865,28 +2865,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized - // path. The unoptimized path is slow. Thus we dont rewrite the node - // and use default Eigen. But for depth_radius=2, MKL DNN optimized - // path is taken, i.e., eigen node is rewritten by MKl DNN node. - static bool LrnRewrite(const Node* n) { - CHECK_NOTNULL(n); - - int depth_radius; - CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); - - // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN - // and use eigen node instead - if (depth_radius == 2) { - return true; - } - VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" - << "case is not optimized by Intel MKL, thus using Eigen op" - << "for LRN " ; - - return false; - } - static bool AddNRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -3550,13 +3528,11 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, string data_format; string padding; std::vector strides; - std::vector dilations; bool use_cudnn_on_gpu; // Get all attributes from old node. TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3565,7 +3541,6 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, // Add attributes to new node. nb->Attr("T", T); nb->Attr("strides", strides); - nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); @@ -3803,14 +3778,12 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, DataType T_pred, T_succ; string padding; std::vector strides; - std::vector dilations; string data_format_pred, data_format_succ; bool use_cudnn_on_gnu; TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(pred->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 8f13c4a702..244653504d 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -409,7 +408,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { frame_children_[frame_ids[0]].insert(frame_ids[1]); frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; } - if (frame_ids.size() >= 1) { + if (!frame_ids.empty()) { frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); if (node->op() == "LoopCond") { if (loop_cond_.count(frame_ids.back())) { @@ -428,7 +427,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { } for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { - if (it->second.size() == 0) { + if (it->second.empty()) { worklist.push_back(it->first); } } @@ -441,7 +440,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { if (parent_it != frame_parent_.end()) { int parent_id = parent_it->second; frame_children_[parent_id].erase(frame_id); - if (frame_children_[parent_id].size() == 0) { + if (frame_children_[parent_id].empty()) { worklist.push_back(parent_id); } } @@ -465,6 +464,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { TF_RETURN_IF_ERROR(RemoveStackOps(item.graph, optimized_graph)); + optimized_graph_ = optimized_graph; // Set up helper data structures. diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 2e39f25fc1..48d5955ad1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5157,6 +5157,7 @@ tf_kernel_library( ], hdrs = [ "meta_support.h", + "quantization_utils.h", "reference_gemm.h", ], deps = [ @@ -5231,7 +5232,6 @@ tf_cc_test( name = "quantization_utils_test", srcs = ["quantization_utils_test.cc"], deps = [ - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5294,7 +5294,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5356,7 +5355,6 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5379,7 +5377,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5444,7 +5441,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5465,7 +5461,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5505,7 +5500,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5562,7 +5556,6 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5585,7 +5578,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5622,7 +5614,6 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5644,7 +5635,6 @@ tf_cc_test( deps = [ ":batch_norm_op", ":ops_testutil", - ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 01754ec21a..484d4f88d6 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -113,19 +113,6 @@ tf_kernel_library( ], ) -tf_kernel_library( - name = "slide_dataset_op", - srcs = ["slide_dataset_op.cc"], - deps = [ - ":dataset", - "//tensorflow/core:dataset_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/kernels:batch_util", - ], -) - tf_kernel_library( name = "padded_batch_dataset_op", srcs = ["padded_batch_dataset_op.cc"], @@ -551,7 +538,6 @@ tf_kernel_library( ":scan_dataset_op", ":shuffle_dataset_op", ":skip_dataset_op", - ":slide_dataset_op", ":sparse_tensor_slice_dataset_op", ":sql_dataset_ops", ":stats_aggregator_ops", diff --git a/tensorflow/core/kernels/data/slide_dataset_op.cc b/tensorflow/core/kernels/data/slide_dataset_op.cc deleted file mode 100644 index 4f3537b691..0000000000 --- a/tensorflow/core/kernels/data/slide_dataset_op.cc +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/partial_tensor_shape.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/batch_util.h" -#include "tensorflow/core/kernels/data/dataset.h" - -namespace tensorflow { - -namespace { - -// See documentation in ../ops/dataset_ops.cc for a high-level -// description of the following op. - -class SlideDatasetOp : public UnaryDatasetOpKernel { - public: - explicit SlideDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx) {} - - void MakeDataset(OpKernelContext* ctx, DatasetBase* input, - DatasetBase** output) override { - int64 window_size = 0; - int64 stride = 1; - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "window_size", &window_size)); - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "stride", &stride)); - OP_REQUIRES( - ctx, window_size > 0, - errors::InvalidArgument("Window size must be greater than zero.")); - OP_REQUIRES( - ctx, stride > 0 && stride < window_size, - errors::InvalidArgument("Stride must be in [1, window_size).")); - - *output = new Dataset(ctx, window_size, stride, input); - } - - private: - class Dataset : public GraphDatasetBase { - public: - Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input) - : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) { - input_->Ref(); - - const auto& input_shapes = input_->output_shapes(); - output_shapes_.reserve(input_shapes.size()); - for (const auto& input_shape : input_shapes) { - output_shapes_.emplace_back( - PartialTensorShape({-1}).Concatenate(input_shape)); - } - } - - ~Dataset() override { input_->Unref(); } - - std::unique_ptr MakeIterator( - const string& prefix) const override { - return std::unique_ptr(new Iterator( - Iterator::Params{this, strings::StrCat(prefix, "::Slide")})); - } - - const DataTypeVector& output_dtypes() const override { - return input_->output_dtypes(); - } - - const std::vector& output_shapes() const override { - return output_shapes_; - } - - string DebugString() override { - return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset"); - } - - protected: - Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); - Node* window_size = nullptr; - Node* stride = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); - TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride)); - TF_RETURN_IF_ERROR( - b->AddDataset(this, {input_graph_node, window_size, stride}, output)); - return Status::OK(); - } - - private: - - class Iterator : public DatasetIterator { - public: - explicit Iterator(const Params& params) - : DatasetIterator(params), - input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} - - Status GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override { - const int64 window_size = dataset()->window_size_; - const int64 stride = dataset()->stride_; - std::vector> batch_elements; - { - mutex_lock l(mu_); - if (!input_impl_) { - *end_of_sequence = true; - return Status::OK(); - } - batch_elements.reserve(window_size); - const bool first_call = cache_.empty(); - if (first_call) { - cache_.reserve(window_size); - } else { - // Reuse cache in the previous iteration. - cache_.swap(batch_elements); - } - // Fill up with new elements. - *end_of_sequence = false; - for (size_t i = batch_elements.size(); i < window_size && !*end_of_sequence; - ++i) { - std::vector batch_element_tuple; - TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, - end_of_sequence)); - if (!*end_of_sequence) { - batch_elements.push_back(std::move(batch_element_tuple)); - } else { - input_impl_.reset(); - } - } - // Drop the final smaller blocks. - if (batch_elements.size() < window_size) { - DCHECK(*end_of_sequence); - return Status::OK(); - } - // Cache the data used for the next iteration. - for (size_t i = stride; i < window_size; ++i) { - cache_.emplace_back(batch_elements[i]); - } - } - - // Construct output tensors. - // Those codes below are copied from batch_dataset_op.cc. - const size_t num_tuple_components = batch_elements[0].size(); - const int64 num_batch_elements = batch_elements.size(); - for (size_t component_index = 0; component_index < num_tuple_components; - ++component_index) { - const Tensor& first_element = batch_elements[0][component_index]; - TensorShape batch_component_shape({num_batch_elements}); - batch_component_shape.AppendShape(first_element.shape()); - Tensor batch_component(cpu_allocator(), first_element.dtype(), - batch_component_shape); - // Build the output tuple component by copying one slice - // from each input element in the batch. - for (size_t i = 0; i < num_batch_elements; ++i) { - if (batch_elements[i][component_index].shape() != - first_element.shape()) { - return errors::InvalidArgument( - "Cannot batch tensors with different shapes in component ", - component_index, ". First element had shape ", - first_element.shape().DebugString(), " and element ", i, - " had shape ", - batch_elements[i][component_index].shape().DebugString(), - "."); - } - TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice( - std::move(batch_elements[i][component_index]), &batch_component, - i)); - } - out_tensors->emplace_back(std::move(batch_component)); - } - *end_of_sequence = false; - return Status::OK(); - } - - protected: - Status SaveInternal(IteratorStateWriter* writer) override { - mutex_lock l(mu_); - if (!input_impl_) { - TF_RETURN_IF_ERROR( - writer->WriteScalar(full_name("input_impl_empty"), "")); - } else { - TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); - } - // Save cache. - TF_RETURN_IF_ERROR( - writer->WriteScalar(strings::StrCat("cache_size"), cache_.size())); - for (int64 i = 0; i < cache_.size(); i++) { - TF_RETURN_IF_ERROR(writer->WriteScalar( - strings::StrCat("cache[", i, "]_size"), cache_[i].size())); - for (int64 j = 0; j < cache_[i].size(); j++) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - strings::StrCat("cache[", i, "][", j, "]"), cache_[i][j])); - } - } - return Status::OK(); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } - // Restore cache. - int64 cache_size; - TF_RETURN_IF_ERROR( - reader->ReadScalar(strings::StrCat("cache_size"), &cache_size)); - cache_.resize(cache_size); - for (int64 i = 0; i < cache_size; i++) { - int64 vector_size; - TF_RETURN_IF_ERROR(reader->ReadScalar( - strings::StrCat("cache[", i, "]_size"), &vector_size)); - cache_[i].resize(vector_size); - for (int64 j = 0; j < vector_size; j++) { - TF_RETURN_IF_ERROR(reader->ReadTensor( - strings::StrCat("cache[", i, "][", j, "]"), &cache_[i][j])); - } - } - return Status::OK(); - } - - private: - mutex mu_; - std::vector> cache_ GUARDED_BY(mu_); - std::unique_ptr input_impl_ GUARDED_BY(mu_); - }; - - const int64 window_size_; - const int64 stride_; - const DatasetBase* const input_; - std::vector output_shapes_; - }; -}; - -REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU), - SlideDatasetOp); - -} // namespace - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index b74a09e2cb..39aa3e9eb0 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,9 +187,6 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); -REGISTER_KERNEL_BUILDER( - Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), - DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 0656081177..184c703599 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -238,12 +238,6 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; -// Instantiate the GPU implementations for Eigen::half. -template struct functor::DepthToSpaceOpFunctor; -template struct functor::DepthToSpaceOpFunctor; - // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 7688305019..108d59db2c 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ b/tensorflow/core/kernels/hexagon/BUILD @@ -45,7 +45,6 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/kernels:cwise_op", - "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:reduction_ops", "//tensorflow/core/kernels:remote_fused_graph_execute_utils", diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index e0706568b1..1401bc65a4 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -444,7 +444,6 @@ class MklConv2DCustomBackpropFilterOp ~MklConv2DCustomBackpropFilterOp() {} private: - const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -493,9 +492,7 @@ class MklConv2DCustomBackpropFilterOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, - const memory::dims& strides, - const memory::dims& dilations, + Tensor** output_tensor, const memory::dims& strides, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -521,32 +518,31 @@ class MklConv2DCustomBackpropFilterOp bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x); } - if (biasEnabled && (bias_grad != nullptr)) { - // Create convolution backward weights with bias primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding) : - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); - auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, - cpu_engine, - conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, - bwd_output_format, output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); + // Create convolution backward weights primitive. + auto bwd_desc = + (biasEnabled && (bias_grad != nullptr)) + ? convolution_backward_weights::desc( + convolution_direct, input->GetOpMemDesc(), + output->GetOpMemDesc(), bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, + padding) + : convolution_backward_weights::desc( + convolution_direct, input->GetOpMemDesc(), + output->GetOpMemDesc(), outbackprop->GetOpMemDesc(), strides, + padding_l, padding_r, padding); + + auto bwd_pd = convolution_backward_weights::primitive_desc( + bwd_desc, cpu_engine, conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format, + output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. + output->SetUsrMemDataHandle(*output_tensor); + if (biasEnabled && (bias_grad != nullptr)) { // Allocate bias_grad tensor TensorShape bias_grad_shape({depth}); Tensor* bias_grad_tensor = nullptr; @@ -557,32 +553,11 @@ class MklConv2DCustomBackpropFilterOp memory::desc({bias_grad_dims}, MklDnnType(), memory::format::x); bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor); bias_grad->SetUsrMemDataHandle(bias_grad_tensor); + } - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, - bias_grad); + if (biasEnabled && (bias_grad != nullptr)) { + PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad); } else { - // Create convolution backward weights primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding) : - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); - auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, - cpu_engine, - conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, - bwd_output_format, output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output); } } diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index d203c04934..eeed009531 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -369,7 +369,6 @@ class MklConv2DCustomBackpropInputOp private: const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0, kInputIndex_OutBackProp = 2; - const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -420,9 +419,7 @@ class MklConv2DCustomBackpropInputOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, - const memory::dims& strides, - const memory::dims& dilations, + Tensor** output_tensor, const memory::dims& strides, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -435,16 +432,9 @@ class MklConv2DCustomBackpropInputOp CHECK_NOTNULL(output_tensor); // Create convolution backward data primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? - convolution_backward_data::desc(convolution_direct, - output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding): - convolution_backward_data::desc(convolution_direct, - output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); + auto bwd_desc = convolution_backward_data::desc( + convolution_direct, output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, padding); auto bwd_pd = convolution_backward_data::primitive_desc( bwd_desc, cpu_engine, conv_fwd_pd); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f0818eb96d..1440da8f82 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -493,7 +493,6 @@ class MklConv2DOp : public OpKernel { ~MklConv2DOp() {} explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); @@ -510,20 +509,6 @@ class MklConv2DOp : public OpKernel { errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); - OP_REQUIRES(context, dilations_.size() == 4, - errors::InvalidArgument("Sliding window dilations field must " - "specify 4 dimensions")); - const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N'); - const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C'); - const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H'); - const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W'); - OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, - errors::InvalidArgument( - "Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); - OP_REQUIRES( - context, dilation_h > 0 && dilation_w > 0, - errors::InvalidArgument("Dilated rates should be larger than 0.")); } void Compute(OpKernelContext* context) override { @@ -545,19 +530,17 @@ class MklConv2DOp : public OpKernel { MklDnnData filter(&cpu_engine); MklDnnData output(&cpu_engine); - memory::dims src_dims, filter_dims, padding_l, padding_r, - dilations, strides; + memory::dims src_dims, filter_dims, padding_l, padding_r, strides; memory::dims output_dims_tf_order, output_dims_mkl_order; // Get shapes of input tensors in MKL-DNN order - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, - dilations_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); auto src_tf_shape = GetTfShape(context, kInputIndex_Src); auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter); conv_utl.GetConvFwdSizesInMklOrder( src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, - &dilations, &output_dims_tf_order, &output_dims_mkl_order, - &padding_l, &padding_r); + &output_dims_tf_order, &output_dims_mkl_order, &padding_l, + &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -570,7 +553,6 @@ class MklConv2DOp : public OpKernel { // Need semantics for Null MKL tensor MklDnnShape output_mkl_shape; output_mkl_shape.SetMklTensor(false); - AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor, src_tf_shape, output_mkl_shape); @@ -614,79 +596,55 @@ class MklConv2DOp : public OpKernel { filter.SetOpMemDesc(filter_dims, memory::format::any); output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - // MKLDNN dilation starts from 0. - dilations[kDilationH] -= 1; - dilations[kDilationW] -= 1; - + // If bias is enabled, then do the same steps as above for bias. if (biasEnabled) { - // Create convolution primitive with Bias. - MklDnnData bias(&cpu_engine); - memory::dims bias_size; - conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); - const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); - bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); - bias.SetOpMemDesc(bias_size, memory::format::any); - - // Create convolution primitive with Bias. - // Use MKLDNN dilated convolution in case of dilated rate (>0). - auto conv_desc = (dilations[kDilationH] > 0 || - dilations[kDilationW] > 0) ? - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), - output.GetOpMemDesc(), strides, dilations, - padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)): - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), - output.GetOpMemDesc(), strides, - padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, - output_dims_mkl_order, tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, - filter_out_tensor); + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); + bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, + filter_out_tensor); } else { - // Create convolution primitive without Bias. - // Use MKLDNN dilated convolution in case of dilated rate (>0). - auto conv_desc = (dilations[kDilationH] > 0 || - dilations[kDilationW] > 0) ? - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)): - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, - nullptr, &output, filter_out_tensor); + // Create convolution primitive without Bias. + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output, + filter_out_tensor); } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -700,12 +658,10 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; - std::vector dilations_; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; - const int kDilationH = 0, kDilationW = 1; // Allocate output tensor. void AllocateOutputTensor( diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 7ca10db895..9dd88221a8 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -58,16 +58,13 @@ class MklDnnConvUtil { protected: OpKernelContext* context_; // We don't own this. std::vector strides_; - std::vector dilations_; Padding padding_; TensorFormat data_format_; public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm, - const std::vector& dilations) : - context_(context), strides_(strides), padding_(pad), - data_format_(fm), dilations_(dilations) {} + Padding pad, TensorFormat fm) + : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -81,16 +78,6 @@ class MklDnnConvUtil { *strides = {stride_rows, stride_cols}; } - // Calculate Convolution dilations - virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { - // For now we take the dilation from the second and third dimensions only - // (we do not support dilation on the batch or depth dimension). - CHECK_NOTNULL(dilations); - int dilations_rows = GetTensorDim(dilations_, data_format_, 'H'); - int dilations_cols = GetTensorDim(dilations_, data_format_, 'W'); - *dilations = {dilations_rows, dilations_cols}; - } - // Calculate Convolution input size in MKL-DNN order. MKL-DNN // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's @@ -226,8 +213,7 @@ class MklDnnConvUtil { // TODO(nhasabni): Add similar function for input and filter in MklShape. virtual inline void GetOutputAndPadSizeInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, - const memory::dims& strides, const memory::dims& dilations, - memory::dims* output_dims_tf_order, + const memory::dims& strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -246,8 +232,6 @@ class MklDnnConvUtil { // Stride is vector of 2 elements: {s_r, s_c} int stride_rows = strides[0]; int stride_cols = strides[1]; - int dilation_rows = dilations[0]; - int dilation_cols = dilations[1]; // Output batch is same as input batch. int out_batch = GetTensorDim(input_shape, data_format_, 'N'); @@ -257,13 +241,11 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerboseV2(input_rows, filter_rows, - dilation_rows, stride_rows, padding_, + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_rows, filter_rows, stride_rows, padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, - dilation_cols, stride_cols, padding_, + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_cols, filter_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) @@ -289,8 +271,7 @@ class MklDnnConvUtil { // // Function does not return anything, but sets error in context status. inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, - const memory::dims& strides, const memory::dims& dilations, + size_t src_index, size_t filter_index, const memory::dims& strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -305,9 +286,9 @@ class MklDnnConvUtil { errors::InvalidArgument("input must be 4-dimensional", input_tf_shape.DebugString())); - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, - strides, dilations, output_dims_tf_order, - output_dims_mkl_order, pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, + output_dims_tf_order, output_dims_mkl_order, + pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -319,14 +300,12 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims *dilations, - memory::dims* output_dims_tf_order, + memory::dims* strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); - CHECK_NOTNULL(dilations); CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -337,9 +316,7 @@ class MklDnnConvUtil { GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); if (!context_->status().ok()) return; GetStridesInMklOrder(strides); - GetDilationsInMklOrder(dilations); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, - *strides, *dilations, + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; @@ -367,21 +344,7 @@ class MklConv2DBackpropCommonOp : public OpKernel { context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); - OP_REQUIRES(context, dilations_.size() == 4, - errors::InvalidArgument("Sliding window dilations field must " - "specify 4 dimensions")); - int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); - int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); - int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); - int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); - OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), - errors::InvalidArgument( - "Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); - OP_REQUIRES( - context, dilation_h > 0 && dilation_w > 0, - errors::InvalidArgument("Dilated rates should be larger than 0.")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -443,16 +406,15 @@ class MklConv2DBackpropCommonOp : public OpKernel { // By default, all dims are in MKL order. Only dims in TF order // are those with prefix tf_order. memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; + memory::dims padding_l, padding_r, strides, fwd_output_dims; memory::dims fwd_output_dims_tf_order; // Get forward convolution parameters. - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, - dilations_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); conv_utl.GetConvFwdSizesInMklOrder( input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, - &padding_l, &padding_r); + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward @@ -475,21 +437,10 @@ class MklConv2DBackpropCommonOp : public OpKernel { memory::format::hwio); // Tensorflow Output of Conv2D is in data_format order. auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - - const int kDilationH = 0, kDilationW = 1; - dilations[kDilationH] -= 1; - dilations[kDilationW] -= 1; - auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)) : - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_input_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Create memory for user data. Describe how the inputs and outputs of @@ -534,9 +485,8 @@ class MklConv2DBackpropCommonOp : public OpKernel { // Operator-specific call to create and execute primitive. CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_), + &outbackprop, &output, &output_tensor, strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_), bwd_output_dims, bwd_output_format); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -585,21 +535,20 @@ class MklConv2DBackpropCommonOp : public OpKernel { virtual memory::format GetOutputFormat(const memory::format data_format) = 0; /// Create and execute the primitive storing output in the output_tensor. - virtual void CreatePrimitive(OpKernelContext* context, - const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, - const memory::dims& dilations, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; + virtual void CreatePrimitive( + OpKernelContext* context, const engine& cpu_engine, + const convolution_forward::primitive_desc& conv_fwd_pd, + MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, + MklDnnData* output, Tensor** output_tensor, + const memory::dims& strides, const memory::dims& padding_l, + const memory::dims& padding_r, padding_kind padding, + const memory::dims& bwd_output_dims, + memory::format bwd_output_format) = 0; // Get the data_format {NCHW, NHWC} TensorFormat GetTFDataFormat() { return data_format_; } private: - std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index d91f7107c5..e9a2376b54 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -442,11 +442,12 @@ class MklInputConversionOp : public OpKernel { auto input_tf_md = mkl_output_mkl_shape.GetTfLayout(); tf_input.SetUsrMem(input_tf_md, tf_tensor); - // Create reorder between tensorflow layout and Mkl layout if necessary + // Create reorder between tensorflow layout and Mkl layout. std::vector net; - tf_input.CheckReorderToOpMem( + CHECK_EQ(tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), - tensor_out, &net); + tensor_out, &net), + true); stream(stream::kind::eager).submit(net).wait(); // -- The tensor in MKL format passes through -- diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 0a0f69522f..267f4f8d12 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -392,7 +392,7 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } @@ -437,15 +437,11 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - - // Allocate output and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {src_index}, dst_index, tf_shape_dst, &dst_tensor)); - AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); + AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst, + dnn_shape_dst); // Destination memory descriptor is same as source memory descriptor. - auto &dst_md = src_md; + auto dst_md = src_md; dst.SetUsrMem(dst_md, dst_tensor); // execute net @@ -496,7 +492,7 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } @@ -607,13 +603,8 @@ class MklReluGradOpBase : public OpKernel { // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - - // Allocate diff_src and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {diff_dst_index}, diff_src_index, tf_shape_diff_src, - &diff_src_tensor)); - AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); + AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, + tf_shape_diff_src, dnn_shape_diff_src); // diff_src memory descriptor is same as memory descriptor for both // inputs. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index d0703d7576..4abfbfb1a6 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -13,16 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ -#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ - - -// This file requires the following include because it uses CudaAtomicMax: -// #include "tensorflow/core/util/cuda_kernel_helper.h" - -// Unfortunately we can't add the #include, since it breaks compilation for -// non-GPU targets. This only breaks in clang, because it's more strict for -// template code and CudaAtomicMax is used in template context. +#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -138,4 +130,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index e59adfc6ac..23df1c35e5 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,9 +187,6 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); -REGISTER_KERNEL_BUILDER( - Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), - SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index f38459724a..db05ca1ed2 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -234,12 +234,6 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; -// Instantiate the GPU implementations for Eigen::half. -template struct functor::SpaceToDepthOpFunctor; -template struct functor::SpaceToDepthOpFunctor; - // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::SpaceToDepthOpFunctor; diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 0b006fa2b4..5bd79778a6 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -55,4 +55,6 @@ StringPiece StringPiece::substr(size_t pos, size_t n) const { return StringPiece(data_ + pos, n); } +const StringPiece::size_type StringPiece::npos = size_type(-1); + } // namespace tensorflow diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 2d00f717dc..910e4d9e2a 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -65,7 +65,7 @@ class StringPiece { iterator begin() const { return data_; } iterator end() const { return data_ + size_; } - static const size_t npos = size_type(-1); + static const size_t npos; // Return the ith byte in the referenced data. // REQUIRES: n < size() diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 6de850bb20..254fdf115d 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -205,9 +205,7 @@ Status RecordReader::SkipNBytes(uint64 offset) { if (options_.buffer_size > 0) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); } -#if !defined(IS_SLIM_BUILD) } -#endif return Status::OK(); } // namespace io diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 26278e0328..62dd2efb79 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_RECORD_READER_H_ #define TENSORFLOW_LIB_IO_RECORD_READER_H_ -#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/io/inputstream_interface.h" #if !defined(IS_SLIM_BUILD) +#include "tensorflow/core/lib/io/inputstream_interface.h" #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_inputstream.h" #endif // IS_SLIM_BUILD diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 9a4b616e5d..bdbbf6d7c3 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -265,16 +265,6 @@ REGISTER_OP("BatchDataset") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape); -// TODO(mrry): move SlideDataset to contrib in the future. -REGISTER_OP("SlideDataset") - .Input("input_dataset: variant") - .Input("window_size: int64") - .Input("stride: int64") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); - REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") .Input("batch_size: int64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index d6a0f38033..910fbaca9e 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1498,7 +1498,6 @@ REGISTER_OP("_MklConv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Conv2D operator. Uses MKL DNN APIs to perform 2D convolution. @@ -1517,7 +1516,6 @@ REGISTER_OP("__MklDummyConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node does not perform anything. It is just created as an intermediate output of @@ -1543,7 +1541,6 @@ REGISTER_OP("_MklConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2D and BiasAdd operator. Uses MKL DNN APIs to perform 2D convolution and add Bias to the output of convolution. @@ -1566,7 +1563,6 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1593,7 +1589,6 @@ REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1638,7 +1633,6 @@ REGISTER_OP("_MklConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1674,7 +1668,6 @@ REGISTER_OP("_MklConv2DWithBiasBackpropBias") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2DBackpropBias. Uses MKL DNN APIs to compute the gradients of convolution with respect to the bias. @@ -1697,7 +1690,6 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index eebbeaeba6..8f7bff1bb0 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -103,7 +103,7 @@ class Tracing { friend class ScopedAnnotation; friend class TraceMe; - TF_EXPORT static std::atomic tracing_engine_; + static std::atomic tracing_engine_; static Tracing::Engine* engine() { return tracing_engine_.load(std::memory_order_acquire); } diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index 682e46e0fc..b6b3722caa 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -382,8 +382,7 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( Status WindowsFileSystem::FileExists(const string& fname) { constexpr int kOk = 0; - std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); - if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { + if (_access(TranslateName(fname).c_str(), kOk) == 0) { return Status::OK(); } return errors::NotFound(fname, " not found"); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 22f2c02b78..7405e01e14 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 6d0458e678..9f6fe91b14 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -51,8 +51,6 @@ Europe: TensorFlow provides multiple communication paths. To pick the right path, please read the following list carefully: - * For new release announcements and security updates, subscribe to - [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). * To ask or answer technical questions about TensorFlow, use [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, ask or search Stack Overflow about a particular error message @@ -67,5 +65,5 @@ please read the following list carefully: on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. * To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). + [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md). diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 0481c97885..818798555a 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 8f89898c92..4c6dfa8daf 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0ee9c849e1..527884863e 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0 + 1.6.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0 + 1.6.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0 + 1.6.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.6.0 + 1.6.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
d If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 3e8744bf9d..e3e115d9f6 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -41,8 +41,7 @@ must be installed on your system: [NVIDIA's documentation](https://developer.nvidia.com/cudnn). Ensure that you create the `CUDA_HOME` environment variable as described in the NVIDIA documentation. - * GPU card with CUDA Compute Capability 3.0 or higher for building - from source and 3.5 or higher for our binaries. See + * GPU card with CUDA Compute Capability 3.0 or higher. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. @@ -189,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -294,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -357,23 +356,24 @@ where: to 6006. * TensorFlowCPUImage is required. It identifies the Docker container. Specify one of the following values: - * tensorflow/tensorflow, which is the TensorFlow CPU binary image. - * tensorflow/tensorflow:latest-devel, which is the latest + * gcr.io/tensorflow/tensorflow, which is the TensorFlow CPU binary image. + * gcr.io/tensorflow/tensorflow:latest-devel, which is the latest TensorFlow CPU Binary image plus source code. - * tensorflow/tensorflow:version, which is the + * gcr.io/tensorflow/tensorflow:version, which is the specified version (for example, 1.1.0rc1) of TensorFlow CPU binary image. - * tensorflow/tensorflow:version-devel, which is + * gcr.io/tensorflow/tensorflow:version-devel, which is the specified version (for example, 1.1.0rc1) of the TensorFlow GPU binary image plus source code. - TensorFlow images are available at + gcr.io is the Google Container Registry. Note that some + TensorFlow images are also available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches the latest TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash
 
The following command also launches the latest TensorFlow CPU binary image in a @@ -381,7 +381,7 @@ Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ docker run -it -p 8888:8888 tensorflow/tensorflow
+$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -405,14 +405,14 @@ where: hostPort and containerPort to `8888`. * TensorFlowGPUImage specifies the Docker container. You must specify one of the following values: - * tensorflow/tensorflow:latest-gpu, which is the latest + * gcr.io/tensorflow/tensorflow:latest-gpu, which is the latest TensorFlow GPU binary image. - * tensorflow/tensorflow:latest-devel-gpu, which is + * gcr.io/tensorflow/tensorflow:latest-devel-gpu, which is the latest TensorFlow GPU Binary image plus source code. - * tensorflow/tensorflow:version-gpu, which is the + * gcr.io/tensorflow/tensorflow:version-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image. - * tensorflow/tensorflow:version-devel-gpu, which is + * gcr.io/tensorflow/tensorflow:version-devel-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image plus source code. @@ -421,7 +421,7 @@ following command launches the latest TensorFlow GPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ nvidia-docker run -it tensorflow/tensorflow:latest-gpu bash
+$ nvidia-docker run -it gcr.io/tensorflow/tensorflow:latest-gpu bash
 
The following command also launches the latest TensorFlow GPU binary image @@ -429,13 +429,13 @@ in a Docker container. In this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu
+$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
 
The following command installs an older TensorFlow version (0.12.1):
-$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:0.12.1-gpu
+$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:0.12.1-gpu
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -505,7 +505,7 @@ If you installed through Docker, start a Docker container from which you can run bash. For example:
-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash
 
@@ -647,14 +647,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -666,14 +666,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -685,14 +685,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -704,14 +704,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 205db8e6bd..623ca6bb79 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -292,23 +292,24 @@ where: to 6006. * TensorFlowImage is required. It identifies the Docker container. You must specify one of the following values: - * tensorflow/tensorflow: TensorFlow binary image. - * tensorflow/tensorflow:latest-devel: TensorFlow + * gcr.io/tensorflow/tensorflow: TensorFlow binary image. + * gcr.io/tensorflow/tensorflow:latest-devel: TensorFlow Binary image plus source code. -The TensorFlow images are available at +gcr.io is the Google Container Registry. Note that some +TensorFlow images are also available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches a TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell: -
$ docker run -it tensorflow/tensorflow bash
+
$ docker run -it gcr.io/tensorflow/tensorflow bash
The following command also launches a TensorFlow CPU binary image in a Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook: -
$ docker run -it -p 8888:8888 tensorflow/tensorflow
+
$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
Docker will download the TensorFlow binary image the first time you launch it. @@ -350,7 +351,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl @@ -375,7 +376,7 @@ do the following: If you installed through Docker, start a Docker container that runs bash. For example: -
$ docker run -it tensorflow/tensorflow bash
+
$ docker run -it gcr.io/tensorflow/tensorflow bash
@@ -523,7 +524,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
 
@@ -531,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index c09c9c2c0c..acf0af0d9d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0 on Linux: +for TensorFlow 1.6.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 2413bc9cfb..f0a30ee394 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install: NVIDIA® GPU, you must install this version. Note that this version of TensorFlow is typically much easier to install (typically, in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. Prebuilt binaries will use AVX instructions. + installing this version first. * **TensorFlow with GPU support**. TensorFlow programs typically run significantly faster on a GPU than on a CPU. Therefore, if your system has a NVIDIA® GPU meeting the prerequisites shown below @@ -41,8 +41,7 @@ installed on your system: Note that cuDNN is typically installed in a different location from the other CUDA DLLs. Ensure that you add the directory where you installed the cuDNN DLL to your `%PATH%` environment variable. - * GPU card with CUDA Compute Capability 3.0 or higher for building - from source and 3.5 or higher for our binaries. See + * GPU card with CUDA Compute Capability 3.0 or higher. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. diff --git a/tensorflow/docs_src/performance/xla/jit.md b/tensorflow/docs_src/performance/xla/jit.md index d9a979ccbd..d4dc3e57c8 100644 --- a/tensorflow/docs_src/performance/xla/jit.md +++ b/tensorflow/docs_src/performance/xla/jit.md @@ -157,7 +157,7 @@ to fuse Ops is visible by starting at `hlo_graph_0.dot` and viewing each diagram in succession. To Render the .dot file into a png, install -[GraphViz](https://www.graphviz.org/download/) and run: +[GraphViz](http://www.graphviz.org/Download..php) and run: ```shell dot -Tpng hlo_graph_80.dot -o hlo_graph_80.png diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index d1399814ee..5fb1c2da88 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -459,7 +459,7 @@ accuracy_score = classifier.evaluate(x=test_set.data, [debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py), -based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.2/get_started/tflearn), contains a full example of how to +based on {$tflearn$tf-learn's iris tutorial}, contains a full example of how to use the tfdbg with `Estimator`s. To run this example, do: ```none @@ -753,7 +753,6 @@ There are three possible workarounds or solutions: # For LocalCLIDebugHook hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")] ``` - Make sure that the directory pointed to by dump_root is empty or nonexistent. tfdbg cleans up the dump directories before exiting. * Reduce the batch size used during the runs. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 392ac6f7f1..1548d43877 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -159,7 +159,7 @@ available. These operations allow you to build sophisticated @{$reading_data$input pipelines}, at the cost of making the TensorFlow computation somewhat more complicated. See the how-to documentation for -@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers} +@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers} for more information on how to use them. ## Variables @@ -272,7 +272,7 @@ Prefer predefined TensorFlow operations such as @{tf.decode_raw}, If your data is not easily parsable with the built-in TensorFlow operations, consider converting it, offline, to a format that is easily parsable, such -as @{tf.python_io.TFRecordWriter$`TFRecord`} format. +as ${tf.python_io.TFRecordWriter$`TFRecord`} format. The more efficient method to customize the parsing behavior is to @{$adding_an_op$add a new op written in C++} that parses your diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md index fadfa03e78..79280d246a 100644 --- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md +++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md @@ -83,7 +83,7 @@ data than you need, though. Instead, consider running the merged summary op every `n` steps. The code example below is a modification of the -[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py), +@{$layers$simple MNIST tutorial}, in which we have added some summary ops, and run them every ten steps. If you run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able to visualize statistics, such as how the weights or accuracy varied during diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index a9c2cb3e33..d74d7f3181 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -129,9 +129,10 @@ my_tpu_estimator = tf.contrib.tpu.TPUEstimator( Typically the `FLAGS` would be set by command line arguments. To switch from training locally to training on a cloud TPU you would need to: -* Set `FLAGS.use_tpu` to `True` -* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it -* Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). + 1) Set `FLAGS.use_tpu` to `True` + 1) Set `FLAGS.tpu_name` so the + `tf.contrib.cluster_resolver.TPUClusterResolver` can find it + 1) Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). ## Optimizer diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 9b17d0d4d5..ee03f440c9 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -193,7 +193,7 @@ to calculate loss, configure the training op, and generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). +Classifier"](#training-and-evaluating-the-cnn-mnist-classifier). ### Input Layer @@ -446,7 +446,7 @@ tf.nn.softmax(logits, name="softmax_tensor") > Note: We use the `name` argument to explicitly name this operation > `softmax_tensor`, so we can reference it later. (We'll set up logging for the -> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook)). +> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook). We compile our predictions in a dict, and return an `EstimatorSpec` object: @@ -534,8 +534,9 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"} -> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial. +> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining +> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in +> tf.estimator"} tutorial. ### Add evaluation metrics @@ -624,8 +625,7 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > Note: If you don't explicitly assign a name to an operation via the `name` > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on -> @{$graph_viz$TensorBoard}) or to enable the -> @{$programmers_guide/debugger$TensorFlow Debugger (tfdbg)}. +> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index 7584a76ba5..e22536adb6 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -109,8 +109,7 @@ This download will take a while and download a bit more than 23GB of data. To convert the `ndjson` files to @{$python/python_io#tfrecords_format_details$TFRecord} files containing -[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) -protos run the following command. +${tf.train.Example} protos run the following command. ```shell python create_dataset.py --ndjson_path rnn_tutorial_data \ diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index 27ce75a30d..005dc020f9 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -74,8 +74,8 @@ Here's a list of columns available in the Census Income dataset: | relationship | Categorical | Wife, Own-child, Husband, | : : : Not-in-family, Other-relative, : : : : Unmarried. : -| race | Categorical | Amer-Indian-Eskimo, Asian-Pac- | -: : : Islander, Black, White, Other. : +| race | Categorical | White, Asian-Pac-Islander, | +: : : Amer-Indian-Eskimo, Other, Black. : | gender | Categorical | Female, Male. | | capital_gain | Continuous | Capital gains recorded. | | capital_loss | Continuous | Capital Losses recorded. | @@ -247,7 +247,7 @@ hours_per_week = tf.feature_column.numeric_column('hours_per_week') ### Making Continuous Features Categorical through Bucketization Sometimes the relationship between a continuous feature and the label is not -linear. As a hypothetical example, a person's income may grow with age in the +linear. As an hypothetical example, a person's income may grow with age in the early stage of one's career, then the growth may slow at some point, and finally the income decreases after retirement. In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can @@ -361,16 +361,6 @@ The first line of the final output should be something like `accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try more features and transformations and see if you can do even better! -After the model is evaluated, we can use the model to predict whether an individual has an annual income of over -50,000 dollars given an individual's information input. -```python - pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1)) - for pred in pred_iter: - print(pred['classes']) -``` - -The model prediction output would be like `[b'1']` or `[b'0']` which means whether corresponding individual has an annual income of over 50,000 dollars or not. - If you'd like to see a working end-to-end example, you can download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) and set the `model_type` flag to `wide`. diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index 5c47ce6b67..bb75431a1f 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -40,7 +40,6 @@ - @@ -50,7 +49,6 @@ - @@ -60,7 +58,6 @@ - @@ -70,7 +67,6 @@ - diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java index 429138abe5..8bd4abb154 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java @@ -351,10 +351,6 @@ public abstract class CameraActivity extends Activity protected void setFragment() { String cameraId = chooseCamera(); - if (cameraId == null) { - Toast.makeText(this, "No Camera Detected", Toast.LENGTH_SHORT).show(); - finish(); - } Fragment fragment; if (useCamera2API) { @@ -420,8 +416,7 @@ public abstract class CameraActivity extends Activity @Override public boolean onKeyDown(final int keyCode, final KeyEvent event) { - if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP - || keyCode == KeyEvent.KEYCODE_BUTTON_L1 || keyCode == KeyEvent.KEYCODE_DPAD_CENTER) { + if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP) { debug = !debug; requestRender(); onSetDebug(debug); diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java index 33ec65e9f7..6a66ec3927 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java @@ -16,10 +16,8 @@ package org.tensorflow.demo; -import android.app.UiModeManager; import android.content.Context; import android.content.res.AssetManager; -import android.content.res.Configuration; import android.graphics.Bitmap; import android.graphics.Bitmap.Config; import android.graphics.BitmapFactory; @@ -33,11 +31,9 @@ import android.graphics.Typeface; import android.media.ImageReader.OnImageAvailableListener; import android.os.Bundle; import android.os.SystemClock; -import android.util.DisplayMetrics; import android.util.Size; import android.util.TypedValue; import android.view.Display; -import android.view.KeyEvent; import android.view.MotionEvent; import android.view.View; import android.view.View.OnClickListener; @@ -47,7 +43,6 @@ import android.widget.BaseAdapter; import android.widget.Button; import android.widget.GridView; import android.widget.ImageView; -import android.widget.RelativeLayout; import android.widget.Toast; import java.io.IOException; import java.io.InputStream; @@ -386,27 +381,6 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL grid = (GridView) findViewById(R.id.grid_layout); grid.setAdapter(adapter); grid.setOnTouchListener(gridTouchAdapter); - - // Change UI on Android TV - UiModeManager uiModeManager = (UiModeManager) getSystemService(UI_MODE_SERVICE); - if (uiModeManager.getCurrentModeType() == Configuration.UI_MODE_TYPE_TELEVISION) { - DisplayMetrics displayMetrics = new DisplayMetrics(); - getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); - int styleSelectorHeight = displayMetrics.heightPixels; - int styleSelectorWidth = displayMetrics.widthPixels - styleSelectorHeight; - RelativeLayout.LayoutParams layoutParams = new RelativeLayout.LayoutParams(styleSelectorWidth, ViewGroup.LayoutParams.MATCH_PARENT); - - // Calculate number of style in a row, so all the style can show up without scrolling - int numOfStylePerRow = 3; - while (styleSelectorWidth / numOfStylePerRow * Math.ceil((float) (adapter.getCount() - 2) / numOfStylePerRow) > styleSelectorHeight) { - numOfStylePerRow++; - } - grid.setNumColumns(numOfStylePerRow); - layoutParams.addRule(RelativeLayout.ALIGN_PARENT_RIGHT); - grid.setLayoutParams(layoutParams); - adapter.buttons.clear(); - } - setStyle(adapter.items[0], 1.0f); } @@ -628,38 +602,4 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines); } - - @Override - public boolean onKeyDown(int keyCode, KeyEvent event) { - int moveOffset = 0; - switch (keyCode) { - case KeyEvent.KEYCODE_DPAD_LEFT: - moveOffset = -1; - break; - case KeyEvent.KEYCODE_DPAD_RIGHT: - moveOffset = 1; - break; - case KeyEvent.KEYCODE_DPAD_UP: - moveOffset = -1 * grid.getNumColumns(); - break; - case KeyEvent.KEYCODE_DPAD_DOWN: - moveOffset = grid.getNumColumns(); - break; - default: - return super.onKeyDown(keyCode, event); - } - - // get the highest selected style - int currentSelect = 0; - float highestValue = 0; - for (int i = 0; i < adapter.getCount(); i++) { - if (adapter.items[i].value > highestValue) { - currentSelect = i; - highestValue = adapter.items[i].value; - } - } - setStyle(adapter.items[(currentSelect + moveOffset + adapter.getCount()) % adapter.getCount()], 1); - - return true; - } } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3cbeb34c54..a206685af6 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -28,7 +28,6 @@ load("//tensorflow:tensorflow.bzl", "py_tests") load("//tensorflow:tensorflow.bzl", "tf_py_build_info_genrule") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") -load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") @@ -107,19 +106,20 @@ py_library( ":training", ":util", ":weights_broadcast_ops", - "//tensorflow/contrib:contrib_py", + "//third_party/py/numpy", "//tensorflow/core:protos_all_py", "//tensorflow/python/data", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", + "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", - "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - "//third_party/py/numpy", - ], + ] + if_not_windows([ + "//tensorflow/contrib:contrib_py", + ]), ) tf_py_build_info_genrule() @@ -947,6 +947,7 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1311,6 +1312,7 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1652,6 +1654,7 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -2718,6 +2721,7 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, + tags = ["no_windows"], ) cuda_py_test( @@ -3301,65 +3305,6 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) -# ** Targets for Windows build (start) ** -# We need the following targets to expose symbols from _pywrap_tensorflow.dll - -# Build a cc_binary from tf_custom_op_library_additional_deps_impl, -# it contains all object code from its dependencies. -cc_binary( - name = "tf_custom_op_library_additional_deps.so", - linkshared = 1, - linkstatic = 1, - deps = tf_custom_op_library_additional_deps_impl(), -) - -# Get a DEF file generated by parsing all object files -# of tf_custom_op_library_additional_deps.so -filegroup( - name = "pywrap_tensorflow_def_file", - srcs = [":tf_custom_op_library_additional_deps.so"], - output_group = "def_file", -) - -# Filter the DEF file to reduce the number of symbols to 64K or less. -# Note that we also write the name of the pyd file into DEF file so that -# the dynamic libraries of custom ops can find it at runtime. -genrule( - name = "pywrap_tensorflow_filtered_def_file", - srcs = [":pywrap_tensorflow_def_file"], - outs = ["pywrap_tensorflow_filtered_def_file.def"], - cmd = select({ - "//tensorflow:windows": """ - $(location @local_config_def_file_filter//:def_file_filter) \\ - --input $(location :pywrap_tensorflow_def_file) \\ - --output $@ \\ - --target _pywrap_tensorflow_internal.pyd - """, - "//conditions:default": "touch $@", # Just a placeholder for Unix platforms - }), - tools = ["@local_config_def_file_filter//:def_file_filter"], -) - -# Get the import library of _pywrap_tensorflow_internal.dll -filegroup( - name = "pywrap_tensorflow_import_lib_file", - srcs = [":_pywrap_tensorflow_internal.so"], - output_group = "interface_library", -) - -# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll -# so that custom ops' dynamic libraries can link against it. -cc_import( - name = "pywrap_tensorflow_import_lib", - interface_library = select({ - "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file", - "//conditions:default": "not_exsiting_on_unix.lib", # Just a placeholder for Unix platforms - }), - system_provided = 1, -) - -# ** Targets for Windows build (end) ** - py_library( name = "lib", srcs = [ @@ -3732,6 +3677,7 @@ py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":client_testlib", ":lib", @@ -4012,11 +3958,7 @@ py_test( srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", tags = [ - "manual", - "no_cuda_on_cpu_tap", - "no_oss", "no_windows", - "notap", ], deps = [ ":client", @@ -4039,6 +3981,7 @@ py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":checkpoint_ops_gen", ":client", @@ -4079,7 +4022,10 @@ py_test( size = "medium", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], # b/67945581 + tags = [ + "no_windows", + "notsan", # b/67945581 + ], deps = [ ":array_ops", ":client_testlib", diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index c60f692390..512d292ee2 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -913,7 +913,6 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 5245a050a1..9fcbd4ff77 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -139,8 +139,8 @@ class Estimator(object): to configure Estimators from hyper parameter tuning. * `config`: Optional configuration object. Will receive what is passed to Estimator in `config` parameter, or the default `config`. - Allows updating things in your `model_fn` based on - configuration such as `num_ps_replicas`, or `model_dir`. + Allows updating things in your model_fn based on configuration + such as `num_ps_replicas`, or `model_dir`. * Returns: `EstimatorSpec` @@ -301,11 +301,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where `features` is a `Tensor` or a - dictionary of string feature name to `Tensor` and `labels` is a + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a `Tensor` or a dictionary of string label name to `Tensor`. Both - `features` and `labels` are consumed by `model_fn`. They should - satisfy the expectation of `model_fn` from inputs. + features and labels are consumed by `model_fn`. They should satisfy + the expectation of `model_fn` from inputs. hooks: List of `SessionRunHook` subclass instances. Used for callbacks inside the training loop. @@ -381,11 +381,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where `features` is a `Tensor` or a - dictionary of string feature name to `Tensor` and `labels` is a + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a `Tensor` or a dictionary of string label name to `Tensor`. Both - `features` and `labels` are consumed by `model_fn`. They should - satisfy the expectation of `model_fn` from inputs. + features and labels are consumed by `model_fn`. They should satisfy + the expectation of `model_fn` from inputs. steps: Number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. @@ -457,17 +457,17 @@ class Estimator(object): checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. yield_single_examples: If False, yield the whole batch as returned by the - `model_fn` instead of decomposing the batch into individual elements. - This is useful if `model_fn` returns some tensors whose first dimension - is not equal to the batch size. + model_fn instead of decomposing the batch into individual elements. This + is useful if model_fn return some tensor with first dimension not + equal to the batch size Yields: Evaluated values of `predictions` tensors. Raises: - ValueError: Could not find a trained model in `model_dir`. - ValueError: If batch length of predictions is not the same and - `yield_single_examples` is True. + ValueError: Could not find a trained model in model_dir. + ValueError: if batch length of predictions are not same and + yield_single_examples is True. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index e38b765da5..2cc3331a15 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -128,16 +128,9 @@ class TrainSpec( """Creates a validated `TrainSpec` instance. Args: - input_fn: A function that provides input data for training as minibatches. - See @{$get_started/premade_estimators#create_input_functions} for more - information. The function should construct and return one of - the following: - * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a - tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a - `Tensor` or a dictionary of string label name to `Tensor`. - + input_fn: Training input function returning a tuple of: + features - `Tensor` or dictionary of string feature name to `Tensor`. + labels - `Tensor` or dictionary of `Tensor` with labels. max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the @@ -192,16 +185,9 @@ class EvalSpec( """Creates a validated `EvalSpec` instance. Args: - input_fn: A function that constructs the input data for evaluation. - See @{$get_started/premade_estimators#create_input_functions} for more - information. The function should construct and return one of - the following: - * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a - tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a - `Tensor` or a dictionary of string label name to `Tensor`. - + input_fn: Evaluation input function returning a tuple of: + features - `Tensor` or dictionary of string feature name to `Tensor`. + labels - `Tensor` or dictionary of `Tensor` with labels. steps: Int. Positive number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. See `Estimator.evaluate` for details. diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index f74881f179..eef91e9c5b 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -636,10 +636,7 @@ py_test( size = "small", srcs = ["_impl/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index f27ca5c205..5b0c38fa5d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -295,6 +295,7 @@ tf_py_test( "//tensorflow/python:nn_grad", ], data = ["//tensorflow/core:image_testdata"], + tags = ["no_windows"], ) tf_py_test( @@ -1138,6 +1139,7 @@ tf_py_test( "//tensorflow/python:variables", ], data = ["//tensorflow/core:lmdb_testdata"], + tags = ["no_windows"], ) cuda_py_test( @@ -2327,6 +2329,7 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 4, + tags = ["no_windows"], ) cuda_py_test( @@ -2457,6 +2460,7 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, + tags = ["no_windows"], ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 25525cc128..f4fe01f868 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -970,7 +970,7 @@ class Conv2DTest(test.TestCase): self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 6, 1], @@ -984,7 +984,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -998,7 +998,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -1012,7 +1012,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 4, 3], @@ -1026,7 +1026,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 3, 1], @@ -1040,7 +1040,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 6, 1], @@ -1054,7 +1054,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 2, 3, 1], @@ -1068,7 +1068,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[0, 2, 3, 1], @@ -1082,7 +1082,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): # The GPU version of this test is not very stable. So adjusting the # error threshold to 1e-4. @@ -1098,7 +1098,7 @@ class Conv2DTest(test.TestCase): err=1e-4) def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self): - if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): + if test.is_gpu_available(cuda_only=True): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 3, 1], diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index f0beabb4e2..96c9718b83 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -35,8 +35,8 @@ from tensorflow.python.platform import tf_logging class DepthToSpaceTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): - input_nhwc = math_ops.cast(inputs, dtype) + def _testOne(self, inputs, block_size, outputs): + input_nhwc = math_ops.to_float(inputs) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.depth_to_space(input_nhwc, block_size) @@ -59,12 +59,6 @@ class DepthToSpaceTest(test.TestCase): x_out = [[[[1], [2]], [[3], [4]]]] self._testOne(x_np, block_size, x_out) - def testBasicFloat16(self): - x_np = [[[[1, 2, 3, 4]]]] - block_size = 2 - x_out = [[[[1], [2]], [[3], [4]]]] - self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) - # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testBlockSize2(self): diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index cd90d16aac..b76135764f 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -34,8 +34,8 @@ from tensorflow.python.platform import tf_logging class SpaceToDepthTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): - input_nhwc = math_ops.cast(inputs, dtype) + def _testOne(self, inputs, block_size, outputs): + input_nhwc = math_ops.to_float(inputs) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.space_to_depth(input_nhwc, block_size) @@ -58,12 +58,6 @@ class SpaceToDepthTest(test.TestCase): x_out = [[[[1, 2, 3, 4]]]] self._testOne(x_np, block_size, x_out) - def testBasicFloat16(self): - x_np = [[[[1], [2]], [[3], [4]]]] - block_size = 2 - x_out = [[[[1, 2, 3, 4]]]] - self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) - # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testLargerInput2x2(self): diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index 223858edfa..a751607aaa 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -485,11 +485,6 @@ class FileIoTest(test.TestCase): f.flush() self.assertEqual(content, f.read(len(content) + 1)) - def testUTF8StringPathExists(self): - file_path = os.path.join(self._base_dir, "UTF8测试_file_exist") - file_io.write_string_to_file(file_path, "testing") - v = file_io.file_exists(file_path) - self.assertEqual(v, True) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 87fe253f18..fb3fe77b4d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -699,7 +699,7 @@ def convolution( `padded_input` is obtained by zero padding the input using an effective spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and output striding `strides` as described in the - @{$python/nn#Convolution$comment here}. + @{tf.nn.convolution$comment here}. In the case that `data_format` does start with `"NC"`, the `input` and output (but not the `filter`) are simply transposed as follows: diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 42af7f8b27..c59eccc174 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -867,7 +867,7 @@ def raw_rnn(cell, loop_fn, ```python time = tf.constant(0, dtype=tf.int32) - (finished, next_input, initial_state, emit_structure, loop_state) = loop_fn( + (finished, next_input, initial_state, _, loop_state) = loop_fn( time=time, cell_output=None, cell_state=None, loop_state=None) emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) state = initial_state @@ -878,7 +878,7 @@ def raw_rnn(cell, loop_fn, loop_state=loop_state) # Emit zeros and copy forward state for minibatch entries that are finished. state = tf.where(finished, state, next_state) - emit = tf.where(finished, tf.zeros_like(emit_structure), emit) + emit = tf.where(finished, tf.zeros_like(emit), emit) emit_ta = emit_ta.write(time, emit) # If any new minibatch entries are marked as finished, mark these. finished = tf.logical_or(finished, next_finished) @@ -938,15 +938,10 @@ def raw_rnn(cell, loop_fn, and `emit_output`: the output to store for this iteration. Note that `emit_output` should be a `Tensor` or (possibly nested) - tuple of tensors which is aggregated in the `emit_ta` inside the - `while_loop`. For the first call to `loop_fn`, the `emit_output` - corresponds to the `emit_structure` which is then used to determine the - size of the `zero_tensor` for the `emit_ta` (defaults to - `cell.output_size`). For the subsequent calls to the `loop_fn`, the - `emit_output` corresponds to the actual output tensor - that is to be aggregated in the `emit_ta`. The parameter `cell_state` - and output `next_cell_state` may be either a single or (possibly nested) - tuple of tensors. The parameter `loop_state` and + tuple of tensors with shapes and structure matching `cell.output_size` + and `cell_output` above. The parameter `cell_state` and output + `next_cell_state` may be either a single or (possibly nested) tuple + of tensors. The parameter `loop_state` and output `next_loop_state` may be either a single or (possibly nested) tuple of `Tensor` and `TensorArray` objects. This last parameter may be ignored by `loop_fn` and the return value may be `None`. If it diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py index 5e2146b79f..6d7eaababc 100644 --- a/tensorflow/python/ops/special_math_ops.py +++ b/tensorflow/python/ops/special_math_ops.py @@ -163,7 +163,7 @@ def einsum(equation, *inputs, **kwargs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) + match = re.match('([a-z,]+)(->[a-z]*)?', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) @@ -402,7 +402,7 @@ def _exponential_space_einsum(equation, *inputs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) + match = re.match('([a-z,]+)(->[a-z]*)?', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py index d7c3a7e8dc..2c212f4548 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -192,9 +192,6 @@ class EinsumTest(test.TestCase): 'abc,cba', 'dba,ead,cad->bce', 'aef,fbc,dca->bde', - 'iJ,Jk->ik', - 'iJ,Ki->JK', - 'iJk,Jklm->Jk' ] long_cases = [ @@ -211,8 +208,6 @@ class EinsumTest(test.TestCase): 'ijk ijk', 'ij.jk->ik', 'ij...,jk...->ik...', - 'ij,k ->kji', - 'ij,k-> kji', # axis in output that does not exist 'ij,jk->im', diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index e9f1def48c..a52f325ddb 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -56,6 +56,8 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as saver_lib +FLAGS = None + def freeze_graph_with_def_protos(input_graph_def, input_saver_def, @@ -254,24 +256,25 @@ def freeze_graph(input_graph, checkpoint_version=checkpoint_version) -def main(unused_args, flags): - if flags.checkpoint_version == 1: +def main(unused_args): + if FLAGS.checkpoint_version == 1: checkpoint_version = saver_pb2.SaverDef.V1 - elif flags.checkpoint_version == 2: + elif FLAGS.checkpoint_version == 2: checkpoint_version = saver_pb2.SaverDef.V2 else: print("Invalid checkpoint version (must be '1' or '2'): %d" % - flags.checkpoint_version) + FLAGS.checkpoint_version) return -1 - freeze_graph(flags.input_graph, flags.input_saver, flags.input_binary, - flags.input_checkpoint, flags.output_node_names, - flags.restore_op_name, flags.filename_tensor_name, - flags.output_graph, flags.clear_devices, flags.initializer_nodes, - flags.variable_names_whitelist, flags.variable_names_blacklist, - flags.input_meta_graph, flags.input_saved_model_dir, - flags.saved_model_tags, checkpoint_version) + freeze_graph(FLAGS.input_graph, FLAGS.input_saver, FLAGS.input_binary, + FLAGS.input_checkpoint, FLAGS.output_node_names, + FLAGS.restore_op_name, FLAGS.filename_tensor_name, + FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes, + FLAGS.variable_names_whitelist, FLAGS.variable_names_blacklist, + FLAGS.input_meta_graph, FLAGS.input_saved_model_dir, + FLAGS.saved_model_tags, checkpoint_version) + -def run_main(): +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.register("type", "bool", lambda v: v.lower() == "true") parser.add_argument( @@ -373,10 +376,5 @@ def run_main(): separated by \',\'. For tag-set contains multiple tags, all tags \ must be passed in.\ """) - flags, unparsed = parser.parse_known_args() - - my_main = lambda unused_args: main(unused_args, flags) - app.run(main=my_main, argv=[sys.argv[0]] + unparsed) - -if __name__ == '__main__': - run_main() + FLAGS, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b88be4ae04..b0e9e3e5ed 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -38,15 +38,11 @@ from tensorflow.core.example import example_pb2 from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.debug.wrappers import local_cli_wrapper -from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils -# Set of ops to blacklist. -_OP_BLACKLIST = set(['WriteFile', 'ReadFile']) - def _show_tag_sets(saved_model_dir): """Prints the tag-sets stored in SavedModel directory. @@ -246,27 +242,6 @@ def get_signature_def_map(saved_model_dir, tag_set): return meta_graph.signature_def -def scan_meta_graph_def(meta_graph_def): - """Scans meta_graph_def and reports if there are ops on blacklist. - - Print ops if they are on black list, or print success if no blacklisted ops - found. - - Args: - meta_graph_def: MetaGraphDef protocol buffer. - """ - all_ops_set = set( - meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) - blacklisted_ops = _OP_BLACKLIST & all_ops_set - if blacklisted_ops: - # TODO(yifeif): print more warnings - print('MetaGraph with tag set %s contains the following blacklisted ops:' % - meta_graph_def.meta_info_def.tags, blacklisted_ops) - else: - print('MetaGraph with tag set %s does not contain blacklisted ops.' % - meta_graph_def.meta_info_def.tags) - - def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key, input_tensor_key_feed_dict, outdir, overwrite_flag, tf_debug=False): @@ -634,21 +609,6 @@ def run(args): args.overwrite, tf_debug=args.tf_debug) -def scan(args): - """Function triggered by scan command. - - Args: - args: A namespace parsed from command line. - """ - if args.tag_set: - scan_meta_graph_def( - saved_model_utils.get_meta_graph_def(args.dir, args.tag_set)) - else: - saved_model = reader.read_saved_model(args.dir) - for meta_graph_def in saved_model.meta_graphs: - scan_meta_graph_def(meta_graph_def) - - def create_parser(): """Creates a parser that parse the command line arguments. @@ -770,26 +730,6 @@ def create_parser(): 'SavedModel.') parser_run.set_defaults(func=run) - # scan command - scan_msg = ('Usage example:\n' - 'To scan for blacklisted ops in SavedModel:\n' - '$saved_model_cli scan --dir /tmp/saved_model\n' - 'To scan a specific MetaGraph, pass in --tag_set\n') - parser_scan = subparsers.add_parser( - 'scan', - description=scan_msg, - formatter_class=argparse.RawTextHelpFormatter) - parser_scan.add_argument( - '--dir', - type=str, - required=True, - help='directory containing the SavedModel to execute') - parser_scan.add_argument( - '--tag_set', - type=str, - help='tag-set of graph in SavedModel to scan, separated by \',\'') - parser_scan.set_defaults(func=scan) - return parser diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index eedc893a38..f99c844845 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -525,28 +525,6 @@ signature_def['serving_default']: y_expected = np.array([[2.5], [3.0]]) self.assertAllClose(y_expected, y_actual) - def testScanCommand(self): - self.parser = saved_model_cli.create_parser() - base_path = test.test_src_dir_path(SAVED_MODEL_PATH) - args = self.parser.parse_args(['scan', '--dir', base_path]) - with captured_output() as (out, _): - saved_model_cli.scan(args) - output = out.getvalue().strip() - self.assertTrue('does not contain blacklisted ops' in output) - - def testScanCommandFoundBlacklistedOp(self): - self.parser = saved_model_cli.create_parser() - base_path = test.test_src_dir_path(SAVED_MODEL_PATH) - args = self.parser.parse_args( - ['scan', '--dir', base_path, '--tag_set', 'serve']) - op_blacklist = saved_model_cli._OP_BLACKLIST - saved_model_cli._OP_BLACKLIST = set(['VariableV2']) - with captured_output() as (out, _): - saved_model_cli.scan(args) - saved_model_cli._OP_BLACKLIST = op_blacklist - output = out.getvalue().strip() - self.assertTrue('\'VariableV2\'' in output) - if __name__ == '__main__': test.main() diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 03e3e0857f..0b3b060fe7 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -274,8 +274,7 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 7000 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \ - __macro(cudnnSetConvolutionMathType) \ - __macro(cudnnSetRNNMatrixMathType) + __macro(cudnnSetConvolutionMathType) // clang-format on CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -587,19 +586,6 @@ static bool TensorOpMathEnabled() { return is_enabled; } -// A helper function to decide whether to enable the TENSOR_OP_MATH math type -// for RNNs. -static bool RnnTensorOpMathEnabled() { - static bool is_enabled = [] { - bool is_disabled = false; - TF_CHECK_OK( - tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH", - /*default_val=*/false, &is_disabled)); - return !is_disabled; - }(); - return is_enabled; -} - // A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT // in batchnorm. This mode can be faster in some tasks because an optimized path // may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute @@ -1138,9 +1124,6 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { SetFailure(cudnn_params_desc_->Status()); return; } - if (data_type == CUDNN_DATA_HALF) { - set_use_tensor_op_math(true); - } } ~CudnnRnnDescriptor() override { if (rnn_desc_) { @@ -1149,20 +1132,6 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { CUDNN_RETURN_IF_FAIL(status, "Unable to destroy RNN descriptor"); } } - void set_use_tensor_op_math(bool use_tensor_op_math) { -#if CUDNN_VERSION >= 7000 - cudnnMathType_t math_type = - (use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH); - if (RnnTensorOpMathEnabled()) { - cudnnStatus_t status = - wrap::cudnnSetRNNMatrixMathType(parent_, rnn_desc_, math_type); - if (status != CUDNN_STATUS_SUCCESS) { - LOG(FATAL) << "could not set cudnn RNN math type: " - << ToString(status); - } - } -#endif - } cudnnRNNDescriptor_t handle() const { if (!ok()) return nullptr; return rnn_desc_; diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index bab1e82c86..9b0db8a112 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1176,20 +1176,6 @@ def tf_custom_op_library_additional_deps(): "@protobuf_archive//:protobuf_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), - ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"]) - -# A list of targets that contains the implemenation of -# tf_custom_op_library_additional_deps. It's used to generate a DEF file for -# exporting symbols from _pywrap_tensorflow.dll on Windows. -def tf_custom_op_library_additional_deps_impl(): - return [ - # for @nsync//:nsync_headers - "//third_party/nsync:nsync_cpp", - # for //third_party/eigen3 - clean_dep("//third_party/eigen3"), - # for //tensorflow/core:framework_headers_lib - clean_dep("//tensorflow/core:framework"), - clean_dep("//tensorflow/core:reader_base"), ] # Traverse the dependency graph along the "deps" attribute of the @@ -1276,7 +1262,6 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(is_external=True), - features = ["windows_export_all_symbols"], linkopts=linkopts + select({ "//conditions:default": [ "-lm", @@ -1423,8 +1408,7 @@ def tf_py_wrap_cc(name, ]) + tf_extension_copts()), linkopts=tf_extension_linkopts() + extra_linkopts, linkstatic=1, - deps=deps + extra_deps, - **kwargs) + deps=deps + extra_deps) native.genrule( name="gen_" + cc_library_pyd_name, srcs=[":" + cc_library_name], diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index baa7a0889d..5268bba3cc 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -247,8 +247,6 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor = public_api.PublicAPIVisitor(visitor) public_api_visitor.do_not_descend_map['tf'].append('contrib') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] - # TODO(annarev): Make slide_dataset available in API. - public_api_visitor.private_map['tf'] = ['slide_dataset'] traverse.traverse(api, public_api_visitor) proto_dict = visitor.GetProtos() diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake index d5dea4f3e4..ec90c83aac 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cmake +++ b/tensorflow/tools/ci_build/Dockerfile.cmake @@ -23,12 +23,11 @@ RUN /install/install_deb_packages.sh RUN apt-get update RUN apt-get install -y --no-install-recommends python-pip -RUN pip install --upgrade wheel RUN pip install --upgrade astor RUN pip install --upgrade gast RUN pip install --upgrade numpy RUN pip install --upgrade termcolor # Install golang -RUN apt-get install -t xenial-backports -y golang-1.9 -ENV PATH=${PATH}:/usr/lib/go-1.9/bin +RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable +RUN apt-get install -y golang diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 40189a6d1b..8b8ba31a0d 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -65,5 +65,4 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ - //${PY_TEST_DIR}/tensorflow/python/... \ - //${PY_TEST_DIR}/tensorflow/contrib/... + //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl deleted file mode 100644 index 3cb72f4979..0000000000 --- a/tensorflow/tools/def_file_filter/BUILD.tpl +++ /dev/null @@ -1,15 +0,0 @@ -# Description: -# Tools for filtering DEF file for TensorFlow on Windows -# -# On Windows, we use a DEF file generated by Bazel to export -# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). -# The maximum number of symbols that can be exported per DLL is 64K, -# so we have to filter some useless symbols through this python script. - -package(default_visibility = ["//visibility:public"]) - -py_binary( - name = "def_file_filter", - srcs = ["def_file_filter.py"], - srcs_version = "PY2AND3", -) diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl deleted file mode 100644 index 8bdc03eb0f..0000000000 --- a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""def_file_filter.py - tool to filter a windows def file. - -The def file can be used to export symbols from the tensorflow dll to enable -tf.load_library(). - -Because the linker allows only 64K symbols to be exported per dll -we filter the symbols down to the essentials. The regular expressions -we use for this are specific to tensorflow. - -TODO: this works fine but there is an issue with exporting -'const char * const' and importing it from a user_ops. The problem is -on the importing end and using __declspec(dllimport) works around it. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import io -import os -import re -import subprocess -import sys -import tempfile - -# External tools we use that come with visual studio sdk -UNDNAME = "%{undname_bin_path}" - -# Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") - -# Include if matched before exclude -INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" - r"google::protobuf::internal::ArenaImpl::AllocateAligned|" # for contrib/data/_prefetching_ops - r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops - r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops - r"tensorflow::internal::LogMessage|" - r"tensorflow::internal::LogString|" - r"tensorflow::internal::CheckOpMessageBuilder|" - r"tensorflow::internal::MakeCheckOpValueString|" - r"tensorflow::internal::PickUnusedPortOrDie|" - r"tensorflow::internal::ValidateDevice|" - r"tensorflow::ops::internal::Enter|" - r"tensorflow::strings::internal::AppendPieces|" - r"tensorflow::strings::internal::CatPieces|" - r"tensorflow::io::internal::JoinPathImpl") - -# Include if matched after exclude -INCLUDE_RE = re.compile(r"^(TF_\w*)$|" - r"^(TFE_\w*)$|" - r"nsync::|" - r"tensorflow::|" - r"functor::|" - r"perftools::gputools") - -# We want to identify data members explicitly in the DEF file, so that no one -# can implicitly link against the DLL if they use one of the variables exported -# from the DLL and the header they use does not decorate the symbol with -# __declspec(dllimport). It is easier to detect what a data symbol does -# NOT look like, so doing it with the below regex. -DATA_EXCLUDE_RE = re.compile(r"[)(]|" - r"vftable|" - r"vbtable|" - r"vcall|" - r"RTTI|" - r"protobuf::internal::ExplicitlyConstructed") - -def get_args(): - """Parse command line.""" - filename_list = lambda x: x.split(";") - parser = argparse.ArgumentParser() - parser.add_argument("--input", type=filename_list, - help="paths to input def file", - required=True) - parser.add_argument("--output", help="output deffile", required=True) - parser.add_argument("--target", help="name of the target", required=True) - args = parser.parse_args() - return args - - -def main(): - """main.""" - args = get_args() - - # Pipe dumpbin to extract all linkable symbols from libs. - # Good symbols are collected in candidates and also written to - # a temp file. - candidates = [] - tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False) - for def_file_path in args.input: - def_file = open(def_file_path, 'r') - for line in def_file: - cols = line.split() - sym = cols[0] - tmpfile.file.write(sym + "\n") - candidates.append(sym) - tmpfile.file.close() - - # Run the symbols through undname to get their undecorated name - # so we can filter on something readable. - with open(args.output, "w") as def_fp: - # track dupes - taken = set() - - # Header for the def file. - def_fp.write("LIBRARY " + args.target + "\n") - def_fp.write("EXPORTS\n") - def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") - - # Each symbols returned by undname matches the same position in candidates. - # We compare on undname but use the decorated name from candidates. - dupes = 0 - proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) - for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): - decorated = candidates[idx] - if decorated in taken: - # Symbol is already in output, done. - dupes += 1 - continue - - if not INCLUDEPRE_RE.search(line): - if EXCLUDE_RE.search(line): - continue - if not INCLUDE_RE.search(line): - continue - - if "deleting destructor" in line: - # Some of the symbols convered by INCLUDEPRE_RE export deleting - # destructor symbols, which is a bad idea. - # So we filter out such symbols here. - continue - - if DATA_EXCLUDE_RE.search(line): - def_fp.write("\t" + decorated + "\n") - else: - def_fp.write("\t" + decorated + " DATA\n") - taken.add(decorated) - def_fp.close() - - exit_code = proc.wait() - if exit_code != 0: - print("{} failed, exit={}".format(UNDNAME, exit_code)) - return exit_code - - os.unlink(tmpfile.name) - - print("symbols={}, taken={}, dupes={}" - .format(len(candidates), len(taken), dupes)) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl deleted file mode 100644 index 47539b2423..0000000000 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ /dev/null @@ -1,56 +0,0 @@ -"""Repository rule for def file filter autoconfiguration. - -This repository reuses Bazel's VC detect mechanism to find undname.exe, -which is a tool used in def_file_filter.py. - -def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. -On Windows, we use a DEF file generated by Bazel to export symbols from the -tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of -symbols that can be exported per DLL is 64K, so we have to filter some useless -symbols through this python script. - -`def_file_filter_config` depends on the following environment variables: - * `BAZEL_VC` - * `BAZEL_VS` - * `VS90COMNTOOLS` - * `VS100COMNTOOLS` - * `VS110COMNTOOLS` - * `VS120COMNTOOLS` - * `VS140COMNTOOLS` -""" - -load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path") -load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool") -load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail") - -def _def_file_filter_configure_impl(repository_ctx): - if repository_ctx.os.name.lower().find("windows") == -1: - repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") - repository_ctx.file("def_file_filter.py", "") - return - vc_path = find_vc_path(repository_ctx) - if vc_path == "visual-studio-not-found": - auto_configure_fail("Visual C++ build tools not found on your machine") - undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") - - repository_ctx.template( - "def_file_filter.py", - Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"), - { - "%{undname_bin_path}": undname_bin_path, - }) - repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") - - -def_file_filter_configure = repository_rule( - implementation = _def_file_filter_configure_impl, - environ = [ - "BAZEL_VC", - "BAZEL_VS", - "VS90COMNTOOLS", - "VS100COMNTOOLS", - "VS110COMNTOOLS", - "VS120COMNTOOLS", - "VS140COMNTOOLS" - ], -) diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index 228d5ee35d..c1b1f79bbd 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,14 +17,6 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP -You can test specify version of TensorFlow: - -```shell -./local_test.sh ${whl_file_url} -``` - -For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu. - **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index caae7fd530..435f9d0dc9 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,11 +16,12 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script performs the following steps: -# 1) Build the docker image capable of running distributed TensorFlow in docker. +# This script peforms the following steps: +# 1) Build the docker-in-docker (dind) image capable of running docker and +# Kubernetes (k8s) cluster inside. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh @@ -63,9 +64,15 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" +LOCAL_K8S_CACHE=${HOME}/kubernetes -# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below -DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" +# Helper function +get_container_id_by_image_name() { + # Get the id of a container by image name + # Usage: get_docker_container_id_by_image_name + + docker ps | grep $1 | awk '{print $1}' +} # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -77,8 +84,7 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} - echo "use default whl file location" + die "whl file location is not specified" fi while true; do @@ -115,7 +121,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# Build docker image for local distributed TensorFlow cluster. +# Build docker-in-docker image for local k8s cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index cbcdbf5b80..3630dbd740 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -114,13 +114,6 @@ def configure(src_base_path, gen_path, debug=False): for target, src in link_map.items(): if src is None: open(os.path.join(gen_path, target), "w").write("") - elif not os.path.exists(src): - # Git repo is configured in a way we don't support such as having - # packed refs. Even though in a git repo, tf.__git_version__ will not - # be accurate. - # TODO(mikecase): Support grabbing git info when using packed refs. - open(os.path.join(gen_path, target), "w").write("") - spec["git"] = False else: try: # In python 3.5, symlink function exists even on Windows. But requires diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 6e21aa2846..b7d7fac315 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -178,7 +178,6 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/util/tensor_bundle", ], diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d86f65325b..d89afe85c7 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -182,36 +182,6 @@ Status FuseBatchNormWithConv(const NodeMatch& match, return Status::OK(); } -Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, - std::vector* new_nodes) { - // Calculate the scale and offset values to apply. - std::vector scale_values; - std::vector offset_values; - TF_RETURN_IF_ERROR( - GetScaleAndOffsetValues(match, &scale_values, &offset_values)); - - // Fuse conv weights, and set the final output node name as batch_norm_node. - const NodeDef& batch_norm_node = match.node; - const NodeMatch& batch_to_space_node_match = match.inputs[0]; - const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0]; - const NodeDef& batch_to_space_node = batch_to_space_node_match.node; - const NodeDef& conv_node = conv_node_match.node; - - string biasadd_name = conv_node.name() + "/biasadd"; - TF_RETURN_IF_ERROR( - FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, - biasadd_name , new_nodes)); - - NodeDef new_batch_to_space_node = batch_to_space_node; - // reuse batch_norm node name - new_batch_to_space_node.set_name(batch_norm_node.name()); - new_batch_to_space_node.set_input(0, biasadd_name); - new_nodes->push_back(batch_to_space_node_match.inputs[1].node); - new_nodes->push_back(batch_to_space_node_match.inputs[2].node); - new_nodes->push_back(new_batch_to_space_node); - return Status::OK(); -} - Status FuseBatchNormWithConvConcat(const NodeMatch& match, std::vector* new_nodes) { // Calculate the scale and offset values to apply. @@ -314,43 +284,6 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def, current_graph_def = replaced_graph_def; } while (did_graph_change); - do { - did_graph_change = false; - GraphDef replaced_graph_def; - TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( - current_graph_def, // clang-format off - {"BatchNormWithGlobalNormalization|FusedBatchNorm", // batch_norm_node - { - {"BatchToSpaceND", // batch_to_space_node - { - {"Conv2D", // conv_node - { - {"*"}, // input_node - {"Const"}, // weights_node - } - }, - {"Const"}, // block_shape - {"Const"}, // crops - } - }, - {"Const"}, // mean_node - {"Const"}, // variance_node - {"Const"}, // beta_node - {"Const"}, // gamma_node - } - }, // clang-format on - [&did_graph_change](const NodeMatch& match, - const std::set& input_nodes, - const std::set& output_nodes, - std::vector* new_nodes) { - TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes)); - did_graph_change = true; - return Status::OK(); - }, - {}, &replaced_graph_def)); - current_graph_def = replaced_graph_def; - } while (did_graph_change); - do { did_graph_change = false; GraphDef replaced_graph_def; diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 272410c693..b30ba9ac8b 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" -#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -299,96 +298,6 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; -void TestFoldFusedBatchNormsWithBatchToSpace() { - auto root = tensorflow::Scope::NewRootScope(); - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); - test::FillValues( - &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, - -5.0f, -3.0f, -6.0f}); - Output input_op = - Const(root.WithOpName("input_op"), Input::Initializer(input_data)); - - Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); - test::FillValues(&weights_data, - {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); - Output weights_op = - Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); - - Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, - {1, 1, 1, 1}, "VALID"); - - Tensor block_shape_data(DT_INT32, TensorShape({2})); - test::FillValues(&block_shape_data, {1, 2}); - Output block_shape_op = - Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); - - Tensor crops_data(DT_INT32, TensorShape({2, 2})); - test::FillValues(&crops_data, {0, 0, 0, 1}); - Output crops_op = - Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); - - Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), - conv_op, block_shape_op, crops_data); - - Tensor mean_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&mean_data, {10.0f, 20.0f}); - Output mean_op = - Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); - - Tensor variance_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&variance_data, {0.25f, 0.5f}); - Output variance_op = Const(root.WithOpName("variance_op"), - Input::Initializer(variance_data)); - - Tensor beta_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&beta_data, {0.1f, 0.6f}); - Output beta_op = - Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); - - Tensor gamma_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&gamma_data, {1.0f, 2.0f}); - Output gamma_op = - Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); - - GraphDef original_graph_def; - TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); - - NodeDef batch_norm_node; - batch_norm_node.set_op("FusedBatchNorm"); - batch_norm_node.set_name("output"); - AddNodeInput("batch_to_space_op", &batch_norm_node); - AddNodeInput("gamma_op", &batch_norm_node); - AddNodeInput("beta_op", &batch_norm_node); - AddNodeInput("mean_op", &batch_norm_node); - AddNodeInput("variance_op", &batch_norm_node); - SetNodeAttr("T", DT_FLOAT, &batch_norm_node); - SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); - SetNodeAttr("is_training", false, &batch_norm_node); - *(original_graph_def.mutable_node()->Add()) = batch_norm_node; - - std::unique_ptr original_session(NewSession(SessionOptions())); - TF_ASSERT_OK(original_session->Create(original_graph_def)); - std::vector original_outputs; - TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); - - GraphDef fused_graph_def; - TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, - &fused_graph_def)); - - std::unique_ptr fused_session(NewSession(SessionOptions())); - TF_ASSERT_OK(fused_session->Create(fused_graph_def)); - std::vector fused_outputs; - TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); - - test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); - - for (const NodeDef& node : fused_graph_def.node()) { - EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op()); - } -} - TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) { TestFoldOldBatchNorms(); } @@ -406,9 +315,5 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { TestFoldFusedBatchNormsWithConcat(/*split=*/false); } -TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { - TestFoldFusedBatchNormsWithBatchToSpace(); -} - } // namespace graph_transforms } // namespace tensorflow diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 2607b9d704..1833d67d82 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -48,65 +48,36 @@ py_binary( deps = ["//tensorflow:tensorflow_py"], ) -COMMON_PIP_DEPS = [ - ":licenses", - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/boosted_trees:boosted_trees_pip", - "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", - "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/eager/python:evaluator", - "//tensorflow/contrib/gan:gan", - "//tensorflow/contrib/graph_editor:graph_editor_pip", - "//tensorflow/contrib/keras:keras", - "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/py2tf:py2tf", - "//tensorflow/contrib/py2tf/converters:converters", - "//tensorflow/contrib/py2tf/converters:test_lib", - "//tensorflow/contrib/py2tf/impl:impl", - "//tensorflow/contrib/py2tf/pyct:pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", - "//tensorflow/contrib/receptive_field:receptive_field_pip", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/signal:test_util", - "//tensorflow/contrib/slim:slim", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - "//tensorflow/contrib/specs:specs", - "//tensorflow/contrib/summary:summary_test_util", - "//tensorflow/contrib/tensor_forest:init_py", - "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - "//tensorflow/contrib/timeseries:timeseries_pip", - "//tensorflow/contrib/tpu", - "//tensorflow/examples/tutorials/mnist:package", - "//tensorflow/python:distributed_framework_test_lib", - "//tensorflow/python:meta_graph_testdata", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/python/saved_model:saved_model", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python:test_ops", - "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", -] - # On Windows, python binary is a zip file of runfiles tree. # Add everything to its data dependency for generating a runfiles tree # for building the pip package on Windows. py_binary( name = "simple_console_for_windows", srcs = ["simple_console_for_windows.py"], - data = COMMON_PIP_DEPS, + data = [ + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/saved_model", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/contrib/summary:summary_test_util", + # These targets don't build on Windows yet. Exclude them for now. + # "//tensorflow/contrib/slim", + # "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + # "//tensorflow/contrib/specs", + # "//tensorflow/contrib/tensor_forest:init_py", + # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + # "//tensorflow/examples/tutorials/mnist:package", + ], srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) @@ -137,7 +108,6 @@ filegroup( "@highwayhash//:LICENSE", "@jemalloc//:COPYING", "@jpeg//:LICENSE.md", - "@kafka//:LICENSE", "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", @@ -167,12 +137,61 @@ sh_binary( data = select({ "//tensorflow:windows": [":simple_console_for_windows"], "//tensorflow:windows_msvc": [":simple_console_for_windows"], - "//conditions:default": COMMON_PIP_DEPS + [ + "//conditions:default": [ + ":licenses", + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", ":simple_console", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/boosted_trees:boosted_trees_pip", + "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:checkpointable_utils", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/gan:gan", + "//tensorflow/contrib/graph_editor:graph_editor_pip", + "//tensorflow/contrib/keras:keras", + "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/lite/python:interpreter_test_data", "//tensorflow/contrib/lite/toco:toco", "//tensorflow/contrib/lite/toco/python:toco_wrapper", "//tensorflow/contrib/lite/toco/python:toco_from_protos", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/py2tf:py2tf", + "//tensorflow/contrib/py2tf/converters:converters", + "//tensorflow/contrib/py2tf/converters:test_lib", + "//tensorflow/contrib/py2tf/impl:impl", + "//tensorflow/contrib/py2tf/pyct:pyct", + "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", + "//tensorflow/contrib/slim:slim", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + "//tensorflow/contrib/specs:specs", + "//tensorflow/contrib/summary:summary_test_util", + "//tensorflow/contrib/tensor_forest:init_py", + "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + "//tensorflow/contrib/timeseries:timeseries_pip", + "//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/saved_model:saved_model", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e0152da4df..e1a5f091ba 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0' +_VERSION = '1.6.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -72,7 +72,7 @@ if sys.version_info < (3, 4): # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'freeze_graph = tensorflow.python.tools.freeze_graph:run_main', + 'freeze_graph = tensorflow.python.tools.freeze_graph:main', 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index edd093510e..77cc9f75f7 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -88,7 +88,6 @@ import os import shutil from google.cloud import datastore -from six import text_type def is_real_file(dirpath, fname): @@ -151,7 +150,7 @@ def upload_benchmark_data(client, data): """ test_result = json.loads(data) - test_name = text_type(test_result["name"]) + test_name = unicode(test_result["name"]) start_time = datetime.datetime.utcfromtimestamp( float(test_result["startTime"])) batch = [] @@ -163,7 +162,7 @@ def upload_benchmark_data(client, data): t_val.update({ "test": test_name, "start": start_time, - "info": text_type(data) + "info": unicode(data) }) batch.append(t_val) @@ -171,7 +170,7 @@ def upload_benchmark_data(client, data): # the attribute to be fetched and displayed. The full entry information is # also stored as a non-indexed JSON blob. for ent in test_result["entries"].get("entry", []): - ent_name = text_type(ent["name"]) + ent_name = unicode(ent["name"]) e_key = client.key("Entry") e_val = datastore.Entity(e_key, exclude_from_indexes=["info"]) e_val.update({ @@ -179,7 +178,7 @@ def upload_benchmark_data(client, data): "start": start_time, "entry": ent_name, "timing": ent["wallTime"], - "info": text_type(json.dumps(ent)) + "info": unicode(json.dumps(ent)) }) batch.append(e_val) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a922808a70..abc9eb9bc1 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -12,8 +12,6 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil load("//third_party:repo.bzl", "tf_http_archive") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", - "def_file_filter_configure") def _extract_version_number(bazel_version): """Extracts the semantic version number from a version string @@ -69,7 +67,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We must check the bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel # version we require here. - check_bazel_version_at_least("0.10.0") + check_bazel_version_at_least("0.5.4") clang6_configure(name="local_config_clang6") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") @@ -77,10 +75,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") - # For windows bazel build - # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. - def_file_filter_configure(name = "local_config_def_file_filter") - # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name="local_config_arm_compiler", diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 4418ac32fc..87a23925c4 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -526,12 +526,12 @@ config_setting( config_setting( name = "armeabi-v7a", - values = {"cpu": "armeabi-v7a"}, + values = {"android_cpu": "armeabi-v7a"}, ) config_setting( name = "arm64-v8a", - values = {"cpu": "arm64-v8a"}, + values = {"android_cpu": "arm64-v8a"}, ) config_setting( diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index a839ca717e..a61a9e1f6c 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -130,23 +130,18 @@ cc_library( ], hdrs = [ "config.h", - "src-cpp/rdkafkacpp.h", - "src-cpp/rdkafkacpp_int.h", - "src/lz4.c", - "src/snappy_compat.h", - ], - copts = [ - "-Iexternal/kafka/src", - "-Iexternal/kafka/src-cpp", ], defines = [ ], + includes = [ + "src", + "src-cpp", + ], linkopts = [ "-lpthread", ], visibility = ["//visibility:public"], deps = [ "@boringssl//:ssl", - "@zlib_archive//:zlib", ], ) diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl index 1dd8ab433a..de06ad5f27 100644 --- a/third_party/py/BUILD.tpl +++ b/third_party/py/BUILD.tpl @@ -2,26 +2,20 @@ licenses(["restricted"]) package(default_visibility = ["//visibility:public"]) -# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib -# See https://docs.python.org/3/extending/windows.html -cc_import( - name = "python_lib", - interface_library = select({ - ":windows": ":python_import_lib", - # A placeholder for Unix platforms which makes --no_build happy. - "//conditions:default": "not-existing.lib", - }), - system_provided = 1, -) - cc_library( name = "python_headers", hdrs = [":python_include"], - deps = select({ - ":windows": [":python_lib"], + data = select({ + ":windows": [":python_import_lib"], "//conditions:default": [], }), includes = ["python_include"], + linkopts = select({ + # TODO(pcloudy): Ideally, this should just go into deps after resolving + # https://github.com/bazelbuild/bazel/issues/3237, + ":windows": ["$(locations :python_import_lib)"], + "//conditions:default": [], + }), ) cc_library( -- GitLab From 829f2aff4e663c13b71253707c8a867ca929bb1e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Mar 2018 15:47:09 -0700 Subject: [PATCH 086/960] Rename CreateXyzHlo utilities to MakeXyzHlo as discussed on cr/188968478; NFC The rationale here is that MakeXyzHlo is less likely to be confused with HloInstruction::CreateXyz and we already have a convention of using a "Make" prefix for ergonomic factory functions. PiperOrigin-RevId: 189259036 --- .../xla/service/algebraic_simplifier.cc | 33 +++++----- .../compiler/xla/service/gather_expander.cc | 28 ++++---- .../compiler/xla/service/gpu/pad_insertion.cc | 10 +-- .../xla/service/hlo_creation_utils.cc | 64 +++++++++---------- .../compiler/xla/service/hlo_creation_utils.h | 34 +++++----- tensorflow/compiler/xla/service/while_util.cc | 12 ++-- 6 files changed, 89 insertions(+), 92 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index be7aa307d2..971c2935c8 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -385,7 +385,7 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { auto* c2 = rhs; TF_ASSIGN_OR_RETURN(auto* sum_of_constants, - CreateBinaryHlo(HloOpcode::kAdd, c1, c2)); + MakeBinaryHlo(HloOpcode::kAdd, c1, c2)); return ReplaceWithNewInstruction( add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, lhs->mutable_operand(0), @@ -636,16 +636,14 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { // (A / B) / (C / D) => (A / B)*(D / C) => (A * D) / (B * C) if (lhs->opcode() == HloOpcode::kDivide && rhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN( - auto a_times_d, - CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(0), - rhs->mutable_operand(1))); - TF_ASSIGN_OR_RETURN( - auto b_times_c, - CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), - rhs->mutable_operand(0))); - TF_ASSIGN_OR_RETURN(auto new_divide, CreateBinaryHlo(HloOpcode::kDivide, - a_times_d, b_times_c)); + TF_ASSIGN_OR_RETURN(auto a_times_d, MakeBinaryHlo(HloOpcode::kMultiply, + lhs->mutable_operand(0), + rhs->mutable_operand(1))); + TF_ASSIGN_OR_RETURN(auto b_times_c, MakeBinaryHlo(HloOpcode::kMultiply, + lhs->mutable_operand(1), + rhs->mutable_operand(0))); + TF_ASSIGN_OR_RETURN(auto new_divide, MakeBinaryHlo(HloOpcode::kDivide, + a_times_d, b_times_c)); return ReplaceInstruction(divide, new_divide); } @@ -654,7 +652,7 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { if (lhs->opcode() == HloOpcode::kDivide) { TF_ASSIGN_OR_RETURN( auto b_times_c, - CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); + MakeBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -663,9 +661,8 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { // A / (B / C) => (A*C) / B if (rhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN( - auto a_times_c, - CreateBinaryHlo(HloOpcode::kMultiply, lhs, rhs->mutable_operand(1))); + TF_ASSIGN_OR_RETURN(auto a_times_c, MakeBinaryHlo(HloOpcode::kMultiply, lhs, + rhs->mutable_operand(1))); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -1300,8 +1297,8 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { } TF_ASSIGN_OR_RETURN(HloInstruction * nonzero_pad, - CreatePadHlo(pad->mutable_operand(0), - pad->mutable_operand(1), nonzero_padding)); + MakePadHlo(pad->mutable_operand(0), + pad->mutable_operand(1), nonzero_padding)); // Copy the layout from the original pad instructions. The new pad and the // slice instruction should all have the same layout. TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( @@ -1329,7 +1326,7 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { TF_ASSIGN_OR_RETURN( HloInstruction * slice, - CreateSliceHlo(nonzero_pad, start_indices, end_indices, strides)); + MakeSliceHlo(nonzero_pad, start_indices, end_indices, strides)); // Verify that the slice shape matches the pad shape. TF_RET_CHECK(ShapeUtil::Compatible(slice->shape(), pad->shape())); diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index a133d81067..58c62d8ce9 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -39,7 +39,7 @@ static StatusOr TransposeIndexVectorDimToLast( } } permutation.push_back(index_vector_dim); - return CreateTransposeHlo(gather_indices, permutation); + return MakeTransposeHlo(gather_indices, permutation); } // If the gather_indices holds scalar indices (i.e. gather_indices has rank N @@ -133,16 +133,16 @@ static StatusOr ExpandIndexVectorIntoOperandSpace( dim_numbers.gather_dims_to_operand_dims_size()) { TF_ASSIGN_OR_RETURN( HloInstruction * component_to_concat, - CreateSliceHlo( - index_vector, /*start_indices=*/{index_vector_dim_index}, - /*limit_indices=*/{index_vector_dim_index + 1}, /*strides=*/{1})); + MakeSliceHlo(index_vector, /*start_indices=*/{index_vector_dim_index}, + /*limit_indices=*/{index_vector_dim_index + 1}, + /*strides=*/{1})); expanded_index_components.push_back(component_to_concat); } else { expanded_index_components.push_back(zero); } } - return CreateConcatHlo(expanded_index_components, /*dimension=*/0); + return MakeConcatHlo(expanded_index_components, /*dimension=*/0); } // This generates the body of the while that implements the main data movement @@ -159,8 +159,8 @@ static StatusOr> GatherLoopBody( TF_ASSIGN_OR_RETURN( HloInstruction * induction_var_as_vector, - CreateBroadcastHlo(induction_var, /*broadcast_dimensions=*/{}, - /*result_shape_bounds=*/{1})); + MakeBroadcastHlo(induction_var, /*broadcast_dimensions=*/{}, + /*result_shape_bounds=*/{1})); TF_ASSIGN_OR_RETURN( HloInstruction * index_into_gather_indices, @@ -169,8 +169,8 @@ static StatusOr> GatherLoopBody( TF_ASSIGN_OR_RETURN( HloInstruction * index_vector_2d, - CreateDynamicSliceHlo(gather_indices, index_into_gather_indices, - {1, index_vector_size})); + MakeDynamicSliceHlo(gather_indices, index_into_gather_indices, + {1, index_vector_size})); TF_ASSIGN_OR_RETURN(HloInstruction * index_vector, ElideDegenerateDims(index_vector_2d, {0})); @@ -181,8 +181,8 @@ static StatusOr> GatherLoopBody( operand->shape().dimensions_size())); TF_ASSIGN_OR_RETURN(HloInstruction * gathered_slice, - CreateDynamicSliceHlo(operand, gathered_slice_start, - gather.gather_window_bounds())); + MakeDynamicSliceHlo(operand, gathered_slice_start, + gather.gather_window_bounds())); TF_ASSIGN_OR_RETURN( HloInstruction * gathered_slice_for_update, @@ -197,8 +197,8 @@ static StatusOr> GatherLoopBody( TF_ASSIGN_OR_RETURN( HloInstruction * updated_accumulator, - CreateDynamicUpdateSliceHlo(output_accumulator, gathered_slice_for_update, - index_vector_into_accumulator)); + MakeDynamicUpdateSliceHlo(output_accumulator, gathered_slice_for_update, + index_vector_into_accumulator)); // New loop state -- only the accumulator has changed. The // WhileUtil::MakeCountedLoop functions takes care of the induction variable @@ -250,7 +250,7 @@ static StatusOr PermuteGatherAndWindowDims( } } - return CreateTransposeHlo(accumulator, permutation); + return MakeTransposeHlo(accumulator, permutation); } // High Level Algorithm diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc index fa405b9329..7bda4e2fcd 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc @@ -69,7 +69,7 @@ HloInstruction* MaybePaddedAndSlicedInput( HloInstruction* padding = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(element_type)))); - input = CreatePadHlo(input, padding, padding_config).ValueOrDie(); + input = MakePadHlo(input, padding, padding_config).ValueOrDie(); } if (window_util::HasNegativePadding(conv_window)) { @@ -92,8 +92,8 @@ HloInstruction* MaybePaddedAndSlicedInput( std::max(0LL, -conv_window.dimensions(i).padding_high()); } - input = CreateSliceHlo(input, start_indices, limit_indices, strides) - .ValueOrDie(); + input = + MakeSliceHlo(input, start_indices, limit_indices, strides).ValueOrDie(); } return input; @@ -126,7 +126,7 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window, HloInstruction* padding = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(element_type)))); - return CreatePadHlo(kernel, padding, padding_config).ValueOrDie(); + return MakePadHlo(kernel, padding, padding_config).ValueOrDie(); } } // namespace @@ -238,7 +238,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(input->shape().element_type())))); HloInstruction* padded_input = - CreatePadHlo(input, padding, input_padding_config).ValueOrDie(); + MakePadHlo(input, padding, input_padding_config).ValueOrDie(); // The shape of the backward_conv CustomCall is a tuple (conv_result, // scratch_buffer). Extract out the shape of conv_result. diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 4585bffa42..fbe71f8d5b 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -23,8 +23,8 @@ namespace xla { using tensorflow::gtl::ArraySlice; using tensorflow::strings::StrCat; -StatusOr CreateBinaryHlo(HloOpcode opcode, HloInstruction* lhs, - HloInstruction* rhs) { +StatusOr MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs, + HloInstruction* rhs) { HloComputation* computation = lhs->parent(); CHECK_EQ(computation, rhs->parent()); TF_ASSIGN_OR_RETURN(Shape binary_op_shape, @@ -33,9 +33,9 @@ StatusOr CreateBinaryHlo(HloOpcode opcode, HloInstruction* lhs, HloInstruction::CreateBinary(binary_op_shape, opcode, lhs, rhs)); } -StatusOr CreatePadHlo(HloInstruction* operand, - HloInstruction* padding_value, - const PaddingConfig& padding_config) { +StatusOr MakePadHlo(HloInstruction* operand, + HloInstruction* padding_value, + const PaddingConfig& padding_config) { HloComputation* computation = operand->parent(); CHECK_EQ(computation, padding_value->parent()); TF_ASSIGN_OR_RETURN( @@ -46,10 +46,10 @@ StatusOr CreatePadHlo(HloInstruction* operand, pad_shape, operand, padding_value, padding_config)); } -StatusOr CreateSliceHlo(HloInstruction* operand, - ArraySlice start_indices, - ArraySlice limit_indices, - ArraySlice strides) { +StatusOr MakeSliceHlo(HloInstruction* operand, + ArraySlice start_indices, + ArraySlice limit_indices, + ArraySlice strides) { HloComputation* computation = operand->parent(); TF_ASSIGN_OR_RETURN(Shape slice_shape, ShapeInference::InferSliceShape( operand->shape(), start_indices, @@ -58,7 +58,7 @@ StatusOr CreateSliceHlo(HloInstruction* operand, slice_shape, operand, start_indices, limit_indices, strides)); } -StatusOr CreateConvolveHlo( +StatusOr MakeConvolveHlo( HloInstruction* lhs, HloInstruction* rhs, const Window& window, const ConvolutionDimensionNumbers& dimension_numbers) { HloComputation* computation = lhs->parent(); @@ -70,8 +70,8 @@ StatusOr CreateConvolveHlo( convolve_shape, lhs, rhs, window, dimension_numbers)); } -StatusOr CreateTransposeHlo(HloInstruction* operand, - ArraySlice dimensions) { +StatusOr MakeTransposeHlo(HloInstruction* operand, + ArraySlice dimensions) { HloComputation* computation = operand->parent(); TF_ASSIGN_OR_RETURN( Shape transpose_shape, @@ -80,23 +80,23 @@ StatusOr CreateTransposeHlo(HloInstruction* operand, HloInstruction::CreateTranspose(transpose_shape, operand, dimensions)); } -StatusOr CreateReshapeHlo(const Shape& result_shape, - HloInstruction* operand) { +StatusOr MakeReshapeHlo(const Shape& result_shape, + HloInstruction* operand) { HloComputation* computation = operand->parent(); return computation->AddInstruction( HloInstruction::CreateReshape(result_shape, operand)); } -StatusOr CreateReshapeHlo( +StatusOr MakeReshapeHlo( ArraySlice result_shape_dim_bounds, HloInstruction* operand) { Shape new_shape = ShapeUtil::MakeShape(operand->shape().element_type(), result_shape_dim_bounds); - return CreateReshapeHlo(new_shape, operand); + return MakeReshapeHlo(new_shape, operand); } -StatusOr CreateDynamicSliceHlo(HloInstruction* operand, - HloInstruction* start_indices, - ArraySlice slice_sizes) { +StatusOr MakeDynamicSliceHlo(HloInstruction* operand, + HloInstruction* start_indices, + ArraySlice slice_sizes) { HloComputation* computation = operand->parent(); CHECK_EQ(computation, start_indices->parent()); TF_ASSIGN_OR_RETURN( @@ -107,7 +107,7 @@ StatusOr CreateDynamicSliceHlo(HloInstruction* operand, dynamic_slice_shape, operand, start_indices, slice_sizes)); } -StatusOr CreateDynamicUpdateSliceHlo( +StatusOr MakeDynamicUpdateSliceHlo( HloInstruction* operand, HloInstruction* update, HloInstruction* start_indices) { HloComputation* computation = operand->parent(); @@ -121,7 +121,7 @@ StatusOr CreateDynamicUpdateSliceHlo( dynamic_update_slice_shape, operand, update, start_indices)); } -StatusOr CreateBroadcastHlo( +StatusOr MakeBroadcastHlo( HloInstruction* operand, ArraySlice broadcast_dimensions, ArraySlice result_shape_bounds) { HloComputation* computation = operand->parent(); @@ -132,8 +132,8 @@ StatusOr CreateBroadcastHlo( broadcast_shape, operand, broadcast_dimensions)); } -StatusOr CreateGetTupleElementHlo(HloInstruction* operand, - int64 index) { +StatusOr MakeGetTupleElementHlo(HloInstruction* operand, + int64 index) { HloComputation* computation = operand->parent(); TF_ASSIGN_OR_RETURN( @@ -143,8 +143,8 @@ StatusOr CreateGetTupleElementHlo(HloInstruction* operand, HloInstruction::CreateGetTupleElement(gte_shape, operand, index)); } -StatusOr CreateConcatHlo(ArraySlice operands, - int64 dimension) { +StatusOr MakeConcatHlo(ArraySlice operands, + int64 dimension) { CHECK_GT(operands.size(), 0); HloComputation* computation = operands[0]->parent(); @@ -181,7 +181,7 @@ StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n) { Shape output_shape = ShapeUtil::MakeShape(operand_shape.element_type(), new_shape_dims); - return CreateReshapeHlo(output_shape, operand); + return MakeReshapeHlo(output_shape, operand); } StatusOr ExpandFirstDimIntoNDims( @@ -198,7 +198,7 @@ StatusOr ExpandFirstDimIntoNDims( std::back_inserter(expanded_shape_dim_bounds)); Shape new_shape = ShapeUtil::MakeShape(operand->shape().element_type(), expanded_shape_dim_bounds); - return CreateReshapeHlo(new_shape, operand); + return MakeReshapeHlo(new_shape, operand); } StatusOr ExpandLastDimIntoNDims( @@ -216,7 +216,7 @@ StatusOr ExpandLastDimIntoNDims( c_copy(expanded_dims, std::back_inserter(expanded_shape_dim_bounds)); Shape new_shape = ShapeUtil::MakeShape(operand->shape().element_type(), expanded_shape_dim_bounds); - return CreateReshapeHlo(new_shape, operand); + return MakeReshapeHlo(new_shape, operand); } StatusOr ElideDegenerateDims(HloInstruction* operand, @@ -241,7 +241,7 @@ StatusOr ElideDegenerateDims(HloInstruction* operand, c_reverse(new_shape_dim_bounds); Shape output_shape = ShapeUtil::MakeShape(input_shape.element_type(), new_shape_dim_bounds); - return CreateReshapeHlo(output_shape, operand); + return MakeReshapeHlo(output_shape, operand); } StatusOr PadVectorWithZeros(HloInstruction* operand, @@ -258,7 +258,7 @@ StatusOr PadVectorWithZeros(HloInstruction* operand, HloInstruction* zero = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(operand->shape().element_type())))); - return CreatePadHlo(operand, zero, padding_config); + return MakePadHlo(operand, zero, padding_config); } StatusOr BroadcastZeros( @@ -267,8 +267,8 @@ StatusOr BroadcastZeros( HloInstruction* zero = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(element_type)))); - return CreateBroadcastHlo(zero, /*broadcast_dimensions=*/{}, - /*result_shape_bounds=*/broadcast_dimensions); + return MakeBroadcastHlo(zero, /*broadcast_dimensions=*/{}, + /*result_shape_bounds=*/broadcast_dimensions); } StatusOr> CreateComputationWithSignature( diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h index 2b03a849cf..6032ebab74 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.h +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -28,73 +28,73 @@ namespace xla { // Creates a binary HLO instruction and adds it to the computation containing // `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). -StatusOr CreateBinaryHlo(HloOpcode opcode, HloInstruction* lhs, - HloInstruction* rhs); +StatusOr MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs, + HloInstruction* rhs); // Creates a pad HLO instruction and adds it to the computation containing // `operand` and `padding_value` (`operand` and `padding_value` must be in the // same computation). -StatusOr CreatePadHlo(HloInstruction* operand, - HloInstruction* padding_value, - const PaddingConfig& padding_config); +StatusOr MakePadHlo(HloInstruction* operand, + HloInstruction* padding_value, + const PaddingConfig& padding_config); // Creates a slice HLO instruction and adds it to the computation containing // `operand`. -StatusOr CreateSliceHlo( +StatusOr MakeSliceHlo( HloInstruction* operand, tensorflow::gtl::ArraySlice start_indices, tensorflow::gtl::ArraySlice limit_indices, tensorflow::gtl::ArraySlice strides); // Creates a convolution HLO instruction and adds it to the computation // containing `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). -StatusOr CreateConvolveHlo( +StatusOr MakeConvolveHlo( HloInstruction* lhs, HloInstruction* rhs, const Window& window, const ConvolutionDimensionNumbers& dimension_numbers); // Creates a transpose HLO instruction and adds it to the computation containing // `operand`. -StatusOr CreateTransposeHlo( +StatusOr MakeTransposeHlo( HloInstruction* operand, tensorflow::gtl::ArraySlice dimensions); // Creates a reshape HLO instruction and adds it to the computation containing // `operand`. -StatusOr CreateReshapeHlo(const Shape& result_shape, - HloInstruction* operand); +StatusOr MakeReshapeHlo(const Shape& result_shape, + HloInstruction* operand); -StatusOr CreateReshapeHlo( +StatusOr MakeReshapeHlo( tensorflow::gtl::ArraySlice result_shape_dim_bounds, HloInstruction* operand); // Creates a dynamic-slice HLO instruction and adds it to the computation // containing `operand` and `start_indices` (`operand` and `start_indices` must // be in the same computation). -StatusOr CreateDynamicSliceHlo( +StatusOr MakeDynamicSliceHlo( HloInstruction* operand, HloInstruction* start_indices, tensorflow::gtl::ArraySlice slice_sizes); // Creates a dynamic-update-slice HLO instruction and adds it to the computation // containing `operand`, `update` and `start_indices` (`operand`, `update` and // `start_indices` must be in the same computation). -StatusOr CreateDynamicUpdateSliceHlo( +StatusOr MakeDynamicUpdateSliceHlo( HloInstruction* operand, HloInstruction* update, HloInstruction* start_indices); // Creates a broadcast HLO instruction and adds it to the computation containing // `operand`. -StatusOr CreateBroadcastHlo( +StatusOr MakeBroadcastHlo( HloInstruction* operand, tensorflow::gtl::ArraySlice broadcast_dimensions, tensorflow::gtl::ArraySlice result_shape_bounds); // Creates a GetTupleElement HLO instruction and adds it to the computation // containing `operand`. -StatusOr CreateGetTupleElementHlo(HloInstruction* operand, - int64 index); +StatusOr MakeGetTupleElementHlo(HloInstruction* operand, + int64 index); // Creates a Concatenate HLO instruction and adds it to the computation // containing `operands` (`operands` must be non-empty and every element must be // contained in the same computation). -StatusOr CreateConcatHlo( +StatusOr MakeConcatHlo( tensorflow::gtl::ArraySlice operands, int64 dimension); // ----------------------------------------------------------------------------- diff --git a/tensorflow/compiler/xla/service/while_util.cc b/tensorflow/compiler/xla/service/while_util.cc index 7441a7ad39..8cd5882f32 100644 --- a/tensorflow/compiler/xla/service/while_util.cc +++ b/tensorflow/compiler/xla/service/while_util.cc @@ -155,10 +155,10 @@ MakeCountedLoopConditionComputation(const Shape& loop_state_shape, HloInstruction* param = cond_computation->parameter_instruction(0); TF_ASSIGN_OR_RETURN(HloInstruction * counter, - CreateGetTupleElementHlo(param, 0)); + MakeGetTupleElementHlo(param, 0)); TF_ASSIGN_OR_RETURN( HloInstruction * compare, - CreateBinaryHlo(HloOpcode::kLt, counter, trip_count_constant)); + MakeBinaryHlo(HloOpcode::kLt, counter, trip_count_constant)); cond_computation->set_root_instruction(compare); return std::move(cond_computation); } @@ -175,14 +175,14 @@ static StatusOr> MakeCountedLoopBodyComputation( HloInstruction* param = body_computation->parameter_instruction(0); TF_ASSIGN_OR_RETURN(HloInstruction * indvar, - CreateGetTupleElementHlo(param, 0)); + MakeGetTupleElementHlo(param, 0)); TF_ASSIGN_OR_RETURN(HloInstruction * next_indvar, - CreateBinaryHlo(HloOpcode::kAdd, indvar, one)); + MakeBinaryHlo(HloOpcode::kAdd, indvar, one)); std::vector loop_body_generator_args; for (int64 i = 1, e = loop_state_shape.tuple_shapes_size(); i < e; i++) { TF_ASSIGN_OR_RETURN(HloInstruction * tuple_element, - CreateGetTupleElementHlo(param, i)); + MakeGetTupleElementHlo(param, i)); loop_body_generator_args.push_back(tuple_element); } TF_ASSIGN_OR_RETURN(std::vector next_state, @@ -238,7 +238,7 @@ static Shape MakeLoopStateShape(const WhileUtil::LoopStateTy& init_values) { std::vector result; for (int64 i = 0, e = init_values.size(); i < e; i++) { TF_ASSIGN_OR_RETURN(HloInstruction * user_state, - CreateGetTupleElementHlo(while_instr, i + 1)); + MakeGetTupleElementHlo(while_instr, i + 1)); result.push_back(user_state); } return result; -- GitLab From 72a0b0efbc74a03fb60a547a474c0fdb2115d898 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Mar 2018 15:58:44 -0700 Subject: [PATCH 087/960] Clarifying when is it possible to use a tape while it is still active. PiperOrigin-RevId: 189260773 --- tensorflow/c/eager/tape.h | 2 ++ tensorflow/python/eager/backprop_test.py | 11 +++++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 11 ++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index bdb0815d6b..c7bd3bdafd 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -152,6 +152,8 @@ class GradientTape { gtl::ArraySlice output_gradients, std::vector* result); + bool IsPersistent() const { return persistent_; } + private: TensorTape tensor_tape_; OpTape op_tape_; diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 07a2155d24..5934293dfc 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -195,6 +195,17 @@ class BackpropTest(test.TestCase): g, = backprop.gradients_function(loss, [0])(logits, labels) self.assertAllEqual(g.numpy(), [[-0.5, 0.5]]) + def testGradientWithinTapeBlock(self): + v1 = resource_variable_ops.ResourceVariable(1.) + with backprop.GradientTape() as t: + loss = 2 * v1 + with self.assertRaises(RuntimeError): + t.gradient(loss, [v1]) + with backprop.GradientTape(persistent=True) as t: + loss = 2 * v1 + grad = t.gradient(loss, [v1]) + self.assertAllEqual(grad[0], 2.0) + @test_util.assert_no_new_tensors def testSecondGrad(self): diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index fe9785dc66..701f68b8f7 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1323,6 +1323,16 @@ std::vector MakeTensorList(PyObject* tensors) { PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, PyObject* target, PyObject* sources, PyObject* output_gradients, TF_Status* status) { + TFE_Py_Tape* tape_obj = reinterpret_cast(tape); + if (!tape_obj->tape->IsPersistent()) { + auto* tape_set = GetTapeSet(); + if (tape_set->find(tape_obj) != tape_set->end()) { + PyErr_SetString(PyExc_RuntimeError, + "Trying to call tape.gradient on a non-persistent tape " + "while it is still active."); + return nullptr; + } + } PyVSpace c_vspace(vspace); if (!c_vspace.Initialize().ok()) { return nullptr; @@ -1348,7 +1358,6 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, Py_INCREF(tensor); } } - TFE_Py_Tape* tape_obj = reinterpret_cast(tape); std::vector result; status->status = tape_obj->tape->ComputeGradient( c_vspace, target_vec, sources_vec, outgrad_vec, &result); -- GitLab From cc13b530198d2454c1c422a42aad4edf07257e9d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 16:29:14 -0700 Subject: [PATCH 088/960] Add initial host transfer ops for XLA support for computation on the host during a compiled computation. PiperOrigin-RevId: 189265297 --- .../jit/encapsulate_subgraphs_pass.cc | 49 +++--- .../jit/encapsulate_subgraphs_pass_test.cc | 142 ++++++++++-------- tensorflow/compiler/tf2xla/xla_compiler.cc | 12 +- tensorflow/compiler/tf2xla/xla_compiler.h | 8 +- 4 files changed, 124 insertions(+), 87 deletions(-) diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 2d175c40f9..0685036c9d 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -53,6 +53,8 @@ namespace tensorflow { const char* const kXlaCompiledKernelAttr = "_XlaCompiledKernel"; const char* const kXlaNumConstantArgsAttr = "_XlaNumConstantArgs"; const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs"; +const char* const kXlaHostTransferSequencerAttr = + "_xla_host_transfer_sequencer"; namespace { @@ -143,7 +145,7 @@ struct NodeSlot { // everything to use it. static const char* const kArgOp = "_Arg"; static const char* const kRetValOp = "_Retval"; -static const char* const kHostComputeOp = "_XlaHostCompute"; +static const char* const kHostComputeOp = "XlaHostCompute"; static const char* const kSendFromHostOp = "_XlaSendFromHost"; static const char* const kRecvAtHostOp = "_XlaRecvAtHost"; @@ -328,8 +330,8 @@ class Encapsulator { Status MakeSequencingNode(const string& subgraph_name, Graph* graph_out); // If there is a sequencer node, adds a control edge from the sequencer to - // all the downstream nodes of call_node_outputs. - void ConnectSequencerToOutputs(Graph* graph_out); + // the call node. + void ConnectSequencerToCallNode(Graph* graph_out); Status AddShapeInferenceInfo( const string& outside_compilation_subgraph_name, @@ -425,6 +427,10 @@ class Encapsulator { // NodeDef for the function call node. NodeDef call_node_def_; + // Name that is used for the call node. This may not be + // call_node_def_.name() if the client supplies a rewrite lambda. + string function_def_name_; + // Placeholder node simulating the host compute key in the output graph. // Not owned. Node* host_compute_key_placeholder_ = nullptr; @@ -863,25 +869,21 @@ Status Encapsulator::Subgraph::MakeSequencingNode(const string& subgraph_name, NodeDef seq_def; NodeDefBuilder builder(strings::StrCat(subgraph_name, "_sequencer"), "NoOp"); + builder.Attr(kXlaHostTransferSequencerAttr, subgraph_name); + builder.Device(device_); Status s = builder.Finalize(&seq_def); if (!s.ok()) return s; sequencer_ = graph_out->AddNode(seq_def, &s); if (!s.ok()) return s; - sequencer_->set_assigned_device_name(device_); } return Status::OK(); } -void Encapsulator::Subgraph::ConnectSequencerToOutputs(Graph* graph_out) { +void Encapsulator::Subgraph::ConnectSequencerToCallNode(Graph* graph_out) { if (sequencer_ != nullptr) { - std::unordered_set output_dependencies; - for (Node* node : call_node_outputs_->out_nodes()) { - output_dependencies.insert(node); - } - for (Node* node : output_dependencies) { - graph_out->AddControlEdge(sequencer_, node); - } + VLOG(2) << "ConnectSequencerToCallNode"; + graph_out->AddControlEdge(sequencer_, call_node_inputs_); } } @@ -927,6 +929,8 @@ Status Encapsulator::Subgraph::BuildFunctionDef( name = call_node_def_.op(); } + function_def_name_ = name; + FunctionDef fdef; TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef)); @@ -982,7 +986,7 @@ Status Encapsulator::Subgraph::AddShapeInferenceInfo( Status Encapsulator::Subgraph::ReplaceFunctionDef( FunctionLibraryDefinition* library) { - const string& name = call_node_def_.name(); + const string& name = function_def_name_; FunctionDef fdef; TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef)); @@ -1126,6 +1130,8 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_recv"), kRecvAtHostOp); + // TODO(misard) When we add replication the device placement will have to be + // redone. builder.Device(device_); builder.Attr("Toutputs", dtypes); // TODO(misard) For now we only support TPU device 0. @@ -1138,7 +1144,6 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode( oc_subgraph->recv_at_host = graph_out->AddNode(recv_def, &s); if (!s.ok()) return s; - oc_subgraph->recv_at_host->set_assigned_device_name(device_); graph_out->AddEdge(host_compute_key_placeholder_, 0, oc_subgraph->recv_at_host, 0); @@ -1178,6 +1183,8 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_send"), kSendFromHostOp); + // TODO(misard) When we add replication the device placement will have to be + // redone. builder.Device(device_); builder.Attr("Tinputs", dtypes); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, @@ -1191,7 +1198,6 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( oc_subgraph->send_from_host = graph_out->AddNode(send_def, &s); if (!s.ok()) return s; - oc_subgraph->send_from_host->set_assigned_device_name(device_); graph_out->AddEdge(host_compute_key_placeholder_, 0, oc_subgraph->send_from_host, inputs.size()); @@ -1675,7 +1681,7 @@ Status Encapsulator::AddEdgesToOutputGraph( for (auto& subgraph_entry : subgraphs_) { Subgraph& subgraph = subgraph_entry.second; - subgraph.ConnectSequencerToOutputs(graph_out); + subgraph.ConnectSequencerToCallNode(graph_out); } return Status::OK(); @@ -1827,8 +1833,12 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( (*static_shape_out)[in_edge->dst_input()] = proto; } } else { + has_parent_with_unknown_shape = true; if (!visited[src_node->id()]) { - has_parent_with_unknown_shape = true; + if (VLOG_IS_ON(2)) { + TensorShapeProto proto; + context->ShapeHandleToProto(shape, &proto); + } stack.push_back({src_node, false}); } } @@ -1981,6 +1991,11 @@ Status Encapsulator::GetShapeInfoForOutsideCompilationSends( TF_RETURN_IF_ERROR(MakeGraphForOutsideCompilationSends( *graph_out, &pruned_graph, &shape_refiner, &node_images, library)); + if (VLOG_IS_ON(1)) { + dump_graph::DumpGraphToFile("pruned_graph_for_shape_inference", + *pruned_graph, library); + } + for (auto& subgraph_entry : subgraphs_) { Subgraph& subgraph = subgraph_entry.second; // Find all the recv_at_host nodes in this subgraph. diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index d7bea56a72..711b1424c7 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -29,6 +29,9 @@ limitations under the License. namespace tensorflow { namespace { +const char* const kXlaHostTransferSequencerAttr = + "_xla_host_transfer_sequencer"; + template bool EqualProtoMap(const ::tensorflow::protobuf::Map& a, const ::tensorflow::protobuf::Map& b, @@ -248,7 +251,7 @@ bool EqualFunctionDefLibrary(const FunctionDefLibrary& expected, // These dummy Op registrations are here because the real Op registrations live // in contrib and there can't be a dependence from this test to contrib. -REGISTER_OP("_XlaHostCompute") +REGISTER_OP("XlaHostCompute") .Input("inputs: Tinputs") .Output("outputs: Toutputs") .Attr("Tinputs: list(type) >= 0") @@ -321,8 +324,13 @@ REGISTER_OP("AddNLikeTest") .SetIsCommutative() .SetIsAggregate(); -Node* NoOp(const GraphDefBuilder::Options& opts) { - return ops::SourceOp("NoOp", opts); +Node* Sequencer(const GraphDefBuilder::Options& opts, + const string& call_node_name) { + if (opts.HaveError()) return nullptr; + NodeBuilder node_builder(opts.GetNameForOp("NoOp"), "NoOp", + opts.op_registry()); + return opts.WithAttr(kXlaHostTransferSequencerAttr, call_node_name) + .FinalizeBuilder(&node_builder); } Node* Input(const GraphDefBuilder::Options& opts) { @@ -870,7 +878,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { {}, {"outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"C:o:0", "c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -888,10 +896,6 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { Node* a = Input(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); - NodeBuilder node_builder("F1", "F1", lib_def.get()); - node_builder.Input(a).Input(b); - Node* call = b2.opts().FinalizeBuilder(&node_builder); - Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv = @@ -906,10 +910,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); - Node* s = NoOp( - b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send})); + Node* s = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + + NodeBuilder node_builder("F1", "F1", lib_def.get()); + node_builder.Input(a).Input(b); + Node* call = + b2.opts().WithControlInputs({s}).FinalizeBuilder(&node_builder); - Binary(a, call, b2.opts().WithName("G").WithControlInputs({s, e})); + Binary(a, call, b2.opts().WithName("G").WithControlInputs({e})); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1014,7 +1024,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {}, {"outside_compilation_O1_host_compute"}}, {{"outside_compilation_O2_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"D:o:0", "F:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1023,7 +1033,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {"shapes", gtl::ArraySlice({})}}, {"F"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1041,10 +1051,6 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { Node* a = Input(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); - NodeBuilder node_builder("F1", "F1", lib_def.get()); - node_builder.Input(a).Input(b); - Node* call = b2.opts().FinalizeBuilder(&node_builder); - Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv1 = @@ -1070,11 +1076,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", {h}, b2.opts().WithName("outside_compilation_F1_O2_send")); - Node* s = NoOp(b2.opts() - .WithName("F1_sequencer") - .WithControlInputs({recv1, send1, recv2, send2})); + Node* s = Sequencer(b2.opts() + .WithName("F1_sequencer") + .WithControlInputs({recv1, send1, recv2, send2}), + "F1"); - Binary(g, call, b2.opts().WithName("J").WithControlInput(s)); + NodeBuilder node_builder("F1", "F1", lib_def.get()); + node_builder.Input(a).Input(b); + Node* call = b2.opts().WithControlInput(s).FinalizeBuilder(&node_builder); + + Binary(g, call, b2.opts().WithName("J")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1156,7 +1167,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {}, {"outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"C:o:0", "D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1176,7 +1187,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { "BinaryTest", {"f_0_arg", "outside_compilation_O1_host_compute:outputs:0"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"G:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1207,32 +1218,34 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), + "F1"); + NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); - Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); - Node* s1 = NoOp( - b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1})); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); Node* key_constant2 = KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder")); Node* recv2 = RecvAtHost( ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", {DT_FLOAT}, b2.opts().WithName("outside_compilation_F2_O1_recv")); - Node* h = Binary(ops::NodeOut(call1, 1), recv2, - b2.opts().WithName("H").WithControlInput(s1)); + Node* h = Binary(ops::NodeOut(call1, 1), recv2, b2.opts().WithName("H")); Node* send2 = SendFromHost( ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", {h}, b2.opts().WithName("outside_compilation_F2_O1_send")); + Node* s2 = Sequencer( + b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}), + "F2"); NodeBuilder node_builder2("F2", "F2", lib_def.get()); node_builder2.Input(e).Input(call1); Node* call2 = b2.opts() - .WithControlInputs({s1, e, call1}) + .WithControlInputs({s2, e, call1}) .FinalizeBuilder(&node_builder2); - Node* s2 = NoOp( - b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2})); - Binary(call2, ops::NodeOut(call2, 1), - b2.opts().WithName("J").WithControlInput(s2)); + Binary(call2, ops::NodeOut(call2, 1), b2.opts().WithName("J")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1280,7 +1293,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { "BinaryTest", {"D:o:0", "outside_compilation_O1_host_compute:outputs:0"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1304,12 +1317,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { Node* send1 = SendFromHost( ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, b2.opts().WithName("outside_compilation_F1_O1_send")); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInput(send1), "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); - Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); - Node* s1 = NoOp(b2.opts().WithName("F1_sequencer").WithControlInput(send1)); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); - Unary(call1, b2.opts().WithName("G").WithControlInput(s1)); + Unary(call1, b2.opts().WithName("G")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1358,7 +1373,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { "BinaryTest", {"D:o:0", "outside_compilation_O1_host_compute:outputs:0"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {}, {{"Tinputs", gtl::ArraySlice({})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1386,13 +1401,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { Node* send1 = SendFromHost( ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, b2.opts().WithName("outside_compilation_F1_O1_send")); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), + "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); - Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); - Node* s1 = NoOp( - b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1})); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); - Unary(call1, b2.opts().WithName("G").WithControlInput(s1)); + Unary(call1, b2.opts().WithName("G")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1434,7 +1451,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}}, {{"F"}, "UnaryTest", {"D:o:0"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, @@ -1457,12 +1474,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Unary(recv1, b2.opts().WithName("E")); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInput(recv1), "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); - Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); - Node* s1 = NoOp(b2.opts().WithName("F1_sequencer").WithControlInput(recv1)); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); - Binary(e, call1, b2.opts().WithName("G").WithControlInput(s1)); + Binary(e, call1, b2.opts().WithName("G")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1509,7 +1528,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { {}, {"outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"D:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({})}, @@ -1537,13 +1556,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); + Node* s1 = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), + "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); - Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); - Node* s1 = NoOp( - b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1})); + Node* call1 = + b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1); - Binary(e, call1, b2.opts().WithName("G").WithControlInput(s1)); + Binary(e, call1, b2.opts().WithName("G")); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } @@ -1668,7 +1689,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { {}, {"outside_compilation_O1_host_compute"}}, {{"outside_compilation_O1_host_compute"}, - "_XlaHostCompute", + "XlaHostCompute", {"c:o:0"}, {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, @@ -1687,11 +1708,6 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { Node* b = Input(b2.opts().WithName("B")); Node* c = Unary(a, b2.opts().WithName("C")); - NodeBuilder node_builder("F1", "F1", lib_def.get()); - node_builder.Input(b).Input(c); - Node* call = - b2.opts().WithControlInputs({c}).FinalizeBuilder(&node_builder); - Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv = RecvAtHost( @@ -1706,10 +1722,16 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); - Node* s = NoOp( - b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send})); + Node* s = Sequencer( + b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), + "F1"); + + NodeBuilder node_builder("F1", "F1", lib_def.get()); + node_builder.Input(b).Input(c); + Node* call = + b2.opts().WithControlInputs({s, c}).FinalizeBuilder(&node_builder); - Binary(a, call, b2.opts().WithName("G").WithControlInputs({s, e})); + Binary(a, call, b2.opts().WithName("G").WithControlInputs({e})); TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected)); } diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 0dc5118c9c..7cdf4d1b3e 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -718,8 +718,8 @@ Status XlaCompiler::GetChannelHandle(const string& key, namespace { -void SetTransfer(const string& key, const std::vector& types, - const std::vector& shapes, +void SetTransfer(const string& key, gtl::ArraySlice types, + gtl::ArraySlice shapes, tf2xla::HostTransferMetadata* transfer) { transfer->set_key(key); CHECK(types.size() == shapes.size()); @@ -733,8 +733,8 @@ void SetTransfer(const string& key, const std::vector& types, } // namespace Status XlaCompiler::SetDeviceToHostMetadata( - const string& key, const std::vector& types, - const std::vector& shapes) { + const string& key, gtl::ArraySlice types, + gtl::ArraySlice shapes) { if (host_compute_sends_.find(key) != host_compute_sends_.end()) { return errors::InvalidArgument( "Duplicate calls to SetDeviceToHostMetadata with key ", key); @@ -760,8 +760,8 @@ Status XlaCompiler::GetDeviceToHostShapes( } Status XlaCompiler::SetHostToDeviceMetadata( - const string& key, const std::vector& types, - const std::vector& shapes) { + const string& key, gtl::ArraySlice types, + gtl::ArraySlice shapes) { if (host_compute_recvs_.find(key) != host_compute_sends_.end()) { return errors::InvalidArgument( "Duplicate calls to SetHostToDeviceMetadata with key ", key); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index a70d2637e0..5f1c631976 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -304,8 +304,8 @@ class XlaCompiler { // Sets the shapes and types for the device to host transfer associated with // 'key'. Status SetDeviceToHostMetadata(const string& key, - const std::vector& types, - const std::vector& shapes); + gtl::ArraySlice types, + gtl::ArraySlice shapes); // Gets the shapes the device to host transfer associated with 'key'. Status GetDeviceToHostShapes(const string& key, @@ -314,8 +314,8 @@ class XlaCompiler { // Sets the shapes and types for the host to device transfer associated with // 'key'. Status SetHostToDeviceMetadata(const string& key, - const std::vector& types, - const std::vector& shapes); + gtl::ArraySlice types, + gtl::ArraySlice shapes); const Options& options() const { return options_; } xla::Client* client() const { return options_.client; } -- GitLab From f4719f33c0507e0e216f8370898bf56397654d1e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 15 Mar 2018 16:52:23 -0700 Subject: [PATCH 089/960] Symbolic gradient optimization PiperOrigin-RevId: 189268327 --- tensorflow/core/BUILD | 6 +- tensorflow/core/grappler/optimizers/BUILD | 3 + .../grappler/optimizers/function_optimizer.cc | 128 ++++++++++++++++++ .../grappler/optimizers/function_optimizer.h | 6 +- .../optimizers/function_optimizer_test.cc | 55 +++++++- .../grappler/optimizers/meta_optimizer.cc | 6 +- 6 files changed, 192 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 352e183104..14769c3770 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2065,10 +2065,13 @@ tf_cuda_library( CORE_CPU_BASE_HDRS = GRAPH_HDRS + [ "common_runtime/device.h", + "common_runtime/device_mgr.h", "common_runtime/eval_const_tensor.h", "common_runtime/graph_runner.h", "common_runtime/shape_refiner.h", "framework/versions.h", + "common_runtime/process_function_library_runtime.h", + "common_runtime/function.h", ] tf_cuda_library( @@ -2115,19 +2118,16 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/costmodel_manager.h", "common_runtime/debugger_state_interface.h", "common_runtime/device_factory.h", - "common_runtime/device_mgr.h", "common_runtime/device_set.h", "common_runtime/dma_helper.h", "common_runtime/eigen_thread_pool.h", "common_runtime/executor.h", - "common_runtime/function.h", "common_runtime/graph_optimizer.h", "common_runtime/local_device.h", "common_runtime/memory_types.h", "common_runtime/mkl_cpu_allocator.h", "common_runtime/optimization_registry.h", "common_runtime/pending_counts.h", - "common_runtime/process_function_library_runtime.h", "common_runtime/process_util.h", "common_runtime/profile_handler.h", "common_runtime/renamed_device.h", diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0df5307d9c..3499879dee 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -141,6 +141,8 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":graph_optimizer", + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", @@ -156,6 +158,7 @@ tf_cc_test( ":function_optimizer", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:functional_ops", "//tensorflow/core:all_kernels", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index b17715e742..d47887bfc8 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -15,10 +15,16 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/function_optimizer.h" #include +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" @@ -128,6 +134,121 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, return Status::OK(); } +class FakeCPUDevice : public Device { + public: + FakeCPUDevice(Env* env, const DeviceAttributes& attr) : Device(env, attr) {} + Status Sync() override { return Status::OK(); } +}; + +Status InlineSymbolicGradient(const NodeDef& node, + const FunctionDefLibrary& library, + GraphDef* inlined_graph) { + Env* env = Env::Default(); + DeviceAttributes attr; + attr.set_name("/device:CPU:0"); + attr.set_device_type("CPU"); + FakeCPUDevice* dev = new FakeCPUDevice(env, attr); + std::vector devices; + devices.push_back(dev); + DeviceMgr dvc_mgr(devices); + FunctionLibraryDefinition function_library(OpRegistry::Global(), library); + + GraphDef graph_def; + + // Create a node to anchor the gradient inputs + NodeDef* inlined_input = graph_def.add_node(); + inlined_input->set_name("FunctionInputs"); + inlined_input->set_op("IdentityN"); + AttrValue::ListValue* type_list = + (*inlined_input->mutable_attr())["T"].mutable_list(); + for (const auto& type : node.attr().at("Tin").list().type()) { + type_list->add_type(static_cast(type)); + } + + // Add the gradient node + NodeDef* inlined = graph_def.add_node(); + *inlined = node; + inlined->clear_input(); + for (int i = 0; i < node.attr().at("Tin").list().type_size(); ++i) { + inlined->add_input(strings::StrCat(inlined_input->name(), ":", i)); + } + + // Create a node to anchor the gradient outputs + NodeDef* inlined_output = graph_def.add_node(); + inlined_output->set_name("FunctionOutputs"); + inlined_output->set_op("IdentityN"); + type_list = (*inlined_output->mutable_attr())["T"].mutable_list(); + for (const auto& type : node.attr().at("Tout").list().type()) { + type_list->add_type(static_cast(type)); + } + for (int i = 0; i < node.attr().at("Tout").list().type_size(); ++i) { + inlined_output->add_input(strings::StrCat(inlined->name(), ":", i)); + } + + // Convert the graphdef to a graph + OptimizerOptions optimizer_opts; + optimizer_opts.set_do_function_inlining(true); + ProcessFunctionLibraryRuntime pflr(&dvc_mgr, env, + graph_def.versions().producer(), + &function_library, optimizer_opts); + FunctionLibraryRuntime* flr = pflr.GetFLR(dev->name()); + CHECK(flr); + GraphConstructorOptions graph_ctor_opts; + graph_ctor_opts.allow_internal_ops = true; + graph_ctor_opts.expect_device_spec = false; + Graph graph(function_library); + TF_RETURN_IF_ERROR( + ConvertGraphDefToGraph(graph_ctor_opts, graph_def, &graph)); + + // Recursively inline the functions until there is nothing more to inline. We + // should at least expand one function. + int counter = 0; + while (counter < 50 && ExpandInlineFunctions(flr, &graph)) { + ++counter; + } + if (counter == 0) { + // Nothing was inlined + return errors::InvalidArgument( + strings::StrCat("Failed to inline node ", node.name())); + } + + GraphDef inlined_graph_def; + graph.ToGraphDef(&inlined_graph_def); + + // Add the default values of attributes to the nodes that have been inlined. + TF_RETURN_IF_ERROR(AddDefaultAttrsToGraphDef(&inlined_graph_def, + *graph.op_registry(), 0, true)); + + // Add the inlined nodes to the graph + for (NodeDef& inlined_node : *inlined_graph_def.mutable_node()) { + if (inlined_node.name() == "FunctionOutputs") { + inlined_node.set_name(node.name()); + for (int i = 0; i < inlined_node.input_size(); ++i) { + inlined_node.set_input( + i, strings::StrCat(node.name(), "/", inlined_node.input(i))); + } + } else if (inlined_node.name() == "FunctionInputs") { + inlined_node.set_name( + strings::StrCat(node.name(), "/", inlined_node.name())); + inlined_node.clear_input(); + for (int i = 0; i < node.input_size(); ++i) { + inlined_node.add_input(node.input(i)); + } + } else { + inlined_node.set_name( + strings::StrCat(node.name(), "/", inlined_node.name())); + for (int i = 0; i < inlined_node.input_size(); ++i) { + inlined_node.set_input( + i, strings::StrCat(node.name(), "/", inlined_node.input(i))); + } + } + inlined_node.set_device(node.device()); + inlined_graph->add_node()->Swap(&inlined_node); + } + + return Status::OK(); +} + Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { std::unordered_map functions; @@ -159,6 +280,13 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // Inline functions when possible. for (const NodeDef& node : item.graph.node()) { + if (opt_level_ == RewriterConfig::AGGRESSIVE) { + if (node.op() == "SymbolicGradient") { + TF_RETURN_IF_ERROR(InlineSymbolicGradient(node, item.graph.library(), + optimized_graph)); + continue; + } + } auto it = functions.find(node.op()); if (it == functions.end()) { *optimized_graph->add_node() = node; diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h index 5c80226e9d..b124efe01d 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.h +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { @@ -25,7 +26,7 @@ namespace grappler { // operations to make the overall graph more efficient. class FunctionOptimizer : public GraphOptimizer { public: - FunctionOptimizer() {} + FunctionOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} ~FunctionOptimizer() override {} string name() const override { return "function_optimizer"; }; @@ -35,6 +36,9 @@ class FunctionOptimizer : public GraphOptimizer { void Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& optimized_graph, double result) override; + + private: + RewriterConfig::Toggle opt_level_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index bafcdf4923..b1c55d838d 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include "tensorflow/cc/ops/functional_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -40,7 +42,7 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { test::function::XTimesTwo(), }); - FunctionOptimizer optimizer; + FunctionOptimizer optimizer(RewriterConfig::DEFAULT); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -133,7 +135,7 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { x_times_two, }); - FunctionOptimizer optimizer; + FunctionOptimizer optimizer(RewriterConfig::DEFAULT); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -219,7 +221,7 @@ TEST_F(FunctionOptimizerTest, FunctionWithOutputMapping) { func, }); - FunctionOptimizer optimizer; + FunctionOptimizer optimizer(RewriterConfig::DEFAULT); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -317,7 +319,7 @@ TEST_F(FunctionOptimizerTest, FunctionWithInputForwarding) { func, }); - FunctionOptimizer optimizer; + FunctionOptimizer optimizer(RewriterConfig::DEFAULT); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -368,7 +370,7 @@ TEST_F(FunctionOptimizerTest, FunctionWithoutInput) { func, }); - FunctionOptimizer optimizer; + FunctionOptimizer optimizer(RewriterConfig::DEFAULT); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -377,6 +379,49 @@ TEST_F(FunctionOptimizerTest, FunctionWithoutInput) { EXPECT_EQ(item.graph.DebugString(), output.DebugString()); } +TEST_F(FunctionOptimizerTest, SymbolicGradients) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + // auto T = DT_FLOAT; + FunctionDef func = FunctionDefHelper::Define( + "TestFunc", {"x:float", "y:float"}, {"l:float"}, {}, + { + {{"z"}, "Add", {"x", "y"}, {{"T", DT_FLOAT}}}, + FunctionDefHelper::Const("zero", 0), + FunctionDefHelper::Const("one", 1), + {{"r"}, "Rank", {"z"}, {{"T", DT_FLOAT}}}, + {{"indices"}, "Range", {"zero", "r", "one"}}, + {{"l"}, "Sum", {"z", "indices"}, {{"T", DT_FLOAT}}}, + }); + + auto dummy_variable = ops::Variable(scope, {2, 2}, DT_FLOAT); + auto x = ops::Const(scope, 1.0f); + auto y = ops::Const(scope, 2.0f); + auto dl = ops::Const(scope, 3.0f); + + NameAttrList fn; + fn.set_name("TestFunc"); + (*fn.mutable_attr())["T"].set_type(DT_FLOAT); + auto g0 = ops::SymbolicGradient(scope, std::initializer_list{x, y, dl}, + {DT_FLOAT, DT_FLOAT}, fn); + auto out1 = ops::Identity(scope.WithOpName("out1"), g0.output[0]); + auto out2 = ops::Identity(scope.WithOpName("out2"), g0.output[1]); + + GrapplerItem item; + TF_EXPECT_OK(scope.ToGraphDef(&item.graph)); + *item.graph.mutable_library()->add_function() = func; + + FunctionOptimizer optimizer(RewriterConfig::AGGRESSIVE); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::vector expected = EvaluateNodes(item.graph, {"out1", "out2"}); + std::vector optimized = EvaluateNodes(output, {"out1", "out2"}); + test::ExpectTensorEqual(expected[0], optimized[0]); + test::ExpectTensorEqual(expected[1], optimized[1]); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 3a764937fd..7b2e7a1fe0 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -58,7 +58,7 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset(new ModelPruner()); } if (optimizer == "function") { - graph_optimizer.reset(new FunctionOptimizer()); + graph_optimizer.reset(new FunctionOptimizer(cfg_.function_optimization())); } if (optimizer == "constfold") { graph_optimizer.reset(new ConstantFolding(cpu_device_)); @@ -95,8 +95,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr(new ModelPruner())); } if (cfg_.function_optimization() != RewriterConfig::OFF) { - optimizers.push_back( - std::unique_ptr(new FunctionOptimizer())); + optimizers.push_back(std::unique_ptr( + new FunctionOptimizer(cfg_.function_optimization()))); } if (cfg_.constant_folding() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( -- GitLab From 2dd9a59d292c1226d03a1fa5bd551e5ccc5e6f9c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 17:46:39 -0700 Subject: [PATCH 090/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 189274859 --- tensorflow/go/op/wrappers.go | 190 +++++++++++++++++------------------ 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 0424c12fd9..469d1e9adb 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -509,101 +509,6 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua return op.Output(0) } -// Scatter `updates` into a new (initially zero) tensor according to `indices`. -// -// Creates a new tensor by applying sparse `updates` to individual -// values or slices within a zero tensor of the given `shape` according to -// indices. This operator is the inverse of the @{tf.gather_nd} operator which -// extracts values or slices from a given tensor. -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// shape = tf.constant([8]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [0, 11, 0, 10, 9, 0, 0, 12] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// shape = tf.constant([4, 4, 4]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], -// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] -// -// Arguments: -// indices: Index tensor. -// updates: Updates to scatter into output. -// shape: 1-D. The shape of the resulting tensor. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNd", - Input: []tf.Input{ - indices, updates, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -25067,6 +24972,101 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Scatter `updates` into a new (initially zero) tensor according to `indices`. +// +// Creates a new tensor by applying sparse `updates` to individual +// values or slices within a zero tensor of the given `shape` according to +// indices. This operator is the inverse of the @{tf.gather_nd} operator which +// extracts values or slices from a given tensor. +// +// **WARNING**: The order in which updates are applied is nondeterministic, so the +// output will be nondeterministic if `indices` contains duplicates. +// +// `indices` is an integer tensor containing indices into a new tensor of shape +// `shape`. The last dimension of `indices` can be at most the rank of `shape`: +// +// indices.shape[-1] <= shape.rank +// +// The last dimension of `indices` corresponds to indices into elements +// (if `indices.shape[-1] = shape.rank`) or slices +// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +// `shape`. `updates` is a tensor with shape +// +// indices.shape[:-1] + shape[indices.shape[-1]:] +// +// The simplest form of scatter is to insert individual elements in a tensor by +// index. For example, say we want to insert 4 scattered elements in a rank-1 +// tensor with 8 elements. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// shape = tf.constant([8]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [0, 11, 0, 10, 9, 0, 0, 12] +// +// We can also, insert entire slices of a higher rank tensor all at once. For +// example, if we wanted to insert two slices in the first dimension of a +// rank-3 tensor with two matrices of new values. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[0], [2]]) +// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]], +// [[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]]]) +// shape = tf.constant([4, 4, 4]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], +// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] +// +// Arguments: +// indices: Index tensor. +// updates: Updates to scatter into output. +// shape: 1-D. The shape of the resulting tensor. +// +// Returns A new tensor with the given shape and updates applied according +// to the indices. +func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ScatterNd", + Input: []tf.Input{ + indices, updates, shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) -- GitLab From 6b9eebb0c024ee3500fef5ec8019c7b5ee4cb6c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 18:22:09 -0700 Subject: [PATCH 091/960] Internal cleanup. PiperOrigin-RevId: 189278596 --- tensorflow/c/eager/c_api.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 0811bd363f..455bc19be8 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -1213,8 +1213,11 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // Note that `h` may not be currently ready. However execution order will // make sure that `h` is ready before the copy is actually done. CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); + TFE_TensorHandle* output = node->dst(); + // Note that calling Add makes `node` accessible by the TFE_Executor thread. + // So further accesses need to be thread-safe. ctx->executor.Add(node); - return node->dst(); + return output; } else { TFE_TensorHandle* output = nullptr; status->status = TensorHandleCopyToDevice(h, ctx, dstd, &output); -- GitLab From 4c1081cfddbba33dbd12a28635265bc718697e23 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Mar 2018 21:33:44 -0700 Subject: [PATCH 092/960] Implement out of bounds behavior for gather in the HLO evaluator This makes the OOB behavior of gather in the HLO evaluator consistent with DynamicSlice while we figure out the semantics we want long term. The HLO->HLO gather expander inherits the wrapping behavior of dynamic-slice because it lowers the gather ops to loops of dynamic slices. PiperOrigin-RevId: 189293175 --- .../compiler/xla/service/hlo_evaluator.cc | 11 ++- .../xla/tests/gather_operation_test.cc | 80 ++++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 91341b5d35..693004d364 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2771,6 +2771,8 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) { gather->gather_dimension_numbers(), /*input_shape=*/operand.shape(), /*output_shape=*/shape); + const Shape& operand_shape = operand.shape(); + auto gather_inner_loop_body = [&](ArraySlice output_window_index, ArraySlice input_gather_index, @@ -2780,9 +2782,16 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) { output_window_index_to_input_index(output_window_index)); for (int i = 0, e = output_index.size(); i < e; i++) { output_index[i] = output_gather_index[i] + output_window_index[i]; + DCHECK_LT(output_index[i], shape.dimensions(i)); } for (int i = 0, e = input_index.size(); i < e; i++) { - input_index[i] = input_gather_index[i] + input_window_index[i]; + // TODO(b/74360564): We should implement whatever out of bounds behavior + // we decide for dynamic-slice here as well. + input_index[i] = (input_gather_index[i] + input_window_index[i]) % + operand_shape.dimensions(i); + if (input_index[i] < 0) { + input_index[i] += operand_shape.dimensions(i); + } } TF_RETURN_IF_ERROR( result->CopyElementFrom(operand, input_index, output_index)); diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 4e2f19ade1..0830e9c8f0 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -31,12 +31,16 @@ class GatherOperationTest : public HloTestBase { protected: void RunTest(const string& hlo_text, Literal* operand, Literal* gather_indices) { + RunTest(hlo_text, {operand, gather_indices}); + } + + void RunTest(const string& hlo_text, + tensorflow::gtl::ArraySlice args) { HloModuleConfig config; config.set_debug_options(GetDebugOptionsForTest()); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, tools::Parse(hlo_text, config)); - EXPECT_TRUE( - RunAndCompare(std::move(module), {operand, gather_indices}, nullopt)); + EXPECT_TRUE(RunAndCompare(std::move(module), args, nullopt)); } }; @@ -259,5 +263,77 @@ ENTRY main { RunTest(hlo_text, operand.get(), gather_indices.get()); } +XLA_TEST_F(GatherOperationTest, OutOfBoundsIndex) { + // Out of bounds indices must not crash, and the indices in range should + // produce the same values across all backends. + // + // TODO(b/74360564): Once we have a well defined semantics for OOB accesses, + // we should get rid of the mask and check that backends produce the same + // value for OOB indices too. + + const string hlo_text = R"( +HloModule BatchDynamicSlice + +ENTRY main { + operand = s32[3,3]{1,0} parameter(0) + indices = s32[6,2]{1,0} parameter(1) + gather = s32[6,1,1]{2,1,0} gather(operand, indices), + output_window_dims={1,2}, + elided_window_dims={}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=1, + window_bounds={1,1} + gather_reshaped = s32[6]{0} reshape(gather) + in_bounds_mask = s32[6]{0} parameter(2) + ROOT result = s32[6]{0} multiply(gather_reshaped, in_bounds_mask) +} +)"; + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR2( + {{2, 7}, {2, 1}, {1, 1}, {5, 1}, {2147483647, 1}, {1, 2}}); + std::unique_ptr in_bounds_mask = + Literal::CreateR1({0, 1, 1, 0, 0, 1}); + + RunTest(hlo_text, + {operand.get(), gather_indices.get(), in_bounds_mask.get()}); +} + +XLA_TEST_F(GatherOperationTest, NegativeIndex) { + // Negative indices must not crash, and the indices in range should produce + // the same values across all backends. + // + // TODO(b/74360564): Once we have a well defined semantics for negative + // accesses, we should get rid of the mask and check that backends produce the + // same value for negative indices too. + + const string hlo_text = R"( +HloModule BatchDynamicSlice + +ENTRY main { + operand = s32[3,3]{1,0} parameter(0) + indices = s32[6,2]{1,0} parameter(1) + gather = s32[6,1,1]{2,1,0} gather(operand, indices), + output_window_dims={1,2}, + elided_window_dims={}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=1, + window_bounds={1,1} + gather_reshaped = s32[6]{0} reshape(gather) + in_bounds_mask = s32[6]{0} parameter(2) + ROOT result = s32[6]{0} multiply(gather_reshaped, in_bounds_mask) +} +)"; + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR2( + {{2, -1}, {2, 1}, {1, 1}, {-500, 1}, {-2147483648, 1}, {1, 2}}); + std::unique_ptr in_bounds_mask = + Literal::CreateR1({0, 1, 1, 0, 0, 1}); + + RunTest(hlo_text, + {operand.get(), gather_indices.get(), in_bounds_mask.get()}); +} + } // namespace } // namespace xla -- GitLab From 048a149cfec52db38c457fc7937eee0f4cc50888 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 15 Mar 2018 21:39:15 -0700 Subject: [PATCH 093/960] Automated g4 rollback of changelist 189234789 PiperOrigin-RevId: 189293458 --- .../core/ops/compat/ops_history.v1.pbtxt | 31 ------------------- tensorflow/core/ops/ops.pbtxt | 31 ------------------- 2 files changed, 62 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 1834cc998c..85dd1a423a 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -51522,37 +51522,6 @@ op { } } } -op { - name: "SlideDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "window_size" - type: DT_INT64 - } - input_arg { - name: "stride" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "Snapshot" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 763b104305..3faa4eeada 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -24319,37 +24319,6 @@ op { } } } -op { - name: "SlideDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "window_size" - type: DT_INT64 - } - input_arg { - name: "stride" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} op { name: "Snapshot" input_arg { -- GitLab From 6485bb7029c6d856c7ffa744168a8864ef6c986c Mon Sep 17 00:00:00 2001 From: Tatiana Shpeisman Date: Thu, 15 Mar 2018 22:04:33 -0700 Subject: [PATCH 094/960] MKL DNN: fix the TF1.6 speed issue by fixing MKL DNN LRN taking the optimum path (#17605) (#17751) * MKL DNN: fix the TF1.6 speed issue by fixing MKL DNN LRN * fixed typos in the doc for LrnRewrite --- tensorflow/core/graph/mkl_layout_pass.cc | 26 ++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 02038c5d77..568fc87e65 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2492,10 +2492,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { mkl_op_registry::GetMklOpName(csinfo_.identity), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool), CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); @@ -2865,6 +2865,28 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // path is taken, i.e., eigen node is rewritten by MKl DNN node. + static bool LrnRewrite(const Node* n) { + CHECK_NOTNULL(n); + + int depth_radius; + CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); + + // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN + // and use eigen node instead + if (depth_radius == 2) { + return true; + } + VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" + << "case is not optimized by Intel MKL, thus using Eigen op" + << "for LRN " ; + + return false; + } + static bool AddNRewrite(const Node* n) { CHECK_NOTNULL(n); -- GitLab From 17024c0afd19e3713ab5016602372c5244e11183 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Mar 2018 22:28:26 -0700 Subject: [PATCH 095/960] Propagate min-max when resolving constant Reshape op. PiperOrigin-RevId: 189296593 --- .../resolve_constant_unary.cc | 58 ++++++++++++++----- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index 6d5636d744..d4db6f1c00 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -28,24 +28,45 @@ limitations under the License. namespace toco { +bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { + auto& output_array = model->GetArray(op.outputs[0]); + if (output_array.minmax) { + return false; + } + const auto& input_array = model->GetArray(op.inputs[0]); + if (!input_array.minmax) { + return false; + } + const auto& input_minmax = input_array.GetMinMax(); + CHECK(!output_array.minmax); + auto& output_minmax = output_array.GetOrCreateMinMax(); + output_minmax.min = input_minmax.min; + output_minmax.max = input_minmax.max; + return true; +} + bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { const auto unary_it = model->operators.begin() + op_index; const auto* unary_op = unary_it->get(); - // Test for unary ops of types that we know how to resolve - if (unary_op->type != OperatorType::kCast && - unary_op->type != OperatorType::kNeg && - unary_op->type != OperatorType::kTensorFlowRsqrt && - unary_op->type != OperatorType::kTensorFlowSqrt && - unary_op->type != OperatorType::kTensorFlowSquare && - unary_op->type != OperatorType::kTensorFlowSum && - unary_op->type != OperatorType::kTensorFlowMin && - unary_op->type != OperatorType::kTensorFlowMax && - unary_op->type != OperatorType::kTensorFlowReshape && - unary_op->type != OperatorType::kRelu6 && - unary_op->type != OperatorType::kRelu1 && - unary_op->type != OperatorType::kRelu) { - return false; + // Test for unary ops of types that we know how to resolve. + switch (unary_op->type) { + case OperatorType::kCast: + case OperatorType::kNeg: + case OperatorType::kTensorFlowRsqrt: + case OperatorType::kTensorFlowSqrt: + case OperatorType::kTensorFlowSquare: + case OperatorType::kTensorFlowSum: + case OperatorType::kTensorFlowMin: + case OperatorType::kTensorFlowMax: + case OperatorType::kTensorFlowReshape: + case OperatorType::kRelu6: + case OperatorType::kRelu1: + case OperatorType::kRelu: + break; + default: + return false; } + // Check if the input is a constant parameter. if (!IsConstantParameterArray(*model, unary_op->inputs[0])) { return false; @@ -79,6 +100,12 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { return false; } + // The min-max is only copied for ops that copy data without arithmetic. + // In future trivial transpose, etc, can be handled here. + if (unary_op->type == OperatorType::kTensorFlowReshape) { + CopyMinMaxFromFirstInput(*unary_op, model); + } + const auto& input_array = model->GetArray(unary_op->inputs[0]); // We have already tested above for existence of buffers (synonymous to being // a constant param). @@ -138,8 +165,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } } else if (unary_op->type == OperatorType::kTensorFlowReshape) { CHECK(input_buffer_size == output_buffer_size); - memcpy(output_float_data.data(), (*input_float_data).data(), - output_buffer_size * sizeof(output_float_data[0])); + output_float_data = *input_float_data; } else if (unary_op->type == OperatorType::kTensorFlowSum) { CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs"; if (!IsConstantParameterArray(*model, unary_op->inputs[1])) { -- GitLab From 47407ccb99a61fd5115130020ff8ef5ef9272433 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Fri, 16 Mar 2018 14:49:16 +0800 Subject: [PATCH 096/960] Fix minor typo in saved_model.md (#17513) * Fix minor typo in saved_model.md * Fix broken link and revert wrong typo fix --- tensorflow/docs_src/programmers_guide/saved_model.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index d01d187e86..55ee42dd64 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -3,7 +3,7 @@ The @{tf.train.Saver} class provides methods to save and restore models. The @{tf.saved_model.simple_save} function is an easy way to build a @{tf.saved_model$saved model} suitable for serving. -[Estimators](/programmers_guide/estimators) automatically save and restore +[Estimators](@{$programmers_guide/estimators}) automatically save and restore variables in the `model_dir`. ## Save and restore variables @@ -400,7 +400,7 @@ defined in: After training an `Estimator` model, you may want to create a service from that model that takes requests and returns a result. You can run such a -service locally on your machine or deploy it scalably in the cloud. +service locally on your machine or deploy it in the cloud. To prepare a trained Estimator for serving, you must export it in the standard SavedModel format. This section explains how to: -- GitLab From bd1dac4cba7831b327d0c9410e494ee7cf2b93a5 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 15 Mar 2018 23:56:10 -0700 Subject: [PATCH 097/960] Don't put quantization variables in EMA collection by default. PiperOrigin-RevId: 189302082 --- tensorflow/contrib/quantize/python/quantize.py | 4 ++-- tensorflow/contrib/quantize/python/quantize_graph.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 0608ab9302..6cc097b20e 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -44,7 +44,7 @@ def Quantize(graph, activation_bits=8, ema_decay=0.999, quant_delay=None, - vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES): + vars_collection=ops.GraphKeys.GLOBAL_VARIABLES): """Updates graph with quantization operations. Args: @@ -262,7 +262,7 @@ def _InsertQuantOp(context, bits=8, ema_decay=0.999, quant_delay=None, - vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, + vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, narrow_range=False): """Inserts a quant op between a producer op and (multiple) consumer ops. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5a3a74cec4..be4fc39651 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -72,6 +72,8 @@ def _create_graph(input_graph=None, def create_training_graph(input_graph=None, quant_delay=0): """Rewrites a training input_graph in place for simulated quantization. + Variables added by the rewrite get added to the global variables collection. + The graph has fake quantization ops inserted to simulate the error introduced by quantization. Since the graph is transformed in place, the expected behavior of previously held references to nodes and tensors may @@ -118,6 +120,8 @@ def create_training_graph(input_graph=None, quant_delay=0): def create_eval_graph(input_graph=None): """Rewrites an eval input_graph in place for simulated quantization. + Variables added by the rewrite get added to the global variables collection. + The graph has fake quantization ops inserted to simulate the error introduced by quantization. Since the graph is transformed in place, the expected behavior of previously held references to nodes and tensors may @@ -141,6 +145,8 @@ def experimental_create_training_graph(input_graph=None, freeze_bn_delay=int(2e5)): """Rewrites a training input_graph in place for simulated quantization. + Variables added by the rewrite get added to the global variables collection. + This function has additional experimental options not (yet) available to create_training_graph. The resulting behavior may be undefined. @@ -188,6 +194,8 @@ def experimental_create_eval_graph(input_graph=None, activation_bits=8): """Rewrites an eval input_graph in place for simulated quantization. + Variables added by the rewrite get added to the global variables collection. + This function has additional experimental options not (yet) available to create_eval_graph. The resulting behavior may be undefined. -- GitLab From 9c4a5b962fb84661ebbf128d83345157e9e3d224 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 03:45:32 -0700 Subject: [PATCH 098/960] [tf2xla] Introduce XlaTensorInfo XlaTensorInfo is side-band data for Tensors. It can be used to store information about Tensors that is not possible to store in the Tensor itself. The XlaTensorInfos are managed by XlaTensorInfoManager, which is an Allocator, which allows it to release the TensorInfos when the underlying Tensor is released. Looking up an XlaTensorInfo for a Tensor requires a hash table lookup. This implementation keeps this off the fast path and only looks the tensorinfos up when they are required. PiperOrigin-RevId: 189319553 --- tensorflow/compiler/jit/BUILD | 17 ++ .../compiler/jit/kernels/xla_launch_op.cc | 17 +- tensorflow/compiler/jit/xla_cpu_device.cc | 7 +- tensorflow/compiler/jit/xla_device.cc | 38 ++-- tensorflow/compiler/jit/xla_device.h | 23 ++- tensorflow/compiler/jit/xla_device_context.cc | 53 ++++-- tensorflow/compiler/jit/xla_device_context.h | 13 +- tensorflow/compiler/jit/xla_gpu_device.cc | 7 +- tensorflow/compiler/jit/xla_launch_util.cc | 163 ++++++++++-------- tensorflow/compiler/jit/xla_launch_util.h | 67 +++++-- tensorflow/compiler/jit/xla_tensor_info.cc | 56 ++++++ tensorflow/compiler/jit/xla_tensor_info.h | 85 +++++++++ tensorflow/core/framework/tensor.h | 2 + 13 files changed, 414 insertions(+), 134 deletions(-) create mode 100644 tensorflow/compiler/jit/xla_tensor_info.cc create mode 100644 tensorflow/compiler/jit/xla_tensor_info.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index c4a2d4ab03..39eb390f38 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -118,6 +118,21 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "xla_tensor_info", + srcs = ["xla_tensor_info.cc"], + hdrs = ["xla_tensor_info.h"], + deps = [ + ":common", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + cc_library( name = "xla_device", srcs = [ @@ -136,6 +151,7 @@ cc_library( ":common", ":jit_compilation_passes", ":xla_launch_util", + ":xla_tensor_info", "//tensorflow/compiler/jit/ops:xla_ops", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:dump_graph", @@ -182,6 +198,7 @@ cc_library( deps = [ ":common", ":xla_compilation_cache", + ":xla_tensor_info", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index cd7f8dd779..e24a9a0751 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -114,10 +114,16 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { // this is more obviously correct.) core::ScopedUnref cache_ref(cache); + const XlaDevice::Metadata* metadata; + Status s = XlaDevice::GetMetadata(ctx, &metadata); + + XlaTensorInfoManager* tensor_info_manager = nullptr; + if (s.ok()) { + tensor_info_manager = &metadata->tensor_info_manager(); + } + // Get the platform_id_ for XLA_* devices. if (platform_id_ == nullptr) { - const XlaDevice::Metadata* metadata; - Status s = XlaDevice::GetMetadata(ctx, &metadata); if (s.ok()) { platform_id_ = metadata->platform()->id(); } @@ -148,8 +154,8 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Executing XLA Computation..."; - XlaComputationLaunchContext launch_context(num_resource_args_, client, - &xla_allocator); + XlaComputationLaunchContext launch_context( + num_resource_args_, client, &xla_allocator, tensor_info_manager); launch_context.PopulateInputs(ctx, kernel, variables); // Execute the computation. @@ -166,8 +172,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; - launch_context.PopulateOutputs(ctx, kernel, - run_result.ConsumeValueOrDie()->release()); + launch_context.PopulateOutputs(ctx, kernel, run_result.ConsumeValueOrDie()); VLOG(1) << "Done"; } diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index e238252751..db3bf3ea33 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -39,9 +39,10 @@ Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options, (void)registrations; std::unique_ptr device; - TF_RETURN_IF_ERROR(XlaDevice::Create( - "Host", DEVICE_XLA_CPU, 0, DEVICE_CPU_XLA_JIT, options, name_prefix, - /*register_device_for_compilation=*/true, &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create("Host", DEVICE_XLA_CPU, 0, + DEVICE_CPU_XLA_JIT, options, name_prefix, + /*register_device_for_compilation=*/true, + /*transfer_as_literal=*/false, &device)); devices->push_back(device.release()); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index d4d8fe1c1d..e4e11d4ce2 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -109,7 +109,7 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, const string& name_prefix, bool register_device_for_compilation, - std::unique_ptr* device) { + bool transfer_as_literal, std::unique_ptr* device) { VLOG(1) << "XlaDevice::Create " << platform_name << " " << device_name << ":" << device_ordinal; @@ -137,15 +137,17 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( device->reset(new XlaDevice(options, attrs, device_ordinal, DeviceType(jit_device_name), - platform.ValueOrDie())); + platform.ValueOrDie(), transfer_as_literal)); return Status::OK(); } -XlaDevice::Metadata::Metadata(int device_ordinal, se::Platform* platform, - const DeviceType& device_type) +XlaDevice::Metadata::Metadata( + int device_ordinal, se::Platform* platform, const DeviceType& device_type, + std::unique_ptr* tensor_info_manager) : device_ordinal_(device_ordinal), device_type_(device_type), - platform_(platform) {} + platform_(platform), + tensor_info_manager_(*tensor_info_manager) {} int XlaDevice::Metadata::device_ordinal() const { return device_ordinal_; } @@ -160,6 +162,10 @@ const DeviceType& XlaDevice::Metadata::jit_device_type() const { return device_type_; } +XlaTensorInfoManager& XlaDevice::Metadata::tensor_info_manager() const { + return *tensor_info_manager_; +} + /* static */ Status XlaDevice::GetMetadata(OpKernelContext* ctx, const Metadata** metadata) { XlaDevice* xla_device = @@ -177,13 +183,19 @@ const DeviceType& XlaDevice::Metadata::jit_device_type() const { XlaDevice::XlaDevice(const SessionOptions& options, const DeviceAttributes& attrs, int device_ordinal, - const DeviceType& jit_device_name, se::Platform* platform) + const DeviceType& jit_device_name, se::Platform* platform, + bool transfer_as_literal) : LocalDevice(options, attrs), - xla_metadata_(device_ordinal, platform, jit_device_name), + xla_metadata_( + device_ordinal, platform, jit_device_name, + // Pass tensor_info_manager_ by reference as it is initialized lazily. + &tensor_info_manager_), device_ordinal_(device_ordinal), jit_device_name_(jit_device_name), xla_allocator_(nullptr), - platform_(platform) {} + platform_(platform), + tensor_info_manager_(nullptr), + transfer_as_literal_(transfer_as_literal) {} XlaDevice::~XlaDevice() {} @@ -208,6 +220,7 @@ Allocator* XlaDevice::GetAllocator(AllocatorAttributes attr) { xla::Backend* backend = client()->mutable_backend(); xla_allocator_ = XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( backend, device_ordinal_); + tensor_info_manager_.reset(new XlaTensorInfoManager(xla_allocator_)); } return xla_allocator_; } @@ -225,7 +238,11 @@ Status XlaDevice::FillContextMap(const Graph* graph, VLOG(1) << "XlaDevice::FillContextMap"; device_context_map->resize(graph->num_node_ids()); TF_ASSIGN_OR_RETURN(se::Stream * stream, GetStream()); - auto ctx = new XlaDeviceContext(stream); + // Call GetAllocator for the side-effect of ensuring the allocator and + // XlaTensorInfoManager is created. + (void)GetAllocator({}); + auto ctx = new XlaDeviceContext(stream, tensor_info_manager_.get(), + transfer_as_literal_); for (Node* n : graph->nodes()) { VLOG(2) << n->id() << " : " << n->type_string() << " : " << n->name(); ctx->Ref(); @@ -273,7 +290,8 @@ Status XlaDevice::MakeTensorFromProto(const TensorProto& tensor_proto, Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape()); Notification n; TF_ASSIGN_OR_RETURN(se::Stream * stream, GetStream()); - XlaTransferManager manager(stream); + XlaTransferManager manager(stream, tensor_info_manager_.get(), + transfer_as_literal_); manager.CopyCPUTensorToDevice(&parsed, this, ©, [&n, &status](const Status& s) { status = s; diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index d2ec38293c..0f4476296b 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -26,6 +26,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_ #define TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_ +#include "tensorflow/compiler/jit/xla_tensor_info.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" @@ -48,7 +49,8 @@ class XlaDevice : public LocalDevice { class Metadata { public: Metadata(int device_ordinal, perftools::gputools::Platform* platform, - const DeviceType& device_type); + const DeviceType& device_type, + std::unique_ptr* tensor_info_manager); // The index of the device on this host. int device_ordinal() const; @@ -56,11 +58,13 @@ class XlaDevice : public LocalDevice { perftools::gputools::Platform* platform() const; xla::LocalClient* client() const; const DeviceType& jit_device_type() const; + XlaTensorInfoManager& tensor_info_manager() const; private: const int device_ordinal_; const DeviceType device_type_; perftools::gputools::Platform* platform_; // Not owned. + std::unique_ptr& tensor_info_manager_; TF_DISALLOW_COPY_AND_ASSIGN(Metadata); }; @@ -71,15 +75,20 @@ class XlaDevice : public LocalDevice { // Factory function. 'platform_name' is the name of the XLA platform. // 'device_name' is the name of the Tensorflow device to create. // 'jit_device_name' is the name of the corresponding JIT device. + // 'transfer_as_literal' is true if device<->host transfers must be done using + // XLA's TransferLiteral{To,From}Device interface. If false, we can use + // ThenMemcpy instead. static Status Create(const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, const string& name_prefix, bool register_device_for_compilation, + bool transfer_as_literal, std::unique_ptr* device); XlaDevice(const SessionOptions& options, const DeviceAttributes& attrs, int device_ordinal, const DeviceType& jit_device_name, - ::perftools::gputools::Platform* platform); + ::perftools::gputools::Platform* platform, + bool transfer_as_literal); ~XlaDevice() override; Allocator* GetAllocator(AllocatorAttributes attr) override; @@ -113,6 +122,16 @@ class XlaDevice : public LocalDevice { // copying back and forth between CPU and the device, and // computations enqueued by XLA. xla::Backend::StreamPtr stream_; + // Manages sideband data about tensors, in particular the on-device shape tree + // if the tensor requires multiple device buffers to represent (for example, + // tuple shapes). + // This is a unique_ptr because XlaTensorInfoManager is non-copy-constructible + // and we need to initialize this lazily (as we also lazily initialize the + // underlying allocator). + std::unique_ptr tensor_info_manager_; + // Must we use XLA's transfer manager for correct host<->device transfers? if + // false, we can use ThenMemcpy() instead. + bool transfer_as_literal_; }; // Builds dummy OpKernel registrations on 'device' for the JIT operators diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index c936222f32..b57f82f98e 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_device_context.h" +#include "tensorflow/compiler/jit/xla_launch_util.h" #include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" @@ -52,7 +53,12 @@ void XlaDeviceAllocator::DeallocateRaw(void* ptr) { void XlaDeviceAllocator::GetStats(AllocatorStats* stats) { stats->Clear(); } -XlaTransferManager::XlaTransferManager(se::Stream* stream) : stream_(stream) {} +XlaTransferManager::XlaTransferManager( + se::Stream* stream, XlaTensorInfoManager* tensor_info_manager, + bool transfer_as_literal) + : stream_(stream), + tensor_info_manager_(tensor_info_manager), + transfer_as_literal_(transfer_as_literal) {} void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, @@ -72,13 +78,19 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, se::DeviceMemoryBase dev_dst_ptr(dst_ptr, total_bytes); Status status; - stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes); - // TODO(hpucha): Make this asynchronous. - Status block_status = stream_->BlockHostUntilDone(); - if (!block_status.ok()) { - status = xla::InternalError( - "Failed to complete data transfer on stream %p: %s", stream_, - block_status.error_message().c_str()); + if (transfer_as_literal_) { + status = xla::Unimplemented( + "XlaTransferManager::CopyCPUTensorToDevice not implemented for " + "literals"); + } else { + stream_->ThenMemcpy(&dev_dst_ptr, src_ptr, total_bytes); + // TODO(hpucha): Make this asynchronous. + Status block_status = stream_->BlockHostUntilDone(); + if (!block_status.ok()) { + status = xla::InternalError( + "Failed to complete data transfer on stream %p: %s", stream_, + block_status.error_message().c_str()); + } } done(status); @@ -108,13 +120,19 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor, void* dst_ptr = DMAHelper::base(cpu_tensor); Status status; - stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes); - // TODO(hpucha): Make this asynchronous. - Status block_status = stream_->BlockHostUntilDone(); - if (!block_status.ok()) { - status = xla::InternalError( - "Failed to complete data transfer on stream %p: %s", stream_, - block_status.error_message().c_str()); + if (transfer_as_literal_) { + status = xla::Unimplemented( + "XlaTransferManager::CopyDeviceTensorToCPU not implemented for " + "literals"); + } else { + stream_->ThenMemcpy(dst_ptr, dev_src_ptr, total_bytes); + // TODO(hpucha): Make this asynchronous. + Status block_status = stream_->BlockHostUntilDone(); + if (!block_status.ok()) { + status = xla::InternalError( + "Failed to complete data transfer on stream %p: %s", stream_, + block_status.error_message().c_str()); + } } done(status); @@ -125,7 +143,10 @@ void XlaTransferManager::CopyDeviceTensorToCPU(const Tensor* device_tensor, done(Status::OK()); } -XlaDeviceContext::XlaDeviceContext(se::Stream* stream) : manager_(stream) {} +XlaDeviceContext::XlaDeviceContext(se::Stream* stream, + XlaTensorInfoManager* tensor_info_manager, + bool transfer_as_literal) + : manager_(stream, tensor_info_manager, transfer_as_literal) {} void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h index c4edcd474e..df02f4eac4 100644 --- a/tensorflow/compiler/jit/xla_device_context.h +++ b/tensorflow/compiler/jit/xla_device_context.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "tensorflow/compiler/jit/xla_tensor_info.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/framework/allocator.h" @@ -49,7 +50,9 @@ class XlaDeviceAllocator : public Allocator { // Helper class for managing data transfers between host and XLA devices. class XlaTransferManager { public: - explicit XlaTransferManager(perftools::gputools::Stream* stream); + explicit XlaTransferManager(perftools::gputools::Stream* stream, + XlaTensorInfoManager* tensor_info_manager, + bool transfer_as_literal); void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, Tensor* device_tensor, StatusCallback done) const; @@ -62,6 +65,10 @@ class XlaTransferManager { // Stream obtained from a Device, used to transfer tensors between // CPU and device. perftools::gputools::Stream* stream_; + // The tensor info manager, for access to sideband information about tensors. + XlaTensorInfoManager* tensor_info_manager_; + // True if we must use XLA's TransferManager for correct device transfers. + bool transfer_as_literal_; }; // DeviceContext for operators assigned to XlaDevice devices. The @@ -69,7 +76,9 @@ class XlaTransferManager { // wraps the methods in XlaTransferManager. class XlaDeviceContext : public DeviceContext { public: - explicit XlaDeviceContext(perftools::gputools::Stream* stream); + explicit XlaDeviceContext(perftools::gputools::Stream* stream, + XlaTensorInfoManager* tensor_info_manager, + bool transfer_as_literal); void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, Tensor* device_tensor, diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index 2326070358..383ed879ef 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -39,9 +39,10 @@ Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options, (void)registrations; std::unique_ptr device; - Status status = XlaDevice::Create( - "CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options, name_prefix, - /*register_device_for_compilation=*/true, &device); + Status status = XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, + DEVICE_GPU_XLA_JIT, options, name_prefix, + /*register_device_for_compilation=*/true, + /*transfer_as_literal=*/false, &device); if (!status.ok()) { // Treat failures as non-fatal; there might not be a GPU in the machine. VLOG(1) << "Failed to create XLA_GPU device: " << status; diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 8322dd2e82..689fa3299c 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -56,74 +56,56 @@ XlaAllocator::XlaAllocator(const gpu::Platform* platform, OpKernelContext* op_context) : xla::DeviceMemoryAllocator(platform), op_context_(op_context) {} -XlaAllocator::~XlaAllocator() = default; +XlaAllocator::~XlaAllocator() { CHECK(allocated_.empty()); } xla::StatusOr XlaAllocator::Allocate( int device_ordinal, uint64 size, bool retry_on_failure) { - AllocatorAttributes allocator_attrs; - allocator_attrs.set_on_host(false); - - AllocationAttributes allocation_attrs; - allocation_attrs.no_retry_on_failure = !retry_on_failure; - - Tensor t; - Status status = op_context_->allocate_temp( - DT_UINT8, TensorShape({static_cast(size)}), &t, allocator_attrs, - allocation_attrs); - if (!status.ok()) { - VLOG(2) << "Allocation failed " << size; - return status; - } - void* data = - reinterpret_cast(const_cast(t.tensor_data().data())); - tensors_[data] = t; + void* data = op_context_->device()->GetAllocator({})->AllocateRaw( + Allocator::kAllocatorAlignment, size); + allocated_.insert(data); return gpu::DeviceMemoryBase(data, size); } -Status XlaAllocator::RegisterArgument(const Tensor* t) { - void* data = - reinterpret_cast(const_cast(t->tensor_data().data())); - tensors_[data] = *t; - return Status::OK(); -} +void XlaAllocator::Release(void* ptr) { allocated_.erase(ptr); } Status XlaAllocator::Deallocate(int device_ordinal, gpu::DeviceMemoryBase* mem) { - if (mem->opaque() != nullptr) { - if (tensors_.erase(mem->opaque()) == 0) { - return tensorflow::errors::InvalidArgument("Unknown tensor address"); - } + if (allocated_.count(mem->opaque())) { + op_context_->device()->GetAllocator({})->DeallocateRaw(mem->opaque()); + allocated_.erase(mem->opaque()); } return Status::OK(); } -Status XlaAllocator::MakeTensorFromBuffer(gpu::DeviceMemoryBase buffer, - DataType dtype, - const TensorShape& shape, - Tensor* out_tensor) const { - void* ptr = const_cast(buffer.opaque()); - auto it = tensors_.find(ptr); - if (it == tensors_.end()) { - return errors::InvalidArgument("Unknown tensor address"); - } - const Tensor& tensor = it->second; - - int64 output_size = DataTypeSize(dtype) * shape.num_elements(); - if (tensor.TotalBytes() == output_size) { - out_tensor->UnsafeCopyFromInternal(tensor, dtype, shape); - } else { - Tensor slice = tensor.Slice(0, output_size); - out_tensor->UnsafeCopyFromInternal(slice, dtype, shape); - } - return Status::OK(); +namespace { +// Return the 'index''th subtree of the given ShapedBuffer as a ShapedBuffer. +xla::ShapedBuffer ExtractSubShapedBuffer(const xla::ShapedBuffer& shaped_buffer, + int index) { + xla::Shape on_host_shape = xla::ShapeUtil::GetTupleElementShape( + shaped_buffer.on_host_shape(), index); + xla::Shape on_device_shape = xla::ShapeUtil::GetTupleElementShape( + shaped_buffer.on_device_shape(), index); + + xla::ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape, + shaped_buffer.platform(), + shaped_buffer.device_ordinal()); + + auto& shape_tree = shaped_buffer.buffers(); + auto& sub_shape_tree = sub_shaped_buffer.buffers(); + sub_shape_tree.CopySubtreeFrom(shape_tree, + /*source_base_index=*/{index}, + /*target_base_index=*/{}); + return sub_shaped_buffer; } +} // namespace XlaComputationLaunchContext::XlaComputationLaunchContext( int64 num_resource_args, xla::LocalClient* client, - XlaAllocator* xla_allocator) + XlaAllocator* xla_allocator, XlaTensorInfoManager* tensor_info_manager) : num_resource_args_(num_resource_args), client_(client), - xla_allocator_(xla_allocator) {} + xla_allocator_(xla_allocator), + tensor_info_manager_(tensor_info_manager) {} void XlaComputationLaunchContext::PopulateInputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, @@ -145,29 +127,35 @@ void XlaComputationLaunchContext::PopulateInputs( t = &(ctx->input(arg_num)); } - gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase( - const_cast(t->tensor_data().data()), t->tensor_data().size()); - const xla::Shape on_device_shape = client_->backend().transfer_manager()->HostShapeToDeviceShape(shape); - CHECK(xla::ShapeUtil::Equal(shape, on_device_shape)) - << "On-device shape " - << xla::ShapeUtil::HumanStringWithLayout(on_device_shape) - << " not the same as on-host shape " - << xla::ShapeUtil::HumanStringWithLayout(shape); - arg_buffers_[i] = xla::MakeUnique( - /*on_host_shape=*/shape, /*on_device_shape=*/shape, client_->platform(), - client_->default_device_ordinal()); - arg_buffers_[i]->set_buffer(dmem, /*index=*/{}); - arg_ptrs_[i] = arg_buffers_[i].get(); - - OP_REQUIRES_OK(ctx, xla_allocator_->RegisterArgument(t)); + if (xla::ShapeUtil::IsTuple(on_device_shape)) { + CHECK(tensor_info_manager_); + const XlaTensorInfo* tensor_info = + tensor_info_manager_->GetTensorInfo(*t); + CHECK(tensor_info && tensor_info->has_shaped_buffer()); + arg_ptrs_[i] = + const_cast(&tensor_info->shaped_buffer()); + } else { + CHECK(xla::ShapeUtil::Equal(shape, on_device_shape)) + << "On-device shape " + << xla::ShapeUtil::HumanStringWithLayout(on_device_shape) + << " not the same as on-host shape " + << xla::ShapeUtil::HumanStringWithLayout(shape); + gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase( + const_cast(t->tensor_data().data()), t->tensor_data().size()); + arg_buffers_[i] = xla::MakeUnique( + /*on_host_shape=*/shape, /*on_device_shape=*/shape, + client_->platform(), client_->default_device_ordinal()); + arg_buffers_[i]->set_buffer(dmem, /*index=*/{}); + arg_ptrs_[i] = arg_buffers_[i].get(); + } } } void XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, - std::unique_ptr output) { + std::unique_ptr output) { gpu::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; @@ -180,6 +168,11 @@ void XlaComputationLaunchContext::PopulateOutputs( // Copy XLA results to the OpOutputList. int output_num = 0; for (int i = 0; i < ctx->num_outputs(); ++i) { + AllocatorAttributes alloc_attrs = ctx->output_alloc_attr(i); + Allocator* allocator = ctx->device()->GetAllocator(alloc_attrs); + if (tensor_info_manager_ && !alloc_attrs.on_host()) { + allocator = tensor_info_manager_; + } if (kernel->outputs[i].is_constant) { // Output is a constant. const Tensor& const_tensor = kernel->outputs[i].constant_value; @@ -204,11 +197,19 @@ void XlaComputationLaunchContext::PopulateOutputs( VLOG(2) << "Retval " << i << " shape " << shape.DebugString(); gpu::DeviceMemoryBase buffer = output->buffer({output_num}); - Tensor output_tensor; - // Looks up the owning Tensor by buffer address. - OP_REQUIRES_OK(ctx, xla_allocator_->MakeTensorFromBuffer( - buffer, ctx->expected_output_dtype(i), shape, - &output_tensor)); + Tensor output_tensor = XlaTensorBuffer::MakeTensor( + ctx->expected_output_dtype(i), shape, buffer, allocator); + xla_allocator_->Release(buffer.opaque()); + + xla::Shape output_shape = xla::ShapeUtil::GetTupleElementShape( + output->on_device_shape(), output_num); + if (xla::ShapeUtil::IsTuple(output_shape)) { + CHECK(tensor_info_manager_); + XlaTensorInfo* tensor_info = + tensor_info_manager_->GetOrCreateTensorInfo(output_tensor); + tensor_info->set_shaped_buffer( + ExtractSubShapedBuffer(*output, output_num)); + } ctx->set_output(i, output_tensor); ++output_num; } @@ -221,6 +222,10 @@ void XlaComputationLaunchContext::PopulateOutputs( // Apply variable updates, if any. VLOG(2) << "Applying variable updates"; for (int i = 0; i < kernel->resource_updates.size(); ++i) { + Allocator* allocator = ctx->device()->GetAllocator({}); + if (tensor_info_manager_) { + allocator = tensor_info_manager_; + } const XlaCompiler::ResourceUpdate& write = kernel->resource_updates[i]; OP_REQUIRES(ctx, write.input_index >= 0 && write.input_index < ctx->num_inputs(), @@ -243,11 +248,19 @@ void XlaComputationLaunchContext::PopulateOutputs( mutex_lock ml(*variable->mu()); OP_REQUIRES(ctx, variable->tensor()->dtype() == write.type, errors::Internal("Mismatched type in variable write")); - - // Looks up the owning Tensor by buffer address. - OP_REQUIRES_OK(ctx, - xla_allocator_->MakeTensorFromBuffer( - buffer, write.type, write.shape, variable->tensor())); + *variable->tensor() = + XlaTensorBuffer::MakeTensor(write.type, write.shape, buffer, allocator); + xla_allocator_->Release(buffer.opaque()); + + xla::Shape output_shape = xla::ShapeUtil::GetTupleElementShape( + output->on_device_shape(), output_num); + if (xla::ShapeUtil::IsTuple(output_shape)) { + CHECK(tensor_info_manager_); + XlaTensorInfo* tensor_info = + tensor_info_manager_->GetOrCreateTensorInfo(*variable->tensor()); + tensor_info->set_shaped_buffer( + ExtractSubShapedBuffer(*output, output_num)); + } ++output_num; } } diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 9fd356fce5..8694f6ce58 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -19,8 +19,10 @@ limitations under the License. #define TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_ #include "tensorflow/compiler/jit/xla_compilation_cache.h" +#include "tensorflow/compiler/jit/xla_tensor_info.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/variable_ops.h" @@ -52,16 +54,8 @@ class XlaAllocator : public xla::DeviceMemoryAllocator { Status Deallocate(int device_ordinal, perftools::gputools::DeviceMemoryBase* mem) override; - // Register an Tensor (input or resource variable) with the allocator. If - // the operation returns an alias to one of its inputs, then the allocator - // needs to be able to handle it. - Status RegisterArgument(const Tensor* t); - - // Makes 'tensor' a wrapper around the data buffer at 'ptr'. The buffer is - // interpreted as having data type 'dtype' and shape 'shape'. - Status MakeTensorFromBuffer(perftools::gputools::DeviceMemoryBase buffer, - DataType dtype, const TensorShape& shape, - Tensor* out_tensor) const; + // Un-track 'ptr' - do not delete it on destruction. + void Release(void* ptr); // The Tensorflow BFC allocator used on GPU allows host-side deallocation // before GPU execution takes place. Tensorflow uses the ordering of the main @@ -74,11 +68,7 @@ class XlaAllocator : public xla::DeviceMemoryAllocator { private: OpKernelContext* const op_context_; - - // Map from pointer address to the owning Tensor; used by - // MakeTensorFromBuffer. Also used to automatically release Tensors when the - // allocator is freed. - std::unordered_map tensors_; + std::unordered_set allocated_; }; // Helper class to perform the marshalling of TensorFlow inputs and outputs to @@ -86,7 +76,8 @@ class XlaAllocator : public xla::DeviceMemoryAllocator { class XlaComputationLaunchContext { public: XlaComputationLaunchContext(int64 num_resource_args, xla::LocalClient* client, - XlaAllocator* xla_allocator); + XlaAllocator* xla_allocator, + XlaTensorInfoManager* tensor_info_manager); // Add all inputs within `ctx` as XLA arguments (returned by arguments()). // `variables` is a map from TensorFlow argument number to resource variable. @@ -97,7 +88,7 @@ class XlaComputationLaunchContext { // Given the XLA output in `output`, populate all outputs of `ctx`. void PopulateOutputs(OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, - std::unique_ptr output); + std::unique_ptr output); // Return the argument list. Only valid after PopulateInputs() has been // called. @@ -107,10 +98,52 @@ class XlaComputationLaunchContext { int64 num_resource_args_; xla::LocalClient* client_; XlaAllocator* xla_allocator_; + XlaTensorInfoManager* tensor_info_manager_; std::vector> arg_buffers_; std::vector arg_ptrs_; }; +// A simple TensorBuffer implementation that allows us to create Tensors that +// take ownership of pre-allocated memory. +class XlaTensorBuffer : public TensorBuffer { + public: + XlaTensorBuffer(const void* ptr, size_t expected_size, size_t actual_size, + Allocator* allocator) + : expected_size_(expected_size), + actual_size_(actual_size), + allocator_(allocator) { + data_ = const_cast(ptr); + } + + ~XlaTensorBuffer() override { allocator_->DeallocateRaw(data_); } + + void* data() const override { return data_; } + size_t size() const override { return expected_size_; } + + TensorBuffer* root_buffer() override { return this; } + + void FillAllocationDescription(AllocationDescription* proto) const override { + proto->set_allocated_bytes(actual_size_); + } + + static Tensor MakeTensor(DataType dtype, const TensorShape& shape, + perftools::gputools::DeviceMemoryBase buffer, + Allocator* allocator) { + size_t expected_size = shape.num_elements() * DataTypeSize(dtype); + auto* tensor_buffer = new XlaTensorBuffer(buffer.opaque(), expected_size, + buffer.size(), allocator); + Tensor t(dtype, shape, tensor_buffer); + tensor_buffer->Unref(); + return t; + } + + private: + void* data_; + size_t expected_size_; + size_t actual_size_; + Allocator* allocator_; +}; + } // namespace tensorflow #endif diff --git a/tensorflow/compiler/jit/xla_tensor_info.cc b/tensorflow/compiler/jit/xla_tensor_info.cc new file mode 100644 index 0000000000..0ce18c27cb --- /dev/null +++ b/tensorflow/compiler/jit/xla_tensor_info.cc @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_tensor_info.h" + +namespace tensorflow { + +const XlaTensorInfo* XlaTensorInfoManager::GetTensorInfo( + const void* device_ptr) const { + mutex_lock lock(lock_); + auto iterator = tensor_infos_.find(device_ptr); + return (iterator == tensor_infos_.end()) ? nullptr + : tensor_infos_.at(device_ptr).get(); +} + +XlaTensorInfo* XlaTensorInfoManager::GetOrCreateTensorInfo( + const void* device_ptr) { + mutex_lock lock(lock_); + auto iterator = tensor_infos_.find(device_ptr); + if (iterator != tensor_infos_.end()) { + return iterator->second.get(); + } + auto iterator_and_inserted = + tensor_infos_.emplace(device_ptr, MakeUnique()); + CHECK(iterator_and_inserted.second); + return iterator_and_inserted.first->second.get(); +} + +const XlaTensorInfo* XlaTensorInfoManager::GetTensorInfo(const Tensor& tensor) { + return GetTensorInfo(tensor.tensor_data().data()); +} + +XlaTensorInfo* XlaTensorInfoManager::GetOrCreateTensorInfo( + const Tensor& tensor) { + return GetOrCreateTensorInfo(tensor.tensor_data().data()); +} + +void XlaTensorInfoManager::DeallocateRaw(void* ptr) { + wrapped()->DeallocateRaw(ptr); + mutex_lock lock(lock_); + tensor_infos_.erase(ptr); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_tensor_info.h b/tensorflow/compiler/jit/xla_tensor_info.h new file mode 100644 index 0000000000..0b0736bf01 --- /dev/null +++ b/tensorflow/compiler/jit/xla_tensor_info.h @@ -0,0 +1,85 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_XLA_TENSOR_INFO_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_TENSOR_INFO_H_ + +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +// Information about a tensor. The XlaTensorInfoManager can maintain one of +// these per device Tensor. +class XlaTensorInfo { + public: + XlaTensorInfo() {} + + // Some Tensors can have complex on-device shapes, including tuple shapes. To + // manage the memory for these tensors a ShapedBuffer may be required. + + // Return true if this TensorInfo contains a ShapedBuffer. + bool has_shaped_buffer() const { return shaped_buffer_ != nullptr; } + // Return the contained ShapedBuffer. + // REQUIRES: has_shaped_buffer() + const xla::ShapedBuffer& shaped_buffer() const { return *shaped_buffer_; } + // Mutates the TensorInfo to set the ShapedBuffer. + void set_shaped_buffer(xla::ShapedBuffer shaped_buffer) { + shaped_buffer_.reset(new xla::ShapedBuffer(std::move(shaped_buffer))); + } + + private: + // The optional contained ShapedBuffer. + std::unique_ptr shaped_buffer_; +}; + +// Manages XlaTensorInfo objects. This class is also an Allocator, so that +// XlaTensorInfo objects can be deleted when their Tensor is deallocated. +class XlaTensorInfoManager : public AllocatorWrapper { + public: + // Creates a new XlaTensorInfoManager, delegating all DeallocateRaw calls to + // allocator. + XlaTensorInfoManager(Allocator* allocator) : AllocatorWrapper(allocator) {} + + // Returns the XlaTensorInfo for the given device memory pointer or nullptr if + // none exists. + const XlaTensorInfo* GetTensorInfo(const void* device_ptr) const; + // Returns the XlaTensorInfo for the device memory pointer extracted from + // tensor or nullptr if none exists. + const XlaTensorInfo* GetTensorInfo(const Tensor& tensor); + + // Returns the XlaTensorInfo for the given device memory pointer, creating one + // if necessary. + XlaTensorInfo* GetOrCreateTensorInfo(const Tensor& tensor); + // Returns the XlaTensorInfo for the device memory pointer extracted from + // tensor, creating one if necessary. + XlaTensorInfo* GetOrCreateTensorInfo(const void* device_ptr); + + // Allocator interface + void DeallocateRaw(void* ptr) override; + + private: + mutable mutex lock_; + // The managed tensor infos. The mapped value is a unique_ptr so that returned + // references are stable over rehashes. + std::unordered_map> tensor_infos_ + GUARDED_BY(lock_); +}; +} // namespace tensorflow + +#endif diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 9ae4bb5a2c..4d10f7efb5 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -483,6 +483,8 @@ class Tensor { friend class TensorTestHelper; // For access to set_shape friend class OpKernelContext; // For access to RefCountIsOne(). friend class ScopedAllocator; // For access to buf_. + friend class XlaTensorBuffer; // For access to the private constructor taking + // the buffer template friend class AssignVariableOp; // For access to RefCountIsOne(). template -- GitLab From 3abdbfd431241f05c383eaaf0f8b499b063aa1cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 05:53:38 -0700 Subject: [PATCH 099/960] Clean up and clarify the 'install from source' page. Remove reference to CUDA and cuDNN versions for 'install from source' from the 'install Linux' documentation. The 'install from source' should be the authoritative page for this. PiperOrigin-RevId: 189328669 --- tensorflow/docs_src/install/install_linux.md | 51 +++++++------------ .../docs_src/install/install_sources.md | 45 +++++++--------- 2 files changed, 35 insertions(+), 61 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index e3e115d9f6..88ceca3cda 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -31,49 +31,32 @@ If you are installing TensorFlow with GPU support using one of the mechanisms described in this guide, then the following NVIDIA software must be installed on your system: - * CUDA® Toolkit 9.0. For details, see - [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A). - Ensure that you append the relevant Cuda pathnames to the + * [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see + [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/). + Ensure that you append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment variable as described in the NVIDIA documentation. - * The NVIDIA drivers associated with CUDA Toolkit 9.0. - * cuDNN v7.0. For details, see - [NVIDIA's documentation](https://developer.nvidia.com/cudnn). + * [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see + [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/). Ensure that you create the `CUDA_HOME` environment variable as - described in the NVIDIA documentation. - * GPU card with CUDA Compute Capability 3.0 or higher. See - [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for - a list of supported GPU cards. - * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. - This library provides advanced profiling support. To install this library, - issue the following command for CUDA Toolkit >= 8.0: - -
-    $ sudo apt-get install cuda-command-line-tools
-    
+ described in NVIDIA's documentation. + * [CUPTI](http://docs.nvidia.com/cuda/cupti/) ships with the CUDA Toolkit, but + you also need to append its path to the `LD_LIBRARY_PATH` environment + variable: - and add its path to your `LD_LIBRARY_PATH` environment variable: +
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
-
-    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
-    
+In order to run TensorFlow computations on the GPU, you also need: - For CUDA Toolkit <= 7.5 do: - -
-    $ sudo apt-get install libcupti-dev
-    
+ * A GPU card with CUDA Compute Capability 3.0 or higher. See + [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for + a list of supported GPU cards. + * [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA + Toolkit. If you have an earlier version of the preceding packages, please upgrade to the specified versions. If upgrading is not possible, then you may still run -TensorFlow with GPU support, but only if you do the following: - - * Install TensorFlow from sources as documented in - @{$install_sources$Installing TensorFlow from Sources}. - * Install or upgrade to at least the following NVIDIA versions: - * CUDA toolkit 7.0 or greater - * cuDNN v3 or greater - * GPU card with CUDA Compute Capability 3.0 or higher. +TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}. ## Determine how to install TensorFlow diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index acf0af0d9d..4e7b07d78b 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -133,30 +133,21 @@ The following NVIDIA hardware must be installed on your system: The following NVIDIA software must be installed on your system: - * NVIDIA's Cuda Toolkit (>= 7.0). We recommend version 9.0. + * [CUDA Toolkit](http://nvidia.com/cuda) (>= 7.0). We recommend version 9.0. For details, see - [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A). - Ensure that you append the relevant Cuda pathnames to the + [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/). + Ensure that you append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment variable as described in the NVIDIA documentation. - * The NVIDIA drivers associated with NVIDIA's Cuda Toolkit. - * cuDNN (>= v3). We recommend version 6.0. For details, see - [NVIDIA's documentation](https://developer.nvidia.com/cudnn), - particularly the description of appending the appropriate pathname - to your `LD_LIBRARY_PATH` environment variable. - -Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via - -
 $ sudo apt-get install cuda-command-line-tools 
- -and add its path to your `LD_LIBRARY_PATH` environment variable: - -
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
- -For Cuda Toolkit <= 7.5, you install `libcupti-dev` by invoking the following command: - -
 $ sudo apt-get install libcupti-dev 
+ * [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA + Toolkit. + * [cuDNN SDK](http://developer.nvidia.com/cudnn) (>= v3). We recommend version 7.0. For details, see + [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/). + * [CUPTI](http://docs.nvidia.com/cuda/cupti/) ships with the CUDA Toolkit, but + you also need to append its path to the `LD_LIBRARY_PATH` environment + variable: +
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
### Next @@ -240,8 +231,8 @@ such as compiler flags. You must run this script *prior* to creating the pip package and installing TensorFlow. If you wish to build TensorFlow with GPU, `configure` will ask -you to specify the version numbers of Cuda and cuDNN. If several -versions of Cuda or cuDNN are installed on your system, explicitly select +you to specify the version numbers of CUDA and cuDNN. If several +versions of CUDA or cuDNN are installed on your system, explicitly select the desired version instead of relying on the default. One of the questions that `configure` will ask is as follows: @@ -289,12 +280,12 @@ Do you wish to build TensorFlow with CUDA support? [y/N] Y CUDA support will be enabled for TensorFlow Do you want to use clang as CUDA compiler? [y/N] nvcc will be used as CUDA compiler -Please specify the Cuda SDK version you want to use, e.g. 7.0. [Leave empty to default to CUDA 9.0]: 9.0 +Please specify the CUDA SDK version you want to use, e.g. 7.0. [Leave empty to default to CUDA 9.0]: 9.0 Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is /usr/local/cuda]: Please specify which gcc should be used by nvcc as the host compiler. [Default is /usr/bin/gcc]: Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]: 7 Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is /usr/local/cuda]: -Please specify a list of comma-separated Cuda compute capabilities you want to build with. +Please specify a list of comma-separated CUDA compute capabilities you want to build with. You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus. Please note that each additional compute capability significantly increases your build time and binary size. [Default is: "3.5,5.2"]: 3.0 @@ -304,14 +295,14 @@ Configuration finished If you told `configure` to build for GPU support, then `configure` -will create a canonical set of symbolic links to the Cuda libraries -on your system. Therefore, every time you change the Cuda library paths, +will create a canonical set of symbolic links to the CUDA libraries +on your system. Therefore, every time you change the CUDA library paths, you must rerun the `configure` script before re-invoking the bazel build command. Note the following: - * Although it is possible to build both Cuda and non-Cuda configs + * Although it is possible to build both CUDA and non-CUDA configs under the same source tree, we recommend running `bazel clean` when switching between these two configurations in the same source tree. * If you don't run the `configure` script *before* running the -- GitLab From 9e62c648a84f664fe338e1dec2db0f5e89ec3147 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Fri, 16 Mar 2018 07:40:49 -0700 Subject: [PATCH 100/960] [XLA:python] Fix a bug where returning an status would not incref Py_None. PiperOrigin-RevId: 189337748 --- tensorflow/compiler/xla/python/local_computation_builder.i | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index b5354131c9..b2681d5e8b 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -216,6 +216,7 @@ tensorflow::ImportNumpy(); PyExc_RuntimeError, $1.ToString().c_str()); return NULL; } + Py_INCREF(Py_None); $result = Py_None; } -- GitLab From f4c6bd6b422c6383ac814c50aa2243442e1049cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 07:54:08 -0700 Subject: [PATCH 101/960] - Adds support for shared embedding layers (e.g. in RNNs), and shared Conv2D layers. - Some minor refactoring of internal structure in fisher_blocks and layer_collection PiperOrigin-RevId: 189338874 --- .../kernel_tests/layer_collection_test.py | 18 +- .../contrib/kfac/python/ops/fisher_blocks.py | 330 ++++++++++++++---- .../kfac/python/ops/layer_collection.py | 260 ++++++++++---- tensorflow/contrib/kfac/python/ops/utils.py | 8 + 4 files changed, 483 insertions(+), 133 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py index bae6bd7a3b..ba22099340 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py @@ -135,8 +135,22 @@ class LayerCollectionTest(test.TestCase): array_ops.constant(6), 16, approx=layer_collection.APPROX_DIAGONAL_NAME) - - self.assertEqual(9, len(lc.get_blocks())) + lc.register_fully_connected_multi( + array_ops.constant(1), + (array_ops.constant(2), array_ops.constant(3)), + (array_ops.constant(4), array_ops.constant(5))) + lc.register_conv2d_multi( + params=array_ops.ones((2, 3, 4, 5)), + strides=[1, 1, 1, 1], + padding='SAME', + inputs=(array_ops.ones((1, 2, 3, 4)), array_ops.ones((5, 6, 7, 8))), + outputs=(array_ops.ones((1, 1, 1, 5)), array_ops.ones((2, 2, 2, 10)))) + lc.register_embedding_multi( + array_ops.constant((1,)), + (array_ops.constant(2), array_ops.constant(3)), + (array_ops.constant(4), array_ops.constant(5))) + + self.assertEqual(12, len(lc.get_blocks())) def testRegisterBlocksMultipleRegistrations(self): with ops.Graph().as_default(): diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 31f4689fbf..79d0424dca 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -48,6 +48,7 @@ from tensorflow.contrib.kfac.python.ops import utils from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.util import nest # For blocks corresponding to convolutional layers, or any type of block where # the parameters can be thought of as being replicated in time or space, @@ -74,6 +75,86 @@ def set_global_constants(normalize_damping_power=None, pi_type=None): PI_TYPE = pi_type +def _make_partitionedtensors_inputs(inputs): + """Constructs PartitionedTensor for inputs. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + Args: + inputs: a 1-D list of Tensors. Index is tower/mini-batch. + + Returns: + A PartitionedTensor. + """ + return utils.PartitionedTensor(inputs) + + +def _make_partitionedtensors_grads(grads_list): + """Constructs PartitionedTensor for grads_list. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + Args: + grads_list: 2-D list of Tensors. First index is for source, second + index for tower. + + Returns: + Tuple of PartitionedTensors, one per source. + """ + return tuple(utils.PartitionedTensor(grads) for grads in grads_list) + + +def _make_partitionedtensors_multi_inputs(inputs): + """Constructs PartitionedTensors for inputs. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + This version of this function is for use with FisherBlocks that deal with + multiple uses or time-steps. One PartitionedTensor is created for each + use/time-step. The FisherBlock will be responsible for concatenating + (or doing whatever else it wants) with the resulting lists. + + Args: + inputs: a 2-D list of Tensors. First index is tower/mini-batch, second is + use/time-step. + + Returns: + A tuple of PartitionedTensor's, one per use/time-step. + """ + num_uses = len(inputs[0]) + assert all(len(input_) == num_uses for input_ in inputs) + + return tuple(utils.PartitionedTensor(input_) for input_ in zip(*inputs)) + + +def _make_partitionedtensors_multi_grads(grads_list): + """Constructs PartitionedTensors for grads_list. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + This version of this function is for use with FisherBlocks that deal with + multiple uses or time-steps. One PartitionedTensor is created for each + use/time-step. The FisherBlock will be responsible for concatenating + (or doing whatever else it wants) with the resulting lists. + + Args: + grads_list: 3-D list of Tensors. First index is for source, second is for + tower, third is for use/time-step. + + Returns: + 2-D tuple of PartitionedTensors. First index is for source, second is for + use/time-step. + """ + num_uses = len(grads_list[0][0]) + assert all(len(grad) == num_uses for grads in grads_list for grad in grads) + return tuple(tuple(utils.PartitionedTensor(grad) + for grad in zip(*grads)) for grads in grads_list) + + def normalize_damping(damping, num_replications): """Normalize damping after adjusting scale by NORMALIZE_DAMPING_POWER.""" if NORMALIZE_DAMPING_POWER: @@ -396,57 +477,6 @@ class InputOutputMultiMinibatch(object): def _outputs(self): return self.__outputs - def _package_minibatches(self, grads_list): - """Constructs PartitionedTensor for inputs, grads_list. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - Args: - grads_list: 2-D list of Tensors. First index is for source, second - index for tower. - - Returns: - inputs: PartitionedTensor. - grads_list: Tuple of PartitionedTensors, one per source. - """ - inputs = utils.PartitionedTensor(self._inputs) - grads_list = tuple(utils.PartitionedTensor(grads) for grads in grads_list) - - return inputs, grads_list - - def _package_minibatches_multi(self, grads_list): - """Constructs PartitionedTensors for inputs, grads_list. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - This version of this function is for use with FisherBlocks that deal with - multiple uses or time-steps. One PartitionedTensor is created for each - use/time-step. - - Args: - grads_list: 3-D tuple of Tensors. First index is for source, second - index is for tower, third is for use/time-step. - - Returns: - inputs: A tuple of PartitionedTensor's, one per use/time-step. - grads_list: 2-D tuple of PartitionedTensors. First index is for source, - second is for use/time-step. - """ - # self._inputs is a 2-D tuple. First index is tower/mini-batch, second is - # use/time-step. - inputs = self._inputs - num_uses = len(inputs[0]) - assert all(len(input_) == num_uses for input_ in inputs) - assert all(len(grad) == num_uses for grads in grads_list for grad in grads) - - inputs = tuple(utils.PartitionedTensor(input_) for input_ in zip(*inputs)) - grads_list = tuple(tuple(utils.PartitionedTensor(grad) - for grad in zip(*grads)) for grads in grads_list) - - return inputs, grads_list - class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): """FisherBlock for fully-connected (dense) layers using a diagonal approx. @@ -485,7 +515,8 @@ class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): super(FullyConnectedDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._package_minibatches(grads_list) + inputs = _make_partitionedtensors_inputs(self._inputs) + grads_list = _make_partitionedtensors_grads(grads_list) self._factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedDiagonalFactor, @@ -598,7 +629,8 @@ class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): super(ConvDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._package_minibatches(grads_list) + inputs = _make_partitionedtensors_inputs(self._inputs) + grads_list = _make_partitionedtensors_grads(grads_list) # Infer number of locations upon which convolution is applied. self._num_locations = num_conv_locations(inputs.shape.as_list(), @@ -711,7 +743,7 @@ class KroneckerProductFB(FisherBlock): class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): """K-FAC FisherBlock for embedding layers. - This FisherBlock is similar to EmbeddingKFACFB, except that its + This FisherBlock is similar to FullyConnectedKFACBasicFB, except that its input factor is approximated by a diagonal matrix. In the case that each example references exactly one embedding, this approximation is exact. @@ -740,17 +772,78 @@ class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - inputs, grads_list = self._package_minibatches(grads_list) + inputs = _make_partitionedtensors_inputs(self._inputs) + grads_list = _make_partitionedtensors_grads(grads_list) - self._input_factor = self._layer_collection.make_or_get_factor( # - fisher_factors.EmbeddingInputKroneckerFactor, # + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.EmbeddingInputKroneckerFactor, (inputs, self._vocab_size)) - self._output_factor = self._layer_collection.make_or_get_factor( # - fisher_factors.FullyConnectedKroneckerFactor, # - (grads_list,)) + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedKroneckerFactor, (grads_list,)) self._setup_damping(damping) +class EmbeddingKFACMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): + """K-FAC FisherBlock for embedding layers used multiple times in the graph. + + Similar to EmbeddingKFACFB except that this version supports multiple uses + of the parameter within a single model. These uses could correspond to + "time-steps", but they don't have to. + + Does not support bias parameters. + """ + + def __init__(self, layer_collection, vocab_size): + """Creates a EmbeddingKFACMultiIndepFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + vocab_size: int. Size of vocabulary for this embedding layer. + """ + self._vocab_size = vocab_size + + super(EmbeddingKFACMultiIndepFB, self).__init__(layer_collection) + + def instantiate_factors(self, grads_list, damping): + """Instantiate Kronecker Factors for this FisherBlock. + + Args: + grads_list: List of list of list of Tensors. grads_list[i][j][k] is the + gradient of the loss with respect to 'outputs' from source 'i', + tower/mini-batch 'j', and use/time-step 'k'. Each Tensor has shape + [tower_minibatch_size, output_size]. + damping: 0-D Tensor or float. 'damping' * identity is approximately added + to this FisherBlock's Fisher approximation. + """ + inputs = self._inputs + self._num_uses = num_uses = len(inputs[0]) + + # Check that all mini-batches/towers have the same number of uses + assert all(len(input_) == num_uses for input_ in inputs) + # Do the same for grads_list + assert all(len(grad) == num_uses for grad in grads for grads in grads_list) + # Merge uses and towers/minibatches dimensions together so we can handle + # it using a non-multi factor. + inputs = nest.flatten(inputs) + + # Note that we call the multi version of make_partitionedtensors only for + # grads_list here. + inputs = _make_partitionedtensors_inputs(inputs) + grads_list = _make_partitionedtensors_multi_grads(grads_list) + + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.EmbeddingInputKroneckerFactor, + (inputs, self._vocab_size)) + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedMultiKF, (grads_list,)) + self._setup_damping(damping, normalization=num_uses) + + @property + def _renorm_coeff(self): + return self._num_uses + + class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. @@ -781,13 +874,14 @@ class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - inputs, grads_list = self._package_minibatches(grads_list) + inputs = _make_partitionedtensors_inputs(self._inputs) + grads_list = _make_partitionedtensors_grads(grads_list) - self._input_factor = self._layer_collection.make_or_get_factor( # - fisher_factors.FullyConnectedKroneckerFactor, # + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedKroneckerFactor, ((inputs,), self._has_bias)) - self._output_factor = self._layer_collection.make_or_get_factor( # - fisher_factors.FullyConnectedKroneckerFactor, # + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedKroneckerFactor, (grads_list,)) self._setup_damping(damping) @@ -858,12 +952,13 @@ class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): super(ConvKFCBasicFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._package_minibatches(grads_list) - # Infer number of locations upon which convolution is applied. self._num_locations = num_conv_locations(self._inputs[0].shape.as_list(), self._strides) + inputs = _make_partitionedtensors_inputs(self._inputs) + grads_list = _make_partitionedtensors_grads(grads_list) + self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, (inputs, self._filter_shape, self._padding, self._strides, @@ -1139,6 +1234,10 @@ def num_conv_locations(input_shape, strides): class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters. + + This class implements the "independence across time" approximation from the + following paper: + https://openreview.net/pdf?id=HyMTkQZAb """ def __init__(self, layer_collection, has_bias=False): @@ -1156,7 +1255,8 @@ class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): def instantiate_factors(self, grads_list, damping): self._num_uses = float(len(self._inputs[0])) - inputs, grads_list = self._package_minibatches_multi(grads_list) + inputs = _make_partitionedtensors_multi_inputs(self._inputs) + grads_list = _make_partitionedtensors_multi_grads(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, @@ -1175,6 +1275,92 @@ class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): return self._outputs +class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): + """FisherBlock for 2D convolutional layers using the basic KFC approx. + + Similar to ConvKFCBasicFB except that this version supports multiple + uses/time-steps via a standard independence approximation. Similar to the + "independence across time" used in FullyConnectedMultiIndepFB but generalized + in the obvious way to conv layers. + """ + + def __init__(self, + layer_collection, + params, + padding, + strides=None, + dilation_rate=None, + data_format=None, + extract_patches_fn=None): + """Creates a ConvKFCBasicMultiIndepFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + params: The parameters (Tensor or tuple of Tensors) of this layer. If + kernel alone, a Tensor of shape [..spatial_filter_shape.., + in_channels, out_channels]. If kernel and bias, a tuple of 2 elements + containing the previous and a Tensor of shape [out_channels]. + padding: str. Padding method. + strides: List of ints or None. Contains [..spatial_filter_strides..] if + 'extract_patches_fn' is compatible with tf.nn.convolution(), else + [1, ..spatial_filter_strides, 1]. + dilation_rate: List of ints or None. Rate for dilation along each spatial + dimension if 'extract_patches_fn' is compatible with + tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. + data_format: str or None. Format of input data. + extract_patches_fn: str or None. Name of function that extracts image + patches. One of "extract_convolution_patches", "extract_image_patches", + "extract_pointwise_conv2d_patches". + """ + self._padding = padding + self._strides = maybe_tuple(strides) + self._dilation_rate = maybe_tuple(dilation_rate) + self._data_format = data_format + self._extract_patches_fn = extract_patches_fn + self._has_bias = isinstance(params, (tuple, list)) + + fltr = params[0] if self._has_bias else params + self._filter_shape = tuple(fltr.shape.as_list()) + + super(ConvKFCBasicMultiIndepFB, self).__init__(layer_collection) + + def instantiate_factors(self, grads_list, damping): + # Infer number of locations upon which convolution is applied. + self._num_locations = num_locations = num_conv_locations( + self._inputs[0][0].shape.as_list(), self._strides) + + # The first index is tower/minibatch, the second is use/time-step + inputs = self._inputs + self._num_uses = num_uses = len(inputs[0]) + + # Check that all mini-batches/towers have the same number of uses + assert all(len(input_) == num_uses for input_ in inputs) + assert all(len(grad) == num_uses for grads in grads_list for grad in grads) + + # Fold uses/time-step and towers/minibatches dimensions together + inputs = nest.flatten(inputs) + # And do the same for grads_list + grads_list = tuple(nest.flatten(grads) for grads in grads_list) + + inputs = _make_partitionedtensors_inputs(inputs) + grads_list = _make_partitionedtensors_grads(grads_list) + + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.ConvInputKroneckerFactor, + (inputs, self._filter_shape, self._padding, self._strides, + self._dilation_rate, self._data_format, self._extract_patches_fn, + self._has_bias)) + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) + + self._setup_damping(damping, normalization=(num_locations * num_uses)) + + @property + def _renorm_coeff(self): + return self._num_locations * self._num_uses + + class SeriesFBApproximation(enum.IntEnum): """See FullyConnectedSeriesFB.__init__ for description and usage.""" option1 = 1 @@ -1184,7 +1370,8 @@ class SeriesFBApproximation(enum.IntEnum): class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): """FisherBlock for fully-connected layers that share parameters across time. - See the following preprint for details: + This class implements the "Option 1" and "Option 2" approximation from the + following paper: https://openreview.net/pdf?id=HyMTkQZAb See the end of the appendix of the paper for a pseudo-code of the @@ -1218,7 +1405,10 @@ class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): def instantiate_factors(self, grads_list, damping): self._num_timesteps = len(self._inputs[0]) - inputs, grads_list = self._package_minibatches_multi(grads_list) + assert len(grads_list[0][0]) == self._num_timesteps + + inputs = _make_partitionedtensors_multi_inputs(self._inputs) + grads_list = _make_partitionedtensors_multi_grads(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, ((inputs,), self._has_bias)) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 4eb5e4c092..00eae8b399 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -60,6 +60,10 @@ _CONV2D_APPROX_TO_BLOCK_TYPES = { APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB, } +_EMBEDDING_APPROX_TO_BLOCK_TYPES = { + APPROX_KRONECKER_NAME: fb.EmbeddingKFACFB +} + APPROX_KRONECKER_INDEP_NAME = "kron_indep" APPROX_KRONECKER_SERIES_1_NAME = "kron_series_1" APPROX_KRONECKER_SERIES_2_NAME = "kron_series_2" @@ -72,6 +76,14 @@ _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES = { option=2) } +_CONV2D_MULTI_APPROX_TO_BLOCK_TYPES = { + APPROX_KRONECKER_INDEP_NAME: fb.ConvKFCBasicMultiIndepFB +} + +_EMBEDDING_MULTI_APPROX_TO_BLOCK_TYPES = { + APPROX_KRONECKER_INDEP_NAME: fb.EmbeddingKFACMultiIndepFB +} + # Possible value for 'reuse' keyword argument. Sets 'reuse' to # tf.get_variable_scope().reuse. VARIABLE_SCOPE = "VARIABLE_SCOPE" @@ -169,9 +181,12 @@ class LayerCollection(object): self._default_generic_approximation = APPROX_FULL_NAME self._default_embedding_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_approximation = APPROX_KRONECKER_NAME - self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME + self._default_conv2d_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_multi_approximation = ( - APPROX_KRONECKER_SERIES_2_NAME) + APPROX_KRONECKER_INDEP_NAME) + self._default_conv2d_multi_approximation = ( + APPROX_KRONECKER_INDEP_NAME) + self._default_embedding_multi_approximation = APPROX_KRONECKER_INDEP_NAME self.loss_colocation_ops = {} self._vars_to_uses = defaultdict(lambda: 0) @@ -245,14 +260,14 @@ class LayerCollection(object): @property def default_conv2d_approximation(self): - return self._default_convolution_2d_approximation + return self._default_conv2d_approximation def set_default_conv2d_approximation(self, value): if value not in _CONV2D_APPROX_TO_BLOCK_TYPES: raise ValueError( "{} is not a valid approximation for 2d convolutional layers.".format( value)) - self._default_convolution_2d_approximation = value + self._default_conv2d_approximation = value @property def default_fully_connected_multi_approximation(self): @@ -264,6 +279,14 @@ class LayerCollection(object): "multi layer.".format(value)) self._default_fully_connected_multi_approximation = value + @property + def default_conv2d_multi_approximation(self): + return self._default_conv2d_multi_approximation + + @property + def default_embedding_multi_approximation(self): + return self._default_embedding_multi_approximation + def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE): """Validates and registers the layer_key associated with the fisher_block. @@ -526,13 +549,24 @@ class LayerCollection(object): else: return None + def _get_block_type(self, params, approx, default, approx_to_type): + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = default + + if approx not in approx_to_type: + raise ValueError("Bad value {} for approx.".format(approx)) + + return approx_to_type[approx], approx + def register_embedding(self, params, inputs, outputs, approx=None, reuse=VARIABLE_SCOPE): - """Registers a fully connnected layer. + """Registers an embedding layer. Args: params: Embedding matrix of shape [vocab_size, embedding_size]. @@ -540,7 +574,8 @@ class LayerCollection(object): into embedding matrix. outputs: Tensor of shape [batch_size, output_size]. Outputs produced by layer. - approx: str. Must be "kron". + approx: str or None. If not None must be "kron". The Fisher + approximation to use. If None the default value is used. (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -550,20 +585,15 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = self.default_embedding_approximation - - if approx != APPROX_KRONECKER_NAME: - raise ValueError("Bad value {} for approx.".format(approx)) + block_type, approx = self._get_block_type( + params, approx, self.default_embedding_approximation, + _EMBEDDING_APPROX_TO_BLOCK_TYPES) if isinstance(params, (tuple, list)): raise ValueError("Bias not supported.") - vocab_size = int(params.shape[0]) block = self.register_block( - params, fb.EmbeddingKFACFB(self, vocab_size), reuse=reuse) + params, block_type(self, vocab_size), reuse=reuse) block.register_additional_minibatch(inputs, outputs) self._add_uses(params, 1) @@ -583,7 +613,9 @@ class LayerCollection(object): inputs: Tensor of shape [batch_size, input_size]. Inputs to layer. outputs: Tensor of shape [batch_size, output_size]. Outputs produced by layer. - approx: str. One of "kron" or "diagonal". + approx: str or None. If not None must be one of "kron" or "diagonal". + The Fisher approximation to use. If None the default value is used. + (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -593,17 +625,12 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = self.default_fully_connected_approximation - if approx not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES: - raise ValueError("Bad value {} for approx.".format(approx)) + block_type, approx = self._get_block_type( + params, approx, self.default_fully_connected_approximation, + _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES) - block_type = _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES[approx] has_bias = isinstance(params, (tuple, list)) - block = self.register_block(params, block_type(self, has_bias=has_bias), reuse=reuse) block.register_additional_minibatch(inputs, outputs) @@ -635,7 +662,9 @@ class LayerCollection(object): Output produced by layer. data_format: str or None. Format of data. dilations: List of 4 ints. Dilations along each dimension. - approx: str. One of "kron" or "diagonal". + approx: str or None. If not None must be one of "kron" or "diagonal". + The Fisher approximation to use. If None the default value is used. + (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -646,15 +675,14 @@ class LayerCollection(object): ValueError: If reuse == True and FisherBlock found but of the wrong type. """ - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = self.default_conv2d_approximation - - if approx not in _CONV2D_APPROX_TO_BLOCK_TYPES: - raise ValueError("Bad value {} for approx.".format(approx)) + block_type, approx = self._get_block_type( + params, approx, self.default_conv2d_approximation, + _CONV2D_APPROX_TO_BLOCK_TYPES) - block_type = _CONV2D_APPROX_TO_BLOCK_TYPES[approx] + # It feels bad to pass in configuration that has to do with the internal + # implementation. And then we can't use the same constructor for both + # anymore and are thus forced to use this ugly if-statement. + # TODO(b/74793309): Clean this up? if approx == APPROX_KRONECKER_NAME: block = self.register_block( params, @@ -680,7 +708,7 @@ class LayerCollection(object): data_format=data_format), reuse=reuse) else: - raise NotImplementedError + raise NotImplementedError(approx) block.register_additional_minibatch(inputs, outputs) @@ -712,7 +740,9 @@ class LayerCollection(object): dilation_rate: List of ints of length len(..input_spatial_size..). Dilations along spatial dimension. data_format: str or None. Format of data. - approx: str. One of "kron" or "diagonal". + approx: str or None. If not None must be one of "kron" or "diagonal". + The Fisher approximation to use. If None the default value is used. + (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -722,6 +752,8 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ + # TODO(b/74793309): Have this use _get_block_type like the other + # registration functions? assert approx is None or approx == APPROX_KRONECKER_NAME block = self.register_block( @@ -762,7 +794,8 @@ class LayerCollection(object): rate: None or List of ints of length 2. Dilation rates in spatial dimensions. data_format: str or None. Format of data. - approx: None or str. Must be "diagonal" if non-None. + approx: str or None. If not None must "diagonal". The Fisher + approximation to use. If None the default value is used. (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -772,6 +805,8 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ + # TODO(b/74793309): Have this use _get_block_type like the other + # registration functions? assert approx is None or approx == APPROX_DIAGONAL_NAME assert data_format in [None, "NHWC"] @@ -803,7 +838,7 @@ class LayerCollection(object): reuse=VARIABLE_SCOPE): """Register a call to tf.nn.separable_conv2d(). - Note: This requires access to intermediate outputs betwee depthwise and + Note: This requires access to intermediate outputs between depthwise and pointwise convolutions. Args: @@ -824,7 +859,9 @@ class LayerCollection(object): rate: None or List of ints of length 2. Dilation rate of depthwise conv2d kernel in spatial dimensions. data_format: str or None. Format of data. - approx: None or str. Must be "kron" if non-None. + approx: str or None. If not None must be one of "kron" or "diagonal". + The Fisher approximation to use. If None the default value is used. + (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -865,7 +902,9 @@ class LayerCollection(object): Args: params: Tensor or tuple of Tensors corresponding to the parameters. batch_size: 0-D Tensor. Size of the minibatch. - approx: str. One of "full" or "diagonal". + approx: str or None. It not None, must be one of "full" or "diagonal". + The Fisher approximation to use. If None the default value is used. + (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -875,16 +914,10 @@ class LayerCollection(object): KeyError: If reuse == True but no FisherBlock found for 'params'. ValueError: If reuse == True and FisherBlock found but of the wrong type. """ + block_type, approx = self._get_block_type( + params, approx, self.default_generic_approximation, + _GENERIC_APPROX_TO_BLOCK_TYPES) - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = self.default_generic_approximation - - if approx not in _GENERIC_APPROX_TO_BLOCK_TYPES: - raise ValueError("Bad value {} for approx.".format(approx)) - - block_type = _GENERIC_APPROX_TO_BLOCK_TYPES[approx] block = self.register_block(params, block_type(self, params), reuse=reuse) block.register_additional_minibatch(batch_size) @@ -903,11 +936,15 @@ class LayerCollection(object): this layer. Weight matrix should have shape [input_size, output_size]. Bias should have shape [output_size]. inputs: A list of tensors, each of shape [batch_size, input_size]. Inputs - to layer. In the case of RNNs, one Tensor per time step. + to layer. The list indexes each use in the graph (which might + correspond to a "time-step" in an RNN). outputs: A list of tensors, the same length as 'inputs', each of shape - [batch_size, output_size]. Outputs produced by layer. In the case of - RNNs, one Tensor per time step. - approx: str. One of "kron_indep", "kron_series_1", or "kron_series_2". + [batch_size, output_size]. Outputs produced by layer. The list indexes + each use in the graph (which might correspond to a "time-step" in an + RNN). Needs to correspond with the order used in 'inputs'. + approx: str or None. If not None, must be of "kron_indep", "kron_series_1" + or "kron_series_2". The Fisher approximation to use. If None the default + value is used. (Default: None) reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. @@ -915,28 +952,129 @@ class LayerCollection(object): Raises: ValueError: For improper value to 'approx'. """ - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = self.default_fully_connected_multi_approximation - has_bias = isinstance(params, (tuple, list)) + block_type, approx = self._get_block_type( + params, approx, self.default_fully_connected_multi_approximation, + _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES) # TODO(b/70283649): something along the lines of find_canonical_output # should be added back in here (and for the other block types, arguably). - if approx not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES: - raise ValueError("Bad value {} for approx.".format(approx)) - block_type = _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES[approx] - + has_bias = isinstance(params, (tuple, list)) block = self.register_block(params, block_type(self, has_bias=has_bias), reuse=reuse) block.register_additional_minibatch(inputs, outputs) + + assert len(inputs) == len(outputs) + self._add_uses(params, len(inputs)) + + def register_conv2d_multi(self, + params, + strides, + padding, + inputs, + outputs, + data_format=None, + dilations=None, + approx=None, + reuse=VARIABLE_SCOPE): + """Registers convolutional layers with shared parameters. + + Args: + params: Tensor or 2-tuple of Tensors corresponding to weight and bias of + this layer. Weight matrix should have shape [kernel_height, + kernel_width, in_channels, out_channels]. Bias should have shape + [out_channels]. + strides: 1-D Tensor of length 4. Strides for convolution kernel. + padding: string. see tf.nn.conv2d for valid values. + inputs: A list of Tensors, each of shape [batch_size, height, width, + in_channels]. Inputs to layer. The list indexes each use in the graph + (which might correspond to a "time-step" in an RNN). + outputs: A list of Tensors, each of shape [batch_size, height, width, + out_channels]. Output produced by layer. The list indexes each use + in the graph (which might correspond to a "time-step" in an RNN). + Needs to correspond with the order used in 'inputs'. + data_format: str or None. Format of data. + dilations: List of 4 ints. Dilations along each dimension. + approx: str or None. If not None must by "kron_indep". The Fisher + approximation to use. If None the default value is used. + (Default: None) + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. + """ + block_type, approx = self._get_block_type( + params, approx, self.default_conv2d_multi_approximation, + _CONV2D_MULTI_APPROX_TO_BLOCK_TYPES) + + block = self.register_block( + params, + block_type( + layer_collection=self, + params=params, + padding=padding, + strides=strides, + data_format=data_format, + dilation_rate=dilations, + extract_patches_fn="extract_image_patches"), + reuse=reuse) + + block.register_additional_minibatch(inputs, outputs) + + assert len(inputs) == len(outputs) self._add_uses(params, len(inputs)) # TODO(b/74108452): change the loss registration functions names to refer # to "loss functions" instead of distributions. Following naming convention # of the loss function classes themselves. + def register_embedding_multi(self, + params, + inputs, + outputs, + approx=None, + reuse=VARIABLE_SCOPE): + """Registers embedding layers with shared parameters. + + Args: + params: Embedding matrix of shape [vocab_size, embedding_size]. + inputs: A list of Tensors, each of shape [batch_size, input_size] and + dtype int32. Indices into embedding matrix. The list indexes each use + in the graph (which might correspond to a "time-step" in an RNN). + outputs: A list of Tensors, each of shape [batch_size, output_size]. + Outputs produced by layer. The list indexes each use in the graph + (which might correspond to a "time-step" in an RNN). Needs to + correspond with the order used in 'inputs'. + approx: str or None. If not None must by "kron_indep". The Fisher + approximation to use. If None the default value is used. + (Default: None) + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. + """ + block_type, approx = self._get_block_type( + params, approx, self.default_embedding_multi_approximation, + _EMBEDDING_MULTI_APPROX_TO_BLOCK_TYPES) + + if isinstance(params, (tuple, list)): + raise ValueError("Bias not supported.") + vocab_size = int(params.shape[0]) + + block = self.register_block( + params, block_type(self, vocab_size), reuse=reuse) + block.register_additional_minibatch(inputs, outputs) + + self._add_uses(params, len(inputs)) + def register_categorical_predictive_distribution(self, logits, seed=None, diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index af26f5e56b..c589b18193 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -659,6 +659,14 @@ class PartitionedTensor(object): def __hash__(self): return hash(tuple(self.tensors)) + def __eq__(self, other): + if not isinstance(other, PartitionedTensor): + return False + return self.tensors == other.tensors + + def __ne__(self, other): + return not self == other # pylint: disable=g-comparison-negation + def as_tensor(self, dtype=None, name=None, as_ref=False): with ops.name_scope(name, "PartitionedTensor.as_tensor", self.tensors): assert not as_ref -- GitLab From 09d3d2dcc1b1b9ee7282b37bc4e0f212c577f6a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 08:59:02 -0700 Subject: [PATCH 102/960] Fix a typo in docstring for index_table_from_tensor. PiperOrigin-RevId: 189345585 --- tensorflow/contrib/lookup/lookup_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index 62f1c810fc..a57a1e5421 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -105,7 +105,7 @@ def index_table_from_tensor(mapping, ... tf.tables_initializer().run() - ids.eval() ==> [0, 1, 4, 2] + ids.eval() ==> [0, 1, 3, 2] ``` Args: -- GitLab From 4b5cb6c49934b6c62fcfc4bf710a30dcce2568d3 Mon Sep 17 00:00:00 2001 From: Frank Perbet Date: Fri, 16 Mar 2018 09:02:48 -0700 Subject: [PATCH 103/960] Make the graph_editor C-API friendly: always construct ops with their inputs. PiperOrigin-RevId: 189346024 --- .../graph_editor/tests/transform_test.py | 32 +++++++-- tensorflow/contrib/graph_editor/transform.py | 66 +++++++++++++------ 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index ca00394388..2a1b78042d 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -23,6 +23,7 @@ from tensorflow.contrib import graph_editor as ge from tensorflow.contrib.graph_editor.tests import match from tensorflow.python.client import session from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -84,9 +85,9 @@ class TransformTest(test.TestCase): def test_transform(self): transformer = ge.Transformer() - def my_transform_op_handler(info, op): + def my_transform_op_handler(info, op, new_inputs): add_noise = op.name.startswith("Add") - op_, op_outputs_ = ge.transform.copy_op_handler(info, op) + op_, op_outputs_ = ge.transform.copy_op_handler(info, op, new_inputs) if not add_noise: return op_, op_outputs_ # add some noise to op @@ -201,15 +202,36 @@ class TransformTest(test.TestCase): get_operation_by_name("res/grad/mul1_grad/Mul_1")) # Make sure _original_ops are as expected. - self.assertEquals(original_mul1_grad._original_op.name, u"mul1") - self.assertEquals(result_mul1_grad._original_op.name, u"res/mul1") - self.assertNotEquals(res.name, g.name) + self.assertEqual(original_mul1_grad._original_op.name, u"mul1") + self.assertEqual(result_mul1_grad._original_op.name, u"res/mul1") + self.assertNotEqual(res.name, g.name) with session.Session() as sess: sess.run(variables.global_variables_initializer()) g_val, res_val = sess.run([g, res]) self.assertNear(g_val, 0.0, ERROR_TOLERANCE) self.assertNear(res_val, 0.0, ERROR_TOLERANCE) + def test_graph_while_loop(self): + graph = ops.Graph() + with graph.as_default(): + max_index = array_ops.placeholder(dtype=dtypes.int32, shape=tuple()) + index_start = constant_op.constant(1) + sum_start = constant_op.constant(0) + _, result = control_flow_ops.while_loop( + cond=lambda i, unused_s: i <= max_index, + body=lambda i, s: (i + 1, s + i), + loop_vars=[index_start, sum_start]) + copied_graph = ops.Graph() + _, copy_info = ge.copy( + graph, dst_graph=copied_graph, dst_scope="imported") + copied_result = copy_info.transformed(result) + copied_max_index = copy_info.transformed(max_index) + with copied_graph.as_default(): + with session.Session() as sess: + n = 10 + sum_val = sess.run(copied_result, feed_dict={copied_max_index: n}) + self.assertEqual(sum_val, 55) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 14ac529665..03c9afe813 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -30,6 +30,7 @@ from tensorflow.contrib.graph_editor import select from tensorflow.contrib.graph_editor import subgraph from tensorflow.contrib.graph_editor import util from tensorflow.python.framework import ops as tf_ops +from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -129,20 +130,26 @@ def transform_op_if_inside_handler(info, op, keep_if_possible=True): return None -def copy_op_handler(info, op, copy_shape=True): +def copy_op_handler(info, op, new_inputs, copy_shape=True): """Copy a `tf.Operation`. Args: info: Transform._TmpInfo instance. op: the `tf.Operation` to be copied. + new_inputs: The new inputs for this op. copy_shape: also copy the shape of the tensor Returns: A `(op, op_outputs)` tuple containing the transformed op and its outputs. """ + # The `new_inputs` was added to this function. For compatibility reason, + # let's raise an error if `new_inputs` is a boolean. + if isinstance(new_inputs, bool): + raise TypeError("the `new_inputs` argument must be an iterable.") + # pylint: disable=protected-access # Clone the node def: - node_def_ = deepcopy(op._node_def) + node_def_ = deepcopy(op.node_def) # Transform name: name_ = info.new_name(op.name) @@ -155,10 +162,10 @@ def copy_op_handler(info, op, copy_shape=True): # Make a copy of the op_def too. # Its unique to every _type_ of Operation. - op_def_ = deepcopy(op._op_def) + op_def_ = deepcopy(op.op_def) # Initialize a new Operation instance - op_ = tf_ops.Operation(node_def_, info.graph_, [], output_types_, + op_ = tf_ops.Operation(node_def_, info.graph_, new_inputs, output_types_, [], input_types_, None, op_def_) # copy the shape over @@ -170,6 +177,7 @@ def copy_op_handler(info, op, copy_shape=True): # attribute to exist, we will create a dummy original_op first and then # later finalise it with the actual original_op when all the ops have # been copied. + # TODO(fkp): Stop worrying about _original_op and remove this code? if op._original_op: op_._original_op = op._original_op @@ -328,6 +336,14 @@ class _TmpInfo(object): for key in self.graph.get_all_collection_keys()) self.cyclic_ops = [] self.transform_original_op_handler = transform_op_if_inside_handler + # The graph is transformed op by op, in the same order the original ops + # where created. However, this is sometimes not possible due to cycles + # (e.g. while loops). So when the transformer creates a new op whose + # inputs do not exist yet, temporary placeholders are created and stored + # in this `tmp_cyclic_ts` container. During a second pass, + # those temporary tensors are replaced by the proper transformed tensors + # (see the function `_finalize_cycle`). + self.tmp_cyclic_ts = [] def new_name(self, name): """Compute a destination name from a source name. @@ -428,10 +444,10 @@ class Transformer(object): # Create temporary info used during this transform call info = _TmpInfo(sgv, dst_graph, dst_scope, src_scope) - info.transform_original_op_handler = self.transform_original_op_handler self._copy_ops(info) - self._connect_ops(info) + self._finalize_cycle(info) + self._connect_control_inputs(info) # Compute information about the transformation res_info = TransformerInfo(info) @@ -440,10 +456,12 @@ class Transformer(object): def _copy_ops(self, info): """Copy ops without connecting them.""" - for op in info.sgv.ops: + sorted_ops = sorted(info.sgv.ops, key=lambda op: op._id) # pylint: disable=protected-access + for op in sorted_ops: logging.debug("Copying op: %s", op.name) + new_inputs = [self._transformed_t(info, t) for t in op.inputs] # TODO(fkp): return a subgraph? - op_, op_outputs_ = self.transform_op_handler(info, op) + op_, op_outputs_ = self.transform_op_handler(info, op, new_inputs) if op is op_: raise ValueError("In-place transformation not allowed.") @@ -456,27 +474,31 @@ class Transformer(object): info.transformed_ts[op_output] = op_output_ self.assign_collections_handler(info, op_output, op_output_) - def _connect_ops(self, info): + def _finalize_cycle(self, info): + for t, tmp_t_ in info.tmp_cyclic_ts: + if t not in info.transformed_ts: + raise ValueError("The tensor {} should be transformed by now.".format( + t.name)) + op_ = tmp_t_.consumers()[0] + t_ = info.transformed_ts[t] + op_._update_input(list(op_.inputs).index(tmp_t_), t_) # pylint: disable=protected-access + + def _connect_control_inputs(self, info): """Connect the previously copied ops.""" for op in info.sgv.ops: - logging.debug("Finalizing op: %s", op.name) + logging.debug("Connecting control inputs of op: %s", op.name) op_ = info.transformed_ops[op] - # pylint: disable=protected-access - if op_.inputs: - raise ValueError("The newly transformed op should not have " - "any inputs yet: {}".format(op_.name)) - inputs_ = [self._transformed_t(info, t) for t in op.inputs] - for t in inputs_: - op_._add_input(t) - # Finalize original op. + # TODO(fkp): Stop worrying about _original_op and remove this code? + # pylint: disable=protected-access if op._original_op: - original_op = info.transform_original_op_handler(info, op._original_op) + original_op = self.transform_original_op_handler(info, op._original_op) if original_op is None: logging.debug("Could not find original op for: %s", op_.name) else: op_._original_op = original_op + # pylint: enable=protected-access # Finalize control inputs: control_inputs_ = [self.transform_control_input_handler(info, ci) @@ -528,6 +550,12 @@ class Transformer(object): def _transformed_t(self, info, t): """Return tre transformed tensor of `t`.""" if t not in info.transformed_ts: + if t.op in info.ops: + with info.graph_.as_default(): + tmp_t_ = array_ops.placeholder( + shape=t.shape, dtype=t.dtype, name="ge_tmp") + info.tmp_cyclic_ts.append((t, tmp_t_)) + return tmp_t_ # If op is not in the subgraph. if t in info.sgv_inputs_set: # t is an input of the subgraph. -- GitLab From aef5751d17cf5d53de45b7833c26d11cd9473cde Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 09:32:43 -0700 Subject: [PATCH 104/960] Upgrade gRPC version used in OSS Tensorflow PiperOrigin-RevId: 189349737 --- tensorflow/contrib/cmake/external/grpc.cmake | 2 +- tensorflow/workspace.bzl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index a9f43a3ecb..95106dba1f 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) -set(GRPC_TAG 730b778632e79cc3c96ad237f282d687ee325ce7) +set(GRPC_TAG 575bda39755b98d1f7099406bb57a6e3b2074874) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index abc9eb9bc1..8d739158c5 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -452,11 +452,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "grpc", urls = [ - "https://mirror.bazel.build/github.com/grpc/grpc/archive/730b778632e79cc3c96ad237f282d687ee325ce7.tar.gz", - "https://github.com/grpc/grpc/archive/730b778632e79cc3c96ad237f282d687ee325ce7.tar.gz", + "https://mirror.bazel.build/github.com/grpc/grpc/archive/575bda39755b98d1f7099406bb57a6e3b2074874.tar.gz", + "https://github.com/grpc/grpc/archive/575bda39755b98d1f7099406bb57a6e3b2074874.tar.gz", ], - sha256 = "8c91a8d12e1e868cf51f7340b75507a8aa017a7e1b56f46ed6816aeb803dc9bd", - strip_prefix = "grpc-730b778632e79cc3c96ad237f282d687ee325ce7", + sha256 = "f08a5c8e265191b39cc74915b1bc1fd380d86cd0176c92b7cce30b6ac50514ad", + strip_prefix = "grpc-575bda39755b98d1f7099406bb57a6e3b2074874", ) tf_http_archive( -- GitLab From 14009941a5dbe9f58c0866d9152a4ddd5ebc7d54 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 09:42:45 -0700 Subject: [PATCH 105/960] Doc grammar and style fixes for macOS installation. PiperOrigin-RevId: 189351224 --- tensorflow/docs_src/install/install_mac.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 623ca6bb79..99745fcc6d 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -238,7 +238,7 @@ take the following steps: operating system and Python version. Find the appropriate value for tfBinaryURL [here](#the_url_of_the_tensorflow_python_package). For example, if - you are installing TensorFlow for Mac OS and Python 2.7 + you are installing TensorFlow for macOS and Python 2.7 issue the following command:
 $ sudo pip install --upgrade \
@@ -513,12 +513,7 @@ RuntimeError: Broken toolchain: cannot link a simple C program
## The URL of the TensorFlow Python package A few installation mechanisms require the URL of the TensorFlow Python package. -The value you specify depends on three factors: - - * operating system - * Python version - -This section documents the relevant values for Mac OS installations. +The value you specify depends on your Python version. ### Python 2.7 -- GitLab From 6dd0b7e350133e62ea74dc1fb6a502044184b071 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 09:53:20 -0700 Subject: [PATCH 106/960] Eliminate use of grpc::CoreCodegenInterface, which is marked as an internal interface PiperOrigin-RevId: 189352860 --- tensorflow/core/distributed_runtime/rpc/BUILD | 1 + .../rpc/grpc_serialization_traits.h | 57 +++++++------------ .../rpc/grpc_worker_service_impl.h | 4 +- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index e9d5390c63..9dae1b9859 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -259,6 +259,7 @@ cc_library( hdrs = ["grpc_serialization_traits.h"], deps = [ "@grpc//:grpc++_unsecure", + "@grpc//:grpc_unsecure", ], ) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h b/tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h index 730124c25e..e7f5fb0c6a 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_serialization_traits.h @@ -18,6 +18,7 @@ limitations under the License. #include "grpc++/impl/codegen/proto_utils.h" #include "grpc++/support/slice.h" +#include "grpc/grpc.h" namespace grpc { @@ -30,13 +31,13 @@ class GrpcBufferWriter final public: explicit GrpcBufferWriter(grpc_byte_buffer** bp, int block_size) : block_size_(block_size), byte_count_(0), have_backup_(false) { - *bp = g_core_codegen_interface->grpc_raw_byte_buffer_create(NULL, 0); + *bp = grpc_raw_byte_buffer_create(NULL, 0); slice_buffer_ = &(*bp)->data.raw.slice_buffer; } ~GrpcBufferWriter() override { if (have_backup_) { - g_core_codegen_interface->grpc_slice_unref(backup_slice_); + grpc_slice_unref(backup_slice_); } } @@ -45,24 +46,24 @@ class GrpcBufferWriter final slice_ = backup_slice_; have_backup_ = false; } else { - slice_ = g_core_codegen_interface->grpc_slice_malloc(block_size_); + slice_ = grpc_slice_malloc(block_size_); } *data = GRPC_SLICE_START_PTR(slice_); // On win x64, int is only 32bit GPR_CODEGEN_ASSERT(GRPC_SLICE_LENGTH(slice_) <= INT_MAX); byte_count_ += * size = (int)GRPC_SLICE_LENGTH(slice_); - g_core_codegen_interface->grpc_slice_buffer_add(slice_buffer_, slice_); + grpc_slice_buffer_add(slice_buffer_, slice_); return true; } void BackUp(int count) override { - g_core_codegen_interface->grpc_slice_buffer_pop(slice_buffer_); + grpc_slice_buffer_pop(slice_buffer_); if (count == block_size_) { backup_slice_ = slice_; } else { - backup_slice_ = g_core_codegen_interface->grpc_slice_split_tail( - &slice_, GRPC_SLICE_LENGTH(slice_) - count); - g_core_codegen_interface->grpc_slice_buffer_add(slice_buffer_, slice_); + backup_slice_ = + grpc_slice_split_tail(&slice_, GRPC_SLICE_LENGTH(slice_) - count); + grpc_slice_buffer_add(slice_buffer_, slice_); } // It's dangerous to keep an inlined grpc_slice as the backup slice, since // on a following Next() call, a reference will be returned to this slice @@ -85,29 +86,12 @@ class GrpcBufferWriter final class GrpcBufferReader final : public ::grpc::protobuf::io::ZeroCopyInputStream { - typedef void (CoreCodegenInterface::*OldReaderInitAPI)( - grpc_byte_buffer_reader* reader, grpc_byte_buffer* buffer); - typedef int (CoreCodegenInterface::*NewReaderInitAPI)( - grpc_byte_buffer_reader* reader, grpc_byte_buffer* buffer); - void ReaderInit(OldReaderInitAPI ptr, grpc_byte_buffer_reader* reader, - grpc_byte_buffer* buffer) { - (g_core_codegen_interface->*ptr)(reader, buffer); - } - void ReaderInit(NewReaderInitAPI ptr, grpc_byte_buffer_reader* reader, - grpc_byte_buffer* buffer) { - int result = (g_core_codegen_interface->*ptr)(reader, buffer); - (void)result; - } - public: explicit GrpcBufferReader(grpc_byte_buffer* buffer) : byte_count_(0), backup_count_(0) { - ReaderInit(&CoreCodegenInterface::grpc_byte_buffer_reader_init, &reader_, - buffer); - } - ~GrpcBufferReader() override { - g_core_codegen_interface->grpc_byte_buffer_reader_destroy(&reader_); + (void)grpc_byte_buffer_reader_init(&reader_, buffer); } + ~GrpcBufferReader() override { grpc_byte_buffer_reader_destroy(&reader_); } bool Next(const void** data, int* size) override { if (backup_count_ > 0) { @@ -118,11 +102,10 @@ class GrpcBufferReader final backup_count_ = 0; return true; } - if (!g_core_codegen_interface->grpc_byte_buffer_reader_next(&reader_, - &slice_)) { + if (!grpc_byte_buffer_reader_next(&reader_, &slice_)) { return false; } - g_core_codegen_interface->grpc_slice_unref(slice_); + grpc_slice_unref(slice_); *data = GRPC_SLICE_START_PTR(slice_); // On win x64, int is only 32bit GPR_CODEGEN_ASSERT(GRPC_SLICE_LENGTH(slice_) <= INT_MAX); @@ -176,18 +159,18 @@ class UnlimitedSizeProtoSerializationTraits { return Status(StatusCode::INTERNAL, "Message length was negative"); } else if (byte_size <= tensorflow_helper::kGrpcBufferWriterMaxBufferLength) { - grpc_slice slice = g_core_codegen_interface->grpc_slice_malloc(byte_size); + grpc_slice slice = grpc_slice_malloc(byte_size); GPR_CODEGEN_ASSERT( GRPC_SLICE_END_PTR(slice) == msg.SerializeWithCachedSizesToArray(GRPC_SLICE_START_PTR(slice))); - *bp = g_core_codegen_interface->grpc_raw_byte_buffer_create(&slice, 1); - g_core_codegen_interface->grpc_slice_unref(slice); - return g_core_codegen_interface->ok(); + *bp = grpc_raw_byte_buffer_create(&slice, 1); + grpc_slice_unref(slice); + return Status::OK; } else { tensorflow_helper::GrpcBufferWriter writer( bp, tensorflow_helper::kGrpcBufferWriterMaxBufferLength); return msg.SerializeToZeroCopyStream(&writer) - ? g_core_codegen_interface->ok() + ? Status::OK : Status(StatusCode::INTERNAL, "Failed to serialize message"); } } @@ -197,7 +180,7 @@ class UnlimitedSizeProtoSerializationTraits { if (buffer == nullptr) { return Status(StatusCode::INTERNAL, "No payload"); } - Status result = g_core_codegen_interface->ok(); + Status result = Status::OK; { tensorflow_helper::GrpcBufferReader reader(buffer); ::grpc::protobuf::io::CodedInputStream decoder(&reader); @@ -214,7 +197,7 @@ class UnlimitedSizeProtoSerializationTraits { result = Status(StatusCode::INTERNAL, "Did not read entire message"); } } - g_core_codegen_interface->grpc_byte_buffer_destroy(buffer); + grpc_byte_buffer_destroy(buffer); return result; } }; diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h index 1a5e2edfb2..2a2f7e3ffb 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service_impl.h @@ -88,7 +88,7 @@ class SerializationTraits if (buffer == nullptr) { return Status(StatusCode::INTERNAL, "No payload"); } - Status result = g_core_codegen_interface->ok(); + Status result = Status::OK; if (result.ok()) { ::tensorflow::GrpcByteSource source(buffer); auto s = msg->ParseFrom(&source); @@ -98,7 +98,7 @@ class SerializationTraits "TensorResponse parse error", s.ToString())); } } - g_core_codegen_interface->grpc_byte_buffer_destroy(buffer); + grpc_byte_buffer_destroy(buffer); return result; } }; -- GitLab From 6271467df2f0bf7c776f888bed3c6722502efc51 Mon Sep 17 00:00:00 2001 From: Jakub Kolodziejczyk Date: Sat, 17 Mar 2018 01:58:09 +0900 Subject: [PATCH 107/960] Updated tf.Session.run(...) documentation Added information that order in which `fetches` are evaluated inside the call is undefined. Discussion: https://github.com/tensorflow/tensorflow/issues/13133 --- tensorflow/python/client/session.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 29f06c8f22..9b7e853e8c 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -889,6 +889,8 @@ class BaseSession(SessionInterface): Either a single value if `fetches` is a single graph element, or a list of values if `fetches` is a list, or a dictionary with the same keys as `fetches` if that is a dictionary (described above). + Order in which `fetches` operations are evaluated inside the call + is undefined. Raises: RuntimeError: If this `Session` is in an invalid state (e.g. has been -- GitLab From a0bd058ad1406585634330772bfda76fd27d87d7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 09:54:27 -0700 Subject: [PATCH 108/960] Move CreateSubProcess from test.h to subprocess.h PiperOrigin-RevId: 189353033 --- .../xla/legacy_flags/parse_flags_from_env_test.cc | 1 + .../core/distributed_runtime/rpc/grpc_testlib.cc | 2 +- tensorflow/core/distributed_runtime/rpc/grpc_testlib.h | 1 + tensorflow/core/platform/posix/subprocess.cc | 10 ++++++++++ tensorflow/core/platform/posix/test.cc | 9 --------- tensorflow/core/platform/subprocess.h | 9 +++++++++ tensorflow/core/platform/test.h | 7 ------- tensorflow/core/platform/windows/subprocess.h | 10 ++++++++++ tensorflow/core/platform/windows/test.cc | 5 ----- 9 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc index a3b4286f4c..7b6ae311c1 100644 --- a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc +++ b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/subprocess.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/command_line_flags.h" diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc index c237f2dce4..89f83f9f24 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.cc @@ -57,7 +57,7 @@ Status TestCluster::MakeTestCluster(const SessionOptions& options, int n, tf_jobs, "--tf_job=localhost", strings::StrCat("--tf_task=", i), strings::StrCat("--num_cpus=", num_cpus), strings::StrCat("--num_gpus=", num_gpus)}); - ret->subprocesses_.emplace_back(testing::CreateSubProcess(argv)); + ret->subprocesses_.emplace_back(CreateSubProcess(argv)); bool success = ret->subprocesses_[i]->Start(); if (!success) { return errors::Internal("Could not start subprocess"); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.h b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.h index 4b3a03b1d7..d5baaae353 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_testlib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_testlib.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/subprocess.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session_options.h" diff --git a/tensorflow/core/platform/posix/subprocess.cc b/tensorflow/core/platform/posix/subprocess.cc index cefc66831a..a661c34ef0 100644 --- a/tensorflow/core/platform/posix/subprocess.cc +++ b/tensorflow/core/platform/posix/subprocess.cc @@ -20,6 +20,8 @@ limitations under the License. #include #include #include +#include +#include #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/subprocess.h" @@ -461,4 +463,12 @@ int SubProcess::Communicate(const string* stdin_input, string* stdout_output, return WaitInternal(&status) ? status : -1; } +std::unique_ptr CreateSubProcess(const std::vector& argv) { + std::unique_ptr proc(new SubProcess()); + proc->SetProgram(argv[0], argv); + proc->SetChannelAction(CHAN_STDERR, ACTION_DUPPARENT); + proc->SetChannelAction(CHAN_STDOUT, ACTION_DUPPARENT); + return proc; +} + } // namespace tensorflow diff --git a/tensorflow/core/platform/posix/test.cc b/tensorflow/core/platform/posix/test.cc index a69127b3e8..28f7478a6d 100644 --- a/tensorflow/core/platform/posix/test.cc +++ b/tensorflow/core/platform/posix/test.cc @@ -20,19 +20,10 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/subprocess.h" namespace tensorflow { namespace testing { -std::unique_ptr CreateSubProcess(const std::vector& argv) { - std::unique_ptr proc(new SubProcess()); - proc->SetProgram(argv[0], argv); - proc->SetChannelAction(CHAN_STDERR, ACTION_DUPPARENT); - proc->SetChannelAction(CHAN_STDOUT, ACTION_DUPPARENT); - return proc; -} - int PickUnusedPortOrDie() { return internal::PickUnusedPortOrDie(); } string TensorFlowSrcRoot() { diff --git a/tensorflow/core/platform/subprocess.h b/tensorflow/core/platform/subprocess.h index dfdcf82173..dcc0c1a4ee 100644 --- a/tensorflow/core/platform/subprocess.h +++ b/tensorflow/core/platform/subprocess.h @@ -16,6 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_PLATFORM_SUBPROCESS_H_ #define TENSORFLOW_PLATFORM_SUBPROCESS_H_ +#include +#include + namespace tensorflow { // Channel identifiers. @@ -43,6 +46,12 @@ enum ChannelAction { // Supports spawning and killing child processes. class SubProcess; +// Returns an object that represents a child process that will be +// launched with the given command-line arguments `argv`. The process +// must be explicitly started by calling the Start() method on the +// returned object. +std::unique_ptr CreateSubProcess(const std::vector& argv); + } // namespace tensorflow #include "tensorflow/core/platform/platform.h" diff --git a/tensorflow/core/platform/test.h b/tensorflow/core/platform/test.h index 295957c3d8..99bae63edf 100644 --- a/tensorflow/core/platform/test.h +++ b/tensorflow/core/platform/test.h @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/subprocess.h" #include "tensorflow/core/platform/types.h" // As of September 2016, we continue to attempt to avoid the use of gmock aka @@ -49,12 +48,6 @@ string TensorFlowSrcRoot(); // Returns the same value for the lifetime of the process. int RandomSeed(); -// Returns an object that represents a child process that will be -// launched with the given command-line arguments `argv`. The process -// must be explicitly started by calling the Start() method on the -// returned object. -std::unique_ptr CreateSubProcess(const std::vector& argv); - // Returns an unused port number, for use in multi-process testing. // NOTE: This function is not thread-safe. int PickUnusedPortOrDie(); diff --git a/tensorflow/core/platform/windows/subprocess.h b/tensorflow/core/platform/windows/subprocess.h index 66ec44885d..f00471d484 100644 --- a/tensorflow/core/platform/windows/subprocess.h +++ b/tensorflow/core/platform/windows/subprocess.h @@ -16,11 +16,21 @@ limitations under the License. #ifndef TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ #define TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ +#include +#include + +#include "tensorflow/core/platform/logging.h" + namespace tensorflow { // SubProcess is not yet implemented for Windows. class SubProcess {}; +std::unique_ptr CreateSubProcess(const std::vector& argv) { + LOG(FATAL) << "CreateSubProcess NOT IMPLEMENTED for Windows yet ! "; + return nullptr; +} + } // namespace tensorflow #endif // TENSORFLOW_PLATFORM_WINDOWS_SUBPROCESS_H_ diff --git a/tensorflow/core/platform/windows/test.cc b/tensorflow/core/platform/windows/test.cc index 584acad91b..ad2b7bc6ff 100644 --- a/tensorflow/core/platform/windows/test.cc +++ b/tensorflow/core/platform/windows/test.cc @@ -22,11 +22,6 @@ limitations under the License. namespace tensorflow { namespace testing { -std::unique_ptr CreateSubProcess(const std::vector& argv) { - LOG(FATAL) << "CreateSubProcess NOT IMPLEMENTED for Windows yet ! "; - return nullptr; -} - int PickUnusedPortOrDie() { return internal::PickUnusedPortOrDie(); } string TensorFlowSrcRoot() { -- GitLab From 386ba370d41e8872a9db0d45239d7b00c14ef309 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 10:10:16 -0700 Subject: [PATCH 109/960] Added StrContains, StartsWith, and EndsWith functions to str_util.h. Marked contains, starts_with, ends_with, and consume StringPiece methods as deprecated. This will allow tensorflow::StringPiece to be more easily replaced with absl::string_view (once the deprecated methods are removed) as absl::string_view does not contain those methods. PiperOrigin-RevId: 189355316 --- tensorflow/core/lib/core/stringpiece.h | 4 ++ tensorflow/core/lib/strings/str_util.cc | 21 +++++++- tensorflow/core/lib/strings/str_util.h | 15 ++++++ tensorflow/core/lib/strings/str_util_test.cc | 52 ++++++++++++++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 910e4d9e2a..79409cce4b 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -88,11 +88,13 @@ class StringPiece { size_t find(char c, size_t pos = 0) const; size_t rfind(char c, size_t pos = npos) const; + // DEPRECATED: Use tensorflow::str_util::StrContains instead. bool contains(StringPiece s) const; // Checks whether StringPiece starts with x and if so advances the beginning // of it to past the match. It's basically a shortcut for starts_with // followed by remove_prefix. + // DEPRECATED: Use tensorflow::str_util::ConsumePrefix instead. bool Consume(StringPiece x) { if (starts_with(x)) { remove_prefix(x.size_); @@ -113,10 +115,12 @@ class StringPiece { int compare(StringPiece b) const; // Return true iff "x" is a prefix of "*this" + // DEPRECATED: Use tensorflow::str_util::StartsWith instead. bool starts_with(StringPiece x) const { return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0)); } // Return true iff "x" is a suffix of "*this" + // DEPRECATED: Use tensorflow::str_util::EndsWith instead. bool ends_with(StringPiece x) const { return ((size_ >= x.size_) && (memcmp(data_ + (size_ - x.size_), x.data_, x.size_) == 0)); diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index d28857803d..9dbb74f6b8 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -373,7 +373,7 @@ size_t RemoveWhitespaceContext(StringPiece* text) { } bool ConsumePrefix(StringPiece* s, StringPiece expected) { - if (s->starts_with(expected)) { + if (StartsWith(*s, expected)) { s->remove_prefix(expected.size()); return true; } @@ -381,7 +381,7 @@ bool ConsumePrefix(StringPiece* s, StringPiece expected) { } bool ConsumeSuffix(StringPiece* s, StringPiece expected) { - if (s->ends_with(expected)) { + if (EndsWith(*s, expected)) { s->remove_suffix(expected.size()); return true; } @@ -452,5 +452,22 @@ bool SplitAndParseAsFloats(StringPiece text, char delim, result); } +bool StrContains(StringPiece haystack, StringPiece needle) { + return std::search(haystack.begin(), haystack.end(), needle.begin(), + needle.end()) != haystack.end(); +} + +bool StartsWith(StringPiece text, StringPiece prefix) { + return prefix.empty() || + (text.size() >= prefix.size() && + memcmp(text.data(), prefix.data(), prefix.size()) == 0); +} + +bool EndsWith(StringPiece text, StringPiece suffix) { + return suffix.empty() || (text.size() >= suffix.size() && + memcmp(text.data() + (text.size() - suffix.size()), + suffix.data(), suffix.size()) == 0); +} + } // namespace str_util } // namespace tensorflow diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h index 44c52850fa..f062eddef8 100644 --- a/tensorflow/core/lib/strings/str_util.h +++ b/tensorflow/core/lib/strings/str_util.h @@ -141,6 +141,21 @@ bool SplitAndParseAsInts(StringPiece text, char delim, bool SplitAndParseAsFloats(StringPiece text, char delim, std::vector* result); +// StartsWith() +// +// Returns whether a given string `text` begins with `prefix`. +bool StartsWith(StringPiece text, StringPiece prefix); + +// EndsWith() +// +// Returns whether a given string `text` ends with `suffix`. +bool EndsWith(StringPiece text, StringPiece suffix); + +// StrContains() +// +// Returns whether a given string `haystack` contains the substring `needle`. +bool StrContains(StringPiece haystack, StringPiece needle); + // ------------------------------------------------------------------ // Implementation details below template diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc index 6d461241f7..63643c3e8e 100644 --- a/tensorflow/core/lib/strings/str_util_test.cc +++ b/tensorflow/core/lib/strings/str_util_test.cc @@ -430,4 +430,56 @@ TEST(StringReplace, EmptyStringReplaceAll) { EXPECT_EQ("", str_util::StringReplace("", "a", "X", /*replace_all=*/true)); } +TEST(StartsWith, Basic) { + const string s1( + "123" + "\0" + "456", + 7); + const StringPiece a("foobar"); + const StringPiece b(s1); + const StringPiece e; + EXPECT_TRUE(str_util::StartsWith(a, a)); + EXPECT_TRUE(str_util::StartsWith(a, "foo")); + EXPECT_TRUE(str_util::StartsWith(a, e)); + EXPECT_TRUE(str_util::StartsWith(b, s1)); + EXPECT_TRUE(str_util::StartsWith(b, b)); + EXPECT_TRUE(str_util::StartsWith(b, e)); + EXPECT_TRUE(str_util::StartsWith(e, "")); + EXPECT_FALSE(str_util::StartsWith(a, b)); + EXPECT_FALSE(str_util::StartsWith(b, a)); + EXPECT_FALSE(str_util::StartsWith(e, a)); +} + +TEST(EndsWith, Basic) { + const string s1( + "123" + "\0" + "456", + 7); + const StringPiece a("foobar"); + const StringPiece b(s1); + const StringPiece e; + EXPECT_TRUE(str_util::EndsWith(a, a)); + EXPECT_TRUE(str_util::EndsWith(a, "bar")); + EXPECT_TRUE(str_util::EndsWith(a, e)); + EXPECT_TRUE(str_util::EndsWith(b, s1)); + EXPECT_TRUE(str_util::EndsWith(b, b)); + EXPECT_TRUE(str_util::EndsWith(b, e)); + EXPECT_TRUE(str_util::EndsWith(e, "")); + EXPECT_FALSE(str_util::EndsWith(a, b)); + EXPECT_FALSE(str_util::EndsWith(b, a)); + EXPECT_FALSE(str_util::EndsWith(e, a)); +} + +TEST(StrContains, Basic) { + StringPiece a("abcdefg"); + StringPiece b("abcd"); + StringPiece c("efg"); + StringPiece d("gh"); + EXPECT_TRUE(str_util::StrContains(a, b)); + EXPECT_TRUE(str_util::StrContains(a, c)); + EXPECT_TRUE(!str_util::StrContains(a, d)); +} + } // namespace tensorflow -- GitLab From 86398ed80a09030255226678eee5d4be583a61c4 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 16 Mar 2018 10:23:41 -0700 Subject: [PATCH 110/960] Fix sed invocation in copy_binary.py script for Mac. Script was explicitly calling /bin/sed which was not being found on MacOS Kokoro builds. Removing calling "sed" in script. PiperOrigin-RevId: 189357296 --- tensorflow/tools/ci_build/copy_binary.py | 55 +++++++++++++----------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index ff26b052f3..b5a282b64a 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -29,13 +29,9 @@ import argparse import os import re import shutil -import subprocess +import tempfile import zipfile -UNZIP_CMD = "/usr/bin/unzip" -ZIP_CMD = "/usr/bin/zip" -SED_CMD = "/bin/sed" - TF_NIGHTLY_REGEX = r"(.+)tf_nightly(|_gpu)-(\d\.\d\.\d.dev[\d]{0,8})-(.+)\.whl" BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" @@ -65,26 +61,35 @@ def copy_binary(directory, origin_tag, new_tag, version, gpu=False): origin_binary = BINARY_STRING_TEMPLATE % (package, version, origin_tag) new_binary = BINARY_STRING_TEMPLATE % (package, version, new_tag) zip_ref = zipfile.ZipFile(directory + origin_binary, "r") - zip_ref.extractall() - zip_ref.close() - old_py_ver = re.search(r"(cp\d\d-cp\d\d)", origin_tag).group(1) - new_py_ver = re.search(r"(cp\d\d-cp\d\d)", new_tag).group(1) - subprocess.check_call( - "%s -i s/%s/%s/g %s-%s.dist-info/WHEEL" % (SED_CMD, old_py_ver, - new_py_ver, package, version), - shell=True) - zout = zipfile.ZipFile(directory + new_binary, "w", zipfile.ZIP_DEFLATED) - zip_these_files = [ - "%s-%s.dist-info" % (package, version), - "%s-%s.data" % (package, version) - ] - for dirname in zip_these_files: - for root, _, files in os.walk(dirname): - for filename in files: - zout.write(os.path.join(root, filename)) - zout.close() - for dirname in zip_these_files: - shutil.rmtree(dirname) + + try: + tmpdir = tempfile.mkdtemp() + os.chdir(tmpdir) + + zip_ref.extractall() + zip_ref.close() + old_py_ver = re.search(r"(cp\d\d-cp\d\d)", origin_tag).group(1) + new_py_ver = re.search(r"(cp\d\d-cp\d\d)", new_tag).group(1) + + wheel_file = os.path.join( + tmpdir, "%s-%s.dist-info" % (package, version), "WHEEL") + with open(wheel_file, "r") as f: + content = f.read() + with open(wheel_file, "w") as f: + f.write(content.replace(old_py_ver, new_py_ver)) + + zout = zipfile.ZipFile(directory + new_binary, "w", zipfile.ZIP_DEFLATED) + zip_these_files = [ + "%s-%s.dist-info" % (package, version), + "%s-%s.data" % (package, version), + ] + for dirname in zip_these_files: + for root, _, files in os.walk(dirname): + for filename in files: + zout.write(os.path.join(root, filename)) + zout.close() + finally: + shutil.rmtree(tmpdir) def main(): -- GitLab From 4091e498ba8dedc8f4ad5952dfe1262e735e7f42 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 10:29:11 -0700 Subject: [PATCH 111/960] Fix naming BatchNorm_Fold//batch_norm_correction -> BatchNorm_Fold/batch_norm_correction. PiperOrigin-RevId: 189358090 --- .../contrib/quantize/python/fold_batch_norms.py | 3 ++- .../contrib/quantize/python/fold_batch_norms_test.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index b278265639..e8a0d41425 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -317,7 +317,8 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, """ g = ops.get_default_graph() - with g.name_scope(context + '/batch_norm_correction'): + prefix = '' if not context else context + '/' + with g.name_scope(prefix + 'batch_norm_correction'): recip_sigma_mv = math_ops.rsqrt( match.moving_variance_tensor + match.batch_epsilon) recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index c90a18ab03..af31467476 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -128,6 +128,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) + for op in g.get_operations(): + self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) + def testFoldConv2d(self): self._RunTestOverParameters(self._TestFoldConv2d) @@ -196,6 +199,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) + for op in g.get_operations(): + self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) + def testFoldConv2dUnknownShape(self): self._RunTestOverParameters(self._TestFoldConv2dUnknownShape) @@ -260,6 +266,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) + for op in g.get_operations(): + self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) + def testFoldFullyConnectedLayer(self): self._RunTestOverParameters(self._TestFoldFullyConnectedLayer) @@ -337,6 +346,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) + for op in g.get_operations(): + self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) + def testFoldDepthwiseConv2d(self): self._RunTestOverParameters(self._TestFoldDepthwiseConv2d) -- GitLab From 7bc1f803ea53f06677bc1f96cba59d1c751fc09a Mon Sep 17 00:00:00 2001 From: Frank Perbet Date: Fri, 16 Mar 2018 10:48:32 -0700 Subject: [PATCH 112/960] Automated g4 rollback of changelist 189346024 PiperOrigin-RevId: 189361083 --- .../graph_editor/tests/transform_test.py | 32 ++------- tensorflow/contrib/graph_editor/transform.py | 66 ++++++------------- 2 files changed, 24 insertions(+), 74 deletions(-) diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index 2a1b78042d..ca00394388 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -23,7 +23,6 @@ from tensorflow.contrib import graph_editor as ge from tensorflow.contrib.graph_editor.tests import match from tensorflow.python.client import session from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -85,9 +84,9 @@ class TransformTest(test.TestCase): def test_transform(self): transformer = ge.Transformer() - def my_transform_op_handler(info, op, new_inputs): + def my_transform_op_handler(info, op): add_noise = op.name.startswith("Add") - op_, op_outputs_ = ge.transform.copy_op_handler(info, op, new_inputs) + op_, op_outputs_ = ge.transform.copy_op_handler(info, op) if not add_noise: return op_, op_outputs_ # add some noise to op @@ -202,36 +201,15 @@ class TransformTest(test.TestCase): get_operation_by_name("res/grad/mul1_grad/Mul_1")) # Make sure _original_ops are as expected. - self.assertEqual(original_mul1_grad._original_op.name, u"mul1") - self.assertEqual(result_mul1_grad._original_op.name, u"res/mul1") - self.assertNotEqual(res.name, g.name) + self.assertEquals(original_mul1_grad._original_op.name, u"mul1") + self.assertEquals(result_mul1_grad._original_op.name, u"res/mul1") + self.assertNotEquals(res.name, g.name) with session.Session() as sess: sess.run(variables.global_variables_initializer()) g_val, res_val = sess.run([g, res]) self.assertNear(g_val, 0.0, ERROR_TOLERANCE) self.assertNear(res_val, 0.0, ERROR_TOLERANCE) - def test_graph_while_loop(self): - graph = ops.Graph() - with graph.as_default(): - max_index = array_ops.placeholder(dtype=dtypes.int32, shape=tuple()) - index_start = constant_op.constant(1) - sum_start = constant_op.constant(0) - _, result = control_flow_ops.while_loop( - cond=lambda i, unused_s: i <= max_index, - body=lambda i, s: (i + 1, s + i), - loop_vars=[index_start, sum_start]) - copied_graph = ops.Graph() - _, copy_info = ge.copy( - graph, dst_graph=copied_graph, dst_scope="imported") - copied_result = copy_info.transformed(result) - copied_max_index = copy_info.transformed(max_index) - with copied_graph.as_default(): - with session.Session() as sess: - n = 10 - sum_val = sess.run(copied_result, feed_dict={copied_max_index: n}) - self.assertEqual(sum_val, 55) - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 03c9afe813..14ac529665 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -30,7 +30,6 @@ from tensorflow.contrib.graph_editor import select from tensorflow.contrib.graph_editor import subgraph from tensorflow.contrib.graph_editor import util from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -130,26 +129,20 @@ def transform_op_if_inside_handler(info, op, keep_if_possible=True): return None -def copy_op_handler(info, op, new_inputs, copy_shape=True): +def copy_op_handler(info, op, copy_shape=True): """Copy a `tf.Operation`. Args: info: Transform._TmpInfo instance. op: the `tf.Operation` to be copied. - new_inputs: The new inputs for this op. copy_shape: also copy the shape of the tensor Returns: A `(op, op_outputs)` tuple containing the transformed op and its outputs. """ - # The `new_inputs` was added to this function. For compatibility reason, - # let's raise an error if `new_inputs` is a boolean. - if isinstance(new_inputs, bool): - raise TypeError("the `new_inputs` argument must be an iterable.") - # pylint: disable=protected-access # Clone the node def: - node_def_ = deepcopy(op.node_def) + node_def_ = deepcopy(op._node_def) # Transform name: name_ = info.new_name(op.name) @@ -162,10 +155,10 @@ def copy_op_handler(info, op, new_inputs, copy_shape=True): # Make a copy of the op_def too. # Its unique to every _type_ of Operation. - op_def_ = deepcopy(op.op_def) + op_def_ = deepcopy(op._op_def) # Initialize a new Operation instance - op_ = tf_ops.Operation(node_def_, info.graph_, new_inputs, output_types_, + op_ = tf_ops.Operation(node_def_, info.graph_, [], output_types_, [], input_types_, None, op_def_) # copy the shape over @@ -177,7 +170,6 @@ def copy_op_handler(info, op, new_inputs, copy_shape=True): # attribute to exist, we will create a dummy original_op first and then # later finalise it with the actual original_op when all the ops have # been copied. - # TODO(fkp): Stop worrying about _original_op and remove this code? if op._original_op: op_._original_op = op._original_op @@ -336,14 +328,6 @@ class _TmpInfo(object): for key in self.graph.get_all_collection_keys()) self.cyclic_ops = [] self.transform_original_op_handler = transform_op_if_inside_handler - # The graph is transformed op by op, in the same order the original ops - # where created. However, this is sometimes not possible due to cycles - # (e.g. while loops). So when the transformer creates a new op whose - # inputs do not exist yet, temporary placeholders are created and stored - # in this `tmp_cyclic_ts` container. During a second pass, - # those temporary tensors are replaced by the proper transformed tensors - # (see the function `_finalize_cycle`). - self.tmp_cyclic_ts = [] def new_name(self, name): """Compute a destination name from a source name. @@ -444,10 +428,10 @@ class Transformer(object): # Create temporary info used during this transform call info = _TmpInfo(sgv, dst_graph, dst_scope, src_scope) + info.transform_original_op_handler = self.transform_original_op_handler self._copy_ops(info) - self._finalize_cycle(info) - self._connect_control_inputs(info) + self._connect_ops(info) # Compute information about the transformation res_info = TransformerInfo(info) @@ -456,12 +440,10 @@ class Transformer(object): def _copy_ops(self, info): """Copy ops without connecting them.""" - sorted_ops = sorted(info.sgv.ops, key=lambda op: op._id) # pylint: disable=protected-access - for op in sorted_ops: + for op in info.sgv.ops: logging.debug("Copying op: %s", op.name) - new_inputs = [self._transformed_t(info, t) for t in op.inputs] # TODO(fkp): return a subgraph? - op_, op_outputs_ = self.transform_op_handler(info, op, new_inputs) + op_, op_outputs_ = self.transform_op_handler(info, op) if op is op_: raise ValueError("In-place transformation not allowed.") @@ -474,31 +456,27 @@ class Transformer(object): info.transformed_ts[op_output] = op_output_ self.assign_collections_handler(info, op_output, op_output_) - def _finalize_cycle(self, info): - for t, tmp_t_ in info.tmp_cyclic_ts: - if t not in info.transformed_ts: - raise ValueError("The tensor {} should be transformed by now.".format( - t.name)) - op_ = tmp_t_.consumers()[0] - t_ = info.transformed_ts[t] - op_._update_input(list(op_.inputs).index(tmp_t_), t_) # pylint: disable=protected-access - - def _connect_control_inputs(self, info): + def _connect_ops(self, info): """Connect the previously copied ops.""" for op in info.sgv.ops: - logging.debug("Connecting control inputs of op: %s", op.name) + logging.debug("Finalizing op: %s", op.name) op_ = info.transformed_ops[op] - # Finalize original op. - # TODO(fkp): Stop worrying about _original_op and remove this code? # pylint: disable=protected-access + if op_.inputs: + raise ValueError("The newly transformed op should not have " + "any inputs yet: {}".format(op_.name)) + inputs_ = [self._transformed_t(info, t) for t in op.inputs] + for t in inputs_: + op_._add_input(t) + + # Finalize original op. if op._original_op: - original_op = self.transform_original_op_handler(info, op._original_op) + original_op = info.transform_original_op_handler(info, op._original_op) if original_op is None: logging.debug("Could not find original op for: %s", op_.name) else: op_._original_op = original_op - # pylint: enable=protected-access # Finalize control inputs: control_inputs_ = [self.transform_control_input_handler(info, ci) @@ -550,12 +528,6 @@ class Transformer(object): def _transformed_t(self, info, t): """Return tre transformed tensor of `t`.""" if t not in info.transformed_ts: - if t.op in info.ops: - with info.graph_.as_default(): - tmp_t_ = array_ops.placeholder( - shape=t.shape, dtype=t.dtype, name="ge_tmp") - info.tmp_cyclic_ts.append((t, tmp_t_)) - return tmp_t_ # If op is not in the subgraph. if t in info.sgv_inputs_set: # t is an input of the subgraph. -- GitLab From 53cbb5a50e94a772b595dcadd63af3b79bb884c7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 11:29:00 -0700 Subject: [PATCH 113/960] Fixing constant output arrays by inserting synthetic reshapes. PiperOrigin-RevId: 189368237 --- tensorflow/contrib/lite/toco/toco_tooling.cc | 4 + tensorflow/contrib/lite/toco/tooling_util.cc | 111 +++++++++++++++++++ tensorflow/contrib/lite/toco/tooling_util.h | 12 ++ 3 files changed, 127 insertions(+) diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 024335b5e4..ca66110ba3 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -289,6 +289,10 @@ void Transform(const TocoFlags& toco_flags, Model* model) { EncodeConstantArraysMinMaxByWrappingThemInFakeQuantNodes(model); } + // Fix any issues with IO edges. This must happen after any transform that + // may modify the structure of the edges. + FixEdgeArrays(model); + LogDump(kLogLevelModelChanged, "AFTER TRANSFORMATIONS", *model); if (output_format != GRAPHVIZ_DOT && output_format != TFLITE) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index e70291ad0e..2362206a14 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1047,6 +1047,117 @@ void CheckModelCounts(const Model& model) { } } +void FixEdgeArrays(Model* model) { + for (const string& output_array_name : model->flags.output_arrays()) { + if (!GetOpWithOutput(*model, output_array_name)) { + // Output has no operator producing it. Change that by inserting a copy. + LOG(WARNING) << "Fixing constant output array " << output_array_name + << " by inserting a copy. This is not optimal."; + string intermediate_array_name = + AvailableArrayName(*model, output_array_name + "_copy"); + CloneArray(model, output_array_name, intermediate_array_name); + InsertCopyOperator(model, intermediate_array_name, output_array_name); + } + } +} + +void InsertCopyOperator(Model* model, const string& source_array_name, + const string& target_array_name) { + // Drop constant data from the target array as the copy will be done at + // runtime. + Array& target_array = model->GetOrCreateArray(target_array_name); + target_array.buffer.reset(); + + // Reshape to the same size. This should be a no-op. + const Array& source_array = model->GetArray(source_array_name); + std::vector shape = source_array.shape().dims(); + + // Insert copy operator. + auto* copy_op = new TensorFlowReshapeOperator; + copy_op->inputs = { + source_array_name, + CreateInt32Array(model, target_array_name + "_copy_shape", shape)}; + copy_op->outputs = {target_array_name}; + model->operators.emplace_back(copy_op); +} + +namespace { +template +void CopyArrayBuffer(const Array& source_array, Array* target_array) { + if (source_array.buffer) { + const auto& source_buffer = source_array.GetBuffer(); + auto& target_buffer = target_array->GetMutableBuffer(); + target_buffer.data = source_buffer.data; + } +} +} // namespace + +void CloneArray(Model* model, const string& source_array_name, + const string& target_array_name) { + CHECK(!model->HasArray(target_array_name)); + const Array& source_array = model->GetArray(source_array_name); + Array& target_array = model->GetOrCreateArray(target_array_name); + + switch (source_array.data_type) { + case ArrayDataType::kBool: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kFloat: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kInt8: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kUint8: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kInt16: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kUint16: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kInt32: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kUint32: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kInt64: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kUint64: + CopyArrayBuffer(source_array, &target_array); + break; + case ArrayDataType::kString: + CopyArrayBuffer(source_array, &target_array); + break; + default: + LOG(FATAL) << "Unsupported data type: " + << ArrayDataTypeName(source_array.data_type); + return; + } + + if (source_array.minmax) { + const auto& smm = source_array.GetMinMax(); + auto& tmm = target_array.GetOrCreateMinMax(); + tmm.min = smm.min; + tmm.max = smm.max; + } + + if (source_array.quantization_params) { + const auto& sqp = source_array.GetQuantizationParams(); + auto& tqp = target_array.GetOrCreateQuantizationParams(); + tqp.zero_point = sqp.zero_point; + tqp.scale = sqp.scale; + } + + target_array.data_type = source_array.data_type; + target_array.final_data_type = source_array.final_data_type; + + target_array.copy_shape(source_array.shape()); +} + void MakeArrayDims(int num_dims, int batch, int height, int width, int depth, std::vector* out_dims) { CHECK(out_dims->empty()); diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 05360e3b0a..d3b7224fe3 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -144,6 +144,18 @@ void FixOperatorOrdering(Model* model); void FixNoMissingArray(Model* model); void FixNoOrphanedArray(Model* model); +// Fixes input/output arrays that may have issues during export or inference. +void FixEdgeArrays(Model* model); + +// Inserts a no-op reshape operator between the source array and the target +// array. This effectively just copies the data. +void InsertCopyOperator(Model* model, const string& source_array_name, + const string& target_array_name); + +// Clones an array with all data and parameters. +void CloneArray(Model* model, const string& source_array_name, + const string& target_array_name); + void ResolveModelFlags(const ModelFlags& model_flags, Model* model); template -- GitLab From fff901507e932188932fe21ae56c55e4aba5ae52 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 11:45:42 -0700 Subject: [PATCH 114/960] Set number of threads at Java interpreter constructor so that Conv Kernels can be selected properly. Remove setNumThreads in the Java API as its behavior is ambiguous. PiperOrigin-RevId: 189370770 --- .../java/org/tensorflow/lite/Interpreter.java | 11 ++++++++ .../lite/NativeInterpreterWrapper.java | 25 ++++++++++++------- .../native/nativeinterpreterwrapper_jni.cc | 18 +++---------- .../native/nativeinterpreterwrapper_jni.h | 16 +++--------- .../java/org/tensorflow/lite/TestHelper.java | 14 ----------- tensorflow/contrib/lite/model.cc | 8 +++++- tensorflow/contrib/lite/model.h | 2 ++ 7 files changed, 43 insertions(+), 51 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index cc17b491f2..14f461f5f9 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -77,6 +77,17 @@ public final class Interpreter implements AutoCloseable { wrapper = new NativeInterpreterWrapper(mappedByteBuffer); } + /** + * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file and + * specifies the number of threads used for inference. + * + *

The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code + * Interpreter}. + */ + public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer, int numThreads) { + wrapper = new NativeInterpreterWrapper(mappedByteBuffer, numThreads); + } + /** * Runs model inference if the model takes only one input, and provides only one output. * diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 518e8b3a96..dbf8f8f7cc 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -34,7 +34,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { NativeInterpreterWrapper(String modelPath) { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModel(modelPath, errorHandle); - interpreterHandle = createInterpreter(modelHandle, errorHandle); + interpreterHandle = createInterpreter(modelHandle, errorHandle, /* numThreads= */ -1); isMemoryAllocated = true; } @@ -47,7 +47,20 @@ final class NativeInterpreterWrapper implements AutoCloseable { modelByteBuffer = mappedByteBuffer; errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); - interpreterHandle = createInterpreter(modelHandle, errorHandle); + interpreterHandle = createInterpreter(modelHandle, errorHandle, /* numThreads= */ -1); + isMemoryAllocated = true; + } + + /** + * Initializes a {@code NativeInterpreterWrapper} with a {@code MappedByteBuffer} and specifies + * the number of inference threads. The MappedByteBuffer should not be modified after the + * construction of a {@code NativeInterpreterWrapper}. + */ + NativeInterpreterWrapper(MappedByteBuffer mappedByteBuffer, int numThreads) { + modelByteBuffer = mappedByteBuffer; + errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); + modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); + interpreterHandle = createInterpreter(modelHandle, errorHandle, numThreads); isMemoryAllocated = true; } @@ -140,10 +153,6 @@ final class NativeInterpreterWrapper implements AutoCloseable { useNNAPI(interpreterHandle, useNNAPI); } - void setNumThreads(int numRecommendedThreads) { - numThreads(interpreterHandle, numRecommendedThreads); - } - /** Gets index of an input given its name. */ int getInputIndex(String name) { if (inputsIndexes == null) { @@ -312,15 +321,13 @@ final class NativeInterpreterWrapper implements AutoCloseable { private static native void useNNAPI(long interpreterHandle, boolean state); - private static native void numThreads(long interpreterHandle, int numRecommendedThreads); - private static native long createErrorReporter(int size); private static native long createModel(String modelPathOrBuffer, long errorHandle); private static native long createModelWithBuffer(MappedByteBuffer modelBuffer, long errorHandle); - private static native long createInterpreter(long modelHandle, long errorHandle); + private static native long createInterpreter(long modelHandle, long errorHandle, int numThreads); private static native void delete(long errorHandle, long modelHandle, long interpreterHandle); diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index cc448b03c3..844226203b 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h" - namespace { const int kByteBufferValue = 999; @@ -316,16 +315,6 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, interpreter->UseNNAPI(static_cast(state)); } -JNIEXPORT void JNICALL -Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, - jclass clazz, - jlong handle, - jint num_threads) { - tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle); - if (interpreter == nullptr) return; - interpreter->SetNumThreads(static_cast(num_threads)); -} - JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createErrorReporter( JNIEnv* env, jclass clazz, jint size) { @@ -401,7 +390,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer( JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( - JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle) { + JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle, + jint num_threads) { tflite::FlatBufferModel* model = convertLongToModel(env, model_handle); if (model == nullptr) return 0; BufferErrorReporter* error_reporter = @@ -409,8 +399,8 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( if (error_reporter == nullptr) return 0; auto resolver = ::tflite::CreateOpResolver(); std::unique_ptr interpreter; - TfLiteStatus status = - tflite::InterpreterBuilder(*model, *(resolver.get()))(&interpreter); + TfLiteStatus status = tflite::InterpreterBuilder(*model, *(resolver.get()))( + &interpreter, static_cast(num_threads)); if (status != kTfLiteOk) { throwException(env, kIllegalArgumentException, "Cannot create interpreter: %s", diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index fb76125471..0e28a77fee 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -69,17 +69,6 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_useNNAPI(JNIEnv* env, jlong handle, jboolean state); -/* - * Class: org_tensorflow_lite_NativeInterpreterWrapper - * Method: - * Signature: (JI) - */ -JNIEXPORT void JNICALL -Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env, - jclass clazz, - jlong handle, - jint num_threads); - /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: @@ -110,11 +99,12 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createModelWithBuffer( /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JJ)J + * Signature: (JJI)J */ JNIEXPORT jlong JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( - JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle); + JNIEnv* env, jclass clazz, jlong model_handle, jlong error_handle, + jint num_threads); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java index 3722e51b3b..3aef0c3bb6 100644 --- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java @@ -33,20 +33,6 @@ public class TestHelper { } } - /** - * Sets the number of threads for an {@code Interpreter}. - * - * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code - * IllegalArgumentException} will be thrown. - * @param numRecommendedThreads an integer value indicating the number of recommended threads. - */ - public static void setNumThreads(Interpreter interpreter, int numRecommendedThreads) { - if (interpreter != null && interpreter.wrapper != null) { - interpreter.wrapper.setNumThreads(numRecommendedThreads); - } else { - throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI."); - } - } /** * Gets the last inference duration in nanoseconds. It returns null if there is no previous * inference run or the last inference run failed. diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index f28d56af67..f7daa6fc9d 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -759,6 +759,11 @@ TfLiteStatus InterpreterBuilder::ParseTensors( TfLiteStatus InterpreterBuilder::operator()( std::unique_ptr* interpreter) { + return operator()(interpreter, /*num_threads=*/-1); +} + +TfLiteStatus InterpreterBuilder::operator()( + std::unique_ptr* interpreter, int num_threads) { if (!interpreter) { error_reporter_->Report( "Null output pointer passed to InterpreterBuilder."); @@ -813,7 +818,8 @@ TfLiteStatus InterpreterBuilder::operator()( if ((**interpreter).AddTensors(tensors->Length()) != kTfLiteOk) { return cleanup_and_error(); } - + // Set num threads + (**interpreter).SetNumThreads(num_threads); // Parse inputs/outputs (**interpreter).SetInputs(FlatBufferIntArrayToVector(subgraph->inputs())); (**interpreter).SetOutputs(FlatBufferIntArrayToVector(subgraph->outputs())); diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 51a622a28d..0c777760cb 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -154,6 +154,8 @@ class InterpreterBuilder { InterpreterBuilder(const InterpreterBuilder&) = delete; InterpreterBuilder& operator=(const InterpreterBuilder&) = delete; TfLiteStatus operator()(std::unique_ptr* interpreter); + TfLiteStatus operator()(std::unique_ptr* interpreter, + int num_threads); private: TfLiteStatus BuildLocalIndexToRegistrationMapping(); -- GitLab From 0b247ec25de24813a6185a0dc4fbf17edd1c177a Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 16 Mar 2018 11:47:26 -0700 Subject: [PATCH 115/960] Allow variable lists to change when saving repeatedly using tfe.Checkpoint For example allows saving a checkpoint before slot variables have been created When graph building, restore() is still bound to a frozen set of variables. PiperOrigin-RevId: 189371256 --- .../eager/python/checkpointable_utils.py | 28 ++++++----- .../eager/python/checkpointable_utils_test.py | 48 ++++++++++++++++++- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 389d4a03c8..0a34f3b3f6 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -523,6 +523,18 @@ class _SessionWithFeedDictAdditions(session_lib.SessionInterface): fetches=fetches, feed_dict=feed_dict, **kwargs) +def _copy_saver_with_new_var_list(old_saver, new_var_list): + """Copy a `tf.train.Saver`'s state to a new Saver with different variables.""" + new_saver = saver_lib.Saver(var_list=new_var_list) + # TODO(allenl): Move to copying functionality to Saver? + # pylint: disable=protected-access + new_saver._last_checkpoints = old_saver._last_checkpoints + new_saver._checkpoints_to_be_deleted = old_saver._checkpoints_to_be_deleted + new_saver._next_checkpoint_time = old_saver._next_checkpoint_time + # pylint: enable=protected-access + return new_saver + + class CheckpointableSaver(object): """Saves and restores a `Checkpointable` object and its dependencies. @@ -623,19 +635,13 @@ class CheckpointableSaver(object): name=_OBJECT_GRAPH_PROTO_KEY) if self._last_save_object_graph != graph_proto: if self._last_save_object_graph is not None: - raise NotImplementedError( - "Using a single Saver to save a mutated object graph is not " - "currently supported when graph building. Use a different Saver " - "when the object graph changes (save ops will be duplicated when " - "graph building), or file a feature request if this limitation " - "bothers you.") - saver = saver_lib.Saver(var_list=named_variables) - self._last_save_saver = saver + self._last_save_saver = _copy_saver_with_new_var_list( + old_saver=self._last_save_saver, new_var_list=named_variables) + else: + self._last_save_saver = saver_lib.Saver(var_list=named_variables) self._last_save_object_graph = graph_proto - else: - saver = self._last_save_saver with ops.device("/cpu:0"): - save_path = saver.save( + save_path = self._last_save_saver.save( sess=_SessionWithFeedDictAdditions( session=session, feed_additions=feed_additions), save_path=file_prefix, diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 4e0a9923ff..690f3ee67a 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -545,13 +545,13 @@ class CheckpointingTests(test.TestCase): checkpoint = checkpointable_utils.Checkpoint( model=model, optimizer=optimizer) for _ in range(2): + checkpoint.save(checkpoint_prefix) with backprop.GradientTape() as tape: loss = (constant_op.constant(1.) - model(constant_op.constant(1.))) ** 2 grad = tape.gradient(loss, model.vars) optimizer.apply_gradients( [(g, v) for g, v in zip(grad, model.vars)]) - checkpoint.save(checkpoint_prefix) @test_util.run_in_graph_and_eager_modes() def testLateDependencyTracking(self): @@ -899,6 +899,52 @@ class CheckpointingTests(test.TestCase): expected_filenames, os.listdir(checkpoint_directory)) + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testCheckpointCleanupChangingVarList(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + self.evaluate(checkpointable_utils.gather_initializers(obj)) + checkpoint = checkpointable_utils.Checkpoint(obj=obj) + looped_variables = [] + for iteration in range(10): + new_variable = resource_variable_ops.ResourceVariable(iteration) + self.evaluate(new_variable.initializer) + setattr(checkpoint, "var_%d" % iteration, new_variable) + checkpoint.save(checkpoint_prefix) + looped_variables.append(new_variable) + expected_filenames = ["checkpoint"] + # We've copied the saver each time, but checkpoint management should still + # be consistent. + for checkpoint_number in range(6, 11): + expected_filenames.append("ckpt-%d.index" % (checkpoint_number,)) + expected_filenames.append( + "ckpt-%d.data-00000-of-00001" % (checkpoint_number,)) + six.assertCountEqual( + self, + expected_filenames, + os.listdir(checkpoint_directory)) + for v in looped_variables: + self.evaluate(v.assign(314)) + checkpoint.restore(checkpoint_prefix + "-6").run_restore_ops() + self.assertEqual(314, self.evaluate(checkpoint.var_9)) + self.assertEqual(314, self.evaluate(checkpoint.var_8)) + self.assertEqual(314, self.evaluate(checkpoint.var_6)) + self.assertEqual(5, self.evaluate(checkpoint.var_5)) + self.assertEqual(1, self.evaluate(checkpoint.var_1)) + self.assertEqual(0, self.evaluate(checkpoint.var_0)) + if context.executing_eagerly(): + checkpoint.restore(checkpoint_prefix + "-10").run_restore_ops() + self.assertEqual(9, self.evaluate(checkpoint.var_9)) + self.assertEqual(8, self.evaluate(checkpoint.var_8)) + self.assertEqual(1, self.evaluate(checkpoint.var_1)) + self.assertEqual(0, self.evaluate(checkpoint.var_0)) + else: + # Restoring into modified graphs is an error while graph building. + with self.assertRaises(NotImplementedError): + checkpoint.restore(checkpoint_prefix + "-10").run_restore_ops() + def testManyRestoresGraph(self): """Restores after the first should not modify the graph.""" with context.graph_mode(): -- GitLab From 13354ed1adb221af60f159cd2b8034dc6e5ef43a Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 16 Mar 2018 11:53:14 -0700 Subject: [PATCH 116/960] [TF:XLA] Bump open source llvm revision to r327616 PiperOrigin-RevId: 189372065 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8d739158c5..db70e4515b 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/738ee045416377e8c2094f7f61508ac1c178ff37.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/738ee045416377e8c2094f7f61508ac1c178ff37.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/cfb3cd346a75b17856c4e2ba6365e15d9ab0c763.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/cfb3cd346a75b17856c4e2ba6365e15d9ab0c763.tar.gz", ], - sha256 = "4442ed6a05c13752338036b1b9f16b09264de24b6c0bf62325fb9ff75a09340f", - strip_prefix = "llvm-738ee045416377e8c2094f7f61508ac1c178ff37", + sha256 = "2cf79b1891926b7af6173c1031d040fc07b2682ff66039c5822e074566c48956", + strip_prefix = "llvm-cfb3cd346a75b17856c4e2ba6365e15d9ab0c763", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 6655570f12dba22fe752796471635e109e682056 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 14 Mar 2018 18:39:25 -0700 Subject: [PATCH 117/960] [tf.data] Fix Python shape inference for `tf.contrib.data.map_and_batch()`. Previously, it would incorrectly report that all batches have the same size, not accounting for the possibility of the last batch being partial. Fixes #17720. PiperOrigin-RevId: 189121488 --- .../python/kernel_tests/batch_dataset_op_test.py | 14 ++++++++++++++ tensorflow/contrib/data/python/ops/batching.py | 3 +-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 71dc1c1172..a2da953c7b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -387,6 +387,20 @@ class BatchDatasetTest(test.TestCase): def testBatchAndMapDatasetWithParallelBatching(self): return self._testBatchAndMapDatasetHelper(num_parallel_batches=10) + def testMapAndBatchYieldsPartialBatch(self): + iterator = (dataset_ops.Dataset.range(10) + .apply(batching.map_and_batch( + lambda x: array_ops.reshape(x * x, [1]), 4)) + .make_one_shot_iterator()) + self.assertEqual([None, 1], iterator.output_shapes.as_list()) + next_element = iterator.get_next() + with self.test_session() as sess: + self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) + self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) + self.assertAllEqual([[64], [81]], sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testMapAndBatchSparse(self): def _sparse(i): diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 6eb512dec6..6463d75750 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -374,8 +374,7 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): @property def output_shapes(self): return nest.pack_sequence_as(self._output_shapes, [ - tensor_shape.vector(tensor_util.constant_value( - self._batch_size)).concatenate(s) + tensor_shape.vector(None).concatenate(s) for s in nest.flatten(self._output_shapes) ]) -- GitLab From 7432b190a6485dffc0d548c5be0a69ffd36a7828 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 16 Mar 2018 12:11:18 -0700 Subject: [PATCH 118/960] [tf.data] Fix typo in `Dataset.prefetch()` docstring. PiperOrigin-RevId: 189374898 --- tensorflow/python/data/ops/dataset_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 390ce852b1..a0c5a43a45 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -563,7 +563,7 @@ class Dataset(object): Args: buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the - maximum number elements that will be buffered when prefetching. + maximum number of elements that will be buffered when prefetching. Returns: Dataset: A `Dataset`. -- GitLab From 7858441854f2101fff7c450e3a72348c3d739e9d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 12:12:52 -0700 Subject: [PATCH 119/960] Remove empty buckets in latency_stats as it makes the report unreadable. This is also consistent with how SummaryHistoOp in summary_op.cc works. PiperOrigin-RevId: 189375113 --- tensorflow/core/kernels/data/stats_aggregator_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/stats_aggregator_ops.cc b/tensorflow/core/kernels/data/stats_aggregator_ops.cc index 5a2dd9c43d..17103627e0 100644 --- a/tensorflow/core/kernels/data/stats_aggregator_ops.cc +++ b/tensorflow/core/kernels/data/stats_aggregator_ops.cc @@ -47,7 +47,7 @@ class StatsAggregatorImpl : public StatsAggregator { Summary::Value* value = out_summary->add_value(); value->set_tag(name); histogram.EncodeToProto(value->mutable_histo(), - true /* preserve_zero_buckets */); + false /* doesn't preserve zero buckets */); } } -- GitLab From 504d103a405654f029e8902d97d4dd8f3aa07513 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Fri, 16 Mar 2018 12:34:34 -0700 Subject: [PATCH 120/960] [XLA:python] Plumb hlo_profile flag. PiperOrigin-RevId: 189377860 --- .../compiler/xla/client/executable_build_options.cc | 7 +++++++ .../compiler/xla/client/executable_build_options.h | 6 ++++++ .../compiler/xla/python/local_computation_builder.i | 13 +++++++++++++ tensorflow/compiler/xla/python/xla_client.py | 1 + tensorflow/compiler/xla/service/local_service.cc | 2 ++ 5 files changed, 29 insertions(+) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index 804e34f5e7..d84f2018e1 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -76,4 +76,11 @@ ExecutableBuildOptions::generate_hlo_graph() const { return generate_hlo_graph_; } +ExecutableBuildOptions& ExecutableBuildOptions::set_hlo_profile(bool enabled) { + hlo_profile_ = enabled; + return *this; +} + +bool ExecutableBuildOptions::hlo_profile() const { return hlo_profile_; } + } // namespace xla diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 3a52dbac9a..3e18e5de64 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -57,11 +57,17 @@ class ExecutableBuildOptions { ExecutableBuildOptions& set_generate_hlo_graph(string regex); const tensorflow::gtl::optional& generate_hlo_graph() const; + // If set, specifies that we should record an HLO profile during execution and + // log it after execution (as in DebugOptions). + ExecutableBuildOptions& set_hlo_profile(bool enabled); + bool hlo_profile() const; + // Returns a string representation of the build options, suitable for // debugging. string ToString() const; private: + bool hlo_profile_ = false; int device_ordinal_ = -1; Shape result_layout_; bool result_layout_set_ = false; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index b2681d5e8b..ca91cf0d50 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -833,6 +833,19 @@ tensorflow::ImportNumpy(); } Py_DECREF(o); + o = PyObject_GetAttrString($input, "hlo_profile"); + if (o == NULL) { + return NULL; + } + if (o != Py_None) { + if (!PyBool_Check(o)) { + PyErr_SetString(PyExc_TypeError, "ExecutableBuildOptions.hlo_profile must be a bool or None."); + return NULL; + } + build_options.set_hlo_profile(o == Py_True); + } + Py_DECREF(o); + o = PyObject_GetAttrString($input, "result_shape"); if (o == nullptr) { return nullptr; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 90cda42f32..d747a0b65c 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -320,6 +320,7 @@ class CompileOptions(object): def __init__(self): self.generate_hlo_graph = None + self.hlo_profile = False def transfer_to_infeed(value, replica_number=None): diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 07f989d4fa..74aa6eaa17 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -119,6 +119,8 @@ StatusOr> LocalService::CompileExecutable( } ExecutionOptions execution_options = CreateDefaultExecutionOptions(); + execution_options.mutable_debug_options()->set_xla_hlo_profile( + build_options.hlo_profile()); if (build_options.generate_hlo_graph().has_value()) { execution_options.mutable_debug_options()->set_xla_generate_hlo_graph( build_options.generate_hlo_graph().value()); -- GitLab From 784d7736bdfc9c9d06098db56d4dc883bc12f0ea Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 12:55:18 -0700 Subject: [PATCH 121/960] BREAKING_CHANGE: Remove SigmoidCentered bijector. - SoftmaxCentered solely works on vector events, and supports broadcasting. - Sigmoid exists for event_ndims=0 cases. PiperOrigin-RevId: 189380445 --- tensorflow/contrib/distributions/BUILD | 19 --- .../kernel_tests/bijectors/chain_test.py | 8 +- .../kernel_tests/bijectors/invert_test.py | 7 +- .../bijectors/sigmoid_centered_test.py | 57 --------- .../bijectors/softmax_centered_test.py | 56 +++------ .../transformed_distribution_test.py | 12 +- .../python/ops/bijectors/__init__.py | 2 - .../python/ops/bijectors/sigmoid_centered.py | 39 ------ .../python/ops/bijectors/softmax_centered.py | 114 +++++------------- .../python/ops/vector_diffeomixture.py | 2 +- .../python/contrib.distributions.bijectors.md | 1 - 11 files changed, 59 insertions(+), 258 deletions(-) delete mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_centered_test.py delete mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 6bd3f5f09b..e9c827a618 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -1105,25 +1105,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "sigmoid_centered_test", - size = "small", - srcs = ["python/kernel_tests/bijectors/sigmoid_centered_test.py"], - additional_deps = [ - ":bijectors_py", - ":distributions_py", - "//third_party/py/numpy", - "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - # Tests for SinhArcSinh bijector. The file name has the extra "_bijector" to # avoid BUILD rule name conflicts with the distribution by the same name. cuda_py_test( diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py index 20e7543084..a748acd667 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/chain_test.py @@ -66,12 +66,10 @@ class ChainBijectorTest(test.TestCase): def testShapeGetters(self): with self.test_session(): bijector = Chain([ - SoftmaxCentered( - event_ndims=1, validate_args=True), - SoftmaxCentered( - event_ndims=0, validate_args=True) + SoftmaxCentered(validate_args=True), + SoftmaxCentered(validate_args=True), ]) - x = tensor_shape.TensorShape([]) + x = tensor_shape.TensorShape([1]) y = tensor_shape.TensorShape([2 + 1]) self.assertAllEqual(y, bijector.forward_event_shape(x)) self.assertAllEqual( diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py index 28e3e31354..58ba9cedb1 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py @@ -37,8 +37,7 @@ class InvertBijectorTest(test.TestCase): bijectors.Exp(event_ndims=1), bijectors.Affine(shift=[0., 1.], scale_diag=[2., 3.]), bijectors.Softplus(event_ndims=1), - bijectors.SoftmaxCentered(event_ndims=1), - bijectors.SigmoidCentered(), + bijectors.SoftmaxCentered(), ]: rev = bijectors.Invert(fwd) self.assertEqual("_".join(["invert", fwd.name]), rev.name) @@ -61,9 +60,9 @@ class InvertBijectorTest(test.TestCase): def testShapeGetters(self): with self.test_session(): - bijector = bijectors.Invert(bijectors.SigmoidCentered(validate_args=True)) + bijector = bijectors.Invert(bijectors.SoftmaxCentered(validate_args=True)) x = tensor_shape.TensorShape([2]) - y = tensor_shape.TensorShape([]) + y = tensor_shape.TensorShape([1]) self.assertAllEqual(y, bijector.forward_event_shape(x)) self.assertAllEqual( y.as_list(), diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_centered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_centered_test.py deleted file mode 100644 index 4ff3f334cc..0000000000 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sigmoid_centered_test.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Bijector.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered import SigmoidCentered -from tensorflow.python.platform import test - - -class SigmoidCenteredBijectorTest(test.TestCase): - """Tests correctness of the Y = g(X) = (1 + exp(-X))^-1 transformation.""" - - def testBijector(self): - with self.test_session(): - sigmoid = SigmoidCentered() - self.assertEqual("sigmoid_centered", sigmoid.name) - x = np.log([[2., 3, 4], - [4., 8, 12]]) - y = [[[2. / 3, 1. / 3], - [3. / 4, 1. / 4], - [4. / 5, 1. / 5]], - [[4. / 5, 1. / 5], - [8. / 9, 1. / 9], - [12. / 13, 1. / 13]]] - self.assertAllClose(y, sigmoid.forward(x).eval()) - self.assertAllClose(x, sigmoid.inverse(y).eval()) - self.assertAllClose( - -np.sum(np.log(y), axis=2), - sigmoid.inverse_log_det_jacobian(y).eval(), - atol=0., - rtol=1e-7) - self.assertAllClose( - -sigmoid.inverse_log_det_jacobian(y).eval(), - sigmoid.forward_log_det_jacobian(x).eval(), - atol=0., - rtol=1e-7) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py index 4a7679daad..cad4dd1ac8 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py @@ -34,34 +34,9 @@ rng = np.random.RandomState(42) class SoftmaxCenteredBijectorTest(test.TestCase): """Tests correctness of the Y = g(X) = exp(X) / sum(exp(X)) transformation.""" - def testBijectorScalar(self): - with self.test_session(): - softmax = SoftmaxCentered() # scalar by default - self.assertEqual("softmax_centered", softmax.name) - x = np.log([[2., 3, 4], - [4., 8, 12]]) - y = [[[2. / 3, 1. / 3], - [3. / 4, 1. / 4], - [4. / 5, 1. / 5]], - [[4. / 5, 1. / 5], - [8. / 9, 1. / 9], - [12. / 13, 1. / 13]]] - self.assertAllClose(y, softmax.forward(x).eval()) - self.assertAllClose(x, softmax.inverse(y).eval()) - self.assertAllClose( - -np.sum(np.log(y), axis=2), - softmax.inverse_log_det_jacobian(y).eval(), - atol=0., - rtol=1e-7) - self.assertAllClose( - -softmax.inverse_log_det_jacobian(y).eval(), - softmax.forward_log_det_jacobian(x).eval(), - atol=0., - rtol=1e-7) - def testBijectorVector(self): with self.test_session(): - softmax = SoftmaxCentered(event_ndims=1) + softmax = SoftmaxCentered() self.assertEqual("softmax_centered", softmax.name) x = np.log([[2., 3, 4], [4., 8, 12]]) y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] @@ -80,7 +55,7 @@ class SoftmaxCenteredBijectorTest(test.TestCase): def testBijectorUnknownShape(self): with self.test_session(): - softmax = SoftmaxCentered(event_ndims=1) + softmax = SoftmaxCentered() self.assertEqual("softmax_centered", softmax.name) x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) real_x = np.log([[2., 3, 4], [4., 8, 12]]) @@ -106,24 +81,21 @@ class SoftmaxCenteredBijectorTest(test.TestCase): def testShapeGetters(self): with self.test_session(): - for x, y, b in ((tensor_shape.TensorShape([]), - tensor_shape.TensorShape([2]), - SoftmaxCentered( - event_ndims=0, validate_args=True)), - (tensor_shape.TensorShape([4]), - tensor_shape.TensorShape([5]), - SoftmaxCentered( - event_ndims=1, validate_args=True))): - self.assertAllEqual(y, b.forward_event_shape(x)) - self.assertAllEqual(y.as_list(), - b.forward_event_shape_tensor(x.as_list()).eval()) - self.assertAllEqual(x, b.inverse_event_shape(y)) - self.assertAllEqual(x.as_list(), - b.inverse_event_shape_tensor(y.as_list()).eval()) + x = tensor_shape.TensorShape([4]) + y = tensor_shape.TensorShape([5]) + bijector = SoftmaxCentered(validate_args=True) + self.assertAllEqual(y, bijector.forward_event_shape(x)) + self.assertAllEqual(y.as_list(), + bijector.forward_event_shape_tensor( + x.as_list()).eval()) + self.assertAllEqual(x, bijector.inverse_event_shape(y)) + self.assertAllEqual(x.as_list(), + bijector.inverse_event_shape_tensor( + y.as_list()).eval()) def testBijectiveAndFinite(self): with self.test_session(): - softmax = SoftmaxCentered(event_ndims=1) + softmax = SoftmaxCentered() x = np.linspace(-50, 50, num=10).reshape(5, 2).astype(np.float32) # Make y values on the simplex with a wide range. y_0 = np.ones(5).astype(np.float32) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py index af13553c32..f0ba1ec3eb 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py @@ -186,12 +186,14 @@ class TransformedDistributionTest(test.TestCase): standard_normal = ds.Normal(loc=0., scale=1.) multi_logit_normal = self._cls()( distribution=standard_normal, - bijector=softmax) - x = [[-np.log(3.), 0.], - [np.log(3), np.log(5)]] + bijector=softmax, + event_shape=[1]) + x = [[[-np.log(3.)], [0.]], + [[np.log(3)], [np.log(5)]]] y = softmax.forward(x).eval() - expected_log_pdf = (stats.norm(loc=0., scale=1.).logpdf(x) - - np.sum(np.log(y), axis=-1)) + expected_log_pdf = ( + np.squeeze(stats.norm(loc=0., scale=1.).logpdf(x)) - + np.sum(np.log(y), axis=-1)) self.assertAllClose(expected_log_pdf, multi_logit_normal.log_prob(y).eval()) self.assertAllClose( diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 452f1caa30..bc6b02542e 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -35,7 +35,6 @@ @@RealNVP @@Reshape @@Sigmoid -@@SigmoidCentered @@SinhArcsinh @@SoftmaxCentered @@Softplus @@ -72,7 +71,6 @@ from tensorflow.contrib.distributions.python.ops.bijectors.power_transform impor from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import * from tensorflow.contrib.distributions.python.ops.bijectors.reshape import * from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid import * -from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered import * from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh import * from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import * from tensorflow.contrib.distributions.python.ops.bijectors.softplus import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py deleted file mode 100644 index 223bc9d042..0000000000 --- a/tensorflow/contrib/distributions/python/ops/bijectors/sigmoid_centered.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SigmoidCentered bijector.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.distributions.python.ops.bijectors import softmax_centered - - -__all__ = [ - "SigmoidCentered", -] - - -class SigmoidCentered(softmax_centered.SoftmaxCentered): - """Bijector which computes Y = g(X) = exp([X 0]) / (1 + exp(-X)). - - Equivalent to: `bijector.SoftmaxCentered(event_ndims=0)`. - - See `bijector.SoftmaxCentered` for more details. - """ - - def __init__(self, validate_args=False, name="sigmoid_centered"): - super(SigmoidCentered, self).__init__( - event_ndims=0, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py index 24add40445..dc94fd0a38 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py @@ -19,10 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.distributions.python.ops import distribution_util -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -45,17 +42,14 @@ class SoftmaxCentered(bijector.Bijector): e.g., `softmax(x) = exp(x-c) / sum(exp(x-c))` where `c` is the implicit last coordinate. - Because we append a coordinate, this bijector only supports `event_ndim in [0, - 1]`, i.e., scalars and vectors. - Example Use: ```python - bijector.SoftmaxCentered(event_ndims=1).forward(tf.log([2, 3, 4])) + bijector.SoftmaxCentered().forward(tf.log([2, 3, 4])) # Result: [0.2, 0.3, 0.4, 0.1] # Extra result: 0.1 - bijector.SoftmaxCentered(event_ndims=1).inverse([0.2, 0.3, 0.4, 0.1]) + bijector.SoftmaxCentered().inverse([0.2, 0.3, 0.4, 0.1]) # Result: tf.log([2, 3, 4]) # Extra coordinate removed. ``` @@ -67,82 +61,47 @@ class SoftmaxCentered(bijector.Bijector): """ def __init__(self, - event_ndims=0, validate_args=False, name="softmax_centered"): self._graph_parents = [] self._name = name - with self._name_scope("init", values=[event_ndims]): - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - event_ndims = tensor_util.constant_value(event_ndims) - if event_ndims is None or event_ndims not in [0, 1]: - raise ValueError("`event_ndims` must be a TF constant which is 0 or 1") - self._static_event_ndims = event_ndims super(SoftmaxCentered, self).__init__( - event_ndims=event_ndims, + event_ndims=1, validate_args=validate_args, name=name) def _forward_event_shape(self, input_shape): - if input_shape.ndims is None: + if input_shape.ndims is None or input_shape[-1] is None: return input_shape - if input_shape.ndims != self._static_event_ndims: - raise ValueError("input_shape.dims = %d != %d" % - (input_shape.ndims, self._static_event_ndims)) - if input_shape.ndims == 0: - return tensor_shape.TensorShape([2]) - if input_shape.ndims == 1: - return tensor_shape.TensorShape(input_shape[0] + 1) - # Unreachable code: - raise ValueError("event_ndims = %d must be 0 or 1" % input_shape.ndims) + return tensor_shape.TensorShape([input_shape[-1] + 1]) def _forward_event_shape_tensor(self, input_shape): - ndims = array_ops.shape(input_shape) - if self.validate_args: - # It is not possible for a negative shape so we need only check <= 1. - is_zero_or_one = check_ops.assert_equal( - ndims, 0 if self._static_event_ndims == 0 else 1, - message="event_ndims must be 0 or 1") - ndims = control_flow_ops.with_dependencies([is_zero_or_one], ndims) - if self._static_event_ndims == 0: - return ops.convert_to_tensor( - [2], dtype=dtypes.int32, name="output_shape") - return input_shape + 1 + return (input_shape[-1] + 1)[..., array_ops.newaxis] def _inverse_event_shape(self, output_shape): - if output_shape.ndims is None: + if output_shape.ndims is None or output_shape[-1] is None: return output_shape - if output_shape.ndims != 1: - raise ValueError("output_shape.ndims = %d != 1" % output_shape.ndims) - if self._static_event_ndims == 0: - return tensor_shape.TensorShape([]) - return tensor_shape.TensorShape(output_shape[0] - 1) + if output_shape[-1] <= 1: + raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1]) + return tensor_shape.TensorShape([output_shape[-1] - 1]) def _inverse_event_shape_tensor(self, output_shape): - ndims = array_ops.shape(output_shape)[0] if self.validate_args: # It is not possible for a negative shape so we need only check <= 1. - is_one = check_ops.assert_equal( - ndims, 1, message="event_ndims must be 1") - ndims = control_flow_ops.with_dependencies([is_one], ndims) - if self._static_event_ndims == 0: - return ops.convert_to_tensor([], dtype=dtypes.int32, name="output_shape") - return array_ops.expand_dims(output_shape[0] - 1, dim=0) + is_greater_one = check_ops.assert_greater( + output_shape[-1], 1, message="Need last dimension greater than 1.") + output_shape = control_flow_ops.with_dependencies( + [is_greater_one], output_shape) + return (output_shape[-1] - 1)[..., array_ops.newaxis] def _forward(self, x): # Pad the last dim with a zeros vector. We need this because it lets us # infer the scale in the inverse function. - y = array_ops.expand_dims(x, dim=-1) if self._static_event_ndims == 0 else x - y = distribution_util.pad(y, axis=-1, back=True) + y = distribution_util.pad(x, axis=-1, back=True) # Set shape hints. if x.shape.ndims is not None: - shape = x.shape.as_list() - if self._static_event_ndims == 0: - shape += [2] - elif shape[-1] is not None: - shape[-1] += 1 - shape = tensor_shape.TensorShape(shape) + shape = x.shape[:-1].concatenate(x.shape[-1] + 1) y.shape.assert_is_compatible_with(shape) y.set_shape(shape) @@ -167,17 +126,9 @@ class SoftmaxCentered(bijector.Bijector): log_normalization = (-x[..., -1])[..., array_ops.newaxis] x = x[..., :-1] + log_normalization - if self._static_event_ndims == 0: - x = array_ops.squeeze(x, squeeze_dims=-1) - # Set shape hints. if y.shape.ndims is not None: - shape = y.shape.as_list() - if self._static_event_ndims == 0: - shape = shape[:-1] - elif shape[-1] is not None: - shape[-1] -= 1 - shape = tensor_shape.TensorShape(shape) + shape = y.shape[:-1].concatenate(y.shape[-1] - 1) x.shape.assert_is_compatible_with(shape) x.set_shape(shape) @@ -203,19 +154,16 @@ class SoftmaxCentered(bijector.Bijector): return -math_ops.reduce_sum(math_ops.log(y), axis=-1) def _forward_log_det_jacobian(self, x): - if self._static_event_ndims == 0: - return x - 2. * nn_ops.softplus(x) - else: - # This code is similar to nn_ops.log_softmax but different because we have - # an implicit zero column to handle. I.e., instead of: - # reduce_sum(logits - reduce_sum(exp(logits), dim)) - # we must do: - # log_normalization = 1 + reduce_sum(exp(logits)) - # -log_normalization + reduce_sum(logits - log_normalization) - log_normalization = nn_ops.softplus( - math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True)) - fldj = (-log_normalization + - math_ops.reduce_sum(x - log_normalization, - axis=-1, - keep_dims=True)) - return array_ops.squeeze(fldj, squeeze_dims=-1) + # This code is similar to nn_ops.log_softmax but different because we have + # an implicit zero column to handle. I.e., instead of: + # reduce_sum(logits - reduce_sum(exp(logits), dim)) + # we must do: + # log_normalization = 1 + reduce_sum(exp(logits)) + # -log_normalization + reduce_sum(logits - log_normalization) + log_normalization = nn_ops.softplus( + math_ops.reduce_logsumexp(x, axis=-1, keep_dims=True)) + fldj = (-log_normalization + + math_ops.reduce_sum(x - log_normalization, + axis=-1, + keep_dims=True)) + return array_ops.squeeze(fldj, squeeze_dims=-1) diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 0c747f8e68..3208ecdf64 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -181,7 +181,7 @@ def quadrature_scheme_softmaxnormal_quantiles( edges = array_ops.reshape(edges, shape=array_ops.concat([ [-1], array_ops.ones([batch_ndims], dtype=dtypes.int32)], axis=0)) quantiles = dist.quantile(edges) - quantiles = SoftmaxCentered(event_ndims=1).forward(quantiles) + quantiles = SoftmaxCentered().forward(quantiles) # Cyclically permute left by one. perm = array_ops.concat([ math_ops.range(1, 1 + batch_ndims), [0]], axis=0) diff --git a/tensorflow/docs_src/api_guides/python/contrib.distributions.bijectors.md b/tensorflow/docs_src/api_guides/python/contrib.distributions.bijectors.md index 0ce187b329..e169897f31 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.distributions.bijectors.md +++ b/tensorflow/docs_src/api_guides/python/contrib.distributions.bijectors.md @@ -28,6 +28,5 @@ To apply a `Bijector`, use `distributions.TransformedDistribution`. * @{tf.contrib.distributions.bijectors.Inline} * @{tf.contrib.distributions.bijectors.Invert} * @{tf.contrib.distributions.bijectors.PowerTransform} -* @{tf.contrib.distributions.bijectors.SigmoidCentered} * @{tf.contrib.distributions.bijectors.SoftmaxCentered} * @{tf.contrib.distributions.bijectors.Softplus} -- GitLab From dcbac5e690598f7fe80dbcdc264f1e5a851c8499 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 16 Mar 2018 12:56:26 -0700 Subject: [PATCH 122/960] [XLA] BF16 conversion folding for CRS; remove no-op conversions in propagation. If CRS has tuple output, it needs special handling in conversion folding. BF16 propagation could result in BF16->BF16 conversions, which can be removed. PiperOrigin-RevId: 189380578 --- .../service/bfloat16_conversion_folding.cc | 87 +++++++++++++++---- .../bfloat16_conversion_folding_test.cc | 51 ++++++++++- .../xla/service/bfloat16_propagation.cc | 25 ++++++ .../xla/service/bfloat16_propagation.h | 5 ++ .../xla/service/bfloat16_propagation_test.cc | 40 +++++++++ 5 files changed, 189 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc index 432448e9bb..08d0152e3c 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -34,6 +34,9 @@ class BFloat16ConversionFoldingVisitor : public DfsHloVisitorWithDefault { Status DefaultAction(HloInstruction* hlo) override; + // Special handling for cross-replica-sum which can have a tuple output. + Status HandleCrossReplicaSum(HloInstruction* crs) override; + static bool Run(HloComputation* computation, const BFloat16Support* bfloat16_support) { BFloat16ConversionFoldingVisitor visitor(computation, bfloat16_support); @@ -84,6 +87,25 @@ Status BFloat16ConversionFoldingVisitor::FoldOperandConversion( return Status::OK(); } +namespace { + +// Returns whether hlo has users and all users are conversions from F32 to BF16. +bool AllUsersAreF32ToBF16Converts(const HloInstruction* hlo) { + if (hlo->user_count() == 0 || hlo->shape().element_type() != F32) { + return false; + } + for (const auto user : hlo->users()) { + if (user->opcode() == HloOpcode::kConvert && + user->shape().element_type() == BF16) { + continue; + } + return false; + } + return true; +} + +} // namespace + Status BFloat16ConversionFoldingVisitor::TryFoldBF16Conversions( HloInstruction* hlo) { std::vector bf16_to_f32_operands; @@ -104,22 +126,9 @@ Status BFloat16ConversionFoldingVisitor::TryFoldBF16Conversions( } } - bool fold_output_conversion = hlo->user_count() > 0 && - hlo->shape().element_type() == F32 && - bfloat16_support_->SupportsBF16Output(*hlo) && - hlo != computation_->root_instruction(); - if (fold_output_conversion) { - for (auto user : hlo->users()) { - if (user->opcode() == HloOpcode::kConvert && - user->shape().element_type() == BF16) { - continue; - } - // We should not change the output type if any user is not a conversion - // from F32 to BF16. - fold_output_conversion = false; - break; - } - } + const bool fold_output_conversion = + AllUsersAreF32ToBF16Converts(hlo) && + bfloat16_support_->SupportsBF16Output(*hlo); if (!bfloat16_support_->SupportsMixedPrecisions(*hlo)) { if (has_other_f32_operands || @@ -171,6 +180,52 @@ Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) { return TryFoldBF16Conversions(hlo); } +Status BFloat16ConversionFoldingVisitor::HandleCrossReplicaSum( + HloInstruction* crs) { + if (!ShapeUtil::IsTuple(crs->shape()) || + !bfloat16_support_->SupportsMixedPrecisions(*crs)) { + return DefaultAction(crs); + } + + // First use DefaultAction() to handle the operands. It can't handle + // tuple-shaped output. + TF_RETURN_IF_ERROR(DefaultAction(crs)); + + // Then do per-tuple-element handling on the output. + std::vector> per_tuple_element_gtes( + crs->operand_count()); + for (auto user : crs->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + return Status::OK(); + } + per_tuple_element_gtes[user->tuple_index()].push_back(user); + } + + for (int64 i = 0; i < crs->operand_count(); ++i) { + // Fold conversions only when all the get-tuple-elements' users are + // conversions from F32 to BF16. + auto all_gte_users_are_bf16_convert = [&per_tuple_element_gtes, i]() { + for (auto gte : per_tuple_element_gtes[i]) { + if (!AllUsersAreF32ToBF16Converts(gte)) { + return false; + } + } + return true; + }; + if (!all_gte_users_are_bf16_convert()) { + continue; + } + + ShapeUtil::GetMutableSubshape(crs->mutable_shape(), {i}) + ->set_element_type(BF16); + for (auto gte : per_tuple_element_gtes[i]) { + TF_RETURN_IF_ERROR(FoldOutputConversions(gte)); + } + } + + return Status::OK(); +} + StatusOr BFloat16ConversionFolding::Run(HloModule* module) { XLA_VLOG_LINES( 2, "BFloat16ConversionFolding::Run(), before:\n" + module->ToString()); diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc index cb37759439..28e71c2054 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc @@ -37,7 +37,8 @@ class TestBFloat16Support : public BFloat16Support { if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kSubtract || hlo.opcode() == HloOpcode::kTuple || - hlo.opcode() == HloOpcode::kGetTupleElement) { + hlo.opcode() == HloOpcode::kGetTupleElement || + hlo.opcode() == HloOpcode::kCrossReplicaSum) { return true; } return false; @@ -47,7 +48,8 @@ class TestBFloat16Support : public BFloat16Support { if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kSubtract || hlo.opcode() == HloOpcode::kTuple || - hlo.opcode() == HloOpcode::kGetTupleElement) { + hlo.opcode() == HloOpcode::kGetTupleElement || + hlo.opcode() == HloOpcode::kCrossReplicaSum) { return true; } return false; @@ -55,7 +57,8 @@ class TestBFloat16Support : public BFloat16Support { bool SupportsMixedPrecisions(const HloInstruction& hlo) const override { if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kTuple || - hlo.opcode() == HloOpcode::kGetTupleElement) { + hlo.opcode() == HloOpcode::kGetTupleElement || + hlo.opcode() == HloOpcode::kCrossReplicaSum) { return true; } return false; @@ -206,4 +209,46 @@ TEST_F(BFloat16ConversionFoldingTest, DoNotFoldTuple) { EXPECT_EQ(tuple->operand(1), convert0); } +TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, bf16_shape, "a")); + HloInstruction* convert_a = + builder.AddInstruction(HloInstruction::CreateConvert(f32_shape, a)); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32_shape, "b")); + + HloInstruction* crs = + builder.AddInstruction(HloInstruction::CreateCrossReplicaSum( + ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b})); + HloInstruction* gte_a = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32_shape, crs, 0)); + HloInstruction* gte_b = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32_shape, crs, 1)); + HloInstruction* convert_gte_b = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, gte_b)); + HloInstruction* tuple = builder.AddInstruction( + HloInstruction::CreateTuple({gte_a, convert_gte_b})); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(FoldConversions(module.get())); + + EXPECT_EQ(computation->root_instruction(), tuple); + EXPECT_EQ(tuple->operand(0), gte_a); + EXPECT_EQ(tuple->operand(1), gte_b); + EXPECT_EQ(gte_a->shape().element_type(), F32); + EXPECT_EQ(gte_b->shape().element_type(), BF16); + EXPECT_EQ(crs->operand(0), a); + EXPECT_EQ(crs->operand(1), b); + EXPECT_EQ(a->shape().element_type(), BF16); + EXPECT_EQ(b->shape().element_type(), F32); + EXPECT_EQ(ShapeUtil::GetSubshape(crs->shape(), {0}).element_type(), F32); + EXPECT_EQ(ShapeUtil::GetSubshape(crs->shape(), {1}).element_type(), BF16); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 531f36e8c5..7195c31d9c 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -627,6 +627,27 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( return Status::OK(); } +Status BFloat16Propagation::RemoveNoopConversions(HloModule* module) { + for (auto computation : module->computations()) { + for (auto hlo : computation->MakeInstructionPostOrder()) { + if (hlo->opcode() != HloOpcode::kConvert) { + continue; + } + auto source = hlo->mutable_operand(0); + if (!ShapeUtil::Equal(source->shape(), hlo->shape())) { + continue; + } + const bool is_root = hlo == computation->root_instruction(); + TF_RETURN_IF_ERROR(hlo->ReplaceAllUsesWith(source)); + if (is_root) { + computation->set_root_instruction(source); + } + TF_RETURN_IF_ERROR(computation->RemoveInstructionAndUnusedOperands(hlo)); + } + } + return Status::OK(); +} + // The algorithm first does a forward pass (parameters to root) to determine a // set of instructions to consider using bfloat16, then does a backward pass to // determine the precisions of those instructions according to the need of @@ -677,6 +698,10 @@ StatusOr BFloat16Propagation::Run(HloModule* module) { // defining instruction's shape has changed. So we need to adjust the output // shapes of instructions according to the HLO values they refer to. TF_RETURN_IF_ERROR(ResolveInconsistencyOfAliasingBuffers(module)); + + // This pass could have turned an F32 -> BF16 conversion to a no-op (BF16 -> + // BF16), so we remove them now. + TF_RETURN_IF_ERROR(RemoveNoopConversions(module)); return true; } diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index 89a5ac5db1..1744e9db90 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -133,6 +133,11 @@ class BFloat16Propagation : public HloPassInterface { // by the given HLO. void AdjustCalledComputationRoot(HloInstruction* hlo); + // *************************** + // Removes no-op conversions (same source and target shapes) that can be + // produced this pass. + Status RemoveNoopConversions(HloModule* module); + // *************************** // Functions called and state used by two or more passes. diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 5950b004b3..88f8301416 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -617,4 +617,44 @@ TEST_F(BFloat16PropagationTest, DoNotPropagateWhilesCallingSameComputation) { EXPECT_EQ(computation->root_instruction(), dot); } +// Tests that if this pass turns an F32 -> BF16 conversion into a no-op (BF16 -> +// BF16 conversion), then it will remove that conversion. +TEST_F(BFloat16PropagationTest, NoopConversionRemoved) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {4, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {4, 4}); + + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "param")); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kAdd, param, param)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kAdd, param, param)); + HloInstruction* tuple = + builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32_shape, tuple, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32_shape, tuple, 1)); + HloInstruction* convert0 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, gte0)); + HloInstruction* convert1 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, gte1)); + HloInstruction* add2 = builder.AddInstruction(HloInstruction::CreateBinary( + bf16_shape, HloOpcode::kAdd, convert0, convert1)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), add2); + EXPECT_EQ(add2->operand(0), gte0); + EXPECT_EQ(add2->operand(1), gte1); + EXPECT_EQ(gte0->shape().element_type(), BF16); + EXPECT_EQ(gte1->shape().element_type(), BF16); + EXPECT_EQ(add0->shape().element_type(), BF16); + EXPECT_EQ(add1->shape().element_type(), BF16); +} + } // namespace xla -- GitLab From a5dd6369841a2ea9ecd6e2a56416859afb0c7716 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 13:00:31 -0700 Subject: [PATCH 123/960] Move if_op kernel to //third_party/tensorflow/compiler/tf2xla/kernels PiperOrigin-RevId: 189381067 --- tensorflow/compiler/tf2xla/kernels/BUILD | 17 ++ tensorflow/compiler/tf2xla/kernels/if_op.cc | 226 ++++++++++++++++++++ tensorflow/compiler/tf2xla/kernels/if_op.h | 59 +++++ 3 files changed, 302 insertions(+) create mode 100644 tensorflow/compiler/tf2xla/kernels/if_op.cc create mode 100644 tensorflow/compiler/tf2xla/kernels/if_op.h diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index d2fa933cf9..0bbfe86de3 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -93,6 +93,7 @@ tf_kernel_library( "shape_util.h", ], deps = [ + ":if_op", ":while_op", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", @@ -154,6 +155,22 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "if_op", + srcs = ["if_op.cc"], + hdrs = ["if_op.h"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla/ops:functional_ops", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) + # Kernels that only work on CPU, because they use XLA custom calls. # Only link this when using the CPU backend for XLA. tf_kernel_library( diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.cc b/tensorflow/compiler/tf2xla/kernels/if_op.cc new file mode 100644 index 0000000000..eefbe55c81 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/if_op.cc @@ -0,0 +1,226 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/kernels/if_op.h" + +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_context.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" + +namespace tensorflow { + +XlaIfOp::XlaIfOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + const NameAttrList* name_attr; + OP_REQUIRES_OK(ctx, ctx->GetAttr("then_branch", &name_attr)); + then_branch_ = *name_attr; + OP_REQUIRES_OK(ctx, ctx->GetAttr("else_branch", &name_attr)); + else_branch_ = *name_attr; + + OP_REQUIRES_OK(ctx, ctx->GetAttr("Tcond", &cond_type_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("Tin", &input_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("Tout", &output_types_)); +} + +// TODO(b/35949885): There is duplication here with the handling of the +// while_op. Refactor the common code out/rework. +void XlaIfOp::Compile(XlaOpKernelContext* ctx) { + xla::ComputationBuilder* b = ctx->builder(); + + OP_REQUIRES(ctx, cond_type_ == DT_BOOL, + errors::InvalidArgument( + "Condition argument must be a boolean for XLA compilation")); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(ctx->InputShape(0)), + errors::InvalidArgument( + "Condition argument must be a scalar for XLA compilation")); + + VLOG(1) << "Building If: " << input_types_.size() << " inputs"; + + std::vector inputs(input_types_.size()); + std::vector arguments(input_types_.size()); + for (int i = 0; i < input_types_.size(); ++i) { + XlaCompiler::Argument& arg = arguments[i]; + DataType type = ctx->input_type(i + 1); + if (type == DT_RESOURCE) { + XlaResource* resource; + OP_REQUIRES_OK(ctx, ctx->GetResourceInput(i + 1, &resource)); + + arg.initialized = resource->initialized(); + arg.kind = XlaCompiler::Argument::kResource; + arg.resource_kind = resource->kind(); + OP_REQUIRES_OK(ctx, resource->Pack(&inputs[i], b)); + + arg.type = resource->type(); + arg.shape = resource->shape(); + OP_REQUIRES(ctx, arg.initialized, + errors::Unimplemented("Uninitialized arguments: ", arg.name)); + arg.tensor_array_size = resource->tensor_array_size(); + for (const auto& gradient : resource->tensor_array_gradients()) { + arg.tensor_array_gradients.insert(gradient.first); + } + arg.name = resource->name(); + VLOG(2) << "Resource " << resource->name() + << " type: " << DataTypeString(arg.type) + << " shape: " << arg.shape.DebugString() + << " initialized: " << arg.initialized; + } else { + arg.kind = XlaCompiler::Argument::kParameter; + arg.type = input_types_[i]; + arg.shape = ctx->InputShape(i + 1); + inputs[i] = ctx->Input(i + 1); + VLOG(2) << "Arg type: " << DataTypeString(arg.type) + << " shape: " << arg.shape.DebugString(); + } + } + + // Compile both branches of the conditional. + XlaCompiler::CompileOptions options; + options.use_tuple_arg = true; + options.resolve_compile_time_constants = false; + options.return_updated_values_for_all_resources = true; + options.is_entry_computation = false; + XlaCompiler* compiler = ctx->compiler(); + + XlaCompiler::CompilationResult then_result; + OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, then_branch_, + arguments, &then_result)); + XlaCompiler::CompilationResult else_result; + OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, else_branch_, + arguments, &else_result)); + + for (XlaCompiler::CompilationResult* result : {&then_result, &else_result}) { + for (const XlaCompiler::ResourceUpdate& update : result->resource_updates) { + XlaResource* resource; + OP_REQUIRES_OK(ctx, + ctx->GetResourceInput(update.input_index + 1, &resource)); + XlaCompiler::Argument& arg = arguments[update.input_index]; + + // Add any TensorArray gradients touched by the then/else computation to + // the enclosing graph. + for (const string& grad_source : update.tensor_array_gradients_accessed) { + VLOG(5) << "TensorArray " << resource->name() << " accessed gradient " + << grad_source; + XlaResource* gradient; + OP_REQUIRES_OK(ctx, resource->GetOrCreateTensorArrayGradient( + grad_source, b, &gradient)); + } + // Add all of the TensorArray gradients to the argument. For simplicity, + // we always pass all known gradients. + for (const auto& gradient : resource->tensor_array_gradients()) { + arg.tensor_array_gradients.insert(gradient.first); + } + } + } + + // Check that both branches have identical input shapes. + OP_REQUIRES(ctx, then_result.xla_input_shapes.size() == 1, + errors::FailedPrecondition("Expected one input shape")); + xla::Shape then_input_shape = then_result.xla_input_shapes[0]; + OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(then_input_shape), + errors::FailedPrecondition("Expected tuple shape")); + OP_REQUIRES(ctx, else_result.xla_input_shapes.size() == 1, + errors::FailedPrecondition("Expected one input shape")); + xla::Shape else_input_shape = else_result.xla_input_shapes[0]; + OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(else_input_shape), + errors::FailedPrecondition("Expected tuple shape")); + OP_REQUIRES(ctx, + xla::ShapeUtil::Compatible(then_input_shape, else_input_shape), + errors::InvalidArgument( + "Input shapes of then and else branches do not match: ", + xla::ShapeUtil::HumanString(then_input_shape), " vs. ", + xla::ShapeUtil::HumanString(else_input_shape))); + + // Check that both branches have identical output shapes. + OP_REQUIRES( + ctx, + xla::ShapeUtil::Compatible(then_result.xla_output_shape, + else_result.xla_output_shape), + errors::InvalidArgument( + "Output shapes of then and else branches do not match: ", + xla::ShapeUtil::HumanString(then_result.xla_output_shape), " vs. ", + xla::ShapeUtil::HumanString(else_result.xla_output_shape))); + + VLOG(2) << "Input shape: " << xla::ShapeUtil::HumanString(then_input_shape); + VLOG(2) << "Output shape: " + << xla::ShapeUtil::HumanString(then_result.xla_output_shape); + + // We set return_updated_values_for_all_resources=true and we pass the same + // arguments to both computations, so the resource update count must match. + OP_REQUIRES(ctx, + then_result.resource_updates.size() == + else_result.resource_updates.size(), + errors::FailedPrecondition( + "Different number of resources in then and else branch")); + for (int i = 0; i < then_result.resource_updates.size(); ++i) { + const auto& lhs = then_result.resource_updates[i]; + const auto& rhs = else_result.resource_updates[i]; + bool equal = lhs.input_index == rhs.input_index && lhs.shape == rhs.shape && + lhs.tensor_array_gradients_accessed == + rhs.tensor_array_gradients_accessed; + OP_REQUIRES( + ctx, equal, + errors::FailedPrecondition( + "Mismatch in resource of then and else branch for resource ", i)); + } + + xla::ComputationDataHandle outputs = + b->Conditional(ctx->Input(0), b->Tuple(inputs), *then_result.computation, + b->Tuple(inputs), *else_result.computation); + // Sets non-variable outputs. + for (int i = 0; i < output_types_.size(); ++i) { + if (ctx->input_type(i) != DT_RESOURCE) { + xla::ComputationDataHandle output_handle = b->GetTupleElement(outputs, i); + if (VLOG_IS_ON(2)) { + LOG(INFO) << "Setting output " << i; + auto shape_or = b->GetShape(output_handle); + if (shape_or.ok()) { + LOG(INFO) << "Shape for output " << i << ": " + << xla::ShapeUtil::HumanString(*shape_or.ValueOrDie()); + } else { + LOG(INFO) << "Shape unknown for output " << i; + } + } + ctx->SetOutput(i, output_handle); + } + } + + // Updates the values of any resource variables modified by the conditional + // bodies. + for (XlaCompiler::CompilationResult* result : {&then_result, &else_result}) { + for (int i = 0; i < result->resource_updates.size(); ++i) { + const XlaCompiler::ResourceUpdate& update = result->resource_updates[i]; + XlaResource* resource; + OP_REQUIRES_OK(ctx, + ctx->GetResourceInput(update.input_index + 1, &resource)); + if (update.modified) { + int pos = result->outputs.size() + i; + OP_REQUIRES_OK(ctx, + resource->SetFromPack( + arguments[update.input_index].tensor_array_gradients, + b->GetTupleElement(outputs, pos), b)); + } + VLOG(2) << "If variable: pos: " << update.input_index + << " name: " << resource->name() + << " modified: " << update.modified + << " type: " << DataTypeString(update.type) + << " shape: " << update.shape.DebugString(); + } + } + VLOG(1) << "Done building If"; +} + +REGISTER_XLA_OP(Name("XlaIf").AllowResourceTypes(), XlaIfOp); + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.h b/tensorflow/compiler/tf2xla/kernels/if_op.h new file mode 100644 index 0000000000..f9bc98a198 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/if_op.h @@ -0,0 +1,59 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_IF_OP_H_ +#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_IF_OP_H_ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/core/framework/attr_value.pb.h" + +namespace tensorflow { + +// This TensorFlow op provides a functional conditional primitive. +// +// The outputs of the then/else branches must agree on the number, types, and +// shapes of the Tensors carried around the two bodies. +// +// Computations in then/else bodies may read from and write to resource +// variables. +// Resource variables may be passed as arguments to the then/else function's +// bodies. The XlaCompiler converts resource variable arguments +// into parameters to the XLA computation and moves them to the end of the +// parameter list, and by using the `return_updated_values_for_all_variables` +// we ensure that all variables that appear in the input also appear at the +// end of the then/else bodies output. This ensures the then/else bodies output +// signatures match. +// +// It is the user's responsibility to ensure that each non-variable _Arg matches +// the corresponding _Retval. +class XlaIfOp : public XlaOpKernel { + public: + explicit XlaIfOp(OpKernelConstruction* ctx); + + void Compile(XlaOpKernelContext* ctx) override; + + private: + TF_DISALLOW_COPY_AND_ASSIGN(XlaIfOp); + + NameAttrList then_branch_; + NameAttrList else_branch_; + DataType cond_type_; + DataTypeVector input_types_; + DataTypeVector output_types_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_KERNELS_IF_OP_H_ -- GitLab From a243f6e0299c733935d9e11a5f5eeafd2fe929ce Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 16 Mar 2018 13:11:36 -0700 Subject: [PATCH 124/960] Estimate prediction size and error out if it is larger than protobuf limit. PiperOrigin-RevId: 189382429 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index b3a7a4bd8d..32f15e60cd 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -25,6 +25,7 @@ import threading import time import traceback +import numpy as np import six from six.moves import queue as Queue # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin @@ -69,6 +70,7 @@ _TPU_ESTIMATOR = 'tpu_estimator' _ITERATIONS_PER_LOOP_VAR = 'iterations_per_loop' _BATCH_SIZE_KEY = 'batch_size' _CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' +_ONE_GIGABYTE = 1024 * 1024 * 1024 _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] @@ -2083,6 +2085,10 @@ class TPUEstimator(estimator_lib.Estimator): host_ops = host_call_ret['host_call'] predictions = host_call_ret['predictions'] + _verify_cross_hosts_transfer_size( + predictions, message=( + 'The estimated size for TPUEstimatorSpec.predictions is too ' + 'large.')) stopping_signals = host_call_ret['signals'] with ops.control_dependencies(host_ops): @@ -2096,13 +2102,6 @@ class TPUEstimator(estimator_lib.Estimator): host_ops), ] + input_hooks - # TODO(b/73813593): Delete this logging once the bug is resolved. - logging.info( - 'If the Tensors in TPUEstimatorSpec.predictions dict are large, ' - 'you might observe the TPU program getting stuck (b/73813593). ' - 'Consider using small Tensors in the predictions dict to verify ' - 'the issue and report on the bug.') - return model_fn_lib.EstimatorSpec( mode, prediction_hooks=hooks, @@ -2515,3 +2514,21 @@ class _SignalsHelper(object): @staticmethod def as_tensor_list(signals): return [signals[key] for key in sorted(signals.iterkeys())] + + +def _verify_cross_hosts_transfer_size(tensor_dict, message): + total_size = 0 + tensor_structure = {} + for key, tensor in tensor_dict.items(): + shape = tensor.shape + size = np.product(shape) * tensor.dtype.size + tensor_structure[key] = shape + total_size += size + if total_size >= _ONE_GIGABYTE: + raise ValueError( + '{} The transfer size is larger than the protobuf limit. Please ' + 'consider to use Tensors with smaller shapes or reduce batch ' + 'size. Given:\n' + '{}'.format(message, '\n'.join([ + ' -- Key: {}, Shape: {}'.format(k, v) + for k, v in tensor_structure.items()]))) -- GitLab From 77c17f79399c4444ad49ac10f02e141c266c740e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 13:43:12 -0700 Subject: [PATCH 125/960] Fixed a typo. PiperOrigin-RevId: 189386932 --- tensorflow/contrib/lite/builtin_ops.h | 3 +-- tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 2218ea8eac..e4652a3e70 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -24,8 +24,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin -// ops. +// Note: CUSTOM and DELEGATE are 2 special ops which are not real built-in ops. typedef enum { kTfLiteBuiltinAdd = 0, kTfLiteBuiltinAveragePool2d = 1, diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index 08bcfe4516..ac408d2f94 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -46,8 +46,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin -// ops. +// Note: CUSTOM and DELEGATE are 2 special ops which are not real built-in ops. typedef enum { )"; -- GitLab From af1c668e2db5bcfcd3b156800c4b982c9423e858 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 16 Mar 2018 14:39:08 -0700 Subject: [PATCH 126/960] Downgrade run-after-mutation error to a log warning. This is to ease the transition to the C API. Some tests currently mutate the graph after running it but currently pass. This error was meant to guard against existing behavior, so it's not a regression to make it a warning instead for now. PiperOrigin-RevId: 189395472 --- tensorflow/c/c_api.cc | 19 +-- tensorflow/c/c_api_internal.h | 17 +-- tensorflow/python/client/session_test.py | 140 ----------------------- 3 files changed, 19 insertions(+), 157 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 778cb667e2..18eeb28168 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -647,11 +647,11 @@ void RecordMutation(TF_Graph* graph, const TF_Operation& op, for (auto it : graph->sessions) { mutex_lock session_lock(it.first->mu); if (it.first->last_num_graph_nodes > op.node.id()) { - it.second = FailedPrecondition( + it.second = strings::StrCat( "Operation '", op.node.DebugString(), "' was changed by ", mutation_type, - " after it was run by a session. Nodes can be mutated " - "only before they are executed by a session. Either don't modify " + " after it was run by a session. This mutation will have no effect, " + "and will trigger an error in the future. Either don't modify " "nodes after running them or create a new session."); } } @@ -722,10 +722,11 @@ bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) { mutex_lock session_lock(session->mu); const Graph& graph = session->graph->graph; - status->status = session->graph->sessions[session]; - if (!status->status.ok()) { - session->graph->mu.unlock(); - return false; + const string& mutation_warning = session->graph->sessions[session]; + if (!mutation_warning.empty()) { + // TODO(b/74949947): turn this back into an error status + LOG(WARNING) << mutation_warning; + session->graph->sessions[session].clear(); } const auto num_nodes = graph.num_node_ids(); @@ -2475,7 +2476,7 @@ TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt, TF_Session* new_session = new TF_Session(session, graph); if (graph != nullptr) { mutex_lock l(graph->mu); - graph->sessions[new_session] = Status::OK(); + graph->sessions[new_session] = ""; } return new_session; } else { @@ -2541,7 +2542,7 @@ TF_Session* TF_LoadSessionFromSavedModel( TF_Session* session = new TF_Session(bundle.session.release(), graph); - graph->sessions[session] = Status::OK(); + graph->sessions[session] = ""; session->last_num_graph_nodes = graph->graph.num_node_ids(); return session; #endif // __ANDROID__ diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index e885a69927..95652a1137 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -84,19 +84,20 @@ struct TF_Graph { std::unordered_map name_map GUARDED_BY(mu); - // The keys of this map are all the active sessions using this graph. - // Each value is the current "runnability" status of the corresponding - // session. Under normal conditions all statuses are Status::OK(), but - // if some operation is mutated after it was run by a session (this - // is detected in RecordMutation function), that session is no longer - // safe to run. Its status will contain the error that will be returned - // to the user, should she try running this session. + // The keys of this map are all the active sessions using this graph. Each + // value records whether the graph has been mutated since the corresponding + // session has been run (this is detected in RecordMutation function). If the + // string is empty, no mutation has occurred. Otherwise the string is a + // description of the mutation suitable for returning to the user. // // Sessions are added to this map in TF_NewSession, and removed in // TF_DeleteSession. // TF_Graph may only / must be deleted when // sessions.size() == 0 && delete_requested == true - tensorflow::gtl::FlatMap sessions + // + // TODO(b/74949947): mutations currently trigger a warning instead of a bad + // status, this should be reverted when possible. + tensorflow::gtl::FlatMap sessions GUARDED_BY(mu); bool delete_requested GUARDED_BY(mu); // set true by TF_DeleteGraph diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 6c7339f3d8..3b12e06f43 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -29,7 +29,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.core.framework import attr_value_pb2 -from tensorflow.core.framework import types_pb2 from tensorflow.core.lib.core import error_codes_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session @@ -1884,144 +1883,5 @@ class SessionTest(test_util.TensorFlowTestCase): sess.run(a, feed_dict={a: 1}) -class GraphMutationTest(test_util.TensorFlowTestCase): - - def setUp(self): - self._original_use_c_api_value = ops._USE_C_API - ops._USE_C_API = True - super(GraphMutationTest, self).setUp() - - def tearDown(self): - ops._USE_C_API = self._original_use_c_api_value - super(GraphMutationTest, self).tearDown() - - def testUpdateInputAfterRunning(self): - with ops.Graph().as_default() as g: - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess: - self.assertAllEqual(3.0, sess.run(c)) - c.op._update_input(1, a) # pylint: disable=protected-access - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by updating input tensor after it was run'): - sess.run(c) - - # Check that running the graph with a new session is fine - with session.Session(graph=g) as sess2: - self.assertAllEqual(2.0, sess2.run(c)) - - def testSetDeviceAfterRunning(self): - with ops.Graph().as_default() as g: - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess: - self.assertAllEqual(3.0, sess.run(c)) - c.op._set_device('/cpu:0') # pylint: disable=protected-access - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by setting device after it was run'): - sess.run(c) - - def testSetAttrAfterRunning(self): - with ops.Graph().as_default() as g: - a = constant_op.constant(1.0, dtype=dtypes.float32) - b = math_ops.cast(a, dtypes.float64) - - with session.Session(graph=g) as sess: - self.assertAllEqual(1.0, sess.run(b)) - b.op._set_attr('DstT', attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT)) - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'Cast.*was changed by setting attribute after it was run'): - sess.run(b) - - def testRunModifyRun(self): - with ops.Graph().as_default() as g: - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess: - self.assertAllEqual(3.0, sess.run(c)) - - d = b + c - d.op._update_input(0, a) # pylint: disable=protected-access - self.assertAllEqual(3.0, sess.run(c)) - self.assertAllEqual(4.0, sess.run(d)) - - def testRunModifyRunTwoSessions(self): - with ops.Graph().as_default() as g: - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess1: - with session.Session(graph=g) as sess2: - self.assertAllEqual(3.0, sess1.run(c)) - self.assertAllEqual(3.0, sess2.run(c)) - - d = b + c - d.op._update_input(0, a) # pylint: disable=protected-access - self.assertAllEqual(3.0, sess2.run(c)) - self.assertAllEqual(4.0, sess2.run(d)) - - d.op._update_input(0, b) # pylint: disable=protected-access - self.assertAllEqual(3.0, sess1.run(c)) - self.assertAllEqual(5.0, sess1.run(d)) - - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by updating input tensor after it was run'): - sess2.run(c) - - def testTwoSessionsOneRunBeforeModification(self): - with ops.Graph().as_default() as g, ops.device('/cpu:0'): - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess1: - with session.Session(graph=g) as sess2: - sess1.run(c) - - c.op._set_device('/cpu:0') # pylint: disable=protected-access - - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by setting device after it was run'): - sess1.run(c) - - # sess2 was not run before modification - self.assertAllEqual(3.0, sess2.run(c)) - - def testTwoSessionsBothRunBeforeModification(self): - with ops.Graph().as_default() as g, ops.device('/cpu:0'): - a = constant_op.constant(1.0) - b = constant_op.constant(2.0) - c = a + b - - with session.Session(graph=g) as sess1: - with session.Session(graph=g) as sess2: - sess1.run(c) - sess2.run(c) - - c.op._set_device('/cpu:0') # pylint: disable=protected-access - - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by setting device after it was run'): - sess1.run(c) - - with self.assertRaisesRegexp( - errors.FailedPreconditionError, - 'add.*was changed by setting device after it was run'): - sess2.run(c) - - if __name__ == '__main__': googletest.main() -- GitLab From 7b33deb9c8a7c62f194e21b6364400d26e511567 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 16 Mar 2018 15:10:39 -0700 Subject: [PATCH 127/960] Don't inline functions in the grappler item builder since this part of the code doesn't support custom ops. Instead we will rely on the function optimizer. PiperOrigin-RevId: 189400462 --- .../core/grappler/grappler_item_builder.cc | 5 +- .../core/grappler/grappler_item_builder.h | 2 - .../grappler/grappler_item_builder_test.cc | 92 ------------------- tensorflow/python/grappler/item.i | 1 - tensorflow/python/grappler/tf_optimizer.i | 1 - 5 files changed, 2 insertions(+), 99 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index d7b300321a..288587ce9b 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -78,7 +78,7 @@ void InitializeTensor(DataType type, Tensor* tensor) { // correct optimizations. Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, const ItemConfig& cfg) { - if (!cfg.apply_optimizations && !cfg.inline_functions) { + if (!cfg.apply_optimizations && !cfg.erase_noinline_attributes) { return Status::OK(); } @@ -88,7 +88,7 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, // Make a local copy of graph def, because we need to change some things. GraphDef graph_def(graph_def_arg); - if (cfg.inline_functions && cfg.erase_noinline_attributes) { + if (cfg.erase_noinline_attributes) { // TF optimizer doesn't inline functions with "_noinline" attribute, // so let's go over the function library and erase it. for (auto& func : *graph_def.mutable_library()->mutable_function()) { @@ -113,7 +113,6 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, } else { optimizer_opts->set_opt_level(::tensorflow::OptimizerOptions_Level_L0); } - optimizer_opts->set_do_function_inlining(cfg.inline_functions); // Create the function library runtime. std::unique_ptr pflr( diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index c877d91163..6d181e49e6 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -40,8 +40,6 @@ struct ItemConfig { int placeholder_unknown_output_shape_dim = -1; // If true, does L1 optimizations. bool apply_optimizations = false; - // If true, does inlining. - bool inline_functions = false; // If true, erases all "_noinline" attributes from user-defined functions. // Has no effect if "inline_functions" is disabled. bool erase_noinline_attributes = false; diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index 29488e4b7e..4b90bf3038 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -35,96 +35,6 @@ namespace { class GrapplerItemBuilderTest : public ::testing::Test {}; -// Create a sample graph with a symbolic gradient for sum. -void SampleSumSymbolicGradientGraphdef( - GraphDef *def, CollectionDef *fetches, - std::vector *names_of_ops_of_inline) { - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); - - auto dummy_variable = Variable(scope, {2, 2}, DT_FLOAT); - auto x = Const(scope, 1.0f); - auto y = Const(scope, 2); - auto z = Const(scope, 3.0f); - TF_ASSERT_OK(scope.status()); - - NameAttrList fn; - fn.set_name("Sum"); - (*fn.mutable_attr())["T"].set_type(DT_FLOAT); - auto g0 = SymbolicGradient(scope, std::initializer_list{x, y, z}, - {DT_FLOAT, DT_INT32}, fn); - - // TODO(bsteiner): we should rewrite the feed/fetch nodes to reflect the - // inlining that's done in the item builder - // fetches->mutable_node_list()->add_value(g0[0].name()); - fetches->mutable_node_list()->add_value("SymbolicGradient/dx"); - fetches->mutable_node_list()->add_value("SymbolicGradient/dy_reshaped"); - - TF_CHECK_OK(scope.ToGraphDef(def)); - - // Add names of the ops that replace the Mul symbolic gradient during - // inlining. This is for validation. - *names_of_ops_of_inline = { - "SymbolicGradient/dx", "SymbolicGradient/tile_scaling", - "SymbolicGradient/dy_reshaped", "SymbolicGradient/y_shape", - "SymbolicGradient/x_shape", "SymbolicGradient/stitch_idx0", - "SymbolicGradient/x_rank", "SymbolicGradient/stitch_val1", - "SymbolicGradient/i_shape", "SymbolicGradient/di", - "SymbolicGradient/zero", "SymbolicGradient/one"}; -} - -std::unique_ptr CreateGrapplerItem(const GraphDef &def, - const CollectionDef &fetches) { - MetaGraphDef meta_def; - ItemConfig cfg; - cfg.inline_functions = true; - *meta_def.mutable_graph_def() = def; - (*meta_def.mutable_collection_def())["train_op"] = fetches; - return GrapplerItemFromMetaGraphDef("0", meta_def, cfg); -} - -int CountSymbolicGradientOps(const std::unique_ptr &item) { - int n_symb_grads = 0; - for (const auto &node : item->graph.node()) { - if (node.op() == FunctionLibraryDefinition::kGradientOp) { - n_symb_grads++; - } - } - return n_symb_grads; -} - -int CountOpsWithNames(const std::unique_ptr &item, - const std::vector &names) { - std::set names_set(names.begin(), names.end()); - int n_with_names = 0; - for (const auto &node : item->graph.node()) { - if (names_set.find(node.name()) != names_set.end()) { - n_with_names++; - } - } - return n_with_names; -} - -TEST_F(GrapplerItemBuilderTest, SymbolicGradientInlining) { - // Create sample sum symbolic gradient graph. - GraphDef def; - CollectionDef fetches; - std::vector ops_of_inline; - SampleSumSymbolicGradientGraphdef(&def, &fetches, &ops_of_inline); - - // Create the inlined graph. - std::unique_ptr with_inline = CreateGrapplerItem(def, fetches); - - // For the inlined graph, there should be 0 symbolic gradient ops. - EXPECT_EQ(0, CountSymbolicGradientOps(with_inline)); - - // For the inlined graph, make sure all the required expanded op’s are in the - // graph. - EXPECT_EQ(ops_of_inline.size(), - CountOpsWithNames(with_inline, ops_of_inline)); -} - TEST_F(GrapplerItemBuilderTest, AssetFilepathOverrideTest) { MetaGraphDef meta_graph; @@ -273,7 +183,6 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { (*meta_graph.mutable_collection_def())["train_op"] = train_op; ItemConfig cfg; - cfg.inline_functions = false; std::unique_ptr item = GrapplerItemFromMetaGraphDef("0", meta_graph, cfg); @@ -294,7 +203,6 @@ TEST_F(GrapplerItemBuilderTest, GraphWithCustomOps) { (*meta_graph.mutable_collection_def())["train_op"] = train_op; ItemConfig cfg; - cfg.inline_functions = false; std::unique_ptr item = GrapplerItemFromMetaGraphDef("0", meta_graph, cfg); diff --git a/tensorflow/python/grappler/item.i b/tensorflow/python/grappler/item.i index 9a84c60b04..593d38206d 100644 --- a/tensorflow/python/grappler/item.i +++ b/tensorflow/python/grappler/item.i @@ -83,7 +83,6 @@ static GItem TF_NewItem( tensorflow::grappler::ItemConfig cfg; cfg.ignore_user_placement = ignore_user_placement; cfg.ignore_colocation = ignore_colocation; - cfg.inline_functions = true; std::unique_ptr item = tensorflow::grappler::GrapplerItemFromMetaGraphDef("item", meta_graph, cfg); if (!item) { diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i index de9326ccfc..39ca71e99a 100644 --- a/tensorflow/python/grappler/tf_optimizer.i +++ b/tensorflow/python/grappler/tf_optimizer.i @@ -98,7 +98,6 @@ PyObject* TF_OptimizeGraph( const tensorflow::MetaGraphDef& metagraph, bool verbose, const string& graph_id, TF_Status* out_status) { tensorflow::grappler::ItemConfig item_config; - item_config.inline_functions = false; item_config.apply_optimizations = false; item_config.ignore_user_placement = false; std::unique_ptr grappler_item = -- GitLab From 42e319741cbf50a2414aee330d8a1e385f083d20 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Fri, 16 Mar 2018 15:13:25 -0700 Subject: [PATCH 128/960] [XLA] Fix forward for HLO profiling test, explicitly set profiling preference. PiperOrigin-RevId: 189400869 --- tensorflow/compiler/xla/service/hlo_execution_profile.cc | 4 ++-- tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index f0df93b61d..c3ccbf0f0c 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -111,8 +111,8 @@ HloExecutionProfile::HloExecutionProfile( : hlo_profile_printer_data_(*hlo_profile_printer_data), hlo_profile_index_map_(*hlo_profile_index_map), profile_counters_( - /*count*/ hlo_profile_index_map_.total_count(), - /*value*/ 0) {} + /*count=*/hlo_profile_index_map_.total_count(), + /*value=*/0) {} void HloExecutionProfile::SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken) { diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc index 9ad2a19853..24b9f37a80 100644 --- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc +++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc @@ -144,7 +144,7 @@ void ExecuteAndFetchProfile(string* profile_output, LocalClient* client, TF_ASSERT_OK_AND_ASSIGN( std::unique_ptr local_executable, client->Compile(computation, {&lhs_arg_shape, &rhs_arg_shape}, - ExecutableBuildOptions())); + ExecutableBuildOptions().set_hlo_profile(true))); Executable* executable = local_executable->executable(); HloExecutionProfile hlo_execution_profile( -- GitLab From 6f0dd0425c51360fe2be5a938a8f3fb39e420fa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jiongyan=20Zhang=20=28=E5=BC=A0=E7=82=AF=E8=A1=8D=29?= Date: Sat, 17 Mar 2018 06:33:45 +0800 Subject: [PATCH 129/960] Add doc on the order of eigenvalues returned by tf.self_adjoint_eig (#16909) * Supplement docs for selfAdjointEig * Replace increasing with non-decreasing --- tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt | 3 ++- .../core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt | 3 ++- tensorflow/python/ops/linalg_ops.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt index 51d63eeb56..7be9a958ab 100644 --- a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt @@ -19,6 +19,7 @@ form square matrices, with the same constraints as the single matrix SelfAdjointEig. The result is a [..., M+1, M] matrix with [..., 0,:] containing the -eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. +eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues +are sorted in non-decreasing order. END } diff --git a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt index 4a5e125258..fae9e84fc8 100644 --- a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt @@ -31,7 +31,8 @@ END summary: "Computes the eigen decomposition of one or more square self-adjoint matrices." description: < Date: Fri, 16 Mar 2018 15:31:10 -0700 Subject: [PATCH 130/960] Don't fail when optimizing the gradients of noinline functions PiperOrigin-RevId: 189403170 --- .../grappler/optimizers/function_optimizer.cc | 10 +- .../optimizers/function_optimizer_test.cc | 103 +++++++++++++++++- 2 files changed, 103 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index d47887bfc8..3f2afdeef1 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -189,7 +189,7 @@ Status InlineSymbolicGradient(const NodeDef& node, OptimizerOptions optimizer_opts; optimizer_opts.set_do_function_inlining(true); ProcessFunctionLibraryRuntime pflr(&dvc_mgr, env, - graph_def.versions().producer(), + inlined_graph->versions().producer(), &function_library, optimizer_opts); FunctionLibraryRuntime* flr = pflr.GetFLR(dev->name()); CHECK(flr); @@ -206,11 +206,6 @@ Status InlineSymbolicGradient(const NodeDef& node, while (counter < 50 && ExpandInlineFunctions(flr, &graph)) { ++counter; } - if (counter == 0) { - // Nothing was inlined - return errors::InvalidArgument( - strings::StrCat("Failed to inline node ", node.name())); - } GraphDef inlined_graph_def; graph.ToGraphDef(&inlined_graph_def); @@ -278,7 +273,7 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, return Status::OK(); } - // Inline functions when possible. + *optimized_graph->mutable_versions() = item.graph.versions(); for (const NodeDef& node : item.graph.node()) { if (opt_level_ == RewriterConfig::AGGRESSIVE) { if (node.op() == "SymbolicGradient") { @@ -301,7 +296,6 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // TODO(bsteiner): trim the library to remove unused function definitions *optimized_graph->mutable_library() = item.graph.library(); - *optimized_graph->mutable_versions() = item.graph.versions(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index b1c55d838d..52a1118080 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -382,7 +382,6 @@ TEST_F(FunctionOptimizerTest, FunctionWithoutInput) { TEST_F(FunctionOptimizerTest, SymbolicGradients) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); - // auto T = DT_FLOAT; FunctionDef func = FunctionDefHelper::Define( "TestFunc", {"x:float", "y:float"}, {"l:float"}, {}, { @@ -394,7 +393,6 @@ TEST_F(FunctionOptimizerTest, SymbolicGradients) { {{"l"}, "Sum", {"z", "indices"}, {{"T", DT_FLOAT}}}, }); - auto dummy_variable = ops::Variable(scope, {2, 2}, DT_FLOAT); auto x = ops::Const(scope, 1.0f); auto y = ops::Const(scope, 2.0f); auto dl = ops::Const(scope, 3.0f); @@ -422,6 +420,107 @@ TEST_F(FunctionOptimizerTest, SymbolicGradients) { test::ExpectTensorEqual(expected[1], optimized[1]); } +TEST_F(FunctionOptimizerTest, SymbolicGradientsIdentity) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + FunctionDef func = FunctionDefHelper::Create( + // Name + "Identity_func", + // Args + {"in: float"}, + // Return values + {"out: float"}, + // Attr def + {}, + // Nodes + {{{"Identity"}, "Identity", {"in"}, {{"T", DT_FLOAT}}}}, + // Mapping + {{"out", "Identity:output:0"}}); + + auto x = ops::Const(scope, 1.0f, {3, 5, 7}); + auto z = ops::Const(scope, 3.0f, {3, 5, 7}); + + NameAttrList fn; + fn.set_name("Identity_func"); + auto g0 = ops::SymbolicGradient(scope, std::initializer_list{x, z}, + {DT_FLOAT}, fn); + auto out = ops::Identity(scope.WithOpName("out"), g0.output[0]); + + GrapplerItem item; + TF_EXPECT_OK(scope.ToGraphDef(&item.graph)); + *item.graph.mutable_library()->add_function() = func; + + FunctionOptimizer optimizer(RewriterConfig::AGGRESSIVE); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(13, output.node_size()); + EXPECT_EQ("Const", output.node(0).name()); + EXPECT_EQ("Const_1", output.node(1).name()); + EXPECT_EQ("SymbolicGradient/FunctionInputs", output.node(2).name()); + EXPECT_EQ("SymbolicGradient", output.node(3).name()); + EXPECT_EQ("SymbolicGradient/SymbolicGradient/Identity", + output.node(4).name()); + EXPECT_EQ("SymbolicGradient/Func/_0", output.node(5).name()); + EXPECT_EQ("SymbolicGradient/Func/_1", output.node(6).name()); + EXPECT_EQ("SymbolicGradient/Func/_2", output.node(7).name()); + EXPECT_EQ("SymbolicGradient/SymbolicGradient/Func/_1/dx", + output.node(8).name()); + EXPECT_EQ("SymbolicGradient/Func/_3", output.node(9).name()); + EXPECT_EQ("SymbolicGradient/Func/_4", output.node(10).name()); + EXPECT_EQ("SymbolicGradient/Func/_5", output.node(11).name()); + EXPECT_EQ("out", output.node(12).name()); + for (int i = 2; i < 4; ++i) { + EXPECT_EQ("IdentityN", output.node(i).op()); + } + for (int i = 4; i < 11; ++i) { + EXPECT_EQ("Identity", output.node(i).op()); + } + + std::vector expected = EvaluateNodes(item.graph, {"out"}); + std::vector optimized = EvaluateNodes(output, {"out"}); + test::ExpectTensorEqual(expected[0], optimized[0]); +} + +TEST_F(FunctionOptimizerTest, SymbolicGradientsNoInlineFunc) { + FunctionDef func = FunctionDefHelper::Define( + "TestFunc", {"x:float", "y:float"}, {"l:float"}, {}, + { + {{"z"}, "Add", {"x", "y"}, {{"T", DT_FLOAT}}}, + FunctionDefHelper::Const("zero", 0), + FunctionDefHelper::Const("one", 1), + {{"r"}, "Rank", {"z"}, {{"T", DT_FLOAT}}}, + {{"indices"}, "Range", {"zero", "r", "one"}}, + {{"l"}, "Sum", {"z", "indices"}, {{"T", DT_FLOAT}}}, + }); + (*func.mutable_attr())["_noinline"].set_b(true); + + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + auto x = ops::Const(scope, 1.0f); + auto y = ops::Const(scope, 2.0f); + auto dl = ops::Const(scope, 3.0f); + + NameAttrList fn; + fn.set_name("TestFunc"); + (*fn.mutable_attr())["T"].set_type(DT_FLOAT); + auto g0 = ops::SymbolicGradient(scope, std::initializer_list{x, y, dl}, + {DT_FLOAT, DT_FLOAT}, fn); + auto out1 = ops::Identity(scope.WithOpName("out1"), g0.output[0]); + auto out2 = ops::Identity(scope.WithOpName("out2"), g0.output[1]); + + GrapplerItem item; + TF_EXPECT_OK(scope.ToGraphDef(&item.graph)); + *item.graph.mutable_library()->add_function() = func; + + FunctionOptimizer optimizer(RewriterConfig::AGGRESSIVE); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + // The optimizer should succeed but the graphs should be the same. + TF_EXPECT_OK(status); + CompareGraphs(item.graph, output); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 8cabc19c6b986d86c3e2b2d8d40f49a9400f926b Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Fri, 16 Mar 2018 15:36:15 -0700 Subject: [PATCH 131/960] Consolidate all moving_average updates in batchnorm into one implementation. PiperOrigin-RevId: 189404070 --- tensorflow/python/layers/normalization.py | 50 +++++++++-------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index c23d755a8e..8b79a92cc4 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -319,7 +319,6 @@ class BatchNormalization(base.Layer): initializer=self.moving_variance_initializer, trainable=False) - self._one_minus_decay = 1.0 - self.momentum if self.renorm: # Create variables to maintain the moving mean and standard deviation. # These are used in training and thus are different from the moving @@ -360,20 +359,14 @@ class BatchNormalization(base.Layer): self._scope.set_partitioner(partitioner) self.built = True - def _assign_moving_average(self, variable, value, one_minus_decay): + def _assign_moving_average(self, variable, value, momentum): with ops.name_scope(None, 'AssignMovingAvg', - [variable, value, one_minus_decay]) as scope: + [variable, value, momentum]) as scope: with ops.colocate_with(variable): + decay = ops.convert_to_tensor(1.0 - momentum, name='decay') update_delta = math_ops.multiply( - math_ops.subtract(variable.read_value(), value), - one_minus_decay) - if isinstance(variable, resource_variable_ops.ResourceVariable): - # state_ops.assign_sub does an extra read_variable_op after the - # assign. We avoid that here. - return gen_resource_variable_ops.assign_sub_variable_op( - variable.handle, update_delta, name=scope) - else: - return state_ops.assign_sub(variable, update_delta, name=scope) + math_ops.subtract(variable.read_value(), value), decay) + return state_ops.assign_sub(variable, update_delta, name=scope) def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" @@ -412,22 +405,16 @@ class BatchNormalization(base.Layer): training_value = utils.constant_value(training) if training_value is None: - one_minus_decay = utils.smart_cond(training, - lambda: self._one_minus_decay, - lambda: 0.) + momentum = utils.smart_cond(training, lambda: self.momentum, lambda: 1.0) else: - one_minus_decay = ops.convert_to_tensor(self._one_minus_decay) + momentum = ops.convert_to_tensor(self.momentum) if training_value or training_value is None: mean_update = self._assign_moving_average(self.moving_mean, mean, - one_minus_decay) + momentum) variance_update = self._assign_moving_average(self.moving_variance, - variance, one_minus_decay) - if not context.executing_eagerly(): - # Note that in Eager mode, the updates are already executed when running - # assign_moving_averages. So we do not need to put them into - # collections. - self.add_update(mean_update, inputs=inputs) - self.add_update(variance_update, inputs=inputs) + variance, momentum) + self.add_update(mean_update, inputs=inputs) + self.add_update(variance_update, inputs=inputs) return output @@ -464,6 +451,7 @@ class BatchNormalization(base.Layer): """Updates a moving average and weight, returns the unbiased value.""" value = array_ops.identity(value) def _do_update(): + """Updates the var and weight, returns their updated ratio.""" # Update the variables without zero debiasing. The debiasing will be # accomplished by dividing the exponential moving average by the weight. # For example, after a single update, the moving average would be @@ -472,11 +460,14 @@ class BatchNormalization(base.Layer): # Make sure the weight is not updated until before r and d computation. with ops.control_dependencies([value]): weight_value = array_ops.constant(1., dtype=weight.dtype) - new_var = moving_averages.assign_moving_average( - var, value, self.renorm_momentum, zero_debias=False) - new_weight = moving_averages.assign_moving_average( - weight, weight_value, self.renorm_momentum, zero_debias=False) + new_var = self._assign_moving_average(var, value, self.renorm_momentum) + new_weight = self._assign_moving_average(weight, weight_value, + self.renorm_momentum) + # TODO(yuefengz): the updates to var and weighted can not be batched + # together if we fetch their updated values here. Consider calculating + # new values and delaying the updates. return new_var / new_weight + def _fake_update(): return array_ops.identity(var) return utils.smart_cond(training, _do_update, _fake_update) @@ -601,8 +592,7 @@ class BatchNormalization(base.Layer): if in_eager_mode and not self.trainable: return - return moving_averages.assign_moving_average( - var, value, self.momentum, zero_debias=False) + return self._assign_moving_average(var, value, self.momentum) mean_update = utils.smart_cond( training, -- GitLab From d1a173677ea98c34f5ce872897796923b14c0d6a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 15:55:14 -0700 Subject: [PATCH 132/960] Remove identity transpose nodes. PiperOrigin-RevId: 189406518 --- .../optimizers/arithmetic_optimizer.cc | 75 ++++++++++++------- .../optimizers/arithmetic_optimizer.h | 2 +- .../optimizers/arithmetic_optimizer_test.cc | 27 ++++--- 3 files changed, 64 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 675cd8f072..3a67c4b056 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -933,55 +933,67 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { }; // Removes inverse transpose nodes -class RemoveInverseTranspose : public ArithmeticOptimizerStage { +class RemoveIdentityTranspose : public ArithmeticOptimizerStage { public: - explicit RemoveInverseTranspose(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("RemoveInverseTranspose", ctx) {} - ~RemoveInverseTranspose() override = default; + explicit RemoveIdentityTranspose(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("RemoveIdentityTranspose", ctx) {} + ~RemoveIdentityTranspose() override = default; bool IsSupported(const NodeDef* node) const override { return IsTranspose(*node) || IsConjugateTranspose(*node); } + // TODO(rmlarsen): Forward control dependencies on the bypassed + // transpose nodes. Status TrySimplify(const NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); NodeDef* input; TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &input)); + NodeDef* node_perm; + TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &node_perm)); + std::vector node_perm_values; + TF_RETURN_IF_ERROR(GetPermutation(*node_perm, &node_perm_values)); if (input->op() == node->op()) { - NodeDef* node_perm; + // Remove pairs of transposes that cancel each other. NodeDef* input_perm; - - TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &node_perm)); TF_RETURN_IF_ERROR(GetInputNode(input->input(1), &input_perm)); - - // Try 32-bit indices. - std::vector node_perm_values; - std::vector input_perm_values; - if (ValuesFromConstNode(*node_perm, &node_perm_values) && - ValuesFromConstNode(*input_perm, &input_perm_values) && - AreInversePermutations(node_perm_values, input_perm_values)) { + std::vector input_perm_values; + TF_RETURN_IF_ERROR(GetPermutation(*input_perm, &input_perm_values)); + if (AreInversePermutations(node_perm_values, input_perm_values)) { *simplified_node_name = input->input(0); } - // Try 64-bit indices. - std::vector node_perm_values64; - std::vector input_perm_values64; - if (ValuesFromConstNode(*node_perm, &node_perm_values64) && - ValuesFromConstNode(*input_perm, &input_perm_values64) && - AreInversePermutations(node_perm_values64, input_perm_values64)) { - *simplified_node_name = input->input(0); + } else { + // Remove simple identity transposes. + if (IsIdentityPermutation(node_perm_values)) { + *simplified_node_name = node->input(0); } } - return Status::OK(); } private: - template - bool AreInversePermutations(const std::vector& a, - const std::vector& b) { + Status GetPermutation(const NodeDef& node_perm, + std::vector* perm64) const { + std::vector perm32; + if (ValuesFromConstNode(node_perm, &perm32)) { + perm64->reserve(perm32.size()); + for (int val : perm32) { + perm64->push_back(static_cast(val)); + } + return Status::OK(); + } + if (ValuesFromConstNode(node_perm, perm64)) { + return Status::OK(); + } + return errors::InvalidArgument("Couldn't extract permutation from ", + node_perm.name()); + } + + bool AreInversePermutations(const std::vector& a, + const std::vector& b) { if (a.size() != b.size()) { return false; } @@ -992,6 +1004,15 @@ class RemoveInverseTranspose : public ArithmeticOptimizerStage { } return true; } + + bool IsIdentityPermutation(const std::vector& perm) { + for (int64 i = 0; i < perm.size(); ++i) { + if (i != perm[i]) { + return false; + } + } + return true; + } }; // Remove redundant Bitcasts. @@ -1663,9 +1684,9 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { stages.push_back(std::unique_ptr( new HoistCommonFactorOutOfAggregation(ctx))); } - if (options_.remove_inverse_transpose) { + if (options_.remove_identity_transpose) { stages.push_back(std::unique_ptr( - new RemoveInverseTranspose(ctx))); + new RemoveIdentityTranspose(ctx))); } if (options_.remove_redundant_bitcast) { stages.push_back(std::unique_ptr( diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index bd2f42ee8c..95c1e14258 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -60,7 +60,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool enable_try_simplify_and_replace = true; bool combine_add_to_addn = false; bool hoist_common_factor_out_of_aggregation = true; - bool remove_inverse_transpose = true; + bool remove_identity_transpose = true; bool remove_redundant_bitcast = true; bool remove_redundant_cast = true; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index d677aee589..6f7a95c2ed 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -92,7 +92,7 @@ class ArithmeticOptimizerTest : public GrapplerTest { options.enable_try_simplify_and_replace = false; options.combine_add_to_addn = false; options.hoist_common_factor_out_of_aggregation = false; - options.remove_inverse_transpose = false; + options.remove_identity_transpose = false; options.remove_redundant_bitcast = false; options.remove_redundant_cast = false; optimizer->options_ = options; @@ -112,9 +112,9 @@ class ArithmeticOptimizerTest : public GrapplerTest { optimizer->options_.hoist_common_factor_out_of_aggregation = true; } - void EnableOnlyRemoveInverseTranspose(ArithmeticOptimizer* optimizer) { + void EnableOnlyRemoveIdentityTranspose(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); - optimizer->options_.remove_inverse_transpose = true; + optimizer->options_.remove_identity_transpose = true; } void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) { @@ -876,7 +876,7 @@ TEST_F(ArithmeticOptimizerTest, NoReorderTransposeCast) { EXPECT_EQ(1, num_transposes); } -TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { +TEST_F(ArithmeticOptimizerTest, RemoveIdentityTransposes) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output inputs_shape = ops::Const(s.WithOpName("inputs_shape"), {8, 3, 28, 28}, {4}); @@ -884,18 +884,21 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { ops::RandomUniform(s.WithOpName("inputs"), inputs_shape, DT_FLOAT); Output perm1 = ops::Const(s.WithOpName("perm1"), {0, 2, 3, 1}, {4}); Output perm2 = ops::Const(s.WithOpName("perm2"), {0, 3, 1, 2}, {4}); + Output perm3 = ops::Const(s.WithOpName("perm2"), {0, 1, 2, 3}, {4}); Output transpose1 = ops::Transpose(s.WithOpName("transpose1"), inputs, perm1); Output transpose2 = ops::Transpose(s.WithOpName("transpose2"), transpose1, perm2); - Output outputs = ops::Identity(s.WithOpName("outputs"), transpose2); + Output transpose3 = ops::Transpose(s.WithOpName("transpose3"), inputs, perm3); + Output id1 = ops::Identity(s.WithOpName("id1"), transpose2); + Output id2 = ops::Identity(s.WithOpName("id2"), transpose3); GrapplerItem item; - item.fetch = {"outputs"}; + item.fetch = {"id1", "id2"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; ArithmeticOptimizer optimizer; - EnableOnlyRemoveInverseTranspose(&optimizer); + EnableOnlyRemoveIdentityTranspose(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); std::set nodes_after_optimization; @@ -903,10 +906,10 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { nodes_after_optimization.insert(node.name()); } EXPECT_EQ(nodes_after_optimization, - std::set({"inputs_shape", "inputs", "outputs"})); + std::set({"id1", "id2", "inputs_shape", "inputs"})); } -TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposesMultipleOutputs) { +TEST_F(ArithmeticOptimizerTest, RemoveIdentityTransposesMultipleOutputs) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output inputs_shape = ops::Const(s.WithOpName("inputs_shape"), {8, 9, 28, 28}, {4}); @@ -927,7 +930,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposesMultipleOutputs) { GraphDef output; ArithmeticOptimizer optimizer; - EnableOnlyRemoveInverseTranspose(&optimizer); + EnableOnlyRemoveIdentityTranspose(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); for (const NodeDef& node : output.node()) { @@ -955,7 +958,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveTransposesWithControlDependency) { GraphDef output; ArithmeticOptimizer optimizer; - EnableOnlyRemoveInverseTranspose(&optimizer); + EnableOnlyRemoveIdentityTranspose(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); NodeMap node_map(&output); @@ -983,7 +986,7 @@ TEST_F(ArithmeticOptimizerTest, NotRemoveTransposes) { GraphDef output; ArithmeticOptimizer optimizer; - EnableOnlyRemoveInverseTranspose(&optimizer); + EnableOnlyRemoveIdentityTranspose(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); EXPECT_EQ(6, output.node_size()); -- GitLab From dfe8e75a693d3d7613ce1c7e349747a070df9858 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Mar 2018 16:00:11 -0700 Subject: [PATCH 133/960] Increase kMaxEagerTensorParentSize to 64. PiperOrigin-RevId: 189407226 --- tensorflow/python/eager/pywrap_tensor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 105c09e81f..519814b979 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -163,7 +163,7 @@ PyObject* PyIntFromDataType(TF_DataType l) { extern "C" { -static const int kMaxEagerTensorParentSize = 32; +static const int kMaxEagerTensorParentSize = 64; // TODO(agarwal): store context handle in EagerTensor. typedef struct EagerTensor { -- GitLab From f06cce76e1f134a8787e28e7adc7182e24cdf1a5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 16 Mar 2018 16:07:06 -0700 Subject: [PATCH 134/960] Deleted dead code PiperOrigin-RevId: 189408200 --- tensorflow/core/grappler/costs/graph_properties.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 5aa4962072..93a722f038 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -67,12 +67,6 @@ class GraphProperties { void ClearInputProperties(const string& node_name); void ClearOutputProperties(const string& node_name); - static void FillTensorPropertiesFromContext( - const shape_inference::ShapeHandle&, const DataType&, - shape_inference::InferenceContext*, - std::unordered_map* dim_ids, - OpInfo::TensorProperties*); - private: // Merges shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. -- GitLab From 139c6ce00610b43e05855135dab987997bcfe11f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 16 Mar 2018 16:10:46 -0700 Subject: [PATCH 135/960] Always sets self._built in tfe.metrics when build() is called. PiperOrigin-RevId: 189408745 --- tensorflow/contrib/eager/python/metrics_impl.py | 12 ++++++++++++ tensorflow/contrib/eager/python/metrics_test.py | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index 1490c2ccac..2f2347736a 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -109,6 +109,18 @@ class Metric(checkpointable.CheckpointableBase): pos = scope.name.rfind(scope_name) self._name = name + scope.name[pos + len(scope_name):] self._scope = scope + + # Ensures that if the user calls build directly we still set self._built to + # True to prevent variables from being recreated. + self._build = self.build + + def actual_build(*args, **kwargs): + self._build(*args, **kwargs) + self._built = True + self.build = actual_build + self.build.__doc__ = self._build.__doc__ + + # Captures construction scope for proper initialization. if context.executing_eagerly(): self._construction_scope = context.eager_mode else: diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index 6b5450ba89..15ac889191 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -195,6 +195,15 @@ class MetricsTest(test.TestCase): m2 = metrics.Mean() m2(2) + def testBuildMean(self): + # Verify that calling build() on Mean and then calling it won't recreate + # variables. + m = metrics.Mean() + m.build() + old_numer = m.numer + m(0.0) + self.assertTrue(old_numer is m.numer) + def testMetricsChain(self): with context.graph_mode(), self.test_session(): m1 = metrics.Mean() -- GitLab From 71728ef0eb2a823cf77ac22ec45b76d4c10196de Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 16 Mar 2018 16:27:02 -0700 Subject: [PATCH 136/960] Relax "_output_shapes" error checking in C++ graph importer. This is to make the behavior inline with what Python's import_graph_def does. It'd probably be better to raise an error, but some people are already depending on the import_graph_def behavior in order to import modified ops. PiperOrigin-RevId: 189411025 --- tensorflow/core/graph/graph_constructor.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 627309078a..76ee88e684 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -568,13 +568,22 @@ Status GraphConstructor::ValidateShape(Node* node) { auto* ic = refiner_->GetContext(node); DCHECK(ic != nullptr) << "ShapeRefiner::AddNode() should have created the InferenceContext"; - if (shape_attrs.size() != node->num_outputs()) { + if (shape_attrs.size() < node->num_outputs()) { return errors::InvalidArgument( "Node '", node->name(), "' has ", node->num_outputs(), " outputs but the ", kAttrName, " attribute specifies shapes for ", shape_attrs.size(), " outputs"); } - for (int i = 0; i < shape_attrs.size(); ++i) { + // NOTE(skyewm): we don't raise an error here because some users depend on + // this behavior, even though it's unsafe. + // TODO(b/74619486): raise an error. + if (shape_attrs.size() > node->num_outputs()) { + LOG(WARNING) << "Node '" << node->name() << "' has " << node->num_outputs() + << " outputs but the " << kAttrName + << " attribute specifies shapes for " << shape_attrs.size() + << " outputs. Output shapes may be inaccurate."; + } + for (int i = 0; i < node->num_outputs(); ++i) { const TensorShapeProto& p = shape_attrs[i]; shape_inference::ShapeHandle h; Status s = ic->MakeShapeFromShapeProto(p, &h); -- GitLab From bd33984641fda2f892b77bb2a1ac8c33c7a2211a Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 16 Mar 2018 17:00:17 -0700 Subject: [PATCH 137/960] TFTS: Allow cold-starting from SavedModels This means the model starts from its default start state and is fed a series (filtering) to warm up its state. This warmed up state can then be used to make predictions. Some shape fiddling with the receiver_fn to make feeding state optional, and a new signature for cold-starting which uses the model's default start state. Some other shape fiddling to make feeding strings to SavedModels work more smoothly in the cold-start part of the LSTM example. I was squeezing out the last dimension of "scalar" exogenous features, now I'm leaving them, which matches the placeholder generation logic. PiperOrigin-RevId: 189414869 --- .../timeseries/examples/known_anomaly.py | 17 ++--- .../contrib/timeseries/examples/lstm.py | 22 ++++++- .../python/timeseries/estimators.py | 40 ++++++++---- .../python/timeseries/feature_keys.py | 1 + .../timeseries/python/timeseries/head.py | 11 +++- .../python/timeseries/input_pipeline.py | 3 +- .../python/timeseries/saved_model_utils.py | 62 +++++++++++++++++-- 7 files changed, 126 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index c08c0b0acb..e77628ddd3 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -53,6 +53,15 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): one_hot_feature = tf.feature_column.indicator_column( categorical_column=string_feature) + def _exogenous_update_condition(times, features): + del times # unused + # Make exogenous updates sparse by setting an update condition. This in + # effect allows missing exogenous features: if the condition evaluates to + # False, no update is performed. Otherwise we sometimes end up with "leaky" + # updates which add unnecessary uncertainty to the model even when there is + # no changepoint. + return tf.equal(tf.squeeze(features["is_changepoint"], axis=-1), "yes") + estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( periodicities=12, # Extract a smooth period by constraining the number of latent values @@ -60,13 +69,7 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): cycle_num_latent_values=3, num_features=1, exogenous_feature_columns=[one_hot_feature], - # Make exogenous updates sparse by setting an update condition. This in - # effect allows missing exogenous features: if the condition evaluates to - # False, no update is performed. Otherwise we sometimes end up with - # "leaky" updates which add unnecessary uncertainty to the model even when - # there is no changepoint. - exogenous_update_condition= - lambda times, features: tf.equal(features["is_changepoint"], "yes")) + exogenous_update_condition=_exogenous_update_condition) reader = tf.contrib.timeseries.CSVReader( csv_file_name, # Indicate the format of our CSV file. First we have two standard columns, diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py index 2eee878196..b1c7475442 100644 --- a/tensorflow/contrib/timeseries/examples/lstm.py +++ b/tensorflow/contrib/timeseries/examples/lstm.py @@ -236,20 +236,36 @@ def train_and_predict( [evaluation["mean"][0], predictions["mean"]], axis=0)) all_times = numpy.concatenate([times, predictions["times"]], axis=0) - # Export the model in SavedModel format. + # Export the model in SavedModel format. We include a bit of extra boilerplate + # for "cold starting" as if we didn't have any state from the Estimator, which + # is the case when serving from a SavedModel. If Estimator output is + # available, the result of "Estimator.evaluate" can be passed directly to + # `tf.contrib.timeseries.saved_model_utils.predict_continuation` as the + # `continue_from` argument. + with tf.Graph().as_default(): + filter_feature_tensors, _ = evaluation_input_fn() + with tf.train.MonitoredSession() as session: + # Fetch the series to "warm up" our state, which will allow us to make + # predictions for its future values. This is just a dictionary of times, + # values, and exogenous features mapping to numpy arrays. The use of an + # input_fn is just a convenience for the example; they can also be + # specified manually. + filter_features = session.run(filter_feature_tensors) if export_directory is None: export_directory = tempfile.mkdtemp() input_receiver_fn = estimator.build_raw_serving_input_receiver_fn() export_location = estimator.export_savedmodel( export_directory, input_receiver_fn) - # Predict using the SavedModel + # Warm up and predict using the SavedModel with tf.Graph().as_default(): with tf.Session() as session: signatures = tf.saved_model.loader.load( session, [tf.saved_model.tag_constants.SERVING], export_location) + state = tf.contrib.timeseries.saved_model_utils.cold_start_filter( + signatures=signatures, session=session, features=filter_features) saved_model_output = ( tf.contrib.timeseries.saved_model_utils.predict_continuation( - continue_from=evaluation, signatures=signatures, + continue_from=state, signatures=signatures, session=session, steps=100, exogenous_features=predict_exogenous_features)) # The exported model gives the same results as the Estimator.predict() diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index 8d13343e82..469cea4fd2 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -33,9 +33,11 @@ from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.training import training as train +from tensorflow.python.util import nest class TimeSeriesRegressor(estimator_lib.Estimator): @@ -98,11 +100,11 @@ class TimeSeriesRegressor(estimator_lib.Estimator): def _serving_input_receiver_fn(): """A receiver function to be passed to export_savedmodel.""" placeholders = {} - placeholders[feature_keys.TrainEvalFeatures.TIMES] = ( - array_ops.placeholder( - name=feature_keys.TrainEvalFeatures.TIMES, - dtype=dtypes.int64, - shape=[default_batch_size, default_series_length])) + time_placeholder = array_ops.placeholder( + name=feature_keys.TrainEvalFeatures.TIMES, + dtype=dtypes.int64, + shape=[default_batch_size, default_series_length]) + placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder # Values are only necessary when filtering. For prediction the default # value will be ignored. placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( @@ -145,15 +147,29 @@ class TimeSeriesRegressor(estimator_lib.Estimator): # use only static metadata from the returned Tensors. with ops.Graph().as_default(): self._model.initialize_graph() - model_start_state = self._model.get_start_state() - for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( - model_start_state).items(): + # Evaluate the initial state as same-dtype "zero" values. These zero + # constants aren't used, but are necessary for feeding to + # placeholder_with_default for the "cold start" case where state is not + # fed to the model. + def _zeros_like_constant(tensor): + return tensor_util.constant_value(array_ops.zeros_like(tensor)) + start_state = nest.map_structure( + _zeros_like_constant, self._model.get_start_state()) + batch_size_tensor = array_ops.shape(time_placeholder)[0] + for prefixed_state_name, state in ts_head_lib.state_to_dictionary( + start_state).items(): state_shape_with_batch = tensor_shape.TensorShape( - (default_batch_size,)).concatenate(state_tensor.get_shape()) - placeholders[prefixed_state_name] = array_ops.placeholder( + (default_batch_size,)).concatenate(state.shape) + default_state_broadcast = array_ops.tile( + state[None, ...], + multiples=array_ops.concat( + [batch_size_tensor[None], + array_ops.ones(len(state.shape), dtype=dtypes.int32)], + axis=0)) + placeholders[prefixed_state_name] = array_ops.placeholder_with_default( + input=default_state_broadcast, name=prefixed_state_name, - shape=state_shape_with_batch, - dtype=state_tensor.dtype) + shape=state_shape_with_batch) return export_lib.ServingInputReceiver(placeholders, placeholders) return _serving_input_receiver_fn diff --git a/tensorflow/contrib/timeseries/python/timeseries/feature_keys.py b/tensorflow/contrib/timeseries/python/timeseries/feature_keys.py index 970b9aa8ac..56566ee2e3 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/feature_keys.py +++ b/tensorflow/contrib/timeseries/python/timeseries/feature_keys.py @@ -72,3 +72,4 @@ class SavedModelLabels(object): """Names of signatures exported with export_savedmodel.""" PREDICT = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY FILTER = "filter" + COLD_START_FILTER = "cold_start_filter" diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index f4d9351432..3d7e615290 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -150,6 +150,12 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc with variable_scope.variable_scope("model", reuse=True): filtering_outputs = self.create_loss( features, estimator_lib.ModeKeys.EVAL) + with variable_scope.variable_scope("model", reuse=True): + no_state_features = { + k: v for k, v in features.items() + if not k.startswith(feature_keys.State.STATE_PREFIX)} + cold_filtering_outputs = self.create_loss( + no_state_features, estimator_lib.ModeKeys.EVAL) return estimator_lib.EstimatorSpec( mode=estimator_lib.ModeKeys.PREDICT, export_outputs={ @@ -157,7 +163,10 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc export_lib.PredictOutput(prediction_outputs), feature_keys.SavedModelLabels.FILTER: export_lib.PredictOutput( - state_to_dictionary(filtering_outputs.end_state)) + state_to_dictionary(filtering_outputs.end_state)), + feature_keys.SavedModelLabels.COLD_START_FILTER: + export_lib.PredictOutput( + state_to_dictionary(cold_filtering_outputs.end_state)) }, # Likely unused, but it is necessary to return `predictions` to satisfy # the Estimator's error checking. diff --git a/tensorflow/contrib/timeseries/python/timeseries/input_pipeline.py b/tensorflow/contrib/timeseries/python/timeseries/input_pipeline.py index 04225333b9..403c6e2cb4 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/input_pipeline.py +++ b/tensorflow/contrib/timeseries/python/timeseries/input_pipeline.py @@ -492,8 +492,7 @@ class CSVReader(ReaderBaseTimeSeriesParser): features_lists.setdefault(column_name, []).append(value) features = {} for column_name, values in features_lists.items(): - if (len(values) == 1 and - column_name != feature_keys.TrainEvalFeatures.VALUES): + if column_name == feature_keys.TrainEvalFeatures.TIMES: features[column_name] = values[0] else: features[column_name] = array_ops.stack(values, axis=1) diff --git a/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py b/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py index 97f6d36a87..0461abdc19 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py @@ -15,6 +15,7 @@ """Convenience functions for working with time series saved_models. @@predict_continuation +@@cold_start_filter @@filter_continuation """ @@ -30,10 +31,12 @@ from tensorflow.contrib.timeseries.python.timeseries import model_utils as _mode from tensorflow.python.util.all_util import remove_undocumented -def _colate_features_to_feeds_and_fetches(continue_from, signature, features, - graph): +def _colate_features_to_feeds_and_fetches(signature, features, graph, + continue_from=None): """Uses a saved model signature to construct feed and fetch dictionaries.""" - if _feature_keys.FilteringResults.STATE_TUPLE in continue_from: + if continue_from is None: + state_values = {} + elif _feature_keys.FilteringResults.STATE_TUPLE in continue_from: # We're continuing from an evaluation, so we need to unpack/flatten state. state_values = _head.state_to_dictionary( continue_from[_feature_keys.FilteringResults.STATE_TUPLE]) @@ -115,6 +118,55 @@ def predict_continuation(continue_from, return output +def cold_start_filter(signatures, session, features): + """Perform filtering using an exported saved model. + + Filtering refers to updating model state based on new observations. + Predictions based on the returned model state will be conditioned on these + observations. + + Starts from the model's default/uninformed state. + + Args: + signatures: The `MetaGraphDef` protocol buffer returned from + `tf.saved_model.loader.load`. Used to determine the names of Tensors to + feed and fetch. Must be from the same model as `continue_from`. + session: The session to use. The session's graph must be the one into which + `tf.saved_model.loader.load` loaded the model. + features: A dictionary mapping keys to Numpy arrays, with several possible + shapes (requires keys `FilteringFeatures.TIMES` and + `FilteringFeatures.VALUES`): + Single example; `TIMES` is a scalar and `VALUES` is either a scalar or a + vector of length [number of features]. + Sequence; `TIMES` is a vector of shape [series length], `VALUES` either + has shape [series length] (univariate) or [series length x number of + features] (multivariate). + Batch of sequences; `TIMES` is a vector of shape [batch size x series + length], `VALUES` has shape [batch size x series length] or [batch + size x series length x number of features]. + In any case, `VALUES` and any exogenous features must have their shapes + prefixed by the shape of the value corresponding to the `TIMES` key. + Returns: + A dictionary containing model state updated to account for the observations + in `features`. + """ + filter_signature = signatures.signature_def[ + _feature_keys.SavedModelLabels.COLD_START_FILTER] + features = _input_pipeline._canonicalize_numpy_data( # pylint: disable=protected-access + data=features, + require_single_batch=False) + output_tensors_by_name, feed_dict = _colate_features_to_feeds_and_fetches( + signature=filter_signature, + features=features, + graph=session.graph) + output = session.run(output_tensors_by_name, feed_dict=feed_dict) + # Make it easier to chain filter -> predict by keeping track of the current + # time. + output[_feature_keys.FilteringResults.TIMES] = features[ + _feature_keys.FilteringFeatures.TIMES] + return output + + def filter_continuation(continue_from, signatures, session, features): """Perform filtering using an exported saved model. @@ -124,8 +176,8 @@ def filter_continuation(continue_from, signatures, session, features): Args: continue_from: A dictionary containing the results of either an Estimator's - evaluate method or a previous filter_continuation. Used to determine the - model state to start filtering from. + evaluate method or a previous filter step (cold start or + continuation). Used to determine the model state to start filtering from. signatures: The `MetaGraphDef` protocol buffer returned from `tf.saved_model.loader.load`. Used to determine the names of Tensors to feed and fetch. Must be from the same model as `continue_from`. -- GitLab From e3930fc11f042416a34ed5526bc506e1e0e32660 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 16 Mar 2018 17:05:51 -0700 Subject: [PATCH 138/960] Add user_ops.my_fact to the new TensorFlow API. PiperOrigin-RevId: 189415577 --- tensorflow/python/BUILD | 1 + tensorflow/python/user_ops/user_ops.py | 2 ++ tensorflow/tools/api/generator/BUILD | 1 + tensorflow/tools/api/tests/api_compatibility_test.py | 11 ----------- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a206685af6..9a29986c3b 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2585,6 +2585,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":user_ops_gen", + ":util", "@six_archive//:six", ], ) diff --git a/tensorflow/python/user_ops/user_ops.py b/tensorflow/python/user_ops/user_ops.py index 6f9b5d92bb..20ea3b0f62 100644 --- a/tensorflow/python/user_ops/user_ops.py +++ b/tensorflow/python/user_ops/user_ops.py @@ -23,8 +23,10 @@ from tensorflow.python.ops import gen_user_ops as _gen_user_ops # go/tf-wildcard-import from tensorflow.python.ops.gen_user_ops import * # pylint: disable=wildcard-import +from tensorflow.python.util.tf_export import tf_export +@tf_export('user_ops.my_fact') def my_fact(): """Example of overriding the generated code for an Op.""" return _gen_user_ops.fact() diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index 14ce8dbeb3..d9b0260c9f 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -127,6 +127,7 @@ genrule( "api/test/__init__.py", "api/train/__init__.py", "api/train/queue_runner/__init__.py", + "api/user_ops/__init__.py", ], cmd = "$(location create_python_api) $(OUTS)", tools = ["create_python_api"], diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 5268bba3cc..99c47fd601 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -268,17 +268,6 @@ class ApiCompatibilityTest(test.TestCase): for filename in golden_file_list } - # user_ops is an empty module. It is currently available in TensorFlow API - # but we don't keep empty modules in the new API. - # We delete user_ops from golden_proto_dict to make sure assert passes - # when diffing new API against goldens. - # TODO(annarev): remove user_ops from goldens once we switch to new API. - tf_module = golden_proto_dict['tensorflow'].tf_module - for i in range(len(tf_module.member)): - if tf_module.member[i].name == 'user_ops': - del tf_module.member[i] - break - # Diff them. Do not fail if called with update. # If the test is run to update goldens, only report diffs but do not fail. self._AssertProtoDictEquals( -- GitLab From 85087076d1e0a50a96870643e1413720ba77403f Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 16 Mar 2018 17:10:16 -0700 Subject: [PATCH 139/960] Automated g4 rollback of changelist 189228094 PiperOrigin-RevId: 189416074 --- tensorflow/python/client/session.py | 19 --------------- tensorflow/python/client/session_test.py | 31 ------------------------ 2 files changed, 50 deletions(-) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 29f06c8f22..924d62992a 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -21,7 +21,6 @@ from __future__ import print_function import functools import re import threading -import warnings import numpy as np @@ -1625,9 +1624,6 @@ class InteractiveSession(BaseSession): ``` """ - _count_lock = threading.Lock() - _active_session_count = 0 # GUARDED_BY(_count_lock) - def __init__(self, target='', graph=None, config=None): """Creates a new interactive TensorFlow session. @@ -1656,15 +1652,6 @@ class InteractiveSession(BaseSession): config.graph_options.place_pruned_graph = True super(InteractiveSession, self).__init__(target, graph, config) - with InteractiveSession._count_lock: - if InteractiveSession._active_session_count > 0: - warnings.warn('An interactive session is already active. This can ' - 'cause out-of-memory errors in some cases. You must ' - 'explicitly call `InteractiveSession.close()` to release ' - 'resources held by the other session(s).') - InteractiveSession._active_session_count += 1 - self._closed = False - self._default_session = self.as_default() self._default_session.enforce_nesting = False self._default_session.__enter__() @@ -1677,12 +1664,6 @@ class InteractiveSession(BaseSession): def close(self): """Closes an `InteractiveSession`.""" super(InteractiveSession, self).close() - with InteractiveSession._count_lock: - if not self._closed: - InteractiveSession._active_session_count -= 1 - self._closed = True - else: - return if self._explicit_graph is not None: self._default_graph.__exit__(None, None, None) self._default_session.__exit__(None, None, None) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 3b12e06f43..3bf2a9e4dd 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -22,7 +22,6 @@ import os import sys import threading import time -import warnings import numpy as np import six @@ -66,10 +65,6 @@ ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape) # @test_util.with_c_api class SessionTest(test_util.TensorFlowTestCase): - def setUp(self): - super(SessionTest, self).setUp() - warnings.simplefilter('always') - def testUseExistingGraph(self): with ops.Graph().as_default() as g, ops.device('/cpu:0'): a = constant_op.constant(6.0, shape=[1, 1]) @@ -1195,32 +1190,6 @@ class SessionTest(test_util.TensorFlowTestCase): self.assertAllEqual([[24.0]], e.eval()) sess.close() - def testMultipleInteractiveSessionsWarning(self): - # Reinitialize the global state to ensure that the expected warnings will - # be emitted. - session.InteractiveSession._active_session_count = 0 # pylint: disable=protected-access - - sess = session.InteractiveSession() - sess.close() - # Opening and closing interactive sessions serially should not warn. - with warnings.catch_warnings(record=True) as w: - sess = session.InteractiveSession() - sess.close() - self.assertEqual(0, len(w)) - - with warnings.catch_warnings(record=True) as w: - sess = session.InteractiveSession() - self.assertEqual(0, len(w)) - with warnings.catch_warnings(record=True) as w: - sess2 = session.InteractiveSession() - self.assertEqual(1, len(w)) - self.assertTrue('An interactive session is already active. This can cause ' - 'out-of-memory errors in some cases. You must explicitly ' - 'call `InteractiveSession.close()` to release resources ' - 'held by the other session(s).' in str(w[0].message)) - sess2.close() - sess.close() - def testInteractivePlacePrunedGraph(self): sess = session.InteractiveSession() -- GitLab From dc492520e408a9f3f771ec960d3c4db96be42265 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Sat, 17 Mar 2018 10:14:20 +0800 Subject: [PATCH 140/960] Fix incorrect link of checkpoint files in CNN tutorials (#17776) --- tensorflow/docs_src/tutorials/deep_cnn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md index 6797540204..6a4c9a9b07 100644 --- a/tensorflow/docs_src/tutorials/deep_cnn.md +++ b/tensorflow/docs_src/tutorials/deep_cnn.md @@ -268,7 +268,7 @@ in `cifar10_input.py`. `cifar10_train.py` periodically @{tf.train.Saver$saves} all model parameters in -@{$variables#saving-and-restoring$checkpoint files} +@{$programmers_guide/saved_model$checkpoint files} but it does *not* evaluate the model. The checkpoint file will be used by `cifar10_eval.py` to measure the predictive performance (see [Evaluating a Model](#evaluating-a-model) below). -- GitLab From d8315b74750683cec1758149afffa1ec9213120f Mon Sep 17 00:00:00 2001 From: Raghuraman Krishnamoorthi Date: Fri, 16 Mar 2018 19:26:56 -0700 Subject: [PATCH 141/960] Update docs for fake quant to reflect support for bitwidths from 2 to 16 inclusive. PiperOrigin-RevId: 189426173 --- .../core/api_def/base_api/api_def_FakeQuantWithMinMaxArgs.pbtxt | 2 +- .../core/api_def/base_api/api_def_FakeQuantWithMinMaxVars.pbtxt | 2 +- .../base_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt | 2 +- .../api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxArgs.pbtxt b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxArgs.pbtxt index 561c86ddf6..599bbce65f 100644 --- a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxArgs.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxArgs.pbtxt @@ -6,7 +6,7 @@ Attributes `[min; max]` define the clamping range for the `inputs` data. `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and then de-quantized and output as floats in `[min; max]` interval. -`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +`num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. Quantization is called fake since the output is still in floating point. END diff --git a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVars.pbtxt b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVars.pbtxt index 2713c01b27..1976ffb8aa 100644 --- a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVars.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVars.pbtxt @@ -8,7 +8,7 @@ and `max` to 'outputs' tensor of same shape as `inputs`. `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and then de-quantized and output as floats in `[min; max]` interval. -`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +`num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. This operation has a gradient and thus allows for training `min` and `max` values. diff --git a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt index e293d4d084..c0fac6a445 100644 --- a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt @@ -9,7 +9,7 @@ to 'outputs' tensor of same shape as `inputs`. `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and then de-quantized and output as floats in `[min; max]` interval. -`num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +`num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. This operation has a gradient and thus allows for training `min` and `max` values. diff --git a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt index 8a4ab368b5..2051903f6d 100644 --- a/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt @@ -40,7 +40,7 @@ END attr { name: "num_bits" description: < Date: Sat, 17 Mar 2018 11:44:41 +0900 Subject: [PATCH 142/960] Fix typo (#17759) * fix typo --- tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 272410c693..7651a03fe5 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -398,7 +398,7 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNorms) { } TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { - // Test axis is not 3, so all weigths and offsets are fused to each of inputs + // Test axis is not 3, so all weights and offsets are fused to each of inputs // of conv2d. TestFoldFusedBatchNormsWithConcat(/*split=*/true); // Test axis = 3, BatchNorm weights and offsets will be split before fused -- GitLab From c941c087a9dfd5b27eff00ead928c9ee208e9a35 Mon Sep 17 00:00:00 2001 From: Giovanni Terlingen Date: Sat, 17 Mar 2018 04:33:26 +0100 Subject: [PATCH 143/960] Add missing linking libs for iOS (#17764) * Add missing linking libs for iOS --- tensorflow/examples/ios/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md index 5bdaeb43ce..5d7bd36837 100644 --- a/tensorflow/examples/ios/README.md +++ b/tensorflow/examples/ios/README.md @@ -119,11 +119,13 @@ rundown: `tensorflow/contrib/makefile/gen/lib` to the Library Search Paths setting. - You'll also need to add `libprotobuf.a` and `libprotobuf-lite.a` from - `tensorflow/contrib/makefile/gen/protobuf_ios/lib` to your _Build Stages_ and - _Library Search Paths_. + `tensorflow/contrib/makefile/gen/protobuf_ios/lib` + and `nsync.a` from `tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11` + to your _Build Stages_ and _Library Search Paths_. - The _Header Search_ paths needs to contain: - the root folder of tensorflow, + - `tensorflow/contrib/makefile/downloads/nsync/public` - `tensorflow/contrib/makefile/downloads/protobuf/src` - `tensorflow/contrib/makefile/downloads`, - `tensorflow/contrib/makefile/downloads/eigen`, and -- GitLab From a14114177ffec4f0c8665db60bec8819aade0c81 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Fri, 16 Mar 2018 22:58:50 -0700 Subject: [PATCH 144/960] Update version strings for 1.7.0-rc1 (#17786) --- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 14 ++++++------ tensorflow/tools/pip_package/setup.py | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 15082bb337..40eebd1db0 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 733c7a6625..9059b3f3b6 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 421215f367..2e47a6d212 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 7758520c50..eff066d200 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.7.0-rc0 + 1.7.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.7.0-rc0 + 1.7.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.7.0-rc0 + 1.7.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.7.0-rc0 + 1.7.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -

javac -cp libtensorflow-1.7.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.7.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.7.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.7.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.7.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.7.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index f4d4e65548..378946b459 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -189,7 +189,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -294,7 +294,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -647,14 +647,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -666,14 +666,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -685,14 +685,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -704,14 +704,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 055a463718..fa6951a8f1 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl @@ -523,7 +523,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
 
@@ -531,5 +531,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 10840295f9..0454c172f8 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.7.0rc0 on Linux: +for TensorFlow 1.7.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc1-py2-none-any.whl
 
## Validate your installation @@ -459,8 +459,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
- - + + @@ -480,7 +480,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
- + @@ -495,8 +495,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
- - + + diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 69825a0d7c..7a3184d64d 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.7.0-rc0' +_VERSION = '1.7.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From dd2d558714ccbf24e180f075746bdc4bbb745b97 Mon Sep 17 00:00:00 2001 From: Aghasy Date: Sat, 17 Mar 2018 17:27:24 +0400 Subject: [PATCH 145/960] fix nested scope issue --- tensorflow/core/platform/default/logging.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index f0efa31d55..2c134f1be9 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -64,11 +64,11 @@ class LogMessageFatal : public LogMessage { }; #define _TF_LOG_INFO \ - ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::INFO) + ::tensorflow::internal::LogMessage(__FILE__, __LINE__, ::tensorflow::INFO) #define _TF_LOG_WARNING \ - ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::WARNING) + ::tensorflow::internal::LogMessage(__FILE__, __LINE__, ::tensorflow::WARNING) #define _TF_LOG_ERROR \ - ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::ERROR) + ::tensorflow::internal::LogMessage(__FILE__, __LINE__, ::tensorflow::ERROR) #define _TF_LOG_FATAL \ ::tensorflow::internal::LogMessageFatal(__FILE__, __LINE__) -- GitLab From 6a0b4e177620626596c610f129a66233ffb6f5af Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Sat, 17 Mar 2018 09:57:03 -0700 Subject: [PATCH 146/960] [XLA] Fix points-to set calculation in HLO ListScheduler. Previously the list scheduler considered that an instruction used only the buffers defined by its operands. This is inaccurate in the presence of aliasing?an instruction may potentially use anything in the points-to set of the operand, including buffers defined by an ancestor of an operand. Change to use the full points-to set instead. PiperOrigin-RevId: 189460681 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_ordering_test.cc | 47 -------- .../compiler/xla/service/hlo_scheduling.cc | 9 +- .../xla/service/hlo_scheduling_test.cc | 102 ++++++++++++++++++ 4 files changed, 108 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index fba20c94ca..43c56484ea 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1129,6 +1129,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index 441d790f0e..37a7fbad97 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -34,53 +34,6 @@ namespace { class HloOrderingTest : public HloTestBase {}; -TEST_F(HloOrderingTest, LastUseScheduledFirst) { - // Tests scheduling of the following HLO code: - // - // %ab = abs(%param) - // %exp = exp(%param) - // %add = add(%ab, %exp) - // %negate = negate(%exp) - // %sub = subtract(%add, %negate) - // - // %add should be scheduled before %negate because %add is the last (and only) - // use of %ab. Scheduling %add first then frees up %ab's buffer. - const Shape vec = ShapeUtil::MakeShape(xla::F32, {42}); - auto builder = HloComputation::Builder(TestName()); - auto param = - builder.AddInstruction(HloInstruction::CreateParameter(0, vec, "param")); - auto ab = builder.AddInstruction( - HloInstruction::CreateUnary(vec, HloOpcode::kAbs, param)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vec, HloOpcode::kExp, param)); - - auto add = builder.AddInstruction( - HloInstruction::CreateBinary(vec, HloOpcode::kAdd, ab, exp)); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vec, HloOpcode::kNegate, exp)); - auto sub = builder.AddInstruction( - HloInstruction::CreateBinary(vec, HloOpcode::kSubtract, add, negate)); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - TF_ASSERT_OK_AND_ASSIGN( - SequentialHloOrdering::HloModuleSequence sequence, - CreateMemoryMinimizingSequence(*module, [](const LogicalBuffer& buffer) { - return ShapeUtil::ByteSizeOf(buffer.shape()); - })); - // Verify that all instructions are in the sequence. - EXPECT_EQ(module->entry_computation()->instruction_count(), - sequence.at(module->entry_computation()).size()); - - // The first instruction should be the parameter and the last the root "sub". - EXPECT_EQ(param, sequence.at(module->entry_computation()).front()); - EXPECT_EQ(sub, sequence.at(module->entry_computation()).back()); - - SequentialHloOrdering ordering(module.get(), sequence); - EXPECT_TRUE(ordering.ExecutesBefore(add, negate)); -} - TEST_F(HloOrderingTest, InstructionsInDifferentComputations) { // Tests the ordering of instructions in different computations using the // following HLO code: diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index da448ed71a..099dd8dd8e 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -103,10 +103,11 @@ class ListScheduler { for (auto* instruction : computation.instructions()) { tensorflow::gtl::FlatSet instr_uses; for (auto* operand : instruction->operands()) { - for (const LogicalBuffer* buffer : - points_to_analysis.GetBuffersDefinedByInstruction(operand)) { - instr_uses.insert(buffer); - } + points_to_analysis.GetPointsToSet(operand).ForEachElement( + [&](const ShapeIndex& /*index*/, + const PointsToSet::BufferList& buffers) { + instr_uses.insert(buffers.begin(), buffers.end()); + }); } buffer_uses_[instruction] = std::vector( instr_uses.begin(), instr_uses.end()); diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index 7fb338e704..2dd6e43851 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_ordering.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -89,5 +90,106 @@ TEST_F(MinimumMemoryForSequenceTest, MultiComputation) { MinimumMemoryForSequence(module_sequence, size_fn).ValueOrDie()); } +class HloSchedulingTest : public HloTestBase {}; + +TEST_F(HloSchedulingTest, LastUseScheduledFirst) { + // Tests scheduling of the following HLO code: + // + // %ab = abs(%param) + // %exp = exp(%param) + // %add = add(%ab, %exp) + // %negate = negate(%exp) + // %sub = subtract(%add, %negate) + // + // %add should be scheduled before %negate because %add is the last (and only) + // use of %ab. Scheduling %add first then frees up %ab's buffer. + const Shape vec = ShapeUtil::MakeShape(xla::F32, {42}); + auto builder = HloComputation::Builder(TestName()); + auto param = + builder.AddInstruction(HloInstruction::CreateParameter(0, vec, "param")); + auto ab = builder.AddInstruction( + HloInstruction::CreateUnary(vec, HloOpcode::kAbs, param)); + auto exp = builder.AddInstruction( + HloInstruction::CreateUnary(vec, HloOpcode::kExp, param)); + + auto add = builder.AddInstruction( + HloInstruction::CreateBinary(vec, HloOpcode::kAdd, ab, exp)); + auto negate = builder.AddInstruction( + HloInstruction::CreateUnary(vec, HloOpcode::kNegate, exp)); + auto sub = builder.AddInstruction( + HloInstruction::CreateBinary(vec, HloOpcode::kSubtract, add, negate)); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + TF_ASSERT_OK_AND_ASSIGN( + SequentialHloOrdering::HloModuleSequence sequence, + CreateMemoryMinimizingSequence(*module, [](const LogicalBuffer& buffer) { + return ShapeUtil::ByteSizeOf(buffer.shape()); + })); + // Verify that all instructions are in the sequence. + EXPECT_EQ(module->entry_computation()->instruction_count(), + sequence.at(module->entry_computation()).size()); + + // The first instruction should be the parameter and the last the root "sub". + EXPECT_EQ(param, sequence.at(module->entry_computation()).front()); + EXPECT_EQ(sub, sequence.at(module->entry_computation()).back()); + + SequentialHloOrdering ordering(module.get(), sequence); + EXPECT_TRUE(ordering.ExecutesBefore(add, negate)); +} + +TEST_F(HloSchedulingTest, ListSchedulerHandlesAliasing) { + const char* module_str = R"( +HloModule test_aliasing_module + +ENTRY root { + param = s32[1000] parameter(0) + p0 = s32[1000] copy(param) + p1 = s32[1000] copy(param) + t = (s32[1000], s32[1000]) tuple(p0, p1) + a = s32[1000] get-tuple-element(t), index=0 + b = s32[1000] get-tuple-element(t), index=1 + c = s32[1000] add(a, b) + d = s32[1000] add(c, b) + e = s32[1000] add(c, c) + f = s32[1000] add(e, e) + ROOT result = (s32[1000], s32[1000], s32[1000]) tuple(d, e, f) +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(module_str)); + + auto size_fn = [](const LogicalBuffer& buffer) { + return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); + }; + TF_ASSERT_OK_AND_ASSIGN( + SequentialHloOrdering::HloModuleSequence sequence, + CreateMemoryMinimizingSequence(*module, size_fn, + SchedulerAlgorithm::kListSchedule)); + // Verify that all instructions are in the sequence. + EXPECT_EQ(module->entry_computation()->instruction_count(), + sequence.at(module->entry_computation()).size()); + + std::unordered_map instructions_by_name; + for (const HloInstruction* instruction : + sequence.at(module->entry_computation())) { + instructions_by_name[instruction->name()] = instruction; + } + + // The first instruction should be the parameter and the last the root. + EXPECT_EQ(instructions_by_name.at("param"), + sequence.at(module->entry_computation()).front()); + EXPECT_EQ(instructions_by_name.at("result"), + sequence.at(module->entry_computation()).back()); + + // Instructions "d" and "e" will both be schedulable at the same time, but + // instruction "d" allows us to free the buffer of "p1", so the list scheduler + // should prefer it. + SequentialHloOrdering ordering(module.get(), sequence); + EXPECT_TRUE(ordering.ExecutesBefore(instructions_by_name.at("d"), + instructions_by_name.at("e"))); +} + } // namespace } // namespace xla -- GitLab From 6e20f3bdbdaf9bae2a67ee9cc9728963bc8b563f Mon Sep 17 00:00:00 2001 From: Piotr Czapla Date: Sat, 17 Mar 2018 19:16:21 +0100 Subject: [PATCH 147/960] Added training parameter to batch_normalization (#16134) * Added training parameter to batch_normalization According to the docs the batch_normalization layer does not work properly if the parameter is not set correctly. * Clean up multiple tests for training mode --- tensorflow/examples/learn/resnet.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/examples/learn/resnet.py b/tensorflow/examples/learn/resnet.py index 9542e55250..c00de932a8 100755 --- a/tensorflow/examples/learn/resnet.py +++ b/tensorflow/examples/learn/resnet.py @@ -53,6 +53,8 @@ def res_net_model(features, labels, mode): ndim = int(sqrt(input_shape[1])) x = tf.reshape(x, [-1, ndim, ndim, 1]) + training = (mode == tf.estimator.ModeKeys.TRAIN) + # First convolution expands to 64 channels with tf.variable_scope('conv_layer1'): net = tf.layers.conv2d( @@ -60,7 +62,7 @@ def res_net_model(features, labels, mode): filters=64, kernel_size=7, activation=tf.nn.relu) - net = tf.layers.batch_normalization(net) + net = tf.layers.batch_normalization(net, training=training) # Max pool net = tf.layers.max_pooling2d( @@ -88,7 +90,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) with tf.variable_scope(name + '/conv_bottleneck'): conv = tf.layers.conv2d( @@ -97,7 +99,7 @@ def res_net_model(features, labels, mode): kernel_size=3, padding='same', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # 1x1 convolution responsible for restoring dimension with tf.variable_scope(name + '/conv_out'): @@ -108,7 +110,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # shortcut connections that turn the network into its counterpart # residual function (identity shortcut) @@ -154,7 +156,7 @@ def res_net_model(features, labels, mode): loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) # Create training op. - if mode == tf.estimator.ModeKeys.TRAIN: + if training: optimizer = tf.train.AdagradOptimizer(learning_rate=0.01) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) -- GitLab From 1fb724f28486d1eec7d9368b6c3b8600664cf8a3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 17 Mar 2018 11:21:02 -0700 Subject: [PATCH 148/960] Normally tf2xla (autoclustering, jit_scope and rewrite) rely on graph optimization passes to outline subgraphs. The XLA device itself only sees Compute() calls for _XlaLaunch ops. All other ops are registered with a dummy op factory that just prints an error. This patch adds an alternative, selected at registration time, that disables default graph optimization and instead registers a non-dummy op implementation. This op implementation compiles the op "on demand"; it generates a fake graph containing _Arg and _Retval nodes and calls into the XlaCompiler code as usual. This allows the device to be used as a "normal" TensorFlow device, as well as from Eager mode, at the expense of performance. Later additions will add the ability to create traces to amortize kernel launch overhead, and the ability to combine op-by-op/tracing and autoclustering with jit_scope annotations. PiperOrigin-RevId: 189463593 --- tensorflow/compiler/jit/BUILD | 3 + .../compiler/jit/kernels/xla_launch_op.cc | 6 +- tensorflow/compiler/jit/legacy_flags/BUILD | 12 ++ .../jit/legacy_flags/xla_device_flags.cc | 56 +++++ .../jit/legacy_flags/xla_device_flags.h | 47 ++++ .../compiler/jit/xla_compilation_cache.cc | 202 +++++++++--------- .../compiler/jit/xla_compilation_cache.h | 30 ++- .../compiler/jit/xla_compile_on_demand_op.cc | 178 +++++++++++++++ .../compiler/jit/xla_compile_on_demand_op.h | 56 +++++ tensorflow/compiler/jit/xla_cpu_device.cc | 13 +- tensorflow/compiler/jit/xla_device.cc | 31 ++- tensorflow/compiler/jit/xla_device.h | 7 +- tensorflow/compiler/jit/xla_device_context.cc | 4 + tensorflow/compiler/jit/xla_gpu_device.cc | 14 +- tensorflow/compiler/jit/xla_launch_util.cc | 14 +- tensorflow/compiler/jit/xla_tensor_info.h | 16 ++ tensorflow/compiler/tests/BUILD | 11 +- tensorflow/compiler/tests/xla_test.py | 8 + .../tf2xla/kernels/batchtospace_op.cc | 4 +- .../tf2xla/kernels/segment_reduction_ops.cc | 4 +- .../tf2xla/kernels/stateless_random_ops.cc | 2 + tensorflow/compiler/tf2xla/xla_compiler.cc | 42 ++++ tensorflow/compiler/tf2xla/xla_compiler.h | 8 + 23 files changed, 637 insertions(+), 131 deletions(-) create mode 100644 tensorflow/compiler/jit/legacy_flags/xla_device_flags.cc create mode 100644 tensorflow/compiler/jit/legacy_flags/xla_device_flags.h create mode 100644 tensorflow/compiler/jit/xla_compile_on_demand_op.cc create mode 100644 tensorflow/compiler/jit/xla_compile_on_demand_op.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 39eb390f38..0475cd9ff2 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -76,6 +76,7 @@ cc_library( ":jit_compilation_passes", ":xla_device", "//tensorflow/compiler/jit/kernels:xla_launch_op", + "//tensorflow/compiler/jit/legacy_flags:xla_device_flags", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla/service:cpu_plugin", # buildcleaner: keep @@ -136,11 +137,13 @@ cc_library( cc_library( name = "xla_device", srcs = [ + "xla_compile_on_demand_op.cc", "xla_device.cc", "xla_device_context.cc", "xla_device_ops.cc", ], hdrs = [ + "xla_compile_on_demand_op.h", "xla_device.h", "xla_device_context.h", "xla_device_ops.h", diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index e24a9a0751..8a8e8bb8df 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -148,7 +148,11 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; - OP_REQUIRES_OK(ctx, cache->Compile(options, function_, num_constant_args_, + std::map constant_args; + for (int i = 0; i < num_constant_args_; ++i) { + constant_args.insert({i, ctx->input(i)}); + } + OP_REQUIRES_OK(ctx, cache->Compile(options, function_, constant_args, variables, ctx, &kernel, &executable, /*compile_options=*/nullptr)); diff --git a/tensorflow/compiler/jit/legacy_flags/BUILD b/tensorflow/compiler/jit/legacy_flags/BUILD index 4491dd6ac8..9cd66fc13c 100644 --- a/tensorflow/compiler/jit/legacy_flags/BUILD +++ b/tensorflow/compiler/jit/legacy_flags/BUILD @@ -52,6 +52,18 @@ cc_library( ], ) +cc_library( + name = "xla_device_flags", + srcs = ["xla_device_flags.cc"], + hdrs = ["xla_device_flags.h"], + deps = + [ + "//tensorflow/compiler/xla/legacy_flags:parse_flags_from_env", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/jit/legacy_flags/xla_device_flags.cc b/tensorflow/compiler/jit/legacy_flags/xla_device_flags.cc new file mode 100644 index 0000000000..1bb2fce2db --- /dev/null +++ b/tensorflow/compiler/jit/legacy_flags/xla_device_flags.cc @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Legacy flags for the XLA bridge's xla_device module. + +#include +#include + +#include "tensorflow/compiler/jit/legacy_flags/xla_device_flags.h" +#include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace tensorflow { +namespace legacy_flags { + +// Pointers to the parsed value of the flags and flag descriptors, initialized +// via flags_init. +static XlaDeviceFlags* flags; +static std::vector* flag_list; +static std::once_flag flags_init; + +// Allocate *flags. Called via call_once(&flags_init,...). +static void AllocateFlags() { + flags = new XlaDeviceFlags; + flags->tf_xla_compile_on_demand = false; + flag_list = new std::vector({ + Flag("tf_xla_compile_on_demand", &flags->tf_xla_compile_on_demand, + "Switch a device into 'on-demand' mode, where instead of " + "autoclustering ops are compiled one by one just-in-time."), + }); + xla::legacy_flags::ParseFlagsFromEnv(*flag_list); +} + +// Return a pointer to the XlaDeviceFlags struct; +// repeated calls return the same pointer. +// This should be called only after Flags::Parse() has returned. +XlaDeviceFlags* GetXlaDeviceFlags() { + std::call_once(flags_init, &AllocateFlags); + return flags; +} + +} // namespace legacy_flags +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/legacy_flags/xla_device_flags.h b/tensorflow/compiler/jit/legacy_flags/xla_device_flags.h new file mode 100644 index 0000000000..27b22121ac --- /dev/null +++ b/tensorflow/compiler/jit/legacy_flags/xla_device_flags.h @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_XLA_DEVICE_FLAGS_H_ +#define TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_XLA_DEVICE_FLAGS_H_ + +// Legacy flags for the XLA bridge's xla_device module. + +#include + +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace tensorflow { +namespace legacy_flags { + +// The values of flags associated with the XLA bridge's +// xla_device module. +typedef struct { + // Switch the CPU device into "on-demand" mode, where instead of + // autoclustering ops are compiled one by one just-in-time. + // Enabling this mode by a legacy flag is a temporary mechanism. When this + // feature is battle-tested, we will switch this to be a session option. + bool tf_xla_compile_on_demand; +} XlaDeviceFlags; + +// Return a pointer to the XlaDeviceFlags struct; +// repeated calls return the same pointer. +// This should be called only after Flags::Parse() has returned. +XlaDeviceFlags* GetXlaDeviceFlags(); + +} // namespace legacy_flags +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_XLA_DEVICE_FLAGS_H_ diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 8cc79a9bd0..6430975335 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -92,39 +92,30 @@ uint64 XlaCompilationCache::Signature::Hash::operator()( } Status XlaCompilationCache::BuildSignature( - const NameAttrList& function, int num_constant_args, + const NameAttrList& function, const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, Signature* signature) { signature->name = Canonicalize(function.name(), AttrSlice(&function.attr())); - signature->arg_values.resize(num_constant_args); - - signature->arg_types.reserve(ctx->num_inputs() - num_constant_args); - - // Inputs are in the order: constants, non-constants, resource variables. - int input_num = 0; - // Use the values of compile time constants in the signature-> - while (input_num < num_constant_args) { - signature->arg_values[input_num] = ctx->input(input_num); - ++input_num; - } - // Add the types and shapes of the remaining arguments. - while (input_num < ctx->num_inputs() - variable_args.size()) { - signature->arg_types.emplace_back(ctx->input_dtype(input_num), - ctx->input(input_num).shape()); - ++input_num; - } - // For variable signatures, use the type and shape of the variable's - // current value. - for (auto& iterator : variable_args) { - const OptionalTensor& variable = iterator.second; - TF_RET_CHECK(input_num < ctx->num_inputs()); - if (variable.present) { - signature->arg_types.emplace_back(variable.value.dtype(), - variable.value.shape()); + signature->arg_values.reserve(constant_args.size()); + + signature->arg_types.reserve(ctx->num_inputs() - constant_args.size()); + + for (int i = 0; i < ctx->num_inputs(); ++i) { + if (constant_args.count(i) > 0) { + // Use the values of compile time constants in the signature. + signature->arg_values.push_back(constant_args.at(i)); + } else if (variable_args.count(i) > 0) { + const OptionalTensor& variable = variable_args.at(i); + if (variable.present) { + signature->arg_types.emplace_back(variable.value.dtype(), + variable.value.shape()); + } else { + signature->arg_types.emplace_back(DT_INVALID, TensorShape()); + } } else { - signature->arg_types.emplace_back(DT_INVALID, TensorShape()); + signature->arg_types.emplace_back(ctx->input_dtype(i), + ctx->input(i).shape()); } - ++input_num; } return Status::OK(); } @@ -132,74 +123,58 @@ Status XlaCompilationCache::BuildSignature( namespace { // Builds a XlaCompiler::Argument vector from the arguments to the _XlaLaunch -// op. The first `num_constant_args` arguments must be host-memory Tensors. -Status BuildArguments(int num_constant_args, +// op. +Status BuildArguments(const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, std::vector* args) { args->resize(ctx->num_inputs()); - int input_num = 0; - - // Handles compile-time constants. - TF_RET_CHECK(num_constant_args <= ctx->num_inputs()); - while (input_num < num_constant_args) { - const Tensor& input = ctx->input(input_num); - TF_RET_CHECK(input.dtype() != DT_RESOURCE); - XlaCompiler::Argument& arg = (*args)[input_num]; - arg.kind = XlaCompiler::Argument::kConstant; - arg.type = input.dtype(); - arg.shape = input.shape(); - arg.constant_value = input; - ++input_num; - } - - // Handles the non-constant arguments. - int num_variable_args = variable_args.size(); - int num_nonconst_args = - ctx->num_inputs() - num_variable_args - num_constant_args; - TF_RET_CHECK(num_nonconst_args >= 0); - while (input_num < num_constant_args + num_nonconst_args) { - const Tensor& input = ctx->input(input_num); - TF_RET_CHECK(input.dtype() != DT_RESOURCE); + for (int64 input_num = 0; input_num < ctx->num_inputs(); ++input_num) { XlaCompiler::Argument& arg = (*args)[input_num]; - if (input.NumElements() > 0) { - arg.kind = XlaCompiler::Argument::kParameter; - } else { + if (constant_args.count(input_num) > 0) { + // Handles compile-time constants. + const Tensor& input = constant_args.at(input_num); + TF_RET_CHECK(input.dtype() != DT_RESOURCE); arg.kind = XlaCompiler::Argument::kConstant; + arg.type = input.dtype(); + arg.shape = input.shape(); arg.constant_value = input; - } - arg.type = input.dtype(); - arg.shape = input.shape(); - ++input_num; - } - - // Handles resource variables. - TF_RET_CHECK(input_num + num_variable_args == ctx->num_inputs()); - for (auto& iterator : variable_args) { - const Tensor& input = ctx->input(input_num); - TF_RET_CHECK(input.dtype() == DT_RESOURCE); - - XlaCompiler::Argument& arg = (*args)[input_num]; - - arg.name = iterator.second.name; - arg.kind = XlaCompiler::Argument::kResource; - arg.resource_kind = XlaResource::kVariable; - if (iterator.second.present) { - const Tensor& value = iterator.second.value; - arg.type = value.dtype(); - arg.shape = value.shape(); - arg.initialized = true; + } else if (variable_args.count(input_num) == 0) { + // Handles the non-constant arguments. + const Tensor& input = ctx->input(input_num); + TF_RET_CHECK(input.dtype() != DT_RESOURCE); + if (input.NumElements() > 0) { + arg.kind = XlaCompiler::Argument::kParameter; + } else { + arg.kind = XlaCompiler::Argument::kConstant; + arg.constant_value = input; + } + arg.type = input.dtype(); + arg.shape = input.shape(); } else { - // The values of uninitialized variables are not passed as inputs, since - // they are meaningless. However, it is legal to assign to a resource - // variable for the first time inside the XLA computation, so we do permit - // uninitialized variables. - arg.initialized = false; - arg.type = DT_INVALID; - arg.shape = TensorShape(); + // Handles resource variables. + const Tensor& input = ctx->input(input_num); + TF_RET_CHECK(input.dtype() == DT_RESOURCE); + const OptionalTensor& variable = variable_args.at(input_num); + arg.name = variable.name; + arg.kind = XlaCompiler::Argument::kResource; + arg.resource_kind = XlaResource::kVariable; + if (variable.present) { + const Tensor& value = variable.value; + arg.type = value.dtype(); + arg.shape = value.shape(); + arg.initialized = true; + } else { + // The values of uninitialized variables are not passed as inputs, since + // they are meaningless. However, it is legal to assign to a resource + // variable for the first time inside the XLA computation, so we do + // permit uninitialized variables. + arg.initialized = false; + arg.type = DT_INVALID; + arg.shape = TensorShape(); + } } - ++input_num; } return Status::OK(); @@ -234,16 +209,43 @@ Status XlaCompilationCache::BuildExecutable( Status XlaCompilationCache::Compile( const XlaCompiler::Options& options, const NameAttrList& function, - int num_constant_args, const std::map& variable_args, - OpKernelContext* ctx, + const std::map& constant_args, + const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable, const XlaCompiler::CompileOptions* compile_options) { + return CompileImpl(options, function, constant_args, variable_args, ctx, + compilation_result, executable, compile_options, false); +} + +Status XlaCompilationCache::CompileSingleOp( + const XlaCompiler::Options& options, + const std::map& constant_args, + const std::map& variable_args, OpKernelContext* ctx, + const XlaCompiler::CompilationResult** compilation_result, + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options) { + const NodeDef& def = ctx->op_kernel().def(); + NameAttrList name; + name.set_name(def.op()); + *name.mutable_attr() = def.attr(); + return CompileImpl(options, name, constant_args, variable_args, ctx, + compilation_result, executable, compile_options, true); +} + +Status XlaCompilationCache::CompileImpl( + const XlaCompiler::Options& options, const NameAttrList& function, + const std::map& constant_args, + const std::map& variable_args, OpKernelContext* ctx, + const XlaCompiler::CompilationResult** compilation_result, + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options, + bool compile_single_op) { VLOG(1) << "XlaCompilationCache::Compile " << DebugString(); if (VLOG_IS_ON(2)) { VLOG(2) << "num_inputs=" << ctx->num_inputs() - << " num_constant_args=" << num_constant_args + << " num_constant_args=" << constant_args.size() << " num_variable_args=" << variable_args.size(); for (int i = 0; i < ctx->num_inputs(); i++) { TensorShape shape = ctx->input(i).shape(); @@ -264,11 +266,12 @@ Status XlaCompilationCache::Compile( } } - TF_RET_CHECK(num_constant_args + variable_args.size() <= ctx->num_inputs()); + TF_RET_CHECK(constant_args.size() + variable_args.size() <= + ctx->num_inputs()); Signature signature; - TF_RETURN_IF_ERROR(BuildSignature(function, num_constant_args, variable_args, - ctx, &signature)); + TF_RETURN_IF_ERROR( + BuildSignature(function, constant_args, variable_args, ctx, &signature)); VLOG(2) << "Signature: " << SignatureDebugString(signature); // The outer lock protects the existence of the cache entry. It does not @@ -295,13 +298,20 @@ Status XlaCompilationCache::Compile( // a long time.) std::vector args; TF_RETURN_IF_ERROR( - BuildArguments(num_constant_args, variable_args, ctx, &args)); + BuildArguments(constant_args, variable_args, ctx, &args)); XlaCompiler compiler(options); entry->compiled = true; - entry->compilation_status = compiler.CompileFunction( - compile_options ? *compile_options : XlaCompiler::CompileOptions(), - function, args, &entry->compilation_result); + + if (compile_single_op) { + entry->compilation_status = compiler.CompileSingleOp( + compile_options ? *compile_options : XlaCompiler::CompileOptions(), + signature.name, ctx, args, &entry->compilation_result); + } else { + entry->compilation_status = compiler.CompileFunction( + compile_options ? *compile_options : XlaCompiler::CompileOptions(), + function, args, &entry->compilation_result); + } } *compilation_result = &entry->compilation_result; if (entry->compilation_status.ok() && executable) { diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h index d506378314..5c0c79b880 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.h +++ b/tensorflow/compiler/jit/xla_compilation_cache.h @@ -52,8 +52,8 @@ class XlaCompilationCache : public ResourceBase { // Compiles a function into a XlaCompiler::CompilationResult that can be used // to execute an XLA Computation. Compilation results are cached. // `function` is the name of a Tensorflow function to compile. - // `num_constant_args` is the number of compile-time constant arguments to - // `function`. `variable_args` is a snapshot of the current values of the + // `constant_args` is a maps of tensorflow argument number to constant value. + // `variable_args` is a snapshot of the current values of the // resource variable arguments to `function`; uninitialized variables are // represented by an absent OptionalTensor. // The result of compilation is written to `*compilation_result`, which must @@ -62,19 +62,40 @@ class XlaCompilationCache : public ResourceBase { // executable pointer may be null if the computation has no non-constant // outputs. Status Compile(const XlaCompiler::Options& options, - const NameAttrList& function, int num_constant_args, + const NameAttrList& function, + const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable, const XlaCompiler::CompileOptions* compile_options); + // As above, but calls XlaCompiler::CompileSingleOp instead of + // XlaCompiler::CompileFunction. + Status CompileSingleOp( + const XlaCompiler::Options& options, + const std::map& constant_args, + const std::map& variable_args, OpKernelContext* ctx, + const XlaCompiler::CompilationResult** compilation_result, + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options); + xla::LocalClient* client() const { return client_; } const DeviceType& device_type() const { return device_type_; } string DebugString() override; private: + // Common implementation of Compile and CompileSingleOp. + Status CompileImpl(const XlaCompiler::Options& options, + const NameAttrList& function, + const std::map& constant_args, + const std::map& variable_args, + OpKernelContext* ctx, + const XlaCompiler::CompilationResult** compilation_result, + xla::LocalExecutable** executable, + const XlaCompiler::CompileOptions* compile_options, + bool compile_single_op); // Takes `result` which has been compiled from a Tensorflow subgraph to a // XLA computation already, and generates an XLA LocalExecutable `executable`. Status BuildExecutable(const XlaCompiler::Options& options, @@ -104,7 +125,8 @@ class XlaCompilationCache : public ResourceBase { static string SignatureDebugString(const Signature& sig); // Builds the signature for a compilation. - Status BuildSignature(const NameAttrList& function, int num_constant_args, + Status BuildSignature(const NameAttrList& function, + const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, Signature* signature); diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc new file mode 100644 index 0000000000..915b9ce84a --- /dev/null +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -0,0 +1,178 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Defines the XlaCompileOnDemandOp. + +#include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" + +namespace tensorflow { + +namespace { +std::map GetVariables(OpKernelContext* ctx) { + std::map variables; + for (int64 i = 0; i < ctx->num_inputs(); ++i) { + if (ctx->input(i).dtype() == DT_RESOURCE) { + Var* variable = nullptr; + ResourceHandle handle = HandleFromInput(ctx, i); + OptionalTensor& optional = variables[i]; + optional.name = handle.name(); + if (LookupResource(ctx, handle, &variable).ok()) { + tf_shared_lock lock(*variable->mu()); + optional.present = true; + optional.value = *variable->tensor(); + } + } + } + return variables; +} +} // namespace + +Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, + const XlaDevice::Metadata& metadata, + const XlaCompiler::CompilationResult* result, + xla::LocalExecutable* executable) { + std::map variables = GetVariables(ctx); + int64 num_resource_args = variables.size(); + + xla::LocalClient* client = metadata.client(); + XlaTensorInfoManager* tensor_info_manager = &metadata.tensor_info_manager(); + + // Builds an XLA allocator for the device. + XlaAllocator xla_allocator(client->platform(), ctx); + XlaComputationLaunchContext launch_context( + num_resource_args, client, &xla_allocator, tensor_info_manager); + + launch_context.PopulateInputs(ctx, result, variables); + + perftools::gputools::Stream* stream = + ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; + TF_RET_CHECK(stream); + + VLOG(2) << "Executing computation."; + xla::ExecutableRunOptions run_options; + run_options.set_stream(stream); + run_options.set_allocator(&xla_allocator); + run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); + + auto run_result = executable->Run(launch_context.arguments(), run_options); + TF_RETURN_IF_ERROR(run_result.status()); + + launch_context.PopulateOutputs(ctx, result, run_result.ConsumeValueOrDie()); + return Status::OK(); +} + +bool XlaCompileOnDemandOp::MustArgumentBeConstant(const OpKernel* op_kernel, + int64 argument_idx) { + // TODO(jmolloy): This could be expensive, so memoize. + auto* constant_inputs = tensorflow::XlaOpRegistry::CompileTimeConstantInputs( + op_kernel->def().op()); + CHECK(constant_inputs); + std::set constant_input_indices; + for (const auto& name : *constant_inputs) { + int start, stop; + TF_CHECK_OK(op_kernel->InputRange(name, &start, &stop)); + for (int i = start; i < stop; ++i) { + constant_input_indices.insert(i); + } + } + return constant_input_indices.count(argument_idx) > 0; +} + +bool XlaCompileOnDemandOp::ShouldArgumentBeConstant(const OpKernel* op_kernel, + int64 argument_idx) { + // Right now we only create kConstant arguments when absolutely required, but + // there may be benefit in eagerly constant-folding a larger subset of + // arguments in the future. + return MustArgumentBeConstant(op_kernel, argument_idx); +} + +Status XlaCompileOnDemandOp::Compile( + OpKernelContext* ctx, const XlaDevice::Metadata& metadata, + const XlaCompiler::CompilationResult** result, + xla::LocalExecutable** executable) { + XlaTensorInfoManager* tensor_info_manager = &metadata.tensor_info_manager(); + + std::map constant_arguments; + for (int64 i = 0; i < ctx->num_inputs(); ++i) { + const Tensor& device_tensor = ctx->input(i); + if (const XlaTensorInfo* tensor_info = + tensor_info_manager->GetTensorInfo(device_tensor)) { + if (tensor_info->has_host_tensor() && + ShouldArgumentBeConstant(&ctx->op_kernel(), i)) { + constant_arguments[i] = tensor_info->host_tensor(); + } + } + if (constant_arguments.count(i) == 0 && + MustArgumentBeConstant(&ctx->op_kernel(), i)) { + // Slow path; the argument is not available as a host constant so we must + // fetch it synchronously. + Tensor host_tensor; + TF_RETURN_IF_ERROR(ctx->allocate_temp( + device_tensor.dtype(), device_tensor.shape(), &host_tensor)); + Notification n; + ctx->op_device_context()->CopyDeviceTensorToCPU( + &device_tensor, "ConstantArgument", + reinterpret_cast(ctx->device()), &host_tensor, + [&](Status status) { n.Notify(); }); + n.WaitForNotification(); + constant_arguments[i] = host_tensor; + } + } + + // We store information about the JIT-compiled XLA computation + // in the ResourceMgr. + ResourceMgr* rm = ctx->resource_manager(); + CHECK(rm); + + XlaCompilationCache* cache; + TF_RETURN_IF_ERROR(rm->LookupOrCreate( + rm->default_container(), "xla_cache", &cache, + [&](XlaCompilationCache** cache) { + *cache = new XlaCompilationCache(metadata.client(), + metadata.jit_device_type()); + return Status::OK(); + })); + // Hold the reference to the JIT during evaluation. (We could probably + // free it sooner because the ResourceMgr will retain a reference, but + // this is more obviously correct.) + core::ScopedUnref cache_ref(cache); + + XlaCompiler::Options options; + DeviceType device_type = metadata.jit_device_type(); + options.device_type = &device_type; + options.client = metadata.client(); + options.flib_def = + new FunctionLibraryDefinition(OpRegistry::Global(), FunctionDefLibrary{}); + + std::map variable_args = GetVariables(ctx); + return cache->CompileSingleOp(options, constant_arguments, variable_args, ctx, + result, executable, + /*compile_options=*/nullptr); +} + +void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) { + const XlaCompiler::CompilationResult* result; + xla::LocalExecutable* executable; + const XlaDevice::Metadata* metadata; + OP_REQUIRES_OK(ctx, XlaDevice::GetMetadata(ctx, &metadata)); + OP_REQUIRES_OK(ctx, Compile(ctx, *metadata, &result, &executable)); + OP_REQUIRES_OK(ctx, Run(ctx, *metadata, result, executable)); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.h b/tensorflow/compiler/jit/xla_compile_on_demand_op.h new file mode 100644 index 0000000000..23c6f3903f --- /dev/null +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.h @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// The XlaCompileOnDemandOp is an OpKernel that, when its Compute method is +// called, will generate an xla::Computation and run it asynchronously. + +#ifndef TENSORFLOW_COMPILER_JIT_XLA_COMPILE_ON_DEMAND_OP_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_COMPILE_ON_DEMAND_OP_H_ + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// An OpKernel that compiles an op to an XLA computation and runs it. Unlike +// _XlaLaunch this doesn't rely on any rewrites of the graphdef - it will run a +// vanilla TensorFlow op as long as the bridge supports it. +// +// Importantly _XlaLaunch assumes all input and output tensors are on the host, +// whereas XlacompileOnDemandOp works with tensors in device memory. +class XlaCompileOnDemandOp : public OpKernel { + public: + explicit XlaCompileOnDemandOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + void Compute(OpKernelContext* ctx) override; + + private: + XlaCompiler::Argument CreateCompilerArgument(OpKernelContext* ctx, int64 i); + bool ShouldArgumentBeConstant(const OpKernel* op_kernel, int64 argument_idx); + bool MustArgumentBeConstant(const OpKernel* op_kernel, int64 argument_idx); + Status Compile(OpKernelContext* ctx, const XlaDevice::Metadata& metadata, + const XlaCompiler::CompilationResult** result, + xla::LocalExecutable** executable); + Status Run(OpKernelContext* ctx, const XlaDevice::Metadata& metadata, + const XlaCompiler::CompilationResult* result, + xla::LocalExecutable* executable); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_XLA_COMPILE_ON_DEMAND_OP_H_ diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index db3bf3ea33..d2dfdeea68 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -17,6 +17,8 @@ limitations under the License. // operators using XLA via the XLA "Host" (CPU) backend. #include "tensorflow/compiler/jit/kernels/xla_launch_op.h" +#include "tensorflow/compiler/jit/legacy_flags/xla_device_flags.h" +#include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/jit/xla_device_ops.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -34,6 +36,15 @@ class XlaCpuDeviceFactory : public DeviceFactory { Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options, const string& name_prefix, std::vector* devices) { + legacy_flags::XlaDeviceFlags* flags = legacy_flags::GetXlaDeviceFlags(); + bool compile_on_demand = flags->tf_xla_compile_on_demand; + + XlaOpRegistry::DeviceRegistration registration; + registration.compilation_device_name = DEVICE_CPU_XLA_JIT; + registration.requires_compilation = !compile_on_demand; + registration.enable_jit_by_default = false; + registration.compile_resource_ops = true; + static XlaDeviceOpRegistrations* registrations = RegisterXlaDeviceKernels(DEVICE_XLA_CPU, DEVICE_CPU_XLA_JIT); (void)registrations; @@ -41,7 +52,7 @@ Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options, std::unique_ptr device; TF_RETURN_IF_ERROR(XlaDevice::Create("Host", DEVICE_XLA_CPU, 0, DEVICE_CPU_XLA_JIT, options, name_prefix, - /*register_device_for_compilation=*/true, + registration, /*transfer_as_literal=*/false, &device)); devices->push_back(device.release()); return Status::OK(); diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index e4e11d4ce2..82048f5d78 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" #include "tensorflow/compiler/jit/xla_device_context.h" #include "tensorflow/compiler/jit/xla_device_ops.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" @@ -108,21 +109,15 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( /* static */ Status XlaDevice::Create( const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, - const string& name_prefix, bool register_device_for_compilation, + const string& name_prefix, + const XlaOpRegistry::DeviceRegistration& registration, bool transfer_as_literal, std::unique_ptr* device) { VLOG(1) << "XlaDevice::Create " << platform_name << " " << device_name << ":" << device_ordinal; - if (register_device_for_compilation) { - // These are no-ops if they have already been done previously for - // this device_name/compilation_device_name pair. - XlaOpRegistry::DeviceRegistration registration; - registration.compilation_device_name = jit_device_name; - registration.requires_compilation = true; - registration.enable_jit_by_default = false; - registration.compile_resource_ops = true; - XlaOpRegistry::RegisterCompilationDevice(device_name, registration); - } + // These are no-ops if they have already been done previously for + // this device_name/compilation_device_name pair. + XlaOpRegistry::RegisterCompilationDevice(device_name, registration); auto platform = se::MultiPlatformManager::PlatformWithName(platform_name); if (!platform.ok()) { @@ -306,19 +301,23 @@ Status XlaDevice::MakeTensorFromProto(const TensorProto& tensor_proto, XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device, const char* jit_device) { + // Any op assigned to the device that isn't rewritten by the graph rewriter + // gets executed by a n XlaCompileOnDemandOp, which compiles it and executes + // it just-in-time. + kernel_factory::OpKernelRegistrar::Factory factory = + [](OpKernelConstruction* context) -> OpKernel* { + return new XlaCompileOnDemandOp(context); + }; XlaOpRegistry::RegisterCompilationKernels(); XlaDeviceOpRegistrations* registrations = new XlaDeviceOpRegistrations; - auto dummy_factory = [](OpKernelConstruction* context) -> OpKernel* { - return new XlaDeviceDummyOp(context); - }; for (const KernelDef* jit_def : XlaOpRegistry::DeviceKernels( jit_device, /*include_compilation_only_kernels=*/false)) { KernelDef* def = new KernelDef(*jit_def); def->set_device_type(device); registrations->op_kernel_registrars.emplace_back( - new kernel_factory::OpKernelRegistrar(def, "XlaDeviceDummyOp", - dummy_factory)); + new kernel_factory::OpKernelRegistrar(def, "XlaCompileOnDemandOp", + factory)); } return registrations; } diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index 0f4476296b..9cd9167e52 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -27,6 +27,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_ #include "tensorflow/compiler/jit/xla_tensor_info.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" @@ -81,7 +82,7 @@ class XlaDevice : public LocalDevice { static Status Create(const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, const string& name_prefix, - bool register_device_for_compilation, + const XlaOpRegistry::DeviceRegistration& registration, bool transfer_as_literal, std::unique_ptr* device); @@ -113,7 +114,7 @@ class XlaDevice : public LocalDevice { // Which hardware device in the client's platform this XlaDevice controls. const int device_ordinal_; // The name of the device that is used to compile Ops for this XlaDevice. - const DeviceType& jit_device_name_; + DeviceType jit_device_name_; // Memory allocator associated with this device. Allocator* xla_allocator_; // Not owned. ::perftools::gputools::Platform* platform_; // Not owned. @@ -134,7 +135,7 @@ class XlaDevice : public LocalDevice { bool transfer_as_literal_; }; -// Builds dummy OpKernel registrations on 'device' for the JIT operators +// Builds OpKernel registrations on 'device' for the JIT operators // registered on 'jit_device'. Returns ownership of a XlaDeviceOpRegistrations // object that encapsulates the kernel registrations. struct XlaDeviceOpRegistrations { diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index b57f82f98e..88f7c15f0b 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -93,6 +93,10 @@ void XlaTransferManager::CopyCPUTensorToDevice(const Tensor* cpu_tensor, } } + XlaTensorInfo* tensor_info = + tensor_info_manager_->GetOrCreateTensorInfo(*device_tensor); + tensor_info->set_host_tensor(*cpu_tensor); + done(status); return; } diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index 383ed879ef..5a1db81774 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -34,15 +34,21 @@ class XlaGpuDeviceFactory : public DeviceFactory { Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options, const string& name_prefix, std::vector* devices) { + XlaOpRegistry::DeviceRegistration registration; + registration.compilation_device_name = DEVICE_GPU_XLA_JIT; + registration.requires_compilation = true; + registration.enable_jit_by_default = false; + registration.compile_resource_ops = true; + static XlaDeviceOpRegistrations* registrations = RegisterXlaDeviceKernels(DEVICE_XLA_GPU, DEVICE_GPU_XLA_JIT); (void)registrations; std::unique_ptr device; - Status status = XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, - DEVICE_GPU_XLA_JIT, options, name_prefix, - /*register_device_for_compilation=*/true, - /*transfer_as_literal=*/false, &device); + Status status = + XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options, + name_prefix, registration, + /*transfer_as_literal=*/false, &device); if (!status.ok()) { // Treat failures as non-fatal; there might not be a GPU in the machine. VLOG(1) << "Failed to create XLA_GPU device: " << status; diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 689fa3299c..076cbd2084 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -176,21 +176,33 @@ void XlaComputationLaunchContext::PopulateOutputs( if (kernel->outputs[i].is_constant) { // Output is a constant. const Tensor& const_tensor = kernel->outputs[i].constant_value; + Tensor* output_tensor; const size_t total_bytes = const_tensor.TotalBytes(); if (stream && total_bytes > 0) { // Copy host -> device. (Empty tensors don't have backing buffers.) VLOG(1) << "Constant output tensor on device"; - Tensor* output_tensor; + TF_CHECK_OK( ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); const void* src_ptr = DMAHelper::base(&const_tensor); void* dst_ptr = DMAHelper::base(output_tensor); gpu::DeviceMemoryBase gpu_dst_ptr(dst_ptr, total_bytes); + // Memcpying asynchronously is safe for the GPU, but the CPU uses a + // shared allocator so hold a reference to the copied-to buffer until + // complete. + TensorReference ref(*output_tensor); stream->ThenMemcpy(&gpu_dst_ptr, src_ptr, total_bytes); + stream->ThenDoHostCallback([ref] { ref.Unref(); }); } else { // No copy required. ctx->set_output(i, const_tensor); + output_tensor = ctx->mutable_output(i); + } + if (tensor_info_manager_) { + XlaTensorInfo* tensor_info = + tensor_info_manager_->GetOrCreateTensorInfo(*output_tensor); + tensor_info->set_host_tensor(const_tensor); } } else { const TensorShape& shape = kernel->outputs[i].shape; diff --git a/tensorflow/compiler/jit/xla_tensor_info.h b/tensorflow/compiler/jit/xla_tensor_info.h index 0b0736bf01..fbd6ad770f 100644 --- a/tensorflow/compiler/jit/xla_tensor_info.h +++ b/tensorflow/compiler/jit/xla_tensor_info.h @@ -43,9 +43,25 @@ class XlaTensorInfo { shaped_buffer_.reset(new xla::ShapedBuffer(std::move(shaped_buffer))); } + // Some tensors on the device may have known values on the host. We use these + // in on-demand mode to avoid re-copying values from the device if we know the + // host value already. + + // Return true if this TensorInfo contains a host tensor. + bool has_host_tensor() const { return host_tensor_ != nullptr; } + // Return the contained host tensor. + // REQUIRES: has_host_tensor() + const Tensor& host_tensor() const { return *host_tensor_; } + // Sets the contained host tensor. + void set_host_tensor(const Tensor& tensor) { + host_tensor_.reset(new Tensor(tensor)); + } + private: // The optional contained ShapedBuffer. std::unique_ptr shaped_buffer_; + // An optional host tensor value. + std::unique_ptr host_tensor_; }; // Manages XlaTensorInfo objects. This class is also an Allocator, so that diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 85a2adab28..bbb6089ea8 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -86,7 +86,10 @@ tf_xla_py_test( # ArgMax needs CustomCall on CPU, which is not available in normal # (not precompiled) TensorFlow. The flag below excludes the CPU # backend. - disabled_backends = "cpu", + disabled_backends = [ + "cpu", + "cpu_ondemand", + ], deps = [ ":xla_test", "//tensorflow/python:array_ops", @@ -315,6 +318,8 @@ tf_xla_py_test( name = "function_test", size = "small", srcs = ["function_test.py"], + # Functions are not implemented in the on-demand compilation model yet. + disabled_backends = "cpu_ondemand", deps = [ ":xla_test", "//tensorflow/python:array_ops", @@ -551,6 +556,8 @@ tf_xla_py_test( name = "stack_ops_test", size = "small", srcs = ["stack_ops_test.py"], + # Stack ops are not implemented in the on-demand compilation model yet. + disabled_backends = "cpu_ondemand", deps = [ ":xla_test", "//tensorflow/python:array_ops", @@ -577,6 +584,8 @@ tf_xla_py_test( name = "tensor_array_ops_test", size = "small", srcs = ["tensor_array_ops_test.py"], + # TensorArray ops are not implemented in the on-demand compilation model yet. + disabled_backends = "cpu_ondemand", deps = [ ":xla_test", "//tensorflow/python:array_ops", diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py index cc778f1c3c..e924fe1e61 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ b/tensorflow/compiler/tests/xla_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import contextlib +import os import random import re @@ -44,6 +45,8 @@ flags.DEFINE_string('test_device', None, flags.DEFINE_string('types', None, 'Types to test. Comma-separated list.') flags.DEFINE_string('disabled_manifest', None, 'Path to a file with a list of tests that should not run.') +flags.DEFINE_string('tf_xla_flags', None, + 'Value to set the TF_XLA_FLAGS environment variable to') class XLATestCase(test.TestCase): @@ -97,6 +100,8 @@ class XLATestCase(test.TestCase): disabled_tests = [] disabled_method_types = [] for l in manifest_file.read().splitlines(): + if not l: + continue entry = comments_re.sub('', l).strip().split(' ') if len(entry) == 1: disabled_tests.append(entry[0]) @@ -113,6 +118,9 @@ class XLATestCase(test.TestCase): for name in types]) manifest_file.close() + if FLAGS.tf_xla_flags is not None: + os.environ['TF_XLA_FLAGS'] = FLAGS.tf_xla_flags + @property def all_tf_types(self): name = '{}.{}'.format(type(self).__name__, self._testMethodName) diff --git a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc index cbade79e85..569950c2df 100644 --- a/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batchtospace_op.cc @@ -184,9 +184,7 @@ class BatchToSpaceOp : public XlaOpKernel { private: int block_size_; }; -REGISTER_XLA_OP(Name("BatchToSpace") - .CompileTimeConstInput("crops") - .CompileTimeConstInput("block_shape"), +REGISTER_XLA_OP(Name("BatchToSpace").CompileTimeConstInput("crops"), BatchToSpaceOp); } // namespace diff --git a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc index 80d6df6c48..498342a988 100644 --- a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc @@ -83,7 +83,9 @@ class UnsortedSegmentSum : public XlaOpKernel { DataType dtype_; }; -REGISTER_XLA_OP(Name("UnsortedSegmentSum"), UnsortedSegmentSum); +REGISTER_XLA_OP( + Name("UnsortedSegmentSum").CompileTimeConstInput("num_segments"), + UnsortedSegmentSum); } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc index b10880de77..5bb773d97f 100644 --- a/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/stateless_random_ops.cc @@ -239,6 +239,7 @@ class StatelessRandomUniformOp : public XlaOpKernel { // TODO(phawkins): generalize to non-float, non-int32 seed types. REGISTER_XLA_OP(Name("StatelessRandomUniform") + .CompileTimeConstInput("shape") .TypeConstraint("dtype", DT_FLOAT) .TypeConstraint("Tseed", DT_INT32), StatelessRandomUniformOp); @@ -272,6 +273,7 @@ class StatelessRandomNormalOp : public XlaOpKernel { // TODO(phawkins): generalize to non-float, non-int32 seed types. REGISTER_XLA_OP(Name("StatelessRandomNormal") + .CompileTimeConstInput("shape") .TypeConstraint("dtype", DT_FLOAT) .TypeConstraint("Tseed", DT_INT32), StatelessRandomNormalOp); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 7cdf4d1b3e..86263d847a 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -600,6 +600,48 @@ Status XlaCompiler::BuildArguments( return Status::OK(); } +Status XlaCompiler::CompileSingleOp( + const XlaCompiler::CompileOptions& options, string const& name, + OpKernelContext* ctx, const std::vector& args, + CompilationResult* result) { + // TODO(b/74182462): We implement this by creating a new dummy Graph including + // _Arg nodes, and let CompileGraph walk it. This could be optimized. + std::unique_ptr graph(new Graph(OpRegistry::Global())); + + Status status; + // First create the actual node we care about computing. + Node* main_node = graph->AddNode(ctx->op_kernel().def(), &status); + TF_RETURN_IF_ERROR(status); + + // Create dummy _Arg nodes. Link these to `node` and also via a control + // dependency edge to the _SOURCE node. + for (int64 i = 0; i < ctx->num_inputs(); ++i) { + Node* node; + string name = strings::StrCat(ctx->op_kernel().name(), "_", i, "_arg"); + Status status = NodeBuilder(name, "_Arg") + .ControlInput(graph->source_node()) + .Attr("T", ctx->input_dtype(i)) + .Attr("index", i) + .Finalize(graph.get(), &node); + TF_RETURN_IF_ERROR(status); + graph->AddEdge(node, 0, main_node, i); + } + + // Similarly with return values, create dummy _Retval nodes fed by `node`. + for (int64 i = 0; i < ctx->num_outputs(); ++i) { + Node* node; + string name = strings::StrCat(ctx->op_kernel().name(), "_", i, "_retval"); + Status status = NodeBuilder(name, "_Retval") + .Input(main_node, i) + .Attr("T", ctx->expected_output_dtype(i)) + .Attr("index", i) + .Finalize(graph.get(), &node); + TF_RETURN_IF_ERROR(status); + } + + return CompileGraph(options, name, std::move(graph), args, result); +} + Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, string const& name, std::unique_ptr graph, diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 5f1c631976..a6747bbe72 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -289,6 +289,14 @@ class XlaCompiler { const std::vector& args, CompilationResult* result); + // Compiles a single Op, given by an OpKernelContext, into an + // xla::Computation. Similar to CompileFunction but takes a single Op as + // input. + Status CompileSingleOp(const CompileOptions& options, string const& name, + OpKernelContext* ctx, + const std::vector& args, + CompilationResult* result); + // Returns the shape of the XLA parameter for an argument 'arg'. // See the class comment for more details about the argument passing // convention. -- GitLab From 91cbf1e83d85930b59c071553109506b076cee01 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Sat, 17 Mar 2018 21:12:18 +0200 Subject: [PATCH 149/960] Fix typo in `dataset_ops.py`: `datset` -> `dataset`. Tested: This is a noop. --- tensorflow/python/data/ops/dataset_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index c1ba67e474..9c62d5700c 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -769,7 +769,7 @@ class Dataset(object): return PaddedBatchDataset(self, batch_size, padded_shapes, padding_values) def map(self, map_func, num_parallel_calls=None): - """Maps `map_func` across this datset. + """Maps `map_func` across this dataset. Args: map_func: A function mapping a nested structure of tensors (having -- GitLab From 705afa34fc4540593b6aa6dc6dd22ae02d41abea Mon Sep 17 00:00:00 2001 From: brett koonce Date: Sat, 17 Mar 2018 12:22:23 -0700 Subject: [PATCH 150/960] contrib: minor spelling tweaks (#17788) packages: model_pruning rnn solvers tensorrt --- tensorflow/contrib/model_pruning/python/layers/layers.py | 2 +- tensorflow/contrib/model_pruning/python/pruning.py | 2 +- tensorflow/contrib/rnn/ops/gru_ops.cc | 2 +- .../contrib/rnn/python/kernel_tests/lstm_ops_test.py | 2 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 4 ++-- tensorflow/contrib/solvers/python/ops/least_squares.py | 2 +- tensorflow/contrib/solvers/python/ops/linear_equations.py | 2 +- tensorflow/contrib/tensorrt/convert/convert_graph.h | 2 +- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 8 ++++---- tensorflow/contrib/tensorrt/python/trt_convert.py | 2 +- .../contrib/tensorrt/resources/trt_int8_calibrator.cc | 2 +- tensorflow/contrib/tensorrt/test/test_tftrt.py | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/model_pruning/python/layers/layers.py b/tensorflow/contrib/model_pruning/python/layers/layers.py index 988748ad75..466daf204a 100644 --- a/tensorflow/contrib/model_pruning/python/layers/layers.py +++ b/tensorflow/contrib/model_pruning/python/layers/layers.py @@ -214,7 +214,7 @@ def masked_convolution(inputs, elif data_format == 'NCHW': df = 'channels_first' else: - raise ValueError('Unsupported data fromat', data_format) + raise ValueError('Unsupported data format', data_format) layer = layer_class( filters=num_outputs, diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index 86963be4b8..5146a4a2de 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -216,7 +216,7 @@ def _partitioned_variable_assign(partitioned_var, new_value): """Assign op for partitioned variables. Args: - partitioned_var: A partitioned tensotflow variable + partitioned_var: A partitioned tensorflow variable new_value: Value to be assigned to the variable var Returns: diff --git a/tensorflow/contrib/rnn/ops/gru_ops.cc b/tensorflow/contrib/rnn/ops/gru_ops.cc index e91d1e8a80..9c8e40851a 100644 --- a/tensorflow/contrib/rnn/ops/gru_ops.cc +++ b/tensorflow/contrib/rnn/ops/gru_ops.cc @@ -69,7 +69,7 @@ Element-wise dot product of a and b is represented by ab Element-wise dot product is represented by \circ Matrix multiplication is represented by * -Baises are initialized with : +Biases are initialized with : `b_ru` - constant_initializer(1.0) `b_c` - constant_initializer(0.0) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 7957edf68c..ffd2421894 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -54,7 +54,7 @@ def blocks_match(sess, use_peephole): initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212) with variable_scope.variable_scope("test", initializer=initializer): - # magic naming so that the cells pick up these variables and resuse them + # magic naming so that the cells pick up these variables and reuse them if use_peephole: wci = variable_scope.get_variable( "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtypes.float32) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 358b2eb02b..2f6ae9f367 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -534,7 +534,7 @@ class GridLSTMCell(rnn_cell_impl.RNNCell): initializer: (optional) The initializer to use for the weight and projection matrices, default None. num_unit_shards: (optional) int, default 1, How to split the weight - matrix. If > 1,the weight matrix is stored across num_unit_shards. + matrix. If > 1, the weight matrix is stored across num_unit_shards. forget_bias: (optional) float, default 1.0, The initial bias of the forget gates, used to reduce the scale of forgetting at the beginning of the training. @@ -993,7 +993,7 @@ class BidirectionalGridLSTMCell(GridLSTMCell): initializer: (optional) The initializer to use for the weight and projection matrices, default None. num_unit_shards: (optional) int, default 1, How to split the weight - matrix. If > 1,the weight matrix is stored across num_unit_shards. + matrix. If > 1, the weight matrix is stored across num_unit_shards. forget_bias: (optional) float, default 1.0, The initial bias of the forget gates, used to reduce the scale of forgetting at the beginning of the training. diff --git a/tensorflow/contrib/solvers/python/ops/least_squares.py b/tensorflow/contrib/solvers/python/ops/least_squares.py index fb7c0eb649..6e164f5342 100644 --- a/tensorflow/contrib/solvers/python/ops/least_squares.py +++ b/tensorflow/contrib/solvers/python/ops/least_squares.py @@ -33,7 +33,7 @@ def cgls(operator, rhs, tol=1e-6, max_iter=20, name="cgls"): r"""Conjugate gradient least squares solver. Solves a linear least squares problem \\(||A x - rhs||_2\\) for a single - righ-hand side, using an iterative, matrix-free algorithm where the action of + right-hand side, using an iterative, matrix-free algorithm where the action of the matrix A is represented by `operator`. The CGLS algorithm implicitly applies the symmetric conjugate gradient algorithm to the normal equations \\(A^* A x = A^* rhs\\). The iteration terminates when either diff --git a/tensorflow/contrib/solvers/python/ops/linear_equations.py b/tensorflow/contrib/solvers/python/ops/linear_equations.py index d791d46763..9305c6a11c 100644 --- a/tensorflow/contrib/solvers/python/ops/linear_equations.py +++ b/tensorflow/contrib/solvers/python/ops/linear_equations.py @@ -41,7 +41,7 @@ def conjugate_gradient(operator, r"""Conjugate gradient solver. Solves a linear system of equations `A*x = rhs` for selfadjoint, positive - definite matrix `A` and righ-hand side vector `rhs`, using an iterative, + definite matrix `A` and right-hand side vector `rhs`, using an iterative, matrix-free algorithm where the action of the matrix A is represented by `operator`. The iteration terminates when either the number of iterations exceeds `max_iter` or when the residual norm has been reduced to `tol` diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e1596e89e2..e01e4a5328 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -35,7 +35,7 @@ tensorflow::Status ConvertCalibGraphToInferGraph( // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. -// max_workspace_size_bytes: The upper bound of memory allowence for +// max_workspace_size_bytes: The upper bound of memory allowance for // engine building. tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 75a3c3d034..92a692baa7 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -455,7 +455,7 @@ class Converter { if (trt_tensors_.count(name)) { inputs.push_back(trt_tensors_.at(name)); } else { - LOG(FATAL) << "input: " << name << " not availabled for node at, " + LOG(FATAL) << "input: " << name << " not available for node at, " << node_def.name(); } } @@ -884,7 +884,7 @@ tensorflow::Status BinaryTensorOpWeight( // default to element-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // TODO(jie): maybe use a permuatation instead to support more cases; + // TODO(jie): maybe use a permutation instead to support more cases; bool permutation_flag = false; if (weights.count() == 1) { @@ -1498,7 +1498,7 @@ tensorflow::Status ConvertConst(Converter& ctx, weights_tensor.int_val().begin(), weights_tensor.int_val() .end()); // make a local copy first to flatten - // doesn't have to be contigous + // doesn't have to be contiguous memcpy(dst, tensor_data.data(), len_tensor); // store into weight store weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } @@ -2212,7 +2212,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { std::list order; for (tensorflow::Node* node : order_vec) { if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to contstruct the + order.push_front(node); // we want topological order to construct the // network layer by layer } } diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 666220d78c..338475d90e 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -41,7 +41,7 @@ def create_inference_graph(input_graph_def, max_workspace_size_bytes=2 << 20, precision_mode="FP32", minimum_segment_size=3): - """Python wrapper for the TRT transormation. + """Python wrapper for the TRT transformation. Args: input_graph_def: GraphDef object containing a model to be transformed. diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 74df75902e..dc7c93f869 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -61,7 +61,7 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - // TODO(sami,aaroey): Need to figureout a way to ensure synchronization + // TODO(sami,aaroey): Need to figure out a way to ensure synchronization // between stream, perhaps using a tensor? auto status = cudaMemcpyAsync(d.first, it.second, d.second, cudaMemcpyDeviceToDevice, stream); diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 0b661bd536..ad01bedd8f 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -75,7 +75,7 @@ def run_graph(gdef, dumm_inp): return val -# Use real data that is representatitive of the inference dataset +# Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. def run_calibration(gdef, dumm_inp): """Run given calibration graph multiple times.""" -- GitLab From 10cddb4268b174f879956a2d1124b8ae1044c425 Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Sat, 17 Mar 2018 13:17:13 -0700 Subject: [PATCH 151/960] Fixed issue #92, timeline_test unit test fails, changed the test so that it can take cpu name changed with MKLDNN naming conversion (#17775) --- tensorflow/python/client/timeline_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index 9641b8b7f2..5e6b5acdb0 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -155,9 +155,12 @@ class TimelineTest(test.TestCase): ctf = step_analysis.chrome_trace.format_to_string() self._validateTrace(ctf) maximums = step_analysis.allocator_maximums - self.assertTrue('cpu' in maximums) + cpuname = 'cpu' + if 'mklcpu' in maximums: + cpuname = 'mkl' + cpuname + self.assertTrue(cpuname in maximums) cpu_max = maximums[ - 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums['cpu'] + 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname] # At least num1 + num2, both float32s (4 bytes each) self.assertGreater(cpu_max.num_bytes, 8) self.assertGreater(cpu_max.timestamp, 0) -- GitLab From 2fa81dc522f984666bf5ba8f2392cbacb464a852 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Sun, 18 Mar 2018 17:23:05 +0800 Subject: [PATCH 152/960] Fix broken link of internal anchor in rnn quickdraw --- tensorflow/docs_src/tutorials/recurrent_quickdraw.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index 7584a76ba5..fd1a56c1b1 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -38,8 +38,8 @@ To try the code for this tutorial: 1. [Download the data](#download-the-data) in `TFRecord` format from [here](http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz) and unzip it. More details about [how to obtain the original Quick, Draw! - data](#optional-download-the-full-quick-draw-data) and [how to convert that - to `TFRecord` files](#optional-converting-the-data) is available below. + data](#optional_download_the_full_quick_draw_data) and [how to convert that + to `TFRecord` files](#optional_converting_the_data) is available below. 1. Execute the tutorial code with the following command to train the RNN-based model described in this tutorial. Make sure to adjust the paths to point to -- GitLab From a52a22912fd6a5a5d8434d08753a11ab8de4bdd3 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Sun, 18 Mar 2018 14:39:42 +0200 Subject: [PATCH 153/960] Simplify `rejection_resample` test to remove unnecessary iterator initialization. Tested: - bazel test :resample_test --- tensorflow/contrib/data/python/kernel_tests/resample_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 0ac8d7359f..c16207fa48 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -45,12 +45,10 @@ class ResampleTest(test.TestCase): target_dist=target_dist, initial_dist=initial_dist, class_func=lambda c, _: c, - seed=27)).make_initializable_iterator()) - init_op = iterator.initializer + seed=27)).make_one_shot_iterator()) get_next = iterator.get_next() with self.test_session() as sess: - sess.run(init_op) returned = [] with self.assertRaises(errors.OutOfRangeError): while True: -- GitLab From 168559c27a2070d8f069e1f11ffd641ca579afc6 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Sun, 18 Mar 2018 22:10:10 +0800 Subject: [PATCH 154/960] Fix two more case-sensitive anchor link --- tensorflow/docs_src/tutorials/recurrent_quickdraw.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index fd1a56c1b1..5d83fbe2a3 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -108,7 +108,7 @@ This download will take a while and download a bit more than 23GB of data. ### Optional: Converting the data To convert the `ndjson` files to -@{$python/python_io#tfrecords_format_details$TFRecord} files containing +@{$python/python_io#TFRecords_Format_Details$TFRecord} files containing [`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) protos run the following command. @@ -118,7 +118,7 @@ protos run the following command. ``` This will store the data in 10 shards of -@{$python/python_io#tfrecords_format_details$TFRecord} files with 10000 items +@{$python/python_io#TFRecords_Format_Details$TFRecord} files with 10000 items per class for the training data and 1000 items per class as eval data. This conversion process is described in more detail in the following. -- GitLab From 485bbb94dc35bf619e59f2be26a54f97b443c451 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Sun, 18 Mar 2018 22:17:45 +0800 Subject: [PATCH 155/960] Fix several broken links in kernel method tutorials --- tensorflow/docs_src/tutorials/kernel_methods.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index b1f06ce0a3..2b35f0a157 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -1,7 +1,11 @@ # Improving Linear Models Using Explicit Kernel Methods Note: This document uses a deprecated version of @{tf.estimator}, +<<<<<<< HEAD which has a different interface (see `tf.contrib.learn Estimator`). +======= +which has a @{tf.contrib.learn.estimator$different interface}. +>>>>>>> Fix several broken links in kernel method tutorials It also uses other `contrib` methods whose @{$version_compat#not_covered$API may not be stable}. @@ -53,7 +57,7 @@ In order to feed data to a `tf.contrib.learn Estimator`, it is helpful to conver it to Tensors. For this, we will use an `input function` which adds Ops to the TensorFlow graph that, when executed, create mini-batches of Tensors to be used downstream. For more background on input functions, check -@{$get_started/premade_estimators#input_fn$this section on input functions}. +@{$get_started/premade_estimators#create_input_functions$this section on input functions}. In this example, we will use the `tf.train.shuffle_batch` Op which, besides converting numpy arrays to Tensors, allows us to specify the batch_size and whether to randomize the input every time the input_fn Ops are executed -- GitLab From 1aa0acf2ffa471b3fbd24481113d2ba8adb14b95 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Sun, 18 Mar 2018 22:25:05 +0800 Subject: [PATCH 156/960] Forgot to save when rebase master --- tensorflow/docs_src/tutorials/kernel_methods.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index 2b35f0a157..e322ccf7c5 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -1,11 +1,7 @@ # Improving Linear Models Using Explicit Kernel Methods Note: This document uses a deprecated version of @{tf.estimator}, -<<<<<<< HEAD which has a different interface (see `tf.contrib.learn Estimator`). -======= -which has a @{tf.contrib.learn.estimator$different interface}. ->>>>>>> Fix several broken links in kernel method tutorials It also uses other `contrib` methods whose @{$version_compat#not_covered$API may not be stable}. -- GitLab From 70bb4240b9ccd1099d378548ffed87d88d160441 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Sun, 18 Mar 2018 22:27:56 +0800 Subject: [PATCH 157/960] Fix different interface link in kernel method --- tensorflow/docs_src/tutorials/kernel_methods.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index e322ccf7c5..73e5c51057 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -1,7 +1,7 @@ # Improving Linear Models Using Explicit Kernel Methods Note: This document uses a deprecated version of @{tf.estimator}, -which has a different interface (see `tf.contrib.learn Estimator`). +which has a @{tf.contrib.learn.Estimator$different interface}. It also uses other `contrib` methods whose @{$version_compat#not_covered$API may not be stable}. -- GitLab From 8851c6fdedfd226f5f9c7da09cecaf6cdea06477 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 18 Mar 2018 07:48:30 -0700 Subject: [PATCH 158/960] Fix build PiperOrigin-RevId: 189506945 --- tensorflow/compiler/jit/xla_interpreter_device.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc index a329451b14..9e098c46f4 100644 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ b/tensorflow/compiler/jit/xla_interpreter_device.cc @@ -41,10 +41,17 @@ Status XlaInterpreterDeviceFactory::CreateDevices( DEVICE_XLA_INTERPRETER, DEVICE_INTERPRETER_XLA_JIT); (void)registrations; + XlaOpRegistry::DeviceRegistration registration; + registration.compilation_device_name = DEVICE_INTERPRETER_XLA_JIT; + registration.requires_compilation = true; + registration.enable_jit_by_default = false; + registration.compile_resource_ops = true; + std::unique_ptr device; - TF_RETURN_IF_ERROR(XlaDevice::Create( - "Interpreter", DEVICE_XLA_INTERPRETER, 0, DEVICE_INTERPRETER_XLA_JIT, - options, name_prefix, /*register_device_for_compilation=*/true, &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create("Interpreter", DEVICE_XLA_INTERPRETER, 0, + DEVICE_INTERPRETER_XLA_JIT, options, + name_prefix, registration, + /*transfer_as_literal=*/false, &device)); devices->push_back(device.release()); return Status::OK(); } -- GitLab From 2b1b9ea110bcf26f047689564298de43ab83db18 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Mon, 19 Mar 2018 00:34:25 +0800 Subject: [PATCH 159/960] Fix the broken link ofr build the op library in extend tutorials --- tensorflow/docs_src/extend/add_filesys.md | 2 +- tensorflow/docs_src/extend/new_data_formats.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/extend/add_filesys.md b/tensorflow/docs_src/extend/add_filesys.md index 06f11de4eb..bc0f662f0c 100644 --- a/tensorflow/docs_src/extend/add_filesys.md +++ b/tensorflow/docs_src/extend/add_filesys.md @@ -225,7 +225,7 @@ it will use the `FooBarFileSystem` implementation. Next, you must build a shared object containing this implementation. An example of doing so using bazel's `cc_binary` rule can be found [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD#L244), -but you may use any build system to do so. See the section on @{$adding_an_op#build-the-op-library$building the op library} for similar +but you may use any build system to do so. See the section on @{$adding_an_op#build_the_op_library$building the op library} for similar instructions. The result of building this target is a `.so` shared object file. diff --git a/tensorflow/docs_src/extend/new_data_formats.md b/tensorflow/docs_src/extend/new_data_formats.md index b3cc968047..10e717c280 100644 --- a/tensorflow/docs_src/extend/new_data_formats.md +++ b/tensorflow/docs_src/extend/new_data_formats.md @@ -167,7 +167,7 @@ REGISTER_KERNEL_BUILDER(Name("TextLineReader").Device(DEVICE_CPU), ``` The last step is to add the Python wrapper. You can either do this by -@{$adding_an_op#building_the_op_library$compiling a dynamic library} +@{$adding_an_op#build_the_op_library$compiling a dynamic library} or, if you are building TensorFlow from source, adding to `user_ops.py`. For the latter, you will import `tensorflow.python.ops.io_ops` in [`tensorflow/python/user_ops/user_ops.py`](https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py) -- GitLab From 70cd9ed2d2ea37a6da6f813a99b32c03e90736a4 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Sun, 18 Mar 2018 14:35:24 -0700 Subject: [PATCH 160/960] Fix random_uniform documentation formatting (#17805) --- tensorflow/python/ops/random_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index db8159579a..6a2dd3f1cd 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -209,7 +209,7 @@ def random_uniform(shape, maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on the range of random values to generate. Defaults to 1 if `dtype` is floating point. - dtype: The type of the output: 'float16`, `float32`, `float64`, `int32`, + dtype: The type of the output: `float16`, `float32`, `float64`, `int32`, or `int64`. seed: A Python integer. Used to create a random seed for the distribution. See @{tf.set_random_seed} -- GitLab From 838a8f54f92452a15e3bb62a23ad5cd67e86933f Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Sun, 18 Mar 2018 18:18:47 -0400 Subject: [PATCH 161/960] Support TensorArray in BeamSearchDecoder state. (#13312) * Support TensorArray in BeamSearchDecoder state. * Use gather_nd for reordering and test more shapes. * Add a flag to disable TensorArrays reordering. * Add shape checks before reordering a TensorArray. * Directly use float32 member of dtypes * Directly access dimension value if defined * Add more TensorArrays reordering constraints * Do not unstack reordered TensorArrays * Improve warning for ignored TensorArrays * Consistent static and runtime dimensions check * Use comparison operators * Fix dynamic checks and add tests * Make static checks error a warning * Fix pylint errors --- .../kernel_tests/attention_wrapper_test.py | 3 + .../kernel_tests/beam_search_decoder_test.py | 104 ++++++++++- .../seq2seq/python/ops/attention_wrapper.py | 19 +- .../seq2seq/python/ops/beam_search_decoder.py | 169 +++++++++++++++++- 4 files changed, 280 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index b427dff88b..c4139dde49 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -222,6 +222,9 @@ class AttentionWrapperTest(test.TestCase): self.assertEqual( (None, batch_size, None), tuple(state_alignment_history.get_shape().as_list())) + nest.assert_same_structure( + cell.state_size, + cell.zero_state(batch_size, dtypes.float32)) # Remove the history from final_state for purposes of the # remainder of the tests. final_state = final_state._replace(alignment_history=()) # pylint: disable=protected-access diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py index 9265540317..178328619f 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py @@ -27,6 +27,7 @@ from tensorflow.contrib.seq2seq.python.ops import beam_search_ops from tensorflow.contrib.seq2seq.python.ops import decoder from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.layers import core as layers_core from tensorflow.python.ops import array_ops @@ -70,6 +71,98 @@ class TestGatherTree(test.TestCase): self.assertAllEqual(expected_result, res_) + def _test_gather_tree_from_array(self, + depth_ndims=0, + merged_batch_beam=False): + array = np.array( + [[[1, 2, 3], [4, 5, 6], [7, 8, 9], [0, 0, 0]], + [[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 0]]]).transpose([1, 0, 2]) + parent_ids = np.array( + [[[0, 0, 0], [0, 1, 1], [2, 1, 2], [-1, -1, -1]], + [[0, 0, 0], [1, 1, 0], [2, 0, 1], [0, 1, 0]]]).transpose([1, 0, 2]) + expected_array = np.array( + [[[2, 2, 2], [6, 5, 6], [7, 8, 9], [0, 0, 0]], + [[2, 3, 2], [7, 5, 7], [8, 9, 8], [11, 12, 0]]]).transpose([1, 0, 2]) + sequence_length = [[3, 3, 3], [4, 4, 3]] + + array = ops.convert_to_tensor( + array, dtype=dtypes.float32) + parent_ids = ops.convert_to_tensor( + parent_ids, dtype=dtypes.int32) + expected_array = ops.convert_to_tensor( + expected_array, dtype=dtypes.float32) + + max_time = array_ops.shape(array)[0] + batch_size = array_ops.shape(array)[1] + beam_width = array_ops.shape(array)[2] + + def _tile_in_depth(tensor): + # Generate higher rank tensors by concatenating tensor and tensor + 1. + for _ in range(depth_ndims): + tensor = array_ops.stack([tensor, tensor + 1], -1) + return tensor + + if merged_batch_beam: + array = array_ops.reshape( + array, [max_time, batch_size * beam_width]) + expected_array = array_ops.reshape( + expected_array, [max_time, batch_size * beam_width]) + + if depth_ndims > 0: + array = _tile_in_depth(array) + expected_array = _tile_in_depth(expected_array) + + sorted_array = beam_search_decoder.gather_tree_from_array( + array, parent_ids, sequence_length) + + with self.test_session() as sess: + sorted_array = sess.run(sorted_array) + expected_array = sess.run(expected_array) + self.assertAllEqual(expected_array, sorted_array) + + def test_gather_tree_from_array_scalar(self): + self._test_gather_tree_from_array() + + def test_gather_tree_from_array_1d(self): + self._test_gather_tree_from_array(depth_ndims=1) + + def test_gather_tree_from_array_1d_with_merged_batch_beam(self): + self._test_gather_tree_from_array(depth_ndims=1, merged_batch_beam=True) + + def test_gather_tree_from_array_2d(self): + self._test_gather_tree_from_array(depth_ndims=2) + + +class TestArrayShapeChecks(test.TestCase): + + def _test_array_shape_dynamic_checks(self, static_shape, dynamic_shape, + batch_size, beam_width, is_valid=True): + t = array_ops.placeholder_with_default( + np.random.randn(*static_shape).astype(np.float32), + shape=dynamic_shape) + + batch_size = array_ops.constant(batch_size) + check_op = beam_search_decoder._check_batch_beam(t, batch_size, beam_width) # pylint: disable=protected-access + + with self.test_session() as sess: + if is_valid: + sess.run(check_op) + else: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(check_op) + + def test_array_shape_dynamic_checks(self): + self._test_array_shape_dynamic_checks( + (8, 4, 5, 10), (None, None, 5, 10), 4, 5, is_valid=True) + self._test_array_shape_dynamic_checks( + (8, 20, 10), (None, None, 10), 4, 5, is_valid=True) + self._test_array_shape_dynamic_checks( + (8, 21, 10), (None, None, 10), 4, 5, is_valid=False) + self._test_array_shape_dynamic_checks( + (8, 4, 6, 10), (None, None, None, 10), 4, 5, is_valid=False) + self._test_array_shape_dynamic_checks( + (8, 4), (None, None), 4, 5, is_valid=False) + class TestEosMasking(test.TestCase): """Tests EOS masking used in beam search.""" @@ -319,7 +412,8 @@ class TestLargeBeamStep(test.TestCase): class BeamSearchDecoderTest(test.TestCase): - def _testDynamicDecodeRNN(self, time_major, has_attention): + def _testDynamicDecodeRNN(self, time_major, has_attention, + with_alignment_history=False): encoder_sequence_length = np.array([3, 2, 3, 1, 1]) decoder_sequence_length = np.array([2, 0, 1, 2, 3]) batch_size = 5 @@ -359,7 +453,7 @@ class BeamSearchDecoderTest(test.TestCase): cell=cell, attention_mechanism=attention_mechanism, attention_layer_size=attention_depth, - alignment_history=False) + alignment_history=with_alignment_history) cell_state = cell.zero_state( dtype=dtypes.float32, batch_size=batch_size_tensor * beam_width) if has_attention: @@ -420,6 +514,12 @@ class BeamSearchDecoderTest(test.TestCase): def testDynamicDecodeRNNBatchMajorYesAttention(self): self._testDynamicDecodeRNN(time_major=False, has_attention=True) + def testDynamicDecodeRNNBatchMajorYesAttentionWithAlignmentHistory(self): + self._testDynamicDecodeRNN( + time_major=False, + has_attention=True, + with_alignment_history=True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index f8da5a3e17..9ff8a343f1 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -1278,7 +1278,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): attention_state=self._item_or_tuple( a.state_size for a in self._attention_mechanisms), alignment_history=self._item_or_tuple( - () for _ in self._attention_mechanisms)) # sometimes a TensorArray + a.alignments_size if self._alignment_history else () + for a in self._attention_mechanisms)) # sometimes a TensorArray def zero_state(self, batch_size, dtype): """Return an initial (zero) state tuple for this `AttentionWrapper`. @@ -1318,22 +1319,26 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): cell_state = nest.map_structure( lambda s: array_ops.identity(s, name="checked_cell_state"), cell_state) + initial_alignments = [ + attention_mechanism.initial_alignments(batch_size, dtype) + for attention_mechanism in self._attention_mechanisms] return AttentionWrapperState( cell_state=cell_state, time=array_ops.zeros([], dtype=dtypes.int32), attention=_zero_state_tensors(self._attention_layer_size, batch_size, dtype), - alignments=self._item_or_tuple( - attention_mechanism.initial_alignments(batch_size, dtype) - for attention_mechanism in self._attention_mechanisms), + alignments=self._item_or_tuple(initial_alignments), attention_state=self._item_or_tuple( attention_mechanism.initial_state(batch_size, dtype) for attention_mechanism in self._attention_mechanisms), alignment_history=self._item_or_tuple( - tensor_array_ops.TensorArray(dtype=dtype, size=0, - dynamic_size=True) + tensor_array_ops.TensorArray( + dtype, + size=0, + dynamic_size=True, + element_shape=alignment.shape) if self._alignment_history else () - for _ in self._attention_mechanisms)) + for alignment in initial_alignments)) def call(self, inputs, state): """Perform a step of attention-wrapped RNN. diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 6adbb8be40..a26107b0d7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.platform import tf_logging from tensorflow.python.util import nest __all__ = [ @@ -121,14 +122,114 @@ def tile_batch(t, multiplier, name=None): return nest.map_structure(lambda t_: _tile_batch(t_, multiplier), t) +def gather_tree_from_array(t, parent_ids, sequence_length): + """Calculates the full beams for `TensorArray`s. + + Args: + t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of + shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` + where `s` is the depth shape. + parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. + sequence_length: The sequence length of shape `[batch_size, beam_width]`. + + Returns: + A `Tensor` which is a stacked `TensorArray` of the same size and type as + `t` and where beams are sorted in each `Tensor` according to `parent_ids`. + """ + max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0] + batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1] + beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2] + + # Generate beam ids that will be reordered by gather_tree. + beam_ids = array_ops.expand_dims( + array_ops.expand_dims(math_ops.range(beam_width), 0), 0) + beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1]) + + mask = array_ops.sequence_mask( + sequence_length, maxlen=max_time, dtype=dtypes.int32) + mask = array_ops.transpose(mask, perm=[2, 0, 1]) + + # Use beam_width + 1 to mark the end of beam. + masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1) + + max_sequence_lengths = math_ops.to_int32( + math_ops.reduce_max(sequence_length, axis=1)) + sorted_beam_ids = beam_search_ops.gather_tree( + step_ids=masked_beam_ids, + parent_ids=parent_ids, + max_sequence_lengths=max_sequence_lengths, + end_token=beam_width + 1) + + # For out of range steps, simply copy the same beam. + sorted_beam_ids = array_ops.where( + math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids) + + # Generate indices for gather_nd. + time_ind = array_ops.tile(array_ops.reshape( + math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width]) + batch_ind = array_ops.tile(array_ops.reshape( + math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width]) + batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2]) + indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1) + + # Gather from a tensor with collapsed additional dimensions. + gather_from = t + final_shape = array_ops.shape(gather_from) + gather_from = array_ops.reshape( + gather_from, [max_time, batch_size, beam_width, -1]) + ordered = array_ops.gather_nd(gather_from, indices) + ordered = array_ops.reshape(ordered, final_shape) + + return ordered + + def _check_maybe(t): - if isinstance(t, tensor_array_ops.TensorArray): - raise TypeError( - "TensorArray state is not supported by BeamSearchDecoder: %s" % t.name) if t.shape.ndims is None: raise ValueError( "Expected tensor (%s) to have known rank, but ndims == None." % t) +def _check_static_batch_beam_maybe(shape, batch_size, beam_width): + """Raises an exception if dimensions are known statically and can not be + reshaped to [batch_size, beam_size, -1]. + """ + reshaped_shape = tensor_shape.TensorShape([batch_size, beam_width, None]) + if (batch_size is not None and shape[0].value is not None + and (shape[0] != batch_size * beam_width + or (shape.ndims >= 2 and shape[1].value is not None + and (shape[0] != batch_size or shape[1] != beam_width)))): + tf_logging.warn("TensorArray reordering expects elements to be " + "reshapable to %s which is incompatible with the " + "current shape %s. Consider setting " + "reorder_tensor_arrays to False to disable TensorArray " + "reordering during the beam search." + % (reshaped_shape, shape)) + return False + return True + +def _check_batch_beam(t, batch_size, beam_width): + """Returns an Assert operation checking that the elements of the stacked + TensorArray can be reshaped to [batch_size, beam_size, -1]. At this point, + the TensorArray elements have a known rank of at least 1. + """ + error_message = ("TensorArray reordering expects elements to be " + "reshapable to [batch_size, beam_size, -1] which is " + "incompatible with the dynamic shape of %s elements. " + "Consider setting reorder_tensor_arrays to False to disable " + "TensorArray reordering during the beam search." + % (t.name)) + rank = t.shape.ndims + shape = array_ops.shape(t) + if rank == 2: + condition = math_ops.equal(shape[1], batch_size * beam_width) + else: + condition = math_ops.logical_or( + math_ops.equal(shape[1], batch_size * beam_width), + math_ops.logical_and( + math_ops.equal(shape[1], batch_size), + math_ops.equal(shape[2], beam_width))) + return control_flow_ops.Assert(condition, [error_message]) + + class BeamSearchDecoder(decoder.Decoder): """BeamSearch sampling decoder. @@ -173,7 +274,8 @@ class BeamSearchDecoder(decoder.Decoder): initial_state, beam_width, output_layer=None, - length_penalty_weight=0.0): + length_penalty_weight=0.0, + reorder_tensor_arrays=True): """Initialize the BeamSearchDecoder. Args: @@ -188,6 +290,12 @@ class BeamSearchDecoder(decoder.Decoder): `tf.layers.Dense`. Optional layer to apply to the RNN output prior to storing the result or sampling. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. + reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell + state will be reordered according to the beam search path. If the + `TensorArray` can be reordered, the stacked form will be returned. + Otherwise, the `TensorArray` will be returned as is. Set this flag to + `False` if the cell state contains `TensorArray`s that are not amenable + to reordering. Raises: TypeError: if `cell` is not an instance of `RNNCell`, @@ -202,6 +310,7 @@ class BeamSearchDecoder(decoder.Decoder): "output_layer must be a Layer, received: %s" % type(output_layer)) self._cell = cell self._output_layer = output_layer + self._reorder_tensor_arrays = reorder_tensor_arrays if callable(embedding): self._embedding_fn = embedding @@ -342,6 +451,11 @@ class BeamSearchDecoder(decoder.Decoder): outputs.parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=self._end_token) + if self._reorder_tensor_arrays: + final_state = final_state._replace(cell_state=nest.map_structure( + lambda t: self._maybe_sort_array_beams( + t, outputs.parent_ids, final_state.lengths), + final_state.cell_state)) outputs = FinalBeamSearchDecoderOutput( beam_search_decoder_output=outputs, predicted_ids=predicted_ids) return outputs, final_state @@ -432,9 +546,10 @@ class BeamSearchDecoder(decoder.Decoder): returned unchanged. Raises: - TypeError: If `t` is an instance of `TensorArray`. ValueError: If the rank of `t` is not statically known. """ + if isinstance(t, tensor_array_ops.TensorArray): + return t _check_maybe(t) if t.shape.ndims >= 1: return self._split_batch_beams(t, s) @@ -455,15 +570,55 @@ class BeamSearchDecoder(decoder.Decoder): A reshaped version of t with shape `[batch_size, beam_width] + s`. Raises: - TypeError: If `t` is an instance of `TensorArray`. ValueError: If the rank of `t` is not statically known. """ + if isinstance(t, tensor_array_ops.TensorArray): + return t _check_maybe(t) if t.shape.ndims >= 2: return self._merge_batch_beams(t, s) else: return t + def _maybe_sort_array_beams(self, t, parent_ids, sequence_length): + """Maybe sorts beams within a `TensorArray`. + + Args: + t: A `TensorArray` of size `max_time` that contains `Tensor`s of shape + `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` where + `s` is the depth shape. + parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. + sequence_length: The sequence length of shape `[batch_size, beam_width]`. + + Returns: + A `TensorArray` where beams are sorted in each `Tensor` or `t` itself if + it is not a `TensorArray` or does not meet shape requirements. + """ + if not isinstance(t, tensor_array_ops.TensorArray): + return t + # pylint: disable=protected-access + if (not t._infer_shape or not t._element_shape + or t._element_shape[0].ndims is None + or t._element_shape[0].ndims < 1): + shape = ( + t._element_shape[0] if t._infer_shape and t._element_shape + else tensor_shape.TensorShape(None)) + tf_logging.warn("The TensorArray %s in the cell state is not amenable to " + "sorting based on the beam search result. For a " + "TensorArray to be sorted, its elements shape must be " + "defined and have at least a rank of 1, but saw shape: %s" + % (t.handle.name, shape)) + return t + shape = t._element_shape[0] + # pylint: enable=protected-access + if not _check_static_batch_beam_maybe( + shape, tensor_util.constant_value(self._batch_size), self._beam_width): + return t + t = t.stack() + with ops.control_dependencies( + [_check_batch_beam(t, self._batch_size, self._beam_width)]): + return gather_tree_from_array(t, parent_ids, sequence_length) + def step(self, time, inputs, state, name=None): """Perform a decoding step. @@ -758,6 +913,8 @@ def _maybe_tensor_gather_helper(gather_indices, gather_from, batch_size, output: Gathered tensor of shape tf.shape(gather_from)[:1+len(gather_shape)] or the original tensor if its dimensions are too small. """ + if isinstance(gather_from, tensor_array_ops.TensorArray): + return gather_from _check_maybe(gather_from) if gather_from.shape.ndims >= len(gather_shape): return _tensor_gather_helper( -- GitLab From d28d4f4366b24876862b39351f67eed78c87f5eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 18 Mar 2018 15:18:06 -0700 Subject: [PATCH 162/960] Add precision and recall metrics to _BinaryLogisticHeadWithSigmoidCrossEntropyLoss. This change makes most of the binary classifiers in the canned estimators provide precision and recall metrics during evaluation. This matches the behavior of the canned estimators defined in the deprecated tf.contrib.learn.estimator. PiperOrigin-RevId: 189522420 --- .../python/estimator/canned/baseline_test.py | 7 ++++++- .../python/estimator/canned/dnn_testing_utils.py | 3 +++ tensorflow/python/estimator/canned/head.py | 12 ++++++++++++ tensorflow/python/estimator/canned/head_test.py | 14 +++++++++++++- .../estimator/canned/linear_testing_utils.py | 6 ++++++ tensorflow/python/estimator/canned/metric_keys.py | 2 ++ 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py index 96639e88ea..7833df2052 100644 --- a/tensorflow/python/estimator/canned/baseline_test.py +++ b/tensorflow/python/estimator/canned/baseline_test.py @@ -1071,6 +1071,8 @@ class BaselineClassifierEvaluationTest(test.TestCase): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: 1.3133, metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, @@ -1132,6 +1134,8 @@ class BaselineClassifierEvaluationTest(test.TestCase): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, metric_keys.MetricKeys.ACCURACY: 0.5, + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689, metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, @@ -1207,6 +1211,8 @@ class BaselineClassifierEvaluationTest(test.TestCase): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.), + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, metric_keys.MetricKeys.LABEL_MEAN: label_mean, metric_keys.MetricKeys.ACCURACY_BASELINE: ( @@ -1542,4 +1548,3 @@ class BaselineLogitFnTest(test.TestCase): if __name__ == '__main__': test.main() - diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index 9a7d088778..85b058caf3 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -1035,6 +1035,8 @@ class BaseDNNClassifierEvaluateTest(object): metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2., metric_keys.MetricKeys.ACCURACY: 0.5, + metric_keys.MetricKeys.PRECISION: 0.0, + metric_keys.MetricKeys.RECALL: 0.0, metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597, metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, @@ -1042,6 +1044,7 @@ class BaseDNNClassifierEvaluateTest(object): # that is what the algorithm returns. metric_keys.MetricKeys.AUC: 0.5, metric_keys.MetricKeys.AUC_PR: 0.75, + ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1)) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 8d742a2c61..f68204a35e 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -940,6 +940,18 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): predictions=class_ids, weights=weights, name=keys.ACCURACY), + _summary_key(self._name, keys.PRECISION): + metrics_lib.precision( + labels=labels, + predictions=class_ids, + weights=weights, + name=keys.PRECISION), + _summary_key(self._name, keys.RECALL): + metrics_lib.recall( + labels=labels, + predictions=class_ids, + weights=weights, + name=keys.RECALL), _summary_key(self._name, keys.PREDICTION_MEAN): _predictions_mean( predictions=logistic, diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index b40758f8fe..b5d35c9b45 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -1559,6 +1559,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): # loss_mean = loss/2 = 41./2 = 20.5 keys.LOSS_MEAN: 20.5, keys.ACCURACY: 1./2, + keys.PRECISION: 1., + keys.RECALL: 1./2, keys.PREDICTION_MEAN: 1./2, keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, @@ -1602,11 +1604,13 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): expected_metric_keys = [ '{}/some_binary_head'.format(metric_keys.MetricKeys.LOSS_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY), + '{}/some_binary_head'.format(metric_keys.MetricKeys.PRECISION), + '{}/some_binary_head'.format(metric_keys.MetricKeys.RECALL), '{}/some_binary_head'.format(metric_keys.MetricKeys.PREDICTION_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.LABEL_MEAN), '{}/some_binary_head'.format(metric_keys.MetricKeys.ACCURACY_BASELINE), '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC), - '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC_PR) + '{}/some_binary_head'.format(metric_keys.MetricKeys.AUC_PR), ] self.assertItemsEqual(expected_metric_keys, spec.eval_metric_ops.keys()) @@ -1637,6 +1641,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LOSS_MEAN: expected_unregularized_loss, keys.LOSS_REGULARIZATION: expected_regularization_loss, keys.ACCURACY: 1./2, + keys.PRECISION: 1., + keys.RECALL: 1./2, keys.PREDICTION_MEAN: 1./2, keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, @@ -1742,6 +1748,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): expected_metrics = { keys.LOSS_MEAN: 1.62652338 / 2., keys.ACCURACY: 1./2, + keys.PRECISION: 1., + keys.RECALL: .5, keys.PREDICTION_MEAN: 1./2, keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, @@ -2187,6 +2195,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LOSS_MEAN: 26.9615384615, # accuracy = (1*1 + .1*0 + 1.5*0)/(1 + .1 + 1.5) = 1/2.6 = .38461538461 keys.ACCURACY: .38461538461, + keys.PRECISION: 1./2.5, + keys.RECALL: 1./1.1, # prediction_mean = (1*1 + .1*0 + 1.5*1)/(1 + .1 + 1.5) = 2.5/2.6 # = .96153846153 keys.PREDICTION_MEAN: .96153846153, @@ -2486,6 +2496,8 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): expected_metrics = { keys.LOSS_MEAN: expected_loss / np.sum(weights), keys.ACCURACY: (1.*0. + 1.5*1. + 2.*1. + 2.5*0.) / np.sum(weights), + keys.PRECISION: 2.0/3.0, + keys.RECALL: 2.0/4.5, keys.PREDICTION_MEAN: (1.*1 + 1.5*0 + 2.*1 + 2.5*0) / np.sum(weights), keys.LABEL_MEAN: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights), keys.ACCURACY_BASELINE: (1.*0 + 1.5*0 + 2.*1 + 2.5*1) / np.sum(weights), diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py index 8e506a7631..da3ce86999 100644 --- a/tensorflow/python/estimator/canned/linear_testing_utils.py +++ b/tensorflow/python/estimator/canned/linear_testing_utils.py @@ -1337,6 +1337,8 @@ class BaseLinearClassifierEvaluationTest(object): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: 41., metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0., metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, @@ -1406,6 +1408,8 @@ class BaseLinearClassifierEvaluationTest(object): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: 0.5, metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, @@ -1487,6 +1491,8 @@ class BaseLinearClassifierEvaluationTest(object): ops.GraphKeys.GLOBAL_STEP: 100, metric_keys.MetricKeys.LOSS_MEAN: loss_mean, metric_keys.MetricKeys.ACCURACY: 0., + metric_keys.MetricKeys.PRECISION: 0., + metric_keys.MetricKeys.RECALL: 0., metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean, metric_keys.MetricKeys.LABEL_MEAN: label_mean, metric_keys.MetricKeys.ACCURACY_BASELINE: ( diff --git a/tensorflow/python/estimator/canned/metric_keys.py b/tensorflow/python/estimator/canned/metric_keys.py index 44eb680939..f374d31549 100644 --- a/tensorflow/python/estimator/canned/metric_keys.py +++ b/tensorflow/python/estimator/canned/metric_keys.py @@ -28,6 +28,8 @@ class MetricKeys(object): LOSS_REGULARIZATION = 'regularization_loss' ACCURACY = 'accuracy' + PRECISION = 'precision' + RECALL = 'recall' # This is the best the model could do by always predicting one class. # Should be < ACCURACY in a trained model. ACCURACY_BASELINE = 'accuracy_baseline' -- GitLab From d99731f28ab7566762da9b22cdc24486e3308a60 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Nov 2017 22:22:37 +0000 Subject: [PATCH 163/960] Add int64 support of `axis` (`Tidx`) for ConcatV2 In `array_ops.cc`, it was specified that ConcatV2 support both int32 and int64 data types of `axis` (`Tidx`): ``` .Attr("Tidx: {int32, int64} = DT_INT32") ``` However, in actual kernel implementations only int32 is supported as there is an unnecessary `.TypeConstraint("Tidx")` specified. This fix tries to address the discrepancy between the ops declaration and kernel registration by adding the int64 axis (`Tidx`) support for `ConcatV2`. This fix removes the TypeConstraint and adds additional processing so that differnt types (int32 or int64) of `axis` could be processed correctly. Additional test cases have been added to cover the changes as well. Signed-off-by: Yong Tang --- tensorflow/core/kernels/concat_op.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index 7011550f7e..c4850150e1 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -62,8 +62,19 @@ class ConcatBaseOp : public OpKernel { axis_attribute_name, " tensor should be a scalar integer, but got shape ", concat_dim_tensor->shape().DebugString())); - const int32 concat_dim = - internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + int64 concat_dim; + // In case of ConcatV2, "axis" could be int32 or int64 + if (AxisArgName == NAME_IS_AXIS) { + OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32 || concat_dim_tensor->dtype() == DT_INT64), errors::InvalidArgument(axis_attribute_name, " tensor should be int32 or int64, but got ", concat_dim_tensor->dtype())); + } else { + OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32), errors::InvalidArgument(axis_attribute_name, " tensor should be int32, but got ", concat_dim_tensor->dtype())); + } + if (concat_dim_tensor->dtype() == DT_INT32) { + concat_dim = internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + } else { + concat_dim = internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + } + OpInputList values; OP_REQUIRES_OK(c, c->input_list("values", &values)); const int N = values.size(); @@ -163,7 +174,6 @@ using ConcatV2Op = ConcatBaseOp; REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ .HostMemory("axis"), \ ConcatV2Op) @@ -187,7 +197,6 @@ REGISTER_CONCAT(qint32); REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ .HostMemory("axis"), \ ConcatV2Op) @@ -212,7 +221,6 @@ REGISTER_KERNEL_BUILDER(Name("Concat") REGISTER_KERNEL_BUILDER(Name("ConcatV2") .Device(DEVICE_GPU) .TypeConstraint("T") - .TypeConstraint("Tidx") .HostMemory("values") .HostMemory("axis") .HostMemory("output"), @@ -230,7 +238,6 @@ REGISTER_KERNEL_BUILDER(Name("ConcatV2") REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ .Device(DEVICE_SYCL) \ .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ .HostMemory("axis"), \ ConcatV2Op) @@ -246,7 +253,6 @@ REGISTER_KERNEL_BUILDER(Name("Concat") REGISTER_KERNEL_BUILDER(Name("ConcatV2") .Device(DEVICE_SYCL) .TypeConstraint("T") - .TypeConstraint("Tidx") .HostMemory("values") .HostMemory("axis") .HostMemory("output"), -- GitLab From beff710f6230bc3c27ffe53a3d788bd6503359ac Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Nov 2017 22:27:43 +0000 Subject: [PATCH 164/960] Add test cases for int64 support of `axis` (`Tidx`) for ConcatV2 Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/concat_op_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 81c6a4aa6e..073611628c 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -606,6 +606,17 @@ class ConcatOpTest(test.TestCase): inp_tensors_placeholders, -2, output_shape=[2, 3], gather_indexes=[2, 0], feed_dict=feed_dict) + def testConcatAxisType(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + t1 = [[1, 2, 3], [4, 5, 6]] + t2 = [[7, 8, 9], [10, 11, 12]] + + c = gen_array_ops._concat_v2([t1, t2], + constant_op.constant(1, dtype=dtype)) + self.assertEqual([2, 6], c.get_shape().as_list()) + output = c.eval() + self.assertAllEqual([[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]], output) class ConcatOffsetTest(test.TestCase): -- GitLab From a508dcb2b732e8423794635630437b6c73deecba Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Nov 2017 22:28:36 +0000 Subject: [PATCH 165/960] Sanitize concat_op.cc with clang-format -i --style=Google Signed-off-by: Yong Tang --- tensorflow/core/kernels/concat_op.cc | 86 ++++++++++++++++------------ 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index c4850150e1..f16766315f 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -28,6 +27,7 @@ limitations under the License. #include "tensorflow/core/kernels/concat_lib.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -53,9 +53,9 @@ class ConcatBaseOp : public OpKernel { void Compute(OpKernelContext* c) override { const Tensor* concat_dim_tensor; const char* axis_attribute_name = - AxisArgName == NAME_IS_AXIS - ? "axis" - : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : ""; + AxisArgName == NAME_IS_AXIS ? "axis" : AxisArgName == NAME_IS_CONCAT_DIM + ? "concat_dim" + : ""; OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor)); OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()), errors::InvalidArgument( @@ -65,14 +65,24 @@ class ConcatBaseOp : public OpKernel { int64 concat_dim; // In case of ConcatV2, "axis" could be int32 or int64 if (AxisArgName == NAME_IS_AXIS) { - OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32 || concat_dim_tensor->dtype() == DT_INT64), errors::InvalidArgument(axis_attribute_name, " tensor should be int32 or int64, but got ", concat_dim_tensor->dtype())); + OP_REQUIRES( + c, (concat_dim_tensor->dtype() == DT_INT32 || + concat_dim_tensor->dtype() == DT_INT64), + errors::InvalidArgument(axis_attribute_name, + " tensor should be int32 or int64, but got ", + concat_dim_tensor->dtype())); } else { - OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32), errors::InvalidArgument(axis_attribute_name, " tensor should be int32, but got ", concat_dim_tensor->dtype())); + OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32), + errors::InvalidArgument(axis_attribute_name, + " tensor should be int32, but got ", + concat_dim_tensor->dtype())); } if (concat_dim_tensor->dtype() == DT_INT32) { - concat_dim = internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + concat_dim = + internal::SubtleMustCopy(concat_dim_tensor->scalar()()); } else { - concat_dim = internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + concat_dim = + internal::SubtleMustCopy(concat_dim_tensor->scalar()()); } OpInputList values; @@ -165,16 +175,16 @@ using ConcatOp = ConcatBaseOp; template using ConcatV2Op = ConcatBaseOp; -#define REGISTER_CONCAT(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .HostMemory("axis"), \ +#define REGISTER_CONCAT(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_POD_STRING_TYPES(REGISTER_CONCAT); @@ -188,16 +198,16 @@ REGISTER_CONCAT(qint32); #if GOOGLE_CUDA -#define REGISTER_GPU(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("axis"), \ +#define REGISTER_GPU(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); @@ -229,16 +239,16 @@ REGISTER_KERNEL_BUILDER(Name("ConcatV2") #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("axis"), \ +#define REGISTER_SYCL(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); -- GitLab From 343d30aec78b9f8f58a132988ae237e4fd9ce917 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 19 Mar 2018 00:06:38 +0000 Subject: [PATCH 166/960] Change `gen_array_ops._concat_v2` to gen_array_ops.concat_v2` as `_` is not needed any more. Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/concat_op_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 073611628c..c22934ce47 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -612,8 +612,8 @@ class ConcatOpTest(test.TestCase): t1 = [[1, 2, 3], [4, 5, 6]] t2 = [[7, 8, 9], [10, 11, 12]] - c = gen_array_ops._concat_v2([t1, t2], - constant_op.constant(1, dtype=dtype)) + c = gen_array_ops.concat_v2([t1, t2], + constant_op.constant(1, dtype=dtype)) self.assertEqual([2, 6], c.get_shape().as_list()) output = c.eval() self.assertAllEqual([[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]], output) -- GitLab From 4b1b779b48aca2059319b9af20295e04d60fa1f1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 05:01:05 -0700 Subject: [PATCH 167/960] Add new helpers to HLO sharding. PiperOrigin-RevId: 189569053 --- tensorflow/compiler/xla/service/hlo_sharding.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index e715dff9a0..38273236f9 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -173,7 +173,7 @@ class HloSharding { bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && - protobuf_util::ProtobufEquals(tile_shape_, other.tile_shape_) && + ShapeUtil::Compatible(tile_shape_, other.tile_shape_) && tile_assignment_ == other.tile_assignment_ && tuple_elements_ == other.tuple_elements_; } @@ -207,6 +207,13 @@ class HloSharding { // REQUIRES: !IsReplicated() && !IsTuple() const Array& tile_assignment() const { return tile_assignment_; } + // Returns the flattened list of all the leaf shardings in a tuple shape, by + // pre-order walk (ShapeTree iterator order). + // REQUIRES: IsTuple(). + const std::vector& tuple_elements() const { + return tuple_elements_; + } + // Return a new sharding that can apply to the given new shape. // If this sharding is tile-maximal, the returned sharding will be the same as // this sharding. If this sharding is not tile-maximal, the returned -- GitLab From 8bd5da29ca4e502591fb38dfd27ecd86c9cef7ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 06:27:00 -0700 Subject: [PATCH 168/960] Adding non-linear image warping ops to tf.contrib.image New ops are: tf.contrib.image.sparse_image_warp, tf.contrib.image.dense_image_warp, and tf.contrib.image.interpolate_spline. PiperOrigin-RevId: 189574951 --- tensorflow/contrib/cmake/tf_tests.cmake | 2 + tensorflow/contrib/image/BUILD | 113 +++++++ tensorflow/contrib/image/__init__.py | 7 + .../kernel_tests/dense_image_warp_test.py | 267 ++++++++++++++++ .../kernel_tests/interpolate_spline_test.py | 264 ++++++++++++++++ .../kernel_tests/sparse_image_warp_test.py | 254 +++++++++++++++ .../test_data/Yellow_Smiley_Face.png | Bin 0 -> 14060 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-0.png | Bin 0 -> 18537 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-1.png | Bin 0 -> 19086 bytes ...llow_Smiley_Face_Warp-interp-1-clamp-4.png | Bin 0 -> 18884 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-0.png | Bin 0 -> 18109 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-1.png | Bin 0 -> 19251 bytes ...llow_Smiley_Face_Warp-interp-2-clamp-4.png | Bin 0 -> 19132 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-0.png | Bin 0 -> 17500 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-1.png | Bin 0 -> 18058 bytes ...llow_Smiley_Face_Warp-interp-3-clamp-4.png | Bin 0 -> 19313 bytes .../image/python/ops/dense_image_warp.py | 201 ++++++++++++ .../image/python/ops/interpolate_spline.py | 291 ++++++++++++++++++ .../image/python/ops/sparse_image_warp.py | 201 ++++++++++++ .../tools/pip_package/pip_smoke_test.py | 1 + 20 files changed, 1601 insertions(+) create mode 100644 tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-0.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png create mode 100644 tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-4.png create mode 100644 tensorflow/contrib/image/python/ops/dense_image_warp.py create mode 100644 tensorflow/contrib/image/python/ops/interpolate_spline.py create mode 100644 tensorflow/contrib/image/python/ops/sparse_image_warp.py diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 9f96a4b797..cdf48b3584 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -195,9 +195,11 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/profiler/model_analyzer_test.py" # Fails because uses data dependencies with bazel "${tensorflow_source_dir}/tensorflow/python/saved_model/saved_model_test.py" + "${tensorflow_source_dir}/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py" # requires scipy "${tensorflow_source_dir}/tensorflow/contrib/keras/python/keras/preprocessing/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/tfprof/python/tools/tfprof/pprof_profiler_test.py" + "${tensorflow_source_dir}/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py" # Takes very long to run without sharding (defined in bazel build file). "${tensorflow_source_dir}/tensorflow/python/kernel_tests/cwise_ops_test.py" # Loading resources in contrib doesn't seem to work on Windows diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 3ff02e085e..2924aef815 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -78,7 +78,10 @@ tf_custom_op_py_library( ], srcs_version = "PY2AND3", deps = [ + ":dense_image_warp_py", ":image_ops", + ":interpolate_spline_py", + ":sparse_image_warp_py", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:common_shapes", @@ -194,6 +197,116 @@ cuda_py_test( ], ) +py_library( + name = "dense_image_warp_py", + srcs = [ + "python/ops/dense_image_warp.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + "//third_party/py/numpy", + ], +) + +py_library( + name = "interpolate_spline_py", + srcs = [ + "python/ops/interpolate_spline.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + ], +) + +py_library( + name = "sparse_image_warp_py", + srcs = [ + "python/ops/sparse_image_warp.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":dense_image_warp_py", + ":interpolate_spline_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + "//tensorflow/python:util", + ], +) + +cuda_py_test( + name = "sparse_image_warp_test", + size = "medium", + srcs = ["python/kernel_tests/sparse_image_warp_test.py"], + additional_deps = [ + ":sparse_image_warp_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + ], + data = [":sparse_image_warp_test_data"], +) + +filegroup( + name = "sparse_image_warp_test_data", + srcs = glob(["python/kernel_tests/test_data/*.png"]), +) + +cuda_py_test( + name = "dense_image_warp_test", + size = "medium", + srcs = ["python/kernel_tests/dense_image_warp_test.py"], + additional_deps = [ + ":dense_image_warp_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + ], +) + +cuda_py_test( + name = "interpolate_spline_test", + size = "medium", + srcs = ["python/kernel_tests/interpolate_spline_test.py"], + additional_deps = [ + ":interpolate_spline_py", + "//third_party/py/numpy", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:image_ops", + "//tensorflow/python:variables", + "//tensorflow/core:protos_all_py", + ], +) + tf_py_test( name = "segmentation_test", size = "medium", diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index cc8ed117ba..e982030bc8 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -30,6 +30,9 @@ projective transforms (including rotation) are supported. @@transform @@translate @@translations_to_projective_transforms +@@dense_image_warp +@@interpolate_spline +@@sparse_image_warp ## Image Segmentation `Ops` @@ -47,6 +50,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.image.python.ops.dense_image_warp import dense_image_warp + from tensorflow.contrib.image.python.ops.distort_image_ops import adjust_hsv_in_yiq from tensorflow.contrib.image.python.ops.distort_image_ops import random_hsv_in_yiq @@ -57,7 +62,9 @@ from tensorflow.contrib.image.python.ops.image_ops import rotate from tensorflow.contrib.image.python.ops.image_ops import transform from tensorflow.contrib.image.python.ops.image_ops import translate from tensorflow.contrib.image.python.ops.image_ops import translations_to_projective_transforms +from tensorflow.contrib.image.python.ops.interpolate_spline import interpolate_spline from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms import single_image_random_dot_stereograms +from tensorflow.contrib.image.python.ops.sparse_image_warp import sparse_image_warp from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py new file mode 100644 index 0000000000..a58b6a247e --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/dense_image_warp_test.py @@ -0,0 +1,267 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for dense_image_warp.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import numpy as np + +from tensorflow.contrib.image.python.ops import dense_image_warp + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes + +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +from tensorflow.python.training import adam + + +class DenseImageWarpTest(test_util.TensorFlowTestCase): + + def setUp(self): + np.random.seed(0) + + def test_interpolate_small_grid_ij(self): + grid = constant_op.constant( + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) + query_points = constant_op.constant( + [[0., 0.], [1., 0.], [2., 0.5], [1.5, 1.5]], shape=[1, 4, 2]) + expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) + + interp = dense_image_warp._interpolate_bilinear(grid, query_points) + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def test_interpolate_small_grid_xy(self): + grid = constant_op.constant( + [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]], shape=[1, 3, 3, 1]) + query_points = constant_op.constant( + [[0., 0.], [0., 1.], [0.5, 2.0], [1.5, 1.5]], shape=[1, 4, 2]) + expected_results = np.reshape(np.array([0., 3., 6.5, 6.]), [1, 4, 1]) + + interp = dense_image_warp._interpolate_bilinear( + grid, query_points, indexing='xy') + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def test_interpolate_small_grid_batched(self): + grid = constant_op.constant( + [[[0., 1.], [3., 4.]], [[5., 6.], [7., 8.]]], shape=[2, 2, 2, 1]) + query_points = constant_op.constant([[[0., 0.], [1., 0.], [0.5, 0.5]], + [[0.5, 0.], [1., 0.], [1., 1.]]]) + expected_results = np.reshape( + np.array([[0., 3., 2.], [6., 7., 8.]]), [2, 3, 1]) + + interp = dense_image_warp._interpolate_bilinear(grid, query_points) + + with self.test_session() as sess: + predicted = sess.run(interp) + self.assertAllClose(expected_results, predicted) + + def get_image_and_flow_placeholders(self, shape, image_type, flow_type): + batch_size, height, width, numchannels = shape + image_shape = [batch_size, height, width, numchannels] + flow_shape = [batch_size, height, width, 2] + + tf_type = { + 'float16': dtypes.half, + 'float32': dtypes.float32, + 'float64': dtypes.float64 + } + + image = array_ops.placeholder(dtype=tf_type[image_type], shape=image_shape) + + flows = array_ops.placeholder(dtype=tf_type[flow_type], shape=flow_shape) + return image, flows + + def get_random_image_and_flows(self, shape, image_type, flow_type): + batch_size, height, width, numchannels = shape + image_shape = [batch_size, height, width, numchannels] + image = np.random.normal(size=image_shape) + flow_shape = [batch_size, height, width, 2] + flows = np.random.normal(size=flow_shape) * 3 + return image.astype(image_type), flows.astype(flow_type) + + def assert_correct_interpolation_value(self, + image, + flows, + pred_interpolation, + batch_index, + y_index, + x_index, + low_precision=False): + """Assert that the tf interpolation matches hand-computed value.""" + + height = image.shape[1] + width = image.shape[2] + displacement = flows[batch_index, y_index, x_index, :] + float_y = y_index - displacement[0] + float_x = x_index - displacement[1] + floor_y = max(min(height - 2, math.floor(float_y)), 0) + floor_x = max(min(width - 2, math.floor(float_x)), 0) + ceil_y = floor_y + 1 + ceil_x = floor_x + 1 + + alpha_y = min(max(0.0, float_y - floor_y), 1.0) + alpha_x = min(max(0.0, float_x - floor_x), 1.0) + + floor_y = int(floor_y) + floor_x = int(floor_x) + ceil_y = int(ceil_y) + ceil_x = int(ceil_x) + + top_left = image[batch_index, floor_y, floor_x, :] + top_right = image[batch_index, floor_y, ceil_x, :] + bottom_left = image[batch_index, ceil_y, floor_x, :] + bottom_right = image[batch_index, ceil_y, ceil_x, :] + + interp_top = alpha_x * (top_right - top_left) + top_left + interp_bottom = alpha_x * (bottom_right - bottom_left) + bottom_left + interp = alpha_y * (interp_bottom - interp_top) + interp_top + atol = 1e-6 + rtol = 1e-6 + if low_precision: + atol = 1e-2 + rtol = 1e-3 + self.assertAllClose( + interp, + pred_interpolation[batch_index, y_index, x_index, :], + atol=atol, + rtol=rtol) + + def check_zero_flow_correctness(self, shape, image_type, flow_type): + """Assert using zero flows doesn't change the input image.""" + + image, flows = self.get_image_and_flow_placeholders(shape, image_type, + flow_type) + interp = dense_image_warp.dense_image_warp(image, flows) + + with self.test_session() as sess: + rand_image, rand_flows = self.get_random_image_and_flows( + shape, image_type, flow_type) + rand_flows *= 0 + + predicted_interpolation = sess.run( + interp, feed_dict={ + image: rand_image, + flows: rand_flows + }) + self.assertAllClose(rand_image, predicted_interpolation) + + def test_zero_flows(self): + """Apply check_zero_flow_correctness() for a few sizes and types.""" + + shapes_to_try = [[3, 4, 5, 6], [1, 2, 2, 1]] + for shape in shapes_to_try: + self.check_zero_flow_correctness( + shape, image_type='float32', flow_type='float32') + + def check_interpolation_correctness(self, + shape, + image_type, + flow_type, + num_probes=5): + """Interpolate, and then assert correctness for a few query locations.""" + + image, flows = self.get_image_and_flow_placeholders(shape, image_type, + flow_type) + interp = dense_image_warp.dense_image_warp(image, flows) + low_precision = image_type == 'float16' or flow_type == 'float16' + with self.test_session() as sess: + rand_image, rand_flows = self.get_random_image_and_flows( + shape, image_type, flow_type) + + pred_interpolation = sess.run( + interp, feed_dict={ + image: rand_image, + flows: rand_flows + }) + + for _ in range(num_probes): + batch_index = np.random.randint(0, shape[0]) + y_index = np.random.randint(0, shape[1]) + x_index = np.random.randint(0, shape[2]) + + self.assert_correct_interpolation_value( + rand_image, + rand_flows, + pred_interpolation, + batch_index, + y_index, + x_index, + low_precision=low_precision) + + def test_interpolation(self): + """Apply check_interpolation_correctness() for a few sizes and types.""" + + shapes_to_try = [[3, 4, 5, 6], [1, 5, 5, 3], [1, 2, 2, 1]] + for im_type in ['float32', 'float64', 'float16']: + for flow_type in ['float32', 'float64', 'float16']: + for shape in shapes_to_try: + self.check_interpolation_correctness(shape, im_type, flow_type) + + def test_gradients_exist(self): + """Check that backprop can run. + + The correctness of the gradients is assumed, since the forward propagation + is tested to be correct and we only use built-in tf ops. + However, we perform a simple test to make sure that backprop can actually + run. We treat the flows as a tf.Variable and optimize them to minimize + the difference between the interpolated image and the input image. + """ + + batch_size, height, width, numchannels = [4, 5, 6, 7] + image_shape = [batch_size, height, width, numchannels] + image = random_ops.random_normal(image_shape) + flow_shape = [batch_size, height, width, 2] + init_flows = np.float32(np.random.normal(size=flow_shape) * 0.25) + flows = variables.Variable(init_flows) + + interp = dense_image_warp.dense_image_warp(image, flows) + loss = math_ops.reduce_mean(math_ops.square(interp - image)) + + optimizer = adam.AdamOptimizer(1.0) + grad = gradients.gradients(loss, [flows]) + opt_func = optimizer.apply_gradients(zip(grad, [flows])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(10): + sess.run(opt_func) + + def test_size_exception(self): + """Make sure it throws an exception for images that are too small.""" + + shape = [1, 2, 1, 1] + msg = 'Should have raised an exception for invalid image size' + with self.assertRaises(ValueError, msg=msg): + self.check_interpolation_correctness(shape, 'float32', 'float32') + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py b/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py new file mode 100644 index 0000000000..1939caaa2d --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/interpolate_spline_test.py @@ -0,0 +1,264 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for interpolate_spline.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from scipy import interpolate as sc_interpolate + +from tensorflow.contrib.image.python.ops import interpolate_spline + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util + +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + +from tensorflow.python.training import momentum + + +class _InterpolationProblem(object): + """Abstract class for interpolation problem descriptions.""" + + def get_problem(self, optimizable=False, extrapolate=True, dtype='float32'): + """Make data for an interpolation problem where all x vectors are n-d. + + Args: + optimizable: If True, then make train_points a tf.Variable. + extrapolate: If False, then clamp the query_points values to be within + the max and min of train_points. + dtype: The data type to use. + + Returns: + query_points, query_values, train_points, train_values: training and + test tensors for interpolation problem + """ + + # The values generated here depend on a seed of 0. + np.random.seed(0) + + batch_size = 1 + num_training_points = 10 + num_query_points = 4 + + init_points = np.random.uniform( + size=[batch_size, num_training_points, self.DATA_DIM]) + + init_points = init_points.astype(dtype) + train_points = ( + variables.Variable(init_points) + if optimizable else constant_op.constant(init_points)) + train_values = self.tf_function(train_points) + + query_points_np = np.random.uniform( + size=[batch_size, num_query_points, self.DATA_DIM]) + query_points_np = query_points_np.astype(dtype) + if not extrapolate: + query_points_np = np.clip(query_points_np, np.min(init_points), + np.max(init_points)) + + query_points = constant_op.constant(query_points_np) + query_values = self.np_function(query_points_np) + + return query_points, query_values, train_points, train_values + + +class _QuadraticPlusSinProblem1D(_InterpolationProblem): + """1D interpolation problem used for regression testing.""" + DATA_DIM = 1 + HARDCODED_QUERY_VALUES = { + (1.0, 0.0): [6.2647187603, -7.84362604077, -5.63690142322, 1.42928896387], + (1.0, + 0.01): [6.77688289946, -8.02163669853, -5.79491157027, 1.4063285693], + (2.0, + 0.0): [8.67110264937, -8.41281390883, -5.80190044693, 1.50155606059], + (2.0, + 0.01): [6.70797816797, -7.49709587663, -5.28965776238, 1.52284731741], + (3.0, + 0.0): [9.37691802935, -8.50390141515, -5.80786417426, 1.63467762122], + (3.0, + 0.01): [4.47106304758, -5.71266128361, -3.92529303296, 1.86755293857], + (4.0, + 0.0): [9.58172461111, -8.51432104771, -5.80967675388, 1.63361164256], + (4.0, 0.01): [ + -3.87902711352, -0.0253462273846, 1.79857618022, -0.769339675725 + ] + } + + def np_function(self, x): + """Takes np array, evaluates the test function, and returns np array.""" + return np.sum( + np.power((x - 0.5), 3) - 0.25 * x + 10 * np.sin(x * 10), + axis=2, + keepdims=True) + + def tf_function(self, x): + """Takes tf tensor, evaluates the test function, and returns tf tensor.""" + return math_ops.reduce_mean( + math_ops.pow((x - 0.5), 3) - 0.25 * x + 10 * math_ops.sin(x * 10), + 2, + keepdims=True) + + +class _QuadraticPlusSinProblemND(_InterpolationProblem): + """3D interpolation problem used for regression testing.""" + + DATA_DIM = 3 + HARDCODED_QUERY_VALUES = { + (1.0, 0.0): [1.06609663962, 1.28894849357, 1.10882405595, 1.63966936885], + (1.0, 0.01): [1.03123780748, 1.2952930985, 1.10366822954, 1.65265118569], + (2.0, 0.0): [0.627787735064, 1.43802857251, 1.00194632358, 1.91667538215], + (2.0, 0.01): [0.730159985046, 1.41702471595, 1.0065827217, 1.85758519312], + (3.0, 0.0): [0.350460417862, 1.67223539464, 1.00475331246, 2.31580322491], + (3.0, + 0.01): [0.624557250556, 1.63138876667, 0.976588193162, 2.12511237866], + (4.0, + 0.0): [0.898129669986, 1.24434133638, -0.938056116931, 1.59910338833], + (4.0, + 0.01): [0.0930360338179, -3.38791305538, -1.00969032567, 0.745535080382], + } + + def np_function(self, x): + """Takes np array, evaluates the test function, and returns np array.""" + return np.sum( + np.square(x - 0.5) + 0.25 * x + 1 * np.sin(x * 15), + axis=2, + keepdims=True) + + def tf_function(self, x): + """Takes tf tensor, evaluates the test function, and returns tf tensor.""" + return math_ops.reduce_sum( + math_ops.square(x - 0.5) + 0.25 * x + 1 * math_ops.sin(x * 15), + 2, + keepdims=True) + + +class InterpolateSplineTest(test_util.TensorFlowTestCase): + + def test_1d_linear_interpolation(self): + """For 1d linear interpolation, we can compare directly to scipy.""" + + tp = _QuadraticPlusSinProblem1D() + (query_points, _, train_points, train_values) = tp.get_problem( + extrapolate=False, dtype='float64') + interpolation_order = 1 + + with ops.name_scope('interpolator'): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, interpolation_order) + with self.test_session() as sess: + fetches = [query_points, train_points, train_values, interpolator] + query_points_, train_points_, train_values_, interp_ = sess.run(fetches) + + # Just look at the first element of the minibatch. + # Also, trim the final singleton dimension. + interp_ = interp_[0, :, 0] + query_points_ = query_points_[0, :, 0] + train_points_ = train_points_[0, :, 0] + train_values_ = train_values_[0, :, 0] + + # Compute scipy interpolation. + scipy_interp_function = sc_interpolate.interp1d( + train_points_, train_values_, kind='linear') + + scipy_interpolation = scipy_interp_function(query_points_) + scipy_interpolation_on_train = scipy_interp_function(train_points_) + + # Even with float64 precision, the interpolants disagree with scipy a + # bit due to the fact that we add the EPSILON to prevent sqrt(0), etc. + tol = 1e-3 + + self.assertAllClose( + train_values_, scipy_interpolation_on_train, atol=tol, rtol=tol) + self.assertAllClose(interp_, scipy_interpolation, atol=tol, rtol=tol) + + def test_1d_interpolation(self): + """Regression test for interpolation with 1-D points.""" + + tp = _QuadraticPlusSinProblem1D() + (query_points, _, train_points, + train_values) = tp.get_problem(dtype='float64') + + for order in (1, 2, 3): + for reg_weight in (0, 0.01): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, order, reg_weight) + + target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] + target_interpolation = np.array(target_interpolation) + with self.test_session() as sess: + interp_val = sess.run(interpolator) + self.assertAllClose(interp_val[0, :, 0], target_interpolation) + + def test_nd_linear_interpolation(self): + """Regression test for interpolation with N-D points.""" + + tp = _QuadraticPlusSinProblemND() + (query_points, _, train_points, + train_values) = tp.get_problem(dtype='float64') + + for order in (1, 2, 3): + for reg_weight in (0, 0.01): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, order, reg_weight) + + target_interpolation = tp.HARDCODED_QUERY_VALUES[(order, reg_weight)] + target_interpolation = np.array(target_interpolation) + with self.test_session() as sess: + interp_val = sess.run(interpolator) + self.assertAllClose(interp_val[0, :, 0], target_interpolation) + + def test_interpolation_gradient(self): + """Make sure that backprop can run. Correctness of gradients is assumed. + + Here, we create a use a small 'training' set and a more densely-sampled + set of query points, for which we know the true value in advance. The goal + is to choose x locations for the training data such that interpolating using + this training data yields the best reconstruction for the function + values at the query points. The training data locations are optimized + iteratively using gradient descent. + """ + tp = _QuadraticPlusSinProblemND() + (query_points, query_values, train_points, + train_values) = tp.get_problem(optimizable=True) + + regularization = 0.001 + for interpolation_order in (1, 2, 3, 4): + interpolator = interpolate_spline.interpolate_spline( + train_points, train_values, query_points, interpolation_order, + regularization) + + loss = math_ops.reduce_mean(math_ops.square(query_values - interpolator)) + + optimizer = momentum.MomentumOptimizer(0.001, 0.9) + grad = gradients.gradients(loss, [train_points]) + grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) + opt_func = optimizer.apply_gradients(zip(grad, [train_points])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(100): + sess.run([loss, opt_func]) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py b/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py new file mode 100644 index 0000000000..0135c66e29 --- /dev/null +++ b/tensorflow/contrib/image/python/kernel_tests/sparse_image_warp_test.py @@ -0,0 +1,254 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sparse_image_warp.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.image.python.ops import sparse_image_warp + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import image_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest +from tensorflow.python.platform import test + +from tensorflow.python.training import momentum + + +class SparseImageWarpTest(test_util.TensorFlowTestCase): + + def setUp(self): + np.random.seed(0) + + def testGetBoundaryLocations(self): + image_height = 11 + image_width = 11 + num_points_per_edge = 4 + locs = sparse_image_warp._get_boundary_locations(image_height, image_width, + num_points_per_edge) + num_points = locs.shape[0] + self.assertEqual(num_points, 4 + 4 * num_points_per_edge) + locs = [(locs[i, 0], locs[i, 1]) for i in range(num_points)] + for i in (0, image_height - 1): + for j in (0, image_width - 1): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + for i in (2, 4, 6, 8): + for j in (0, image_width - 1): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + for i in (0, image_height - 1): + for j in (2, 4, 6, 8): + self.assertIn((i, j), locs, '{},{} not in the locations'.format(i, j)) + + def testGetGridLocations(self): + image_height = 5 + image_width = 3 + grid = sparse_image_warp._get_grid_locations(image_height, image_width) + for i in range(image_height): + for j in range(image_width): + self.assertEqual(grid[i, j, 0], i) + self.assertEqual(grid[i, j, 1], j) + + def testZeroShift(self): + """Run assertZeroShift for various hyperparameters.""" + for order in (1, 2): + for regularization in (0, 0.01): + for num_boundary_points in (0, 1): + self.assertZeroShift(order, regularization, num_boundary_points) + + def assertZeroShift(self, order, regularization, num_boundary_points): + """Check that warping with zero displacements doesn't change the image.""" + batch_size = 1 + image_height = 4 + image_width = 4 + channels = 3 + + image = np.random.uniform( + size=[batch_size, image_height, image_width, channels]) + + input_image_op = constant_op.constant(np.float32(image)) + + control_point_locations = [[1., 1.], [2., 2.], [2., 1.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0))) + + control_point_displacements = np.zeros( + control_point_locations.shape.as_list()) + control_point_displacements = constant_op.constant( + np.float32(control_point_displacements)) + + (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( + input_image_op, + control_point_locations, + control_point_locations + control_point_displacements, + interpolation_order=order, + regularization_weight=regularization, + num_boundary_points=num_boundary_points) + + with self.test_session() as sess: + warped_image, input_image, _ = sess.run( + [warped_image_op, input_image_op, flow_field]) + + self.assertAllClose(warped_image, input_image) + + def testMoveSinglePixel(self): + """Run assertMoveSinglePixel for various hyperparameters and data types.""" + for order in (1, 2): + for num_boundary_points in (1, 2): + for type_to_use in (dtypes.float32, dtypes.float64): + self.assertMoveSinglePixel(order, num_boundary_points, type_to_use) + + def assertMoveSinglePixel(self, order, num_boundary_points, type_to_use): + """Move a single block in a small grid using warping.""" + batch_size = 1 + image_height = 7 + image_width = 7 + channels = 3 + + image = np.zeros([batch_size, image_height, image_width, channels]) + image[:, 3, 3, :] = 1.0 + input_image_op = constant_op.constant(image, dtype=type_to_use) + + # Place a control point at the one white pixel. + control_point_locations = [[3., 3.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0)), + dtype=type_to_use) + # Shift it one pixel to the right. + control_point_displacements = [[0., 1.0]] + control_point_displacements = constant_op.constant( + np.float32(np.expand_dims(control_point_displacements, 0)), + dtype=type_to_use) + + (warped_image_op, flow_field) = sparse_image_warp.sparse_image_warp( + input_image_op, + control_point_locations, + control_point_locations + control_point_displacements, + interpolation_order=order, + num_boundary_points=num_boundary_points) + + with self.test_session() as sess: + warped_image, input_image, flow = sess.run( + [warped_image_op, input_image_op, flow_field]) + # Check that it moved the pixel correctly. + self.assertAllClose( + warped_image[0, 4, 5, :], + input_image[0, 4, 4, :], + atol=1e-5, + rtol=1e-5) + + # Test that there is no flow at the corners. + for i in (0, image_height - 1): + for j in (0, image_width - 1): + self.assertAllClose( + flow[0, i, j, :], np.zeros([2]), atol=1e-5, rtol=1e-5) + + def load_image(self, image_file, sess): + image_op = image_ops.decode_png( + io_ops.read_file(image_file), dtype=dtypes.uint8, channels=4)[:, :, 0:3] + return sess.run(image_op) + + def testSmileyFace(self): + """Check warping accuracy by comparing to hardcoded warped images.""" + + test_data_dir = test.test_src_dir_path('contrib/image/python/' + 'kernel_tests/test_data/') + input_file = test_data_dir + 'Yellow_Smiley_Face.png' + with self.test_session() as sess: + input_image = self.load_image(input_file, sess) + control_points = np.asarray([[64, 59], [180 - 64, 59], [39, 111], + [180 - 39, 111], [90, 143], [58, 134], + [180 - 58, 134]]) # pyformat: disable + control_point_displacements = np.asarray( + [[-10.5, 10.5], [10.5, 10.5], [0, 0], [0, 0], [0, -10], [-20, 10.25], + [10, 10.75]]) + control_points_op = constant_op.constant( + np.expand_dims(np.float32(control_points[:, [1, 0]]), 0)) + control_point_displacements_op = constant_op.constant( + np.expand_dims(np.float32(control_point_displacements[:, [1, 0]]), 0)) + float_image = np.expand_dims(np.float32(input_image) / 255, 0) + input_image_op = constant_op.constant(float_image) + + for interpolation_order in (1, 2, 3): + for num_boundary_points in (0, 1, 4): + warp_op, _ = sparse_image_warp.sparse_image_warp( + input_image_op, + control_points_op, + control_points_op + control_point_displacements_op, + interpolation_order=interpolation_order, + num_boundary_points=num_boundary_points) + with self.test_session() as sess: + warped_image = sess.run(warp_op) + out_image = np.uint8(warped_image[0, :, :, :] * 255) + target_file = ( + test_data_dir + + 'Yellow_Smiley_Face_Warp-interp' + '-{}-clamp-{}.png'.format( + interpolation_order, num_boundary_points)) + + target_image = self.load_image(target_file, sess) + + # Check that the target_image and out_image difference is no + # bigger than 2 (on a scale of 0-255). Due to differences in + # floating point computation on different devices, the float + # output in warped_image may get rounded to a different int + # than that in the saved png file loaded into target_image. + self.assertAllClose(target_image, out_image, atol=2, rtol=1e-3) + + def testThatBackpropRuns(self): + """Run optimization to ensure that gradients can be computed.""" + + batch_size = 1 + image_height = 9 + image_width = 12 + image = variables.Variable( + np.float32( + np.random.uniform(size=[batch_size, image_height, image_width, 3]))) + control_point_locations = [[3., 3.]] + control_point_locations = constant_op.constant( + np.float32(np.expand_dims(control_point_locations, 0))) + control_point_displacements = [[0.25, -0.5]] + control_point_displacements = constant_op.constant( + np.float32(np.expand_dims(control_point_displacements, 0))) + warped_image, _ = sparse_image_warp.sparse_image_warp( + image, + control_point_locations, + control_point_locations + control_point_displacements, + num_boundary_points=3) + + loss = math_ops.reduce_mean(math_ops.abs(warped_image - image)) + optimizer = momentum.MomentumOptimizer(0.001, 0.9) + grad = gradients.gradients(loss, [image]) + grad, _ = clip_ops.clip_by_global_norm(grad, 1.0) + opt_func = optimizer.apply_gradients(zip(grad, [image])) + init_op = variables.global_variables_initializer() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(5): + sess.run([loss, opt_func]) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face.png new file mode 100644 index 0000000000000000000000000000000000000000..7e303881e213a82e412d18de9d9d86f368726f06 GIT binary patch literal 14060 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}9Bd2>47O+4j2IXg*pj^6T^Rm@;DWu&Co?cG zu$OrHy0YJ7yv!2(t%1$ISo z&l83--`$s=x@`68)xTz)^St-`NHe8~I9CILp2YXt%nukZ-GS zrPOjh(~VYon`(bDd|{e!Yq|e&MNZDSZY^zX-}w3YU&=^I3f)^7>=EJ7p|7OG6y(f& zC^hn%i+gp7=cK^6dk;QVC{*v|-Fo%vr0x6mE!?(k+ro9r)B?hzU$39Gn~liN&78FJTnFf4B8duwHDXIB-oJui0Bq)EGuELrB(vV2)vnWx{k z7IhVmj_%2VLQ0O)R2hsIIOY0z6c{4aUaQy^D9xV|x;-;vLO_IuqJ6wmUfzPV_xIXP z&5^11E!?<&(V|1~5wWrVo&Ed{M={K1xU%KM)ePoW3{k6R9654A^5sj;>1wK_Z|Bdt zwe|K4!@!MyfB#kr3QB2d)pXFB;Kg=Yh;_4FJmW0Z7*FNbPuK&awG-dRao*m>x@zCg zWAU@*zt>;6BICy7>CgYihl<)U2VBevUomwxbB03ln;T15Yk#Tu+dZA~?qm8KLFwu9 z-hC{6sdVu1Ld7SIuX)m}j4lVxKDT)BUa>OHNh%AMxE$iQXL%L2jz{L?$xMw^tGH@5 zmaqS!)(}7YPuJF@xoaHwXIgChKA(;6jp@HL#?{CAnjxXohQ|;FBtk zpA%kResN;0OXgcE-aWhK&DBl5x~gsQ{Tg8n9ldGS-rsIdXMB@$Y4r`RQX|H@xo2iP zu{JjP6>pRoH2=uQ$I6QyJ<8y?nyPAadXr0Iu1Mikk5#>|ju~VxeCnQT6*T$98U58d z-jiDH#Bk>CRTZuO+#6HyMdO}*g}{H7fLkfy+eBC1Wu7}l#4z*Djz<1IX>)_Wdp-+2 zs{S^`mg_*3x8o&|A6II&HZAYHQx$#SnRFz#ScF)b;Dayy&eKlTRbGsqrkZn;>E+Vt z6TaWhzY!YH(0=;#$5P?AcVgkytX3&6F6>}UK0fc$@0*+3zepLYtqHEM;9hrvee=X9 zZh^nb3EE|kYU-{m`|f){q%t?Ra;wg6{!Q|0b0;z~goYl7&R_WG@Ar-GPYBvn|1h}c zV4WkBlkd>pD}O)xr*bUsMo`;aU0WaHT=^ z;w{@=_T8Ot_UY5<*Vk1&rGK83$#^2w$nfE$`ijbru5ZqtKYr;>j1A+0du#O^ll*)`M2MRW%ItPP9`508_T}^`SzyPpQ;5ncj_>|zP|X~ zN%f89_VWGm77f>%{slz^wAtL-Y)jXUWlZW=(FsA0Co0`SAK> zu2;nx`{vqKKZ*YQ+<#WVF-gNfyZKjc{9@2}kQJ@oqtkAoB)wf*I+}Z5zggFTHRWlx z8)h9hd%W@|<5{z+3!x2DQ|F1UT+cqQM#$anzfAn!U)o{|d+)a91UvLgPv4aB?ak(Q z`#+s(JUQ2T^?|IltarN>em|2XV#+qV_n3*yqHSq!w>>(c-FIitiBsD$GU`LFDQywU z&`x;%oVomNYwYf_hYSDy{gq0-!I z%UTJgcpavo-YFJ9fwTgc@d~NLv_60Y- znO{HiY3mop8JUM#rmTLq^LZD4-tOF(s-z^3j^mFTk3VL8(0Kgu#WPbo5BNPhWyCNc zmyv1XBz@!g8o{?G)}+SzhS!N?*(97^G5O3-o+|lQ`+Mr7BftO5nq?RN`*GUoW-sIU z+EO+Wg3sr+|7+)ed}x=I$h7I(-`#$>R`SJ(oyEuX;x|1x6@A`HThq)Wgj*)O*c_-lzKC_^z3o+Uzy3;{efhM5 z9w%FpcJ)zxVbFb-Zonl@a9=6Y_?QX9)dG5rdj@d6>*enhc3_N-v=UmjpW_HE9WzyTy z(jRYnl=Q~*k5aV5T0N)q)TRI4ov)ww|JOZn!kLjaGGHBHJQzJTG@+t@9Wu{AuG&$eof?yiPPI9 zuU^&A(LKs{kTp|qtJ(tF=`lusma@yusK4IAd3W*NIDY0$S;{Bout^8}&&g=Gn!5J> z)hKQ&U;9dvfQ;|?W*`4-|5R!7<7Lq?hiu)pZ|nztWh__te1MH%KK~3W(Ys2@g=vnA zLfu^-B%?Z`xGyAji~0U~^RRvW4g*tDBc2aM4q@t6N}CxjE4;p3S?M*4X^os$ZRJek zjkizCs$KQ_!PXNQ2UE5`-1;PgF?{;Z1ncz@T?^P6dU#6gzJ~Ptcx%q1uW_JV!EU+h zRjGz|Rp0M!-uM4>eA+xQ;m4mpMH!xWV%jjf_tGB?pOie^7bcU|_3zsASL(0Ek&Dy* zD?faieelWFvKa@nV+@}xm1KPTk-^iG@!nmunst`9E?dd|x{H71 zD;|`;UcW0V<^6>K{VjqK@(Z{Ai`*(@vLPk%*^=g4Vccu9k~?~JTi4v|x9AsLzedcf z?ueLc>b$cBY+L4a#T zK{a-)ai!~ePk3!vmKe5p71O3k%btC_mGV^h_R7jXD^9s3?45Hsi*L8aqHcw2a?9_f zMX=xUoTW6M-{QH9%DY>wD^pW{EZQWb+9aG~WNLk(*I@nL4~HB**`@cMRW`l-DKRx~+A%%mhU?sOYR!a- ziVChxRQ|DP6_-1&|B+Nv?F)Z@AOEuZ_q)pYv$IS!PQDIh2z|@e7}7QHgkUEBNMq*O{?Ll%F9_O-)S2mBnK{jILFIq>9U$Njz1lfAq)>{_+zXVn{1 z=?iyvxqge;l=APFx4!HwC#B6?8^zMzcczM_SSfwZTh#G}O?K(mdo#~IW=>BLpQUAF z{W~yY?bN>ILpl@ z+Vh2lm~4LQ_{}w7oX^lvSYw&9!m@$y_vv`OxcaYG7gznR={mnuc?)}kKl6G!uk7b* zmaAPll(R*zZQ}HFd5w*08eU!NcjD2T`&{hAqT04r#{YlR+fvh#EE^22`}r+c#&)at ztf_GL|9{_u+gjSS3pBT{VKh6m>6`QGjeFHkWhie53|XeUQEG zV*1e|4FCU`pOY|M^-^9WBZ$BDr0U`SpIm_W4=Y({D_-K3=(a(F0io@cZ@u|JW~G=GK<|{@2XIYks9aJ@q15m*JOFcJ`lh3Axz|qHN7;_lZtednn74 zpJCEuL8a3k=T9DNymV<YTF#O=-)Fmf_F^vX zb#iH8*Y>~IW^QRK|9Afk*$HzU)_z%j?u5hgH#t2z8V6pz%9?U8)wH#}Z_l?|pTp)> z=e@6J`2XFxJ3cX3r~O3Si|>y2U2QC%oZGo8kTKhKDU1-)~AP2b>ECNm;G~?-|!!}rQ*n4 z>mA*8Eg^Ku>$CGWpDz2O^)0s=cm)A@wQ7_z`Vvnj%`B1XO`9?-WS>H z&;I%_$@`b;C9#I<^PAH3w>&zP)rJg=nniZdlu_voh7 zo3oZLdVBH>`xCn#n|Sv8&eK)%({T<7;k(mP$|JdD3 z+~P{cy1e05b`igJ#NKefzI@)S{mB;>IiCCyYrE8Y#lO7y`_g4!ul1L?)7jzoENaG4 zW$DgkrrDbckBYuuv}aFPg;DVGZwEJ}8Vg8EbG`k*ZK#KSPZeqJ}H@V%o8 zo%fteOuKhwEk{m`US%bt&zy)H4r7KX(@$Icud$D-yZLkT>bwY^K&iRkUo7^Jw-V8} zu)E;K%36|@^(FK3GheH{-r{eYXRqp2$gKIVEw`t2t6Y`#p(dV;h0E;2kBP^>IoNAn z*V4w89sj0JR&~dRLsM6*Pv5&$^mfA0{nqUJ_r)Ds6M6K;#>4q5Yn+)2{(fbw`Pgdx z?Qq83JgL3+7PA}VN}cQJJ<4&My-T5So?Y#i)57X*?kfeSf4!iSlB05!Ig7_w`sB;M z4V(EyL}U)%+4){UV1xKlvwXWI>vs|Sb~RxOOeb%yy;JBeX_mk3J6op%!wl`B zOD;P&BB1hHGyBdK`#%qr*TmN!xM=zB%Vois0!OJylav;+_lF;R`jp)`^ZUlqY}s|Y z&)pREy?WKbmDQkl+2XVQCo*?$4&0?QpTDEe?acc9j|wU+w^>@+Ef=&Z`SfICIlt5s z?qGK5=e#XSpU&h+-S!dB@ZooenVFP$?LYUMd$l6#_wPIUP?KpB&zSTrouq2VZP(omb6u*ROw`x?oi;IS-I=4`-Yzv@Kc{hD z@5`@0YooI_R(&muK7MZ1LdJ?O7Z;Qz?yfwiGxy%|N#)07U(BD?6+K60zVL#>%Y7s_ zZ}692@6WHj{;AV!zkFpWl{*qSfky*&$AmvW&Mv>>^R3t0@7+4QWX8cv_L%J3d%sSz z?tilPzg>X%lN)Xa-O}Ua_Sz&e9jg9%h{>Yj&{4AskB@)fRex^}qwN;)yC?n&UYN2l z{;2X@X_NZ+4@l^2_6AB+2JzKVpHnV@*L&OH5GL&{aNjNBHfqg!4^WiNViQSj!;BfA$b zdsqMY+2Jz3vmvw1wVN3_l~&I!kxKcTvA1&b?#J?Z_uibVd-E$r`M^}8lAGD<9QVuF z8k|38c$4YZm&^WpYcD9|YcL8W>}UC>ees4zj?B`tie~-Swr;I^bEELcxs@|tJ)b|7 zW37FDvctiX&5rZzvhwe~(z{??#`1o#=*^EO-G1)Ts6A)d;o)<{x2*5D(S^=V!QiyB zQg_<73Gcam^OntD-JIMMbEfInQ;z>$s38}1_y4)Mu8!ODHr(3QoBJ~1;j{#vm60K0 z`QJaEf48se_Ivg+vxCC5VKtlPYP|i-I6bE6-^Ps=mtPr1u>JaSnO#rP+Ds=%M6~k# z=fw|&e?LjcVVD21*!`~jlI1&gh*fdilERVJoTkTM8{r5{F^WnocrPaP{GRuR`f z-u*Adn4>;Pcw_;2Yu9v-QdMuuHcmTcY*{gRWnR2=JRVG(uP z`}V@VP2sQSZoPeTtE5{w_s{=gqN4AZW8?goeVsL%Zd9>|y>IvNWh~iwda*e>_l6he z79MBTiDSllJTGJ-1ozjUlUmv~*rs)V8Zzvn94X_}CEI z=Q&YUTd8foX2ZLzlM}5bpRl{3>z`kKGu7k8efjOPYPY?Nozci#-P_+CtYoy{AKQ{+ zsh*SF{DG6HXUwn-&LizaN&Um2iLs|(%2o=e77&o z{xOdVqeRk{xevF`n>lk!#+Mfz8E?fkjL&mlI`C-?+kK}Mf9Kw9oN=$-rA6Gi?TBQ& z;cO{GC9xWXv)7upD=EA0;{I}B;ghwYzN)(!q$5SDel~Fn%%7E2qkTuw%XRs-TSw5X|=U%&L*+0}V18E?PHTe$V)3-_gG?f?G? z+59>sGBa~-;j5Jw=VT{{EM&|+;Iu)$RLV?;W#!6uy|>B^vaWmUrJ??y>)YZpd*c*z zdG9io2u>Aenma?HGA}GF$NXyC3h@}}*0TwZm+pSL*mbR(&@%t`y)QD#XT8_kyG8wo zea^S}?c3j(SWPzRcR%*#=DU!R|mpn^Ogw(}K&Fh4o}D zE(kAT@Y&TS<(_(v!{b?h=teVV7QMOoVk=c!+N%prs@9%ezogSf?!l+i!3lh7IqPn3 zRtk@K$ai6Ly7}6Av)~QelK)h+e1Fr~?fR5QspL9i?5+<{CO6EIi+9}FDE&?OTWmQm z|E{gc&(5Tzq-c~K)_66`HAQaAwY4WcJhYorUsv$oLQ(O+{C^@>^m0z>-gbPyzPfxt zu=+<{v5VX9YU-}dmZ@cKF}Z&6Vj%yfZSAtsoogHNm-cWZ{oTgQ=JI&e>R-2arb$)W z9AM_>X)@fCcCDGs#6tVigXW2jjvsg5XZ-ui^8P;6BiUa)yqsiMnwNGr$k~?Vg!EkV z%FwraJ9lfX#S4u*wiQ$R+~3sQ$c>n=<#+vyg@GPEJzL7YO582-w&#%+-~jKU|Auc{OcH@c%PePdr?{zB>B;%XwBSJG)d;-KuP4cfEn{`LeJWAcs;m0y z&Fqa2{zt62C2AB~ool})s#Gm0xmP>v^0Lz7+wb?edwFU6n4f*r{j(0c{;bc}&aUgwT@K4e(B%J9m@jR#hR9NN2eY;}9iTMvm- zeEav1>@JL$KQBAam;nEUlRD<%J?K(@!}V1 zfyI)ls--JdeNr|wsYvn$yonxM&!oAujq*66n|~5xa%Gq9PGS%0{7B}l{GiN?7CyYAMkU*r!DUv zUn>_a<<6MtY5s5bvW0!!+^#Wm`n{gJx(RMk`rw-SCW^m0Oht+5#pUB~c5UtH`LlWM zLG_pGR=#}sf=Pjw=fbWDcR}+$Cr{2RxEHVV`_6*0yDvhy(ykroT3e81|KhRnW*Zyn zge|?1BIQZ8H{QO9oH*I_?=M9yJGotc8+LKbogiRor0CdWq1Eh_tll}9|J){?x)ign zYkTHzIy3Y0B8lDSnigzTy0&<;dii{UH{3W^HS|EhK(B~%BD!_MYES= zWJK)WVexd)Cmr^==Vm8t;5#RCW!J^&$?O@%2hOf(&^W7i#_hB~%{*c4qE9Q``57AC z?OV?GqNC%&IYmXTEw2`bZd$YUjpTafERki7rk~D*?J@cN^y7il>#lKEI5JYL@9mPE zaaL*HRQZORzfP+6x0h_-;&c?5y`0&~=#seGue=q3Gh4%>HJ2PT?K-ok)PcvsKgVU+ zWmkvK3s{es9p-S@Y%}rb2C0p28)`l=t<})+v9H_0_u|8c2?bvtX`U@AO5Qy84#%z? zsm*^F4*X$Y*jJvmc>BzVjF+kRc5!4m@C(gN6Ir{~C3CgiEuOfD2>Z$-b`8=}tKKhI z!fd5;Mta@XlU^!ei?&|5A)_An_WYz}dv|_(%sVY{lg=c;3#abn%w*pbDI#+4dE4@` zsCBicPhEca@S&JeNyZBG5ZQaDZ|_Jhbe+J!=R1qZ{I{9TtAd%9yLBdUBw3y1$S|I8 z@XXqOe`hnk+w$nfzqM^EdT)9C-g15Q^XbLOuG3DgR^HRQefv>S$HLQWv4$*@**-LG z(>7l4vecMcY1W;NYhhY_X$H-k?|n-t_u<=I`?IF1!NQ@s=1FL7Sfbe09EPeJ2R+NR zv;}8Pf0nW-OK?p?*0mOPN6TF~wzd`#kf|A&%7zQ&9)T3iX)+WjZTXh#&k~np-xF{+QqJJ0szKe3a z(C1^%IO)HuRV(+Do33=#ZN<|i)m!dVT*z5s7G-8DIyrUswZrd@G@XquPjbFd@-61p z@*b;yvppy2xV*S^tIf*P>2tK%k=zRpLbzVm{PMWV78RMkc$V-2#?NB>+j{PP%UsOi zUw`bRruKyER~^6avzEP3TFSdM>$yr%bL%>uNxM7TrrHZ$*f1f@HEfZQ^>fEF>ozNB zpMG;kC+hQpZA=yHXVQ`ue)}DGdfG+C*kIum;rgrgr8-AThFnTzVc!yblkpZu!GWl5 z_ETF?7*_ibE{tR&Gejha`jwkse+!fmu-VJJR56WUA?XZsDIp1nl|}zBF7w`gLQpV zizgqfuM0}E+t2)L*_DTf85(MHcJaNKB=rCM&!15fdK38y*rse`YWv$Bw|bS-hfk;7 z142U5v#pa~WG;BqbUyF&y3hCK-!Mzi(_x9LlU%VT=cV#Rua6gZ?sUl1tV{b`>hS#e z?{YZ{ffbB%_V`~oCmsLxfd8^?y;BF9SSu^u?M&+a9dN0+^G@1v^{#7gw&k*?G5olF zzk!vzpZ-nQ^aRj@`>2_1f{6IJ&+SM}4UKOG$( z9xUo!#PWN~tH$0%`ur(kF&8CN(k{-r&UDGD^rGt$^XNRi4T;W|>!!utNR_;?U*44A z(@h(>3yqh8>StP6Bs;LLuG87QZ2gzNkL6Vv8o2d)oJve$?wG%qKcV#{Y_X~S`ciEx zsTr1&b?pC^oR)3pcz4QcO0%%&j>F+$AwRlby*d+Zc4VsKa^>z%%k%fGJjN~`qGWQV zS=IYdPLA}MLYB*q4*oNUcyYUJnvUZ7jTQg@F6+M(rh59o$Hxw(rd$<2V-~Dl{ks0f zX7@iwweH>Ei}IMOmHP6B-MP1!LicE_X%C9`Oj+A$c;b;_NvdVT z&(GW&lih8FglcyD|0i+x`OW!JtBx+tef976$?AjUZy27P=jZb=*J@+fkjky~`O^nc zeSedrZ~ph|Uh9YM6Y&g+*5+97r)z=E>%_uayA)3g(uXM%81{VTt6Y1OU7n%*&O^T~si|yMwqM?b$1fB)kan-m z#o_vsr$4<_J9AHNs1j~vJFu^R?Rx%%dwY^>KJ)C{{PW4=jWbL%&pi4T7Pmad)OLng z|2FBVk-jsTZ2nn9{`-C3bIH2vUyHM)J}OlN3VJcr9#=W=z_xYjRNmcQUT0=K=ur%; z=UKxm=sbOHZ2!s0-5W|WP4~n-$gl4{xORKlvzl!eZ*OqPH2QH|Un_t4(ZzpVQs?FE z=B;=lc;GSPy^3asec97Ln&sW#E~{^Cb;#8%>pI2Jc<%i7{r?~L2kqrr_A`E0?f*4? zQ>QZOLJ~XbF6F7bQjBfqDKTm%?uiZOk;*Y&?TiNAottRdAInVgCQ`{_8 zvS7)UkYnHP|K3WVOza$`JV|~5z zM4jp?O@Yb$6H1-y1vkdJUVYA$DtTkCb;I&`s~i&-ww&^kY;6_#_J(tgMMgjS3@cNf z?f0ZQ7b!_zk6r$a=gb+MgZK8n>2|I-aOrhegIUuI-Q&zVYTZ+!7SFsM^Y6L+l0!ev z7#|RiXZZh1e5Q?D!UG2dL&1(NCHp4(e}WPQ2WH+)sFz^pUnIaZgE4tQV4&a+D{JeN zFAol~_=TEV18~<|t>pU58l44+9+*Z#&GrOk4r1oMC$)uSbk6P*VRWy?^PU)tJu5hD#L+|aw*%_ zT-DH;v>|7vyj(x~#fuI>lf;vcw<#$$)^q6Zk7;M+US*nDS8v`jC*hXX1)CL>uaz?V z<0dd3c<=t0+aq-EH>rZFp?^LdJ$j(ue$uIESJsW3(_`I~w#BYqc!Njv?fQDxkH+T} zxG#L)6rA$whDX}6@6$!KYn(gl-ITA?yytnb*TnO2b z`%4PlI1zN=Wmv{F&alHQzb##(jX&gSq~*6PEsCkW1S2S7%ksp=@S_# z*CVym_DH92|Kl5m<@a=MGj56y6+O_xd27oVpW0a-ehZ$u7P)#)f8^oEH_!BUjCQ`m z{hfJp=lJhmb1&=5R;CR%>!#^lmD-VfJVhrWKwZP%Jy|{Q&y%)@i8{>JW1OqDUab0C z`uT0or#T@kQ$O9WZD(Ro+g`MWvG-Dt&i!4YlP*qH&$-W+e1W;CXpQZJ3G?{wl+X7s zwmChG^+SLCmBVw{3;gr?&s@IjKej^}`Db{g%Hztrgra zXJjMCe{F5By|w^D*qRB22U(3y+|IXNP`WC1cf-oyuXBIM*G$-=EbYm%LbRe+$&%sE z_x&32o7t{kzpi}#ROJn+eHlkMa(?|t?)midLPfJH%La?{I`=&q8D8ptxS4+I%M_6Z zr}aMith-*W%s({gVcoO-o0Vyg^8 z!y+Y{?Pc76_!;Z=WkfbM>{-!{=KD+iv;TYLp6A@2Uq6x+w7b9TT=4Ys z#BA%bHD7Bjf99QC^tt^0!ha_O_izQR-mY_cs*|gz=#7dO0?*FOY-UmRKjSqeeM;Vv zFC{{2!`EM1cK!O~^aoX67~J}{l;^2;&#RZ4Vcy^K;N+{F&pqy^8Q*NZ;dNl~>A(NJ z-`HAfz0NI|nO8wTsOiJO=EAi1_p-hRD+j!F@L4|7(Kd}|rA^J@I}fL;Bs}%{6mC_j zwL-2+eS21RH(o#i}%+B(>@|^5(v^DQs@RmyB&IiHECfMy%-(2=yZLL`L zwGW@=>m+u3kNf<$qapRQ#m4Dg_vHUIypgpokj-&RKcIMlVabJnkka>ko7s4sEY>%O z?pV9zV)4XHn;K4dB)pBPjZ0Mga7ug0qT*Yb&wgj+Pg41`=iL6uy&}C5jqmM$am;-9@X4VIxtANh znq_EbhJ882ec-G)Gw-f)Wx-x)_B~&O3{pBi7w383+SqLK^D}dKk7CjH_w)a=i0N(F zX0Of57WSQb>1U0mjF3!sRm91}8HKpPqEBBXvKh0Oycq2A4&9J?F%dS^J zbgJOe&Trp;Jl6R2r#R->_r)h}1-?w(e$#Jp7gx^DFTpia=f8i)IeV>i<}H&4yWbzN zJuD@4;_w`1HU@t?&MQ|UZs$i|`1tsU9gl>=xxg>}Q|I|D`Zh)L+PCa)cddS|yQyg} z|LNA&i0Ihf1@BZ?L>DqXI3c*kwt!)?-X4LE4fpge6e(LpcV7AUF{NbwotOvLc60-GX@$j0%WK>9E6>E4 zl!TU=uJqaWfhi!4Z^_b$`yX#xQ2V>#xSaBYb#cDuT$jAa2&#|RbmSNRb-$1@silk$ z_I?l8m?bJUefsC)t2ZlOfAvS%E#a*7`-H0N>$Em_*)yn%I$X5co#R^lohdwq@z0mZ zm67YG{}I)XaA5nBmm9cy+q5r{buU#vId4rmdaO@%vE0|!>3@sY9LS!M9;O+xY?D#K zx15?ywbti0)|DjfyxB3e;G%29|M&ID3b!96ir-q+uxnxC9oF67<6b+5ySg(!pX03b zcjt4axz@AuFN>^Nr?!37{rdT5-pc%M=4Ud$oAUnIBx!@quS=wCO%5Jvy|Fim)sH%^VPZ3l zj6H9c2d`Mh_Tyi55qrs9t4km6oe=+QlFnB-Ma%N$FEh6bUDM_s(mj1(_4-9mTE#ac z+_%%&US5{8vu?JcGV}WX&+TX2-fh0eW<_)FF}KrYU*2v%F#o^LGd~e2uV3-}vR-Vu zZ|=|ka{Ap(VGplM{O1m>+Bfgcmy@TJ=dSwpM$wkF{_oT~S3kdw`<3k*8F?W7KUa@@ ze!jK3!R6Cs4%1Q<)Y%(Zxj!7!-~XWe+_@?G*BjXcCQfbr>ODOvBtk;|#Wf~VhpK0H ztJj;>=eo!47P>7ey!hW;bCVT|Ij2tD`MWatI1|6E%dd4eLW>W_#Pi2gX`1EUXy9b9 zh+Q&gwPu6c_jfPMYpv{R{}g^%tld#|pp&~vG*fe!P~7*UJC8r#Jo}C zewN(dnd|p;JrdO}$aZA>ztsNVDo>>?iyj{Sc;hht<7pQc3nr#nyPS3lys@H5!e>j^ zmO|y1(R*twcY0o)^y)JE4AbsAHXAShoTo0hVUNvroemx5*=7sBz1#iq%(mPe+B=K| zR(m$GH3Tl+@aF0C!ay&s8oz~R(PmzYpC8Qlo>#=2_BP6>srIX{^Mh$mzfXR) zL)raAQRviLj9NxQFD|v3{49O%m$1#Mp(4JwzB8NO?|D1FzHen{ zsKujqk)N)rtd+6(`FF*t6$g&*E_?W9W$?rU((e!Q7-+AR4!bIMP;9Htjk&u8=SoX8 z+|ECKj#s)ME>rWZz8tyiHnLTSnt@pWqlgE6oZy_V9^(iYkxj*tIe-96S~QmyrwBf z{6>OOtv9n@^T~DW%$qXu)O}~Zdesn}cW@i8bU~dXPfX-4ENA7A#h6 zohG`XGWCsvkG^&9wKOs3X^p8S z8zm(L_zWJX#Vo2d-9C3ilpD)BjrE#2?6!NqNp=1ISH5RQskYd`gBtF>3$HPmFf3TX zG5_fkh4SlqJvN_xN|Lp^-@4{8oLy&bdf`h|tn7vg!*6S2I-|?qnbcp6DqeL}F*vGA z_UF$ZKVGjlv@;g=G&KCMAwI>R)-1}ob<@se^8AJ?}}*trkIHH3dDTLsnff1DGv>D6cDQ`H zEY~ekvf#AtjlF5Sd1ucZyMN(`?X#x9Y&I!|fVjT7H*Z#?A7)D_dDqk8*nH(+&I|q1 zdRxMN7ny`kDHmG$cFk_lnbUQhkJ^5JaC-gzV?R?P+b-Ulz zpy`<_X7upb*r#%--7jR{rG1#SW7k}**I(|uGRlvglD6F^`@_XOCt`w`ZT^~c+5HvC zE`N7~mqCXw`?A#)u|_tABS#WS?(eI3wrsY+mp`8cbFa;hirl{WOtkpYo4S{!qO2lT zOxJgR`uY6h>33pc;=|hF&-8Ejy7Da3S`A&^a8>o>k8igx{(q#Cqx{pAQzto0_U_4( zeKoI6s`}HX58IvF-|Tf;yLIJR=DRBjCjkM;hMN|h z7PXi2a$JMhS!TwBr(Rd~eBvtVlR3Eh_xHz=41DZz*|k~u4%(lVE=h>{{%*(9N8K0S zcWDb;-nsd&b&qeVxya63>z-$!p$BH?FM4yBe`0V%$9{2<2f14&{S(hywU?>ZY2(Hd zUVg`Ho7oZ{G_k(0%@UDx5!!HFC~3;GB|H!KE~K3;w*Sil+RL#$oxe_7BLIl$5-@EgrbHdUD~53jUwBDp^*q>*IMi zdpi4*CksApzaKDlcK57LMIkcweNDHbYUWRGU(fi5V>&x$zW*zRTl@AfR55tGy~ew| zeDjOLX7#KWV{an^L HB{Ts5wR(V` literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-0.png new file mode 100644 index 0000000000000000000000000000000000000000..7fd9e4e6d69f3120428d1d778846d495cea1a989 GIT binary patch literal 18537 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3Fz|G745^rtlpw*nn1P*v zfq~)we<{|*4C+IOdE|U2Z!?1cv!lKG#DB9{InVitat0sNyU+JyzGR)99iR84jLX+} zR6RFvE@p$ML1Bd&SOSW7s1_BNb3(wZKDMZ|?_#fR_sueh?bHYsBZ4nXQn9BOY>-&!< zp3!EJZ7TFTp0UhRzdJVE{JdD;(G`NBURp~T1^2MOzZoUzF3HB2z`*#U?ZkY}1NQ$P zFE&+7QqpYoei-$;wm7?Ho^ATQZTsHODc-NA_UQ48t%o-6*aP;v)TGYl{{G(S)0^Ad zoBR8n9UFrk8@-*Hf*l&>&YvIa(-Z2^F|)tFarSR};mh`J$Nozg_D<#PtoyU+|Aio) zO~?NKU-;<%zrVl#{{H{{{r`QnzrX)9-lFsK%gg(3Z`bR`|11ChZ~yoA|Ns8^`Tpzc z|NHBBODu#yi4m0Oj5?d=&yNk5;O)>59W%$g)Z<>i_L)e*AxQ)PY@(51Ov8bT-eP9U3tsI%0-@&?I;F_V~z|;Z9A_UR}{HEuGWF zFBvr`MQ}EnvI=MzZ|FL(Sf>6_!*Yf#EliJ-Jgxr!|NsB*@B97o|Lgz$`u_6r{@>sK zzrXwY{VCD5@bwb+_ND&#khp+ZmJ@1MNhkBAIa4MGOqd{W;hNZ`Yhn{7KAbw`iI{Nm z!>6eUiH_<WIkeI(qE?{~1&Hik7)wZVHo2Sew0}TY>YR>)Pgje}A*TWc>U4|NsAw zkMp?q8N9iF{=*Bw!?VqKz}Kmd z{%iOCzt9zQe%&LXox1~q-{*ZzQ-AMX5E6Q?^w_N`nc0aNx3|6ceP@QrX_1N|XMx-; z50ol<{{MTnVX~W4$GSzwwB%It^#mP4ZLTnV-!XBOO6Tp62R1j$mvvUAMcy#GCMwwX znAuAYoP?!*965OYyq>zchQ5AYqTOv3=RnI;m%Zv=R@`{|B1P zakKyYKmXw|`{BR8d3Ki@-`RP%;I$ZtV_$Dn^WU%E0+e9?|CeewqNsCb;S!Z)i?fAt>aDm`F-`a{x`eZtl8z* z9U@}7!<(a`vLWnJg1^j(T)sQ8(iMS?cYd^TI58Fli_8oA_dor|hsKBBa@)GaCHrKP zLE-zexJ^GkLo^i3;l`i9%1Fv`Sbx>lQ6|%5Y7I#j^9yzq18j zwGQ9i&C||jILFR+wVeUPisB!`rT}m(07H6tSh_;k($j=jSE&)h_nqfh3qkb&nF#(xyy*zI|ih=PxGZrBhSW z(-V@Kz8FoI%=B#=tIi|I3y<0u_ul--@5?3krR5Z-j#`45hoRN#8HN38#<8$=1!m6DK6NdJAvpaG$~(QLV8fm|4lIfHzMm++@X7uA-QvX#zWUp(b< z%WPTNaPY;}MJpX&KQ=HBm}v1J#^TrC*$TxY2y^12}c}jP9-?#x`NW@=k|v`3fr!(meh+&*-_4K z#pBNgwbbd+gNKRNFHI^ay!i2h!L`f6X{o6#OV}c;8g|WO+^Xf+RhrVWl%Zkr4q?W$ z&Zhk~4#r|j7dEV9%T5*!KXSPD(2Lm$rxbHSIipr7X%#-m>;`51H%-DSq^-LEw?IX7-JG&DHyTlvJvCPuNjJQ^h`!2wytbDFf zV8<`}dI`Ilgoyp~5?_b4tqzy8t5r%25Cx|fCH2A$8w@->K14@l9XS#)Yu2gHA2%8r z8X6cW^*F5yNYHF(Y-D_-@kW*9-Z_qcuO7%xaywJR6wc*vxkkt(dHas83`poEDH5HNv&AbrJ&BePfYF%zE+I})2z6lPfV7`!cb zSMTQLHiN_HWQR%9p$08(L;h|tm9H5LzN&Xh)^th!`|o~EJVR|kW=hNR(v;Ny|NkF1 zV6=9)#;0i+5Zk23#4aPl&zTXC zn%&)>4UCKo3>2=beF$Ol7vtH`@`-<%)Pq*WF0m^a4olc(SQJG#d%C%?g$TAt^*lM+ zAa-9uWnIXj2Cde2Y%1rK4*dVmv%#h3s*lP313Uj09@SIvJ+SnHI(J2B^Rn)lGadc= z9UU4RothF4vn^UB0d<2};f9Tdr%!xfXTSa9$BXQ&uFoGf7#bNV9%pg;<|utN!D(&4 zj0qC|6AI5aq$ot4OLoz!yO1n$HEqquvmMr~3fc({svlPz&tfY3{{R2^j#&*GmNW0* zC`x9U&aq6-rybO_AeT_mB13yc6~^ z$?+$G{wJ*D_5RM-9_Lg2KlyUg-m9IyALAe8IEuTxeX(e3?!&jiE{_&WICAR8&&dy8 z2De=cYjbXwu&X;F_Z1q{b1e4k^_?{Nt*q?Sn>Qm9GMVPtJ?NC($;xo@=KqOFe6N38 zwU00s&w-mfM~(`6da_^En4-sTc3i;6uwL?l^{yA4%`+4O*LX7a zE3o|idHBOiVdqfM!v~vr^6w{gRzaz}{8#l^Nqj_!<%keGO< z_s*e&maVD}b5i90HGeJoe>jM(DEYI!kE)Zv%B!9BH5-pHUYqu^o`*G~O<+qaAH&VW zj)DfBx&$|yYfBi|S*rI0WCu*1*u~X6UHtI(_dLJ98{1SidB{UN=;SYR=1kA3RZ4Yr zZk(LwgoI+woas2NxRI}8nvy5tfAN%lD`f$HI&xf}&;g)^+t)3Z-JWnEyuVM{m{Ir5|lUQ_gn|-}R-Jb-D0)|`lkXD|Pr_7>7Hg$E&7A{ni zl4AOC_jtpV;?@rjRMr{Vg{+O>uC&o?kZ}C-?7{U8`3q~i4)T4tqR%7Y%DRkWo*BDQ zLR#8}O-<9})%}fXejR~0I;qjqfQzfh#U-MpRWc*XDb45KVWr>pRbB}R4yVpe_{Y)m zmw#(T!*t0E)?@V{Z+TiIB6hMcuJmjv(`?|HDjxOAp)Y?;b8<%WDwg(m#fI+ggLmeD zS{&l~5_<7z5IdUG_~hjH?Cl?Sa0r{43WkNTRcz*)X7u19XX;`1j)}k4W!U{c-XI~J z>X6l#q-*dmxnr@af(e`4-V>X6ZY=r0d@Iy&$+W%~4B^%y;nht3P6tmlFc=!LRadw9 z&zHEjHyL6}Qer}afWU)ITS7cNHUtI>KY7x!gp)1emqVFMn##fIRrP`1;uVEW4lBI( z2ZTK0Tl}Ag=YReGf4_f!muG9M&&lyQbLPZTfmzl6l8xsxKJ)X5^h)lw`SAVy|M>m? z>i_?n-}}~{o&8CKAY0B)M(zWb6;cm`{hna;nTfHeh{w!~=lpzQs5=A&61Hy(^YYtv z^{Qw}a&nV8caPFT`J8P$JIhb8x?dp=uO7+d1Q|EEs%xViPTwe`frs7Q2gt!up98o4lGn%E4jhYd4l zI(l~>yf_h(oRThHnez4H#qAp>CM72~xT+NW7q~aiL9oU0qkPV}9>%RR8nvC*#awXu zBR?r=O30^APp2mzmwR)w`1zHU>f7`7{{8gy`u_U=k`fX(Zq96Nu~@RS($usyCwHl` z(iIEKnGda>7aZ5P=Agdm-oa$4Fw=+w-{1fDpZ{-X#>MT$&+GQq{Ct17z5YY3YEBN% zp+h~NKJ_eH*5m2vbL$q*hSr-cj0e9oo=H8BQTafqk%5noEjSq3tV_zt=jY(PI(f>I z2Hsxf^Q#S&dZ!yNERCG_YaNe7b3(_AW7{5yUspN0e}Db|xII5gPEPt9z3tA9^7nV| z@4w+Zr@PxVYS!@!0c(~V@dyrXm_567;zxa*Q~?>a|NT~pZKrcjXmx&MvikY|KPY+Z zt^1pPtVe%!c>Uh;_vg>g*Vt-ZWb*lgK}Lqbv**T@RYn|~f0b9p9k$xqrrB3o#rk@N z)f{ecd6#5UxhcfEdwwTpu2pa6#ECN-8yPiI4HoiEOZJq^cqx9{De1=2 z^Z!3AeFla;O-yP!Kg-BaX_?)>L(-BP)-cRpmVUrYsZXdl!<02^gHpG1@yUMq{r+<* zBEq-7esQtgh3)us{r&p!b=IY?o|@{+pe?5Tve%yuQKl~Fm&U@`~S<2N^t2QQbi zFMK4(>3HbCfddB$UR*G)s|zzU$c7f4)Zq`2GL(SADI1du#n{vxz(BR{pPJi;iY9HD!y8 z6e>w{)1A|;*?lcNG2uYa90LPThnM01|E6_+(o&A>QWeN8cqGqU@{m>P*Xa{K#J8C| z)IS!)wx(NJpy&Vp|M@qIa&K>wwy&E|dFSfu>-Kqfer%}xd|2IopOX`V+oP451uGK| z1RQ8kalP^6`}_a7H$HrQa#CJDep02(q5btd(c27ScDbBeY1`AIp}ahUXH8Rqt^nI6 z3rHw9=_dStctg5F^slS8p24wrm6lj;qhDWM{=T@FKkrV3YmtXS@izr3tJ zA^1kY5t)OFni!pJnfdn4cwoq+k9npk&?cr>A+!-%qNP<1Ug(?yc}) z3lo-*fVO9wObiUWYrjlsELp&yBKzm)RF?@Qe~(5_aM-EtZ?~uJ@74GBm!u1;eE9fy z{q61caeIILJJ@`_!kdFlCfA>B$)Pv$W}1IE`MT!W{H%R+T@pkOv@+;)tIsA#3P;^rG|I*{{DSu=ll@MD}O$vemb9UQ1oVn zm*5&Th~Z5p9+C;VzGu|bPig5d@#7NqQ7#H+UB}~Z_>bT1UwXoi|Ig3wZ|A?izs|RH zamK&Dzv~|#JI>B;XKc`s+#P!+PH2|(#BN@;B^}3Sosihjb!X4d-={=h%_*;cEygS? z49ZYGJ}sijOlxYRw$$m&O4`A+!wq6{lB3_ho}Ld=7}w`LxU>I6CTp37bd3Oy_<^WO zWl*}Ee}Dh~e}AnT74?%2?5_uni2izXv^;04%C#uA?5_tKqPem&8z((`cmMz2ABLrV z6aW61`S371Xi&|<;>5+ioAQm!XFl@BoMeS05l20(2?8t!es8J2=p}fAE%1wL(u_VA z)r*IJU0(kF|B8|+&;I}Ck+=I(_5YvxyG@N1E&<$|L<^!N=Gp%K{8G4d&*76NKm506 z<~~_Hb0Nd)9x3K+ElpcrbAzfVP##d{Qws_HXJoWUNAkD?OY(tfMiXvkgqo(gexBd> z@cw-JdWG-lfvh#ZzWlzsd%ncS_G1dm`V@@5E|B=~^z{AX{r_wJc1`3x$>ik3V_!F6 zzJ-KDff(lu=F}ZsVvx{k3fG=+vG7NNICD_vp@wM|6TVG7%=-WEj)I0~dT(0Ug@u`| zts|#unH@NB;=||X@rxEuV|4EpUE6YCvBb^U|L^br@13a5D#^h)(Z;EZJ3)-6LQASc z#|7m2|NoO3eRuzF&o@m-`O_VE^niNggwyE8I0S7=rb56^!^SB@a=WTMO+me(Kv(seT>mX$+{PTAG}JAUev$J#gR z)}2#Obd+7(vBt~b_6BIU>0E8>sZZ`(kSza9DJg_G*F^Sp#!Gv)w)#7F&iwkdC@1F% zXCNbEn55*n`2GJD9J_DNHfQ25dF6Ni6TO9hOy*0f@d%wdbI&|=H3Oq00ii`rmKm8% zjJGY=uRSzkV}sW9PV#o{vUi)7NUsU}C!W0FkNmL}me*Jsjdq+n@gv^fKO#PU3fI8{ z2M(M*y=d`a_T4L56RIECXEizW=O|6$^Llyc^yvlb&VBlfh zCs!JHE}Y`u5?KxLOH!k`>&2+K9xv>fw{$YDO!{~D&4D$(8@5?qkjOY{%FNCloUq{2 z;dc4>y^CH538tmFm6fR}E7#@arCsrGOaePPDW z=g-vAQ&l|^1w1?pGqOH?`e;~OV6f=eeftpaLQ&}*E*FkS7O2eq-_+D2B-C^4*cuTt z>z*D7CudC_o)U)jb{}|FCxb?c82ZhKn};zeE?~b)T5SE1O0CrY8JYzwB5_%8{I$ zGd=lkjOGdH=^b5399&ApC+37S#LP($)8*mefmoR26!olm=H~y8G}JFh>|psAVpwoY zzT)$?ZPzr+!g_l?HFtJ8dUtPM;W2aWTtQ{o2Tl=EygM#)%}Zk2x$Ecg10U|6>atX8 zo7=M@DLtKoFYU#Pgal>dW&wtbi~VdHwn0bKlbraBmQ=CqU6GLThyC@tN9#>LoJd;6 z|3u@Dw$h=ns8!J+OAa4&bo!N$o?cWUa`sHflxb5F}kHD9E_OqT`n_W$(aYL+j4d}(iW%X_`Y4->a-7WVX= z`u(F|^V=zZ*t?C-%s6&`&jHnrt*wpI>;hasRm;am^DT?(bYp-0zO4Sn)XGReBqbv! z#ls`v!NY?$&6dbKxYf~+&J1n1{`sgpud$KQ-O9m_uV=$8j$FG|#>Lv*k`fWZ>}kIm zjQ;)qU;pyb?##=1I|E(-M*oDpgexv_G55JNsMkL5 z(yssi=jZc6==S_~`fZb9;B!{yx1r+`jJ54^T=y zb7sY|V~<~Iipl6?W@>i4&Ei}0l3{78L&>j(1^aK9JThcI_4n)R|7T~{o9F!4oqBrz z>+6#HY8uZS;^r{W&@jlzHa>FPc*cx?Vh?6}1vN*7&Co*p&qw81jH9TXMDNoR8D?h4Oh{7W{432{-t+EEOKh3X(0TF30Ut3I@!ttX z1xtC>ys4K6Vv&}~sGOc4^H|gPg9`T_H(s%Z{qMUpp+5Qf|G$3L$;Ley%&Yh^pFgfw z`lS}}=Fl=B<4A?#iQe3g1P@BF*Sk7z@NSxUYeIir!hvNu1&mFcOp=hy(DdHs*cum! z0B@!k?WzwuS!Q!wQ)21u4YN^LF43gP94vWIOygR_@x}8B>{+h{H;X^;*M?|JYHVfw z*xAgumE&iBDA~k~DBBaJ6C9?B$6OJkOB);Qc$kvW7Vv zAC`cI#~J?rKNMADV3gp`b;R6ggVtdm;T=tkErYkjLdqlELX2^()kP z?oDB2JR%}-LN*HG;H1X;N7~HTx*~G~%+&p(`gy?Gu*XEg#8?@xPFT{?qPRnIj+h zmFIof?)1ULw?kgu*{uEqgVJs-*gS#bSM_Nd^>b3$c3s`!x~}lsl7s1$Icu%xGm#YjSw}NK5yFI^Q1- zf29P4UExMg7@t)+rf8;^fwP!XcJTuL)rZoHBqbjnUKVkC)ee^pYOFqAXDnr9&PqvG z#?!R)L_N>c9j|8nKf=3>@8^`pty>Q4su5s<+9|Er>S27S%A)7PY2OLAlp>lrbsv2c zk>i*OSv_EHY^;1~#O|AUkBp*A9yRhzxUU+;KAvm-S_wG{dwlhj)2mIsvK-R z6^DddfBe?`arvNvmhQVdi8pplb#!PrczJSGy5R$+9YUa0LI3|Ztzk)auv*j3n9a}X zDW>b2P?o_7nvVYQ;bD33<o#mQ&dA*O>4Sly=I{Pha=N|-3P)8pDI82`tXB}- z{6Ny9;EjZHxOnpE`Tze< zpFDZu)F}m}Lq`sM;&i-S0A--qqs$!y7 z|F^CE_4?f0`PbL|oiZt@tni`B)z+1QuO(mAPdM`b@SK+Ve*0^G_nw$1rPDBRqGRC1 zgC{y3{w!|0y$rL6llJ1uKw5e z&(H6#4&Q%%_VJAe>8bsCmKae=3d9r62`(8_u%#Eg#7a#p79<&(#aFP&fw2q@4#OT z%v+RL`k_PKNrx;ZJhl(o^rJf@VZs#mi&y?Ps&xPV|NrgX-&^gg==ly?=O((8$&D-3IC9sXQ;Nm;PIpBO& z`hS0ZzMpBl-^h5U!IorGuI&1j#wjgEM<%h#eE4jBcWtp6<^s(S9o$M5g|>&O3_YyCa@VAJ|`zP~je zAHDCF4_LZp4Nq2<&;Gi~2L1!+oy}xI8 zXWv-{%f`Ou=EnB+hi9eR-rkms+n2(@o$TS4_Tb5pA{VQV+-E;tw}$4R$T!TMEdT#I zm!C8;FgTDc(~)UlHqWT};pgY|*Vp~Mx98{aHIec6_qE2&IM`9u^nboR&#PBH@%#Th zewKD1O@ukiD&c_G>mL^l7-eL5s=qnhw68N@VF5MRZ0gPw{4j8ilsvr1l}AvJhmVhE z)hZq)rWaM5$82XMndriM#ZD$36Wu)iD_@tIkuJaZnm*5mmaosx|1W?4Z*A1q+QV({ zpP&2t@596W9UP0NhS>8}@#!;6Iv*b4WD5^xla^**#Jt7O5k?q8Cn>j!a*)Y8%~>>9Vc#ko!2 zUf7v`n*F5Er+X9b%#__{qr@>M`iW{(>ipD70omI9k-LRt3*TJb>NRQhwYb}PF51HBE%I$1!x_>jd34M&D)~=Xr~D z5_v9(1gfmw8$bWQg|5mWV zkN5xg@-S#^-^|Xw`Sb~ckDrb8qP{%7vGKl>Q_uT*ieKvfo)Qo^aOlJj`Ni_xGk{baYjX^p>gd|)=-PVl#tg>@iKjvGXB-V;I3exKJ9F1^H77{4&hN}HsBsnK znYQ)rfdd+B`UhY1N~xa^5H!*0x?!sMFE??AyjjHkm-@@+d}x_o{H|J?|LwNfuTQ@g z-;>OKVO#dBbIUK6*!jB}nx9RQ2ohMaZ@oOJ|e!g`>pz9T=Ok<-S=MAm1@Xy z`r5Hyo)R@IP5CE(r1vVPb_$fpHY^Ibe&d@kgGujU#>+=aJ%t|4(0N?6aMg+87KiJc z&NlqF|2_OV)iMtp!gppFay2^$xbAl|%Vgo+C~)M`A?-JsHm$wWuejMXwDvxD*L60E zBdjyIt3$=BsKLkfu8rNEhxuB&zwawEtW-~)cYUAwQZdC9xzl&mg<=ywIesOZo386OgJd9WQoBMlZmOaQJv344vJg+tM5=yJyxj9>>!}f z*Pq?Sc%Ak61DQ;&r4ycKNR`O8F-9%cd1xUmlV1DxFH1EOc-8AHTdw8+ftKUT9R(aW zh>IMVuN)Ck5!auzoLwNgQq13_^T=J3PEVdmcG0}iXQur&JhS*s@s1tEFD!~~=cQKW z&RYB4Q!}h>28vAAF}@VCJ=gd8%a`YtopdUQy+6rikMZ$cN`0=^110MA z_Zt^_SpTb^;mD#H7`pzc&EfBkC9`rP#F`k^btfJ$aPZP#J?|`1Yw`Y(df6TD;?yUx zyE$6|1m@hWZTu+Q!{fqZVjys2-d4ViY}^|d3}loN&v`CIkD}1ut34ASvr$f8M!JJ)|lUC+I()aws+?3tDg&w#4F8E>s69soBq7U zm32?BR-Ge@;FrJmlPe{XWaQxpnJ=GM@!w zk&Ln7L^I$1IWCY=w<+crv%b~?F=HcxKhGZ}UAUGuWlGA!2MJG}rapO`oRHMiUy!g( zCQxiL7q{`9-QUAqS{~js7TNB=^YGcf<7du%c*?EJ&%e&?SO?qwI>SA6XFt3WJ-j+R z05o|U7|52F2O15&v2pJ1e#xg5Ts%p~j{p0g4z*cMf<3eSkh`nmQVwnH<_3nv3lx}{ znO&wa>T#Ph-ZV)$_WJt&B}*me+5Od5Zho=S*uY`>?f-vHe)#>p-ptHq?v06jar@IO z3Yi2e-@M_`(&G95KiOZB$HMHQvB3g$rWmdbUXVhx>D;M9(vlw*`v3T+`h$c0m4Mcv zf{xo0j@JMGcmC|z4?jNIGctmf+|8IN`QZEIn%~C`9Q*eFv^ZN?8E9fXBJzK-aq*I7 z^L&XuxzrzDxdjt#Y(TSQD^@+a(zLURdF8|gr6&7Y1#Qrf-T(iGV%{{{^tbM8`XK*i zO`*wAjZ+bk|MypYoj!f~!*6f@uUW%$!d$$t&@g8Au>)t%e)#nCzkU6L_lsp%13;@q z54ZF5%Ng3#^$7g=+1O|K|DsI=lYWf9YtPFMm{X%^IHl^~N=SPks2NdsWPpPyN7=BOhK0 zAMTUoxxY_w+UJ5IqZ6l%KYVzwC!X7L`BBIIddaxGNeL+_760Zuyspot zrp9*a{Vf^42any`Hm6IzyPI6`TTIYMMuw-R=E%FagI9kXh-!Y|>5Qo0cOHBFzkbRl zMom+P)1psO8Y9mfxVdJ{|9g9WoqSR@ zle|4>Np{7Tj)z||+veK~KKlRPSVF>J!=_1M0WM6udf@wxpVTvGv1P}IdkVWwzeL;KVQ)jw65&o z`*^k+MW(ly($dzx{g?9k8JAyI<$1sE@(Q2J3M_{m1SY6t)TSu@f7Tx4BYgUhMbPyS z(V$JaYhG)9c(zB2>(nG2<@27wHS;b0Z8UW7`ZaIc($_oO@@)API~Mit*vXp~{qgy% zndxQVB)(?uMy_TDo;G_Mc6kRYtsRz~oBbbT-+!Rck=Hc2`9alo*?C^=UeT+AG|q}R zG1>p&*`NzS|Ab? zeBFfi+Pi?CcAiboW3N37fBf5~u*CYT`r4P(@4yaTGRK;)$w8paeiA3I+X@eh#v{fR zJb5P+-@I92zG35*s1-U(9GUYHH{axNOn!9#+~OJERVGht-KGEaaq#g-*GUtWbxcSt zx!aSXTYKJQp{bIKpzq^PMKU+@HcnG;ICArZ|C;Xj-E#Lrzgs7Ea7^dkEuSoBpfv-M1T| zd$zxcf7bcF@sVWD!-^-YptixB+55Sg5=4&Hch&t=cj2%zkZze4w_y2;n^g;zJ!31( zC_3fKImKm*Lq}WFD{bjrmuJ2!*eP>v=bUT}8G|*JY%iZ=$FA==?z8;LRw-NdQ%s~P`WpzMjELQZ^^e-S_6pBG_C!=$?J3{8{{p*Z&A|~fXO=DZAqSo| z`R+##BCjVs?BdhoitYL@A2{LBwUx~m4mz*3SfN{WXl3)G?M-btCnxYvGFQ}$`x^K9 z)AF)UF4J0bm08w#%h=D>X36ZjR$}$#_m$Tce1R^TJ5whmr+&7y7)m&=Y`He&bc$r8+uCCZ4M8o6hZQUTH5_VX+SdENzUEW) z?61`?Ur+j!8n*XSh0prqImb9bQMhK7A$L=NNUOb^63e#@2UHcA7B19Osi@x_|3G5O zYHw#3g^ZUI_9!eo*d`aKcJabrX7P8HHFubgSA~9GY4tYY(y^2JI-*S%-m54Al9KTo)ncp&A#XO^~`jen14 z-p#pi>*T{-W%_3h&FFkk;o{Yz;A<)KqB>0b(Tvwq|H>&!xNnPI#kEw!d-+*?@ik9t zp4HVqzi*-Dd8f$k*mwS?yTfXwXI_f(Q{QxE?n`EnALhi^a<>@p)H*w}7rshzEV%4> z!Tw!;K}msD-l82GO&tq59uypIRhoC=ZmAY;Yt6?=)18h!Kbt;b$A->y4o@K$!A)=C zjy^tM(#TPJUN+1GxGQ&$r5xZeuLv zO%4cENc!~k>?SX%!?QQF1f3|}q1SR?lLE(uZ5O0|_f@t{k#Ohxe^W{GVWmfK#^24o z+a(L8#RVLf+vi;zSkV0q+-FEScDIZn%PMk6;E^eEwe(|x;p}*hxr55~8 zG~V*TOFT{}U}@QdhmB@G_SdHy7=XKh;pqWEP48UO($W%=nv^=O*8elGsXkWll@%KgW(@Nd(|##vCVhlsXpAJ z^pEwSpH#$+#;4KGY7XCV_|R>%&P40u68`(@z9)^^9-etv&s(_v*n`R=hK>!Xf^90C zmFv0%4|T5Ve!PM)?edFVn{-?RQ$1rZn`~M8>tW@x_gjy&yQy8eZ{J|^AX9|phFPd< z;GWp8ERx4-;`w*ynjcqAo}sp@ukeZ_{|9iyuYowRb-$@*Q1K5KDsnfaO+$8Y~w{AY7cw&OEy-&xjY-}qp2cSY#5ExOn4HScHtdd!65L~$Rp z;47)Mnp}q_=qz4!VU~s6#(-1v-Ly+LJyQBytKMb6mOMl4nWW&f*1{uElUOFUuUA}d zvh;Pvto!d5-52LuUwKDuN73yU)oqQXdw=PJi^Mgvb@^Hxcv|->GKz3=GWqXtG@a)B zV5fSLNQR17;}iGIO`MEvb5ka-&6@T6)|T3{s~2v1rDR*~a_7PIUmeM(gw{&7?H6?y zkVu=cY}rHkD+*l!T{_M!HtDx5;$ELSezErW=e$XI+hffg1zjZEr?u`{mV0qlW>V}+ zt7nq;&8xT9UjKRW(1b+`R(VWXFJb+9+kdO1E3u{($7Z+ftB-5813NLP<~yHZwUa1Y z*oSAl&R$%HkM&xJF4)2OjM2<2Lr`6bNtU@mXVc%0&5q&I9i5sUKKExcG-P9DW@BY- z(~n=URYF}Iw6?dW$7If%)-^7PiHV>UL{Gih($d-{t4sRLO-e{jt@tzJ;W=4QXXyEP z$$x*76Ox^0)~IvLm?5!dt>mj$5^vsYP)|B@i)C}s!Nm-|fBx6Mu&IodJ-jZqqjo1~ zqK@JJ|21>GA?=joUg26I{R>`*+BlwnyHditf*!iNp-j!fi>J>#&jtK{OF zPYMD;|5$_`N&K^T8vSr%mh}18M;{;jd+zNqn|i;#vjqw)>$-!=EGA6SNsbg~GTHh= z{#XosY%P;o47Tvq|DW=;e3qJ@`jkbP1+l;8?Q+vxvLttB%nK`qr!cms8`{<9_7T2TxHe3miL?AFVS>NP>G+mF_(ogGL6AQsJQ3H zdC9-*mv7}*{E|EEl-+XRvp`eju|f$p<@lvXN@uD)l2{6I>n4-)S>?MK{>Zf37az8L z?IeBN@p8cRExJK*+K^;%WzI&f1_K_K`#~>M_#Oqlc>XK9u5od9^zIvF1p!=qXHx=8 zzMPn1di?1FrxqC-zq$Q?9>h5{%$cbcwCMfJ^pjy;B}ux93rvq~SeL}pUB70&^k1gq zjX^ommfWJV*q*!SBp->|v;1{N^u!5LTc6fMXyngY`fYA%qJ)I|&OJLHuz%_!B$ZD8!&+}6q(sKt|W>cIPiRm`uJ@uVF6p)Jnw`>A)^*VmHY-X>Rk z=c_)IkdOd1z>!<9V)+;Qtn=(Q)pv;eIWCbb+oH<2tp4MPB&`L;2SOJbBrM^1qQn?E zjWIf|_V+Z}?)@pqVe5E8)OgqX)MNL$!H$c;fdP#q0);xSyS^(H&|CDS?w3dz6d?rt=kaE^ajn zi?nLdDROf8Y&=hiG5ke>8h1=v!1eO~f!4}AEtgor*qHmKeEh6_`0Q++=@ugAy-Nz+tXIPFj=n8B! zdQkQ8xcR>{mQ5T7LrynL4A>wR_~Wdi(c}mIGCWVz1XI4e5sRAmVd8*FV<($1Qh ziMLGH^z_(*gI}butbEDr%b0dyLI~qSCKh9*e=II3Txv#!$ z!eV}T2{GNI8ygy1rW`zQ0Gc{dQc@l~N!hR=;l$Zg1H(qWlEX5$4$0hgntVvaQP@2# z;i(Ykg;xrHPJMWx8YEulF7$=z+MKpb!L&yS2c~(Q&^FswpFYE&G4Z)y8#}+mzIycs z2`NyMe?oS4X@^UsolQzeNH8#M^?Z8(+PdsOaN&dZ?~<4%y^>1W@<5;G!kiX&<*4i_3Hy0gTka5ImrHe-6cK#v_jjJN zXD{64Pnk0rI(y2=Z)nI?Tidoog-u#I~b*l+oSa^99oAb zGkuuR_RPSfkx?ZucYhh}UX1l6=$knzBAfwDNEqQ%$?ucva)ckHf*c z1cLV``0AbLnQt$7ZeHpPv)+eCIJ?3OK`HA0f2a6_KksA@uMFmi-)B(s=fs1jM-QCm zzx#)eea2NT!(O4JB-U_^WHy-pcb1|`i=$IhV=L=kg;oZo-|LPn=V1%7 zQeauD5aIZsT=ZJX!3HVMBNo!DHy(My=yP;OmY7A=myX24Y@mWD@9v^QU9pg0I26w8 z?QL99ZQRmkv|^=WKSP2`)ThG@+s(VDcI$M!Fv>vmpC^!IbzR@hi4?)%=10uP96Xm3UcSE*WkSl zplK>eNuF1)cor?{XinSkEnm_gh+*9(mYpx#@7AV#t^KgwtRvHS(iAn0vZn<%n?hTD zOnUst6*STI?S1-=I@!NA3adat3NllXCn8c(L_{JcMnXnb(!^BK#YMpT?+2b70Uwzk z!EDTey#8w%-Y_xj{~00MaFfB^to_6>qluYJhkktNOnev9_V~DD-aSw>Tj}yZD;uS? z7g9i*$x|mxOZ)I8Wy7|#3DeUBL{%?vHD(_{06_b{=TR@v8pbm-s!e?dU%GM>JN~&_bZ+|dQ?H<#DWC}9=tlWV9Ak$gaZm{ZJXC}ZSDEO#?B%jEXm_CUE>Gi z#J!?1yoqb{j`*%utV}A#F>k>Oh0z&l!DF~ z1?|%dmLE^ZIFXRTVtbS=%jz8?|C&zb?5AfN1m8utN_x|KMdJl9=u=vAl$9=gZz6FF6OMPAHSzJ&XlZDx&HAnPyRiFn4O0{ zypaS=FyG&I@$l9eP=`PJ!KbawY;4RdF5bMwgjrs`dA4+OC+C+mqZ5(&KW?uo(3`lH z#pYCo`1c*FoKlWI$jn_(`2SC0#b?mg%C`IaLFwSee)%e{dykLv)c<$7J)0RcSt@^vfz8c4z2w9cuJrGZv znQP6};K1V|f2b~irTV=>_5bdOn>V^!?M0Z)8n*@;Jn=-hV9sHQuU$Ic zThl(z6Ih|OsVDWI<)=9&dk;9i`Q#S+U}DUkL&u*kab2_{h3DEa6Txl1XWiyrUj09F z(R)cY7Y(j8OLH@&j!jfaoVPOjuE=Yz>l@!kJ((}R8gvH8|Nm$1T7ynoIrecsf0G3B z!LS^b_w}0Do;CJd+h;g_W8$`E=g@59(R}v(?;gv0r;6uVdpgAii&(f{zVN-`{O1JS zT<`0Fp*@R}*4;~9`^w$0DAsF9y>^^ok<6}{xje25PqGJHKC)>;?i=gpD=%_(+1V z6Fhe?YR>YyxvFa)?3yqsPh7C4@pqWolQ}CymudtZs%}|OX2`bJnPsaJ%UVs&O4oBo zYK5jb73-W)7H(}dS=h(CS9@;JLI#bn(~;4_o+oD6O#kfnGq`4_BdCD;|DTyb`c>Br TwZ<3qpvd%e^>bP0l+XkKzc=NG literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..86d225e5d2158804f88dca881f69ed3ab287d866 GIT binary patch literal 19086 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s33iWhx45^rtlpw*nn1P*v zfq~)we<{|*4C=IH9_imGctC;YNKY@PEbs4r59i6&x8IB0WGs<#(Av&?&M;m2apu~t z4XaiZt$A5rm21E*zH`ZutHptb<$y}g zh1S5pgWdoC&(=`l<7k@v&i0LdOO?}JWB1#?Y?d$kxh$kS*0gVa;)cvk`?gOx5*u>V zC*(heL(4m-PnAtev^*FXnph45DW$)$o~C+D^tRY@$?KxuWBV3A`Ey9$9&Fc@kd0i; z4kFd%&%dq<3K0+w{P6$(iK9o)dOFs2zh8Q|G~>HdX4`}{iYM!*AC~Y^Ubiaxx~OmP zm9LB6zd!uo_O+=?&l)}6+k5eu<#@(Gq!S zZqDp;oBI~~OkVSP>-$YQhyJ|I*w+Y(mH+>*Ots}|P7rV@zrLVGLdBJ->=EOU-|+{e zE0_+({c*f7cO#eKy#sk|;pdun&X-vw7TMG$Z2R1cwvC7EZ3N(N|6Wp0${g)f32Y z>)Np!?H7)|Xg^rRzjkVIZ?VC%N=~8f#{$NW7SDNfc2|;$cj7spPL=DW^2e=wOXuy> zxnahj;8+*)%v%N=-coNmn`h3PICpOA)Txm%bG)6MgWTP(9dQW=ojP;&?1{a-oz2aS zQ$?jJZ*XyL`NW_tzW0oSuXcf6QPIJ3ZR&zMES3cP{Q3F*a{vG5=KU>yaNvLScZqv@ zlPi8tdwAcz&Ay(kT8j~4(Vouc$&+U`H^+udarX7~2@k(_E6T~axwreZy|J;f^zHhz zvt`UzpWL6J=te*gZyU*7)T z-+zDqUta!S-QVcVos%EFXdYf2&Xa%N=+EENA6{Q~XgB49X0x77XNQKlvuAtz_QnSX zA3NuF^U9Rz^Z(l`Kdu)OV{<$9zo%zLU|&b+44on_hxNV>CUQ-0>NGGg_;WOPv%*rI zH!)APw1)gXaC&O}|9^k~f9(c&t^B=&S#FX=)s%-{v)gu;OaA+lDAAG|0kQGSwAF$K z6a-w#jb_Y95D1(p!6TcXIg@WiquwLq6&nOO^yV{ND9^k9ZqLRC25Hx-KGv1y@h5H$ z({ydPtit(2w0mAgv0LNm)lY5rZ!;|7c@lc)zB|zKbO4k$t zO#QDqpndtJrgGi?!5S2e(ffQM~l?? zYVz0y3duNmIif4<S`YU`FttSERdu) z17!BHH9C1oi(Wigl9ZyNs(O@5Q$2x|f!StWL4VTU!xLKF_?J9!_e#|0WZtRcxJubT zdf|yq=4#32sK*XBSNO_2xx}=#Co$oGhgiYH{q>UTVv{Wjn;!lsY~vP}xVKMsh6>ah z4M!BUwV%9Lk(a+dDN9RR|2%7Ek&%Ic!gK)-Q;us^tm4e;p6M4Ya?IVQP@=e8dX2AN zkJVHAv?i?ilfL`$4#7XC0vCi=fvw$!1} z%?X}XD>~Vh`Z>NT|8vyGaE2+j+tyF=_dX>Y2;6x;Ovmo~lq!e6~`)I?Kke=@14I2ZeO@E%8nCRdq<{^0gz=03_jg5?!Nd{@Q8@4M3 zOkJp|_T(J*yp|^0ix0AL6|#>@DE@7C=Bk|`dNPHnlUc9z4%g4m?GGQhw!OVAxh*Gg zM|pqSMNkXl|Nkws_VTtG@U-mjR$^PQAdy*xE8^i*okJEp$E_A_$Wd@w?{LVq4Q;rYs{ckHoOPig#DmDdYf8OE>GsT8>OVb?}Ah4g1L*Ck8bF#GoRuk_#P;1X$3 z=pzrmvQuYD0>Z@7($iBC5*m)k1kL;?ztXJHRa=QgQEOo!hqo}#hk&${BclE%Rx;X# zO%N6fYD#eF(%Q1(Z-16fm*xNe|J{qsX2o{2eEM&1^Kg;mp(6~k9BMK%CZsej3ksXo zIDful%$$W=r#?I*d8lmyD5x3!|6f$J_0W+c2hN|LIQ_Z2WTuMl+4E<5J_zl3(qL{m z@z(7Bk_Bb%hOY&ul(B7N`mcZNj`Th+mSC~1Z#!f4KgmdE$$k;f-6xm1hG%n;;;J;q z)$tRuXU?~me0Mkb#ft=sl8%SJ%eDBoLlbM-qlCo7ppYkL&+~u&d|62BSyEEdQyq=J z^CwPZ43jvJ%(u4H(PuJKw?)(<$J{l4jvBB`?2*{edwlWPqrx|p8ePA|&(P@#{IE^e z?KiVv!wFt)8>JIXANunA`42C2<~ctvu=p6LGW-93QPiV^hKt({yNkHsis-$G#FH;M( z%>PQuwU#uoq(3~^FiXV1Gv&)W*~3px^6V}*wy8Mq;kS9|BWRi8H@msH`RE}hLw)@V z*TUw_o;`Kq#7@SjnZ}A&r+k?5vA?lV(Cptt zyqp>1!Va1&+M4o^VXbO6(}ry_fx1pYsVPTFY*w9?ytwDXfBRX<7jloBa?M_M$R}Aq zC`m&v?ZM;Z8(TUbzKd;}Z*LguX9UToYoZ+a4hRUyEPD8W|E>O=KMVI4d@()$an@VM za82K;0+qumnw(x|(Eo-YdFxXlZX`zBlWf zP}0%h!(SEJrTX<&{=O1#@pcUpL!h{ErRI;XET79;J}an_R2-(qV^83IV0-d z;+?#jx<&^c$*p>7uXI*<4%bK4O|cQ}<^@ZcV?ud^g_)(LovW;vnVAnavGVlG2Vbd& z*sf#`wmtLYl&O{0*4mpl86_{_QFA%u5Temh#-{Qk;iwLCkZo_wJY{t$wrERKuPc9;acXy{_CdFq2!D(&HTy$Ib+9U@(n;8l`5mOJg$Q=J5 z(88dicgM_ai4e!Ep9z(M>lm~ZRxm_P6P&Fer`uy8!6Ol~;&)=Ndy1LWArTj`#DoLu zc&6`YU~qO}Xq5fW9R(&oaW{#*A{MM^=J3cY?hp6 zG($pi=7eAdB?ZAyek1W?&6$m?+SjJBuq2$l$ugNwu0ir+&yLiIk`cLyF5Z#_u_-Ft zJ2+?Tn!vhrspOwO5;?h&EvGP=K+6cY8okVSWyq+y8AWR{J~gm(A~s&f7N)XW%d z)i7VaF?1TEra@_`79UzU!ax@N|F5+6?7@>Kv+~vn2_`OFb*kTVy>h5t z&ju!DrN8{H^8{^oJd8?M?EE2+<;MlqDAm96?9#jfZ`LHa8SlHsuu973Xv9(p!`n(9 z%FMI0rLVqhd6sYJI$^c~&xREnHz)u2G4bKs=(f|-J*90}!BM89Sh!)cv9!dABgbY* zOM5##^EFt_z~<7(*q@+wMbKbNKym|roq>_j0f~sk?8a&n+V}iRK4MiT%(0B&5o1x) zfh`<&_ZzS&zp^-6AJlIC;d}f9Mq}gVZtlaUr}M<`Gq|(+_=o5G8|5HHypm#}hZh$M ztK{ult2S*kJh144{hqn3<%-*4`d+^H?-8tfg6je2Yx%HZg9G(FOB~|X2ruT|%%HHZ zC-K0vvcOA*%EuJg#ln-MxgCT1O;taBFxauf094q1QayaU-{r^D#>U1*Xg=F^_Tcg3 znfdEqzFMWK(&FJiVNaink~PQ1Su$Hy98@%CC`M#S1~H^1HY{g4^dyKoVVj>spx~?| zoqwwP)SEQ>)-*6`Dm`fO^5WUIkLTDio;PofRB=KAOslYy@OGmyz+}YKEO>J9dTqkY6*w zVPmM*TiV9&WOAHD}9EK^(}AyIIu zsKCI&(r}Lb`45j+8~@&f#L1!Q7CUxE>gqg^l@)DmRXn8-$h}cm?)!vS&73PYHLQ8i zbbh1ZfkWJuh7Y?u*erhkuW#F5|KG0WN8QCm#q4}{>c79U78Yh!SMT0qv%j@ z#zu}aHIux!n_NW>^7!TmHM;!YS-k(-yT6r>j;wd@|5pdAUCbGG^Kl@RkR$0lW zrnX4UuESG4L+Jxcsf`Tzf+=@u#~EU__Hu3TkJSgx07 zCcfcG>5@atsi)>OGVXlyvp-kyx%~{z|NsB~{{H9Z@ulA9&(AD=_V#xEeEa`@{z_Z% zR=>S9n_E0i#`f2ZyRnK|UUMf;?(FPjEKgBB#M19yxZ?Qywhc@!Dt~?-|M2$qe^%~& z@9zA3d~2)y`nbP;Ke1{ZJ$0(!>#O~u+7iM-Gm?^LE?KJSB|jsGheyM+MKJR`w@X)N z=fP854{sQ@WncfG3Tlrr{QrMwrbR{NO*XU;6yxl>Y7z&Y-JqFBN*p(m;~JqAK@ zDH*ks>u+%JYyA5D{{G9${pvmzo3gLJ{r-OcfA`*w^YiO#|NVJ=w0pgNTutr&f7L~s z%GP~vvF`nL zfBwm)ulo1p<@KG#>+Amg`TYO?{xv$Xd3j{yil>~Pa^AyPKM372OHKt0rfd0Bn@A@ zFo@aHVGQk=9g3Ut@#Dtm=+iT2coY;Q^aVd$6?4H{(!l6nYiZJdrK?giG?^`H4ffRi zHPz35ywLgk$0a-*KmPqazcu^)ww#^&OJ6tp&)=|ao0`Pmg47)h>r7hQxgYN?|8JJ} zXJ78^@3*)Af8ZtQu`j`*YKo%vYkN<2?F7E!hc6!`@pU*Zsi=^ssg?ZlRdPc6dT@W@ z|NletERG!cv3Bj#)Ktqt6^<`wl|S@p*PP(K*MGEZ%O(4sDIfW_J${^@{CM5tHYos?{9W)E_vB%U+)p| z<;S__GJD2#R~oc^9~iYvo_z4~WXD+9>TYPRRZ`PAlb)%mt$)5Rrtko}`h~K7$6xCN z9JefK$ZeKNi)vif^Z)<<`@6rFU0R|nZ!e_PVR+#4^ZVO!tABiY`h9(Tz2Z{KH4M6u z0+C%0m|kp}Z}+$Q%Zu;V*51FjN3k;SeLFwT<}|~Yyqh#J2)ljKwz4J_J%Df z0U<~1ZbO2=$zNv88lR}BMQ(1BWMp_Gc)6!U3SGJKL0;4M!7AT>hrS6Vd8*d`|7ThG z>EXk}{2p8Z96$d2{C;}+{eJoSkMCrQPB$k6ItD2nP}!JK^YzvHjmh@+_Wk|+R=Uuz zhs$qHf3U$RERfoVnyqVDbg|M|_$_w4-tYX12Y&1EuuVSo7P zX`bVK6PIz<&lXwB*%WFmF5Wy%Y}!f-sO6qAH8nY6Vvnq>YI=J98)%8V$z%!3VQgHHrSTus(;@YHyJ z@9*yF@89q57s++1`P%jHiD%Mm*%J(Fa}KC>pK-k=Bz8Em?T(zwyAT1ctL`J!O{iWY^uL~ytueO<+`T8hqt%qYlq+a|KlUO zdsf1Mi7o}A6L<_vXRz`xPtswk{r&Czr>FPr>w}*0vbDAIOTN3W?ymfAnZ~?hnl5`6 zD>I9WcY#9n|9>YHzPh?HX=zSV)3PTAoZglsh#$N*=|}fOsSiu0*2~-1DKBR4W~=-2 z{!1>;Ggv&QF`)Nw!$R)JIgFu(yJr6X|G)n4uk-KkPf_dhpD%H5pK7Fd)NJO+ zIS(2nSSqR)i_1Y{PlYcpk1sVlM3YVzjhE`Ifb!_ZK1yKzCN_W!`kEG9b-_?an1^e^bH z`Ty^?{89J0e}8>_e|h=+`u|B*65((D9iGj%;gu`fi%p*S+woCQ>pD=sQjI-P_%}+C~@=jaT-rL(b zbLN`0`y?DcyqBN%rf)0T&vtX(g)5KiOp}-})zH}ZQ779Y#&sw8CNUiR7<`7$ASGNE zG(*Dh|9_Jkr?vI$HER|uP>5RjVLs#YGm>7ROxA`HYdsE}`q!Ox3zW~9nAX_XOlf16 zmKHWQ=f0#Mafgd7CzUPj=_h_=--IwFo{A77qkqpIC#R$?b$aE{rx?U&nEYx-NoS`a z2d5$nq@CNO#wRBi#>&blv%KxUyEm6X;g?fPrCIZ4oMO$IvOrsiAQnOX5G`X0ROcl7Asy#B@fV?%$xqf3j=UKV$Y53iFF6E9qIJM#e2he=9G zNJvOc4GMa6@#@n<9F97IoQj>&YR64EzKQpW2tN)8d!}w|lhDxH>lrz-y|-5|+OmDW zs-szwgW0sXa~)kJ2=+lO>B3I3Hw9ND6JR;W~8kWI`HS?Efw|0~+vH!kCU)5@-{zIo|V6?S%4_9=#jhFRI4KYrRIFr7of zAWJ@GC40Zf9St_Nw*PP=FAz9Q0+5kc&1I``6Kc~#OAMH)8@iAZ+Jd^;<2#_ z&;u91VEYmi#e^R}eei>Q_a$*%*#%1_*7SDn-w`dH@wR8b#Ib*=DSy)Q^cF5T(ti1> zOT&Eo|EpFMbarkO3iNoeGF75uh06x+t`37$D-DA!&M9i0dho>MnWN1V29K`9q@)KA z97>=QJ55t$d^T;`^TADZTWIb7gz_1Gox3jXUm?m8mHCl{dwzQ z!wJo56I~u2o;!DVii!dRVWF9lA|R4-`N9K-b&emV$Y+>eQ_%f(FlzY&FZmbBFKlei zR905S#H3u)iHSM$@S)M7ML8?l7YMKMRZ#kIS|TRqOUjWGKjJ+)47IwN`}@z#YMnb* zQM1!AY?{qo4hDbWh;)^L-U&_&_V&%Qq_%v3*_oIa6#8`Xv^1A*T6f|`DlCg&>G zs96^;PW1KY=-iW*86fcWwa6dO#zw|kZ*F^g&}hh!zcBlfl0?LwiHW#O5&ocjXJRm? z%t5s@#l?vgTHGaZc|pib0{o01k53-(4L*(vfQDe+*a z{QtuboP-3FHf_AB`9t_@e9qDH@oa7NO-(v`_oi*zCiLO81qV}HoLF^rpt-rBr6nH^ zd$O+U2NjMOPex0Y4cAOLl2uhCY;7ZRb0Sz+Jw4=$ig-RhGwkRVUf;~cQ2AJRPe7xL zE4$<(NZUIpDJe-r_^}J0mz&!bGZwF45j&o)Id%!U0YTn^I-7p|ViH!L_4~&Ofy*-< zJxK`*ew>+i?ZV~9iD}1tuFc4lHNN}Dwd?)<9l`?4>sGONzLIA?T%Y!=e$}d;yLacb zwfVS+F;`Xb1TQz*v1^jl&M8bWqP*tjTh2m?n53k%v?)_l4|6!WI5lqA_}2Mnd*sW6 zCCn);p<=qcf76qGtzUoMzIo) zk_IfC%Qzp2E|_uV*thtwZ~rH$u9>WEoOkEP_h)DS$M0L9BcGCzA|RGF!>IM(`ToYq zmu51*Ver2#{C}5LVj`%^8WzR|b?L-Oj|4MSa=2&Td-SY8@I()`D~qJh{moBQ+>t;3a4YxwKDpZB@9(DT$JcG%Xz1y&Av@!P zg!E@^&BBcv4Ijk4?ot=qc+5nu#XdDbH{_q8;os*E6IZNA;NtjE|L4bkZt;Kib!R^O zwPv=qZeFC++|1m(T6^m3`5Ft)bl9@~`QNXf>QKVTZOp-8U|?(vb?n2($>KtXd?ZhN z?BzC0Ft`xJ92Ir&QAmy}2Ul}*^T9(-7Y;c+xDW0_{{8-5URqjNSeW_P*Q-a4*dDyp z`a`6_T`bVWF)BYQiYF?nr~d!H_B->A9CPcJV$oJ8Qd~Ac094Qa|7QrbE-^j*>9c?Q z-U2;5JSWNp3>9M?T4t(UaFH;$#GV#)utfISA?8C4>o_E4%=ou?v+*IGgQDwMxY?el zsTgKHm0wl;LF0fjcjk&k>n|)ZR;W{!)z;p^2AQ2rN=#2rOlg@enK6}vxk$;uOM{1p z=YQc8c^(g*HL1tyb9(vT9A)ckV$|i6@X%M`n8xBF<*}o%Yl;jz&x7?|8=CglOaA+x zj&P@2ZovV;WgP3QSSO!tFt7dNTx@giQ}c`?jvML@PFCw!6Yt?=m7&HXp`aY;aNz!a z(9keUbF#BtAA8XpgFj-0Z%#68U6v8 z{r~zCKkIo`aI#igixjRGVrCA~X4!nR(Y`PtRpGd;(PMd`X2x=nBrYQXFV!O=LPwVH zpZRwD0CNm0vt#Up<^?dr5*o~%Bn_nIu~=*JNaV2X$!uZ_Wh`n+I1pB6kl-T0oRwnm zpu;9+<7o%S#zt+`#AW@Cj0L=<0xT1fnqlLWNr|bcEmxKQPvomo-Zc4;L)aq+wiz#( zXC9el*Wq~NOHzksg@+WMEF5;+V2ue6?GOG<)^u$sP?i z*Z=%Cm(gejq)U<1cy>*azrc;EwE_(L=kzeDo$HWtP*LSuqsl&~TSk?qV)9XYm9Iy> z2S{u<`AC2A1Fi?R*u@)MER7B=Wf1=Jhf`J&TCO*){e5V@k;H{l{5i%Oc~`%wV_e+^E>}09p+o^ zsv!#v4xRgz-f}mcVSk<^Uq}9gMN@eT1lPE7{JV0%!R2>za7VjB1wl`V+8BGmS6(WRLKALi0GihvhG?;22 zaJT>HERF(I*)0#JE#xg&I`#eig`Lp0;?M8z-~azV!P@ZO-~asYuQ@$C^3l|_bG282 z6_2!dcGN*1F&>LW0`(gm1G*Yt#GMJyGU#Cc@a5xQ>n+3-*da%>|9bE%*PQF!x72Pe-d2W{S!al-&{A+>4nsrAHA6+GvC$;zB-`_m8 zwmi%I3Iu6VnO1w|YEfXZGPWdThT+ zfq7NZ4(osUcYf6U`%^9>!^6gwGiQ#E!PFnx;x?&l%$>R1O|LfYo@1Br*sbl9SKAB; zHgE4;w|>8qJ$$&G=l(uJ0|Nu7Un)O8d;k0Ue-Y7#-6uGCI+Y9s4lmxo##MABX>E+a zo$dMm|NZ{{|N8p>lO~<{|Np=9BqwiSo)sTkv}+T#DRyK!Jn#}=zQ5udjx2Wl=7HPGOz`Ty_x&(HUd9-Vpg$P5ch!^h0o&CD~qZ~7g{nJIMnpF?&iPtB%; zgoGde=Q}zyIEGGrcviYi+T6oKrb3AEp9pjM{+dpU7ebIc{p;J?|I7VPSl@Wy_^rS3 z;s5{t1wMkjaMM7rNz`nX6wB}b|M&m@_jhl}%ijl^?=M*zIcKh=;kP@B-j@IBS8P39 zW^;SRhF!cr3k07$N^f9PR&HLR(!5gha4R>@^>v0df4vTgXnRiHn6dK7|N6Fm`Guui z%*@cF|Lf=H`}6JVANmGLG%aKEn2_kp>k_lP$#`nazCVlCzn8VPPWIvHZ($K$^+vvj zrE#X(2D5}tPxcH={>_#~5e+*mn)li^uh4Lgmp{BU+a*)6=)92Jhv)o*5!3RbASE2wKZN(PiEo5 zn3iAGJ1()z>z$S%_3&c9_s0a5o*o`YN1nDe9{qTuFJA)pQ3p3^Yi-3%8l2)MC{i6Z+PQ>fa`}6w+UDI z4G)|?ec<%z58reTPuJ&}ZEn1$>ga?C2S8(fYHf840yjfBi?k~L&3SlPy+sEc##iR< z=4~+GagpCIxQY4jMuFWI3>cIzED+hE$zgIs#DhhME9y>U*~@2h{dWn?c)nMICBWAH z)1=htua0uAHvg$DWZ<;AaLwATgtzxfqi_F8Qh;W=D}qevz>BGrlvff zuG&xU(Y5c_M(T6o&E9V z^!mi`$Yfc*gZ6t4Ht0@GaCMgc^SgaVUYu*IqgU6%m%(l8^x7UloHJK)%NB{2 zmV$dyJ}I-AmA*dV1eH%GwyQw%QDQ(|HsPrkozzKl%Iu3b9Ywyk-8@9*txyth1)l9DD&PyhA#xjZ+w zvYS;x!^T2U2_9v|NQVRS?Rid5bG*4`4UeiSPyIgwpLu6MOa1Qb>Nsc?SZA+{3> zIa!VRxmQ>HtN-!g|JT>`BQcyjrpmjdTAwo74Z#v-|t|$4AiYVqu}0 zt?jfXCO&ufFjdtwKEAe{#q|oefBpJuucyb>^TH)LA-CN~VXL}?LDtMf)_;GKK?_=s za%-5|%z60Ko2|5zEi{x(Qx9#t5Nj~|s9Yw7VZyLAV-YyvtDaj|7^5A*;iPOms z9xASt|NW5p@NDxJ<)@)#Kw?5dLQ2YypP%0!@AqM1>gmxi*7<)?Y=Tp7(>5~+y_g@y zrLX#%+3V|nAN%n6`TzU-B`Yc=b#)?QVk2#BBBoBAX=y2`kl8VL@;E>% zTO4E9Jdsg(?E^2>!`oIku+{%Js`+!`!#mk2r;oK+Y??QF_Q5Mt4qlnEa7D+%qug!E z?hHqxl|AYk`^2!Vr zE$jckLiB9Cn;U4_jg!N`!fMl}&&D1e1}|T1kdQRg)-pOM!`^#Q-u|C`^|$Z8N?pv7bk@&t^xb@oRgA7YzKYy;!RPEP_ zRge6p#kFzTs!7WXQ#Fba(z7=5=InNsFU{Fb*)L&T;Kgyh{8eeH zM3Bz=R!5fP0>0;U6JIJ%V|_0#+}B|aF6o-)`F(wP`G5ZXe}8`-K5(jQ&#QLEpbCYx zQh)eoA52e3Nf8iA+OjdRX{(Qf%cI@LnD?Ir;sa1 ze});^#`ku&+h}$zE6?b!3}!VjG`zF(@P|*TQ?xZt9yr>nnwr4Z_*m1pKvZ{ozH!Wc zzPn7FJW#vT`t;)^&dvLKzM&*qy5<{8=`UVYvB2;O1$99e8Dpb=yK8^1U!kFFllbYA z(Vw4(51jiaUq0(c{}G8uRyG;X!q#6VB^Q{2$}=((!#xY6Soweapa1Z8d0YBbNe@(_;4O{46i0vYRYhM1z?&=j1N55;<`C zvfdoqGZgApRUi;@n!84vCI$JLNvv1=lD*Y_J@`JNKaQXI^Zw=K`ZH&EK18dr zuu9t1{`&v?e0|up`W31yM%6COprwy@5_eRKpL+T@dBW7AGmRuBJk#sQY+1B)Dm%Z# zxp_zDfo3om{{Me7SC{XA0#ECHmfnZ|o;Nx_;N+Xou}nBQ=jMq?KW)PUzhAok<-3Zz zW{ZIu8{hfH=-|3Ex$vElwGyELf~9>DrD73TOUru#1#2%nVt2@{@b@~rMeO;#bF$|r ze8@bUdVR{A>$8}0z^O6GDU54ALkqK;+l!wUJx;R=p5?xwajkyIC05;i{|${m6RY~4 zY>)Hs@Vt5R|Nh?J|9^ka*mQuY?r+kL>gf*;v9_G}{+=f+EJDrqL%IKkRW88{k`9O8 z9C3ogj8nvq@A`-T{^t4qZlOePqOcd2L#!i*ZpejOllIsB{r&&H{im59=FH*gmpA6% z{-2&E=PMj>;Qf6b^L!(lx*mZ>pOp{tD;`a3y6!$h^V!$zw&Ld(+|NFQnyu1jUoY|Q zPGZIXc{8fGs}&D6yxkYc*Y|(E-QWNJWG8<3|0nUmv$TpY9S^VT3$bOn@m2hv_wYmF zk$((!c5JMyY<_-Bp=^e^K_(NyNi_AxAIaTfY;2(JAH)CuPMWLs)h6$#lfCGa=AhfR zA*xfYp|kGKkMoBP9XQdk>EFla{%z&&CF=e!d#!gtk#S#LDrhbFpUNX1X2!T^&W(o62M@vq_nkatKr@p!8229PWYny6IHkH}S6kzbXZ%Yhe|pI5 z^yGU!XvuR$g+$#S2l+Gd4DhG|M2#Fo~9;__Zv4Dcz8Si&TL*4 z1*#oynI?sQxWwepW`1stH&vZS*n%~Dh{5EH{wq`arcMcMAE;D0(BBZw~wx3B-bZ>>A z;iR)CPbw%kr&=1SIe0rnukFK252Ppf?ilOyZK0%`RE zmhg=Qk_Q=|oJ-U;J0!~D`myQ9toHwV`F{*1r3<@IGJ#wIQASW$OPKfL8y%muv3e zxT5fTp8kEx8z1?zRxAJipZ??5)Q3knzl6@Yc8%xJ zBcA&Ii}V%UvK|_A((K81$U@A6yaUxVOXn z!c9jJ**SMuN(;m@Wku%ys<3ZXt@ir0)bm!`y@v*Ei3>6(6s=#|f1@z0*x^I}@#t$$ zb9}RpO{`qDj+4oTKlxjHkK7&mWt_K<6?zDLnzL{DYmF}N`!Cgv@|-oNDD_PgQtVJV zRIuB+CSl&}(CMu@i*;763fFqIi=**3+M&Dsk3r;rh!K=e943{I>I)EGXXR%-qj)$bsir{jEiJ8e4Z+ z+}zhRCCU2NqJ7;89_>xt5nju^mM_-Y)~(QSnDd{tBTwX})|gLEPnWkn+Y$3_`L<^# z=eM7{ef#qf*B+K{Kd<}Fo+8t)BUT}ITJ^D^qeJ8)3!ZivK8{9#ExJw#x99kAIIeFG z`tqBH?TO{IqZuxf7Kj9Gy1g+j_3;pTyO&;0NImoXOq!3{4!uMV zna%ef|7N)U+L`ZwiLTX^r>nGPwV9-DeNyDSp%=6i-z-{CUv?cJ#xZwH31&?@wql{8C+Z_RZHw(Zihd0iV53+&9l>B;Jk#q3}|&h_VN3J^F}KSj4l@spB);6>NB zR*m0ZyCrH{H{3gLqE)X+?0nP?gQ~4B93{jq<+LZ79X|8@to$;2i(Au7mTE+-y?3r^ zB^Ps4;%3Qt*AsmgPdwK4-MX;nal-AK?Hr9k9-WT^u1`_zW(^7wsgMgQvM`umyt~jU zQKIMgT<*31C$*$L{LOK&q$_BaX~@-p&(~+A&)a-oxNU~oY~H{lj~icaiYwkL#PTcq zR?*3i9RgESeHZuhpG%+Aa(ZcSu$u6q7uf}W+UDDDi9D9~-7@a4nm@RDpEGMecT<4C zar>iz<9=1$=6FIj7l6?RvRG`dyoT`Th5Wdxh+L zZ{|g=7ktDm`F7(+$(J&6QJW(k?9vEzJtcJcNtNpPLYYY|FGY?&EK0OVGjZ~2Ik4%* zTM3>EyZ$Y+nXC2u=bv^r4#&U+rk^bx52PI~I+=4UCFt!w9cQmgQ~vH-q_gerN2lu> z%)i|CjS_2Tp4XncSS0bp+qU<{GLur*gv~i$As6(iVu8qU8$U{AN_b8_4hVpE5VvshFmQM0(S1pZ_CreA~tES4<<3) zdE#|}414W)oB31EADvb=op+&*=cMf8%JUrF@%kBmSD$oOvN`C9rI#Q}ilnP!=t9%m zZH6t1flp?*cmMf6cbN(AWtZ!QJUZO3Pgzto9kmX7=yLvO^o{fa{x@5ft?PW;zWCm; zL)vn`{cc-XFP<(BPC19njsASoJ$z*)&$n+J*&F&Q4GbQvE>UoBX;D;Ycz8}$>$~iP z+;s3_hzXNGiz0sfnR)Qiq=P3q9R2zldwU&2rZ_q?-%@X}kh_$7j9KBXYDZOUESs%u z+jMn78c$j?i+jH)TxBLa|>2?`|}q4c)Tj(!lIWi0^2sHd!BBD zRQGe{_;NV}h_vpXGCzndi0xR3=hL&f!r+bsGX_00v3zV=&9t#st4z-b%{&Hcm6 zz~QuJrXp8sfPgE%8mn*Ayn|{pSZ-@Ko!)pmw$IyuJFHMtX-A``ln#I3p9kMRr!zAj zo_lz%HNXAR)Pe(EP&z}c zwD+|V%jK5b;@x{!`nElq6zr!i$KTXoqkh`X=2Pb3d)iUEuRmS*erL>zRrB9}WdgbW z%*>5^&H*BA_RZEEYDzqMYWfLARSuKOKUk@`O%7FA*UFg5e#xwNS?2P8>kIbHvA=IQ z|KSCbNi8<&U2?(o;o8d!_9!e833a`HdD|N8>D;mHuZ7ya*akha=uzI+pJ{adhFROe zO$tXC59S^EKkfD9$^IP6I+rPOXn0Ray1gP>r+Zzp1YiF5J&)I|dVRz&vuld_g8z5a zez%=A>UzZOx$`-=VpPhqYnVP=QM-HLf`-Oc*6K|Mrg{}jmH$xC(j#hQ>|9+kA;)F= zSK$(d3*HYSCG6@zgFTRq-E47jZRYtBEG!cD_NNO7H1xf25DQLXvbo4Vv30t@iI|S7 z|Nk4mxp(%$%}%5F?lzE2prj>dSD$ucL*v1lGY=kM*t?aHpXpDeWbq+ZrUyG&jI>WX z9G`0Xg>ix$FEjI&WddStU8heN7#KXb-NnQh5zHLL|IPm0xwqZopdr~idpw^RK*pJr zww3Iwot=1yrS1IujbD;CRxxj8G_7D*rd)WEPv<}hXWFI%m%aY}o16I9txeY2Bd`4s zRG(SlpWnwf-YEbsQVk8 znc;XS!4ldeRazMP{W1IDGc$SE`2?N@__VC)-o7gOtpVX6WmU+%7vXc z*t*)j$u!d8!eV!xzkhlB=LbEJfCk6fw*Ma*7jB>K7$;|VuYpbBbliml|NmbQ)81r0 zVd_Kqs^ccS#hsi{a}K_BDprva-~6#Z_uSqN;~sGa~A7jd5nyBSXp^wWCFq$rRs0_^w|Dh z&_nxed%RT+EESKG>Ho>H_FsQgOG0S>)+tFYvFrP#%D=Rx?MlLP}OO_a%IQ3{vyu=1m(+d(D(=S@oPMBiBVbwNc60>ok z!2>@ro*R=mqE6k|)|>b$q^+LDb$mh|U^T#)Q%PA)Q=`)tGG547?dLFts-=634a^p2oM+<(Ntqo#>P2?>~ zdz6rnkdT}_VM2n2ZfZecqJWs%oi_(gIau^HePdTs*teP~yx&{$+LZ$iE4CbXC3l8R zlX)9gD`QxKfq}uEy0aJVo!d}r-B?_y0b0fi@?z1W1_nk(w)ptA6CP~L%o&1C8=fBQ zFtl|{IdwqzR|MlM-4up1*Yy4!IoPoMi9$rng0s4BZXPzMwK{xq@)rw3$by4Ko*;KU zc#t3W_Zz*rcXmSpW`@b>`De=t* z8s@CuQl@+O;^L0Ik4xB~Cd5B#U{F+KmC6)q4}|1jm_1IEKIFN4bJH}}qdc*tB@c5xys-pU^}F!=eg2@0|? zGqZ7Xvx$nbm6oz`a=I)QeQ2@gDCZ`Nd3jm~Z!z*KEY5D8Q^UE&M27$WrDhyqWIS_*XU`sgYM%{RXW6|0Y-boAdCVZQJ|%lI!A9E$YA=^L(~XPf}t)uKxdDN~DvS*P&1E=@Zbj zLqIra-d{i{>B6O?3zwSaO+9?<7-;hI`0)eB+B$ykuNSoL*uXT&&4uHd?+zXpZA-hl zlpmj1Uu~FeF6lQv4K%Ov*c~+7XIGm98qs+jF4b2b%mK0L%pF6nRs#Xoi>p_-Fm+Bi zr1jumdCJn>*47&=E+z?v%>Tvp4s35VKNC{C&qC(Eq1*3uT22ereVixpvij+bA~m)B^ec|IA%Ot_B01)`#^Ty=(9NpTH*@ z&LkXs*N@8-yi-cv48HIzb1Q?581HaeG=z?Zbh2`+w{b{w{qD(`q!67 zZPTsW>gv%sB|tUZYvGgt?O2PoQ%h4NW~dz#G|ozmyzza4_?bn%$5?muzZcGb3eH$k zlRBF#4jf~9tF<_HrBj-*0`C@wE&+}tmszGQQ?%{t3~K)K$u4dYF`kgD`R||Xk9n-R fkPyUvQWP^oPUPjdAN#Ie1F7bP0l+XkKdlJbi literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-1-clamp-4.png new file mode 100644 index 0000000000000000000000000000000000000000..37e8ffae114625d0cc6a07ab2b8dbbb7413a3829 GIT binary patch literal 18884 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3?DlkV45^rtlpw*nn1P*v zfq~)we<{|*4C?rpNBR#6GAQ!2#4P?6zh3S1-gT_08Vp~o9>@jF-t4Zi$UURypSEXE zwD*B~HyX|>UY@yb(J8Y~UEzw_qHcbW1xL;s3NZ-sI9!xSlk{9({(o|&XbaQHdrjp{ z|L?wTsP2e4HzTOyqjO0|--)VxX9fmSwhjlEXltcaRWr`dsqVS^JMmueyQpx@?#Q_H zF`t-0M&7w%&DCHaz zCVu$*#<@W8M(NsRC%@RvNshf7#gxV4$tt4Yuz-Q(i?iU_$ocN)H^13^OyhP~(wXCm znkQ?Yzn(PZ-^`QTV9PFTcT#_;FDB z&0l`y3YN7Z(yL>GSyh*K3mSd(G2V0kaY0{pt;A+#4u-t9YO2OT{AC$PEsVb?{Ib@2rM~A;p&*?)>E`ELiv9TVWouLsw z?1hC*_DM=kTr6Y!>*Es9Gg@qW|2U-nZ+-puzx=$61xoe*{#5_@ale(jKL75o-=CiT zH%|Yzzv}3R&+3P#>odzXEdkj8O2bl}&66ibM$8EJ=(v5z$tN__$=^T3ySsP(fBP&Y zhrHNFD}4nn_RN~}Z=tAAh469vC1yG*g$qoQA9nr!`}_N+r~7Mv|BK!G>;KQIMO&v=K`0|8&7uKg$uNtRrsDbJ^hy3wl-Qa?{3nL z>gf+3I=8*QFIo35MdHk2HK;vHI-6(CoY>wT>(LP%AAjab$jy_Ue!jgkdwJ8d9K;;8 zjQ{=r|DS&=hogydg61>(ET8}B2LdY;#6@phI$mG#uyNLse@>U!l^@ri_;B<4dmeT^ zgEu!07JSz`ytkTXzP<6EU#1?DIUrG90=BBTIW%U@@jEg1j=9~v8R-->Nl<8dPY=(D ztL%MWK8h=`_Ip=6ayb3#Llb-UdX}~3-Alxc1ez8Jntom2t++k$|NrzK|K~rvZ{PO+ zzocGlQpDD-hmTy_+W95z>Qa7uPQRck2=$mtXLD!g#MV|%pPt*7f^MGnK7P#2#i60I zw^wlS#!d};11Yf&iaKYesZCR2+$`j{I>sPbHoCLv+N~e$cVb*~SN}^+XjsXm-OF=F!EmJ2??(BSN zzoz9tQ0juO-XFI)eC$X_UtAh#D3G?XaqF%|^}hWEg)Wa8ChV^_uK9K3!w1E~*Vgh> ze>bwJK4wti<_f=BIDEhv!=J+}>{d=HA&4pHvU` z%QH(V3qbAoaU?N8;ql|-7cVrHuQ{WyaNy9PLk&^Q3gPk*ml?$z&#Yp}WMW;)cj&6p z!4;YTDTcq)Ui1cEDQQ+aeV}2Yg~F5b^LftCGmP1HW<#Cr;hn`i=jZcBEOCGuej+kb zRh7}(JGZ``H#XLglantjj4eWukRP zSDbqA!v2hj8e3bvM^gruf?@j_9?1iu&LKud1_nwy1%f3X|Nr09(_?X4_rY|{xtMh+^ z^g@ZB5`kt9^5Pb}JgSpiFx57~?bro3qYbN$ygA`ub=AQ1hBqGvr=z^R(HxtT1wRZ9 ze|yW*FVF8I!w5;y4M!HP(n`o%CoZ!lHAO{R>Clk_2O6%ta>;8<>dmRx%v_z#yj8W) zQmAX`y94vXF1R@vzUDjQ(yUzg$Mq5`>xq+1jGCzjZf?&roMU--Lx$kt$H$oiS|HBe zGj$_xg8>iA{SHO}W)~5U4USUJPHYH?RLlM#a!hT>B8MGCap$aqmh9iZQRwlMpgY?t zW83#FPtBPuY;e(ew=zHT*|p5`E}4qFb6YY~uwmB!s}1qrcZCF9R94}Yy3S^bjp zd>u$IEOLL8kdkuYib!#>@u#m>jg2NHrKdMc74F&a?$K-D#e1s3Xec>xq@G>{ofEL`x*R4LGxi)z;L3 z!9`%_pM->j3l|e7OidLKOS`e9^Wi1cHgWwIhGA^rNM2MqyScUXYqxO48u`HaUGh~4U7jp}Au_Eg`j@XIvKoE)ypN>45prgTp@9(Pgw z-UL}@dD(*!PaK{tGBNE?1QlYxr$4-)*miffVOWj#KU#2LhWkYA%!WCPoABqzhcIs9K)6Az%%dM zh8Cf)22;lcjE0773l!Mo@KN~{ ztpd!-;^S`vjsUp8CyB`96og5zySt-ffY+x!;UAMe8UpVX_SzhTu_$w;6cKJ zXK5B?JrA#HYncZ^im63uAX_h8n#9AsnUBLDEJ(;@-Ia&?oV4U76x(}!Is944&9+Hs zM~{Z%S9wnsUDgAqMW6iYV6@EIIF*;j!&Q)Zl@}Y^lohUWPYxt2d^2ejdwL=!MwM%m zg|TsSFE6vUHnX(!;f2mT`f&lqwa}Cl1q$PkU}0|l?RZNJD5t*u@ik2YgsEv;?^(RBPY3WgehubCo?! zC^6x{bgv1nGiFHW=tx9FNJvP6tKy%NvR*Pna*ZD-Elrpz%E!H#i^pW*6 zHeAf!aee~(zJJC>|9=0U&uMn4A*gvlU%OIzA={dnLYqqz*KJB@xtbAib-{9{C5L#z z-=(*d2PNnxu~mF(HBarx?|P&eexYHbVaUOEcX{&f2Q9A!b$R~(U*z{lK!D6c=V3?`0yM%#`FK5Va@+@8`Akp%}zi= z)#=fL=g+w~4g0$`1%!w+sj^m_N;srp`R}lr@4v&RIN5r7_)gB{@!(|5?m9TD!65ip zJ)4du^K`v4M$FSi$|7YrMY`iZta+=j?7#t$l%ymB)6^fI+aKOGZ*y*6v0)kyBp^)+ z3yL@Y{3WHL60v#(hp{23R8?HcllEjvW6Z(>&wT^98Qf;L{Wzk@?4v)$Ex7H8fLqSQ zcD3&1@m-Nx1_>FCuZ0tJTLN}?eC1GHXU4uKAtB+wrPQNQM;~Z{;_u8EiGB6yKfZQ9 z{8898-+m)2+eN7FGzvFt-n?UXwz}%g%*;+>W8*_ok~}L?*^0I@v1d%>`1FIFxu-j9 zt8&}sk0ItWwh06^toA!N`J4pDQl2MMy6#b0gFuPBr{sh|4ihI=jhe5#JnTABWUr z*{vb%_mPX2&XtC zD>F_>VPXDXc-;1y#Gz9MSPfD)u1Y_$mZ4j;hoj+x<{TF>#m>evQzs^Ja3vX-rza#I zerJ#Ha78Z$?7KyIz;sbYCm^%*oKACdx1V{dgB(V(w4G~Wobt4mc>tZsS5}rI{ zbkTAS?y*l@urV;p$1P&A>^!xCrJ`%N46ZulCZ&Mt@e2tZy{QJKsR>Hj^B}%Za-TeP zs*h9C-bG6GZRQ56*($UWorKz#b5^|h=l)HU?LfR*(llO`Om#QLy3Z4rN)#4PWWD%F z_*=yBP?ZE<`5Dg)jEoEn4J9NESy&BMteW)5RRZdA^+FdnD{0Bj%^Uu-n+h2g8z^0y zxL98E(Zi~Xd)igsbTD@ab(HLV^72qrPQp}I=ZKI;o?)^1*^%4}?W z(vAg^dUOj73=NMRjr{#%MNzTw!C0QJO3KW8vKl%M-~7N@!%^t-RzTQ5NNirl;Wy0- zHhwhdU`bI4-l1o+lCe5UF{0c+V4|JDSwZ1s53eKxbM>3jP)F((&Y0n`Z0XJJjtvLB zxt}cLxDskpF!BG2Zu5m4^BN>1FMeC~fbqZKBFXSy3LGc26CN;ZzTuGfFiBOiV2P-V zn85Wb4U>5!JXvmPh>Eto$Y67IRnlz-)vgTx|0^jL78Gn?VYkl8{%l~lkk?YdOh93) z_C#}qgO}M;x&;?aIB@ZhgEz~LS2`BrlmE&qe+`%+rN9t=MJVUR15dGtQ~Vap_1se$ z8RvGdv$ohb4{d#q~wpEl3H34 zTef(-*a#LnbDkU%!0%`h5KSdVc-lsj0De<8I6s{VT!2 zi*bh1j&JvW{W4k;bMnI@)`y(X_Jq=%%|>R^-oE)_WGtMTl$4f~l#-B;mXeZ`kkF)i zy}@9{18qx%O*}jr#+nAx6`$vPIop%*;^yZ5-R1YAHavK9aWQ}W|6`k8-8IgD;?ZeO^l>ET6Xrfbkd5K#2tt19ujg1J25BcLT~G)O z68iXM%Jav`t^JXP3${sbkX8P}|FUG>{t3EAC8B2Z)n$3-k z55K>!H_zKM+xq*dPT}+aoUZF6EaPYEi8wwr#vn9F^dL)yax72%f1{k+-)e7eTF)={ z=ivTnj~+U=ZQ4{Hz3q>B=1H-6a$MJrX)Y_%(`#Fy!6qokBnAnwL(?sG?1{Ya-n{wo22^7H|9@z@#gZi~GBQhATW=;Dj?@V3Hfw9_ z+%R`x!)meg27`Zpe^);~cKX}5zwhr0Uw!`jd;i1R+waddzrVlq_2J*&?F(J(|9Q3a zd|_nNn5s6X=HsLHKR@sP_Wu9Bui6{GUitl<=k8se>H5OE=L^FtvfT!l6lSb=>-{0?FTer97$A{WS z;d+06e*XXO@A>2X`zt;?5Z?BQhi}6)Rh}Cnpqi#&n!LE)ADiN5&mSLG(K-A3|NMt9 z0^6o)OWxbB9>)F8h&fnh^M{1wWCPRGfS{y=gLmIRYPmx*Em~T3=<2Gfs9G92%d9=f znD1|3WOQIve|^O9bB2>xy*fYr|G)om`}?(##Xq0>UlOx5GBkX5>24*Do#=6%q+g8PmiC*H(U%FQ0Wl$L2wmVD7~D{dM_w zD&Ahxoy33k_s`GoH>cmvyZ`UYtE=X0Y-~YNG8HM3212~c+)7sNF0a48_xJ22Nz8oj z?j}dy#0PcOKU4Yi^_x36n>QHv9fl^sc@{f%Mt=S3`t+%yZN!s_+GjMwToyU* z-ouo?v2khC&Vq+G8=1e?{Zo0qMoLWVn6mr4`X3*J%M!S*O=+|*Jisa$@K|oOVvmHI z;h+EKKm7Q3pIf|c;&!$7-}BpSt0m6OQFP=!Ske%x8q?C*=@>QZ;DLt5X6D^Lp>h6> zpOG;yIXSSfP;7}nqnpZ`D(NS?*i>Z`=BV+#U!|~kzl7(L`{(AG%iAdIy5`Dyf8Srr z@^@#iuiwAF_IK}<6RvDWrU?I-t-OQ5M6&kJkLPc1@1L$Waev^G-{oyzUwdA5+VHb; zKew{t!Gy%d&1rBCySVNQ4Q;(~GjI}X7I(8lKC5QupQ9eyCs^5>Qm(JNy@ZuV=*Q2` z_ut?DKhOUEw};G2+&i>2A82tFnEDzlHQ;URvakR5{p)LcdAmQ0p9oBKZkO0stL)3Y zPEFmgH%w2DO;wf6(D02Kq|iAu$zsQjh_7E;MMZaBh!S0KHfT-nsd|sOaWK`6n z>|$d5Z?4_nCy(8`%KrWP`}_U3xBdI;{~w*m^zUI`_#aU&y~r&gM?bv0yq}%FZu5o- za)BNX4zsrfFPAv6dz*y+f7bqvB`VErY~L;#K&_c%QBisG>Xnv=$cwA^JiB7c8<{eN=u zevw~~{@1rjo1eI;(0_A-!{np4)KU`?64KKrOg(D16B2|@{xWOU`0U!%q@^`SLXvND zl;g_02boNDhJqhoyuWXMeqODLrA@%W_3{7r)&D5ef_^bmA|JQ`EYU4t=XveKM}jqb-;jQPeQwZmZI~WPyfT+j6_tHIC{ORDC(AZ^zwlg-^4kaXLa7a zDRB7hv1{}Fs}yt07v?+PUOt7%tL56e$yGjr%7*c);;WjQ4~O<&&+G{B`jMH+c%^H~ zxtIIYbf!hRu$CjFA!3nKK{!*rV1mo{ihl#eSoA_Kry$`{H_*E>&q|H8MBv zo+BfEYlq#c$AMgyIV)N8jy4GXopG|2(etJ~TigGb7@k$DbVT@?3=Ir2G7~%1l^C!6 z)B1O?lp`TAQ9!^UoD*V^r;LvekDJ>CHufAr@u(Sx^v}1N+ieIFYYAqKda`o$=hLS@ zPn1;Q0f9Sr&RALf$;s)F&}86u3O?r062}&!#KhEd>(-i#^u3e0SXd-{d`{T-G4ow` z#kYnx^hu-@$A#r-v(7a;{impzJP}^|NuB?8QkZ;+H&a_-_!zc5N2x&aR*N z>FU0o9R|k6uU>xq^x?rx-Nvb>8v++OFf21VQ_`HYY=y?+Ra&3l-@mb_DPzIPlM7aQ zb~658xOP>pV8Pn63aYJ2ka%+Plwo4J^XO5AVrxvQ!}=GhI~)X}zx;R3t^echtM^}0 z;$Lb?#^krTa&lsVf@YPKesOVPIak@2aa@y{oWRJ;cw=g3hk=%cL3fwY!}uAU@-A*h z9$u5y%W5CE{~)13RPNNN7mpL5dDm0s&z~3{pAL(|Wv#|Vfgk1F*E&9#!ewP-EWCN! zG==m3gMwu2?1avo;gOhr_pv+A5m95Fif&$w=7}HoPY_6UVPTh-pJ~c%Z|_`XRT;IB z`O)mco*tf0&-QF$j%87C*1|Nj@gR#*Q!s3qva_2d74{a3DK^^y~+G?w%fEclxL|M1Rl z--IF~qwd^U!n$^kO}a}<&khS|c6Mbq#rCk;gk@}No-$5kBT(fpo!&3;{t`*9URKS!d?}MU&9nSpV=R)`5WJ1(YNY}zMHDkf@6Y0Nx3;DCr$(? ztFeFhv}wjn&TU&Vx;i&1`tDRzKJ)0IeBX>?k&yzEQXRz7lYg&Yb?VBcLn{{__IVO9 zZL#v<7EYd9Pt1=OGX1$LZ1Um1ee)KRQ^)e4$w!5ck58y=rB61?~c{}@q`3sM-&X_4_VG&>giG?J`Ny$y6p$p6#55H?l zNN|w(#lO>@tu8k)iywy@YsR!0cIEGFDGY9vzwe_V)bv zou58FJ|2JOOwXiAGHPl}=gujyvDNVW;N$$oZY;J|=U}MK1IHOl1)0s911_-}f3AL$mi0&!_H1)`ET8r0NB=^GnJ(o#KBwz%WGPf^YUnmDC@`?F;IOvf z`1|+w`{Vul|Nr~@_(_UWZk$0vw@u8%F7=5*qXC5AvUg>-qLs-Rmu6cFf6BrvCq-&8(o{aC1vrqFpO!o^XJRzaq17 zPTquWE)z4q{R@mHC^H;PO-pN-)8p-O?pTkONg>;wPUe2jAD4Nrg>i3rk(8*oA$#Vv zh8ZHw+k_y)&`v6S|Nl?)?)&et@}YdnTJcx09RDY3$+G3V-giNWi@ER1NBOCL<)xb? zTvAem52rb->XGQU)~BB;&?hFvye9jA!O{ytpcO~||2wJl)&KwZ_vh#S#)tp^|NpOG z{#0SAz>HMpTS9?bmNe`LWbI6GNLp<9&Hjvx)-wJrE0PXOy!&v|k>zr0ns#taYV?=n zxnZRr!98idePCZBERXc_%qpm9urBQ1;VSJ2>4pC%{!;r;_QL*#(wF}$ zx+ZoDSUC8}|2OOsNMTDmmEh1T*TJz)s`12?hLx=KcU#nVJ_xdzz;Hf^FNPsE(HK%+ zg3OXy^5LEKmPoEn?wK@i&26Lk4^ui5V?dbm({f& zLd+v3GtGMAz;q&j{oj;E&EL=Yy|%(r{*V8!#ZxpmFFi;|O8LW`J9}oLO;IJ<9rv{kul}Kb|an2A5fww|!#vKXo8}&*Z2%38Lu^ zz11-%lNy5ASOk0|kC=EsQoGa2u)6>My06SjXt*ld16o8-%2hP0AzW`l8hcdhp@xI0 z4x-0PWQ&9j`l$X7Jo@ptwq$3UilEKXa|f6|1s(hUpJ#vFLYP_JG7~Z*ez;!r@mL(! zvw=^`h`BhYnbWkPU@5;&Uh~8mvQcML4mDilI(YTjfyq@H%seCmxS84Zq@GE5Gh;!h zhDxa3{`$pJWT3wDl(DZ@nl5v~dC>>!Cdmu0M3z{JdpvB6`CuThD(s)KB*$~31w|Xx z_=3Cz*Cg}r*z&-im&c;3_aN(sImNv@6NTqB?5lU52x>U~|L^22GeNWT*#G~6$8R~i zIsN~ie!xnE&0F$>^(sjNFA<-Hn9U1X-_+-*tYc|ZH#HEL9@8P1av*RHPtE$4kcQU_ zN;rOeWL7PNx=i14LYw=-r|imG!hZHo(<$I-H#l&EN22M+ot{$;&xC^OCQME&k&VGGk^&Jsvo#+hQ4 zR&r=3&x)Tj_9QhYOf;6bJx_(v=57D~|C0ayB>wm(>&gpBr!y;le|-FYd;b55O2OkA z2WBlgu~vZX#+2LfIeR177D+EW!F7R)Cq-}zQ!``v9wP&Rj^rdMnH@(AcW-s(5y+Xj zum1dp&+1*!(GsVb(Ut%Iy+7XnzoK%Y^z0K$kJk5Cim+Yz)Xcb;L-Fk26+9-BW*l+! zIWM+fD#=Hps$s^$OV7_s-rM7tSq$}@O5gqc|LyDl{l2?_(dAfLei(_@Bi~3K0f|`ef>qVT?-XdG}m}L{#QPk^e)iB z-Mz0SFRyN2@pJZ^==6^n?s*bI{r{BSHXeADlVbko_=Kh9jfKzooNH|lw{nXtk+}{d9-j^Fb5Z|NlGPwEOb*_I~sHe+C8{{`^1BGSjm0q=C{@)r`l>c}~Re%zyCZ z=KudZ=J|E;`~Lj>|NsA+Hx@sCC}^L(=*68PxQ553H`qk4vX!ey;ezOzh=T^1LWlR) z^Bg;-vV?!jN1ik1wB&mEFN3F~9(HoG|7xIJK+j&M| z^RhSZmwMk%KmYH*$&)jt9MRT1*)oyi!}|uO;Lq~TGOy#TV%|xXXfiV`St2oKj>L~2 z5?0od=jN$yyX#Q)u<*0m;m5~ctb`^9P>pP4Q}yNj+vqPDZKBc|2e|@2|Nmd#;U!Z$ z$3COB_y7O@clZ7-KG@X1-0yEe!Gn&TCx<5LbZB~SQ0u5)u~h2dmaq8%OF1nP($YG5 zQZ8IiulUlDcuJ%#`?^O*>7QI~lc@rq-bA*YpMNnGS}5Po{PFGW{q^zxlaBnYmk{p@ z&=RrP8=vy!{r&r^!|Q!!eJMQJr9WxXnVB|ZYN!})~jiMJOHa+k6kXG%`W zn{fPu;9apRnW1b;981_#*fue>`Op6+Yxk%4VAJ~D<^L1X(oURwxNUp7^Q+|ves_!o zvbvOtC%$g2w^+g!SUuSzA?M75i3cxSJ-VSp^YH2EJgKQS*6&~NWY5?C|0V6}k6n^V zO@Q^N{AJ?z|EvD@=l%QpJ!j7FL_B{Hc)i%*fXJWr;{VR;9omZj|Nrsv|GByL(cAyU zYKfb>XkxH)xO&9{pp7VX11WBNWd}RLX$NUf%_|VUIt335U#P`;F z{jSkx8cjYinil;1_5Snodh3!G;(Mge9y+vO*|9(W|J%=SJL9J#85_xVCo}1VDmTZE z@BI%SySM4bOGrpa)cs4bsGs-nFne3}b@0SqiG+%ZL`6lxyt@u5Q9u9Be|Sc+OGn9TW{;$UwC&FS_2^Zq>k_jmvOeSiP|`T76$_WzX?5>uzlSiITmM! z@WjROgs(R`GwbMvVs+30-R*gXcXl5?aOB8=GiMYuP6Pxtrs^t$F$nIjoBQyd?HloS zNaTZDz4v$hrzi2%)oM5LR9{Tui)d=_UwOb>_0Rs&*Wd5%uK)Kx{mH{b5%Kg*8xt>G zNii}?+PE=MRIrh6pMCAG@YuO8pPjAe;o*tj_oqGQR-1)TXUc(#VF%(nc@C!>JaFK{ zfBVB17xS!7xUR&;W>oW=&Gw#i^z4NjCoWvj@bG1D+v(|&ar;t!{F(XizCD||dRzT} zi5W9E&a5>sP!NgPf9}H@$!~t3M#2C8cjkhVgzNottCmP?h`QjFySL^0{X&%VgKxv@`u|S|K~ptNR&BT7$9QEqMT%Bo8}YI;bL&lm?!M(#MCKEGo@~q zzK_fljF`6KR5#CpRXtl*UVm<`H2vf3+drHu&)dvr{l04X(i6pV4qM)uwl6*QX1UXX zb?a+=x2G%XZtu<9RrhN0JaDV$P~D^FyUYK_?M<38tt~J7=l}oudmOp+btM+u7L=GV zQ!+GE^6Hg{1orEOhqUTg9z0IoQSq_AgX8gR^Z$|(ptTn}cS`>I^Yi$jCYirHD^@l( z1{|K8xI>Mb1GI?dil#=iS65>@`{}C}KZH0HToM&_SoZS$or5ziAerRQCEj1(`ybx3 zZL9ts&}R4l|Ns7S6%nb#72?9qOF@gfe}1+vEnUVhzo75$@97VJmrq%JE>UG-hK0C9 z!G~9(hp(;<_;!Ck&$ewW=Qvci*XdFl|C z_LHx6AHFFwugPSRXKZHnSUk0{_38hK%JnyHEO@u0{QdkL7JH%+4_q`%ba+)|&@+LB zk-h%E(HzUeAHHZ#(N;R7pwqLC%~JEg%^h95K1c2AL2IHvd{p)`hx*5^;nQpJ!=Il| z5Yzco!lqPwxNq07dG9AKS@0Htj1(T6 zlWjB4Uyvg#+&16d(~Qyo@2CIvpmP8E`p3IFg&J(&ivRzOYkq%=5BOl;=62@iHo;js5)u;k_9R-A@mT#i zaP+8x#t8+5hIMS`btNkpY}VS;o+@}JvyZzEnx86j-d$eKbAO-Wb^j9|+4sKSza((5 zC%k8>x!8cAg9#H9JR2R-1t!bQd9r`6%)DiEdvD1zyH?` z|L1IVxu=Kc`}_aTpQI$DACKzn`2x;`Z>0}^ea+)P&td1GLp-~7{W!Ksf#dTrS)0Gp z&(D|SlR0qxK?1BsJT%i{UrpkU3c;fmJzqLByaR5_N6b>_m-{O$`j_9_-J#*(-QEA~ z>;4?SV9EumZtUvQEb3$o|14eB_H)~Z}COv3y&$RgWFLg&D^Q|ea?_`p1vA=$*q|ULW zu&=)S{lE74_kaJlKin{wXZjd*>T{*FViW%-^({o6l~sMAvW~{ z``JZD1&t2KSp5G#Kk=zoi|7t$ikxZj?|*v5uPF~-haXZEHDlG_Y@PVgUR-=p=$Ru& z6jY8B{QsA6>Hy!peW@0elO7&s*9ecA6JTfCF25@M** zgyQ@AQY*gmWogWs{b9?ri#4C#%a?F!vlti|ZP;XFQ*q$HIlm~r1*ShfFkY;d5%}<& z|M1(}JmPu^Zn}{X5;e6OV-6ndD7kI&?_klHj@>mgAD)wav%2~M%yf%&@##N4vD%s@ zlr8(%yuV&@U%k7@N`;?~pZmAD_e=cyC%fUx$LIW@4upNZkxtYR0ii<+793cx?AU_U zrxg?rE?9f^!u8`HUW_>|K0<-|hz291FN^ z!kg+Dx=?3^m!`E5z;BsioEL*Nt2c8!D zn|>cy%DSF!*e@ z?G*fH-J|^Xr-@}!tl+`#_h(F5l)G_SZt^_NrsZ>1GAlb>Ht73eo7!pUBkUE+)x0&L z`&x!?;DqNcf`vX3c1)K(M~JQJTK9WV=B!C7>R-ci9^~AuFXLq{tu6jtv=O`;Zq3|< zTx|{lE&GccdD^no470X$=kMWvpd8WSy!)ktfxOZoc@bB>kPr9wB_@`Yl}X-s{(ax! z0~SKZr+a;<`g4zrlQ}?|n^_-*W?wxNF?ae03BT0b%!NxeY^8vtJ>*zoANF^=ng($=~5>6 zQ>_jhc1z~kaR6!kg_+TT`UU+2NMU!VgFT|^#c|K+Hrgg`? z&zmT+?DD&8z+=Ie>N)T7wlB7oev=efP8StCogcupbiyPJB@PZ}8~)QyUfbTTD}P-) zr<#{J)OD87afxHDe3y@WuQZhZTPI=HdD(V%pU&=Tvj)g0<(#>O+)V)j$LseTacoJ? z=RLUdpkhngG`s9Cfhrt@LM#UzrS2&#bN1uAdLi1#^VP9t>30_I>g(V2&GzqGrg%DH z{qoM&9Pw(*Un(}lY>EmDbyH@Eb)O`_a>Q_-zeJFTM(dO-ra?)!x9EB)vP3;QK3!HG<%ERETFA~TjttxHzf!h ztLKthnzYNr8H%)nhV0QbtQ9cJVSgBZ<70v)$M#eQg5c;L?J~E=WE%s+7uMd-|nwZ zk$i64%oynwdFlP;`(n9getnxW>~93;V3>bw5(1s4y64Q(?xQw8k5Yxa z9N8Ur{n7B4xUI7J$&;cYk9{W!UR2)_JASj1RYvmI&kjMSe_wLHRRvzZVdmK+Ai-9s zGf|IAroD2R&d#`*YHf^)5n{&zpS=i6ls@^_zb8D@rvEl?tb6EEmCL0za=&kW-)YL* zuxP^b8EUiKLUNGOK zqvmImV?Bdcy_$aEfHcT2Q|9<`H#rEj%`Xa^`0bkE3Fl`glOOPZt1o$b?O^hZdqtXJ zLaj=Z_@rwiTK0ZVWP6snZ^q2HCnt(eZMt2sKg`NLsq*M zEi!ddU~z2}h*_UeoBptwrM_@K^ZCQucE)Vbc{D>qiQ~iH=XZB){8N**Th%vmZN&Qb zhj-nNt~$2TYoW+tMbDU%inSl;o9GY=2EbP1i9y5;Td;9mzD zEYj@WXT4OJ;$Oonr&g&u{ru9y^L8+T{Ig_^EmwmCkia?)N9B9{G6tUid_dwW{+Y!sYs%({iNS z4qCk0wkyc$*Xc#u|1UlIr{3y@*}V2CGt?X%B!padto!%k@+K4I?_b~B^50kYotge> zmd)~2U#fr9m1G@%_t+%U$(+~W_979d01*qm*mePpqXr639ithRsCb^68lm@2=6i{z z_pE0{r~YvHgUc-?twI9>gP471K0IJdGU4$*+hqE+qr*T#+BoM{i;~CP$jBX6=gi^x z^M~jDKEn+g3?d>8GqQ|0I1LL54K*|jUc3a6e}1#e&OCZwyp^YK*EY% zEF3DnvOzbADw!49R2?b!XS3zz!G>FZj@(i>>Z5e;9*?c<6AMA{g!FbXv!Ws%`+B3A z|K}32PAyo$5i7J{`SFA_HeZ%Qhj^5fIC_12TzYqI+-P_p+$B2FFe1{>rshP!SFOXl z%X$3g8OYe2DEMQ+tgnCg_V$o@b}8xUAHKO28b}?O%CTY9pYQz-f0ed1vwvJ`bR0B1 z&hY>LoH>S|p1`xBvY0bPq0tn-~3EBC}=jrP298F3}m#1oqY zSe9t6>%OMK8F_5mLes*Y_lr$`=ZR?6AKhi-d&BIo!)>F!Gsdyy%QA%T|LoIP^nT)} zr9U$j-_6gs?>BXkPUfuoDWKNe|NmQN8}hjZ2)Ochdff|tA+ksy$#7b#GFNBoiWj0K zTvJ&&=Ed6x+J98-syEV}vGb%)-m`rMc{crLi+&4u0pRx9J{E`pccWW=F$Vjp+S+XiaYRaPb znb9o^UT@IhX|Al>^lJSMy~?`%!ShNBR!c0^;M(-&tL7@uBDDYi*UYu$Y7P)+vFEOA zQfkt)X%G`LyLX8BiGth#Wsh^JH=;V4Cb&s=PL298`I)0ii$hbeZgh_E;Y06q-ta#X zXnH->WNPluZiRc3+^?8!m#*}?og{I?tkz#3VHQW@>qizj((~GvZCv+b-p)EXCx1TCA*Z5{mYa?a;=hn^>v zhaILZRX-$A*taP~#*pp(#aW9EM7{c@VfJp@Bx4nei>fTQrmg#Z?{gR=!>(1>SKgoa zyX-}PmGZW=|4!tb=wVjrn|PVs*tjHyF;|s=nR!v1#3jvU#%FbWd>JXLR~s7`7@Rm| zWN<*O?ZHaa2m?1k8w4*sD;l{41i{v29QYBrXfB#Y=_RM&AQCY|D z;L|*t+?S24Kb&Wl@wNF#9tyTsv41dYvXb=P>f;9aa)*1RMJz=jF0*X6t3MYI&As4bg#!aik5&;9+9)AUkr?4A82P#-#+rnLOWz1`Cj?^c=Pqz&QZkM}z-rY%j@%E@uoC9da1Z3LghoB|RPfqg4+fSN&>7gputx%@|HkUb# zj7#_XwYf$xi!*Vw*)8*m>5?q?t~tTh^}??IH%>(<*|J{$`i$4P-v02`>?@JtpswZr z|4PL-Ufj0_?Y0aFmHhN6;MTnv5B}Fr`C4Bf^!kdd)NgyYX-7p4<}=mB?l`h>npj1b zfX%l>qG>;BTV`I7C|K^t)}!+2ul3~PXdD+S?}}lFngP8H)#KAQ&Ye> zvz8@nDGgye4@_OiY94jh#X!w~QF7%g`)#xMEUg~qITvevJa>4#(Al#t)66rJ{{J)l z^YgGlGTY%@r7Ajts-SK14FCTx$~f@zWpLX(IW|Q_Hg=mW;v$|6UCi?=F5C#%9k}yE(~OsGAq`hq)=XvfR0}xTI%BCCv!*J$(f9lI zZS(CV#dH#8nDPF8_-zU(MS>jXWS5$fGGSWUiI~h)zo=;4lLrYKHYR%brf%5o9`nC(tKY$?0TKl+ z_C;ZB%SBIdi#Fx*C51d(eBW`)}A{60_V;xSanK3_UwbFtt(O`3QR0MglJb- zaUXG!Sf|r{ye7rL^N~VC!hx#~{KX26^~>|TzGkQscc$Q<&D=CYsKY`ZH86yQvDMbH zm6x|YNqHf;`p0hFA2&}lxol(qsLt1PYh_UJLw}hJt_{;9L8JY9PF%QpwBV=FOzwXt zKtoHQaL#?yz|6|p_8!@-dezrpK z#}L)~`wU}t9odj0dHCJkD>A=SgrNQkew2`qkdT@xAea~soc!TiT0nTZfvIc!^QP6^ z-0VlV9|bx5QD8s#t3%;HaKeH@!^ubY*Mr&$F?&vINMt@d)0ij!Uc&yg?rTu<;vY3I z^YO7UGO|fZvUz*6>Fcwxva<2(H9p#pw+$q%E90p6Id=tiSbCRWb9@1 zml4-D%DI28;ETrLx3^6sZz(~`Ua5v7jXW-{k`)yaKE9Gurb?QaO7ieZ9yy|syHQBl zbqUj-UePtF%wa44uXy})hU?LOd7k)v1~T^N42snbPgW0^r=$qo<0bU~y!@3XCkM3f zYtkehCMF&^Ii6p?c$}Pgf`WLOnlv7(o`~A8$kPM~A_VorW2ihJSvs`nsJkFI8SH zedZji;bYYwjMqdO_V;&Pvv!@?l@N$(9ySsn*jY5fTDg`_s}Y`Q+(Bb=O2zl~9KYABBCVj$gf(l92G@ zQ!8kM-haMC+@1uBiV2|8IK=fO?(LQC;bfA8y7@{cv+#xu4*rpXf{pLG9zRODa3#gS zAYsC^ws*@~=Bzq!u(7yYrS&J|Nl!RbuvpHcr6|l_M$IRPmj&ekgc?o&DE7nP%z`;xl^YOoIQKsG`Fjb zjmB0X=8KuEn_h^<&2XH=`eel!^ZW(s8#in)sQGuM;IGx;mzR0g$AJpt51&*`c0Smt zn|%Q^UkeJ5L!He00zyr4Cz&$$Ib^8@2C^k4zUV!7<_wRH4$qo38Y=AUCAK;)j4X~z zA|fUK{Y|d;KkwmP>$dds67y^lEB?z>ZTP>xo+tml)A^VdnSH-sUQSrA-vL@?2r_+M zC$q4DpbdnO%QA^AWsab}h(45^rtlpw*nn1P*v zfq~)we<{|*4CJIHd9|{-!_I@()a%p#=`C_vAjvcg#_|8tlH1r~;>Fe!?Sv1!A`dl^OZeW_Q zv?X%a@imb%TC;i{?tNbRv+`HrkIzqPpZ`84kvT0*QgH9`SN^>zGed+ z*2w~@UMo^2PVD{vU$K*;_TD{KJAMB7bQ>?if3xp(^XSeKSycXtQBKL4Oe?oLhKZf&tHEuHP{jZ?qcD=S}naYXoX z#FYi2iV~V_4qm_h%g28E|G%DxN8m`qk_YlCNB{i%{Qvv=`u+9)|9^gde{=f(^>Kg8 zpPjj%ef?kUj}QOPO1JULvzcAl2+A-Zn{_&y+uNu1_j?CSi1z9_ej~!eu`xbsmV4B! zV3(Gevwz!jbDQvSOy&J=FvZbDV!~sg>{$oq>J&_qUtxOAe#;Z4hl|t})Mse@|NsB{ z|NsBZ^Z&)}`0)S7$NKQ~MtkbdfB2$#_;^20{ePoBzmI?T|KGt4l)eA|mpXCe;Hgt5 z&Y#!RJEyLxnUSWpXrZEl_GxXsbBZcQ&K&$Wzmrq$1y96cnPaI52@OXWnYX1hsw!O! zl8EX_y8Mq{_vM1DpY5yIbayIf|6^a@kf8NnQS-<5_xJy+`%9dgo4liL?!%M9ZM(}Q zY?$MEhqokzNzvoY!Xqd9xu$$}59A5Fn16TcyXm?B#dGKw~RE>;O zwr`sl&(=KYXpd-#nVsOR@jqbepZ0Lo35iqv=S#%xO{(}a{T>r@#Q_Yq&aHBpTh?1 z*(}Bjj>%th;@Ir>vws%T1uv#2ZCcZ`UjF~Te}f+X72XM78mD>H{ZISxZ|=iKu5DLW zOX|fYRs5Xv@UHbOHyMb1xsL?Io?W>nx6jnL4pI9I9pFsV`C#@xH-#i z1?9*8|NsAg@RFq0tOH?d6|}gw9BFhmoEF65%oxSnw70c!>kWssF&B^CBKg&fPVw>lqgruY?Q=e|$5Kvfpb=%gdQ>UdRB_uSsax!nb z)WjIRMq#<*EU(H|o)YM@hlq;^n2G z+c#gna9vzbAfffEtE5E0uDKtK8W@ca8ALDe=;isN%A&+nplX-H$?YmrI)~YK{o_sI z!Mw~u-}SexYGIrfFyZ#X3{UpzM+ct#`+xq!dwEdi?3XiK6L+@Yi^k!}>OAuHL5hBi z5L=xdJ$RlT93ZfHTiEx{moHtN`s8tP(^n742}~0ix6VpGAlxnC$zr?vaI=Ka!3}O~ zHrFO{^VFQ=aOU!MI+Bt5z-Q%+DpieFhOhcS@tm5Fa3E~$ANP`|fXS@ajxaL*|7ZB; z_wj;HDu-`v<@x>1(5BWa^9-n}|NnoHN#TlBT|0NbuBdqN;#C(n_h#ouoq}0fMas;Z z4>+7Y5|Pa3{k1XV?t@j?AGT;4a!ar+S;gWVmEs`6JmXtZ?vy`=C&oOuv!6$!Ir;wv z!v%TD5jhHzKeT*$DSWt*nJ4?2LCn50A6|(v-<*6HQXVK77aABEa&Q*jxbdQ`T{=4I z6gTHaV`Jw}hRQ`v4!SlEJXz8O4nGpznV~o#yRfHK;PSa2?Ye(mPi3O2lW?cLID zU0u2I^oauJYbOp|V!X04Lz6k&B;lHJgtVkbC!1}e;}vCdafL3<(%cW`$`dwQ>mAMb z%DT)gg>^z@)tcDIHLC0n)7Y=2Fg|y>A#Z08v-i}7a{a@*N_o!DSKcDP3AN3#kb}Fp zy83lW%8!hU4}}E=i`BSYTA7qD2C-FT9SC0dAggQ9RjD7+8aEzIV$9BUU9suFI}Z!- zQWZJXWgC}@rbN1|Y@TNE!QA@8gcCjX^`N50qGm?oEt9s{=8_&!+u6X$MM<|XBg;!w z?bgvF5uZOA8X6iN3SwhcJ|yA6koYb3Vzb=hmknxta=%VIlJMj*=1t6Bz;)aiph_jQ<-Mghrmj0BGl$4N|@!%zsYN^5{7Ms)uJ+4bW zaz3dz8a3^YyRX#`B?jgD$!G8Iw1^0suutFnW9NreEeVetJRasxc=UclLg>%;@`soE z^HhH~(uq1!@Ym|_@%|rCiV)jsqIU{581PKC;9Aj;%w)Epg>{poTC%~kg=}2Mk2x&( zIQOY~|J3y!KTk?6di_e^n8fl*xm(+mGS6%ycj!Rx_i485>o3cN}URGPl z|9G{m`sQWk?_^hm{o9r5Yj06f!vD2m=TBGF%QLjp?<;t4sfYLq?O0pHtb0Lurk+u& zIO{An#Rcy->O_R++G|ytj3<*}&o6XI-%1S!CJWDJsA|xeeCd^B8$;w?Y zMTTd^XSEGX4V-R93ZgtG)D)7sCr-}s$+f{!<KZSq~)Hm2Vt6pv5QN zve>>JRG(>RCG99+{9U34PHahsL>@hR=HdVS)QJt(FHK5FOl(`qtfs;ytgI|NhtZLT z$A^dKjK`(`-4ic4)ebS_Z*-`#?TO%WaZ_nA^-WfaImM=aAkOK4*+aua<&PEl#SVC? zsj*1=a?NZE73+(LkPs1-ymd<=Cr9GO&5ah2-0QSH%*CZ5I5>9FWL*`NA2Sy4@;tFf zbJB7Z*c-WIQnTR75C4y!;K+A#m~^;)Mxe%{9opUhB{p_1*IU3Mp!X*;Q(3uf`v1eX zB{L-1>$#bIWp?gmT3Ug)!g5=#ZD7+mdapUWp9yf{wmQJsu02r!^Ls@*8%G`AqHEakSH9+Oh_v z-50F%XBhVGKXX7N#X-x7g;jFRT1mV5w2J@p9$wRJf##s5ZI&USu1?Or!a_2vEH`G% zT)0@mV<{7}56dApKNFt~O)Qq03fYat7t`Vl7(X6#4{}KUrvHNTUvj}i4&Q_IA*nAI zvL7?A6>Zx3lrcRax6)xk2S;-^x3dpV^A?jsZjfx6l$4M#VdBH`vTMBj+ow%PNKQ?4 zD&pF-`~P7s<|qF+mdO3=wvzfNAC{^V#Azfx{rSWr4-?rVzcZM!O_%C<;8w_KW_l!J zq3nIQt*uK2T@k{r}i~e}3QD8GnBM|G)qK%BOgEMa^;yof;TC*)imcJvTRV z-fjVgow`k{(;RQ~E;i+w-YUIx@!zFK(ar!$0LYGBJ0DB-kvEf zc@OUI=h@-FWA2V=phEMD-x; z7RG39W?fBz^#=|%KD??8>O$n*OMyB+W{1Jeotca zvnuDjk+DoY;JM|Nj!joLsWdS+%isVPr(4fW>p!O4%*@SOO%MP43`?DkS+Pb3 z7Rle**08kyzsKjLGdAaac=xVeg>9Zrw}S!f&oxNBE)d)yw&qo2q~xN-(1z)psNGyG1_E<#{C@wF%X{{f zf6fmMT=H8t(Zp_+vSjy*jk~z+wdR(YEs<(dG5%CGvw){`lBRU*ZtZE>@7w}Ka#XtgSWyje^GIH^vUn1qS=EJoLXicF{{t1{x^E; zcxmU-Rm%h}O+GL+PI1Ayi>3DU&9A<{;@rvX-!8&>N(j>NN@^4p`Y`E%{hmJ#TE-F& zlw&u1V$R}YKDB}8+M3SD@6N)f-ruk1m)-UK$4B-{Obnbge?cK|{LLGmW5=8xJ$hqf zbLK|Xb)JgH626SKhHa`11oBLzTtFnZXrS6MwjZnAxGzoHwf0?q9)r%>IdHT16(~<$vPL3#VFjC_VZ0 zb^81J_5VLMFFL#b{{H{A5HzL}S$D)hn%q{lmaW>DK4_;ngZhZDl^7bBQzOHDPI5F{kTv9OGCcQI>dTc#HtgH2w`0%AdQgYLF%Ll$(Kl&=zI!@HT zkU3p{rA&i4=x1Y&#xD-o>&wPTK0Z9S5y6pT<#VT<;U?oZk)3k>kXFv`~QFW0)>L7r@W1r88~ZxfBXLIZ2$9fdk>05 zpV-QLP3fV1mT!wrN95)N-#87^i+ij0=iU3$TW`}W@!_jBv$i&~WO`tdtd9G%rp+78 zd?#FmTG&)Ax=r?{e%7XjaKns8^@*ZfX;O#Q#7sE+OMi{sum6vZs+*b$PO9za-BbJf z^P8LH8)iL~cl)!@U9r#NU;B|(2Af^~7O!eIdUyZ-=Vxc*Po3ZM@Be&9mzFt44mK=y za`fiPC|j7>4(dxY{QsZibna14&xymc3MSU`bf|u=pV-A)v8ZveJr7U4!7Q~Itz1qq z&dxJr?k6|QxBnlz=f}Tez5Nms+gI;%xRPhFWx>nT1Ir|g?;6uZyk>oXgVK^h)OKmW_`<6yqDr-481Pr8H84&z1dTpi@4j;>fTQ8MEucOwhS z4N=jbC%@YYB_t(1c#=}};{$h}#bF(W3#+8hv@|_S@QF`Qn^K>D@6X>)PvxzxAJ4a+ zvW!12jx8~fjggV7*HQk!fm3a(U9x9E0!> zPlltW?%B0#b(M6^{QUN|yy#Ca%fCNA&!0a1>(kTnJ(kkpcapd18gvUc_5A1I;mN=E z=jY$w_HXvob~Q{BYhI-E^OodBUD3n$YmdHp2kDHw=j@-kzT3wZ*3-wKoh7-5=4vteE8f(`65)Wmpif3Eg`jOpP zMsOAv*B(YjttoAc%*@Qphv(b>-?Fvx|G&TOQj2#;as4-xY2Iq&Wc*N`k2OF~UcPyo z8TZpzQ8l%;2?A`KoLxMQueS*p7j=PR36vU|Ow1COuU+V(oAX1)Tk^wydzG~bWu6>j ziW-mp{8_VcuhJQTF<|;F7d<1#0}fh7I=Ma5f!ks2Nn0A#JASs-@!>7NnWhST8#p# zH_X`hk3V{rgSJxumrz((+oL39XXik#n$u^_NJwd&J$keykn53SglK}Vq-uv z`4b;C>*V$6&6C!x*=XqO#&+(a&nCu+t{g^?fxe`~gdgnhCmmpOemGgD!0gb{|Njp! zi}I@7F{-h`U znm=n*VuHeysii+YJ^g;9dR~9Os-B;of`e1c9A4$Dk5|+pyv1W&Io2^TU!G9U!}C8i zMdI(Di1{`@x7YmCf8_ruI5bs5C&j}@<=$%-#x<$ThOQjU%+Q)4T>HbS7D2PGOp?66 z+owoPSoUh;70v&J1^*XslAO>a#I)e!!E@&ll9hKQw^#d~z3Dq!Zf~u3#i{6!DY|NI z+cpGvhE0>rk5gZ^?7;k_4fn3bk6%(BzMQE-3#|Ig>dFaG>&`R(od_f&uHZftb)>N+@hT(As4Yy7mdyZxNETGAR^7sGl?)iCtlInlGxRy9xO@#vs79Co! z?%adNhabE-qu>?j-fM7RzSM^$pqV6w|NpndIC3#4a>yL)zF__R|MrCkZaBY5JLolC z)W6K2V1~oomOUx$N50jZ`21VPsonF7Qs-lfXl`Z{yF5YcxP8u9KDM4K45so^8>e=={X3qpc&YTC+@}8)pS1reG3s(j z7KpITP)pd#k^RUa%pigxOD^1W!S>vU|reaInflfvR0g{>zXv!C)6Pv>muu`cU*>%HoMTSPVrMTvg!Vv3SfgU1a6NUU$YQrG{em{7ntaTg=!G zGBNW(tz2VwT!+6Ps>S(<{kCK5>y|Vu6AZU>6nL0l|G-N!qS~ZllHfAk8J%ey3`UnA zqf|+bpXK)a?z&cUG^2e|NYn$R!or6+YeIR7S^`%uIyl+F;IdehmE4|&A5Ki51~tR~ z|4yDVH_rU(k6fgn`|QBvuoHgrA5soB2-?ng$!7e?U};BxDcho}JTsY^8Kccbi#24m zrNO>x>Mu@U{cpI_A}#bn(8>B7r5zqIo-CVn9KBgScpNenaJqDY{a;Gs%T)ee!@xp{QKOrVPQ84d#vyy!3YPq&S#XW-$LaC2P^C z(|6JhTJ$=-KjkF)|M;a3N3MA$9`KNyp|bDFhxhmYPuKr{XV=$Q_jc!q8Iqn{Yt0AgZb^Qs ze`)RVj$F+X(aMS9TK^AkyUQCXbNyW;%gqgW`HpUFj$U1jGiN?L!ucs&LHPcm1EG&0 zW+ojHZ2C~dWVu#BT#IEg>$<|6fF8p_Igig|j&Chjqrt2~1AfaI0JW zD9@r4M#i2Vo;PoJ=FQ`Y-)HdVe*fQlZcz+pf*@m$Nl6a*)BiS4{dwcFWLj*q;I9+E zlRvsX|M~eopZvdL$Bt#BA6L}x*PfZQR_)(o&w?9L@)5$97kG;mD5xAs$U1f4@L|xj z)!ErguIi|i#urXi5`dWN^d(F}Qc_^LlA@c@1uL}zwsn1+dZ{mZ^6&rCjr;TQ%}x2T zG8sX^7#^PbiC_LX2lXtQuF&-QZ~H}O$Fc(p*nE6={QP*LqIiz?8^5`Ec)@}L2hRF7 zm94qTBAE*+fI->zNqA4&(fSI@#wullb-QHzJDbk^`2YX?mdy9z>;LI!pPn@Tyt>Mf zhQ&Mw;!PJ^Rm*rAyhioDfQO`jg5tpmGfo^hbjYAs?eJ3XCoZ~0)ssEGWJooPLG&b^V&oeA3G1yb(TDQIR$3AT-h{=b7E+6jybg=os_AKGpia)JO zdcME^?=$bu%4N?E?=Js;>GICSi!5f&EL2rnqslmO>6HuGS_X{#{LX=b&Aq&bpPl8g zwB&hzU-)+0fmJ;{Um*2aQlq|Y`+6bQT$2Uc?E{W`e%xYtA({s?%lP&6{WCM?tNZ`^ z^yz85zCK@VZ5SgX8(Ukw$F6^g60J8s{b+CQURiO1xwMomGqdggf5~fW5)+b>SFBGD z2vRag^D+<=m~RJhxs$Y=d**~g(^~$x3m?C><8Y6k$hUs^|LfxZ-o5eTgg&|FyoAI(p?qgYq@bZEo%SKmN>ocz{u(`g0nanOR#C6I*TVqA!p9 zS&m$Rb+enyEhHrW8!ouFyV)e=-+acWe=KZdqkO*n|NVXc?d|pF&)4rSea-K2Dj^}E z;{U(<6P4}b_xxy1Zt0z&#k_f?^_>HIDtYAX9B$6HH`a+e6A*Q3L%luo$MQb{K1w_m zRpy8R(1T&K4HB*hzIZg@5sRntx1`7K@BbIq`!mn%?de6X_YK&K{pS6dJMrMTh83$e zh)HC`JgH(oB-fV7#1ywb?ZyViiF`V`l6Ex-H+D}44UzQlv^6k%IlR8GH2&nf4bZTP ze#0EzvMqzz+nPC9heg%u|KH#LKR(_+*Sh}9oS$ERe2hPHhG*HbHU9Jdw9eo7{jD^! zwe{g^Yxf%&-vq@ytI5QA4<+Z_8$AU;bHZOV4zG{z5Da2wW)l~0Q+AiQwkGk!$wZ5? z9z~-`2M;tjIxy5$%F7%GXL+QrIDv6f6~qxudfy!53c0L>ni(@^t1t)6Yz%17={xh| z^Yj0!!|QL~-oGa9@7#$K6>a7mzA@wJnVI&-`xnd1-v&!GUe3UfZX95Sf;AL1e-59(MkJI+34h ze|=eRU-xJG#zbG=ry_!hH6I?lKRJ2-lP5FeY@Mz!&6?%2J@0R7Qc_ydp*JjvYJK&} zioOm9?(gTBZ*Oc<*CX)oQIdgi@`I#4nK>)cOQj z_SL1{*v@}&Z+X9C=+uX&#b4ZXm^O`viHXO_iRboq|LE_k6+Hg}>^TSa&Qu#B-rEsl(?FD2?j>n`isG`tdRO>hJ%m zetlWL-8?!=t9`PtHaj~r^Wp#6;r}=|3}kG6%x-Rec$~jQw5F!Vz5n0WkBPs2ezv!^ zW_~xtP|1ti;sA+{nfB#7At8<;gD3@j`RX3S9Vo93jab>W-pgD4jcP=tb-Fb89Pvg^-`g8GYgWag#Wxe#cm1vZ^+jG^|M0`Z^&%oO=Gpx{edGv( zz4)Kc&;LJq)Z;(zPxaql_VV(~s~##FMjkl$_BK!ceIuJ{wx^#y7<~Agyfe|+vr|#K z`{4Qh#IsUuywVbW^V2H6^?=qk6+f5g>XP{KwR3I81W|?c0-(VB|3AsGuc&9~;{O+S z2(T^D`T6(v|L5obKR^3>U*6q*Vf8v`34`t1i;o}K(A{;&pEdQz*Vpq~SPtLYTYu%s zjBjs$pFMl_!&mJ?noUhS_xJz*|NHy>`}=z~FcmLR=VoJTv#*!fSC_h@kU6mE&;Rou zewmhpN<6zIpPAV%_u zT5~l!2-Lo|QqhRMaU=Rv!o%L2J+tQ>a^~R9e#jVhFw618agl5X6-D+FSB$UP+_Q=M zcx?VuS+RWIs(WV@AEde;v#^_$mU}z&b@epX(%QAFxQiEv?3wv&d52l@yug-qyIU7p z*eOjEx~6pA@civf3ciIw2KCYB5+w|L6`K5ym+_@?W=~X+|JCXs(=6$=P$Pk7OWTwe z)dtNkv_thIt)eHzx`kvD-_jhaR>GMmy>usvP`3Fq6cyFGm#f-Cu zn?wEj1k3bnnVFfFZ2a{9>+Apab$@Q}{A_M)e0X#E|B0`s?5~%ssF2uK^Yi~VT~Kj1 z-~NC7-(UW(WOpCt|MT# zV`gsq{$6s+R>^&}$q$~puTZ>k%R$=-l8vv#D)NDb>dmg3gnVG+`M~tA;fY|4Nb|vo z4?H5?-xu8f?(!RpqU*W7o3CvRJSDX8 z+N9@+p{}WtqOH~OwZ$yvfv3)G{$i`n+4gYP1rx>74l37nO}a5-`iT$sLj)2c9a^>> zo%m+P!5)*=Yx4?!d%LT2K6bBtAwO#YM;X{Xcjj*7YEY19@#o-dZ<{7}YgL}=oys=_m-47>B$c04XPcEX~| z@XOxY->sSMt8+wzPhwfC$+hNal*_*_6&(viZXerpp;W(ZN|MA0qeU;e*Hm2J?_+;X zom;d5ywt)e@;sxY+Wx}F=XaIve|GNg!6Qc&tUM_rcP=aI)S5+ymaI9WqtH-O$r^Q@ zDRAMRzo$QZe7t{s{QuZ}fBqkAu77_oaid(&gpO|GJvAp6tUdeTpY0YuQDNn4;fHw) z3=M1koB%BXeR-KDCx_?5vETdac}|_Wz*wrNSfeH|$0YH<>D+h!<<2PrNxqRgY72H*Xo#Q4>^{N+imfwq4MEG3UE^oD@N~5H zGUgpP_Hiepw0S}j1E-;)hm+L>rKy)LZIt_;5P400-+rCwx7~402EK9G@gm#bvZhVz zR1rL{ykxg}*+HJ?9M196UT<0x`tzB%QHQKclBu;l4xSk+$gqm-PfP} zg(s4E&MB$}{bS{x>#TWf;=bkU;#rQ{B)?7W`+KRk+4`Sn${ywAvr3%bfs^%?xxQS7 z3p>KXMW6n z;Cy!1**2A#Pb(kj_Md8hfA-Tmt%shUb}TB0Ew`4rBk(Vr!%$8lv+G)lWN2&99KSc; z=bT>?=Fur|z{Go>u!#|<;key5;(g-p2267D4@Rm*cInTSsh+o!bW7(heQ~)n z@qmQOjyTKtmU18Gty-j^;k{*<#-`Q-87`MEnB;6<6L#c8$*H1~`%jn5Z+K_ra^wBS zdB5k*&u30PvXf_GeAA!1U&XlcXHSQWHLRJno9mE+fNTFVM<=bDBAP90FW+GKZ&RO; zD%#Q8t7tc&fm2iGh{^`BbwXjHL`6=%=$U!ITqUXyZ@5YJXvhR ze|nL|hgz9-o9mu;-rsy{VlY*8Hm}ozU3c=XY@NAMeWF>4(ZNkCv@VGxY*g^M;ku&SF^5mB^sO-W8qlH{xV*H``M8?EyyC=DRXT><(k`k zr=}L)gqv@3*F0j}-qhr*S4obFYt-nbico!-PbwN$W-(Gvxeo5Y&bN$Yvhk_oOWTGhv&3HyU8jZnTMB~ zZl8Vian;k3Me8)VmTZ|5*0t}r1RuLi|MSV$PpAFaAFX$+urMdtpzZNQ0~gIlGc+C* zB}xc1DQsx_AO6-zeZlLZ{lTG2G@~Xz=WdM=SA1brwmUxJ=QDxm>KXfg{q|jC4$fI~ zW^Dv{$Yp+lfRCU^a`LR-e5?Ot{yTClPPgTV(~+Pl++u;}Ln%)Eb^<2({TTVEk7!D8%;Zs-*n^onG2V$ zT7$i_X0|PNvjb0?{2z(DmkVV0WaqZ7yZh(#U%{iUf(yI5O5P+~^`1fuiXOo&26rFr;IG*B{l8bMYGBu8&fCj!HMtgbC?srG?&Eac z)bmzsX+f+_f1*L%t!ST;rsr&9C_6PE#-Yh6zWVjwpz2THDdu>p9W2 zIl*aF%!5$=XMfKaL_7WRYuY~P+iR1X3#PYT?$mMSxFx1&@$2}XiiF6X#n&GC9{%bT z`Ka=Yu~*`(E2fSP^Rl;UEM=>?Ip5Ucr+4i3C{E3lS~c@k9+o{WQuOUimTCVyQ)AI< z4(B$;(}zrEs^vt_{`Ol|afQ~ed5au06VCG}a2W8!eE)Fa-(kNOd3}GTR5Czzo6MPO z2ufV8_jegKMQ|uJ&E4B;{^5$*yaP@w{R=pW(1bzx|)ge~(YB zmTL0nmybZmG%DRqgalYr|H!&J{n7fN^8m&Jaxc_np!xkiN8s;<)G`Gn)LZcTx;0n9p}o( ze6g{$nxm_f#C>*-Zv7;E$w}%n&PzSM8zAR?Yr}k}^>+Dl&a#OHa4Ej?yCl*4+Q`>} zFY5ivr01d9;#Zj{-F;ar<+SFx<^Hyfp{Fl0TNkr4|*>0mev3UDU*H6BM zxO$c$SE~U}+y1q;mMT0-apiyTaBpj|`&=Chjiw9SKia-^uK2>MAG}a<)+fP9e|Dzx zy)R~LeQlYvPis#49A}O-Pq+E~`SRRa@=Go&i^91jfB6NTv#WAODW?jQoSL+2*{T!8 zQ=C>F*f@V?Rcy)a8EVG_lOv`n{Muu-*WbENpw&?0sKEsj4#)e8O=I0}C+WH#+{Z3< zeX7aPozp9(I(co<+4fR6@zL|($Kj9?6colS1_CbglNPs@bvZG8tK0Kmu|r;A+4hME zQo$|SldSnZTJ+z}+U=WQbo0!FCvQ&eQ+}{qxm2dzJ0~t9OKQq5yG_%X%d@)waqd62 zX6d?@R;#*H6weDP)d=aa{61I6aMt4hc=PM@%{zFN`Gvw!MORl>>lvEKliKD#F<05M;85)UBR#P#$ z&gXxR&-(waY~JM5Gsa#Qx0&!}zG^$aYuPG>HL08H$A%(N!P7)q-wRg+E!DrPz5L8Qu*0^@-pkeE zz|&%{tYKDYW%_g!imVrC)4I&u$A8G2xBeCaxWNC;EZbgwh~q1y2dKV-quz z=0%3@Nss)N5X&DR=QMkrmY2%5lywGd$r;h7J}rLmBe#D~>coy+E*0RS>QJ6~nvmkM zKRec)dhpgPNB{oD>C+uuS`J>A z;OO1`@Hl_l{~KXXCRu&s=@jyq;Al2EcJHYVuSA=Bc@J+)zHqSgL=J~!LqgA{O-3(X z8fs`7dU!uJVT{}I_+{IJ_3j%s{r{8r;6VbUjlE_zWS5(K9hYKQSlO*?g{dpQ+_`t? zp4wK8ldLS=9T6)cQuH?HaOananeCkZ@UGXtAKo&LWkQeID6^FI8a5U$PInMdRL%4X zyzt%QmqtXkR#96hw zjdQBzD~Rt^>fe-de`WTPl*k)qYyKYFr1G&a{JJ4eiezs0H4)Zx7fs|8=KS898WC#8 zChb1uki@mA`_gB1DD^3qtgj51r_2(ySVZ90b4`I)&VI6mXKaeWy`MF+?71BRM2^@ur@A$D>wttvejGtTWMJ zWpui`pTL9PTXain_4M3-OtAA?|H+6g`35+ZY=Op2yx<1Iq{r(|2nOBcXi}=k%YVQc zH0#QwYv+{0nliYabDTc3AwTWt+3oX+=Dv46U;JuTZTh^8(>AGiE}AJcAyu4-(_~ks zg!>dHjxB1wmVMLh=kFI~GSie~TjQF2Yg+2T>Yl_{O|CiJ2G=f_tl0glv{D06I9~wnQI8jNv`vGEHYBn;~a91o;t-`;h3M4`8?#CnVf@wH@nCj zT{p3<5o^t^<_k~ybAZ2d@z>U{eX|t5v*(L{IJ{<#S)YdM{mUji%%=~T*v|jBZPk}u zLdPYXB_^f*uq*TlOyHU4E+T99XSwGd^RtD|F^# z4m`3+fJLcPqxJbh)6A}GB1cPq{lD1#yr$s!0bOgpV-r^`()kH?;FdWM2i{*K=C`(a@gxv*8Z7UJ$c9L=SQ=2k8OCb zanxdwNN%?wACpj1Wtqh3U6WGtw#J4&v2;>c`g?!mvr`>fzBDCOkz)>Hl!%L9kkIk|v{&t(1>*6h1@mIWuU-0n#nkt(G$^vS%(3Na zFc4|pzw=*=gT~&5+2&uW&NW@w*er7J+}j1t++4D2I`YrROcFS>`-I~&HOq6SkBI-D zd-UYA;xopD5xdrH$<0z;qQdfO);@QRS#60PoA1q6I(m1Z_lN%Hwu%9vI^0ePK0*@N zfnB+aUk5Gg^UdhAnik>KXFutg#f|c1orNyhe&BFw3e!Fz8}&zpxigVH(&_t8EMQ1!s&+O3G+>t+O=b6CN4@XC+x`iDsShuO55K?9lazE}qkzY|g;n0d+08Q3MI)B7 zg`O5)n~JYqTA&d&oWY@J@7O6r^P-~H{Qf9k0YJWLO? z6nAq;NGuS3%EQwU#k_OQf+U{>s@!KnoC3t}T<2+4=uG&^S7h0^hHdSK16h36CKY{7 zYg6}^*jJza8(WF`LDq{`92n}@7r#98 zMRSg?|NF(RTFcB0MJLSC`S(Bl$KTnY4W85W1-UOl%y61t(Y$%%HRTygPaF_a%m{4N z@lcuW@pSu?ed4AWk7fk%@bIwn8N}>3@Zp_o(jv&V z*+X$_6LbwGyp_1|iTQFe%Vdi=-I0ufTUuvKVvcOla%_y$GB8?HV0hJFft&rT%!toI zGBLZ46}%QZyg6N?Jr(MNgFcKqF0r#LSSrM}Ys>Ww#~8y*f9&k3cz)o*k_nUTVpuNA zm>3&geenK%BAfNH>JKh9^QJVeZgEZq4R}B0K74jIXe(OHpN=F*EtaHsW7~ny90P*| z%e73*LK(vD3xzaydk4s@mf8~1B-t8a%#(J-=z&Mv46TXWoF7C9lX z=s}TOn()*aLDLq*|4V+Yq}csfEB{||$w7st^7lqDI}Q|lR{Qnypa3K)oL;Vd;3W{j z&9h~LLvr#jne5r~6L=3b2);YA;>cpTehazO9gl?0mh-f{(qH3x;{X2}r4Re_*{*aP zH<`ewoN8dCbmRYjm= z_?Z3h-fEuo^PpgDs1(xxP4I(ff1V24?3G~aIpDN9%uso%M1$$kgj0)HS?X9%^esH% zS&^mM#2XZ|X$P0YAG@PH)h4MCsT05cYxg}n-|FawV)aL1;yw`XA2Q-N*0A`1!m_vn zPZQ7BaW9LDVaSd;xQb!5)T3bLWnO>I^8NYAa#rRH3uC`WlW({Qf6`gk8JSF*|1@mg zU!QJKIVtflTbsK7hqqRcGVIX4Uw`b~OdWl^xi07b*j;d<=?AxTlUCtU&MisI(|?}W zG-IzZn@p-=T0??p%k?i2(V`KPxo3&an3g0aQ88uV(w_bGl6>-MH@0*>{Fck~+Yp+F zW^Ot@-%di}!eqgJe?Xzbnjv>MbhmNB5xuM3yj!01sdcPga^#lR(f37bnkr;?_U+M)QmKk8TB99Z@2e|=m1f6(6k8{7LIUeR1s zCIoR|(!mn*G>rr?iHb=~>kcTX9Tz@RQ*mQP%o?Q~4D0^%ZvAi|bb>(w%Zd6&S)omO z$7eAWbUyZyyuQxx&CSCFKMW3ke(o}NAuNsaaBtah{FcA->69HxqL(N0?C?(W`CP~< z`L>JKr*{Vz{~AlVbv*uD#~w5MS=er0`@mCg!j!`Ia)-aY<@x>H_|AU+H$RnN@w=Ug zea2d?KYzK`^z!UD8DZOLDk~tliDl}Byd<3`jmI7yCM{eFH zQy5?FSQi1Fiagw|^5!$NRCs?%DZ2m7&;L7SEAKePA9GYQtsvoQANK=);RtSqq#(|9 zER9!vY>vt;GpfjE^|dWpcff=_e3xTzgSX6~wl~&)zV#-)3TX>oE+J=|^y5=&i*PW+ zkxnzC4?Z5)XK=S8(yLvFYTn3@G z&STE5Y>141;HA35XgD8dNGH>pkE%;$@@;lQ3s98qZ-7}YWo;m9>P5uOfzo>J{)#jHfO1=u0tZaD2 z*Q{g-e=at?cSqN>{}Q=AKF9w5e-Xp+_)jG$$ok~dEox`YnH=~45@yT{nzKFPPMUgU Og7}`UelF{r5}E*j-wS^L literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..df3cf2004312ed0ed0ebf1f0340cbfec7fd9ac46 GIT binary patch literal 19251 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3`sL~37*a7ODM5mDF#|gT z0|UeV|5B`r8Po?o^It<}XJcpP0$D>7MzFJw^gD932JkS?zW(ZXYsYu{r3xDR%zkp) zHa(D0n0)X$`@NLNwnCl-o^G4Eu7`ep^ZM89WsC*i#Tq-<7!1l&qo&>wOi@QXwg132UAc6Ux5Yq!<$z@JHJND@5}kiO zZ`NVq*Yp2w*U==gYTtC_mS_ju~2tR+Q`-HAmCD-Z(5{uf>ZJ3yZ`@BI%yv~9+C1;*FpG|CfyB-nvOxXV1~d+1&!3 zIg{5uGYx*Tr+!MaWq_)~X`^+kOit$n+Ptzl_jyU~d|zQH<#Q7a7j6}CJ=Jr1%l&V4 z?(5HWuCJN+Y_tEW^lvBLSkFDYDPfjDAA`c8!wu`4?`Y$zGed+w~P92T!LC2IxL->oxR`XE5&ErXckC0kj8N9OZtmzRo@>~v;7zE)43e= z+GOdjZN9Jg)T@0yE9U z9ba6?{_-z*VMY6+3$6u5g)5V~&&|?wHrjjqfBnS65^J|^+a`Vdprx^|lZ)5Z+I}O6 ztPmH;V_R)beq83E=q{;}zx43b`lpP>>GAn%r^LHd++Ug*(UN?`COL9~&>nY=i0~yW zf;qRZ8=SKQhl13JBL|Ni`}6(1y{#>0X{n{KaH6KBA~SQKq-5Lu{r@*^_>-QLl#uX) z`~2CSp8o;HRz*CHlQ_QJVl-Cb+-ehW#?e^J`0v6_hgA|Dlf~xQJ?Lb1IuvvI!+ZI| z%l&!Q#~J?lb+q88(cyP@dF<UI-qZ zXUlVay>ZQt10P-rGyk{fI#R9|wu+!`2_aLxJzCsSaBf~Ip=XV^{$W_IJ+KmX5vcqx2% zdp^%}ePbDm0|j5T4mY#&yuWAo=a1)hiDamK?vEZkOZ)qOzM;~gJ6A(}V`D=+JEu<- zjWbNv{J(K4=fA{mr=(TVZ%%T@RX*&fI5UYMJhye8MS!ncAdkP;iq1)mjHZzalmGuW z{`2?rhu7kVR|fN>pEH=zEnNeAJno>(+;KNsKUnN0ELahTOm`&TldO8a?YzPPndX$>f)ULy9W3GH%?Sv_l^5b~6 zDH`tn!XA&=BaTc2`D2OSv01Y(u1PpBc|!s38b#jl1xs@7bTaRCjbDDneJ+SM*UU8JJ7mD)dVhOD;>xJbox#(J`tB`j zf4#7(i;uIp)3aM*!ij{VeSd{-$A6k|y8hv=cV_4NQ)dS6`uij9g(R2(*0U5sx#rhe*wXMvM7>nTR&_#zYQWuM!$mL7RK zFJ$V&HP3A49cYwayVuqqs^URLvBGri7gGhd zDTe4Q6+Gs_)-`)#+M5Zfik%WtlXm>tenRO)kA1zwy}ijdHn%4}^=exkE@@YPY~HpL zpyUlobbgN>JW08BaUwV8#-2ViagmhtloqvOHa51LlT3S+4jgpI&PrG&aVCvL$tsZj z%Em3y6J#2~#2>6t_;5x4$7_)*EL~>~%(U3BjMF6b1n<@R`z8BilOwivJv_qM_Wb-x z;}xKm*#G~FoVFf1d^jy}p^#YG)5nX|6%L%^@D?|a3aU6XnSN}Es=U|e@AgeCBx3PB$rDk8Uha~himw3&~U%Tu`=?3 z=#hFEl_LT!vRsJ{t0dN#avL?6TTJ*Q-NZdHDJdx-IeEk8WC5Wh0rB)3Te}|~WY+qU z1oHg<|4U{b!g9p7kS{wnM3R>lp2)3wg{EP30fsFXBO>sVIV<{%@*xAW|Ne+=VZH+lJs37kSl z*EF+T^R|vk->pR}!Px15&Ee`LEpz_YeVE4pP5~N) z8#Wt9M{268Sw%%_fBvkzoG0VN;aSQ56ICsRbrwvN&il~euI(i7lT+!_e|wvhi(C>O z_8fAdKa@D*?j4X}D`#f*;oYOko$sAA(bahV#Et#Kg1kwwzp{i9CW zl*5cMT>e6dDJcN~2^~Et4;~~$Z0~#cDYa|X3P`vt(kRTxKAn zV)l5fv77;Mhmw7vhIaS#>Fm}P8pVYh4<72^j4L{Dhf81g;Z?3BTn!8!=Av>pKg4hd zwrp7`@buLG|2`YqRAdfxGfq)(^D5@?n0Tr|N}DGmlhxGM!08m9O6rSOigv9l6I%)e zpPsFM;Z)|PQrOZT;H@?%myazclQr9!g>S|Ti5oX17cG{&aYJI6Z}N^x;r^RHpzbjS zxo7J1-<1_BHg0m_;!$&RQ(Mx*C!f>McUplr*+9WmUqP|^j-In#vgE``Qp!PG4U#82 z#9nI@c{H7PYR^3BVL|GLZ(N5MGS)VA_^rw87S!D7(9N@?O?Q@*&teW0ZVyJ6tY{X# zhzJQLX32<1Nj0B@9i`nH6k!3PUbtdK$CBmNtt}FpHW?kdCHa9P&8f?b?ZykSTM~lT zp0O4QH8MQmdTPH$u`}l4f+LbeO%1Eu`1Q{i$?U!1pr>dcrTSte`^ubzhRF6gc@yUL zX4EWLmDr;0Sg~0o+b7w~!hoCKr&mUXXWl%XGiP}I{^couH(}2QS#bJPvMfAt>XWNW z#PK5$MFj_L&->t!nv~#>S7~q{OV~wOWb1?m^LH0&TIc8_eCp_84rW=a^q*h4G1VcI z;d)=#Y=Z@>jx6Q5$--9gMLC-Hu~L@ml{W5K!59CWX83M&OMyM*4u{dI#;wk$8>WO^ zV46I8_Q9)DA0FjyJK8NNXQ#5Zo*Uwb+{Fs~{Hr%_t_zr8Xm8KGN3ccasRxgmM^xh! zRf$w)uZNAx=1vq06I&Q=&QtJJqLWj2fvD4rR}()QsvR=n-5(esaAdB)dxt|@o-C(Q zlbn8tN*=tD{P5AlBp0c=mg33^2_Yc~85xO|){U*gdC+)Y_#`1I>B^-^{9Hz1Ay3?S znL2_6o-zha{%o&P|G)if*o3VH3nZIVe#mFrIh;K{BT_)`oI^}tBPWN5=!!=!QXKkc zBqSvyCM*kASjzqFm|9)KR*r*9KmM<0B~9)3&|yU45_E!w9!4dOX^1e+$H)bYz*hJ}h+Vl!{+>H%4gvRzLnC zC~cJ3oh&R7Cr%U?8X6u5VvFj3VE^oZK~bFJRHtv8%0�Jxz)+ zH>D&KbiE}yv_BV0cC^jdV96Yw*nHJC!drHWPm_kFM2Cjl1kXRYf;+tI?AX-R+2Z5b z?CcgXf!b65|0`(~c61e~s@{x<@;V^aZG515#s&)q$u~{sgN%$8i5v9v@MJWxybcd( z*dVw_x?Re|!AC6pfPmr*CC*hx*2EiZU0~h3#EEI=9n~3j5|WZjmPp*WE9v7aIb&vF zH6I()BU*(wZmeKsf4zB=(sBk1huIU`mrML;v1XnmQY2t-XqBQ*!}5Y-s~E!5j>bO; zV4Vh*g?as=^#?2E2n_F3%CkQ0&hqP3b=2%Rb>iXwzZ%Rsp z!bU!S`4gE;$2UE!KfyIYTXsg^|BX+J)-bF#@;UgwBDIrc$0cn&wmQ8cD}`k{oF*NZ z_?wd}DfO^tSy|f-3%2ZRwz9G{*OZ{{onWzJN5q@Arfb%!CNljG6bw69-_ui{m>uzN z(YZDSb^VxS51Ck$=e@b&pr{aGktkL)RV8&)NzqzH(leE6O| zVS++}DI`x$uvoIBMMg%XwdKZzQ*29iF-QK9Uvt1gVT-Pc%M`JSN!)sZNnAXiCh>W0 z6-vuxN>AQ4!NXu?0(WyGqw3uSOBLDJN{;gI@Z{z3+N)(%z=UQ=F0?u!rdQNf<7Y z4;O6?ZDBmA{A5eZhwUbXQ&o2eiD_#)7Z^0=>4F`7XuidbGiRqRS!x+DTYSROi5a|n zUznJK(iBRg|2_0$c5 z|MU0v|LOYw_t&Q@9G<&m8UGGFww@A)(~Rg z@L05{LF!J{5>1m6hwPqM20gjbuyxLIKYv67x(9#Q66KL~;lSLk`MoP8e8qwdU0Oc=|Nq~+|ma z`}^VHeSZ0Wh6aDagOa{{PM$JZ`JsnT#hS-a(rkgv=}~-#o<2)6FipL&W8%Zh>Tjy! zpbnjBQBiT@&D*c3DG`YiPjRRxh8=nFxNw+rt5K?V33Szuwa^%1P zhMNsv{y#lk-_G~<&YqvgCo1osXY-T2lc%hVEibQao!+q?o@EOZ+*DPCj%~X*iS?F^ z$TJn)XEF9EXRchF`0!_OTmJnG5s*A`Xr{%NFH4%5p3I#2KlzI0iOeR!m46!a+0q&& zf2h;%ak(AWAQXCje*HYt*GIm*RFB{Hr%7x_{=J&}TVEf1dHFoQOvQ}3Yx6TT4<0?r zBDwnLMWux@0+VNOs3{+-|Nrm*zq18DKfRCKyf5eGr^i2wr^x*Jtp3EK^?z?~z5l$F zGw0H_Y)*dgQ1Ok0+nv*P5)v1VPUPmlH98sw zU%pKf7fd{SS6gja$bqA3LIz4x#WKD*_5S)l|KaKB|9^k~Kf~nZ<(0wv_thNCJi5Q0 z=l1sh|G&Nc|NlRao?eT)g2aSN?AJo=GqfZPR>|!UoX=3o*l%ymbO{?ySu z8WE{@@qELUvNpl3azFW3yGu-v_`t-x@7f$es^zamE_DDKs@R&@ExLI(ZBTGsyHX};l!P(jSkN5x2$op6E_SXCN z_c>Me{r@l7CzmQ9kRTwOY+!gWNGpNq%$I`=%9;iXrpXk%<>BF3wTfrgE}r!BNt!m$ z6nTjyDJ^Zvbn&2|N6Cpl`)_6cJ>O7O!T8Fs|3aYt4UQZ2SJvrBIeBWFpI;xh``f3x zyYv75R~KmF;o;d||G)av6YuorJ8i4w_Sf@UK4;TiwLwg=LW}i`N^9hwgw*6^mMlMC zUfzFsdHvPv^$KdK2lgD>P;Gv=PnPHXec{D(8X4W~TN#fxNQ9aC`0&)#@hB-p@PNjP z|NlQU$s!_h=GLvPOPB7HxbS_m@-c%6?7KH?3Uohg5X^b!%>V!Y|KH#L|J=>Z{`G&A z%DC_EujeU$R}-`G(YJSZ`vcp3E_kp#QkqxMkjZrJOX0US=TlGbi`n<5`@|GR#`yh4 zId@JJJY+igJN9Ewzu|&qiYBH!JUmubJexK}oMC}P-6V^D|I;sAnIa&P(lS*tqw=qQ zQBfm*gOSl8;e+u@9u}`jWs7-Q&(>By-@bl-+1sa!-Q_*FdDz z{<;<^hBc^1Ca5W|;h4TLLxsh_uIkJ02M6E3zE-Q6eE!u`ZJz0RhBXIoemTUqfv+|s^Gs=-K}yC`f%{z-s<_`Yj>5uzqfq8 z{r?2*4~DZvcqBIQ%n#T~NJU!eVUtq#<;etI+Bpc7v{|09chBQY`b8y{P z^RxWbmG9Nx>uP_w_-$ll{Qlnf&kx6_&CIVSSgMH|FLZDb5dpQRA_A<}!(GtT)%5Gv zn+czv8_t@Rq4eJ{n4!;#b*GM_w_g4I{r|r`<^H+5w*KFr@>f@0OPkvz95G3gN%l4T zZxF=5+@^P?h$+&V>&Ks;_dhXG8lY`E3L*ZMVnFZ%gY^2g7M zypZ(gG*Vgdk;Rgkb;+&b{9Xt_b4O%fbxtb zM$L@QV&R2NX+Qq+%m0(J{`UE*wimzu&%d)*EaVGfTsNtC;zUP({@MaDaN>6IlsR*z zCn@PoQ`M6v~Waa+)|6Cje0goP<=hxla_xI)N>-Q_P z1=*CcB-vCx)^7-UXy10Fo~N+oVf_Ao)n8tG-=6>Ex~71`w~L$^y}oQRqFibQhKT|q zE`pHC#K~L6M@!3Q(V{aOcDQqEx?DMAwVK0h62~(Ii8Loi5yhAook^4Kq_nXxGcz+E zetv%c`qg)6|%~f<|~F0~;nO{P_I*{@dI0C)`@X1 zkn&C!SIHwsPhRX;UU49&Mj<;fCFRGrUPpiarM{4H7$;Aem>3-|FD8$JT``@1k{u#t zxcv3%4JUET^I}QOm1t2^JGOASx|g4a!k)7y4j=yX=coCd)Wi0B0{h~{vMpP-*8Mzw z;KTpp{r~su@ezp&)6zbzpz2zdAt3S9NzDD-V*^9O6|0Rud{VjsO_(2BriiScC&!*P z>p@ z*RR#5{YY|xM8>WE4?P45iWfA!dBYPFbY_Z-+L?m~A3Sq=v&^K4VeR`aOLJ~^A0M9j ze+#yh!Y#G2c~k%IPpZS5c|XO&$`zWVFU7kCG4#It|KHHv>Ejl&f6Cq)HyNd*yx6#T z@_n}CCoLp)fq+#Hs!l$PvR#=#(r}v{s?#a(z`y~W>H0%{TSt`TC+&6w~Z+ys~ zwXxB_H#&O5k|PuL9CCJO2o0YO8gjC>R^Dd9sQBM#feOcy5GS8q2PZ!eX!`&E|MV#- zAz^9PuBjQ9c61x7s83!lDw`lC#Nw@-nJE$r4Nw(6H#fJuJh8mI75Njh#qLaAH^X^; z#BnnY^N^V{{;{&0SiU@ak|3A7{lBtuaZ%x9XQ>&A5y}r%CVdFE+Hp&M!hxUXPn>x0 z_3O&BcxZYS;GMuhGzKx-$|wK{{H`l1{*eP zR9wWnQp-`WrMD_^u4kjUQ{htPlrQYMs;1_%ckS|;EG6dY`AJLb&Y?p(8KI)@zOfXQ zHbmAbi02$p5iGfEVk#*s6LIE@hqpWv6AvpZPhH)TUvkz5td=zL7>G;%|9}3&KU=SG zXyK;P7r+1CrcFf&B0?F8Kc+M8{mbY&VH<~8%htb*r6OPT<>fE$sodu1_(@0S$hPA- zIW4iVu3x|YG)NNHJ``;5z$+p`a)z6t$e;iJ?KL{C-ks~}*B23AP-G+`YM7EPymiYA z1qVH?4_h1zxu?q82UK}NgEn(QSJ$RQ6OISG{uP@X5*I96V$bv9D*HM!&8ntmWl71t zo153JT2;c?r)+3w_~PY9RrS+9e|K-*w8&CAdoq)+KsICZ)`|nc2@W@SPI$IY{5ZdX z!MV_|IznRh4vU9h4jwsTP^#q|DS36*`htczMjbN~6H`(iJaoLG1GQhIR!67j&>&Q z{r~-|w89b}{`a2tXJ74a`)i#hX=!Y=wQcj{*xcP?KDr7X&~f?@Za8NSPfkupZ#mS3 zDtv)~WqI%H|Nr}&+Sb?nCpjUZA-?#_e|ur&$MH87ZtgMYURW(?Fp1+1XGAdDr!s!F zGe-^{_%MIs$NjTs@0dCB@5jgMudl1!Q~!VU@&5gP|NWic+36TG>GCnR6Zhi}9dwL{ znKO0bL`L_Y{r0~)JFAmK{u~x?>EPh{r>~W?ZNrcKxwr2tyZ^hphjZ`Rc{67^#>_c* zWs0J~goCHM9j7ch$(+QvXKI%wAA3y=Py9ZGbDJPpb<12^&@5}~{G{yTJd45$i{JfS zugEpm#pJx>k)+T91Lw9iKPyg7m@mxtyk{%7XOV5*Ud4M^Dv#O4d;YAgh}qzw+4<9< ziG#6AkP{?c@yt7UrUSsRK8l$zv*M=?C*Upj8cnZr*F-bV>%Z2TW*zrW2hs` zxxYqRCf*Vjo4zY}nnc^@iRalsL)b}F=#5B}Fbxx9Mf_ale8 z1=yC9G?-3Zuv?)1SA#Mi9~&QESWM*EW4gue<{qNq#as_xCwUmH7Ps|35h;#ldRsOwLZ;l}Qo--M#-;oa9K9 zWou^CG(E8M#L^S>A%a^X5*sG&Ffu$Sk$T`Ri(_7_&}6j_D%>f0o356y9tlxB@!-0? zh$hr*mA?A_|LT8#;}=Vrz%4&xL#X5jr547QPx9Gvc-Z!=4Rn)Wp6TK(#v@_q5_Hp{ zMrLw~#k!EzYabocE=xb~664u0rP1>Fr~mecSBGn)8^R6UU;qE_kAn>UG8@9wmEXJ) zR@?IB;DLr3MKZooZ>(GPq{hs+(>F&5~04#LQy|7QH?KU(mjU%O^j&zTjdonRQNgWKKfY1gW!6mh!MYaVbT-lp`KWAGT?IQebCZ=%(SfJZS^31q>-75(PKS^8-Yn?ujh^ z@R+?sfM@3)2R-8pzcdBdZrz<}5SjG1>C~~Qb_HAAI{TxZ%<-+UY>bzTl9JV7xOb~z zIoqF;Oicg&I@|w*tN=JP%i>>hPga7j%c) zoco4N-@#92hHr?)@Bj5}+w&#&Es{G48qoaz-^o+PzW!h9UFCioo35>_U3adhHk`d< zw1BU}aKWZRR;9E@Kl*#S=XiDWcn4iIm@uz_P18Z!?89j%iTry8HUIk-@j%)t+Lr$g z+?H9P8(gtoG;XTFg3{E7lYe_WGc+>zqrU4&!T}9q34=w9{tp{hA2?wDr*(gEcl_A| z7M9)XBd$6#q&Z{ru%@(grUKi7*}tCiAAWwG z=g*%b^QQT$9Ef`P=l}T+?_>|pwSI9G-X8byskg8Bp{~T#pQCnTh2=Fi%{xMFds5kU zc~7u>fA8;1wsH08_t|NsAQkj?*}_}KgZg>7mIy*hsn z@AYOC|1QsbI9_ep|IO+9Z|?f~`1Ex9jW1nq*L0WOwp(_ho=5oKLaEkFyEz}_ug+;? z)Q>k>6LWGy3g_Wwc9rdV8vPiW5_{Py zyqfZN{%e=ow?j!tO1@wIf5n#<;>O0!`|PYQ`7PhSLR08+){|WQ8J!I0)3nxjid}AH z=CQDl`0+zR&i+{4Oh!kCtW!V!&wu!@b_#2g0L)`D_5Y3bRDKqJtKw(kl6JMx^}ejTMua>P#Y{=GN5kJbJ|AV%R%k=#J@7!H-W^zWND$6t%XSY*7K0ZD_Mf1I!{lB?WkLv56 z?^%1`N4uBO>3{BC<}BWVYtjzgwfAV;X676sa(Ht(&+4#+I(H9qb>vS^xccLZ=Hb`Z zBl13hyzu|Olef(On>YXe{mstlSi#wt;i>Sp$z?UGz)F_#$fl0v|Ns9#H}`kp+gtit zT5B>h@7!3#8?0{=nc&MiLzVs76$6$@tSo>2pBIonf8f}$4KdXEA#oC|qgzug`+YQqO33mra+Ah?{{R2~{blzw>FN2u@!UmY0|TYliyb!K zzscX`VBd84-~XSV>pwnP-`gv1U-RSojT7%*ojP`=$7UA;`-zPmHerr`0@$BLToYhE zbcn~$5VTGu=gx@(M~)ce%l))o`|t;c@=9mBAb5aJZH=GcWpnTD+}aexo!r7UgHxetRP? zEL>#$*nUkiQ)+_9Ck{b3H?|vdbu@Ud{{R2K_V>5%7Z%DdT`I5U_vg>2r~Ci^mpnKB z|ItHD`)#{#wjEgGXrL{Xa@5&6)xf~u&;RoUk622!Bp>Hvv$t;xT^$hi>EKjB`)!SD zx*HoGo|A10U%w$6T6Ah!Mnp(NME?Kx?{9xEuTj}Q{;(p4O*(t#IB4qrbIzaN%(ue1 z&(106{D;TK|35vw-+$((uaA!27jQQ)GCFa}Xm8C=_b91(Ru9iAo=IwEbf4I;+DX#x z!Eye!^7jiC->Ux)S~qv$!hsF-_Ra0=&HL<{m#CaGUew{E{r|u59IK-r*p8Q)L!%os zAjA_B^WU!iAHVgAX2!)U8S`@u4rNKQxw(1FW?q{6@xA=v>+5;=<^EU}J-Pnq=>79^ ze(GwRSh4>6rza=xo9F-g|Nnf!&rkFH=l}EaJjh;oA&c?MD#o3epX8@@$L!?ME_lRp z_~qq*GA}M>dHKW7&hm7N8^`QFmymQQA>+h?B}WdNKY!rt*#oCfA2{6n{)c|d($imG zM|3HH`sR>UQ~iIVn&034UtV7CU6YmXU_G@tYe5LCX$LPUhKPZ>*tZ)X{Cs!DW<@Wwc~@^8Vxe;-C`pws{-B ze1RBfnhWZ*e+D+Srw$zY#J_UR5B7L_9(|{P@ETBM{Qq6a~zrXui`|ZvB!|nC@ z@&Ae+9eK~r|BsWyKuW@ZoqhA}of|nh4VNwdpOD;~_w?Dbw3=^k);BR7k$Ixj$e5p9 zVo~DU!SesV@td2>vR9`~OMCDz@xryV3)j*L*{?9i5Ur`|;D;v3Z(G6y%8!j#qk`fYmcYhr|+&n|1?&~Xg>D%=; za-YxWY+zItoZ2Q;Rqul!lQxRHO&y9RNU3ClDd zzr9^=ZM}be#K+g)^Vt{~-8ebjyuHJurNfGg#cFHQ^z_&~YU=;}d4El}MZ2VQ=8T#D z|Ns2l-^%(f<;fbl{)=pP$zY3H5MtzEM)@;o|bywrx#Umyd_L+P~z4gde}Z|DUX0AHT0<8rPgT zAWI60|EDJ%nx!D&R<< z>V5N5gpI9D-Ctr|+|lqNNZIftW;fRX1)egmy0k!oPXc1>>4;+yFYPt0_Fk?gZPsDN*2kLu-}oBjTX zi80KWD70t(`A;q@zAn_QnHh5TN5`by3m7tZ4fnaug@m}g;@=+=AO8IO|HDW3C8E+7 zRyav%wJMzmkvXRO?f=BBHNU>R|MYafyq!bt!^g=F9{kYP;@Zyfdc|uFUr7N_jK05r zVaD%&a}OS2Iqj9aMU0J^*(>TG*MpZYzvpLcZ34|PffAj*<-h;w6`!8Sui5g0UH)a$ zRGA3}d0tFlYhir-T4}e?Ba@7kp#IH|kN2g`{~eA!cKGm$b>|M8`}h97eaHNb-(ubM z5>iqsev4gvFkxESgJ)?!{z`jqX5OjN&|+UxB)|4~6WhOksXNMgA6{0kIR^>NH**d7 z8VopGz2+@%Fzi^R^ z3}`-Y`h*|+yT7NJL@sFf|6j81Z&JlCk&FF5KD9df^{qI%&aZ#dHMhUj-}6MtmS2E``^>^KW_iO@*f}Se=Iy{UuWRq zVbIlSSWuwwzD|NQ02H=gwGYp=?qId}^oeK98WG>K$xX?}Ij(3TQoTN?lH9RF;@=;~ z)%X4#K7Me*v)d995+CG`{GZ?0`0#yvTetZCzdt_8&*PslQ&L1k;@{uj|DT_)_gc4G zYTbkD`k>{idG}m8Hf#z?yf1z0!}Yn=EyAD#@c+M4zl@q%+kAV;ef7sLeM@XOd(PNE z;g$>!56_9mFFYQL1-SqDaqz?2+xxSx3vi{RO5V8f&)X}ga`F8S3=@7-e`HF{>FVuu zbZFRO%Qb^xr|zeh!iR5f?_mB=2r4!}>C0bcf4%XY{pSzx?zWRzE7>Ua{e)kWwqx;y z>+ApX$^GenbH~13LPb?FB64Lx{M8T46Re(IRA#FX?wvZ-F;Fo6l!G_diCQre7JH9( zkKJ`<%Nv3W{{P?U{GR{+jrUZu=dMaHF_`$3i(Pu_vs<3B$R z9ysc{{I-h0!TI)}^@STY7#J8juD%$)USeNetBy*7fUxtOwOudEts2DjCGPEaeVd#**nd!1GmvrRSL z-4>*z?=_XF*RAC$N9Dgo0`~>YCZBz$GI^(f`}Mb5uPU{r$*XOZ+{?bjOlX&qkMO?p zHquXz+h5pb!h5)Z!_{%#V$)R5(1*KL2k3C$R^#YUdNf1DQS+GK`Q2M3_bq+DGJ8`F zzq%D);*NbB)4S(Pb~z`r?xIQU)Y1rWakOQ&Enl+%PpiEulbGIt1LqVYA}lqgpA_0? zXuLr{lUaDd1;v0Xr&jIy!1qA@W#qKKIf{=h7{jl=edEm}rm6I8LGIz5{RLB%d^No% zwPf4=5?NIH{$=$t&i9gRYmQ1xV_kY-mOxAW!onjGyZQt&q;5^yrFL4QE84l9>G!!^ zNpC%`RWwh2p1$_q&gTLj)y@_goVRGdw*SkG4$wlL|NpnlT+7w!Akb=WIEz(K$F8C5 z;2Y`udykkongS1Zom_NK;Sg{2M|O9e@AC{251o^|VQYPO;?%`DCqH|ioI7)cfKvYK z7_l;M)`HkW0?8dp2R(mrUYlBZ?DLB3Ez2@zsO{=owW}xLTa?XQhWp!Bi+Bkh7h#Qb z6PU2<$G`YGnd1v{CE1RBe6TH7rE`bPz6W{tZ)fn-{=e~CQXU*|Yvyj`Y%maUk#CAT zESRGcqq2Et`_xwD9SyEJtg0MhW}1sy7iEYjS3JEs|M&Nu&Gp7IP6@ilWb$?NFP@ux zx$tM79@_o7LPWToFMAy*a7Ag0?! zzqk)OTo0VK{8+w>@%cEd*HuB^Cv2Ryubwlx95hT0o=>m)n`BYWr@C^{RkecXi&Z;k zylm^`P0irnq%ya&^Wpb=HcrmA{q+m4+NeIbZJxoovt0l1-Q7I;aRzfN4iry97OS33yN&H*YcEW?r+4;pMW-dk*EBl%Ri_)&fR#w5JGiP{g zYV7PbiBy&Yt~8jJdrfMoo=T!$_h+Bm_dedA zz0gJTrIk(prX?CPmgSa7I6VLSTwjgF>4Hg$WTw}o2fvSeR1>jVviZjDn$U~iuYA22 zzP3}T>40pLYV+qS1&`kIc};wt-@TcMf#LuEDKl;PTmuAJ?WgV#?42fhqlq_SdFl!c z7sZR&d5%tt1V3JmIjQYYQ8}+**GZG$nwgf9i+PV5C_I(>7W1hv@}HNc39lkwi0|Z8 zi{3|yE#o}B%XnvcZgpww%ygb+lcnokS}p3>2dMR5 zxbo|<9XWV%;o|7O4A1MD@4r%87aPl>*`dc*re&ql(a4vX>zZBrSUT^)#0kI8{jId* zS$O-7)Tv#w6;HJit2RkprlB7soi#Y|1zdJajfE6yYs$h@UFgHYNrL5 zH2Xhoxw+`J*>hvZ120>i?=>o18`yQcdE3)6<@5hnNUmJAFq6Z1Qj3LJS3&Hej>0Fh z*6*i2oO=1lSFb<=9`4l{4G)){vzisZzw7rDqn#I79y<6Jg*n-S8lPHb+Kcc>t&0%0!=pR zo;&W|cpI@+qW`4P$z$;xnk_yDmnrgmw22YFeas|&*OBMD*XKSh$e*;~zVPum=U@D4 zU+dxQ9n8~ww8b_uE;PnvE>)NN6{#m|c%GUSBvsdn$ zGU5BRc*FY|LDyyY>fcCKgJY^??q04#1|qHY>Ki>2SA>3GcQ=oX;j(iqzkhyu#=>ar zA52`YUOZaV5!IEs@X>{UtzVz>D5mWGAehJZeZu*IB^%GY7FGPT_SoxF*5ZM!o^&HT4wnvptJzPH{KITN2L z)#kX=X>ntw(M$t5ZGqtET`}kG-w14(QnH(e&1bpJ^c{MAhu<#ylz3l+Wr|mY+-lLe zeA{|cA6E9bt%^AOplD&(;}WOqmNFg5OOAYxTtD;dr;SI-WBs*>Z9$q4;<&us_i>paVDwpv4U#QrT4{l$+LFo zIc>N(VcDANEr~BH<}KCWIGEA0EOfbszW%O#pD%csbNss?TEGr2CFa<2Hx&pRtM}B+ z>=sxU{osniLlMm@cf{XHTd+KQB<8Es!F%>%(xLl~N2I32>Au|dc*pdI59jTcey45G z)ueEsQ10hE&v%=1YgSE-5Zf%d`|fF{YO6qzHP?NXzp$EjyuxRa0?S{!M342ypB}2~ zUsYsxecxK~y{nue)9j8LTqyMszP(P}Wx?&gkGb?bO>a(D?Pfi#x~}_qp^eaS7RT=D zX%}B^xN9Wy=y!zJb?wE1&5TY9GF=3_4n1rTSn|@^-b-k;=wwglC;J%+&7WG%_C37$ zqI?l}q4uHcOU%sEBlgZ}oGfhklA+LK$-5Tg3pX-4nIA2iBJ$;6jhh>roLq)=j9=fw zlfrC*f}oye+@6F7kCQ8YPI~yieoW&04qJyR($HTB1znT{?kj7IT)|61?j)Y0L2_Kauh zRGzrF8_$CkbdA@1{v^<%_VvB|;kD5`l9D{(>n1#igtW-l%vR)Uao}-{_iA!-x)AYZ zhGD`74wmE2#gak>9C?lgEQh-!%WiI%AQ*jh$tvI9zh`grFE!!$VjE?>v3GObx8vUf zyLK(h+;=YW``lE=meN$2k2cS4_Sxr`l%C`fT-1@Pd|Jc%(i8;+&$_1becWuLM#fZCVS zE&lyUw5XhP@DNiL2o$!s^g;i+}%8Eh;7`noez;EnRy;X6MJo z&M4^lB`EY=V@Uel&d(#RXHfI&$c9Ye5|wA) za)oaB@bK{H=zw^O-=x3=)|QzY zIa?e=T;%n1BqTYw#5VYHh$SQ_vXvz(iYcgN<*gB&F=K~XQ%u3EHPz)3cU~vh9X2@g zzxImGVx9B8hYFel19=zUxoP%l)~ZFXXDlnu|Izn)(|Z>|FGUtlr&&s>`}`&B!hYvJ zlRUmK_ht38)@6>_Thd|$4++hDe`o3>g#({8xE@I?eZ&9D|L)~ioa=T~$@C~!o-?qS z_8DAFE}34g$(g+0%0>z3Y=I5ycLPIc3|E2gQQ^CmB|R?3N1;dD%x^(}u^;gM~E zt|F}WkDCOBHYKzd)%%IMyUu&Zw0x<4vh(~}|Im%=QY3v38w7~FunKL}QRX;fJj?C+ zLyHKpb>0$-Uaxqi+PqW#M&I6c@8>rropzce;JRP1PZzYq<^TUBvvs)|90XeTCkdA& z&OW-y{DaHAgUS`UyU!+Vh~$~ZdRXd+t1g@1|621I4?pg0oVhc<;kn@PB`*ut%t<}P zYTuA}Kw^#R`An%}6BjvnwH%OWYA`rrxkat(MfDV?Q`fh?E%9FI{FhTF`(y2@u62hk zPYEfm$X>>|^xF3qm2R)UCVkIYx!aVHUtrT$uiKMUQf^KU%x45;!zHkGr{jxv3VKVE z`ClxoYQ1-yo1^!_tAOQQKf({@pPS%tsG8;8@{3!KS*}@IZ_!uud7r$Tb=N63~^oC^Mi-AR~I((s}21SY3GkXp-?gW1g>1 z9_vbjQ<>7Wxl5>^HdT&2F%aeY7(U-XwE@|pH#@On`G<>>ne2eVm+!?Pn z&T4qcI_Zy-qQmBi2?~o>{`@^1v^jHg`bp?o6eY95n*Zl6Ts-*UIsdt5{B=hh3cnp? zzTni$7+tbxsuIg&(`6RRes1|Jvh49mhw^s@YhpZ|;-!S4mM50J*;&kEoSx(<`Qz^a zhtms=$md4PZNI;D$0Sxg?Fln>G%KtJkQS? z=iGF9`6~&Fnmr)eLKXwaDG9*weDdj5FKmpzyLEfBkFjDQTJ-FIZ+{qse@^ zX3edO5*hmrKdt+-?ScFLdgCu&4Q;C3(!5$h>*qo4(JlP*`*^_zg~NL)BecYoq?Kl5 zvg*$0;8b|zEX>qVD3NA#=+w~?v%1fmRg8LlcTOMR@o(DaV77ek+=q88g~DncLObnB zmCP@u>+^7n8@+gCWMOG&uwdQ4q$Atdq$-{I9?W2uYnaKoA+7trMUmDVZ=ZaQNPXp3 zLh2mSmzU2_%3$bcN+~?!a`-@lDr>?ro{CAgwsa<5R%81yas3pKk3sHCFaNMPooDOT z3wMv5ko*0;zkucO%-6^H|1DmpoS1MxD4pj9lhkFINep4p9~j)0wCcM=^}eh)qtdim zrfGHik{<1FiD&22E539{%ok(>nfd>}QgKLQXQyNQ{KR`UY?+xg$}f0$UgVfeV*LKC z#)d&x;9?R}_{_!}&3PHeq}Km9esIEHrwb8lo@$4&%}Z)#T+H$Fdp~IZ+^?#(x3@j# z%{mTU;jt*=#L3g$3)lBMM$BkzWULlSOgJ#fXv4Xafio;5K1`E5amr&W({yV|!>}ul zwln!ZN^tPy<*}H=%AB>a(d_)Er`*o*@`wNYRLSvHgwB2}(kL`AHa>F1z{JF$p!l(j z#c``Xzm#7GLMI8wS5D^LAeJ`cDf=cPC1Lp})s8uvI48|G%BIr!=jY)K35?F^>Wx>s zmOvwE(F+%$6AW+n*Ylh`%M%mBBDH)*P==znn`&>oO!ixcnnefK^mA-0Du`@fz4eGg z$)*GcEwc&F-zhfDHZ$~@;qY|I!2{3fnzY|BhedIsPi`%qiCJM+psAb^iZ1)`>e~P^k2% z!BPw6ymKnb?Ci~be23>)@=W0V%VB0MQLselQS`DMLhQ`j*7$UrvCq1iqTcs^MXYp; zE{}z{CCHlwnL>v@J>}t-pY*`=5oCyL(KDVc`|O;HjhlB`PWzg0M4gZAQIeo;Scj+l z5;eZ0TQU_s$0jQjT=jC9+3FQ<&2x?o+;piH9jEx&dfuJ9yBm}TbRrzK z#%YH^&H8ss)X%SNgF)M?tQQM+pK#@1-nLHXW^L4;PG(=0=^HD&L~L#qW;*(@L(9@_1&`Sd&obqS z-ezD^=d+_|3Cul-j}j6R5)u<9Oi1wXO5)&66%ccqQ`>FYnQ-k6OY)QxM;vt;c!F9ol!_93<5tEZ${kei428Ul>=4t0Q-c!^0^b6lEP^g1W1So$5ZeZ%^ zu~k;G#m2H(T7q^F%F4dz^AI_nc*P<@S}9{;|MDyaW`Ue^f#AP?kDKwFYIt>Mf4!vN z{InZ;XFt4W+je>R!sB{;pg}Fr;Yu@(Fm9bKxNB>SPmiNlm!ntL!DDR)kFZqTX5;5i z+QhbB(d1Obj$P6fQt~|(X}2!;G&IET2Q8oY@xA}y-|DtrX~{mB#EO4&9zHzmSR%{| zuA`64-OJhLAmB1z@6p`l5e*ZiZq>_&C9wZ%T$J>Qb$d-?T9XB9j>$>BGqd&27Eb4I zZQL3n>gzmDxGhC8)id_7%;hC(bhAB!djgMLx5@6jB(2w1*r#zx>uKSJbve?%&sDy< z*X3^4mEm~jFl(=(=ej%R{~JrP_+MLEvqN)H`LB8Fk|QPDZ-uSgc=$urqOT72U^kt) zyOFEKfalo9{T;1gQ2~Nq|K2}&_NcEoyLa@2L%e4X+BO_c<()Zc&HnfM`yOxKBKCdZ znzcG>Eb1R?lnX0=_vO%N-73)>^r(5M$~Q-jB`Sv6HTAnTG^x5SU9}`SZL3@MEwSvv zzIBV=OR`N_w2re;;^fR_2Q8n?%oGy4SY32XVp{9-hc=dcd&FlH+>Z?JbiQG>&U?;| h^3reRufaV=W`?;7RzxxUV7w1X)Sj+>F6*2UngG!74J-fv literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-2-clamp-4.png new file mode 100644 index 0000000000000000000000000000000000000000..e1799a87c8542d7e515b6185d7e8f6f75fe73f3e GIT binary patch literal 19132 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4s3YW8$-45^rtlpw*nn1P*v zfq~)we<{|*4C?rpM<#3(WKiH~dGLL{Wq#sgmWfTX4&P^7t--ZIMJwyj-w6z7wy;_` zd*428|M9$>H$!w~n)n&p2D2IVF*GEok%MqUM`by^;t(Alh+?!#1H{S9SGsty!E;;hGD)6u#Nb>&AZ|HVY z;(fjO-4}->d|2eZ%AJ#Zw%Kx-XZu6>L+js{%9y&%OW&^}lq~MF{FTdz>f=6V($db& z>=2ARXJEW2Ogk%g>4GWKjMs7h?khdI?V&}%ydCeq*?4ej)^0w{4Yn)Pk*mc(V7A!l z5?v#us*kV!|9`TPqxF@e$a3TA7j}t>iOzbQmaPu|(q%f#Siaobx>h-Bn(CR)tKZvB zly^7xJ!>GL^w!E%B>QzGM}w5|DyD$crA#58g@rGEo-#*O_2xDq<>g0?AN+mJ@YAko zbH46-TKQ$ymMQV&A654-g6x<2)Y&|D?#$NKnNz2F22BbM4fP0_67SaLA3ohZaAIW8 zq{hjg^_3sjFX?bkR$e6U$)eXcFCgK~e;%It|NsAg|Nj2}`T76v?*0A#)YSdY&;PIc z`|Er=fBih0pP&EP9=^Z-|NpEg&q+!@WJ;x!K>V%iXOl zIAqGiW@fqL9~Lo%KT=TWzWKvJ>|of%1uA?h2kYhS{_L;+|Gt_1f83rQ1=LqpE`ZIci_bH2OPZI-H%-gity{}oGL1%Izfb? zQ}Ig1v8zn31^@rIt1GP0Bnh|M#DlVo@>S;X~)P-`^$U z_9XoHnjH{s4mE!XDAK3Sog3=ze&c|{(c|uSPjrZ0+Le;jq{6y}mAPtW3jycO`JFxH785ngJ7PxfWZ^g!g9UDi z*H*FU{xf(ja%K}_Fk?|?)6A*~r`zf+rYu}CyZ-+LEh#oOwi9c3LH;^F-}uhHvm4TQ z58vI*<3FEooe~Qq9q4pAhfE3f_P%z^?c|*pr^rYLr>42RZ|!gW5OvFKS}omVdhKF2 zOUzV`O-WB2gVu0ztPF7oZd)4AIFW~EgAmg>=Sr%o~St6W*9dG1K#&0Q)Xeh;+F0#5O; z<@oYmnbJ7NMImbfx9E<{`iWhVY;0GKFe={Po4li__2D7bw&ddyda+4AJ|sTaZ49;H z!I6clw6gNn zeAm`94l7+Alqqg?3%s>TVzI*ihq>HaR6hN;Km7YU&vHLQ8QW6@pHvS2{mmn9&lhsW zz`(!&YT1S(ikc@EFHm^;e0gI2`jv|hpFPqN!(gCr)MVW$ChsGUSIJ(OwZdaleW8i> ze?`^4{Sn$~SM+$Qwl)QRQ23sj!0k3+5=WF)<>$7<*I{j&($N{UcFh9p00lKAR~Vjn@uLu)&mD7c^R~An4m5DW^Sm=-bfMC6{0LQ zYaNbpbh(-|m>l3;OyE6Q;>$cs_X<+*bKnVqILSMg6>ohuMF*i!g(ueNpzK zgrp=9@#ld-lX&>ezkRwiX-Z1lQqD6hALXN0Hmpwk$e;X5G~MXXGKPK*FVmJVg9N|R ziPG8bFHb3TE>K}j(UVHPZ`Z~vEx{*~@Z(eK!`IG|#q>s#|Mmg#Wjbo&dz}&+`sORCy^@gp zpS;aea>G>l`u|2Xe@__{s~zqX=9#X4c`FB`@LJ^lC?PHF$>YaYE=}s|+VtVmB{89- zw8X@e2EN!o?psv)Yv1CGPECe))|oB(N$UB_t9Wui`-Bx%G#VMXI?Rx}@2j@bq3${n;;;Yr!Sj!nvZ8Ig+tR zv+16UXvpeLAql{Q{TJ7Dc0`vq3(3dpLf6Zhv^&yiL|x!Yn6YM@i?yui0#~^wdDD zsQ>>LB|duaFwxv}s=rT3SwVo9Ktgh2V#`v_j`jjmqYLSm3QaivJq-BCzw>P4#0?jG z4o@!GaQ?w#KP!z)*0o)Za*D3Lwk_FWY**S&?62p!zu!1!_puE{s)zsmEfJ2hN{6$I@NxVR?`tKk>k4$!*VAmE4uCikv8AoY%#6?(gXX2O3HxL{=&@YaY8G z-gU_6dwq!J998zL#q8()aYsLCTv2Xt>}*Hwv_m%qSZ3b3yJu$NL#MXg<(WpZko>S_ z_D0?Y1p(Ll*N+_YlaglNSjZWBaP6Gau5WC)TNIL>hdyztvncxSSZqIeaptPTg3mVV zUSIj18>{_o-*-a+y(ukXbv$d0tQ{A&SNsa?JhV1_nzYrPV7o;Xl3Fhxd2%?Uc}O0s zc-yndLu)R(T}0BfuqtNd?xaZuyRMh&aG%?l`^NhC$Is$Vm_QA(MQM*7JV*!(7Ut*Q z-rK!dKal)=e;s=-xy(WBr0NM6;o?g9~R+%fY1^R=95yk3M(!%&8NPF1-2o|9`!YX2^F1Yvy?x ziBmP!Bs44)-BGks&B+h1M6X@~brk;p zU*rcWsIFa|+S^^s!)5g3d3t(EO3PB7430nl|I3TDet7YxJ!biV&n7_+8-;jJs2aEk zggr}E<96%W;ga^p{c4lY!4+~7iOFjGPx^l*JfT1tz> zOitaDiAf@=ZvwazQyh1cZDC?t#HO2IU~KCoWBM#0Wumh5SzaEGr!12!=J0+LD!Sx; z&3nNL1&!9NuM(4z49wcN-rYa{;hn5#+X=Xp0YQ^Gx;F8083_qINJvRZN=RsO(aKuP zcxgwn(<@&CLnEc_VuFe7Yd@bn(6F@WKuG^Jivz2;^VHVmsYgj3YY62Hn%MA6ZpFzD zZQQfGB*f!ZI4te(RGPt)a^T33grq~DHq`m~JiosMCQjpn!b@R##x_F=-6jm5^4R(qQ{u>EyrTZb=he zb>9lC4b0gkVE604{4CW4S9P)s8PL?Xx?&KPPxgo(N^LIro5-jXCIO;d!j+7dwJ@q2J4FkhK4g{8`u0l_u;kp;f={6YfT}o zCnfvB51)%!*rnawW}Q6AX<&Hpvz1D!f#UPJh@L~*B2T2+8Fd>9MAv52`t_`0iTox% zNu;Rc-|>ZVavuwX{7ZY)?zbiG;8cy1-JfiH3%>kwh2}2 zHc$!;IuUfjmv_xy{~fVMnAdc&S;{7eDQ4(778(RaHf>&^5Hw-NmxCuyCZr$#@QnBH z@qQljya|u;Rp2(gc*X6bJ%B_v6I~TZ`>9HBUY@R>=;N7_o4>GrH z&zFqbuXa2cVo{QkGrzfcwNFoTYpaa7xbd162a^j4n#>bj5;$2cbrvwJj68neL`P7_ z0fr@#zvWAPWO);=iJUm~FL^?RNHUYnQ+A`yXQn1*lL|Zbuzb}oTH?fVamfVMiz$Y@ zj$%`$q;zyA8yF=eq#b)}e+Octl4W5*(WWEEqksL%;^x?}u(F3SY^Q?Zj08{iz!eU+ z6?;B>X_=!VVAooMIxi5;kS>iNH-tMDpQ zMxmR)gH@3QD^m_k)48yt?GSVJDTirCLqiii*?A-c`r`O!U=^@&z@BYV$fmxC#qFFJ^ur=W!WN<4z1Drw^%8qVdV_Q*-}1^YqSbAo0*$>17da=WDCDp z%Nz5uErIvYfd;*fQ1MNTH8nhmi9D-T@#q}k&4+qItMJ6>?y9OR4(`oL?&?9_Oo#rp z&q7UT^D=_uuyr$2yWSTsWMBupw-P#39 zg?uzUeuwz>9=tj8#xzZE>8v!z;>XXMu`ySAdR=GElr&f*78iWL{rJUH!)t$&mhsrA zvdru0TYcjImxq^O1f!>K$QsU8p5g*CSIG^k+-yBY%)WCP8yy`Q4jyeicwt zxuT_|t`QLu8$=~;TxQvJrC}#)BV%+bqw9yxBlh+GYJYwC{P?(h{l7oXvtm`2xi;x| za69-&HpH#{^ZPicbUohBbASKhl`3yM{-m-oyBRWzr#Scq)zt8qnei-J)^hv`Gy*Od z35q@2ye&*bG_|>sVIJRvofE8D1v3|lGe5gx^G=!JV)C#1`~Tb5|Jzsa@aDh2)8MQ z|Ns9u_~F^v{q6k!@9h5m{N&{Q@%#TJr~O;M=+J}Lr&Saj-e~a8GCCtFCtw<}{OJVW zfbjGS*V8S^c#L}&fdYl$|NleNEg~Xk-nk>v+j}%Y{fzOVzBq$JNv)h=(;U|P=Z`%9#^&hA$IZR+LjiAn zeQI1>+7c^1@dH;o4n{dAhB^xvJPK=@Wc2S(;*P4X=VzJTH_!c5`1x77}4G$WH^)53>GINOqsf4IpCPJ4%@TU1n9`uI*g zawH-!yRqKD{(xkLV9%Xm=X1g{I+=U_{(pYn{&>Iri&s_^kB&U%m*1!S{!7*OcgMHo ze)pSKQ}*G(;(d13a&lr7%rgUO9y75lmN&M2Xe6`wgF{FC|9|zDm&xC~JOBJ#=bKx9 z{4_p%b@l)Jdw=R*U;FPpT_QMGQdPBcmR;(R84KLCB`zGf=*8C|e6R(66{3&wqda z_nuz&{>`77mqOn>etd3!`0efg%gg@Gw)uJg^71D+ng?#BJ5F;7W1jx>Ny>+hi4&$D zPX~>2{{Meyrp1jLGuEuVnwl~rFgzq-8PA!u?>fvhgHDTV_V1pTBxh3*9=`qcmzV1H z^^fkD|NC!$`1W>tKG~{ES4ti|J}$rF`pgHK_7WR3orC$;vKojyHs|F}`1ASs|7&aS zi|hO-dV4GXPqyX#J&7~y|NpCd@y zL=>*iOH|?hQp4{rzrp0=bjHOQj!%`icseZa@Bcr``umkm;r_b6N(GA!?63bHzwb}k zhXZE^Ss9zXJ)5KBoHj^@;?PWsoSZABrjZl8Ikq%%dDydVzR@sU z>_!)FjO&U2-qYvlMwdN2ExzcTz>m+*|1Wl*FRr_*@c+N#`tfy%N5qO|IWYeZl>Be_ zXdYwvm&Z&elqESnot$jX&R27P^Yh=|-@8bD{cnHx?QI_IFaw(!k1aN4uHPG;wXVKDPk&`QW^TviWyDdx%dv6?&d39i#PJq_3{q_I#;%eUg{Oo_E zjf?qkzkI#C{l85)H{UMz-~Z_B@uG;w0%-5)e#=(Xw{L%5blYXX&D?XP zVJ6=OF$n>oz^^vd-wMyqyY0d2v*?EneEDU1cwH<{{(Yg31-_ys?q}{QP<~Gn<45-`|6Vr>#m}+-zo_ zpv1oO)B&B(7Z?oB3Z7ZT6)fg$dE~^2f*&9Ht;_4`|M}G1WnumQ&+yKUg9XocIHNXf zGygb6EW=mW*LRLKbR6i=B#R?QcdlJ?#Nd$ly$1)PoD`JgG*kbyF05Z68}+5>$(NVv z_xDdOF1olrzJ7a7<(CHs({J=fu?Vs(II7L8XnG{v6IAY=KXRnt(UJb_>nCiwU%riQ z3tsMVxvA7aHaxR&*~FPM4_=$rI9;3)zvHSmhKGy~3)H7<&)>hN^7GsK_C-SM%*>!Uk@t6g);#CilcqR>uV5<6 z4llu^E@74*A0Pj3<+iu4QTXq6bAP?0oUPKrn~aPVv!r(H(k)-!8=rqaC`$XmeEp-$ z4<_eE?BwytWU_NTe)#Z$wPz0;Jow?QbXOqMxB8YYE)^{;Hx#zdPtaD}ASS)zmEa-) z;mh@Wd|g7vkN5ANXZQEp-{10g1kX2^%OBgte%P>bWg$0*!6I3;{#&OV z_DQfV-I9>v@_I+o+av#6jVH{XAL`iH|73y#6DYoZeYri`+&^sit3|Bs)N_B9EA{?7h0%hkWXWvx34W6{ILnJJ1VTU%Qj zXG_=ql7skfb=Zt0ODrDL9kZAubMeiAYugT-a?M_sl)wGd$wF&r>jSUYU-;sCLiv$$TU(nK6@E(>G zRQS@;+(JX;3=P#P794ngw6R^}*wQ;Q&UXKoaDFx;IX>gf{xfHIK7NdP^X5tW$`%%O zdwXp+t^@JD3!c_(xx&Eca?OIn`Nt0l6O)L#ze@XW7?`C6gsVrJGcyb723R<)OA-;4 zOi2xdj{Q4nTC%WOva()$^j@Di>Ek(jw`2c%dOnDJ;NSVZ@#4gv-3Q&Y?E|8g9XxpI z*RLnrZ6YEir%v7Z@6S(f7ZIiDtt?5i0zNjY-u3V@I(fn%_|gLpFQs^W=6817jtaWH z`({F#cTOsO|Nl$w*lBs=hJf+Q71v)L;^z1G)v%mx&0N7Vr&ui=7d*XE&%^V7%NCZ) zm$ybnp0sT9^68n|-|rebS32{gVbqmHiaw4pp`2+_Ne2|UO~c~mc}C6hjG86WZ(v}s zVue9V`{d=KvJS^~82na?U{qFS78kE-7J*3pRt#v?FFEO#xY)8MPgULBG(5bEEv&oi{{4}@-CB2IBL}a^qHeJ>Q$F$^_9;p2 z>G>ZTG9xCY<3?@=V@s>#s#PZ>A0FVk5X79O&N{E^!K;gY@1cp&Q)|+sH#dqzKkY3m zI2Lcwxp=|>AF&UoUhtdg{;#N*k$z@_sGwrfd^sTjk(8-ZpPHD9KX{gQsKX}4lxbbj zQ~NfBgSVc@>)cd)V`PwEW|A^-y87A=32A8^{mcC}Hb#0Ryxo`NxAnw{1szcPDjMd_ z_4Mju-7N6$zx-NT#{H8d?>+d@E%JIt+nx>60-TlxCS_1y{lb^P$*2QN=PdhC3NGuOw5M^KQbwbdg0%|w<0 zRo0kLo;jY^U8=Qk;iOHQ-t0)`Gv1l- zUE!S*lW}6ARJZG?v%Qwe&1X+{v;Hp-Gd6b4)I7YWl4sJSC*>v^+W6y+P?l;*y2b>+@oT?GXONiO{JR;aptlV@gTez%Inzsu26 z{>+*GckW1tibg&=pB}OE)A`58BOW<>b|zj_V$;_4`5(ygfLS8qwRlW-`oI6_51zP8 zgyk~6w6taO?f?J#&%b~3d-_T1U~`{>%}_S)y<@)fYvF)F-DyxmK_6g&H4=POl zi6>q%pL+Hv>B}df@1KNze|~=d{r!4L$^V--8X6fIKIoJCwBv{1L{0u|;pg(okL{Bm z|37xD=j_@4imNks?8vaQ@(ViE(ER>>pb%tKf6d&DTn7|*T;@->`Tu`V;U?LZf{Rz2 zGTAs8UEj?+;(1uEdVW_m`^EkhU7r|Lm7G#qb8K4Xd$;r`a;9Cc`5#iEZu~f*K1*v8 z(ZNAqZ!TTo|qPPn{1Bw6tJE`>5|NnPTn4S4>JsVq^o7i%{Wy%mRMj=uQ)|LT6TeODI_I>?Zj z_{aU#?n5_wcrIL?_uzQ_LoKI(7;fVh#*-#bcBCAb>r${(=+mNU3q%a3@?E>cWZB5a zaA2y!1BTUazQ5O)y%HJ(dY1ocezWBl8yFZktTXzz(X=ZGJ|35 zhXYY92}*21x8y616sn3(3X;3_>}>+e|Np`ofp)b{nxKVc4FCT-y<7XI`a7R-p#m5G zl7svnKL1^{{1o2uu{qgBm_QK+s)b~p|M)-u;bZqH&dZ^WlVqv>|Ia)}k^hsyi~18Q*mP3=xXPNP9=P_&;oi2! z`bg$IR}MxMCHRV+C_HwcLG@avr%45u+$Qe+gG>sZjFN(!7tCHj3w%$R`F4MQ{rRb{ zuP+>wFLo=vu{84G$Htg6hd!tF_^!^d%?m7f!<(5`b6or4$fGpl@kbFGma7V<{~a#) zpTIc7lSR9!x&FVzzB*S!X!F!lX1@LZy6^A!_4Qx>|NsBr|MZsk!A-GmPjz%jOz;x_ zp}Uf2iHXI6eGal>zxxlW+0{Nc|CGnY#q!#gqYbOLckbs^XXjSfx}~ANt@hWE56q?i zq#>!$Qzm}jpSthw`uErv?@4A_>GjX~G)qM_Q}%b!nWr00Da^2t2xykD^fg%UER8K^ zV#}F?1F<;*KD;x!Bn`eSS}7CLuxGQ5!Y;cy7k?ROIC{$5-~Yez@2~Uw>(`t)qfr}v z=5$kFesJ1>i=K|sHx9fsb7Gdu_}HT3?V~?sfzg?VAxRes7(N(1yKmnXzW&1YZ;-JG zCr_FEb$@IB{P^B4pL6F<$3E>fiH!2M9p0TgAlbvC5z0Kzk*~(@s9RF%0#}=57KTfi zZf008Y+3dEIDcF9_Y3EbLreSBVgK&!{QUd#^ZGk?{wJ>9AF%A%!@XfWQY@GB5)%>< z9C9lYa_fu|CMP-gGR{!i@u>6154B~2R*xpJx;*)u-WI)G!fvtLOAbhAtq%Klf6vd? zXJ^+xdURnUZ_eu~344?OP+b#DBGFQ1;yU+(`uE7LGR zg`16y&Er4o+GM8G1J7C?s@orAR9<)B@BjG^pPr7NI`#aMClODd&fL1?L3Z@TU+o#2 z3zvvVv&@#|*&%%SKtuX@iBqQ~=h-Aqn4WH6s95&UaAItjPb?1)kAA$7O>LJR)JlJu z_xJzTetKe_*2>hMd?oeRgk5cojg5^H1$Tb!V%rd|*VDsufB*m5e}A^OvCWGXz1dUBUWUjKG*4PNm zFY>qleSQ7^_V)UJe;Uu+{h!<*ySrhFppc@g$)P%(<7=bs!`A*fa_pFd`0>+cdR9J_ zzZ51O^XNnF*BM&>6IcA{j=q^NQ?ijkOpJ}6zwQ2h$!)pH4~1Ai9r*vB=l;ILm&c7@ zYnz<>W$OR^ss8xr`|0Ts!Uy9+4*vJp_*Fhq)sXYW@&5gj)%RC_dg31=vwoePc2-H! z-S=O_XDckTSQpZ2dh~#;(Tj$?w#~DpK?}@gn+xx2Y`qc~n9|F`!*hO~;ho*>OP@gV zl)udWdgD90zCORG{3YS<|Nr_qetdOd?6a6WUj6J=PF;|4=l}ozdNDr|uddQ>YVz^* zW!g~t>&If@nIGjZ`z+a8a%4&8!Asi?uv%D3PMRdKuRc9uOUJ`kp-GSDEm@`^b7$}A zg6DkGR3NGRkWuQdhllrDm;W0_m7B} z(b&P#7#Wh_%UEx4CBe6>*(%b3L0P$Zs;F~-z~Pz3JagyrxWv706@B1kSn%nd?VIJ9 zki30p)Anb_`~S<^|7*?hYb=ijm1c9fjyD)SV9cNI^etWK{8^&~u7~H_|DUe+_tu3E zAD^Dy-`mSHZ{CF5bv!G&F3xJt*gLh};G2DjqYbY=TS#;pLqbZ*gb4{AJ}DN(ZHY&@ z+UoyHEb~be5K?;e%OiwC^Xt-mVo|2I8^K*{hrE+f{D|pJ~ zTx;v>!qS**<018LSFQBv8ycYDd62&+S%4-Y3LoE}ZT^4BQppF0B|Nw*A27%Fcs!Q* zANYFue;yvt8cwHkRr|``oW1t zhxX0?{@A^umrwHdFLr6+tdvEXl*Vq20BqSuHr3nZn{rT~6eFw*5Be@I z|L-&B$KPjXL4o!E*Vq5Y`~S1DN}fF%$;5m!Cido&r!#NdkeH_T_uSdDAHHfIzQ3QR zrl#l38y=6qZqG%kef29IWo$iT!1DjUam;?c*z1})XAT@YwqO|uCFGyqP_BRYU^9<- zp242l)1WEA=jVCm+Z*56aqz(#o~s@k&N2$te`t94DfLTs*aL`T@60jeYH{Fko&Va5 zi`gvrcBEzn_j&{NFD^;R+8j|Fw>UOQ=}uw1d6PTmyCO@FjmLxfM{UXXSFX}_)ADD3 zx8bJz>w|H+zMo%O1#+$3tMTdakEzS*=I!5nCj3xfYeobIjoU71P|)CL41_Se5$o{mW`C`1ulKB-`dQ&jD9Zhbr4n zOxORvuQvI~v$Q||2#C4u_iJr#Zk#JyeY3$PvR`yh?dgI) z7W+1aL!#=?JPU;9+hsFj`%nur4x zLBVScHf(fU%w=}qEC0(iegAoQd?sYECNEW(CI8RZz@X;enGIE6_4)YP^y4KYBqZ!= zf90nf;V_raS5E%1U3Wv%{(4EhxE49tZ;<8ZO-&I~9GiaKx1ZGR{{L@s#ot*EAG_CN zLA|px{NwNPir}a6%wIyHT-Nd@D*K)|7~P#wTokbK`T75LRbRv>8O@j>@#f8pgmRC? zyc-uAx&?4E^BNc%*Zl43u*uDxqI)ib;pdgWHD9^YW;aiq`0#`v)As9-mj0oc7IrlW z7DX(H*Z%*XaD|~a@JB0`$HpK0(=sOH?{Z%tYrWtrleu~G4vWPN4F6aRA4N%IJUGtZ zmVJGJvF4gJJUKZx-m^M;b7in!UbK|+iOA>Y{D*IDX3^7tw9O99w3sndG9n^iS>?a~ z?)#d5*efefTm9DXR1atza9{oZ?U|S56>l_Ny}#c$BlGX{DS!HP9Tl?WFEVU0;Vx=E z^!`4NjLeJa3lo^OgfzKJGBY1OJ)KA1P9S0dXoJxI|A%fqOGrtn_}=&My!?^QnHv@c zas0N=Y1NFGAUJ)tUCodGFD}|wtd0ns>KG%lcCt~?YW|o9_wC!Z=P&%6IA;z|P>_YP z&}&ZrAIJAeNJyNUlUVV&tpU`r0LAi5a79zv-6;C<$^ZWy&M*J{caORHQNGeac3B&z z<+7VM|L5QN!EfiLt#n93Bsl?_xoh)A9`Pih?Kl>m$&`(|E2IDA6r{D$Ak4A*BAFNl^i&6;=^?}FabycJRyYU3;EgE;a#s43^ zd+_-21IzhPXAY~a70w3zaeh{C@bmMF{uPHni2)R4;YBrnP89qy>DoEtauv@CPS&fw z|Ns9_|M7*>@Y1LE@}P?1(xsAJf#Uo>zVlnHvdB1iu({)iqQ(gYy}r%A5)OpzRAAmB z*jN8wf=|Zben7t{)V1Q?Yj)^sHZKX@@%w{F())XgQCo9zc$%6b^6ZvLSNxv#@U%Fa zn;R&*>%}I4_ADIZ1+VO`Yw;^HHZ<($GSbjCe(~Da!oom8!eO<&S&?3~iZEye<3F2{ zE>WRvO-&JQ2NXW`{$G;x#EF&f-JQfAe`bCVJkG-d4X`bPZ5B(`joeDYLtulF2<2A`M%uSL|<*pibMnIx_K@{l1Yq~`ar z4=)7IJ=2wf#EnwG&;S3 z&Q9pc`S2Fpv(b-NT6WXI;==U82jyDNYPgt#Hb4Gd-qtP7S)QZ_amJdt)?5t^JTCHD zC#K0AxpnDu#Ua~cT^wfLZogz*^q{G=DM`@Yg55(jj)~LQ{OrTOjlHKQeLP~W)M6`{ zwR(e&MDvOB$*J=?m43}zmMD=Cy+*Z<)3Lhvm%{UcKAYoFrzWjZ@;tGrTuf@$r749= z*74uz3J`fAIN7T6yv0nlU4GUd=4}Np^;Vhy%BL2EO$X2MCGy_-?-a&(G!L z!9#~W{ILu%Y`w|1=-*wFa!S83bz^^$&b7gf$J{b_Xg>};O!^+r0e zrzXrgm5_2oLHF!}H9iXx{Zw{TO?~*lvCV(JKx10cAqAbDJc~~$ILwQ261!yIJGfC6o7) zuVudhx00sJRpf3C5NWkHYd4ED(u@i}^639fdy@#C!+ISvOms7a&%UbW_AkgWxSzMD z;Jj{oG5?$b{u7EUxw{wd*T1|`VcXun8$&;>lX++IvF1@_#56to5DS~WFSgH(J)I(r zc%J-RHoVACC{&wB2SLc3TXtJsogj4EjpJMOs;;YC z$v56+{5IVy)&J3kjrq}xpL1^ZC@fgFQAeiz;f9T7+oin<_ZICeuq(Ei^gPFG&Gj96 zrGGz9uD6<)`ok{sRFS8IkgJZE$H|PAYqo_O3^UF?beI$%lE0hj^2^CD*ME;cAOG=V zX7rY2I{&@SM&AV&C_Qs@`I`y^j@8$jNwoShNvAE5kP-T-EU==E$$r}X0}tM?G#f_> zZ1!#9l-b32m|^k*?wDN#yScx=xw-f4F88UPhyI>RxS;e{-0KJ1#E%lU^U6IUk9|Fr z*8E=CbA{F{x5%K-x4D6H*<2NQP8Od_FSEWhMdMMCW5>QD8lDz>hZWlzUmMLgo@@3+ z!R2_&Y7LKIo$jMePbYnyrJ!)W^YQ%pvOM#agsn*?hfHqfeY8oLd9&xaaBTjM_pv!w zy{0W()uPzW{M&YU@aurU89p1{>TmoPEmEl+}-Tx-7J65|5C?b5AMEmBO9%!2RR{C`n2tG-TY&Gj{5Z@zbx+;e8Rd}Ldp zj1KpC=gSiPFG~!3f7QO471wXG{C4X0#{~*~ok~aZczK!)j(nG&yM5-O+NeLL?wtrd zn&dd`ufT6nknfhv-pJLIAkbhhvS^{etMUyOWA8f^lxM#Tlv0hVk($3SPGN3|P@#ah zU{qh5impn}zLz`R_T^i?%L`k*sN-*)LGq`$Auj=oX#D2t=_o)dFJNS!0Q!q zN)~*lRof1xIfrhHDA;|mG-kaB>-XOY` zvjtRAt?t+`QL1C(PTq)yjkXaDwi$-k5(K9yXdId~Q$ljZ!Mq~w z-y>`}t8L%$?{7Ij<`zXfnUVha_ZcR=(*jMORn7-?y;kD6S>94G^@GW;K$k!T)l|>e z$;*!Q{|J_0zuXekqkOpG`QNbMlKHnP`;+IjD>!&fYgI|S-O#_)Z}p;B_t2Y5V>l9@ zzMoyXH23V$9o2il4pq`BjEFE;6M6W+xxPsv1xxukq$U6SJ-wmQ*f~gOT5`ZcQ&wT& zZ&$DGV33yHqAMwIV8QzH3aUpFGEXXKd++$_uKVGW>fz_-d8X?dpP6^|!xzoNmzT3l z`?;90qkd!9l%5`*OP6?-F6Hs`e32agL50j2R0y$4{EXqo>DHQ*+_utb+{m>{53Ww?BLt z+_uykG`w8#Z_dN7*^|1R#l+Y$GuzJiwEd~kY0ix3=BoI3pkb{=qlk1npJ7V+WMNP} z0$L|;RwyB9m~*RTbK1eFE*B0Q@8>z*Z|pPYM8OLIP#bf9eKNnb2j{`_{CCeJryf0U z{(M3PC=fuCr%Hzooc7*iZ^&45?yyynGuw|ZosCncCSKQTn{6)n?r!ppEuD?CXFohA zTk}_^LquA-d5=x=OiAY|tLC-3e(aCESat|;rJOOho}GAJE@^flbeqo_(D;ObfNOke zvf@h?R_TLIEPES!kAG5-bJ^RLAT7POfy3#|Hjf*J-gGqu`x#H*IsE)@;yb%$0TbTT z%!-Sixd-069aNIP)g{e#Yg(n7*ODXOZWT<|G(+WJ;fdm|rpF4W<2EG!$Pc)A z;MlqQ`U^dK(jsfmZ!_%E>Au-7*p#Wn6UcR5nWG?fRhP-$UAMpd{TeJBlM!TVwySTK z*{tuGptX|!|0^B*e()gA-@iODF*mdYpWI?u`{97p5zuy?zo$SQoay>J=jS_%Ti5@PufvbJgPsg}OCPUEdM*V+@x<3gOKPEJ`w?90_ z%ciN>R{mb1qGG}Gu2ZLtH*9n)meo7{SX1kR$`tc_iLNe*vUe(L%bG!hnV@()c=^yF zp3~C|B_st`gl$l&V%jpzC`MqqkxxSy_kpQ_?XXcGFugl~UR@(}EHXgxMsBs%W^TJUTY3 zYQkK1PKEQK8J_>|+qYR3OMH8?@QN+eh@D^SKPoxb+beC7oZ!SBZZEyI;ovOM3C(Pi z8`Z4}Q*~^1Jz$t_Y;aY0<_rmgNqo$Lnr%Ns8r1sse0^qr$>h|#yFA?Dlgt>qMWNS4+_VKD5D!U9Og2;T=?gB#Gmi-rM`*>r@gGdaX@?CmL|%7BE zcx`WOWV|>v)xox#V>07bxoBoy{~eDf9g$^j^yYvk6%#ZT<{e1p>$7b<8xGCZ* z6DiJo@5kXSeE(;#>k0n(mvkU7Dq&K^D{;0ZQ$BuBbdHrh+{nzrSF8kDECxzR`=6gX zc1%FvfI%V?v$i(pNv6pz7OjHSTORt$@U)zgws|U0WZ9q2(Dfig;@XkMXJ$5E#hJ6R z5*&QRcz7a$nA1*Gd}vs>Z{94;X4njnQoZ}<_3=EfuN(jQef+}h(+5s=<|jypvmKgz zbmK9Nbw?WAxg9l|%qHF1b0AA$-7(>?$TgbGlN;}x+fg@n;jUQ^pLiyjFA;$Hc~Qxk z{)rP0-kh2E$mNJ6N#|=>jC)+)Eaaz1>Yqn%n)d$%;f0(OoZE0^|adu;K zPEsmK*4=aXP{TyE2P!tRI#+J)OcCbXaIK_b7V8c!%O+3B>2D);9@*~s|8Me+>gkD( zTwbiv?}a44MbG9)XlpmOvN~sKHqVqy>J!X9b>NATj7v0Vyi`mAD&ZUCForQ@Kfu>HnsA`Wh-Fg(0A`pU~qk6?cM*RpbgK=WQ(=Tg(=9-c-m zwpUD-vc=ePRx)n>a_qi9->Tt6)5w0Qj3y{-7UMB14Ii(09wE*oD+ zZMmrKQ3HdaA!rJ2f&iPc@{yIrGjjRZx`H{ERhdjKT*@Exn#rg4h#3-QfvyQs^Vl9;BqC2al64TuAuvEv=w1F(61O;HbhKmld2& zHU}o#dFakGFio=J^lj-W(@^{Q-~RBiUY_FTMsw_ZpUslp1a(v5BL}t#Z;wu#$YW*I zvG+8O#Vm!2NlczkEz`Cjclsj4u`2j z6a1p!M-9x3jBJL6Y?YO4?(S^a*=&rAY;tmJa&mK0IS(qb*f0c(g|o9vw|t_?XX2h@ z%I>fH_+M??+Gt5WIq;z&Z=<#PvnPUPR6xn9{E-7&Qxi{44v&@=kCPKmRTa;*YdkeI zJbU)=JbI+@cx}eUj+nFwvn)J98|qgyH+EF9thRV&aP0PWp6&UdQQHlb#-O7c*2nXA zIoCk^Ds|#WBU?pKdNPZ zF#Zeb`r*bV#az_#@cw?D+uMzE?wtXh3bHkuXMg?Vf=yhYqX8KH|Ce&n zsG!KHKO;3c`NE~74V#+g9aK^~rl7_qTabL<8jE5k3+sNV7+wV%*~}+P8f;d5{Aql6 zZ8T5$dn21#@KFMRB zd9IdH-LXKSd4U46y}i;!2Zo79IoCz}`7K^0bm9O13rcT)7Po1KORS4&T-N#ey5u}t zP$l*7>+6H_L<}GSe+9Z6OMaV>$g4;H>*KeJiLkS8Y~H@Dx_`~hzLctRd$Z&1ea!D~ zmhVw_3sOq|clY?H6-DUs;~f8>Q(Q zo_OxM#De%2;GUz@yiR6e10$!)jLKi4QrN`A*v!l{n3b3;C;t6i{-QJBMEdJP`Fno< zk=zyd{(lj}tpy)Ivz3%)XEV=%j*Y8&8yTNQEKB5v6e$D`lVWDr?eejJ SmEWitq`}kG&t;ucLK6V`eLJDEwk^kok&p`cK54y=As{TBNmmvDU87 zGV_FIPCThq=B}osk$BzAgMm$_^4RWsS9{X8yq2sDI<$M~?TPiDz;SqG(nhWp0|D0Z z-il)aEgox*{Qtj6M0$f@(~8~Itw$TTDtxavdgbt+a+zZf3`{=1S$4ATvHCH!7OR$B z@3r6eexF)=qclTzi>{JZ!wM~i7(s?RKgzF4A7)jG>Uws@#p#Gt|M`#DguDV(M&eZjN+madv13_UMR?nd2Tf(K~FKbJVO*pPtBo2@_|} zZft&RudU5&{JH*5;J^R&yA+g_^`_2<`j;5Y>6`Wc|9|IeKc1ieZ=V0}-rnEezrEdm zef|HP#n0bgUS4ls`|J1L-}~p=|NsBb*5IHA8#JkwbT-eO8|l#C95Tf{Xi~hV=ZO;@ z4zY86otir5|F^fcW){9|pEL1PgVa-=86SI>u|>-4oN!<%_cAui&OgqVIT(3({s#!( zv}bGk|NsC0-~a#rU+(|^-Mzo%&(6%hy!`*Y9UtGnyqZ2(|df zk%K2r95{ViP3h3;1qvw%3h&rW#sbytrzka2)Y{7v;Cp$IXDt`R+-`=Lyz`)>7dPW)^ndiWKi0Lg z9^SQX`~F_C?(d@fDG4CIG5r6(sQl4`=g$KI1PslmfBibu)bQccXKqIX(oz|{RW&9a zIdP&vM)E{dfw~minnx!z`E{)%CuGT7dG%P{Z1F$E%*Jr@{{g2ZrX)2p?!EMr{qm3Z z|NsB@pKl~%b7Die{^6&mdDh1p*Zg*S9?=B#;i6!$ePLq71p!?A;US@^>4}LAuEte8 zNzY2x1omllERtKf=iodJ18-KFjtNFnZm}4fE-;JwuXx?`geQw;>YwJRCOrTD8P@zb zQ1DUd@V~!2%l(aG_W8Wu!~#y6i*g@5c#tq{vT$K(C^u)|)$7mGQd1pH{rzuv^Twlj zjg1fGwZ!FAC;WOUzi<;{B=SzGitt07Ig7r@pv0qQS`GtrbuCB%W{4; zHc)z5_~Nv9+w1F+d~zu_c1?YF&(_M+7Zia1|1a`9}17 zR?9_PJanL8mfeJ#=J`fC5eGIDs~=t+&SN5F%?Js-s7DFO$zef4Wo6g=y)RFnoSdAR zns8vQ+5=Z6W#vUr`vsr=mz11ny;P&;|Ns9F_4zj_O%i#$GT;Ks#8sRrkw-Y#-);HC zzw%yVxWT`OOkeWJas_2(* zW?o)KVF3xLsi`eTZJAYM7fk#Ag4gYi>rwljIcFPYHUyL|oGx=>nc#e#2d_GFdW{y8 zId4#vI#Z{!=iyKOP#N8+4$P9_KMx)_@Zr1u;lu4budf^D+&WV5n2lLmIF+}8!_Ok0{*R{lTKFvo5~m`TCM>+2=ed=hSK>wWmt`&G{_NK`4= z7k>D(DKbh+Mcvxk!sF8i17oF`Ew`Se9PnU&ku2&_crL_vhA$?Bp=CJv?(Fs2Y=?$q%WC@yb*6Qne>TTy8l8xeb+y{)SJgR-6-e2 z-#-Rzhy}WZ9$s7_VO4A_FSW;g`1zAmQ?dis`xo+&Jjo& zS28Qyu*s;h;>EM4J0qh`85tg&e6&8|k;A&+3Dp}uXt?`{{WqA!myybrrgVTOHRfXT zwUPtzjuQgc2?TFmV7NhT;{O#ZpU5*;{T7MaxnMb`4_n%)hT{G8={L4^FWfov;X~(1 z!k~uM|Nm>kt@#=hcxp}cUZmaF(KUPV@`A@Y_j}%oH>NZW*gT*-$ST`8XIsEyKGiyfIn^TpK;x6>QtO%MJsvKsM zsK175RaN$%raOMq+qY?jgg!Z8$g8!r;=WZe$3vNClJ}BhCtlhvn!*lh@hQa=X=ry( zomSP_B2ipuXl!6$WMt%&G-cvMM#~oyqojF0%xSq5*1#YA_wb9VKh9^r>^POUMpC4H zYX546154YMxUm`L#Kax`qP*u|W7uf{Ju{AL`v3nM+ti&2h(2BL!{G4q^GDS9ph<9o zMNf~2w)WpUcT6H8AD(Oy42v=_Qc$hjax-KJ8@o-U!gaL*vui=({UY;ZyU)dZ6+ z3AA=T*5{M{pZG(NTRMBIQ;n9R^L%?_5iz4T_x*EzK}aD^02?|B0iTA= zk^x(d7jzjU1h;V(T$NpNP4oZ9BF^mgDHlFyw6_>X9c^6q+`dSaHI!e{?CHbA6>C!y z($a41p8oK%`k`yg;PiQDo<&7PL`>{gZtmKi-la2VEXZt{IFVCG_{Jsns?0{4@N*|P zUVc=%#Lj#;e#sO@e}NA#(p|cWnm#i6i_I`(bf2LVAuPO1%!j#?kwx|o|KY?RUzC<} zvh{>~dM|G!E^J_E_~-ZW14oW1=${XWI|nMv&duequNV3NY6kuP-!jvdtHFS$HJ+)p zse3Yef!O+sCwEt{=|3-&>GtSt4hiBd&Gq)W|9jTIBMpmlYdId<_#eJtD11PYJ1nK3 zbNRa7iAL5d-FwgR&SG}%tNHm$vdlZ&S9%%a(U7ne%TKGkkzRQ2>}G!jrQdp*o|hk3 zs2rQ@v8TQGe)F0bmAx|^!a8?0JbUJ_d+}|?FSgTIZ?~2H{h7KS94=1z#T*<36%{Y0 zPGw!SlGD)G_;4_rN?DmA!4zaP|~`F?J0 zytG-({oUW6?=JtpulD!%hllG=Wbp_JvibW@yAY5jF7C$1_e9|Eyxa-FY$;b{^!TP9 z)#7vi|Ig5->WF~!>4L{>`=Diklc$W0%^E+ysL)Wys;WsnJw7}lpQIrHa_ z2*ap<|2GyID$CWkvt|b!xZI#R$7NmI-<_G4uUCHFXH)Uv`F;C2o0pwA!y_ftW1P0( z#tnt+<9r{??F*N3rhNIV4r9Wt!20 zn6!wkOqw(~cT0miKLG+;NC)x|q#L?xh~PtG{3W_}E@b>Yjbom-Z81Efy`}k(BgFP35t% zu@DmEdEn2!8xon(E3T zsT29f_0d1)SEmy-jq~o;#m4MOJw5I9j$8L47>$jU&CR`oCb<=D+SSFy&(5y=?n=XI zl>nd3bqwY;0IU7OAp& z%xszIwIR$<;NboJl19doW$%yKwQ_@#Vbi%enp#~~uKkLR*50(iz`$Yl@tzA8dE8vA zIh3#cKNQ8I5qR!I$4(WAb8Zda-|yFtC`h}$E?T8kmYMnR{r&&#Yk%!cICyr(jK9&_ z?S6e`ue4-kp5&UbZShg1S-j+Whuc^3{QUnb*Vk{a`g(o7y{ES% z4-e1%{l;rzPj4txewb=KVMXI{K4D=e7T+$xy}6vCDQRgd)}&0Bcrecn?9`;BgoFuG zMOUo&P*{4kVJ%;KwWzW6A{B18WA+|7Tf`37_dG~*;QMG_|Ie=GN8YC=$@cY=ELZ+{ zczAuc_b(sOkKE;{|Ce z|MR?<%XenVM{(1s3EP}6dMAj99pjZ=S5P2ey0e*mj?K@iUteDDF0*aqOz>riILfIe zo6;ASZV_VW{b|R3w z|F^wCi-wtmY?mxqwD_wVXi2xpZ}kqx7XF(=s%x> z_n{EuzrV=|X=ye!Kd#QVuQz-&i7`K(*;sRdD)*mOmT5&#UtYF9Kkx6-8qOcz-}5Y5 zr1B=`aQw~$Q5Qd@J$raouH@n3(ny0eRg;oZ|MIVP|F@VoPVuYPg-1X5b#^BBiq&si zs<{!W?3gmK!Iby!;e-FqANcV8 z>}>m*njRmYDZe}-W;jO7XzcE0(cNIlt|4w5`Cw9NLPA1Xnt@T0fqD9aCoU2kkUZdY zSE^;2TEHWTc?u8C`d5TD1#VfODy!po^4PHh$B(~ywaUfsL}J7F`TtLxD%xB7oB6QP znxAr;-40x7-TL8xPO`*>NqowW>qSMyPHkWC@`b^NPm`9hW*%YO`@3tuC{v)0%8>_8 zjwmQL3P2;;>D<{9);berOJz*_``?~ReCO(>iL3r4N;GM>Ub)&TEBn;N)$^q6j|26t zuIuFO|0Rg9ReZAgC&dyTbU?&?hK~C2C2P+3>`c$em7Frw^IGd1JBbh1Oe3P@!oql* zoH#B+)3cNB*9j_3L3Igb{t}Y^ldk9#eB$30(xmyLzwx2Gy87a&Q%hW0)L0WaSe=}5 z)~xCI^GD~+n;waaZF~P5fADB!+k;K@6K_Oht^WW2e`cnkx3_U;hrr#l2iSJ(l>GBo zQbkphUqXi={326kWlNh;M~4F|G$kcD=v@BAzB(*m7T^E>&dK5{4;-*M)OhWm!!xDn z@*1r>XGzsZ&g@*a%;00K*_sE(`|TSWC)wEeY)HEt-&4}CT&$N3Jo>1m4#FxQJCZqwdF*EQ44Et_!T9V zs0j;H6&49b{rxn(^X2{?p7Q$rdzY%Z-c-@@3Uh9DkC?$4z3g~Y%Y&@sB;O-DPANW_ zCBE?3jTtA8wuX9lpE-5(bhn~vYh&xCi;jP0@@B^}GnSR{bah>5Gk{o@nDB!=*8bmo z#(fK9@;LHN|1(~|H|rql#2qJ84bt8-Omx3{H?_4TA}g!!{q65h&d>kPqrzluov5#0 zsH|*vWljIo#ux*Ggk)vyVf+6aPW)8Q)>hxyxyFCqpPwHd-rtm+n)3JW(S&5h{SySVwY8a_ z&GVAE*twkNfc#$<%Sk&nJ&|WV{Qvv=|8MV~FZgM6c(FUroH;z-zVUQ*fzY8tSA4`y zykxG^WBzwFDdSY#pM)FR`xh>0fo9G>|Mqfe{+c-XXuZbSAM(BerQXUm{>i?QJRXcK zC)?P~4@x+7r~L_RTE5aFrDxLI9)rosMx7mIT)W>LVfd5E{4b>Fm58$=Bh+t=KUr84 z#3Xn+Oc=LHxh+!Vdt%b4_CsFt!Gy!ymexiI*@{L627ivKwa(nbVa8g(X(X_G?hKKh zmEyK7FDDp1V9*sz*ebiPZ-U^qmyONMj&5yGS2ng<%P4HCKH&3{e>>Z)=YP*L%wFmo z)-cV;fYp$BO5+rrZfzzF9h(p83R_%!YO9qOzO#%#mkN@Sa1NE zFde9- zUaN*_A_l4d|0XxQ;fGXFP19xmC*GbcA-N!xd6R+>Gt&fVnHeXW8X0Y0Y+S~}w#TvG z{vGEjp-A@!UMw@h3eG z6YmfuArbTBQtOW?2ODN>oN$@t<6PDzQ;us(oNHz`&v?=Pn{~6RQ43?y=EtwYL7g^+ z|Nos#JOYIonVF}})0)HR>2N~uOoPiKPs#tC4o6&sil!tSvr5ZS-Y4ypW#p8`@8(eO z*GhTY;wA|Ri78W|4sd#R>R%42R0vE3;|J41QihR5=P ziH*C=XEC~)7(Yl5o^RygrL^Rlsf$+HfwhGmOfjA;O6N{=T>8|IvrAAU1rlLSdBqDR zXI#!P5Ylb>=ppDM%yUDDVYPys;G|n(N0tb+Dc;?dkZ|DowdYwq4v#-{@H)! zNlTZ4yM0tPbEx4#5r-ays2sxu*Eo12xC|aoO2|CepzQh3U(%U>!j*3>!Y$$|5c`_S zQ~xji>Z{O{@sfERPqd8j0;7M48V9D96dYBWcysQXhdO^aneH8yuVVQBhSP;BSj_hc zB6XTFG~oG2+XjodVmUAIV?RTWo)0%YlmtKA>*Z@jgQrcjGg&Tqi0XDfclbmohLi z+OPrCBmmXFhorWy=w#S*`n`PCg2hYaVix))|_>lBqcT@jW6!Np+gIn z9Qp8C{P5r3JYFI)L5@#2tY%4Bh=Ut1PI{LA4xi#Y<9xqB^ku|nrFllodpoNB|NDMw zs=S1R1Q!=eWQ0MnW!l+K;$c;Pj~4J8RAPU3_8^~&tK^)ylJjhnEvlz+ElX4rRC*o* zjX@_aql(rhVa3=Pzu*6_Ke4ac<|Ie9iSnXX*VlvQm~QO*yLzc=dnc!~^zC|?uOc>% z2af)KU?%x`-5JHnItmT_{Ra;Ha^Vu=GW`i#d1j_Cvd^2xla(kEt zDR_71{DTBNc6PI{us3q2Caq-)R$Lni9h%sD9O1``6eDSlIkbs1hYoE58x^hl4uUK>7 zign0jRu%&zqn1{~7q7ux_G0zGC=cBPj#ZGjZ<_z6d22+QxXOu5KTa|C`1DMB`QOOk z&)@(5?>{>mUsYvSP-NscM`OXNQ!LguAKI_ttnlJqQ+qvelFkDMmzIUQW<9*F-?q)H zOyHi~4yFujzuO z&+7k!29Iv+p1z{mYy#uIrO#(*O@w;j<}(MYB!Q&^53)&2g)ALOL6@82)$J(F3PnGfII&ZDZ@ zA-pbrPXY&DTEsq?=WF|p^iO}RPBIehB?fkTCOAx(5z1Ml&0_wm{^O(dD^_g3 zdw2f-zsU}Ex4wVRZD==S0CT6Q`K4X^yU~ zj-gW(jVCvDb}n4hl=v!y&CINAfdX4z-WOw)i##rD8Cn;w{a#QA4GRAG&R(of_}iPNOV}9Vp_Li3KLP9>l%RacdY$z@^HZV3mal$~c{M!Hj z=?Q6RJ4#-@Kid6&p55Q__xI-K-*?N%WMrS9XWBy`!6^a5EW5uX5QI1c|n!#!zh@9*ypO3wk-PtggCn^TdlsN8>lA)kg3B z#?E~BzvtwNMT>4IsMY`ck@I@%h<* z`+A8dPbGKk)U=hXJJI<6zhvFtq>67n56{WARexVlzQ?hlhv)G5`8;#xXas7g90`ay zqo8ysAoOU#Un}Q){ljyuc`jb;n40_{LOm_kFbtm5x2i}OB+E`q`j@zbhwX{ji|zUU z`DA`rrk~q>cX$2G+nYAu0_z787<+nn?(Z|)Q+aqpt#wI=2-A)o694`r zUbve2;8@w?$QqmGIWo@4%7;6JFKn4Q=}dcoe!PEsd;gj>|Mg;iDC?g6@aLyJ zBcl#zPXF+1a~^ShBb(Y&1s@a+UtJAavGM1J&a_Wk|E?aC`w z8y6HjkdfHBEp3LyL`8*$hu?DB_Et-(`8e!cD`9!fuz!lnYTpZHcOk9mGjnwLnjJ)1 z>^%f$^Ex^>Sum`7{y?naT&rWE$A>wLosMgnd=+(fi!R~a61w)~oChm^o;-c#<>a6Z z-b&S}iT)C=vv$3U5}9r@f9VpPyDEh(4gE*Ojudkl+Wv^IM;)8Ey75jg#IlEM! zJF(;1obx`yfi2f=eDk-RZ{Kcs#`v+v?L{J9vT_pPu38W7Cdcw-^Pt6R;1IPNExrSWL4k9kc@2=>4 zac!HKlH7sdn-x6yj}+hN94U)d+}pZlkyzKeWgZ>|8GI8g4;$G0`!MtLKhDDPmsY2H zwoOg7T>kRNPpRYERd()L=l1fPq?hL50N>5Mi}&2$zRx{&btaP{``$;Fmj^j{74V&B zPL^QfV7%@s(IjBNcKVRbVUzgCPx6;cN@QCc0vD`ucv+smN#{dtKv(Hs)|Y+F8cpjC zZ=3l1gb~ZZ^tHV1HC1kF@@>J+zB1F0ugO8cb-tob22bnOEAtw!SyyO8^*xN_U}eAH zlBD6I&~PB{k)D{;;zb*`&CPmt{>2Z^uXe8 z{^_Ta7A4pn-?lQFhxzi8D%Z;sp30fJO746)<%fKg72~_|G~-i~9#6Dbrjapg`ah4L z{?^1lP03b&_w}!voEmYh`c?EvbM8v8kG9O&%hePha=czFD%@xmQ(<->M~y`MQuP;e zXEkSJW(7paHZ2I~RNAHRB=>_*t!MD&5(d6g9-^gh<6 z88{_~$IeV#%kvbXE@e)<=FVM>c}pUvC}lm5z`Pu_j>^Nzb6k2lq7iRyG~ICb1# zW;g5GrAa?4Oq6(vAL(>X75Kq;Wau8_Se|pxF4k;DEB$wH%Yn1OhZ20D? zQ!C&dot>?%E0iPTa#GbP;(J%DQ^4%oS!Jsl=coRwPn-60(x>Gw*S~U=d>f*)^wj6) zX~FBR?=Q^J6}$g=r?J$>dF~EZw=zv#=6^o;_S!Q>TW_zld0wP=x}m=3#H$}BIbIJd zZRFfuTl4MnpJy!bVAm4O$A;T{Cq4Mh!uZ|#y7u9P3)MWEQU%+-RFzm=F?BuoHtop5 z*HZ5;y-m6uA?9@9y)cW<@*{?mIRjhXOWR4bok`QT)yjT`SRWh~}e$k{(oVF^5vb+oAS@t&r;cD>g!q=RO$e|VJ7E&TP{(_K6N zF@o}T&n!c(LjgQ(`!6j|&}CJB8DbS89?qVwuKz%HPmB6TrM$gvnj&8EyEHVWKKg9= zC2+~othNb?x8*14D^3eaysxoRD^?_H|9)A%r&+3HyDz3G+pJ%26z4P{HOt^ZMM&kk z#}V_-dt}~wwLj<8{|H`f-ZSQn0?!IQc^d0(`pNWU>$GX99!IyG^<6r%Fv@tgVA~J7 zb*Dd{OmPx1oF6<*!&N|l#p7hbi4w296MK)}dA~z1^yuTp#|1kdRL=2h+p$!|_oUII z8Ly|R7@pmf>a-zd)7?#Rn(^Kt}B1_ zCO?sE+kW-c#)yummu8%2Z#yV{_=16uQ|in$TB65`_J`kIYc=0;u7%vg`79JRa~64MvF195&cZV(2_5(ha)_Hq4W!@T}t}O8AX#FJRF4`+Yc=~cYT7( z1efCXRo4!Gp7U8s>Gk%r0vDo+_j*m@*?o6u=DR|h`+3hD6QacCyB|BPmaFZ$@O03( zYD>A3Mz^NzdR8Vae`Wv4VCTaP99|#S)J{EA`m$Pv{ruwB5jqiiE49M3&!y)`-+oqd z$mC>kgNbW4U;9Mu$%Z_KCY){4IP`ty*}A6VODtw29sYii85ART=GbyII|#JyH%Vg_ z(h=WysXMZAR?LCj9;&S!mcg_H2d+j`@kx0J-rvNSi{0Us*tTG$}LpuT|!ya8c1`M_l?kIKnhF=g3aJzyE*z&rk2)-~T^d|NrlAZ}0#6 z`+vS&>W;$ZhnLmc%=6h6oaXw@mZErfeLT;mO*~9YJofcQd#a8uSbu)OnllP2EV5Hl z5*zt?xj76zd@|aw(J^^$W8=f;{%!L13l>kauQRCmdFaDC*_4~cC2Bq0+*Vt-?kBmJ z^E*9DR@4-=^YY%7nNiBMjLAe3(#hWwV-4C5Qp+N~g~LtIW!vVj4ELOD+R8urY>PYM z)zLbkL*>}bLsL#a+O**^-(_35J^aF7_O5%q;Ps(pl`5R~4jr;oQqnwcu%yst`&${d zT}?-7kMH^yJtHRXS)l`mi{KWv=PR`CGd+~a?cUTPU~o)*Z)BT|d!Rz!LXjz--YqZQ zAa<;xfH{FDDtVrw>avs9YkNQ5JGQCj`@y%<6`Qtt1RFfN`u=m}_iuLHH4%Hk8DY&V zTabfWEHEW8fZkcoZabz`bcS7RA)-tYxN*9xyx_EczJWQT7(^F7_<6-UL zmWr)!qdCM}e|$=qp>V+FwTbKXm$mzU{fi3s=zP54{(1ZINO{4F`bz_*J&?KGw#+eB zxi67t6HjRB-rPmsTnY~hF!VHDn|4m6bFZ0bY(SLA0~xPG8AF~4LYG@UyUa_DT$A`p z|Cx=RlWR|;%J1g26YeO~IRAY-Bfezi>w92l@0qn1RA9FpKV%&sl2hBcSy-A|K1N5U zhCy1|Rfl!!1mTZ$K@}{DYAc0qU0(2hV{SzF7Tr}`UX20;|Fm8FL?oQ2Df&)S`LIX7_fNU}rRbMtaXY<0 zL;3&z@0n%K-IO44q&{oCl`2PLvjxXa(Q|oqKeqj1XzYH>&864I^=pfW=3nWiydC}p zcRt#buh!W-<&meDu*dbz#~y(`x!T7chfFK7@(f*QT6)&vQtAJIw$!`-njH@qJox>l z%z|&5@2pMtm79WG1U-wqGtO5VJ~8iM-`|}1Xx9$CJHHh!r)*X{-E)7hb>}#BvnH1RXLWj@{_wxbdT!&qRiW9t-jYYk`~Ux* z*_PbR0Rk=dDitM7WpZ=HxXk|jzjiKYgNR6U_wq-K`$Q^PngxP9mfcI;om!UmtuWMR z$&}Mdtrl&L*S=mXb&ou?ZE5C=^snFc=l)la+;whW@~j_rmt7`JNR1HdbDk%B{M1{X zrv7Qwh4)ORxz4)uox@S$Q6;GLw=3zk2#;t5U8{H@LwaF7@$o zYPHz2`>CMeKTzx&aw z=lt+9uiJT^H=Rmd`+irlO#im8mtF2ZmN_jX_K!J*wS%IkiZ4ykNtRflwPZ==1R=#4&%L9+`TdC&yfjNiu}gp{JMfkN$H@)9 zlDm(z@X z{8(`Mu8*%|Pme@JrKF3iq>ip63yVbE|1<%ywr`Fv_<#IdDE;ZmRmmAMCH3OdethY4 zjG5Ee+xzfMSE;E+tzxe%P&N&aEXEPTmoaba(vs5Q^rA6J(%eN#%RZbVg{+efU zb=M~Yfi@G~ef`hx-ZVOvp{LDpe5srBXG|7U0j8Zi8R zef|IHZ-1))|NDQi`Tzevi4|WscHI%bv~{t+_2cXMY;kdHmX>Yj=S%A7NSrw%@#BZY zks}go)+`8J-@xE2);gIhB4WY9l}lJ+Ltni)bKuOGf`?3p*T?hd=zxa%K74j&zZntP z8KuD)&&w$}SyVK2!nCx6#6(a){SFMj{f2wdJ!Y{L85XV$96 zmC18mZ>>qazcQOwL5+KR@rUkN=;4@6Y#VXa65=|G%&9 z@BcHBpel6o`~Uv}6VG*eFhuemJ$Ft){rHEk+RYOMn`cTkFHmS+q0u~5lv!N-@csQE zua{k7>gWJ9QWO*pB%~cH_^MUH^Y%sHi$zb{`6Z7Wk(g)Kx~!r8|AK$5DUm;}*?O$z z+cZORlfS!1hoetVV}F0+RMDmWjobof4p)dQm?{sckk8D4R224ob6PEJK1exk*eYoK z@vn9KfiM9U-M^0O0}NZ8f?7-(Csej1&i+|eo_Dw)Wk)!uY-6M^BuYcSpuzz;X zWm&}zrD?4us;58H_9&j;dH?3N@2sN#l{NLZ%vV{X{kY?r40~y>;Thv~-BY&c&U3&2 z*dqL$_4H$2iMmnAx``cASQxzok6Z8^OL+Ua@L6-5@XP5p9)o+lXXY4kwL0*)$o~*b zR^HUw+dB0HX}7+BDpXc=<2sa%YmVg*ykcq?++h%k9?BcfXsVX3ukrb=LFK zA&;dwy02BQ>dIZ5yHPCpMDdzX-EW0yEJrr-t<98h-}|>~#j_XHT~2$A1h0uEn?&x= zaS@Dc<4E|;YI0aNVgHi_oJ%{B*DPH%BmG$57NP9Gu5GG3FRL>z>T*XV^F*rYvRqg7 z{b=(^M&RRvkFU#~Tza~becI{D1aLR!%aVrjYjhqHMl zck+>0$ttQ_ragIR8+vrtBGbz)-+Pp=EfTrrWzNa?d`0%p3hgfOO$VEp_cXU(5L?J5+q7cag7T*Nio@q0*RJPIJU>%n z;-;y|`%B#Rt_fQC_{63J9@E_i=&))a%b zDFuDGhpSr><~2_1nm2iM`rdz%;rH*ngj;?#(QH{B^A){-wssPDt;`AxF(7!8BCqAp8) z@7=0#bXy|N^;W|<>c7u@elVvbd~KxIp7dEPdw7p~U3b{A2W)FnN^o zLP7QDhc}YU($dWh3?+A8&$kDyM6js{-4CtI90XOCAV9dWuOcbEXY!wTbE2ba-zf0zu(cJ!O^?>;9XfOC6}Pz-4YF@KRCku``s9VQjHA_Ykn}? zmH+*iy?Kes;kUPW{`}#YZ*Sbu>3Dsi<0=V{9|zc9cCzg7;xpnBf|@)D zhUPv#W^wW67MA8t&Jtc0(dB*``92r62(rDba`+ZuVBp~X^TtiV_LBwyd&)Q~WlgVL z%Xle1(|1&5k3d)@*zpkRyeCq^eZhu>+b~h`P zsSeYw8yf^HlymeJII?B+hE*vk(4}FEvL7{w%(JgIl8`jy;4=E~(eTAfLkUU8*{?%h z&PzCEvM0S?(kCOgdd)+R?S3**Ef-hB_T=cy6BO*w0efaq{G*0B<=_5VgFGPO5k33h z9hqHvZCbv(F_*cH9RGD-8NU{j(Vd=|GLXu1QShULw6uiO)E^(49mA)CmP@s^I>yRY z9k5hZeptHs2t(FU?YC9hUl^BI$(U6DB4mBqRicr5PAE%k6V} zxGR!F!PJFgm3wf5aGD$N2dN zZ%9;CO8*NE?_@c0L?i9UWRq2fTe$d*(z-vKfF^*@M+r$ydE3GQ69t5le*B&7=-z(t zhJ@8+4NXs0?T*ZlNAkB7F8L@;esG}a{|&RNJ{m{F`6EP= zuYxvU{{O!y?NNh|;s4+4cPD&)%H6!t@XIN~vNASFNv$WCzfxqH3hlZ)78&qol`;+09#ZQZ~y=R literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png b/tensorflow/contrib/image/python/kernel_tests/test_data/Yellow_Smiley_Face_Warp-interp-3-clamp-1.png new file mode 100644 index 0000000000000000000000000000000000000000..6f8b65451cc08a463e4305ddc4be0dbe2879fae9 GIT binary patch literal 18058 zcmeAS@N?(olHy`uVBq!ia0y~yVAuk}983%h44c+ZOl4qj3-okx45^rtlpw*nn1P*v zfq~)we<{|*4C>$({aM~`q#Tr)_v9Y?N1wZD=^(Q%StVGiFhw8xVyb0B}#MZOl6J& z!M*F=SDunsTdt;+mO0IVVV%?}z9XA2)xTi}Ma7v*o_S1$0<0HSZM&Vx%)LP*rBn91 zz0`_KHvZJzb!I!AmG&y{*?&^7>3_|WDqcmIZ@)E)U)B1p+os&w_Vs!2{d`|e&A@X4 zj+#-cyLcRW9kaC-S@z#!Z17*Dq7DNOGx=cy;(kVsQg5>QN! z4KMAR-nv#bc}3V1Cc#hhp3SUw_i}MLcTvKGP4J@0#DAxE*@F|;kx2*h4jBlz953Wh z>=NGbBtlQ&}3Nvy0NSuTDp*-*G@Q?0{mFS)!r ze@?8rr76g8t>{X*Nw4QE5-7TT;`5Tn7i=zDc%9SiT9J97)I(^C?kg#m6;VnOtIx^b`WUYuY1Tz;b_RpoS>y*U;Zt>z^w6x(Pg87vF4xX zSe?0_KBiTCuzTia;W97s+o{j_fqPDWQf@kNxH^o(&u2;3!S@_Te)_xAEV1g`v2|Tc z`TnXMn_MS$Jht4fomRDH!{Zlsn3mq;`Mak{@?4_Z{l~Vk?zb0t#R@a|I)@tav>dQl zbjsvz_+6uVu!|OXK6>yl(cEZ~kwHLV`Sq~Chl0Y7Q<9sXRlB*l9Q^SAzrEYB`VT@( z?9!?`4UCjr9Mtr#>T;?&z5f782Ip6n*_&O^Fxmw=&vbk2rG2Bc|w=d z&8?!FC9oo)By@sw$&00|Zo;D3D;t(eHm){Mh&ZsolKG(9s;YkrRRx#aVh`Mzpldnd zX2wdEro4M87KKfThuPYs%_Z#WmN@NagIK9ixM70JHiFozeyQsjx*y!K? z|MMA*JQ(u(4rYnzsIss*t@btW7N63?;cvL%%4hk|SN1vwA9`x@NG#=J&MG=M)o#Ny z!;UYFAvYdqz54g}e}BxS1tEGF4B49!9JHJyBqaX*Oa1Y={oxtOw%pqie6mRuzotC= zobI(~IwQ;(D^_%{u|>JKN3UANWo&eaol8<;!B-BOJr}fCx43)|*Zlg&)l2NZVTe=v z@(DX7rgB8R_@90tOk6Hj13G751ryYQKP9-|KGjq`hK1ZhyLs5tYBWNb#Q9lzsF*Z$qGrV zrm}yM9Wt36yBY8J3ZMDXs4l5E-MJ&BWy%ymzo!2Ci$$e*I?{9&r6nGirncc*%!M5v zLO#8hKYX~IXLY!-PVA`-AaI-h-n1R9hFSf5wZTtHt<{P+nnKEOq zV1oirYy5{B2dv-Pwl63VlbT*+RAwmlvDHIe)?TsY`@}mRrTgYA58C$G=3DJHVWwm4 z0dMAI>UMu}^Qn96;5zBJ+}Y0VRccwu&z#~D6yLAeFsJmEkK#VRg~~OGZVOI{KL{1G zlP-3C%<~*RlW5cTXWxa7p(?84R2mhQSUa&2bpC!KZ!mpVQ9iH#>ewxeN-gc$x z$)4pYAI-Itnwj<`AAMaX=%gN0!!6HqH}MO*f)L}2w#hlNs!ToCA|_4H3`{ZJ?ri1D zvR>r!i_Lv|Hd#CljyGXwm2g}mA7FMVkKgR+*}GEe>*6LAI)DCh%iBHXbyrY9YzFW1 z&i?mopti!4u#LP26nKggUNYyzTwvDv$Xc!c);;zYQ;EaE!o-Z33PCKMJ#Xb7e25E* zD!%6w@k_h1sQ0t9UDvnO4CgM{Wlu1kyY)%q^lLp25*#9KZJ5p1YSp{BMr7^kbNh69 z<1cGp5ZSzHbLG0^*UDM$Ete0nPEy<&wpiYt>CiD(0p)M9&pRIpxnnh8h@DP zHKl)Z*R5Rr`hQH#1n;_NFD=)tuXbIT<$byroQzt+9r+F@aAcnj-*$KJ?7tcR_rH6f zaN>*`hav9`kIulq%XiFue(krOZA!CLX2|3lO1&bPFLe(UUjD(pyg|85vr4?=&z6j3 z?n}9L7<0B={U9oL=4pkZO6a6o?g>)_WM>PCeX!gn@w52ht;Ss*uddWGY-xLasPMp> z_>a<{U{EqE)X?r`Ws5p-sz{;VY>`-AM?~XN&WbMz+dTwaHFSamMR|?HCNRKR2%8v%pK;yUsI6FDvfF*7so;t11=F}yOh2UPJ! zMe$s__Ci!R3{uT2nHAo+p&=o`VQge{U=piMr-1@*pmPiZzx9+4PN&YExGKOLb;`l( zN5o~uS@SOlF{Uvk9SG>pR9JVVEtD}V^+b#h8}qWy7bbBtzdO;EUfW^O;u}=ul zV`s+`6~(h=&5p;j8yg!NA+?N>Rv`yx;q4nQIJk=!@EREz7#JA+YxdcEFzd_z4S|6! zlHD>98zwa{nx-yj)m(StFi-6tPsW?GD~jy@nY4uRUOUpIE|~nvHBou0#tB!oho6`u zdFON+$S%IoV9?VeQBxzq!cwq%B9eV`EFvN#a&tod{F!p|rldrHPDVqoyzzp*4rMEj z4^1MS-Rj~-55!UqOl&SpjcE!qNJ@UZQT%e{jxO^Uy(1=!t=-z14?GPcSU;{vOilgp zJ>9@KIU)V{^F(Oq&9V6R?{9vN&f!C!#KmrYU^{LfVwM!go4JX1(=mBXh6$G}w(U@i zka(bfd!yE2o%Rh{#~!n-c`6ph{83&1kKfS=3fjLWZdIO~(bmyn@Zz;`LD8dPNpP8= zG{?fkwDs(nCpT|OPIz`IkcZQI%J^%#DojmK6K7+qS;?-~%~nwC|QD`!3Ide45E_w1HttQb2>r+omJ2YRX;MGwd z_K4d{B1DZ7yv=!dc&w~=`ucuE$w7l8rYIt!W8>!k?y;$##aEtf49YPu_~W`Z=0Xr# zQLInUPx7HMwjCUtkF<{Nw3gLZ8|MAw9@m-T0OKK!?5b~`a)noGgPjjP=KJb8F{goJo@?Xp<@ z37S3)YSt}MO3l*Z@s{k_u(N2zPYIEQmmO@HcBBaA{r>s+{^R5S zxw($EJCX z470fSH(Opv034cbF=K{=sR@gv#lM5g{2xwU@&DoEr7OI-9t5lOY*=!*A*A!bYXvqH zHj~af`~T;io|b)i`TRYVpLvy)58vN^fo;vEO%40^XT{8!Hf@>$H#hT{vNJ*)kzC1)|GG?|FS>)`uhKEIX~^Hzisap|L-s@Y{N#wf+C|o|Ia^ocu++0WJcaO z1x?R6sXY;C4<3jqY9}Wgu*>}Z-=0}pySbhH@G{>U!tCHwd}zLfL`o{_jG2-XCb73zh|NsA&zu%`9SyXqdNBI1_g*wy!{h$Bv++6;> zdzVkw+bboybmEjHYuBASf9Ozy;NJ=^PPUwrtl3-&G4mc9%+dJp@$vto-TQrJe){^% zcgk0h{dJDd4}`7%SN@F0IYuTqORKqsg;`ke@Z=LOna>~5Va{r7ToxNR(b1*l;ZbfK z>tB#~KlGfXuux7|xH2s4$t#9pUG+uDpZ@=kS9uqobK2>@^285~0&U61<-UEBbMuxw z@!)88{$%y|j^4Y5_5Zf+um7)jcB|0?4K0psY#lkd3A$Y$%+&%e{Qu9hzs~T@y}!z8 z&tgNbs&ARnm~^DRt@^t}Rn(Ix?=z?3$K&Jj zKb#+ae_#Ln{C+(ho08w(e$SDSwY1Dz;?uKrZa14jGM}%^>Z&yjEvueCJ-t7Ad)?cY zKP#S}lTVtKQ2X^Zw%Eeiv3|=EK+5 z*YDbWzPxsnHL4Oj(R8GXO&5&C2lvheYqi@>Tnr?CANQ;Y0L_tngs|wSU zciL;%yCoD|edRNh{!2*w`}_Cz_v8Kd`Q`ViE2>CHi0SQ#jI4Zgb@luDe+yJCEFAhg z*09=19Z={_I@-TzO^oK1m6QvNs7kO1o2Hc`ROuH%;x^ zXZOuh3>pG6K^ak0gk|T>i_g3SW-m-k%b(n2ZpB(9V&QwsylwahxOeT zpP%24-fowbX<7bOs%VvyAMXsMBg;AeXfjOa)X3Ub|9^ksiJ+mA_N>oaZ#-t9;cOe#2TZjsv&k59YHf z=VWTh%Qvsk5PJ&^h0LG*^7S)jL`X^+^7|eLGWhfVzj*ZAPvV}2QQoY(n7|h)70*Hd2;suwq{?y z{`z`-?O(6H{Sp!q`|8{CDpffjtPCj7Y%_3i-S|Eonqy~Lu&~?^6=9iZwm#{Y$*mf$ zs7pVaSN+ppWt7pfaiihM)1M~?YC9iT8!bOwZ*TQOrWC`k^;b$7EYr?6NKW)Gg{Z~<|NnQ5y}>yBZ}Td-iIZl`{O{@gd4d?5$AkO!ZI_qde{=6| z&x;o>-t7MaE(?U2X1rv`-r00XZ(s5A?>me4*G-CC>cA*3&nzyU8TCMwy^eK8y92Zt)+QJnx^e!Jqnnf6l+YK1EJJOQl71 z8wb-9$DlQ`!ak85)8y?p%(()!aHd7w-(N@1oaw5P<1JY?b)w+J--;($w{C6Rd!0+z zo$q@+56}N8Q-AvUcFy)Yz*hI~Px;$h$CcgV61%_FugHmUfri$k#e)01S$lcePi!jCJ#&WV{e9yEu~N9LDylzIQX&kJzT3abkVvyS zkdyf5s9LK=duqy#yLV4=aY{URkYH8*PWRp?hH#(cWfC!uKA1FWo^Cky(8&R{f&){lP51a zl9+l_LEU{>+@HysJUlOyUkE6uwJjBcW>ZPk`}_Y_{rn{UWQSyy*c+UDN)E`B@HXM&hU z%>j|k>$@Wp6u6ZZP3U}CU$tx9ym=gz2S5J(9bZ=`=I57|bFsg>SDp7Dt7L&Xx6R78 z^LJWmtE#rI)!k@)X#-P?E9<&X%t~Kh2u?GDR^&4+R8)4but)?p^H1WKpgp~whv$PF zR}|NQsDuYu6aQSkD43IBpsMe}!W#Mi&(Gy+&o);T7|sh~37_?&dm&FY*N^tIH7ly( z;@FIh!}Rpr{zN+!Fp7z_wXw0q#T9)~Vm`BoDN-QU*^HU_@N)l%ZDG)my!MU3=_^OA(WxczSt(z|Su)&5u3fj4|afY~`Al z(sG364Pk4m-;l(dJ-5m9>z~AgAE(d!S-<_dk&@HQC7+rnPmc8LjCOCI zK7YRC;yKRep1yEZSZKshfAh2aVT~3eN5^9m1jt;Bq$kH^}28m}KT>0f^KUQ@H}oSz>fqnnbj zm9TL0YHeqK{^kt^6aO9KoG`uPkYvR#9UdMY`#OU^e>ziHIU#k*WQ#fnHRD8Iu8apx zQVRtfuY6;l7s$J&^?3Xhj^`bN19*_dhDiCwO_$$`uBJn-^$nbR^y~VT+ADq}9nXBm3|LmWhnU#)lst=dm}q z_6VA8XIiwh-rTW6V&Wz#rKZ5^xqqB>1OF*r)y!DRDkW~YP(fVfzomslcJ|kAZ;MS$ zMUxT_&U34%jEsqOy?x6hDQU@sb=AMRrHV6J|HtpS%CgRiBbduywngCKi=lPwYvG zWq~A=DY1L`S`0Y;Jt&&Dc$UQ%*&06f`4^@aoAdRqJ1Z))M=CeyvNDeY2Tu(%v%Tk< zYhTl4cJEt1V`j(U!o}AMyPq+*2XH2&wUn~#`cOQLL&ngKVb1*OJ=aT1&(+R;7AsP6 zf6;0Edz%f9D^At8IdMhT_f5H*bd1ht>7EkG z1dj?HdbuV4{y&@AU)-kmjSUU{xLSSZJ(SvF>MM~@eOKax8s8JwNCAfJOuDsyk{i~g z{y%=;z=1P={3B*amK{*`{5j*ti~sD!{856R_)jMO-=KMHmdgSa<;V77)9Mc$`rq36 zKRfGDZ>SbyQz{$Ff$*lgdrp4%sx4Lw36VoHE$aUL`TgZ(KHH7RRU0PPEIGln z=|k32`)x_hjJNG3{+Gz9s4)=e3S`?;%0BTb*FT}w#vMxR+P~$^gz|LGGEUTFmb|m` z@Q0_|V(=<=#on5q{M)LgG&1fw{nlPt*_ipujPe(P6aW8LJ~y#-?i3H7txePHCY}JN& zmzHAs3r;01H8GM0$BpTu|0J;<@wjVZge>E^SBT>_@H3CnocdL}h9Mo*r=sJwjA zETa>z_aun<{Wo|c%g|7Ig?louV*K7imA6bGrOb4TeKkM-zPcK(sCc4qGtY{*JU`}2 zbUf}z{2NiB>Qu9tdGncs1A?AMj%fVrP}wAMFiVPAPV!Ay1mukzyGgKt7vTkrK*I6E1EnWj}v%qM9FFB9Y1z7v?#%ijd`0**H&fcASNZ# zm4*r025mv75)u-26th3KZG@Pl6i~FM_P4m$i=GXCbN^3B`fGp5f_;x)qC&{*gpb@= zSqTTEqcbKRY0$kMG(+Ko_A%2ph7prkKWa8NHa>h8J4Y2-d)!y}%j_xh|G)DoHV?%{ zi(-juEQw8XKQ)(qVv0J|P(0CDpto0M--(00(>NqPq-AV*#FCw)q`dRN#*6?JZnmBm z>S-*0pZU(=jexl7&~#Ay>-)dI@=Mx8Yt&fl+9icA9ni3;HAo22s5r&{B*9hNSi+-U z-A0M0MMzBUvn0n%`-mVmsU~hGw^J9dI55oH%?L?zN(P($KRhfSvHFqypK10>q@LJc z%CJ(Nq{F<;F-W98VZ%gW7OCk!Bo0L-2wWCUJdnuBY<9F+FEWAY$v=);KGH>&AOBkK z>+XaaHpk-M|NqAsUUwz<%vE4JC?RnC?}6jTB?7iS;Br5cavnccJ+8Ej56iHmRMI4Gl^eqf@`hn`uF z{q>Ga5pK4PG)Qo>Xdzt4Yv+snySnEZFn@;7igX7lfV z`Vq6$hZ@SOSX+O8er~U(_T#AIjEzm)4<%n8QEk=i%RCyOrS+g8!LpI_-96QUUQ2MM zQQE-s?|A?I_xJxd^D-UUuJ)(B`@zljrXxp195=-8{dM#1-#-T${db7ZTx59Rjnayz z@|TiAeRS&$1@4qH&os)rlW=1L<7qvUB!+`6OoBV><{o(H2zTYZ{r^Az`@8@D|B2r< z7A)b3d358DtoNxO|Nr0b5`AA)6(?)+gVS06+L8wC+6R%3`=(WWSQ3@8pGW1#&&dmS z&Rn>%^WibxMOxfj40+htzP!`kBxUjap|j5K`{BK934MX;?16B%biU0qd{qVz((HLsQ1dACnCHL1f-q$P-y{d3kJwg>CWs zCC<%FHZW1SFB9|i^#A`n>*EA73JeSkpkcM+Yv0HJ^-r=S0-GJKwI<)L?cbW)7`M>C z;Lq>R&)=V%eBW>GFFAn&i`Sg#Stt>n+qB1@FPQh77Eg4nu|zN%GxHalB0s;j(A6Fm z!4rPm<6>q$yfwQ8IzFZ}0W^$IQL*6R@Bj9H)bum%I0Uiz%rW`$|Ns8w{`F^Pe|L0h zI(ojpyStk)f6}3=OAag*GIr!yq{eD^;L;>V-`Lfa;*bINFUn1wg#^m*GdGaufV z%pxVj!vmUL1Z9_qqCd}{y5{XY+|4ybP3w}JMa9ZK^`n3O{{Mgf`}_K5XMPsEy3)VF zATK#NOy(%_IyU9BN`~kE+NWwnnBV4YOkrgF^M_~VOrH66hHIjafB0wnFo<#IsRKUR zGmN*0>j{)W8tO_DEdKrZ`Te!Ha?uoLKQ-4^m;brj{$LOk_IRYR@!$XdpaIXkJ3mt2 z-SNM6?M-Lroi}ePl;px)o3EK3z4g-d-CRD~2XD?KWSrPgs(pB&v&+ikK8!Q|igWyY z&VR0S@d;>DOt84O@9+0VtV)k2o?FwZCh{ct;>T%XCypNY@7%s`>$ZK*pQr0+Kb|@{ z`OrkE4X=*&RIg#>EZUv2g2~XZZHGnM`}>k={>QezV$wdaN@AW}+@6FVKPGNagd2Y2 zhQ$4Se@`D^xS9Q*zwGmu`Xy$XTNHl&m#@<|Ra(1XTgrc9gFnB&zrX+R@cp8qd;6-t zZ{IcR{HFES@AE@69&K*2;K9tfBye(f4Ez` ze!BjDyV6(Fe}Dh~@BjIN|8^xyIe7jwzdU4muF>Ye(S|8}Y+pNATR|fmv&|*<)g=D- z-k*3?t8II}WK@*Im#>l^=DcjMRO!0uqWWnTEc<{H#_=;}7`i3gmP}#1+ug9m^^twe zd#*ck3qBogub(@2{&Jt6T$&P+l9D2#Gv)06fpU__O`aP<>`_ezO#hhL9Q^*CN8a8k z{c8Q+B#V*`MfLWBw`U(b+Un@u{-JP6#BUvW`=tA3y`cUKD8WwvwFv+J`DtHU>%sX^ zK91MW$Z2|?)4NB>DS!G)w5$)g+SeJ}+4nd7c;9^Ew3_NKFV;^I`uzU>e|ei9-~Zbm zKG?iJD*9rZkz|0fv@z=f)#(rC=qNn+nB4aD^};tjJZJ&>))B8!ae`Y*9%DpHD(l$_9oKw!AzxOUpK9cF-!08ygxOqh>YEpWirpwxY!hMN`qJ2?w+V-FjYED%@G_ zFL`gT@~JXtD!B11;lib+o)m{Szx($rl9*>8At9k~^3lV?_0ik@RQ>qyey(+Wzx@Bd z|Nhns3c7iFUwe^JCM|vK^z{EVzgSFPotJOBzW)Ep6$XditxY{J&Ei2T`zEF*(@qtz z)&Dm-Glw(R^3-YLj!wgjOv4v14J|ATbmGnwJYqTg{5;S4I70(N!W2^=~kE{Q3ERNePL#y}!PHeQlqXrk2C&zs5JPZDMeO z!+~aYp5^||H>091ELOOALqbMY^2`~DdwUXh6f{0ODcqKSU&6$6;ckt0KrZ|L|H+(RC)30xS{UlM-|vpV%ejD)IUCbNB0a!|vSs!UCEX zelmA0Z-WDmTl~W#je9<`oMg~B;J|ZI$wOuui-*P|y+2%R4~k7qYdwE2_597~J}q$C z^+P!gSuQS*j2#%KxvuuQbfw+AOC&W!WFSPcKb8S%Pky-H=HewIQ3ys_m+Qj2vxQSQ{>-0#pkA`CanFpGJ6FZbeCYI`Z$aC4k)|uNdK8x$ z%w|qIH*>D=@+Ko^uT5nRp*q_h_gfi-zxUwW%G6-kQTX}mX4_*)Ler;KZfOhJ_pJcZ z44Pn(k|I&}@6Y+epXW~$EZlSX|9>7Ho)5|r3wT4#%-mR6J))o3t9(tc_%@*>XpegtEw=|K|&yaxFT1^7!!&zs;F1UD9G>%h6~zuWxlG|dA26O<9AyA)uhei@zp=Yt zzv6S&1heDy7ShVE|2seZ_xwr9jyl<>IR%A=9-e>ye|`P``udDugT$9*i)t~e6KBtyS+J%j z|Fxjefv7nL7?qU|pP%pIyA#?}JT%?n-{0gPzos^}vRY~MZJGANKIl}#t*$WHMLU1o z-Cdu5?@xRF=|8`wI{Nf9wm-MGwr0+fHQ@rSHmmqK>ETiCjHZSo=JRHnSsb{<;^?h? zeci*`|2sfSuNc5{IRE~qKX~kHCN)p(L8|bU0}WfJN<5UBxct6^q~wg5|KH#E$Uo1* z#YLi`LZa^Pum7K)*MG`0kvhgONlh=VrRe|PWC4*B0U@O|g0CePXFPd)yk&adAxLl^ z3TG`WGFq`J>7E%!tRYus`T;+)wr6!63PHk8~6-tm`Oo@hHFWm@{a}2lfB9?3SFLrFZS>W8L#4ap8l|QrVE6dgE z#Lq7myd4&qcAWXKSfpp8;hz0Q+fs`sxxAmPSAB#Tl-HFmRERdl-r(?`UGRIuw*Uh@ z;~(cw{CIzTeZ9EepZ4zyA|fT9JeAzBQ?l-_(!vgD#Ruo*+1%XPgw+LHS&faGcUYWT zDm~+4lTK^c@(-Vk?(Fw>dITwH4}~Auu-W*-XXl$9r%e~|+)&Y-(yi0_#)*xsEqePu z`?@m+&h&gR;O9Slem+l3%#}TLZ5|(=&*S66BO}9e=8TK!mlvFMe-bPzCOrI{ z?y^l#AVEMl`NwzuYW*n_1*LBOcrC*5`=qc?5F0ZyB$e)&yPK=QfX9`8QS;>P$sG^g z{r}(Zyx?uZ+^**Jju%&Hlo)0ku@njK;Pl*g;NhLuyRIIbvG?Yim0xt1b3VWD>(Sn8 zB1f;e@P9B3`BRe>y(U$nLurZ=i(t>f*WVULcsk9Rl={Rn)id_8Nr>p@GZsR}>kni^ zzNlW~`lRQ@H;rGq-Sh5TU9W0>L_^dev_TXUxLfAfa~pP#F4 zwyl_f`#;A`rRLXK8ISFjc}PA}?f>ch?!?A0i%rM zJoj*Qk7LU=*Oo$;mc#|`Us`SBP2MtDQs}5f7dty;A1lQbMSHrkG2+@nTIW}eRcbn{rN{?U+wZ-=LHqk(uX${ z$h<#(s#2-RkcYX{_xi$9HkZHmJZ^n?=d$!s!&N@Qp{|nxrirjdwryRad2L_KwstMw zW{(>y7j@XUt4y))U;kwL$(FK0GbnW?L=6CzJ5wQ|rUm;cdtJJ+-|RAH0uu zX{!4*CGlfY+tzH!H*Y0-dL^Gckq8Nul##hm%`&Cgf4)TA9*6ls>1k;X9v&28jd&g) zv0yj?_&xYCq6u8Z=NC| zx=PgJ^S_tDZQj!*?CR5Y6f`E@HfsxCuW)$Nkv=P7=6$uvH}=hac!ZNpS((k#vn~0! z#Gbv9CMFVf|56@2UVNp%<22i*FR!m^w`~6ZKmEeRgLT!G2VLVo{9--SwuUh|Svm2C z3^-Wk%+lp*0IgqF+P0Q$V|9mZ+ji0Bz|9Ajs7>A=<`{6nplN@{YwH6BJo0Cnzu$Xc z(6;XO-siSP`|^t~ZZhCm&eSL}^T<*&UdIpZ{5Ml(Nw~*Mx46gj&PM%ofy}d?*}~_- zj!jVcVdv^NOUO0D_qI^m|8%d=<*PKjU;Sd)@_gZy&xx+?E^+_5EjH}ie&Tf_I9<<~ zwUM{MfQKcYVMarsKygZt8^hAYOFcX%a=B(|csM@vU}%Q22?%CRtT+^$0D(1h+lFd3i%w88u zC7f>`6VrDxcxl_HwpM=|qlNqB4;6_gwl7*4I^pbeff>s-odJ!Y{{O#bt~FPK1CNV* zhunc%+8kB9{x4O($TW5IUTr$j)jD?(hnV)i{SQP|P8RSm;yL;EZE?$i(&>9Mwrw_l z&0#6+c&kjmWqtOnwnYaF%*=PnzjjXbz1*SH)~KS$F=3L8`ssVEzvX|;Tjg~9W9=o8 z!|kr`(s>^?;`)?GsB$iKzYa+4$1S3HsCZP$|0v^NoH6$It}DRXVPS{!&< z?OR+^1vVCXN%ueS+_PGI!v#OVY)1j^?v)`a8*V10u-=R{3BB}$J??>^$0vT7evZfM z73c2EN{v)s(YoOM&GJ=UDhCVsrd56XVA#7!=Z4v3m)OOou8z|ZBY6(r{kyiiLN2l6 z{o`HkRZ+(0HA=Hj-ss3YvMEKTQ)yP4iL2KVmA;#1EkEL>H}+L3trw~EyMA%op1Aa4 zYraGe0UN!yheCail-AbF{s37RIoFoEDM8?P{f`d{8XhesCYj9_glZ?+ZPRmV>Q0v8 zeW+k6;vaVFLi5Gh7wc`_S?qWm_phM9-s-_|{cHM}yC0{%C~-P|G4+b0W{X1K$J#EY z%PA&{*2!O9-hKD0*JTCO*}Tn+flI!>HBj+1=s9lbW8ZQ?TqXB(ij%;D%AVsq?Ga)@ zS^dj}k1Ow%y{)@iH1gY5<9pWEvR8H4sQX6h6vU<(KPx^ZbpLt5fo(fu1e&%jT_@P| zS59;5$s$jQB0Kx7wM%FC)n9&Ar*->f#I^bm)*ayDZpth}?xp~N(CAPQmbudLz*NIjLnJlK{O&>DV z)+C!Wlce28H(AI?@OT7^u=*zNUw->&>BLDldo))4^fBl0nOwJj*@^S#)K`8KI`_PO zj@a}v@G{aVbL_dB0z{71pYT2+<8*SVp^Q*gm)icO+IaPY3s$x{aL5?VD&kOz`r~Z! zMMKzg*XPH2?+e~pKDW(dFIMs7cDyPWkS>@Z>AYBQn(O}mrPKS_fA77cvms)_^AyR; znwh5(6uvG?6+E6I8R{xhXi>0#bKGK$qNvhonO-qfa`TSY=o$ZxXpJZ+yEBsJfW*w$|Xu{`6wH*7J3*Mr!>sc5d8wMUJ>Rh+^uF>p`Hw|VM*d$5Z z$(!RBqi6KhYf@UYq6A-ItmHlOReas@>$Tn|ujhAh`jh@>9=I%=GS`r+IY7WA{-g4Z zl^F`T0++Qa80#l?KjULlX`LFrLEwtfjTIU*)D&CiI|wzWtZIEb?dIIn#2cbTCcN8k z`#*eA+g0-+?O0m#^V)d}-<2Pm_*qg@l8ui&X1Y!K{_y`Va%QV6%$=!rDI}M0$Zl!Y|mPiJ!?~+?iDA6mQ!|erzF)>k1Kl~dg?SS@pjC5?YPT3=XNp0 zJ}*(?@Cc3(Kd#t!^X-akpUIn+ynku6ic3-KiXuW+*aYt?0T;Iywv93!}YBWp${cD#jbY|O!Sy{`TG~!+}->}z8sFBiLLS5n6lBPHBR{<1x3Jy5za zcT$UvvM;~ETh^ELMaQs-`5XVIqCRiU%D?Y-DE_Y*kKl_ppyyfq38Tp@cRl=IUWrhkE4mX>U~x^0`&H=ZeU zNOQNZPuo#7wXwPR;n(ao`+A8TI~VR%Ik7;0qC@2@$%f2?-Uy zrp%cMooiK^U=a~1nRh>J!?t6Vd~Mehj^29n;89XXXQD;joQGetb^Kbo4@9r|z}o84 z_qD6bXvQp~6Q_+=tTw)J%Sb|Up+(OthuFEGWro*u*?fK3Dl6NjYD>oLP5SYj-^$M0 zG9WN=ab0%olNU7pJ7ifFZ#*Htwnigp~ZMb>x&>;n# zGXiqw3jW(2zPwz;TF~CWFwwv;F(4%6!j+T_8y$HhFKVPZG;AIrL z$9uZbo*GVBg@r5z#>OX37)VGOrlc=EfAKNnhai^gHmAS8?}#-}0d;LagJ(9p1%Is$ zvGPc`vZk>e6-i7? zs96VciX9~K?;F`vIBcqxfKBfy&9R8v3)+(A=*;|(?@XP}9n*?Wt9My7PZmDByPU_m zEMSV=jvW&J{w80zoc!Ti+J(!>0-~uGuBc>1v+Fi2Pyp?;J9myJD2Ruh-^ix+l!Er@ z4e9*M;^O|?`kF>dwhL({{70cU+iE}?Dwdi&|ew_T^aWV&Q zs)4ENO38~1>*REp-A+05E%^Um^4c23;$NWQ%>Vzl%vI!WP7r95H|gM7)bUbV?5%u~ zsE*qIR{Luy7j-n!3};-~v|vYXt_9ofs)rWuo<4XOw^Cs@J@GaQ^Y*|#`OzV zrATh;eXPJzde$ON=b>%(tQN&tZSN;PXJ5SQUA^qw`^WFCc|K8vhdD?|HTC4C3npJ| z+Z?aUK3iFNY*R#diL7FP$f6DtRgR9w+}WpgKy&nF)jtTNzvHgli5mrrr*`meRiIujK5T;io`b%z6vH%&zBx$9KHD z*rK|9wTju=*}8wZ!{`6@czWGGcJlMzOt1WF!0ix_%UcZuT;dajIb1eu40rjUb??~k z3RZui3)h0y0n47Yl|q}%{X`v(U65JFa*wG^`CUzt)KvD6)n@)!5+8O$^453RO8`Hf|_V{iS9f z1ak8K|4NF54q{sz!;&6(ip`KaJU@OVQ^y*n0@b85Px|hs%vvV;sF^Q&C0k<6j{_U3 z%`Kzop(4(WS+;-68?1x$3pt=yRA_Y z`Nf(obcDymC*8on;LqRF8wwOpp6)b++Foh#@?C73WwFG+KZ;sYHfkj(Xm>u)RJbOa7!kb5l2X5}L@i3VB!14V( z!!_~#tx3)BjX&uMzurc-T?=cA-`~*{7~?57JzO%NS!i|U4j;7)?yv^UR{MzyE&262 zPAaJRCYPGcGjYG5yt~YBj%9N!<4UMoil;p~A=tJxyVL7Lg#dG3#}QRO8)-fRWr@|s z2DvW6F3*xoc~}3OVRtA?{3E051^xALhG%9t9(wug2Q*kd`>eaaU((82QbbfzN9V$0 z=1rEet0nm;b1FzTZS!jteD7(M(JlGG`kDQ<*4he>Ce4?(&D-APy*OpN z3gi?}P5D1%Zu)tNTel?Q_8gdWOtQdSx#Nts%QKZTdpMpiVl+B)B-Kdo@Wx+K{s9T4 zqRg6_%s0Q){|8UmZ0S64Lm6}s2E+gVi(cGVI$2%v)-8#Bb*?+6_JnD%2xxkwI3#DO zb4<9JaJA!j;*Vf9W@bC~WTOwuV`E;;w@I8~IeF1c5or7@(kT4%AH0$J(o&6T^M8+L zII3AoKQv_27VmAmn$lF3`0D5rp~*%cRK&dmW^um1Z|pPUK*2X1)rpvpITN>UPhkD;IcM836Lwz~`{@fZS*9kMSxXoQ$uF~z>z)wIIV))k z%aI183f8VYo~5Se|1k6XBd^O9lbB}x`2U}${=d6GyJqqA}jSANwl>*8KR;k*vmT!j_g>^iAjR z5>KAz=O2F;bA_7nuP;bavTct|+l2tOygV1><|Ut) ^%n!tEat8-5hoBu<{Ak~g# ztj4MjGs_O#{QuAJ&+p>}U$va8tq)IB-eMar3@+amoh(asbZpyV(sn22$jci?wpebs z_B!HqKF0#D)~5RoMq-N}>-$M?nA#Ow)j84m_>^edx3V_p_7w*2yX2uRxf)keR|jfy z|EqgZ=6XV@l#OkYFgx?hPz%FU9?6Coj~J)V<_t4DqSE*OzhvG2G>e)U3-``__{_Iz z&QWfNBc3|4^6;!$#q;tdPhujEj7&!g8l`P_XicPBY)AlP%~sv`J5MYpKt@$#nBn zhqSguXe~Kk`?`;aUnep#nb%)Hc zj75Dfp3YHyb?l{3p*%7jF6{cdvVE0M}YsFURbTEx+9b|7b82xp@gC z-~GNT*VOIY%=4R<9C;np@9ZU*wsW1>GW9oXAb(!DwveyIK!D}K+q|QkCt8bDc)1S$ z|Ia4XuXNCCMOm3&!NVV{cYajPJhQoQ2J2M?p;>Cnt}OfV^7nU(ZI{1DwOuIfnW$tU z96D>K*4cBv^x5?eN%z(7=vI`qXm43sp~=VapfDk}HqzjH>HX|;rMI)r?H4}W&VNcA zWVO_-PUpah{y{+=US09Qlifq7#zxGT+TA^~zrS_<{Lc3F#<_pxm6aE@IA}{u@Zxy0 zDMOPjXoAA#|NsC0|NVVGzx=Ta*%=%(Y2xhP^1{MZZyOG} z`e}cd(zx8>XLs0j1$C*EZ~N>2@2ma&{l&%or>Fn7tNZi*aQpu6@Bi;Ee}BK7|Ns7) z#2^1^7clpgKvTIgHwzhg7pa1vw|1W=dXuq=izkB=se!sfYtxC z-cx`6!xv3vVd2C7|2r>m1?BPo|E1n^Iy*FkIy(nBJ0H6gXTBG^cKI_qe&?p5V-k@EMf&4FD-r4#2{n^?7`{j+_+&*6L-|q13 z?L71C40%3%G=chPPp7kAUwB;Hr87QG{{8_$K@PEVy&WFdtGFee^@%yk(CEY@)ZNoF z)o`esg@I^)U*`JMSp-i$LenNPXuS#IbnULdNT z@s<19HO@)1_IpVESDLE*Us6J%?tj{j%E=EuB(_PLOXS^8tN76H@U(cFK!fwak%OmB9XWPv)k4LLymgP?uV1v_fTG5U zb0<$8IPjr=;zUNvTN}0-Z2CG}W)A$6gvfTUB@S~+^^B)J-~cTx#{Yw*XXF|oaqTE zG*DpI-nPX_A#39Dh(PPr8XNn3589sz7i=+Pq-9!Gt7{R~{Xo1ELpbJY}DEr193jqd_|ry8rw={o#w| z;f2mT>i$MDHYYwjVx9ZA3lbxX;vWeJJ~S~7E-bsYX>)K;*t691^n`>4y)BKV`VV%> ztLhm_$!kPP>Z<S+(JXtwc6OU$!LtoS`G&+r)3SHX`00tLT*H!ms(FgKg_5MQ<9wV=yg zM$Wu5DUGJu%3qZy?7R`omh+5f9oy=Y9Zsy7HoSS<(~fkR@*cc-V)bbWiGPL$3(QRw z7_y_E{I7587MJv!pBAx4!s?x0HpDMmq71nj40!gujwvV>`OUC!$1jG1$G;iYPAGh3 z`s$8q%afv(33sM!nWa{3AuQ|eOr9We z`)$zOOoqAbdvBiETzlf`Y{~z&`zNv_xJ|m}W%k4Ry#IyzDNlQ*7@XH&-O4w80z0U} zU6goo;bLV;$;`)(7Oh>Yd*sLwhVu(HT-42&*;uV5sw#Z=*Txx3S$+%U|D;XQQz_7RQU{EK@aDzS2Ww2*(J^L({|L(E)zTf@#3P; zlZmNICrx{nk-b_^^W;H>+zKNFQRW^V9s#{2e;Qhj*GEiC`on+n_KTxz%52ja&$Xt; zOnJmOllOGrhezx?B^r2a7QA%QoF5?DA7i7yWfQTo;lZIfEFblkENRJ3Z{aT&(6N`8 z&?S(T#>UOfR#?cUs@i5*ETI>pAR_q$R8ukh|G%i{Q9?>eLRwl;;ls8Cn-bO}S*26ao3YY!=|9{~zb0@|ly&v9&ewflY$K}GL?ny$;69k$&I1XP~ z$>ZLCDdnpGIJYlKdz6rt_T>5VFQ1M5y*8N{Oh`=p*+1uNV&D(?opT!}%1->O8Y6a- z<%vY9LzgsLj~Z)Q4CkgYCw`B?+^qSC~Fe-bM`wIrSrdEp=_nqC4e6#PyeJauZxA|)}Q zBw68PJ#B4E5PYm2Lz8%S^U!Zw7u+~6f zs@sHV0%_kIR;s}yT2$AWqVtwYvwnVF+aixW$iS-6Co(zV#uL&%6&3kN| zD=nGz^$*|M%fl}hkb4&z-cF$K4ha?Q?%LGfw@pC!aa)C4l&VAUv3fQ(H8wW3h}J_a z+=X9mwFt_Eb$pWHO8DzG$FcLuQ|G*i4^$Mx6qZI=aGE~+|NsArP$8?%H!&ga4Ga!k zVpREfP$FY)fy;I=<41*C%{GJ?`uOlDDeGu%r-N;v;X{ux6;!TltiK7Zc=Dq zWOU+mcS~E;(IXKZT}8%*h6jV$bS^x2$mh>ELpPw}Kw@8_fl{kZ(b|5Q;{Sn?JRX|@ z^ftL&31ZPZ$KWqq(7kbMpg@zyQ3mF1M~-s@H(Z*~q^iDU9{02~O=h>C1CzaG%#hfz zL!ziyG9ps)+S=qDh0PDo%g0P~(}6poqvJzsYvj=*5nY{y#zsa5_wcR`7pVMbuj9MG z)bjt~MJ$mM1a+?jBnx^6^J(r>Na6?;U$@LgIqFiQdG$o8*&1?=FH%~}JtQaWw7 z(H6pS@aq5LQEUlJKB+q<@d*nvD=RyvsWD4SA8umhDSszmCSwn^Tcfa`cyoJu^s!@6 zp5C7iFY4voDv{8z<&A`-q=IJi#Eiz|7>V|NuTUutQsYGaNa2OaE7#JBXoF>x|sJY}X3y+5=yYiYhfBVI1Rzyz-v}(!V zYdESG)^JI%HB{^bD}!>@oyT5rC&CnV%Xk-yW@gM0^tD}csv%tOe_|y2JHa^(OTMu) zGc$8@JC~Ro-e1pidAYGoz2CngXNb*nV!+)`V}nA6{DilhwvCO_72V%08g*qgPE5Jf zcva9TVp?)#La@v+jrZpz^q=#s6`39)r2{BQkmv9jo2w!7xN}#srL{BbP6D*pRZ=h1(Bw={VUd*b+O)~&pc0R6>H-zPD=B)F9<3p=_#xwu>J+;O6~&`_D3G3!)Wy zk57PTkB^6A=ZRk{_Bp-tOsbODJjXr6)+4Cm6lYAXf6S|mY6?E)35tQiHaa{L7^J4p zIK(`&d;Zx22M(M$vtaF60r~R_)}3oGOY?=c4V2cNJ$U~7ySHoS&&%WUl#~cO-Ekzr zN$5Ab^p9WMf{wEmx?b!!<(#dTz~t65r7@KE+CL7*10B|p3naU{U5wdP#RYF}Q#he! zA$$1Pv4q^SA6^K88ujJxCyB8@{i0j=;?=36$D%%eGBQ+n7Ou!}^Z$#PZHy++>mXk_Z9ebL6Ujfo+C$(K6rAZ;4$0b#qLYao`DqDN(-YVP7~wdF)=d{as2eK z{>nzRCxPB>71_-bKiWs#WLO_I;jq+7)sOO~*Cv)K&TiIzBxxw};B|=r4oE;dN7b<4nV*^)#N@j%xg&V6XGAgT9ZQ5+C zsLqr!-@w4&z;D41H;zvGJ@M*+y^aRYHcS)aS#i;!=5FJ1sYA^Y1xL(h85@KeC2SCE zI`NjDt;eKM$7zzz9PY#!vy58Wj82>okagnWfd-ygp@%1DW%X+h&qC#;Y%VzmkC{(g z{r|s5_kV$gi68B=n2!61Rwzkqwm7(od$VYhCLZqbC^kUyt3-0Da(H4<-3c-h?8 z7;vz`bm{{y`2s;RGd6Q`Hd|YvsAW(qv_PI{YnPUi_TI3`=s*{5MOFiUkfJ;1uUi~F zK`fJ8IyCe-7;FrCP1BP%eatwREX!=hHcKaL>WOU$VN-Z`ZUnI%JjEJmC?ziG>0uxu zI`Kj=G!nE5B_uqmD>Aq_Hy&hp@bZB@oA|~fY?+4-)-UPR3Rs?OqO)O}@dU;&%`;9- zKNhu0I2cqebk)6fq)T7(!4`?da}o}CnC}Ql%B^&`F>4l&o*qw4jmu7`*>fy@{J2q5 z%WGm%F>^*hnbN_V9)kT5s;yfJUh7}lrJvHvbE2sw#?ePfLI2lE)(v8@JQAABn{OPr zwW(3EaWO~YfsWD`Q@%}sJv}^Y*6AfGjEX~l6LPBT*j zL3Va$7nbG)3W=+s?ww%a;<|Is-rm&InFh1OBLrSe3*Qp*Y5qjPTz?&wgk{WY+7@d% zBpW;kXmD?_Y1kikz(;k8*-C*l!HISf1ydw>Bsh~xHuU+@rappUQU-n~x~-W`~y zbKzW$OIC5hy05Bij*L0xno+;o7>}t+Pgp1(9@(`2gTv#bTi#KZ9R1{ZEN-nm(e+V9 z#LI=~9G)x2iiQq|`V6^}AVe{xzYB4N1gNRsGQYr2>C^Z)<<`|El1hj?zp5&u)^=knYvYbCp-*B* zVwUVJR z|59%3_;`P=HBbIM12;D#4o<$OZ44v>-`6Lmqy&VfUbvoaQ7e`H2RE>7Grf)fr|a3~XWgoP%vN=(p*jEl zKN;(9Yj*s2FirQnUG1;dUCiI?*$xS17!*#L)p#V_Gh((Q&;S3%H9tP&AMca*^nC9> z?@w#??u-Bb^MtQ6@R|9k{&3rWmwCb6TW>aM_ATFi`uOpLv||d|-It+*s)y!TsHn2$ z~It7 zk4;9qzTS|U$K%n;6;&)clh;vAjc3*@i?g74hX4N$O|qCVLn1Y`b?Vfg5fK+I9jV_^ zaa2R`-$M_L9aj1QB_AH_Jv)2-|KIKwv;Y15{r}?P^Rlw({4y1v-b$zBG9BEJt{`&u zhqhGA$&P}h-#$P8&&*!uGvmY0cX#cNd~yHxH`$_YPU7#fHuHQA?KXxrk0odD#55}% zI>eKe#gmi669Fl44ow0TxpU{Ls;JxuY! zf8NJG;x1_0oF2bErtZHJ*&CAAn+^{N0|NH8Y1cTU|SlNnRUA%H+=Lu$T1@Gi9 zyO9urGMT1R5OyFY%qGP?-u$=e44_)BsGpF(~jTV?5`jjKIuR^|NcIi z%8iAOo6pa$PhP^nwugr;=VRCEl0*m5<01#Q%xY|GY@9gpobyTAN>-7K4*uMW4*|NlSzK%kStamk{;K!=Dm_0i@%{b(ar^${-QTy|e_qYO zSObHE?pU@QPNtn6heJIRQxCf?cyW7sefYXRI|?3N?-oyqv#)v1XIXvaLx|Fkokm7H zmo7!{Kx$nle;FgAJ5f1u`04w5c{Q~*`}&TIQ;!*r z_wC(R@=}(WnfYE4bGB{M&AbCk+4`^8hp_VS@cg&0{Z;<^MlFzuzxc`}M^|@tljD`wb49`8z{u;?~D2O9H;^tN;J})z$d;eF0qn_y7O?>gxUY{U`iooSZD!mvyj2 zbaOp<8DpXu7RDAD`lad!vKXiXTC@N}SUaD5&VqWuzXUAn;=50)m<@uNeALZZs^Y_C;`O4*T4h=0V%{we+ zR&D$izHybKp6iYs5NX_p5XQOu-M1{9U8_LT~+UP%|16IZ-9tTK-hb>7pQ$^(E zf2y+U>o+e~Z)C0c!FX#I?;nPFGk*M(Y-tT#0*xpYK2A=*)YNPB|Nk96#koa^Me6AU zZiWf$`zA~}Vc+wmvAa=*u|6_FqN}T?qT>k~&jG#lxPLneEIOiJqR0SvH5c9;BaqsHJ{EN~VA{DT(LPC!RGP6XEv8#qk*$ zx;?qZ5XIZn_F&TlBj&9|KfBjXI54YZ!PDT7DYG^kze`oGU<8e|Jg$$7YD%m z?_^+UapT5~e~Zi97xw)B|Nr2-YPXq*ic7crIDX&*`}d5O^{lM(u7wqUI%f0a=}uOb zh^0#{4IT+Cb#n3T%V=epx1>4J&@jQ2r|18c zt*p0hpFR1(Uifl-eSK&C{kr6y;}6ycbC@X?T-C1+6kUAd#EBpOkN5vqRFrx2NJX8m zw6txT8JpOxd-GcP9vtNq7WUeB@W276l^-lDC1=i5^iEk=o}D~5;qZz`gKE_sv*!Qb z$hm07%xTL4BQqT?HCU;uPu{-0WrKnCCDX(V-oE-9J7#Qd$k?^v+vW#fMXo8aXGy6) zug}g-iTdjvlk`x$*1t!Gi(8i|^maaH;v5l&1FNrRKT?2RbJ{-ni)h z4JC=SXAa~9CCus!TldHL>ev3>xAmT!yO*gMPdxsku-JIZHX|3;iN$NA)eiWuXRHm0 zaaOo=Nke@Zr0#7};|mK@b97Ym^JCk=ADDmQFZ*h*4Q#e)+#;))o~Sgf{`j%KyL;n? zfa23)>WPU=b1l=-+?197Z85177f&=ZW4j`8$cHyVm@8~~lCN-t>x7#;C%A6h_@}L5 z;5YBg%2lUaHk6(`eR@NUrLz;0>)U`o491!Xx*jPh0acqJ;hof^{q#Y?qeo8N8}>_x zUt74<_``JhdP#|Y2ci_~CrbREkg~BX?w_Sq*L;hIfiY_owNCY!_HuJ?Zf)_{wl$ND zt?RIuQ?jn&g-4(4%~CT|86AraN)tWHa&rEgno5f4KKk$}>Dkkxa_MW<@Z7t{!_IH? zel00cvh;YHDgHzL~1s zcp2*BaXiBCK%Y%|gYH%LxDEZ!thPLjIKlJe57*OwKR>@e+Wr3acKhgUZyw&<{NBF) z-{BJ{6x5Erd!QgBto-cqmr+~1Sc={6Yj!~JtaSU{e6OAC&xh_`G6xwzRaILUvk@Bq54w}-CArxT>NY* zY<00Gvi>D{Bzo$dIrE>DMS_LpL|1MQ!?kZL|Mv0;-1MIxkOdvLb5iMpbd{&ip3SHk z^pNKXH^0L7MUHw52@R?f_pf-xIZfvOM3##}EP7^9cMJqh9bxHaj43?t;K8_VOVi1} z+{NlPGx->P_|-QA2uBBpDKezNNu1k4usFPR|j=Z-mH#)W%2qM2xo)7ad1K#vrvuCVH98?>jFX;a z-QN2D{f!UxnVE&N!dY)^Xy`OPBqXrpTVexGk;2_`XB)Pg9ek;&a?D=EY^7kE_N7=s!UZ4R$$I63##;XWcbfU<&xePgC`nFig|*%g=X<3 z1o23?axk+=-xXhx^CYKzrufEV0{^-iPJGj3ewSxp_%dyS3b#j-f^y`a|K~rv6#nI> z1&wV_nfLeqSN;3LZ*R}Mq_WZQ(7^^1p;JdroM5n?!EJT!V1v=&CY}$|W&Q_BX1r`* z)cn-ZuluLz#Kq2d$&X^ZVhxsheQF({s?5xw1v44$)1YDJEn`35=I7dV=Z-X7y5*qf zq;QjaXHlcL77q{4hlK`gGeo4Acb5kUGcsotB)h3hEIi+kJf&$#i9=6sv+M#@4mP$o zQ_sD={z4i$edpvUGvEGy-Jc)g%*@)%hyVZoFL+q`!^D7?WN8V_<`XH5r3PBe*(V)S zwiqUKJH=!&MGE-pGiP-jNj{Xo{=-}FNyw*Prd21Qj`Ng>-}k5T#|QJ8U%X+P53FCV zoEajb&Aj_};S|Y?Uf(SdKl^XR9GHKab6d2hYqRWxTmC=)&wu#T`^yoy0r&U+-&_9v z`2GDh6LyN`_y)2?h<=vOR~MVyUBD(Mo+Kj2BT>r5eD8*8s7&@syE z!faPcobx|?*FXICH_!inh8o(z=_d{47&O!5>{52rO8=cu1G8$!k6&NsZ%(hT{r``D z(*cQXKR?>%JYs12XJqi_@Bjb$Ifhm-ALRQ?U(7h|C#WQCzOTQpE+F{xo6F1Z&)Al* zVWU)QX|D0CEAc$V23r@eZIat?>Cvmuwl_I#|LTPJ=BV%=&|WaxTymbR^ZI^>jfXa^ z`g&^W{P*|&|NhQDOYOmJ!Gx1f<+GX`ShRIi74tg|^%#gu{Qv)d-S2PT54Z1s^yo~* zVtM~xzuN^5Mg*%bvy8ZFcHkrv6KH<;V^iYiG@&rzN0I;ASMmhEU+NvfHw)4dJv7PU z-TnW6KRwN7)7dTi(6CEx(+8tx|LbF{b2g^G`~JRuzV-LsnKK>z`+M8l8Ql#VRwo>o za!5yX!!ov*r)Lu~BQmm#Hf%E5Q+KxDkAIx2@IS=`(I|L-&2 znrtSo8wXGEoba5Tv6SQ5h2ygV{vG7qU;qF4w%qx>y_P+_H)qT!nAxyZ;y@Hbf^WqC zgsV#HrkHn3Vr3~PGP1BX-muB&&7DpoUoW8p!P6EjEPpRyS9c@{?u&hOe}8{^I)8ut zL~qt9E)2I>JJla?F4_-Tn)v7E`9rPp_xAmL{r>#gB}bZ+6BE}4ZP>wuQlGJpqk_SOHd{{HSgzx;!P?UPPk zI3?6`r*lrjzs1M&x-9Ge|GT^4;pb0J=jY^D{P}Bn^Tq}-`yE|MHe31sUHL42a%;1c zhqv*M9|m)*k8j8p|8+w|&*5Ip?d|zI>F161)b+ZGfy>TAQAL0D*Ze$xdism6tZBSW z+b&B5EpnK(PJ)v|(EtDc|9^jff4|WAepJ-G&JKg;Pd=PFVX#n* zW2>Ikz#JFH79Y=MXV*5jjeQU`%f4tDcF64Tr@O^FV`*r{RtY6V_{o1tNnUd94nU()1XJ;MQSzmBP zKjZ0<37%pC2hN;H$UXbu{rQ6bcFx(_hu_}*kvHMUe5L|bagN82-P_vvH)cadN)GLt z`S3JMKNN(@e%NU+@7 zyyq6nBJQG2&2<+=pqVJH=+CdO|4&cfzrXHaw(uFf`}H|FE~-tXD;^(iw+~)c z;+V^uu9GuDfBu>I@Kwgs#aGCpv~t|9N+}{f5mGF9^2e^?nA3YNgq0c?7}WebQ}EyJoC!4QD;v(;-u~ah zQt|n5!$aa_3Txjb{^6gyu2p@?{}r1z@z~cJ*;ITezrQcOTda0}$;<64G$t=qef{@$ z{e0Ws-~ZbmetCI+qAELcWM}e?X<{cHoRe)6R$nktT>kulakV@^)OpAT>|G&SyT+hh( zgHN)8_Yg~DHP7tz?o*WF_y5~n_xJtgbWp?o|EsI}rOp4{+4c4Ny}kSO5^^%lzm6D=Kn|+NGk(cV^j(4XpM5jqdDXO^uE;jEFMQ&@_~> zIZ^P_=`)98ylWGcX;&Uja=NF`M)YHd&(&Ja7WeDhlkncWIdvdeV&5h0N*#3;OH~wGHp73AZ`djs*Bk%L?|NH;v=l|E&K~B|IJ(`kw^wHzP zX~~DR)Q*M3{+&8iG^%;i27`ivKgXvTeOTU~cu821%^^>ywM6yDPe}_)$&ykKVq%gI z(@(P~Xn6S0xoy6^b8`;bnQ?)_4^$w|w3snd zQqJ}ld$x~5Uv6T<7W0X(q}YBiT`GS5@7tFe>#8sO#>ULj(#rb!i+5UfZ#K>j5SYEk z##s3B|M&O*|Nrw-KGX8#e}4IYoE!#^Jh*ryIGY%!+Wt9sem+n1Hiw%|P94@Vsi~4( zT@pn_67y`6cT`M#cviYi-d^I$)xx+>9o!CKy$@iHnQkE=@$c`?!v{{C`tjfX^8f!I z^0-r8>l8IJKl=4QEbNS$n$Dj;JVixkmMzmUGV;n`W2a%zf!XFf9}aW8zn>lu+PZ1xOvxQP!Dxp>ORHqv-=rVk`K#90ddA*d z?*H+sCJ$`(`^g;8azX(Xb@q({N7Ov*9{Ja-?f=NwSQ55qUaL@`iQNI+^|z8Nb|l{n zw^-I$cqKJ3TQo?+;3$i}yan;Tc=%I{+9Zd%58A!q63b+_j-eX&)1!jl8Z^)oF%)mHIyd+FQu5%N#$-(2AP z@hPYxdrITPiGdy+fe|w}uSK=p(dlH8v-|V=m1xL>nKK(_&TO13>pwer#`i$ZruzR9 z@9re-C~Srd{5!g`F85M;o-c{h@NPH?z%oHrVF=f1oUrA^ldd zVd6x_vR&MV>%S;7NJvUZ+}r!R{?`}zu&|8je-F3+U$(60&6~w1nbbi`m%rs^WPbc_ zfB5zF8}C`$7>wL5%(qSc@wNLv;7@4cFaMz`FQ1Yix^#9!NsoNWEZ(ZWZ~p)1FH_d8anztNRCrw6wPT`vTboDk>5M zvT0sg4NI0tn@h;qs}*botxo#?|Il=cd3LSpC;xw#(pa{Z^Nkqy7H_8ii`5p?hp+!< zSt}J)_wkd_i4z7NKK^x|)ziAXbHB$MLEe3}$vbML3%_nypm0)rhBEV$TP>hb0DI8# z<$L?p1D-%VFx{f=Ps6gX8G`>O=$Sa4IhX7!aH46x-CygPANn;4{+>1$0ZuV`($a^=VI6C9C@M;bze|JHwdbN^uTg=MP=shrle|$Vkb5e-E2a>13O*JbZ)S`J!UuIkw&vFO49H z@p|;nZ@rFAOo@`rUj9$)iwc`=6(_pLt^M_Tdg9|_^&f&AfBckmagkWERPx4+2bmcn z3!3-WOWxb>erNWVzt)Gpzvuap|NMZC?uVGOC7;!%^>WOBhD3PLpFbxHp7ZUh_2~Kk zq11N6YO@R0i~j%r4|4Ei_5Tl4Z%o(c`SpvZrsn_58E&)1Ee`(w&!eNmv)oV7MMQ)n zLop)ifNxqt!j3ZD`zbmyP-CZC{QK7`_woFJhV_x%SAID7%lGvB-(UN?{GH4pW-BY6 zUAzA8+~GEv)$YS<@xxoQc_vK?(36)xyxjkVbmT(@%dC!_Zjax`>pb6*0P4Vk3dr!a z8#XBT6&_Djz0_hihr3+o@&5Y%{BnQlzw1pof9%+Tb>}|3m){p?Y1L5wU*g}t*0{{J zdXbC;3@Z2cS291X`^Pia^B|YPRrY5uES4F0Y^eX;R(DTB58~WCbFH}=3DKzs zdY9_u)v`dxUT$9J6_bZDr<;(z zz?W^$s^McEJmvG$O z?AkP=^4Rb2X^;|d%Z!B}=eWerICxIaG4Y4c6}y7@CXLcU%v?wLdr~?CIe0jj&TV^i zDNcW+*kHob^Ehsf z+LAqKKlIkS&D(g;vZr$E6NWT!ia9jh0#uNFVA%W2Sx;qx=bBsdZ5&1J`uKpnyn&x^2$w5>C#N zKE9Iw{;3++xC$jCB~^TEdiWyn$OqLW|NCdo{m`Fcuwa@@0cbtSA=aeTqLA$TQE~eJe^6hLUk=oWRnYNiO*`H#4q8{dI?V9R{qr9_ zsUCiLndkjI$GN`2A099sZf55>b&6+qx$%+X#w~3|28Mteq;D0 zw^ovERoAvunY)&E&E=+=b$JLqlDMB9J2Sm!;Blwfd5BsIjr#B_= z_!iqvQ{Xviq<;6wJ-^+0PGzRbN5DG}=ghI?b14uxQs3gIDYQsntwkHN#bWIX){LE9 zVIhkqPWa)Kv+;kD_|4er#_MNNp6xyPq_!#Jex}s3NR#mMn?6>pjk)A{d4tZxgUnq@ zQeR@**Dcog{v|nfV(O(S%M{OdDmm_pmw7T%L6^I&@%`gnGt}A|mvyeXV`A9%#WvSF zHh5ad)#)=2H>e067h!dDm?qeE!)(>B^Sk!uo-p!snzr}hbBl_58y%`X{PI3%(Jph^ z>Gr(}(J$b%)iY-!SF;08n?0YOIr~F4_T_r9bD~~7{*h+aa>4V(rG?I{)}`-NSlROf z9=hCG7Ob&B$i-}BZTmgrgdddK~pF(-`p z*k`dR?sNVi`sH@7McVGC6)Ig3Yd_db`Q@7C-(w?}aklRK%OhW{o?YE^BWIudgmWJk z&j0&#ZDi=2^Z)lXG0uZ{XYPLPrT~HC_6q#$(M1L&9O8;CYVA@|f5ne%tl#*Hokjd$ z%$-?L(sQR&B$)kJ5Fd57;&gSv{O@~@7vA~X@VIJe<_sS>(`n{^E5!KiT&`{_jO}xG zjksR8v%qTOZ9&%%(YD5Q+DsKn>+Kht>$+Soef{6QAW< zbZ43US+vB3<6Qct9u-C3_$Kq&f^CaiKRX)jtN3Ns@c%6zsD$j9wV$ggK;&4x*fCia zLB*VmH>`QhhhuA)r?=f(<6cnwic`s{<%6VCL4lE|Y>d>dfG_9I{m^Y^wzs(d(?Yr6 zis|(3fA#fw-wIFf@wc1*__xbN=apK1>MaX0Z{+QaE0odcUgykF`{G=h2<;;{|zc%IuBYO$h==>*pt3eDdUph^47!sY$>0o;CF^Ih5J{G9tw))#zbG&P*G14h^m)M@kKS zf7Ry9wr~r*XmUwJ(L;!jy(MA0bI1F3rO>6T3S!fY-IUv$W!Mi_y!@%J;kDOjy_Oia zP5;%dvuv6ZC+Sqjf1P6&*s^Z({mplcGKYiA_OBC&V@OzHm z*SzhSH#SA+-Hu5W*2f$B z%szJEW@jC@K+3gw+;)$!6&5a8_wlqiTXuHa_I$}J zS0(LglM)gWLA{7esu^hx-Dz%aprNiSA#Ja+<|xYuuxMufOU}u1dggTF`8mUs6a$+I zho*E;Kk@(nIdcs8Tml5z>`yAIIEnl!=S&aC3eo#;?its*h8LbDi>`LD+)Rq!IPJ}? zzrSaGn*GPP{@Lz@GHlaWk6W&>vP*cJyQxjl(n<|!dC%f=3^7^vBgNisnJ?dYTIhCK?9B9* z9cAa71deO8zK`Etd-T{$`Sgk$Y2VM%W(|;mQ>8f;3Ubcx^o$QgEt_FG{r`=$2PUSH zixx}9?N75PX0z2%xHI2=;mfuq7xO%f<6`F?yg%PDdiKNr^;vy;AF->c@n~uBSXuGp z6aWm^%Gh@j*){`&2?|YC&P*iZv+{CLa!xe&oef5hcgN!xO*X zXpV*B6%$)fGoIo9e^XDiDk?NQJkH-%`B|b*UOj6j zr`~~+oO5q|{hAsOmUiJvih;TMeG{*S28QMahQr7EdD7B&q|J?TZXWvZ%k=OhK+me>73q<8+XidE7Ki_DMmFuTO==L?GRdd4v4+ey^+kNbIXUY$wVz z(?(b*>A|zK2TxK8ijxBZ6DLef{qQaA$M5NlQ>UJ}SkB>aI^D%jRFGNDHYp(`<;L#m ziT~@`&d-rJ#EC8Hs6GDu zq*08xrY6_6vrXsre-)nfu`W_xefyU+DcQcaA6Z=4T3dA4q($*Cg8<7Js^G1j1jQO(;%)cdJI4v)i5F2A^w#UaeTJxQsX>{V>Len4pw_1}O-kwh6 zjY(mfWm&3Sx4asy4bYFmIdG=H{wgcU&OY!+FZzzo#$U^b9Gi4>dVws+g{wFe~#zyZ)?=5l1BhvIUy>4y3Y9WT=;T!IO!-UaQLd{!JOg; zS$~pcJCDm$ID|FFvjhwDdMD0i4xT2MIaBE41N%$A)Xq$4QR~odHB?yIBdjj5YPICL z*u{sOpxbzrDi?hGoDM3Eo;;PTsg;y4m{#}qxY@_HWetp(J2-WH7ic<*9hn?f!1+k< zxN7T5xtM86>Z~T^Li`tkn8O^;m>g`-t#t@GwlbKfx0mPo`rvdAEI%-Us?B9V`*S#?1GQG#*zzk#~4an1sign7Y47H@5XIT-*Eb zqH@-|XONkPMJu-JsU5p;<;a8Arw<%!+kEz5gL0~Z@rGw+F;C587F-ROA!R74y!s%s zf$xE-9fr0|%NTrB_1YIs1-X-bozdgZ>7Z@9%X}9*af^TsG+_Aue^H4>dwaX0@nl8Q zsfB9Nk`+?8XVa6QBhs6 zAxxX6rK|U7w`AH`r#si2OCce+=$68UEhf$D^+D$BuxOmc zI=Ne?1K6HntD8RrTM=P0R?RGojnSYI*SQD)!4aC2p2h4zZo-Al4q{bzoB%noXkXorLA zuz$9`enPNvc+tr(6MJ}^0|cB444S*Rl1?i|X*EXmFOKYr+&n>Cdk2H#<$|U5XWTgD z7ITP6@K~JUVe4Ufd|kgS`MAWjHHj8gQy%^-wwk*GQt2=9d<0q-nwIw9adJR-`iCzm z1!ZXgp+~R(JK!T$A=NV@(8=wRVuaB_-(!mfS9^cRR$nRm>%aZs`};wscf7fKdP9X_ zQn=<_Xefm~a$uXlyZYTb9yd2od!VT3!qJ@ z!JFI1KYUXC`MP8UEEcjs4n2ENMuta8iAP9?M@@}q&Kw>oDIPbs3wu3lYF@C%F4}0w zE&6CO$3&@*>U?Z_UNUFho@@uOxHKwQ+xWu-`@=LA9I6>8qg^- z8;&%xJ^1PA(9k${uA@(nW5kTc=4MCF&VzU6G`6=lPMqi%C%21j?nJ?=6^xt8)+|f( zO5}Q!ANc4}t7f;izS`sa_Mlo?FFtKY{d~|V6^Gj;?dsKjo^S@uw1Fx(DVityTu8WZxp`gLv!|&OCMQprl=R@4TPCNFrCpBFz!JP0?MmyuT2Y6zwE`RRIxu-S1G@? z_^{ya$LrGcx7Ocw&|KoOw#V_@mSc0i{OdfqBasp8g;kx*{0S*30>a4&$;k=n=?@+y z35cdnn3xE1`sCyZ)6xV)Qy)BPnpeSTFT$Fg%oO?IzI~g0{eosc9i0n1+D#Pa+oxB2 zYI*oExlP`Ffw<6}U97fJ??0!%@^8B+3`vZ4<{EMxP!MsJ=W1F}@n86q{J#eef?Tab ztV&;o&C+D&(2xFfxWMLARFINe&rO}3dUKLhSmr3F2i>~;{NCd`&(7xdHAe-ywgd$% zHNBSP=klUf+~s}G$GgiamI{OYpxMdH-=M-FCiddv51tLeArH@Tb0npt{P;Qf;cfG( jeivQ^h&u?LfW^#kCBJ6FvfZ0pK^i<={an^LB{Ts5;0bTK literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/image/python/ops/dense_image_warp.py b/tensorflow/contrib/image/python/ops/dense_image_warp.py new file mode 100644 index 0000000000..f9b219ada4 --- /dev/null +++ b/tensorflow/contrib/image/python/ops/dense_image_warp.py @@ -0,0 +1,201 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Image warping using per-pixel flow vectors.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops + +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def _interpolate_bilinear(grid, + query_points, + name='interpolate_bilinear', + indexing='ij'): + """Similar to Matlab's interp2 function. + + Finds values for query points on a grid using bilinear interpolation. + + Args: + grid: a 4-D float `Tensor` of shape `[batch, height, width, channels]`. + query_points: a 3-D float `Tensor` of N points with shape `[batch, N, 2]`. + name: a name for the operation (optional). + indexing: whether the query points are specified as row and column (ij), + or Cartesian coordinates (xy). + + Returns: + values: a 3-D `Tensor` with shape `[batch, N, channels]` + + Raises: + ValueError: if the indexing mode is invalid, or if the shape of the inputs + invalid. + """ + if indexing != 'ij' and indexing != 'xy': + raise ValueError('Indexing mode must be \'ij\' or \'xy\'') + + with ops.name_scope(name): + grid = ops.convert_to_tensor(grid) + query_points = ops.convert_to_tensor(query_points) + shape = grid.get_shape().as_list() + if len(shape) != 4: + msg = 'Grid must be 4 dimensional. Received size: ' + raise ValueError(msg + str(grid.get_shape())) + + batch_size, height, width, channels = shape + query_type = query_points.dtype + grid_type = grid.dtype + + if (len(query_points.get_shape()) != 3 or + query_points.get_shape()[2].value != 2): + msg = ('Query points must be 3 dimensional and size 2 in dim 2. Received ' + 'size: ') + raise ValueError(msg + str(query_points.get_shape())) + + _, num_queries, _ = query_points.get_shape().as_list() + + if height < 2 or width < 2: + msg = 'Grid must be at least batch_size x 2 x 2 in size. Received size: ' + raise ValueError(msg + str(grid.get_shape())) + + alphas = [] + floors = [] + ceils = [] + + index_order = [0, 1] if indexing == 'ij' else [1, 0] + unstacked_query_points = array_ops.unstack(query_points, axis=2) + + for dim in index_order: + with ops.name_scope('dim-' + str(dim)): + queries = unstacked_query_points[dim] + + size_in_indexing_dimension = shape[dim + 1] + + # max_floor is size_in_indexing_dimension - 2 so that max_floor + 1 + # is still a valid index into the grid. + max_floor = math_ops.cast(size_in_indexing_dimension - 2, query_type) + min_floor = constant_op.constant(0.0, dtype=query_type) + floor = math_ops.minimum( + math_ops.maximum(min_floor, math_ops.floor(queries)), max_floor) + int_floor = math_ops.cast(floor, dtypes.int32) + floors.append(int_floor) + ceil = int_floor + 1 + ceils.append(ceil) + + # alpha has the same type as the grid, as we will directly use alpha + # when taking linear combinations of pixel values from the image. + alpha = math_ops.cast(queries - floor, grid_type) + min_alpha = constant_op.constant(0.0, dtype=grid_type) + max_alpha = constant_op.constant(1.0, dtype=grid_type) + alpha = math_ops.minimum(math_ops.maximum(min_alpha, alpha), max_alpha) + + # Expand alpha to [b, n, 1] so we can use broadcasting + # (since the alpha values don't depend on the channel). + alpha = array_ops.expand_dims(alpha, 2) + alphas.append(alpha) + + if batch_size * height * width > np.iinfo(np.int32).max / 8: + error_msg = """The image size or batch size is sufficiently large + that the linearized addresses used by array_ops.gather + may exceed the int32 limit.""" + raise ValueError(error_msg) + + flattened_grid = array_ops.reshape(grid, + [batch_size * height * width, channels]) + batch_offsets = array_ops.reshape( + math_ops.range(batch_size) * height * width, [batch_size, 1]) + + # This wraps array_ops.gather. We reshape the image data such that the + # batch, y, and x coordinates are pulled into the first dimension. + # Then we gather. Finally, we reshape the output back. It's possible this + # code would be made simpler by using array_ops.gather_nd. + def gather(y_coords, x_coords, name): + with ops.name_scope('gather-' + name): + linear_coordinates = batch_offsets + y_coords * width + x_coords + gathered_values = array_ops.gather(flattened_grid, linear_coordinates) + return array_ops.reshape(gathered_values, + [batch_size, num_queries, channels]) + + # grab the pixel values in the 4 corners around each query point + top_left = gather(floors[0], floors[1], 'top_left') + top_right = gather(floors[0], ceils[1], 'top_right') + bottom_left = gather(ceils[0], floors[1], 'bottom_left') + bottom_right = gather(ceils[0], ceils[1], 'bottom_right') + + # now, do the actual interpolation + with ops.name_scope('interpolate'): + interp_top = alphas[1] * (top_right - top_left) + top_left + interp_bottom = alphas[1] * (bottom_right - bottom_left) + bottom_left + interp = alphas[0] * (interp_bottom - interp_top) + interp_top + + return interp + + +def dense_image_warp(image, flow, name='dense_image_warp'): + """Image warping using per-pixel flow vectors. + + Apply a non-linear warp to the image, where the warp is specified by a dense + flow field of offset vectors that define the correspondences of pixel values + in the output image back to locations in the source image. Specifically, the + pixel value at output[b, j, i, c] is + images[b, j - flow[b, j, i, 0], i - flow[b, j, i, 1], c]. + + The locations specified by this formula do not necessarily map to an int + index. Therefore, the pixel value is obtained by bilinear + interpolation of the 4 nearest pixels around + (b, j - flow[b, j, i, 0], i - flow[b, j, i, 1]). For locations outside + of the image, we use the nearest pixel values at the image boundary. + + + Args: + image: 4-D float `Tensor` with shape `[batch, height, width, channels]`. + flow: A 4-D float `Tensor` with shape `[batch, height, width, 2]`. + name: A name for the operation (optional). + + Note that image and flow can be of type tf.half, tf.float32, or tf.float64, + and do not necessarily have to be the same type. + + Returns: + A 4-D float `Tensor` with shape`[batch, height, width, channels]` + and same type as input image. + + Raises: + ValueError: if height < 2 or width < 2 or the inputs have the wrong number + of dimensions. + """ + with ops.name_scope(name): + batch_size, height, width, channels = image.get_shape().as_list() + # The flow is defined on the image grid. Turn the flow into a list of query + # points in the grid space. + grid_x, grid_y = array_ops.meshgrid( + math_ops.range(width), math_ops.range(height)) + stacked_grid = math_ops.cast( + array_ops.stack([grid_y, grid_x], axis=2), flow.dtype) + batched_grid = array_ops.expand_dims(stacked_grid, axis=0) + query_points_on_grid = batched_grid - flow + query_points_flattened = array_ops.reshape(query_points_on_grid, + [batch_size, height * width, 2]) + # Compute values at the query points, then reshape the result back to the + # image grid. + interpolated = _interpolate_bilinear(image, query_points_flattened) + interpolated = array_ops.reshape(interpolated, + [batch_size, height, width, channels]) + return interpolated diff --git a/tensorflow/contrib/image/python/ops/interpolate_spline.py b/tensorflow/contrib/image/python/ops/interpolate_spline.py new file mode 100644 index 0000000000..daf8c56456 --- /dev/null +++ b/tensorflow/contrib/image/python/ops/interpolate_spline.py @@ -0,0 +1,291 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Polyharmonic spline interpolation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops import math_ops + +EPSILON = 0.0000000001 + + +def _cross_squared_distance_matrix(x, y): + """Pairwise squared distance between two (batch) matrices' rows (2nd dim). + + Computes the pairwise distances between rows of x and rows of y + Args: + x: [batch_size, n, d] float `Tensor` + y: [batch_size, m, d] float `Tensor` + + Returns: + squared_dists: [batch_size, n, m] float `Tensor`, where + squared_dists[b,i,j] = ||x[b,i,:] - y[b,j,:]||^2 + """ + x_norm_squared = math_ops.reduce_sum(math_ops.square(x), 2) + y_norm_squared = math_ops.reduce_sum(math_ops.square(y), 2) + + # Expand so that we can broadcast. + x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) + y_norm_squared_tile = array_ops.expand_dims(y_norm_squared, 1) + + x_y_transpose = math_ops.matmul(x, y, adjoint_b=True) + + # squared_dists[b,i,j] = ||x_bi - y_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj + squared_dists = x_norm_squared_tile - 2 * x_y_transpose + y_norm_squared_tile + + return squared_dists + + +def _pairwise_squared_distance_matrix(x): + """Pairwise squared distance among a (batch) matrix's rows (2nd dim). + + This saves a bit of computation vs. using _cross_squared_distance_matrix(x,x) + + Args: + x: `[batch_size, n, d]` float `Tensor` + + Returns: + squared_dists: `[batch_size, n, n]` float `Tensor`, where + squared_dists[b,i,j] = ||x[b,i,:] - x[b,j,:]||^2 + """ + + x_x_transpose = math_ops.matmul(x, x, adjoint_b=True) + x_norm_squared = array_ops.matrix_diag_part(x_x_transpose) + x_norm_squared_tile = array_ops.expand_dims(x_norm_squared, 2) + + # squared_dists[b,i,j] = ||x_bi - x_bj||^2 = x_bi'x_bi- 2x_bi'x_bj + x_bj'x_bj + squared_dists = x_norm_squared_tile - 2 * x_x_transpose + array_ops.transpose( + x_norm_squared_tile, [0, 2, 1]) + + return squared_dists + + +def _solve_interpolation(train_points, train_values, order, + regularization_weight): + """Solve for interpolation coefficients. + + Computes the coefficients of the polyharmonic interpolant for the 'training' + data defined by (train_points, train_values) using the kernel phi. + + Args: + train_points: `[b, n, d]` interpolation centers + train_values: `[b, n, k]` function values + order: order of the interpolation + regularization_weight: weight to place on smoothness regularization term + + Returns: + w: `[b, n, k]` weights on each interpolation center + v: `[b, d, k]` weights on each input dimension + """ + + b, n, d = train_points.get_shape().as_list() + _, _, k = train_values.get_shape().as_list() + + # First, rename variables so that the notation (c, f, w, v, A, B, etc.) + # follows https://en.wikipedia.org/wiki/Polyharmonic_spline. + # To account for python style guidelines we use + # matrix_a for A and matrix_b for B. + + c = train_points + f = train_values + + # Next, construct the linear system. + with ops.name_scope('construct_linear_system'): + + matrix_a = _phi(_pairwise_squared_distance_matrix(c), order) # [b, n, n] + if regularization_weight > 0: + batch_identity_matrix = np.expand_dims(np.eye(n), 0) + batch_identity_matrix = constant_op.constant( + batch_identity_matrix, dtype=train_points.dtype) + + matrix_a += regularization_weight * batch_identity_matrix + + # Append ones to the feature values for the bias term in the linear model. + ones = array_ops.ones([b, n, 1], train_points.dtype) + matrix_b = array_ops.concat([c, ones], 2) # [b, n, d + 1] + + # [b, n + d + 1, n] + left_block = array_ops.concat( + [matrix_a, array_ops.transpose(matrix_b, [0, 2, 1])], 1) + + num_b_cols = matrix_b.get_shape()[2] # d + 1 + lhs_zeros = array_ops.zeros([b, num_b_cols, num_b_cols], train_points.dtype) + right_block = array_ops.concat([matrix_b, lhs_zeros], + 1) # [b, n + d + 1, d + 1] + lhs = array_ops.concat([left_block, right_block], + 2) # [b, n + d + 1, n + d + 1] + + rhs_zeros = array_ops.zeros([b, d + 1, k], train_points.dtype) + rhs = array_ops.concat([f, rhs_zeros], 1) # [b, n + d + 1, k] + + # Then, solve the linear system and unpack the results. + with ops.name_scope('solve_linear_system'): + w_v = linalg_ops.matrix_solve(lhs, rhs) + w = w_v[:, :n, :] + v = w_v[:, n:, :] + + return w, v + + +def _apply_interpolation(query_points, train_points, w, v, order): + """Apply polyharmonic interpolation model to data. + + Given coefficients w and v for the interpolation model, we evaluate + interpolated function values at query_points. + + Args: + query_points: `[b, m, d]` x values to evaluate the interpolation at + train_points: `[b, n, d]` x values that act as the interpolation centers + ( the c variables in the wikipedia article) + w: `[b, n, k]` weights on each interpolation center + v: `[b, d, k]` weights on each input dimension + order: order of the interpolation + + Returns: + Polyharmonic interpolation evaluated at points defined in query_points. + """ + + batch_size = train_points.get_shape()[0].value + num_query_points = query_points.get_shape()[1].value + + # First, compute the contribution from the rbf term. + pairwise_dists = _cross_squared_distance_matrix(query_points, train_points) + phi_pairwise_dists = _phi(pairwise_dists, order) + + rbf_term = math_ops.matmul(phi_pairwise_dists, w) + + # Then, compute the contribution from the linear term. + # Pad query_points with ones, for the bias term in the linear model. + query_points_pad = array_ops.concat([ + query_points, + array_ops.ones([batch_size, num_query_points, 1], train_points.dtype) + ], 2) + linear_term = math_ops.matmul(query_points_pad, v) + + return rbf_term + linear_term + + +def _phi(r, order): + """Coordinate-wise nonlinearity used to define the order of the interpolation. + + See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition. + + Args: + r: input op + order: interpolation order + + Returns: + phi_k evaluated coordinate-wise on r, for k = r + """ + + # using EPSILON prevents log(0), sqrt0), etc. + # sqrt(0) is well-defined, but its gradient is not + with ops.name_scope('phi'): + if order == 1: + r = math_ops.maximum(r, EPSILON) + r = math_ops.sqrt(r) + return r + elif order == 2: + return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON)) + elif order == 4: + return 0.5 * math_ops.square(r) * math_ops.log( + math_ops.maximum(r, EPSILON)) + elif order % 2 == 0: + r = math_ops.maximum(r, EPSILON) + return 0.5 * math_ops.pow(r, 0.5 * order) * math_ops.log(r) + else: + r = math_ops.maximum(r, EPSILON) + return math_ops.pow(r, 0.5 * order) + + +def interpolate_spline(train_points, + train_values, + query_points, + order, + regularization_weight=0.0, + name='interpolate_spline'): + r"""Interpolate signal using polyharmonic interpolation. + + The interpolant has the form + $$f(x) = \sum_{i = 1}^n w_i \phi(||x - c_i||) + v^T x + b.$$ + + This is a sum of two terms: (1) a weighted sum of radial basis function (RBF) + terms, with the centers \\(c_1, ... c_n\\), and (2) a linear term with a bias. + The \\(c_i\\) vectors are 'training' points. In the code, b is absorbed into v + by appending 1 as a final dimension to x. The coefficients w and v are + estimated such that the interpolant exactly fits the value of the function at + the \\(c_i\\) points, the vector w is orthogonal to each \\(c_i\\), and the + vector w sums to 0. With these constraints, the coefficients can be obtained + by solving a linear system. + + \\(\phi\\) is an RBF, parametrized by an interpolation + order. Using order=2 produces the well-known thin-plate spline. + + We also provide the option to perform regularized interpolation. Here, the + interpolant is selected to trade off between the squared loss on the training + data and a certain measure of its curvature + ([details](https://en.wikipedia.org/wiki/Polyharmonic_spline)). + Using a regularization weight greater than zero has the effect that the + interpolant will no longer exactly fit the training data. However, it may be + less vulnerable to overfitting, particularly for high-order interpolation. + + Note the interpolation procedure is differentiable with respect to all inputs + besides the order parameter. + + Args: + train_points: `[batch_size, n, d]` float `Tensor` of n d-dimensional + locations. These do not need to be regularly-spaced. + train_values: `[batch_size, n, k]` float `Tensor` of n c-dimensional values + evaluated at train_points. + query_points: `[batch_size, m, d]` `Tensor` of m d-dimensional locations + where we will output the interpolant's values. + order: order of the interpolation. Common values are 1 for + \\(\phi(r) = r\\), 2 for \\(\phi(r) = r^2 * log(r)\\) (thin-plate spline), + or 3 for \\(\phi(r) = r^3\\). + regularization_weight: weight placed on the regularization term. + This will depend substantially on the problem, and it should always be + tuned. For many problems, it is reasonable to use no regularization. + If using a non-zero value, we recommend a small value like 0.001. + name: name prefix for ops created by this function + + Returns: + `[b, m, k]` float `Tensor` of query values. We use train_points and + train_values to perform polyharmonic interpolation. The query values are + the values of the interpolant evaluated at the locations specified in + query_points. + """ + with ops.name_scope(name): + train_points = ops.convert_to_tensor(train_points) + train_values = ops.convert_to_tensor(train_values) + query_points = ops.convert_to_tensor(query_points) + + # First, fit the spline to the observed data. + with ops.name_scope('solve'): + w, v = _solve_interpolation(train_points, train_values, order, + regularization_weight) + + # Then, evaluate the spline at the query locations. + with ops.name_scope('predict'): + query_values = _apply_interpolation(query_points, train_points, w, v, + order) + + return query_values diff --git a/tensorflow/contrib/image/python/ops/sparse_image_warp.py b/tensorflow/contrib/image/python/ops/sparse_image_warp.py new file mode 100644 index 0000000000..54a215d6db --- /dev/null +++ b/tensorflow/contrib/image/python/ops/sparse_image_warp.py @@ -0,0 +1,201 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Image warping using sparse flow defined at control points.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.image.python.ops import dense_image_warp +from tensorflow.contrib.image.python.ops import interpolate_spline + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops + + +def _get_grid_locations(image_height, image_width): + """Wrapper for np.meshgrid.""" + + y_range = np.linspace(0, image_height - 1, image_height) + x_range = np.linspace(0, image_width - 1, image_width) + y_grid, x_grid = np.meshgrid(y_range, x_range, indexing='ij') + return np.stack((y_grid, x_grid), -1) + + +def _expand_to_minibatch(np_array, batch_size): + """Tile arbitrarily-sized np_array to include new batch dimension.""" + tiles = [batch_size] + [1] * np_array.ndim + return np.tile(np.expand_dims(np_array, 0), tiles) + + +def _get_boundary_locations(image_height, image_width, num_points_per_edge): + """Compute evenly-spaced indices along edge of image.""" + y_range = np.linspace(0, image_height - 1, num_points_per_edge + 2) + x_range = np.linspace(0, image_width - 1, num_points_per_edge + 2) + ys, xs = np.meshgrid(y_range, x_range, indexing='ij') + is_boundary = np.logical_or( + np.logical_or(xs == 0, xs == image_width - 1), + np.logical_or(ys == 0, ys == image_height - 1)) + return np.stack([ys[is_boundary], xs[is_boundary]], axis=-1) + + +def _add_zero_flow_controls_at_boundary(control_point_locations, + control_point_flows, image_height, + image_width, boundary_points_per_edge): + """Add control points for zero-flow boundary conditions. + + Augment the set of control points with extra points on the + boundary of the image that have zero flow. + + Args: + control_point_locations: input control points + control_point_flows: their flows + image_height: image height + image_width: image width + boundary_points_per_edge: number of points to add in the middle of each + edge (not including the corners). + The total number of points added is + 4 + 4*(boundary_points_per_edge). + + Returns: + merged_control_point_locations: augmented set of control point locations + merged_control_point_flows: augmented set of control point flows + """ + + batch_size = control_point_locations.get_shape()[0].value + + boundary_point_locations = _get_boundary_locations(image_height, image_width, + boundary_points_per_edge) + + boundary_point_flows = np.zeros([boundary_point_locations.shape[0], 2]) + + type_to_use = control_point_locations.dtype + boundary_point_locations = constant_op.constant( + _expand_to_minibatch(boundary_point_locations, batch_size), + dtype=type_to_use) + + boundary_point_flows = constant_op.constant( + _expand_to_minibatch(boundary_point_flows, batch_size), dtype=type_to_use) + + merged_control_point_locations = array_ops.concat( + [control_point_locations, boundary_point_locations], 1) + + merged_control_point_flows = array_ops.concat( + [control_point_flows, boundary_point_flows], 1) + + return merged_control_point_locations, merged_control_point_flows + + +def sparse_image_warp(image, + source_control_point_locations, + dest_control_point_locations, + interpolation_order=2, + regularization_weight=0.0, + num_boundary_points=0, + name='sparse_image_warp'): + """Image warping using correspondences between sparse control points. + + Apply a non-linear warp to the image, where the warp is specified by + the source and destination locations of a (potentially small) number of + control points. First, we use a polyharmonic spline + (@{tf.contrib.image.interpolate_spline}) to interpolate the displacements + between the corresponding control points to a dense flow field. + Then, we warp the image using this dense flow field + (@{tf.contrib.image.dense_image_warp}). + + Let t index our control points. For regularization_weight=0, we have: + warped_image[b, dest_control_point_locations[b, t, 0], + dest_control_point_locations[b, t, 1], :] = + image[b, source_control_point_locations[b, t, 0], + source_control_point_locations[b, t, 1], :]. + + For regularization_weight > 0, this condition is met approximately, since + regularized interpolation trades off smoothness of the interpolant vs. + reconstruction of the interpolant at the control points. + See @{tf.contrib.image.interpolate_spline} for further documentation of the + interpolation_order and regularization_weight arguments. + + + Args: + image: `[batch, height, width, channels]` float `Tensor` + source_control_point_locations: `[batch, num_control_points, 2]` float + `Tensor` + dest_control_point_locations: `[batch, num_control_points, 2]` float + `Tensor` + interpolation_order: polynomial order used by the spline interpolation + regularization_weight: weight on smoothness regularizer in interpolation + num_boundary_points: How many zero-flow boundary points to include at + each image edge.Usage: + num_boundary_points=0: don't add zero-flow points + num_boundary_points=1: 4 corners of the image + num_boundary_points=2: 4 corners and one in the middle of each edge + (8 points total) + num_boundary_points=n: 4 corners and n-1 along each edge + name: A name for the operation (optional). + + Note that image and offsets can be of type tf.half, tf.float32, or + tf.float64, and do not necessarily have to be the same type. + + Returns: + warped_image: `[batch, height, width, channels]` float `Tensor` with same + type as input image. + flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense + flow field produced by the interpolation. + """ + + image = ops.convert_to_tensor(image) + source_control_point_locations = ops.convert_to_tensor( + source_control_point_locations) + dest_control_point_locations = ops.convert_to_tensor( + dest_control_point_locations) + + control_point_flows = ( + dest_control_point_locations - source_control_point_locations) + + clamp_boundaries = num_boundary_points > 0 + boundary_points_per_edge = num_boundary_points - 1 + + with ops.name_scope(name): + + batch_size, image_height, image_width, _ = image.get_shape().as_list() + + # This generates the dense locations where the interpolant + # will be evaluated. + grid_locations = _get_grid_locations(image_height, image_width) + + flattened_grid_locations = np.reshape(grid_locations, + [image_height * image_width, 2]) + + flattened_grid_locations = constant_op.constant( + _expand_to_minibatch(flattened_grid_locations, batch_size), image.dtype) + + if clamp_boundaries: + (dest_control_point_locations, + control_point_flows) = _add_zero_flow_controls_at_boundary( + dest_control_point_locations, control_point_flows, image_height, + image_width, boundary_points_per_edge) + + flattened_flows = interpolate_spline.interpolate_spline( + dest_control_point_locations, control_point_flows, + flattened_grid_locations, interpolation_order, regularization_weight) + + dense_flows = array_ops.reshape(flattened_flows, + [batch_size, image_height, image_width, 2]) + + warped_image = dense_image_warp.dense_image_warp(image, dense_flows) + + return warped_image, dense_flows diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index b66c45ec13..e2518f6cbf 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -75,6 +75,7 @@ BLACKLIST = [ "//tensorflow/contrib/timeseries/examples:data/period_trend.csv", # pylint:disable=line-too-long "//tensorflow/contrib/timeseries/python/timeseries:test_utils", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", # pylint:disable=line-too-long + "//tensorflow/contrib/image:sparse_image_warp_test_data", ] -- GitLab From b7cbb2c9b155f3528edd8d26f7595dde8de578a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 07:29:45 -0700 Subject: [PATCH 169/960] Adds missing protobuf dep to tf.contrib.data ops. PiperOrigin-RevId: 189580464 --- tensorflow/contrib/data/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 0458199ff7..5ba2297e7f 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -29,7 +29,10 @@ py_library( tf_custom_op_library( name = "_dataset_ops.so", srcs = ["ops/dataset_ops.cc"], - deps = ["//tensorflow/contrib/data/kernels:dataset_kernels"], + deps = [ + "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/core:lib_proto_parsing", + ], ) tf_gen_op_libs( -- GitLab From c07b18684c3b20dd91911a31bbd6169ad9cc1617 Mon Sep 17 00:00:00 2001 From: Alan Lee Date: Mon, 19 Mar 2018 23:24:53 +0800 Subject: [PATCH 170/960] Fix set_difference doc --- tensorflow/python/ops/sets_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/sets_impl.py b/tensorflow/python/ops/sets_impl.py index b0eecd8a1e..21e08d03d2 100644 --- a/tensorflow/python/ops/sets_impl.py +++ b/tensorflow/python/ops/sets_impl.py @@ -247,7 +247,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True): # # collections.OrderedDict([ # ((0, 0, 0), 2), - # ((0, 0, 1), 3), + # ((0, 1, 0), 3), # ]) ``` -- GitLab From 31dc58502a4a2a594424fc76a3b4a2a508f28200 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Mon, 19 Mar 2018 17:38:38 +0200 Subject: [PATCH 171/960] Fix typos in `resampling.py`. - Correct `initial_dist` -> `target_dist` - `variabes` -> `variables` --- tensorflow/contrib/data/python/ops/resampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 56f526a330..f4015f19fb 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -54,7 +54,7 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" dist_estimation_batch_size = 32 - target_dist_t = ops.convert_to_tensor(target_dist, name="initial_dist") + target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") class_values_ds = dataset.map(class_func) if initial_dist is not None: initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") @@ -151,7 +151,7 @@ def _calculate_acceptance_probs(initial_probs, target_probs): ``` - A solution for a_i in terms of the other variabes is the following: + A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ # Add tiny to initial_probs to avoid divide by zero. -- GitLab From 98f522d3e982daafa9ccf136894cc83f496f5a11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 09:28:58 -0700 Subject: [PATCH 172/960] Remove a few unused #includes PiperOrigin-RevId: 189593522 --- tensorflow/compiler/xla/array.h | 1 + tensorflow/compiler/xla/tests/test_macros.cc | 1 + tensorflow/contrib/tensor_forest/kernels/data_spec.h | 1 + tensorflow/core/BUILD | 1 + tensorflow/core/lib/bfloat16/bfloat16.h | 4 ++-- tensorflow/core/lib/io/path.cc | 4 ++-- tensorflow/core/lib/io/path.h | 1 - tensorflow/core/lib/strings/str_util.cc | 2 ++ tensorflow/core/lib/strings/str_util.h | 1 - tensorflow/core/platform/env.cc | 1 - tensorflow/core/platform/file_system.cc | 3 --- tensorflow/core/platform/file_system.h | 1 - tensorflow/core/platform/types.h | 4 ---- 13 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 24b58bec11..ea75ad32d5 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/core/bits.h" +#include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/xla/tests/test_macros.cc b/tensorflow/compiler/xla/tests/test_macros.cc index 978a669bca..be35ec6c6e 100644 --- a/tensorflow/compiler/xla/tests/test_macros.cc +++ b/tensorflow/compiler/xla/tests/test_macros.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/regexp.h" namespace xla { diff --git a/tensorflow/contrib/tensor_forest/kernels/data_spec.h b/tensorflow/contrib/tensor_forest/kernels/data_spec.h index 0a3abe56df..bb33400214 100644 --- a/tensorflow/contrib/tensor_forest/kernels/data_spec.h +++ b/tensorflow/contrib/tensor_forest/kernels/data_spec.h @@ -21,6 +21,7 @@ #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace tensorforest { diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 14769c3770..df44857185 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -594,6 +594,7 @@ cc_library( "platform/prefetch.h", "platform/thread_annotations.h", "platform/types.h", + "platform/cpu_info.h", ] + if_windows(["platform/windows/integral_types.h"]), visibility = ["//visibility:public"], deps = diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index 6a1cc0994f..075a8d1430 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -19,8 +19,8 @@ limitations under the License. #include #include -// We need types.h here in order to pick up __BYTE_ORDER__ from cpu_info.h -#include "tensorflow/core/platform/types.h" +// We need cpu_info.h here in order to pick up __BYTE_ORDER__. +#include "tensorflow/core/platform/cpu_info.h" #ifdef __CUDACC__ // All functions callable from CUDA code must be qualified with __device__ diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc index 83f15e134d..996fbf62e5 100644 --- a/tensorflow/core/lib/io/path.cc +++ b/tensorflow/core/lib/io/path.cc @@ -27,9 +27,9 @@ limitations under the License. #include #include "tensorflow/core/lib/strings/scanner.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { namespace io { diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index 47bb2b998d..818ba99888 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_PATH_H_ #define TENSORFLOW_LIB_IO_PATH_H_ -#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" namespace tensorflow { diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc index 9dbb74f6b8..2c9e98357a 100644 --- a/tensorflow/core/lib/strings/str_util.cc +++ b/tensorflow/core/lib/strings/str_util.cc @@ -16,9 +16,11 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include +#include #include #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace str_util { diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h index f062eddef8..065871c1b4 100644 --- a/tensorflow/core/lib/strings/str_util.h +++ b/tensorflow/core/lib/strings/str_util.h @@ -20,7 +20,6 @@ limitations under the License. #include #include #include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index 12509c250e..b9a9ef85eb 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -33,7 +33,6 @@ limitations under the License. #endif #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/stringprintf.h" diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index 271d73f5f1..5bc8606e28 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -19,15 +19,12 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/gtl/map_util.h" -#include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/file_system.h" #include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 3085b6958f..03c0c5ab51 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/core/platform/file_statistics.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #ifdef PLATFORM_WINDOWS diff --git a/tensorflow/core/platform/types.h b/tensorflow/core/platform/types.h index 38d75dbb32..6308e58847 100644 --- a/tensorflow/core/platform/types.h +++ b/tensorflow/core/platform/types.h @@ -31,10 +31,6 @@ limitations under the License. #error Define the appropriate PLATFORM_ macro for this platform #endif -#if defined(PLATFORM_WINDOWS) -#include "tensorflow/core/platform/windows/cpu_info.h" -#endif - namespace tensorflow { // Define tensorflow::string to refer to appropriate platform specific type. -- GitLab From 8d172e1a89feb06f906de43d75f0d5e65a2e1a04 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 09:42:14 -0700 Subject: [PATCH 173/960] Refactor code to improve TensorDataSet construction speed. PiperOrigin-RevId: 189595482 --- tensorflow/contrib/tensor_forest/kernels/v4/input_data.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h b/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h index c544a8c75e..b991e6339f 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h @@ -44,12 +44,15 @@ class TensorDataSet { int column_count = 0; for (int i = 0; i < input_spec_.dense_size(); ++i) { for (int j = 0; j < input_spec_.dense(i).size(); ++j) { - decision_trees::FeatureId id; - id.mutable_id()->set_value(strings::StrCat(column_count)); - available_features_.push_back(id); ++column_count; } } + available_features_.reserve(column_count); + decision_trees::FeatureId id; + for (int i = 0; i < column_count; i++) { + id.mutable_id()->set_value(strings::StrCat(i)); + available_features_.emplace_back(id); + } // Set up the random number generator. if (split_sampling_random_seed_ == 0) { -- GitLab From 9f3a01ee045ea19baebf6e07a2a966564bfa2f3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 09:43:59 -0700 Subject: [PATCH 174/960] Simple rewrite to remove negation nodes. PiperOrigin-RevId: 189595735 --- tensorflow/core/grappler/op_types.cc | 2 + tensorflow/core/grappler/op_types.h | 1 + .../optimizers/arithmetic_optimizer.cc | 75 ++++++++++++++---- .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 77 +++++++++++++++++++ 5 files changed, 143 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 9c9600db5e..259168bb33 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -212,6 +212,8 @@ bool IsMod(const NodeDef& node) { return node.op() == "Mod"; } bool IsMul(const NodeDef& node) { return node.op() == "Mul"; } +bool IsNeg(const NodeDef& node) { return node.op() == "Neg"; } + bool IsNoOp(const NodeDef& node) { return node.op() == "NoOp"; } bool IsNotEqual(const NodeDef& node) { return node.op() == "NotEqual"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 41ba8bb01e..49e01f68e3 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -89,6 +89,7 @@ bool IsNextIteration(const NodeDef& node); bool IsPack(const NodeDef& node); bool IsPad(const NodeDef& node); bool IsPack(const NodeDef& node); +bool IsNeg(const NodeDef& node); bool IsNoOp(const NodeDef& node); bool IsNotEqual(const NodeDef& node); bool IsPlaceholder(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 3a67c4b056..c25836ceef 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -344,8 +344,7 @@ class ArithmeticOptimizerStage { // will be automatically added to the optimization queue. If a simplified node // has the same name as original node it has to be explicitly added to the // optimization queue for second pass. - virtual Status TrySimplify(const NodeDef* node, - string* simplified_node_name) = 0; + virtual Status TrySimplify(NodeDef* node, string* simplified_node_name) = 0; protected: struct ScopedNodeName { @@ -557,8 +556,7 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { HasAllInputsOfSymbolicallyEqualShape(*node, properties); } - Status TrySimplify(const NodeDef* node, - string* simplified_node_name) override { + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); AddOpsGroup group; TF_RETURN_IF_ERROR(CreateAddOpsGroup(node, &group)); @@ -794,8 +792,7 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { !IsRewritten(node); } - Status TrySimplify(const NodeDef* node, - string* simplified_node_name) override { + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); std::set common_factors; @@ -945,8 +942,7 @@ class RemoveIdentityTranspose : public ArithmeticOptimizerStage { // TODO(rmlarsen): Forward control dependencies on the bypassed // transpose nodes. - Status TrySimplify(const NodeDef* node, - string* simplified_node_name) override { + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); NodeDef* input; @@ -1028,8 +1024,7 @@ class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { return IsBitcast(*node); } - Status TrySimplify(const NodeDef* node, - string* simplified_node_name) override { + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); // Bypass Bitcast whose source type and destination type are equal. @@ -1066,8 +1061,7 @@ class RemoveRedundantCastStage : public ArithmeticOptimizerStage { bool IsSupported(const NodeDef* node) const override { return IsCast(*node); } - Status TrySimplify(const NodeDef* node, - string* simplified_node_name) override { + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { CHECK(IsSupported(node)); // Bypass Cast whose source type and destination type are equal. if (GetSourceDataType(*node) == GetDestinationDataType(*node)) { @@ -1077,6 +1071,57 @@ class RemoveRedundantCastStage : public ArithmeticOptimizerStage { } }; +class RemoveNegationStage : public ArithmeticOptimizerStage { + public: + explicit RemoveNegationStage(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("RemoveNegation", ctx) {} + ~RemoveNegationStage() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsAdd(*node) || IsSub(*node); + } + + Status TrySimplify(NodeDef* node, string* simplified_node_name) override { + const string node_name = node->name(); + NodeDef* x; + NodeDef* y; + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &x)); + TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &y)); + bool updated = false; + if (IsAdd(*node)) { + if (IsNeg(*x)) { + // (-a) + b = b - a + node->set_op("Sub"); + node->mutable_input()->SwapElements(0, 1); + node->set_input(1, x->input(0)); + node->add_input(AsControlDependency(x->name())); + ctx_.node_map->AddOutput(NodeName(x->input(0)), node_name); + updated = true; + } else if (IsNeg(*y)) { + // a + (-b) = a - b + node->set_op("Sub"); + node->set_input(1, y->input(0)); + node->add_input(AsControlDependency(y->name())); + ctx_.node_map->AddOutput(NodeName(y->input(0)), node_name); + updated = true; + } + } else if (IsSub(*node)) { + if (IsNeg(*y)) { + // a - (-b) = a + b + node->set_op("Add"); + node->set_input(1, y->input(0)); + node->add_input(AsControlDependency(y->name())); + ctx_.node_map->AddOutput(NodeName(y->input(0)), node_name); + updated = true; + } + } + if (updated) { + AddToOptimizationQueue(node); + } + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -1696,12 +1741,16 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { stages.push_back(std::unique_ptr( new RemoveRedundantCastStage(ctx))); } + if (options_.remove_negation) { + stages.push_back(std::unique_ptr( + new RemoveNegationStage(ctx))); + } VLOG(1) << "Simplify arithmetic ops using " << stages.size() << " arithmetic optimization stages"; while (!nodes_to_simplify.Empty()) { - const NodeDef* node = nodes_to_simplify.PopBack(); + NodeDef* node = nodes_to_simplify.PopBack(); // TODO(ezhulenev): move all rewrites into separate stages string simplified_tensor = ""; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 95c1e14258..965f0e9ea2 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -63,6 +63,7 @@ class ArithmeticOptimizer : public GraphOptimizer { bool remove_identity_transpose = true; bool remove_redundant_bitcast = true; bool remove_redundant_cast = true; + bool remove_negation = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 6f7a95c2ed..3876486d80 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -126,6 +126,11 @@ class ArithmeticOptimizerTest : public GrapplerTest { DisableAllStages(optimizer); optimizer->options_.remove_redundant_cast = true; } + + void EnableOnlyRemoveNegation(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_negation = true; + } }; TEST_F(ArithmeticOptimizerTest, NoOp) { @@ -1498,5 +1503,77 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewrite_AddOpsOfSymbolicallyEqualShape) { EXPECT_EQ(collapsed_add->name(), updated_outputs->input(0)); } +TEST_F(ArithmeticOptimizerTest, RemoveNegation) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x = ops::Variable(s.WithOpName("x"), {2, 2}, DT_FLOAT); + auto y = ops::Variable(s.WithOpName("y"), {2, 2}, DT_FLOAT); + Output neg_x = ops::Neg(s.WithOpName("Neg_x"), x); + Output neg_y = ops::Neg(s.WithOpName("Neg_y"), y); + Output add_x_y = ops::Add(s.WithOpName("Add_x_y"), x, y); + Output add_negx_y = ops::Add(s.WithOpName("Add_negx_y"), neg_x, y); + Output add_x_negy = ops::Add(s.WithOpName("Add_x_negy"), x, neg_y); + Output add_negx_negy = ops::Add(s.WithOpName("Add_negx_negy"), neg_x, neg_y); + Output sub_x_y = ops::Sub(s.WithOpName("Sub_x_y"), x, y); + Output sub_negx_y = ops::Sub(s.WithOpName("Sub_negx_y"), neg_x, y); + Output sub_x_negy = ops::Sub(s.WithOpName("Sub_x_negy"), x, neg_y); + Output sub_negx_negy = ops::Sub(s.WithOpName("Sub_negx_negy"), neg_x, neg_y); + auto add_all = ops::AddN(s.WithOpName("add_all"), + {add_x_y, add_negx_y, add_x_negy, add_negx_negy, + sub_x_y, sub_negx_y, sub_x_negy, sub_negx_negy}); + + GrapplerItem item; + item.fetch = {"add_all"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyRemoveNegation(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); + + EXPECT_EQ(item.graph.node_size(), output.node_size()); + int found = 0; + for (int i = 0; i < output.node_size(); ++i) { + const NodeDef& node = output.node(i); + if (node.name() == "Add_negx_y") { + ++found; + EXPECT_EQ("Sub", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("^Neg_x", node.input(2)); + } else if (node.name() == "Add_x_negy") { + ++found; + EXPECT_EQ("Sub", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^Neg_y", node.input(2)); + } else if (node.name() == "Add_negx_negy") { + ++found; + EXPECT_EQ("Sub", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("Neg_y", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("^Neg_x", node.input(2)); + } else if (node.name() == "Sub_x_negy") { + ++found; + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^Neg_y", node.input(2)); + } else if (node.name() == "Sub_negx_negy") { + ++found; + EXPECT_EQ("Sub", node.op()); + EXPECT_EQ(4, node.input_size()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("^Neg_y", node.input(2)); + EXPECT_EQ("^Neg_x", node.input(3)); + } + } + EXPECT_EQ(5, found); +} + } // namespace grappler } // namespace tensorflow -- GitLab From f90616bc78ddfd1a9fb37ae30ac8851a9a275800 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 09:50:00 -0700 Subject: [PATCH 175/960] Fix misc typos in tensorflow/compiler/xla. PiperOrigin-RevId: 189596520 --- tensorflow/compiler/xla/service/algebraic_simplifier.h | 2 +- tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 2 +- .../compiler/xla/service/cpu/parallel_task_assignment.cc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 43315f5cdc..f0590943be 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -23,7 +23,7 @@ limitations under the License. namespace xla { -// A pass which performs AlgebraicSimplications. +// A pass which performs algebraic simplifications. class AlgebraicSimplifier : public HloPassInterface { public: // Given shapes 'from_shape' and 'to_shape', determines if it is valid to diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 0a966fd5a7..e43777c5e5 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -318,7 +318,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { // Note this is not run for AOT because it would bring in thread pool // and thread synchronization dependencies which would likely increase // binary size (and most AOT applications are single-threaded). - // TODO(29630486) Support multi-threaded AOT. + // TODO(b/29630486) Support multi-threaded AOT. pipeline.AddPass(max_parallelism, ShapeSizeBytesFunction()); } diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc index 38f1668159..86e8be8461 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment.cc @@ -71,7 +71,7 @@ class DefaultCostModel : public ParallelCostModel { if (flops_to_bytes_ratio <= 1.0) { // Limit max parallelism for I/O bound instructions by assuming a // sub-linear scaling function (fit based on empirical benchmark results). - // TODO(29630486) Develop system bandwidth model. + // TODO(b/29630486) Develop system bandwidth model. max_parallelism = std::ceil(std::sqrt(tensorflow::port::NumSchedulableCPUs())); // Use shape size instruction cost and L2 cache size min per-thread cost. @@ -81,7 +81,7 @@ class DefaultCostModel : public ParallelCostModel { // Use max parallelism for compute bound instructions. max_parallelism = max_parallelism_; // Calculate the instruction cost in cycles. - // TODO(29630486) Improve on this linear cost model. + // TODO(b/29630486) Improve on this linear cost model. // Consider making 'min_cost_per_thread' be a function of the target // bandwidth limit for instructions with low arithmetic complexity. instruction_cost = -- GitLab From 8dc7a69b3bfc04872fde56fda595a7614ac643fe Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 20 Mar 2018 00:57:43 +0800 Subject: [PATCH 176/960] Fix the comments of tf.contrib.lookup.MutableHashTable insert operation --- tensorflow/contrib/lookup/lookup_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index 62f1c810fc..cc77cd5431 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -298,7 +298,7 @@ class MutableHashTable(LookupInterface): table = tf.contrib.lookup.MutableHashTable(key_dtype=tf.string, value_dtype=tf.int64, default_value=-1) - table.insert(keys, values) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) ``` @@ -494,7 +494,7 @@ class MutableDenseHashTable(LookupInterface): value_dtype=tf.int64, default_value=-1, empty_key=0) - table.insert(keys, values) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) ``` -- GitLab From b1cb65ab5218c13eb9d0f55b7f169cd676e032f3 Mon Sep 17 00:00:00 2001 From: Fanjin Zeng Date: Mon, 19 Mar 2018 10:29:45 -0700 Subject: [PATCH 177/960] Fix related doc clarification request on tf.contrib.lookup.MutableHashTable insert operation #17835 Make the doc example executable, and explicitly suggests that MutableDenseHashTable.insert is an operation rather than in-place computation. --- tensorflow/contrib/lookup/lookup_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index 62f1c810fc..c7a61fcac3 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -494,7 +494,7 @@ class MutableDenseHashTable(LookupInterface): value_dtype=tf.int64, default_value=-1, empty_key=0) - table.insert(keys, values) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) ``` -- GitLab From 6f2f21894fae9384fe52ad77ec751c3c42276aa3 Mon Sep 17 00:00:00 2001 From: Tarang Chugh Date: Mon, 19 Mar 2018 18:30:05 +0100 Subject: [PATCH 178/960] Update README.md (#16301) Correct MobilenetV1 variable --- tensorflow/contrib/lite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 5194f015b5..2680d515eb 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -165,7 +165,7 @@ bazel-bin/tensorflow/python/tools/freeze_graph\ --input_graph=/tmp/mobilenet_v1_224.pb \ --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ --input_binary=true --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ - --output_node_names=MobileNet/Predictions/Reshape_1 + --output_node_names=MobilenetV1/Predictions/Reshape_1 ``` The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with -- GitLab From 7f9ab7f8c5e161562656604d9b22939b1f97c791 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 19 Mar 2018 10:37:00 -0700 Subject: [PATCH 179/960] Documentation tweaks and tests for GradientTape with graph execution. PiperOrigin-RevId: 189604536 --- tensorflow/python/eager/backprop.py | 76 ++++++++----------- tensorflow/python/eager/backprop_test.py | 25 +++--- tensorflow/python/framework/ops.py | 35 ++++++--- .../python/ops/resource_variable_ops.py | 32 ++++---- tensorflow/python/ops/variables.py | 4 +- 5 files changed, 88 insertions(+), 84 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 88de1a951f..9b997fed30 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -638,63 +638,53 @@ _default_vspace = imperative_grad.VSpace( class GradientTape(object): - """Records operations to use to compute gradients. + """Record operations for automatic differentiation. - Operations are recorded if: - - they happen in code marked by this context manager - - at least one of their inputs is being watched + Operations are recorded if they are executed within this context manager and + at least one of their inputs is being "watched". - Outputs of recorded operations are watched. Variables are automatically - watched and tensors can be manually watched by calling the watch method on the - context manager. + Variables (created by @{tf.contrib.eager.Variable} or @{tf.get_variable}) + are automatically watched. Tensors can be manually watched by invoking the + `watch` + method on this context manager. - Example usage: + For example, consider the function `y = x * x`. The gradient at `x = 3.0` can + be computed as: ```python + x = tf.constant(3.) with tfe.GradientTape() as g: - x = tf.constant(3.0) g.watch(x) y = x * x - grad = g.gradient(y, [x])[0] - assert grad.numpy() == 6.0 + grad = g.gradient(y, [x])[0] # Will compute to 6.0 ``` - It is possible to use GradientTapes to compute higher-order derivatives as - follows: + GradientTapes can be nested to compute higher-order derivatives. For example, ```python + x = tf.constant(3.0) with tfe.GradientTape() as g: - x = tf.constant(3.0) - g.watch(x) - y = x * x with tfe.GradientTape() as gg: - gg.watch(y) - z = 2 * y - inner_grad = gg.gradient(z, [y])[0] - assert inner_grad.numpy() == 2 - y = y + inner_grad - grad = g.gradient(y, [x])[0] - assert grad.numpy() == 6.0 + gg.watch(x) + y = x * x + dy_dx = gg.gradient(y, [x])[0] # Will compute to 6.0 + d2y_dx2 = g.gradient(dy_dx, [x])[0] # Will compute to 2.0 ``` By default, the resources held by a GradientTape are released as soon as - GradientTape.gradient() method is called. However, if one need to compute - multiple gradients over the same computation, she can create a persistent - GradientTape. Persistent tapes allow multiple calls to the gradient() method - and release resources when the tape object is destructed. - - Example usage: + GradientTape.gradient() method is called. To compute multiple gradients over + the same computation, create a persistent gradient tape. This allows multiple + calls to the gradient() method as resources are released when the tape object + is garbage collected. For example: ```python + x = tf.constant(3.0) with tfe.GradientTape(persistent=True) as g: - x = tf.constant(3.0) g.watch(x) y = x * x z = y * y - dz_dx = g.gradient(z, [x])[0] - assert dz_dx.numpy() == 108.0 # 4*x^3 at x = 3 - dy_dx = g.gradient(y, [x])[0] - assert dy_dx.numpy() == 6.0 + dy_dx = g.gradient(z, [x])[0] # 6.0 + dz_dx = g.gradient(y, [x])[0] # 108.0 (4*x^3 at x = 3) del g # Drop the reference to the tape """ @@ -703,8 +693,8 @@ class GradientTape(object): Args: persistent: Boolean controlling whether a persistent gradient tape - is created. Must be True or False. - + is created. False by default, which means at most one call can + be made to the gradient() method on this object. """ self._tape = None self._persistent = persistent @@ -720,7 +710,7 @@ class GradientTape(object): """Ensures that `tensor` is being traced by this tape. Args: - tensor: a Tensor or Variable a list of Tensors or Variables. + tensor: a Tensor or list of Tensors. """ for t in nest.flatten(tensor): if isinstance(t, resource_variable_ops.ResourceVariable): @@ -735,14 +725,14 @@ class GradientTape(object): key=lambda v: v.handle._id)) # pylint: disable=protected-access def gradient(self, target, sources, output_gradients=None): - """Computes the gradient using information traced by the tape. + """Computes the gradient using operations recorded in context of this tape. Args: - target: the tensor to be differentiated. - sources: a list of Tensors or Variables, the target will be - differentiated with respect to the sources. + target: Tensor to be differentiated. + sources: a list of Tensors or Variables. `target` will be differentiated + against elements in `sources`. output_gradients: a list of gradients, one for each element of - target. Defaults to None. + target. Defaults to None. Returns: a list of Tensors (or IndexedSlices, or None), one for each element in @@ -750,7 +740,7 @@ class GradientTape(object): Raises: RuntimeError: if called inside the context of the tape, or if called more - than once. + than once on a non-persistent tape. """ if self._tape is None: raise RuntimeError("GradientTape.gradient can only be called once " diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 5934293dfc..bca2928708 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -195,8 +195,10 @@ class BackpropTest(test.TestCase): g, = backprop.gradients_function(loss, [0])(logits, labels) self.assertAllEqual(g.numpy(), [[-0.5, 0.5]]) + @test_util.run_in_graph_and_eager_modes() def testGradientWithinTapeBlock(self): v1 = resource_variable_ops.ResourceVariable(1.) + self.evaluate(v1.initializer) with backprop.GradientTape() as t: loss = 2 * v1 with self.assertRaises(RuntimeError): @@ -204,7 +206,7 @@ class BackpropTest(test.TestCase): with backprop.GradientTape(persistent=True) as t: loss = 2 * v1 grad = t.gradient(loss, [v1]) - self.assertAllEqual(grad[0], 2.0) + self.assertAllEqual(self.evaluate(grad[0]), 2.0) @test_util.assert_no_new_tensors def testSecondGrad(self): @@ -367,6 +369,7 @@ class BackpropTest(test.TestCase): self.assertEqual(backprop.implicit_grad(f)()[0][0], None) @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testGradientTape(self): with backprop.GradientTape() as g: x = constant_op.constant(3.0) @@ -376,10 +379,10 @@ class BackpropTest(test.TestCase): gg.watch(y) z = 2 * y inner_grad = gg.gradient(z, [y])[0] - self.assertEqual(inner_grad.numpy(), 2.0) + self.assertEqual(self.evaluate(inner_grad), 2.0) y += inner_grad grad = g.gradient(y, [x])[0] - self.assertEqual(grad.numpy(), 6.0) + self.assertEqual(self.evaluate(grad), 6.0) @test_util.assert_no_new_tensors def testGradientTapeGradientCalledMultipleTimes(self): @@ -394,6 +397,7 @@ class BackpropTest(test.TestCase): g.gradient(y, [x]) @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testPersistentTape(self): with backprop.GradientTape(persistent=True) as g: x = constant_op.constant(3.0) @@ -401,12 +405,13 @@ class BackpropTest(test.TestCase): y = x * x z = y * y dz_dx = g.gradient(z, [x])[0] - self.assertEqual(dz_dx.numpy(), 4*3*3*3) + self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3) dy_dx = g.gradient(y, [x])[0] - self.assertEqual(dy_dx.numpy(), 2*3) + self.assertEqual(self.evaluate(dy_dx), 2 * 3) del g @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testPersistentNestedTape(self): with backprop.GradientTape(persistent=True) as g: x = constant_op.constant(3.0) @@ -417,22 +422,24 @@ class BackpropTest(test.TestCase): z = 2 * y for _ in range(2): inner_grad = gg.gradient(z, [y])[0] - self.assertEqual(inner_grad.numpy(), 2.0) + self.assertEqual(self.evaluate(inner_grad), 2.0) y += inner_grad del gg grad = g.gradient(y, [x])[0] - self.assertEqual(grad.numpy(), 6.0) + self.assertEqual(self.evaluate(grad), 6.0) grad = g.gradient(z, [x])[0] - self.assertEqual(grad.numpy(), 12.0) + self.assertEqual(self.evaluate(grad), 12.0) del g @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() def testGradientTapeVariable(self): v = resource_variable_ops.ResourceVariable(1.0, name='v') + self.evaluate(v.initializer) with backprop.GradientTape() as g: y = v * v grad = g.gradient(y, [v])[0] - self.assertAllEqual(grad, 2.0) + self.assertAllEqual(self.evaluate(grad), 2.0) @test_util.assert_no_new_tensors def testEmptyParamsForValueAndGradFunction(self): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 01a0e03be2..f1cd341d66 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -838,41 +838,51 @@ class _EagerTensorBase(Tensor): def set_shape(self, shape): if not self.shape.is_compatible_with(shape): raise ValueError( - "EagerTensor's shape %s is not compatible with supplied shape %s" % + "Tensor's shape %s is not compatible with supplied shape %s" % (self.shape, shape)) # Methods not supported / implemented for Eager Tensors. @property def op(self): - raise AttributeError("op not supported for Eager Tensors.") + raise AttributeError( + "Tensor.op is meaningless when eager execution is enabled.") @property def graph(self): - raise AttributeError("graph not supported for Eager Tensors.") + raise AttributeError( + "Tensor.graph is meaningless when eager execution is enabled.") @property def name(self): - raise AttributeError("name not supported for Eager Tensors.") + raise AttributeError( + "Tensor.name is meaningless when eager execution is enabled.") @property def value_index(self): - raise AttributeError("value_index not supported for Eager Tensors.") + raise AttributeError( + "Tensor.value_index is meaningless when eager execution is enabled.") def consumers(self): - raise NotImplementedError("consumers not supported for Eager Tensors.") + raise NotImplementedError( + "Tensor.consumers is meaningless when eager execution is enabled.") def _add_consumer(self, consumer): - raise NotImplementedError("_add_consumer not supported for Eager Tensors.") + raise NotImplementedError( + "_add_consumer not supported when eager execution is enabled.") def _as_node_def_input(self): raise NotImplementedError( - "_as_node_def_input not supported for Eager Tensors.") + "_as_node_def_input not supported when eager execution is enabled.") def _as_tf_output(self): - raise NotImplementedError("_as_tf_output not supported for Eager Tensors.") + raise NotImplementedError( + "_as_tf_output not supported when eager execution is enabled.") def eval(self, feed_dict=None, session=None): - raise NotImplementedError("eval not supported for Eager Tensors.") + raise NotImplementedError( + "eval is not supported when eager execution is enabled, " + "is .numpy() what you're looking for?" + ) # This call creates an EagerTensor class, as a subclass of _EagerTensorBase, and @@ -5937,8 +5947,9 @@ def get_from_proto_function(collection_name): def _assert_collection_is_ok(collection_name): if context.executing_eagerly(): if collection_name in GraphKeys._VARIABLE_COLLECTIONS: # pylint: disable=protected-access - raise ValueError("When Eager Execution is enabled, variable " - "collections are not supported.") + raise ValueError( + "variable collections are not supported when eager execution is enabled." + ) def _operation_conversion_error(op, dtype=None, name=None, as_ref=False): diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index affa7ae629..df873da98e 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -149,7 +149,7 @@ def shape_safe_assign_variable_handle(handle, shape, value, name=None): class ResourceVariable(variables.Variable): """Variable based on resource handles. - See the @{$python/state_ops$`Variables`} documentation for more details. + See the @{$variables$Variables How To} for a high level overview. A `ResourceVariable` allows you to maintain state across subsequent calls to session.run. @@ -179,24 +179,20 @@ class ResourceVariable(variables.Variable): by edges in the graph. Consider the following example, in which two writes can cause tf.Variable and tf.ResourceVariable to behave differently: - ```python - a = tf.ResourceVariable(1.0) - a.initializer.run() - - assign = a.assign(2.0) - with tf.control_dependencies([assign]): - b = a.read_value() - with tf.control_dependencies([b]): - other_assign = a.assign(3.0) - with tf.control_dependencies([other_assign]): - # Will print 2.0 because the value was read before other_assign ran. If - # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed. - tf.Print(b, [b]).eval() + ```python + a = tf.ResourceVariable(1.0) + a.initializer.run() + + assign = a.assign(2.0) + with tf.control_dependencies([assign]): + b = a.read_value() + with tf.control_dependencies([b]): + other_assign = a.assign(3.0) + with tf.control_dependencies([other_assign]): + # Will print 2.0 because the value was read before other_assign ran. If + # `a` was a tf.Variable instead, 2.0 or 3.0 could be printed. + tf.Print(b, [b]).eval() ``` - - To enforce these consistency properties tf.ResourceVariable might make more - copies than an equivalent tf.Variable under the hood, so tf.Variable is still - not deprecated. """ def __init__(self, diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 5b9947f441..c37cdd9e27 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -125,8 +125,8 @@ class Variable(checkpointable.CheckpointableBase): @compatibility(eager) `tf.Variable` is not compatible with eager execution. Use - `tfe.Variable` instead which is compatible with both eager execution - and graph construction. See [the TensorFlow Eager Execution + `tf.contrib.eager.Variable` instead which is compatible with both eager + execution and graph construction. See [the TensorFlow Eager Execution guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/g3doc/guide.md#variables-and-optimizers) for details on how variables work in eager execution. @end_compatibility -- GitLab From 20edf09163f2757b5c26e4de4a28dc87efa065c0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 11:11:28 -0700 Subject: [PATCH 180/960] Optimizations to DepthwiseConv PiperOrigin-RevId: 189610985 --- .../internal/optimized/depthwiseconv_uint8.h | 3 +- .../depthwiseconv_uint8_3x3_filter.h | 75 ++++++++++++++++--- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 08674a6c59..c71b070680 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1697,7 +1697,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, // Call kernel optimized for depthwise convolutions using 3x3 filters, // stride = 1, no padding, depth_multiplier = 1 and depth a multiple of 16. if (filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && - stride_width == 1 && stride_height == 1 && pad_width == 0 && + (stride_width == 1 || stride_width == 2) && + (stride_height == 1 || stride_height == 2) && pad_width == 0 && pad_height == 0 && (input_depth % 16) == 0) { DepthwiseConv3by3FilterDepth16( input_data, input_dims, input_offset, filter_data, filter_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index e0335b2c74..9dc76e7608 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -466,8 +466,8 @@ inline void DepthwiseConv3by3FilterDepth16( TFLITE_DCHECK(filter_width == 3); TFLITE_DCHECK(pad_height == 0); TFLITE_DCHECK(pad_width == 0); - TFLITE_DCHECK(stride_width == 1); - TFLITE_DCHECK(stride_height == 1); + TFLITE_DCHECK(stride_width == 1 || stride_width == 2); + TFLITE_DCHECK(stride_height == 1 || stride_height == 2); // The number of outputs to process in the main loop. const int num_x_outputs = 1; @@ -513,6 +513,16 @@ inline void DepthwiseConv3by3FilterDepth16( } } + using dot_product_func_t = + decltype(&ConvKernel3x3FilterDepth16<1, 2, 1>::Run); + dot_product_func_t dot_product_func = nullptr; + + if (stride_width == 1 && stride_height == 1) { + dot_product_func = ConvKernel3x3FilterDepth16<1, 2, 1>::Run; + } else { + dot_product_func = ConvKernel3x3FilterDepth16<1, 2, 2>::Run; + } + // Offsets for preloading inputs. const int i0 = 0; const int i1 = input_depth; @@ -526,6 +536,9 @@ inline void DepthwiseConv3by3FilterDepth16( const int i9 = 3 * input_row_width; const int i10 = 3 * input_row_width + input_depth; const int i11 = 3 * input_row_width + 2 * input_depth; + const int i12 = 4 * input_row_width; + const int i13 = 4 * input_row_width + input_depth; + const int i14 = 4 * input_row_width + 2 * input_depth; for (int b = 0; b < batches; ++b) { const int32* bias_ptr = bias_data; @@ -551,10 +564,6 @@ inline void DepthwiseConv3by3FilterDepth16( const uint8* input_ptr = input_data + depth + in_x_offset + in_y_offset + in_batch_offset; - uint8* output_ptr = output_data + depth + (out_x * output_depth) + - (output_depth * output_width * out_y) + - out_batch_offset; - // Preload inputs. If input depth is large, preload every value of the // input for this depth range. Otherwise, preload only the first values // of each row. @@ -571,19 +580,33 @@ inline void DepthwiseConv3by3FilterDepth16( preload_l1_keep(input_ptr + i9); preload_l1_keep(input_ptr + i10); preload_l1_keep(input_ptr + i11); + + if (stride_height == 2) { + preload_l1_keep(input_ptr + i12); + preload_l1_keep(input_ptr + i13); + preload_l1_keep(input_ptr + i14); + } } else { preload_l1_keep(input_ptr + i0); preload_l1_keep(input_ptr + i3); preload_l1_keep(input_ptr + i6); preload_l1_keep(input_ptr + i9); + + if (stride_height == 2) { + preload_l1_keep(input_ptr + i12); + } } + uint8* output_ptr = output_data + depth + (out_x * output_depth) + + (output_depth * output_width * out_y) + + out_batch_offset; + for (; out_x < out_x_end; out_x += num_x_outputs) { - ConvKernel3x3FilterDepth16<1, 2, 1>::Run( - filter, input_ptr, input_depth, input_offset, input_row_width, - bias_ptr, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_ptr, - output_depth, output_width); + dot_product_func(filter, input_ptr, input_depth, input_offset, + input_row_width, bias_ptr, output_offset, + output_multiplier, output_shift, + output_activation_min, output_activation_max, + output_ptr, output_depth, output_width); input_ptr += input_ptr_x_increment * num_x_outputs; output_ptr += output_depth * num_x_outputs; @@ -603,6 +626,8 @@ inline void DepthwiseConv3by3FilterDepth16( preload_l1_keep(input_ptr + i8); preload_l1_keep(input_ptr + i10); preload_l1_keep(input_ptr + i11); + preload_l1_keep(input_ptr + i13); + preload_l1_keep(input_ptr + i14); } } @@ -624,6 +649,21 @@ inline void DepthwiseConv3by3FilterDepth16( const uint8* input_ptr = input_data + depth + in_x_offset + in_y_offset + in_batch_offset; + if (input_depth >= 32) { + preload_l1_keep(input_ptr + i0); + preload_l1_keep(input_ptr + i1); + preload_l1_keep(input_ptr + i2); + preload_l1_keep(input_ptr + i3); + preload_l1_keep(input_ptr + i4); + preload_l1_keep(input_ptr + i5); + preload_l1_keep(input_ptr + i6); + preload_l1_keep(input_ptr + i7); + } else { + preload_l1_keep(input_ptr + i0); + preload_l1_keep(input_ptr + i3); + preload_l1_keep(input_ptr + i6); + } + uint8* output_ptr = output_data + depth + (out_x * output_depth) + (output_depth * output_width * out_y) + out_batch_offset; @@ -637,6 +677,19 @@ inline void DepthwiseConv3by3FilterDepth16( input_ptr += input_ptr_x_increment; output_ptr += output_depth; + + if (stride_width == 1) { + preload_l1_keep(input_ptr + i2); + preload_l1_keep(input_ptr + i5); + preload_l1_keep(input_ptr + i8); + } else if (stride_width == 2) { + preload_l1_keep(input_ptr + i1); + preload_l1_keep(input_ptr + i2); + preload_l1_keep(input_ptr + i4); + preload_l1_keep(input_ptr + i5); + preload_l1_keep(input_ptr + i7); + preload_l1_keep(input_ptr + i8); + } } } filter_ptr += 16; -- GitLab From cb7a530e6f9648377d92b32db6347d5f0777cbb3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 11:27:45 -0700 Subject: [PATCH 181/960] Internal change PiperOrigin-RevId: 189613870 --- tensorflow/contrib/lite/schema/BUILD | 8 +++----- tensorflow/contrib/session_bundle/BUILD | 4 +--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index a758c5e7e1..da65ec659c 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -1,8 +1,6 @@ -package( - default_visibility = [ - "//visibility:public", - ], -) +package(default_visibility = [ + "//visibility:public", +]) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 67011c8fef..75a753ed89 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -1,9 +1,7 @@ # Description: # TensorFlow Serving session bundle. -package( - default_visibility = ["//visibility:public"], -) +package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 -- GitLab From af35a55a5db07160901ea244c7619e1db5a13e1d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 11:29:44 -0700 Subject: [PATCH 182/960] Do not use SparseMatmul to for bfloat16 as Matmul is already supported. PiperOrigin-RevId: 189614197 --- tensorflow/python/ops/math_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e18d0e9501..c893bf9b90 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2093,8 +2093,9 @@ def matmul(a, sparse_matmul_types = [dtypes.bfloat16, dtypes.float32] use_sparse_matmul = ( a.dtype in sparse_matmul_types and b.dtype in sparse_matmul_types) - if a.dtype == dtypes.bfloat16 or b.dtype == dtypes.bfloat16: - # matmul currently doesn't handle bfloat16 inputs. + if (a.dtype == dtypes.bfloat16 or b.dtype == dtypes.bfloat16 and + a.dtype != b.dtype): + # matmul currently doesn't handle mixed-precision inputs. use_sparse_matmul = True if use_sparse_matmul: ret = sparse_matmul( -- GitLab From 57d117db96ef84e4fe12b74c9115421767db4531 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 12:15:59 -0700 Subject: [PATCH 183/960] Add a map from TPU core id to name to TfOpStats. PiperOrigin-RevId: 189620850 --- tensorflow/contrib/tpu/profiler/tf_op_stats.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index e5c798aa2f..20ed7419fd 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -223,4 +223,6 @@ message TfOpStats { optional RunEnvironmentResult run_environment = 7; // The result for the host operations. optional HostOpsResult host_ops = 8; + // A map from core ID to name. + map core_id_to_name_map = 9; } -- GitLab From 12ead8d98e2ff05998b8b502eb0a584ddeb275f4 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 19 Mar 2018 13:01:58 -0700 Subject: [PATCH 184/960] Checkpointable: Small cleanup making better use of NewCheckpointReader. PiperOrigin-RevId: 189627956 --- tensorflow/contrib/eager/python/BUILD | 5 ++- .../eager/python/checkpointable_utils.py | 32 +++---------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 32aa2c0a4a..4fba014d6f 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -233,12 +233,15 @@ py_library( "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", + "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", - "//tensorflow/python:io_ops", + "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", "//tensorflow/python:tensor_shape", "//tensorflow/python:training", + "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/eager:context", ], diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 0a34f3b3f6..adbb92e43b 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -32,7 +32,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import io_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.training import checkpointable as core_checkpointable @@ -577,7 +576,6 @@ class CheckpointableSaver(object): self._last_save_saver = None # Op caching for restore - self._object_graph_restore_tensor = None self._last_restore_object_graph = None self._last_restore_checkpoint = None @@ -660,7 +658,7 @@ class CheckpointableSaver(object): attribute_proto.checkpoint_key] return saver_names - def restore(self, save_path, session=None): + def restore(self, save_path): """Restore a training checkpoint. Restores `root_checkpointable` and any objects that it tracks @@ -670,8 +668,7 @@ class CheckpointableSaver(object): constructor after this call will be matched if they have a corresponding object in the checkpoint. - When building a graph, restorations are added to the graph but not run. A - session is required to retrieve checkpoint metadata. + When building a graph, restorations are added to the graph but not run. To disallow deferred loading, assert immediately that all checkpointed variables have been matched to variable objects: @@ -709,9 +706,6 @@ class CheckpointableSaver(object): object which may run initializers for objects in the dependency graph. If the checkpoint was written by the name-based `tf.train.Saver`, names are used to match variables. - session: The session to retrieve metadata with. Ignored when executing - eagerly. If not provided when graph building, the default session is - used. Returns: A load status object, which can be used to make assertions about the @@ -726,32 +720,15 @@ class CheckpointableSaver(object): return InitializationOnlyStatus(self._root_checkpointable) in_graph_mode = not context.executing_eagerly() if in_graph_mode: - if session is None: - session = ops.get_default_session() file_prefix_tensor = self._file_prefix_placeholder file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} else: - session = None with ops.device("/cpu:0"): file_prefix_tensor = constant_op.constant(save_path) file_prefix_feed_dict = None + reader = pywrap_tensorflow.NewCheckpointReader(save_path) try: - if not in_graph_mode or self._object_graph_restore_tensor is None: - with ops.device("/cpu:0"): - object_graph_string, = io_ops.restore_v2( - prefix=file_prefix_tensor, - tensor_names=[_OBJECT_GRAPH_PROTO_KEY], - shape_and_slices=[""], - dtypes=[dtypes.string], - name="object_graph_proto_read") - if in_graph_mode: - self._object_graph_restore_tensor = object_graph_string - if in_graph_mode: - object_graph_string = session.run( - self._object_graph_restore_tensor, - feed_dict=file_prefix_feed_dict) - else: - object_graph_string = object_graph_string.numpy() + object_graph_string = reader.get_tensor(_OBJECT_GRAPH_PROTO_KEY) except errors_impl.NotFoundError: # The object graph proto does not exist in this checkpoint. Try again with # name-based saving. @@ -766,7 +743,6 @@ class CheckpointableSaver(object): if in_graph_mode: dtype_map = None else: - reader = pywrap_tensorflow.NewCheckpointReader(save_path) dtype_map = reader.get_variable_to_dtype_map() checkpoint = core_checkpointable_utils._Checkpoint( # pylint: disable=protected-access object_graph_proto=object_graph_proto, -- GitLab From 774095829ec262f2e1cb4e73938410e0248bc57c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 13:03:22 -0700 Subject: [PATCH 185/960] Extract GraphOptimizer{Stage,Context}, and use it as a base class in ArithmeticOptimizer. PiperOrigin-RevId: 189628227 --- tensorflow/core/grappler/optimizers/BUILD | 32 +++ .../optimizers/arithmetic_optimizer.cc | 267 ++++-------------- .../optimizers/graph_optimizer_stage.cc | 120 ++++++++ .../optimizers/graph_optimizer_stage.h | 185 ++++++++++++ .../optimizers/graph_optimizer_stage_test.cc | 168 +++++++++++ 5 files changed, 553 insertions(+), 219 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc create mode 100644 tensorflow/core/grappler/optimizers/graph_optimizer_stage.h create mode 100644 tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 3499879dee..96ea8f7a83 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -202,6 +202,37 @@ cc_library( ], ) +cc_library( + name = "graph_optimizer_stage", + srcs = ["graph_optimizer_stage.cc"], + hdrs = ["graph_optimizer_stage.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:frame", + ], +) + +tf_cc_test( + name = "graph_optimizer_stage_test", + size = "small", + srcs = ["graph_optimizer_stage_test.cc"], + deps = [ + ":graph_optimizer_stage", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) + cc_library( name = "custom_graph_optimizer", hdrs = [ @@ -224,6 +255,7 @@ cc_library( deps = [ ":constant_folding", ":graph_optimizer", + ":graph_optimizer_stage", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index c25836ceef..942724a6ce 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/graph_optimizer_stage.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/core/errors.h" @@ -288,170 +289,29 @@ NodeDef* GetTailOfValuePreservingChain( is_value_preserving_non_branching); } -// Context passed to each arithmetic optimizer stage. Optimizer stage is -// responsible for updating the node map for all added or deleted nodes, to keep -// it consistent with optimized graph. +// Graph optimizer context extension specific to ArithmeticOptimizer struct ArithmeticOptimizerContext { - ArithmeticOptimizerContext( - const std::unordered_set* nodes_to_preserve, - GraphDef* optimized_graph, GraphProperties* graph_properties, - NodeMap* node_map, FrameMap* frame_map, - SetVector* nodes_to_simplify) - : nodes_to_preserve(nodes_to_preserve), - optimized_graph(optimized_graph), - graph_properties(graph_properties), - node_map(node_map), - frame_map(frame_map), - nodes_to_simplify(nodes_to_simplify) {} - - const std::unordered_set* nodes_to_preserve; - GraphDef* optimized_graph; - GraphProperties* graph_properties; - NodeMap* node_map; - FrameMap* frame_map; + explicit ArithmeticOptimizerContext(SetVector* nodes_to_simplify) + : nodes_to_simplify(nodes_to_simplify) {} SetVector* nodes_to_simplify; }; // Base class for single arithmetic optimization: e.g. Bitcast optimization, // AddOps optimization, etc... -// TODO(ezhulenev): extract this class to be reused by other multi-stage -// graph optimizers (const_folding, dependency_optimizer, etc...) -class ArithmeticOptimizerStage { +class ArithmeticOptimizerStage : public GraphOptimizerStage { public: explicit ArithmeticOptimizerStage(const string& name, - const ArithmeticOptimizerContext& ctx) - : name_(name), ctx_(ctx) {} + const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext ctx_ext) + : GraphOptimizerStage("ArithmeticOptimizer", name, ctx), + ctx_ext_(ctx_ext) {} virtual ~ArithmeticOptimizerStage() = default; - // Check if we should try to simplify node. Returning true doesn't - // guarantee that node will be simplified. - // - // Should implement just a basic sanity check, without any expensive graph - // traversals. - virtual bool IsSupported(const NodeDef* node) const = 0; - - // Try to simplify the given node. If successfully simplified a given node, - // return a name of a new simplified version using output parameter. - // - // Consumers of an old node's outputs will be automatically re-wired to - // consume outputs of a new simplified node. - // - // Return error status only if some precondition is failed, or got an - // incorrect graph. In every other case return Status:OK(), even if didn't - // simplify anything. - // - // A simplified node will be always considered for further optimization and - // will be automatically added to the optimization queue. If a simplified node - // has the same name as original node it has to be explicitly added to the - // optimization queue for second pass. - virtual Status TrySimplify(NodeDef* node, string* simplified_node_name) = 0; - - protected: - struct ScopedNodeName { - string scope; - string name; - }; - - const ScopedNodeName ParseScopedNodeName(const string& name) const { - auto pos = name.find_last_of("/"); - if (pos == string::npos) { - return {"", name}; - } else { - return {name.substr(0, pos), name.substr(pos + 1)}; - } - } - - // Prefix optimized node name with stage name and rewrite_rule - const string OptimizedNodeName(const string& rewrite_rule, - const ScopedNodeName& scoped_node_name) const { - return MakeOptimizedNodeName(strings::StrCat(name_, "_", rewrite_rule), - scoped_node_name); - } - - // Prefix optimized node name with stage name and rewrite_rule - const string OptimizedNodeName(const string& rewrite_rule, - const ScopedNodeName& scoped_node_name, - const std::vector& node_names) const { - return MakeOptimizedNodeName(strings::StrCat(name_, "_", rewrite_rule), - scoped_node_name, node_names); - } - - // Prefix optimized node name with stage name - const string OptimizedNodeName(const ScopedNodeName& scoped_node_name) const { - return MakeOptimizedNodeName(name_, scoped_node_name); - } - - // Prefix optimized node name with stage name - const string OptimizedNodeName(const ScopedNodeName& scoped_node_name, - const std::vector& node_names) const { - return MakeOptimizedNodeName(name_, scoped_node_name, node_names); - } - // Simplification graph rewrite can create additional nodes that are inputs // to final simplified node, they can be also added to the arithmetic // optimizer queue for further optimization. void AddToOptimizationQueue(NodeDef* node) { - ctx_.nodes_to_simplify->PushBack(node); - } - - // Get a node by input name from a node map. Return an error if node was not - // found. - Status GetInputNode(const string& input, NodeDef** node) const { - string node_name = NodeName(input); - NodeDef* node_by_name = ctx_.node_map->GetNode(node_name); - if (node_by_name == nullptr) { - return errors::FailedPrecondition("Node ", node_name, - " doesn't exists in a node map"); - } - *node = node_by_name; - return Status::OK(); - } - - // Lookup tensor properties by name. Tensor name might have non-zero port - // number. Return an error if tensor node doesn't exists in a graph, or it - // doesn't have properties defined for requested port. - Status GetTensorProperties(const string& tensor, - OpInfo::TensorProperties* properties) const { - int port; - string tensor_node_name = ParseNodeName(tensor, &port); - if (port < 0) { - return errors::InvalidArgument( - "Can't get tensor properties of control dependency ", tensor); - } - - const auto& output_properties = - ctx_.graph_properties->GetOutputProperties(tensor_node_name); - auto num_outputs = output_properties.size(); - - if (num_outputs == 0 || port > num_outputs - 1) { - return errors::InvalidArgument( - "Node ", tensor_node_name, - " is missing output properties at position :", port, - " (num_outputs=", num_outputs, ")"); - } - - properties->CopyFrom(output_properties[port]); - return Status::OK(); - } - - NodeDef* AddCopyNode(const string& name, const NodeDef* node_to_copy) { - CHECK(node_to_copy != nullptr); - CHECK(!ctx_.node_map->NodeExists(name)) - << "Node " << name << " already exists in a graph"; - NodeDef* new_node = ctx_.optimized_graph->add_node(); - *new_node = *node_to_copy; - new_node->set_name(name); - ctx_.node_map->AddNode(name, new_node); - return new_node; - } - - NodeDef* AddEmptyNode(const string& name) { - CHECK(!ctx_.node_map->NodeExists(name)) - << "Node " << name << " already exists in a graph"; - NodeDef* new_node = ctx_.optimized_graph->add_node(); - new_node->set_name(name); - ctx_.node_map->AddNode(name, new_node); - return new_node; + ctx_ext_.nodes_to_simplify->PushBack(node); } // TODO(ezhulenev): remove this method from ArithmeticOptimizer when all @@ -476,49 +336,9 @@ class ArithmeticOptimizerStage { } } - const string name_; - const ArithmeticOptimizerContext ctx_; - private: - // Get a name for a new node obtained by optimizing a single node of the - // original graph. The optimized node is placed under the original node scope. - // - // Node name uniqueness is guaranteed by unique name of an original node in - // a same scope. - // - // Example: MakeOptimizedNodeName("AwesomeRewrite", "a/b/c/Add_1") - // Optimized name: "a/b/c/ArithmeticOptimizer/AwesomeRewrite_Add_1" - const string MakeOptimizedNodeName( - const string& prefix, const ScopedNodeName& scoped_node_name) const { - string node_name; - strings::StrAppend(&node_name, scoped_node_name.scope); - if (!node_name.empty()) strings::StrAppend(&node_name, "/"); - strings::StrAppend(&node_name, kArithmeticOptimizer, "/", prefix, "_", - scoped_node_name.name); - return node_name; - } - - // Get a name for a new node obtained by optimizing multiple nodes of the - // original graph, starting from "root". The optimized node is placed under - // the original scope of a "root" node. - // - // Node name uniqueness is guaranteed by unique name of a "root" node in - // a same scope. - // - // Example: - // MakeOptimizedNodeName("AwesomeRewrite", "a/b/Add_AB", ["x/y/Add_XY"]) - // Optimized name: - // "a/b/ArithmeticOptimizer/AwesomeRewrite_Add_AB_Add_XY" - const string MakeOptimizedNodeName( - const string& prefix, const ScopedNodeName& scoped_node_name, - const std::vector& node_names) const { - string node_name = MakeOptimizedNodeName(prefix, scoped_node_name); - for (const string& optimized : node_names) { - auto scoped_node = ParseScopedNodeName(optimized); - strings::StrAppend(&node_name, "_", scoped_node.name); - } - return node_name; - } + // extened context required for ArithmeticOptimizer + const ArithmeticOptimizerContext ctx_ext_; }; // Rewrite a tree of Add/AddN with a single AddN operation, consuming all the @@ -537,8 +357,10 @@ class ArithmeticOptimizerStage { // q e class AddOpsRewriteStage : public ArithmeticOptimizerStage { public: - explicit AddOpsRewriteStage(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("AddOpsRewrite", ctx), rewritten_nodes_() {} + explicit AddOpsRewriteStage(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("AddOpsRewrite", ctx, ctx_ext), + rewritten_nodes_() {} ~AddOpsRewriteStage() override = default; @@ -718,7 +540,7 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { string AddOpsGroupName(const AddOpsGroup& group) const { CHECK_NOTNULL(group.root_node); - auto root = ParseScopedNodeName(group.root_node->name()); + auto root = ParseNodeScopeAndName(group.root_node->name()); std::vector absorbed_node_names(group.absorbed_nodes.size()); std::transform(group.absorbed_nodes.begin(), group.absorbed_nodes.end(), @@ -783,8 +605,9 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { public: explicit HoistCommonFactorOutOfAggregation( - const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("HoistCommonFactor", ctx) {} + const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("HoistCommonFactor", ctx, ctx_ext) {} ~HoistCommonFactorOutOfAggregation() override = default; bool IsSupported(const NodeDef* node) const override { @@ -845,14 +668,14 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { private: // Get a name for new outer Mul node string OuterMulNodeName(const NodeDef* node) const { - auto scoped_node = ParseScopedNodeName(node->name()); - return OptimizedNodeName("Mul", scoped_node); + auto scope_and_name = ParseNodeScopeAndName(node->name()); + return OptimizedNodeName(scope_and_name, "Mul"); } // Get a name new inner Add node string InnerAddNodeName(const NodeDef* node) const { - auto scoped_node = ParseScopedNodeName(node->name()); - return OptimizedNodeName("Add", scoped_node); + auto scope_and_name = ParseNodeScopeAndName(node->name()); + return OptimizedNodeName(scope_and_name, "Add"); } // Determine the set of common factors if the input nodes are all Mul nodes. @@ -932,8 +755,9 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { // Removes inverse transpose nodes class RemoveIdentityTranspose : public ArithmeticOptimizerStage { public: - explicit RemoveIdentityTranspose(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("RemoveIdentityTranspose", ctx) {} + explicit RemoveIdentityTranspose(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveIdentityTranspose", ctx, ctx_ext) {} ~RemoveIdentityTranspose() override = default; bool IsSupported(const NodeDef* node) const override { @@ -1016,8 +840,10 @@ class RemoveIdentityTranspose : public ArithmeticOptimizerStage { // 2) Rewrite Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { public: - explicit RemoveRedundantBitcastStage(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("RemoveRedundantBitcast", ctx) {} + explicit RemoveRedundantBitcastStage( + const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveRedundantBitcast", ctx, ctx_ext) {} ~RemoveRedundantBitcastStage() override = default; bool IsSupported(const NodeDef* node) const override { @@ -1055,8 +881,9 @@ class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { // Remove Casts whose source type and destination type are equal. class RemoveRedundantCastStage : public ArithmeticOptimizerStage { public: - explicit RemoveRedundantCastStage(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("RemoveRedundantCast", ctx) {} + explicit RemoveRedundantCastStage(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveRedundantCast", ctx, ctx_ext) {} ~RemoveRedundantCastStage() override = default; bool IsSupported(const NodeDef* node) const override { return IsCast(*node); } @@ -1073,8 +900,9 @@ class RemoveRedundantCastStage : public ArithmeticOptimizerStage { class RemoveNegationStage : public ArithmeticOptimizerStage { public: - explicit RemoveNegationStage(const ArithmeticOptimizerContext& ctx) - : ArithmeticOptimizerStage("RemoveNegation", ctx) {} + explicit RemoveNegationStage(const GraphOptimizerContext& ctx, + const ArithmeticOptimizerContext& ctx_ext) + : ArithmeticOptimizerStage("RemoveNegation", ctx, ctx_ext) {} ~RemoveNegationStage() override = default; bool IsSupported(const NodeDef* node) const override { @@ -1715,35 +1543,36 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { nodes_to_simplify.PushBack(optimized_graph_->mutable_node(i)); } - const ArithmeticOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, - graph_properties_.get(), node_map_.get(), - &frame_map_, &nodes_to_simplify); + const GraphOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, + graph_properties_.get(), node_map_.get(), + &frame_map_); + const ArithmeticOptimizerContext ctx_ext(&nodes_to_simplify); std::vector> stages; if (options_.combine_add_to_addn) { - stages.push_back( - std::unique_ptr(new AddOpsRewriteStage(ctx))); + stages.push_back(std::unique_ptr( + new AddOpsRewriteStage(ctx, ctx_ext))); } if (options_.hoist_common_factor_out_of_aggregation) { stages.push_back(std::unique_ptr( - new HoistCommonFactorOutOfAggregation(ctx))); + new HoistCommonFactorOutOfAggregation(ctx, ctx_ext))); } if (options_.remove_identity_transpose) { stages.push_back(std::unique_ptr( - new RemoveIdentityTranspose(ctx))); + new RemoveIdentityTranspose(ctx, ctx_ext))); } if (options_.remove_redundant_bitcast) { stages.push_back(std::unique_ptr( - new RemoveRedundantBitcastStage(ctx))); + new RemoveRedundantBitcastStage(ctx, ctx_ext))); } if (options_.remove_redundant_cast) { stages.push_back(std::unique_ptr( - new RemoveRedundantCastStage(ctx))); + new RemoveRedundantCastStage(ctx, ctx_ext))); } if (options_.remove_negation) { stages.push_back(std::unique_ptr( - new RemoveNegationStage(ctx))); + new RemoveNegationStage(ctx, ctx_ext))); } VLOG(1) << "Simplify arithmetic ops using " << stages.size() diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc new file mode 100644 index 0000000000..7044705ade --- /dev/null +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc @@ -0,0 +1,120 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer_stage.h" + +namespace tensorflow { +namespace grappler { + +const NodeScopeAndName ParseNodeScopeAndName(const string& node_name) { + auto pos = node_name.find_last_of("/"); + if (pos == string::npos) { + return {"", node_name}; + } else { + return {node_name.substr(0, pos), node_name.substr(pos + 1)}; + } +}; + +Status GetInputNode(const GraphOptimizerContext& ctx, const string& input, + NodeDef** node) { + string node_name = NodeName(input); + NodeDef* node_by_name = ctx.node_map->GetNode(node_name); + if (node_by_name == nullptr) { + return errors::FailedPrecondition("Node ", node_name, + " doesn't exists in a node map"); + } + *node = node_by_name; + return Status::OK(); +} + +Status GetTensorProperties(const GraphOptimizerContext& ctx, + const string& tensor, + OpInfo::TensorProperties* properties) { + int port; + string tensor_node_name = ParseNodeName(tensor, &port); + if (port < 0) { + return errors::InvalidArgument( + "Can't get tensor properties of control dependency ", tensor); + } + + const auto& output_properties = + ctx.graph_properties->GetOutputProperties(tensor_node_name); + auto num_outputs = output_properties.size(); + + if (num_outputs == 0 || port > num_outputs - 1) { + return errors::InvalidArgument( + "Node ", tensor_node_name, + " is missing output properties at position :", port, + " (num_outputs=", num_outputs, ")"); + } + + properties->CopyFrom(output_properties[port]); + return Status::OK(); +} + +NodeDef* AddCopyNode(const GraphOptimizerContext& ctx, const string& name, + const NodeDef* node_to_copy) { + CHECK(node_to_copy != nullptr); + CHECK(!ctx.node_map->NodeExists(name)) + << "Node " << name << " already exists in a graph"; + NodeDef* new_node = ctx.optimized_graph->add_node(); + *new_node = *node_to_copy; + new_node->set_name(name); + ctx.node_map->AddNode(name, new_node); + return new_node; +} + +NodeDef* AddEmptyNode(const GraphOptimizerContext& ctx, const string& name) { + CHECK(!ctx.node_map->NodeExists(name)) + << "Node " << name << " already exists in a graph"; + NodeDef* new_node = ctx.optimized_graph->add_node(); + new_node->set_name(name); + ctx.node_map->AddNode(name, new_node); + return new_node; +} + +const string MakeOptimizedNodeName(const NodeScopeAndName& node, + const string& sub_scope, + const string& prefix) { + CHECK(!sub_scope.empty() || !prefix.empty()) + << "Either optimized node name prefix or sub-scope must be non-empty"; + string optimized_node_name; + if (!node.scope.empty()) { + strings::StrAppend(&optimized_node_name, node.scope, "/"); + } + if (!sub_scope.empty()) { + strings::StrAppend(&optimized_node_name, sub_scope, "/"); + } + if (!prefix.empty()) { + strings::StrAppend(&optimized_node_name, prefix, "_"); + } + strings::StrAppend(&optimized_node_name, node.name); + return optimized_node_name; +} + +const string MakeOptimizedNodeName(const NodeScopeAndName& root, + const std::vector node_names, + const string& sub_scope, + const string& prefix) { + string optimized_node_name = MakeOptimizedNodeName(root, sub_scope, prefix); + for (const string& node_name : node_names) { + auto name_and_scope = ParseNodeScopeAndName(node_name); + strings::StrAppend(&optimized_node_name, "_", name_and_scope.name); + } + return optimized_node_name; +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h new file mode 100644 index 0000000000..be95c00d2d --- /dev/null +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h @@ -0,0 +1,185 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_OPTIMIZERS_OPTIMIZER_STAGE_H_ +#define TENSORFLOW_GRAPPLER_OPTIMIZERS_OPTIMIZER_STAGE_H_ + +#include +#include +#include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" + +namespace tensorflow { +namespace grappler { + +struct NodeScopeAndName { + string scope; + string name; +}; + +// Parse scope and name: "a/b/c/Add_1" -> {"a/b/c", "Add_1"} +const NodeScopeAndName ParseNodeScopeAndName(const string& node_name); + +// Context owned by GraphOptimizer, and passed to every stage at construction +// time. Each optimizer stage is responsible for updating it according to the +// changes it made to the graph. +// +// If an optimizer needs access to some helper class that is not present in this +// context, consider creating an extension context, specific to that +// optimizer (see example of ArithmeticOptimizerContext). GraphOptimizerContext +// should only have members that are useful to almost all optimizers. +struct GraphOptimizerContext { + GraphOptimizerContext(const std::unordered_set* nodes_to_preserve, + GraphDef* optimized_graph, + GraphProperties* graph_properties, NodeMap* node_map, + FrameMap* frame_map) + : nodes_to_preserve(nodes_to_preserve), + optimized_graph(optimized_graph), + graph_properties(graph_properties), + node_map(node_map), + frame_map(frame_map) {} + + const std::unordered_set* nodes_to_preserve; + GraphDef* optimized_graph; + GraphProperties* graph_properties; + NodeMap* node_map; + // TODO(ezhulenev): it seems that frame_map is only relevant for loop + // optimizer? Move it to loop-optimizer specific context extension. + FrameMap* frame_map; +}; + +Status GetInputNode(const GraphOptimizerContext& ctx, const string& input, + NodeDef** node); +Status GetTensorProperties(const GraphOptimizerContext& ctx, + const string& tensor, + OpInfo::TensorProperties* properties); + +NodeDef* AddCopyNode(const GraphOptimizerContext& ctx, const string& name, + const NodeDef* node_to_copy); +NodeDef* AddEmptyNode(const GraphOptimizerContext& ctx, const string& name); + +// WARNING: +// Optimizer stage must try to re-use original nodes of a graph and +// make all updates in place. This helps to make robust node placement +// decisions. Create new nodes only if there is a reason for that. + +// Make a name for a new node obtained by optimizing a single node of the +// original graph. The optimized node is placed under the original node scope. +// +// Node name uniqueness is guaranteed by unique name of an original node in +// a same scope. +// +// Empty sub_scope or prefix ignored. At least one of them must be non-empty. +// +// Example: a/b/c/Add -> a/b/c/${sub_scope}/${prefix}_Add. +const string MakeOptimizedNodeName(const NodeScopeAndName& node, + const string& sub_scope, + const string& prefix); +// Make a name for a new node obtained by optimizing multiple nodes of the +// original graph, starting from "root". The optimized node is placed under +// the original scope of a "root" node. +// +// Example: [a/b/c/Add, x/y/z/Mul] -> a/b/c/${sub_scope}/${prefix}_Add_Mul +const string MakeOptimizedNodeName(const NodeScopeAndName& root, + const std::vector node_names, + const string& sub_scope, + const string& prefix); + +// Base class for multi-stage GraphOptimizers (ArithmeticOptimizer, etc...). +// +// If a graph optimizer consists of large number of small independent +// rewrites, each of them should be implemented as a separate stage. +// +// * Result: +// Each graph optimizer choose what result is reported by each stage +// (e.g. each stage can fill in the name of optimized nodes, or have more +// complex result). +template +class GraphOptimizerStage { + public: + explicit GraphOptimizerStage(const string& optimizer_name, + const string& stage_name, + const GraphOptimizerContext& ctx) + : optimizer_name_(optimizer_name), stage_name_(stage_name), ctx_(ctx) {} + virtual ~GraphOptimizerStage() = default; + + // Check if we should try to simplify node. Returning true doesn't + // guarantee that node will be simplified. + // + // Should implement just a basic sanity check, without any expensive graph + // traversals. + virtual bool IsSupported(const NodeDef* node) const = 0; + + // Try to simplify the given node. + // + // Return error status only if some precondition is failed, or got an + // incorrect graph. In every other case return Status:OK(), even if didn't + // simplify anything. + // + // Report result using output argument. Each GraphOptimizer can choose it's + // own Result type. + // TODO(ezhulenev): if it will appear that Result output parameter is not + // sufficiently useful (used with a reason by most optimizers), get rid of it, + // and remove template parameter. + virtual Status TrySimplify(NodeDef* node, Result* result) = 0; + + // Get a name for a new node, created by this stage, based on one or multiple + // nodes of an original graph. + const string OptimizedNodeName(const NodeScopeAndName& node) const { + return MakeOptimizedNodeName(node, optimizer_name_, stage_name_); + } + const string OptimizedNodeName(const NodeScopeAndName& root, + const std::vector& nodes) const { + return MakeOptimizedNodeName(root, nodes, optimizer_name_, stage_name_); + } + const string OptimizedNodeName(const NodeScopeAndName& node, + const string& rewrite_rule) const { + const string prefix = strings::StrCat(stage_name_, "_", rewrite_rule); + return MakeOptimizedNodeName(node, optimizer_name_, prefix); + } + + // Get a node by input name from a node map. Return an error if node was not + // found. + Status GetInputNode(const string& input, NodeDef** node) const { + return ::tensorflow::grappler::GetInputNode(ctx_, input, node); + } + // Lookup tensor properties by name. Tensor name might have non-zero port + // number. Return an error if tensor node doesn't exists in a graph, or it + // doesn't have properties defined for requested port. + Status GetTensorProperties(const string& tensor, + OpInfo::TensorProperties* properties) const { + return ::tensorflow::grappler::GetTensorProperties(ctx_, tensor, + properties); + } + + NodeDef* AddCopyNode(const string& name, const NodeDef* node_to_copy) { + return ::tensorflow::grappler::AddCopyNode(ctx_, name, node_to_copy); + } + NodeDef* AddEmptyNode(const string& name) { + return ::tensorflow::grappler::AddEmptyNode(ctx_, name); + } + + protected: // Data members + const string optimizer_name_; + const string stage_name_; + const GraphOptimizerContext ctx_; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_OPTIMIZERS_OPTIMIZER_STAGE_H_ diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc new file mode 100644 index 0000000000..416327e622 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc @@ -0,0 +1,168 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer_stage.h" + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class GraphOptimizerStageTest : public ::testing::Test {}; + +struct FakeResult {}; + +// NoOp optimizer stage that supports all the node types and does nothing +class FakeOptimizerStage : public GraphOptimizerStage { + public: + explicit FakeOptimizerStage(const string& optimizer_name, + const string& stage_name, + const GraphOptimizerContext& ctx) + : GraphOptimizerStage(optimizer_name, stage_name, ctx) {} + ~FakeOptimizerStage() override = default; + + bool IsSupported(const NodeDef* node) const override { return true; } + Status TrySimplify(NodeDef* node, FakeResult* result) override { + return Status::OK(); + } +}; + +TEST_F(GraphOptimizerStageTest, ParseNodeNameAndScope_InRoot) { + const auto scope_and_name = ParseNodeScopeAndName("Add"); + EXPECT_EQ("", scope_and_name.scope); + EXPECT_EQ("Add", scope_and_name.name); +} + +TEST_F(GraphOptimizerStageTest, ParseNodeNameAndScope_InScope) { + const auto scope_and_name = ParseNodeScopeAndName("a/b/c/Add"); + EXPECT_EQ("a/b/c", scope_and_name.scope); + EXPECT_EQ("Add", scope_and_name.name); +} + +TEST_F(GraphOptimizerStageTest, OptimizedNodeName) { + GraphOptimizerContext ctx(/*nodes_to_preserve*/ nullptr, + /*optimized_graph*/ nullptr, + /*graph_properties*/ nullptr, /*node_name*/ nullptr, + /*frame_map*/ nullptr); + FakeOptimizerStage stage("my_opt", "my_stg", ctx); + + const auto node = ParseNodeScopeAndName("a/b/c/Add"); + + // Without rewrite rule + EXPECT_EQ("a/b/c/my_opt/my_stg_Add", stage.OptimizedNodeName(node)); + EXPECT_EQ( + "a/b/c/my_opt/my_stg_Add_Mul_Sqrt", + stage.OptimizedNodeName(node, std::vector({"Mul", "Sqrt"}))); + + // With rewrite rule + const string rewrite = "my_rewrite"; + EXPECT_EQ("a/b/c/my_opt/my_stg_my_rewrite_Add", + stage.OptimizedNodeName(node, rewrite)); +} + +TEST_F(GraphOptimizerStageTest, GetInputNodeAndProperties) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto add = ops::Add(s.WithOpName("Add"), a, b); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphProperties properties(item); + TF_CHECK_OK(properties.InferStatically(/*assume_valid_feeds*/ false)); + + NodeMap node_map(&item.graph); + + GraphOptimizerContext ctx(/*nodes_to_preserve*/ nullptr, + /*optimized_graph*/ &item.graph, + /*graph_properties*/ &properties, + /*node_name*/ &node_map, + /*frame_map*/ nullptr); + FakeOptimizerStage stage("my_opt", "my_stg", ctx); + + NodeDef* add_node; + TF_CHECK_OK(stage.GetInputNode("Add", &add_node)); + EXPECT_EQ("a", add_node->input(0)); + EXPECT_EQ("b", add_node->input(1)); + + OpInfo::TensorProperties add_properties; + TF_CHECK_OK(stage.GetTensorProperties("Add", &add_properties)); + EXPECT_EQ(DT_FLOAT, add_properties.dtype()); + + OpInfo::TensorProperties a_properties; + TF_CHECK_OK(stage.GetTensorProperties("a:0", &a_properties)); + EXPECT_EQ(DT_FLOAT_REF, a_properties.dtype()); + + OpInfo::TensorProperties b_properties; + TF_CHECK_OK(stage.GetTensorProperties("b:0", &b_properties)); + EXPECT_EQ(DT_FLOAT_REF, b_properties.dtype()); +} + +TEST_F(GraphOptimizerStageTest, AddNodes) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto add = ops::Add(s.WithOpName("Add"), a, b); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphProperties properties(item); + TF_CHECK_OK(properties.InferStatically(/*assume_valid_feeds*/ false)); + + NodeMap node_map(&item.graph); + + GraphOptimizerContext ctx(/*nodes_to_preserve*/ nullptr, + /*optimized_graph*/ &item.graph, + /*graph_properties*/ &properties, + /*node_name*/ &node_map, + /*frame_map*/ nullptr); + FakeOptimizerStage stage("my_opt", "my_stg", ctx); + + NodeDef* add_node; + TF_CHECK_OK(stage.GetInputNode("Add", &add_node)); + + // Add a new copy node + NodeDef* add_node_copy = stage.AddCopyNode("Add_1", add_node); + EXPECT_EQ("Add_1", add_node_copy->name()); + EXPECT_EQ("Add", add_node_copy->op()); + EXPECT_EQ("a", add_node_copy->input(0)); + EXPECT_EQ("b", add_node_copy->input(1)); + + // It must be available for by-name lookup + NodeDef* add_node_copy_by_name; + TF_CHECK_OK(stage.GetInputNode("Add_1", &add_node_copy_by_name)); + EXPECT_EQ(add_node_copy, add_node_copy_by_name); + + // Add new empty node + NodeDef* empty_node = stage.AddEmptyNode("Add_2"); + EXPECT_EQ("Add_2", empty_node->name()); + + // It must be available for by-name lookup + NodeDef* empty_node_by_name; + TF_CHECK_OK(stage.GetInputNode("Add_2", &empty_node_by_name)); + EXPECT_EQ(empty_node, empty_node_by_name); +} + +} // namespace +} // end namespace grappler +} // end namespace tensorflow \ No newline at end of file -- GitLab From 36ec749ec79c2313924666a1c5324620e493d0c4 Mon Sep 17 00:00:00 2001 From: Terry Koo Date: Mon, 19 Mar 2018 13:20:12 -0700 Subject: [PATCH 186/960] Adds missing protobuf dep to tf.contrib.data ops. (#17840) * Adds missing protobuf dep to tf.contrib.data ops. I think this will help resolve the following: https://github.com/tensorflow/serving/issues/421 https://github.com/tensorflow/serving/issues/684 https://github.com/tensorflow/tensorflow/issues/17619 Or at least I was experiencing a similar issue and this change resolved it for me in my local repo. * s/third_party// --- tensorflow/contrib/data/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 0458199ff7..5ba2297e7f 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -29,7 +29,10 @@ py_library( tf_custom_op_library( name = "_dataset_ops.so", srcs = ["ops/dataset_ops.cc"], - deps = ["//tensorflow/contrib/data/kernels:dataset_kernels"], + deps = [ + "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/core:lib_proto_parsing", + ], ) tf_gen_op_libs( -- GitLab From eb03b44049328404eb5578efda0729ca1a4f0a11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 13:26:19 -0700 Subject: [PATCH 187/960] Add bfloat16 support for CPU ops. PiperOrigin-RevId: 189631659 --- tensorflow/core/kernels/cwise_op_div.cc | 8 ++++---- tensorflow/core/kernels/cwise_op_less.cc | 4 ++-- tensorflow/core/kernels/cwise_op_less_equal.cc | 4 ++-- tensorflow/core/kernels/cwise_op_minimum.cc | 4 ++-- tensorflow/core/kernels/cwise_op_sqrt.cc | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc index c71c756e44..b12652f7fb 100644 --- a/tensorflow/core/kernels/cwise_op_div.cc +++ b/tensorflow/core/kernels/cwise_op_div.cc @@ -16,14 +16,14 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Div", functor::div, float, Eigen::half, double, - complex64, complex128); +REGISTER6(BinaryOp, CPU, "Div", functor::div, float, Eigen::half, double, + bfloat16, complex64, complex128); REGISTER5(BinaryOp, CPU, "Div", functor::safe_div, uint8, uint16, int16, int32, int64); REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16, int32, int64); -REGISTER5(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, - complex64, complex128); +REGISTER6(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, + bfloat16, complex64, complex128); #if GOOGLE_CUDA REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8, uint16, int16, int64, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc index 00cdecdbd1..575968126f 100644 --- a/tensorflow/core/kernels/cwise_op_less.cc +++ b/tensorflow/core/kernels/cwise_op_less.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER8(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double, - int32, int64, uint8, int8, int16); +REGISTER9(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double, + bfloat16, int32, int64, uint8, int8, int16); #if GOOGLE_CUDA REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc index 11806c5fc7..499200d054 100644 --- a/tensorflow/core/kernels/cwise_op_less_equal.cc +++ b/tensorflow/core/kernels/cwise_op_less_equal.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER8(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half, - double, int32, int64, uint8, int8, int16); +REGISTER9(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half, + bfloat16, double, int32, int64, uint8, int8, int16); #if GOOGLE_CUDA REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc index dff83df828..9bc3700387 100644 --- a/tensorflow/core/kernels/cwise_op_minimum.cc +++ b/tensorflow/core/kernels/cwise_op_minimum.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half, - double, int32, int64); +REGISTER6(BinaryOp, CPU, "Minimum", functor::minimum, float, Eigen::half, + bfloat16, double, int32, int64); #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "Minimum", functor::minimum, float, Eigen::half, double, int64); diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc index 497756133d..205070761f 100644 --- a/tensorflow/core/kernels/cwise_op_sqrt.cc +++ b/tensorflow/core/kernels/cwise_op_sqrt.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Sqrt", functor::sqrt, float, Eigen::half, double, + bfloat16, complex64, complex128); #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double); @@ -27,8 +27,8 @@ REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double); REGISTER2(UnaryOp, SYCL, "Sqrt", functor::sqrt, float, double); #endif // TENSORFLOW_USE_SYCL -REGISTER5(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float, - Eigen::half, double, complex64, complex128); +REGISTER6(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float, + Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA REGISTER3(SimpleBinaryOp, GPU, "SqrtGrad", functor::sqrt_grad, float, Eigen::half, double); -- GitLab From a78c5033e005f76b83df4fd97d0074fcc990f603 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 19 Mar 2018 13:38:23 -0700 Subject: [PATCH 188/960] TFE: Fix bug encountered when using `optimizer.apply_gradients` in a defun. Prior to this change, `Optimizer` assumed that `not context.executing_eagerly()` implied that every variable that it was to update was constructed in a graph. That assumption is incorrect --- TensorFlow functions can mutate variables captured from or lifted into the eager context. As such, this change removes that assumption. Fixes #17792 PiperOrigin-RevId: 189633630 --- tensorflow/python/eager/function_test.py | 32 ++++++++++++++++++++++++ tensorflow/python/ops/variables.py | 6 +++++ tensorflow/python/training/optimizer.py | 11 +++++++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index b9cde16867..fd1d2c25ff 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.training import gradient_descent class FunctionTest(test.TestCase): @@ -762,6 +763,37 @@ class AutomaticControlDependenciesTest(test.TestCase): self.assertAllEqual(f().eval(), 4.0) + def testOptimizerInDefun(self): + def loss(v): + return v**2 + + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) + + @function.defun + def train(): + v = resource_variable_ops.ResourceVariable(1.0) + grad = backprop.implicit_grad(loss)(v) + optimizer.apply_gradients(grad) + return v.read_value() + + value = train() + self.assertEqual(value.numpy(), -1.0) + + def testOptimizerInDefunWithCapturedVariable(self): + v = resource_variable_ops.ResourceVariable(1.0) + def loss(): + return v**2 + + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) + + @function.defun + def train(): + grad = backprop.implicit_grad(loss)() + optimizer.apply_gradients(grad) + + train() + self.assertEqual(v.numpy(), -1.0) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index c37cdd9e27..c646f79589 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -293,6 +293,7 @@ class Variable(checkpointable.CheckpointableBase): Raises: ValueError: If the initial value is not specified, or does not have a shape and `validate_shape` is `True`. + RuntimeError: If lifted into the eager context. """ _ = expected_shape if initial_value is None: @@ -319,6 +320,11 @@ class Variable(checkpointable.CheckpointableBase): if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.init_scope(): + # Ensure that we weren't lifted into the eager context. + if context.executing_eagerly(): + raise RuntimeError( + "tf.Variable not supported when eager execution is enabled. " + "Please use tf.contrib.eager.Variable instead") with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index af9cc3491c..bf79714f96 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -191,6 +191,10 @@ def _get_processor(v): return _TensorProcessor(v) else: return _DenseResourceVariableProcessor(v) + if isinstance( + v, resource_variable_ops.ResourceVariable) and not v._in_graph_mode: # pylint: disable=protected-access + # True if and only if `v` was initialized eagerly. + return _DenseResourceVariableProcessor(v) if v.op.type == "VarHandleOp": return _DenseResourceVariableProcessor(v) if isinstance(v, variables.Variable): @@ -546,7 +550,12 @@ class Optimizer( # We colocate all ops created in _apply_dense or _apply_sparse # on the same device as the variable. # TODO(apassos): figure out how to get the variable name here. - scope_name = "" if context.executing_eagerly() else var.op.name + if context.executing_eagerly() or isinstance( + var, + resource_variable_ops.ResourceVariable) and not var._in_graph_mode: # pylint: disable=protected-access + scope_name = "" + else: + scope_name = var.op.name with ops.name_scope("update_" + scope_name), ops.colocate_with(var): update_ops.append(processor.update_op(self, grad)) if global_step is None: -- GitLab From 60a37c43c1504f5a1957f2f319bcd1e907be4c18 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 19 Mar 2018 13:43:50 -0700 Subject: [PATCH 189/960] Moves TFE_Executor to tensorflow::EagerExecutor in tensorflow/core/common_runtime/eager PiperOrigin-RevId: 189634404 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/c_api.cc | 154 ++---------------- tensorflow/c/eager/c_api_internal.h | 104 +----------- tensorflow/core/BUILD | 3 + .../common_runtime/eager/eager_executor.cc | 152 +++++++++++++++++ .../common_runtime/eager/eager_executor.h | 138 ++++++++++++++++ 6 files changed, 312 insertions(+), 241 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/eager_executor.cc create mode 100644 tensorflow/core/common_runtime/eager/eager_executor.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 3046d9064a..73a3450e0e 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -27,6 +27,7 @@ tf_cuda_library( ":runtime", "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", + "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -54,6 +55,7 @@ tf_cuda_library( ":runtime", "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", + "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 455bc19be8..4e5703ffe0 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -165,7 +165,7 @@ void TFE_ContextSetThreadLocalDevicePlacementPolicy( // Note: this function looks up a thread local policy. So it should be called in // the appropriate client thread. In particular, in async mode, it may not be -// safe to call this function from the async TFE_Executor threads. +// safe to call this function from the async EagerExecutor threads. extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( TFE_Context* ctx) { tensorflow::mutex_lock ml(ctx->policy_map_mu); @@ -731,15 +731,15 @@ tensorflow::Status Execute( return tensorflow::Status::OK(); } -// TODO(agarwal): move TFE_Executor and TFE_Node related code to a separate +// TODO(agarwal): move EagerExecutor and EagerNode related code to a separate // file. -class ExecuteNode : public TFE_Node { +class ExecuteNode : public tensorflow::EagerNode { public: ExecuteNode(TFE_Op* op, tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, const tensorflow::DataTypeVector& output_dtypes, TFE_TensorHandle** retvals, int num_retvals) - : TFE_Node(op->ctx->executor.NextId()), + : tensorflow::EagerNode(op->ctx->executor.NextId()), ctx_(op->ctx), op_device_(op->device), inputs_(op->inputs), @@ -791,11 +791,11 @@ class ExecuteNode : public TFE_Node { tensorflow::gtl::InlinedVector retvals_; }; -class CopyToDeviceNode : public TFE_Node { +class CopyToDeviceNode : public tensorflow::EagerNode { public: CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, TFE_Context* ctx) - : TFE_Node(ctx->executor.NextId()), + : tensorflow::EagerNode(ctx->executor.NextId()), src_(src), dstd_(dstd), ctx_(ctx), @@ -1182,8 +1182,9 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. - TFE_Node* node = new ExecuteNode(op, kernel, maybe_stats.release(), - output_dtypes, retvals, *num_retvals); + tensorflow::EagerNode* node = + new ExecuteNode(op, kernel, maybe_stats.release(), output_dtypes, + retvals, *num_retvals); ctx->executor.Add(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to @@ -1214,8 +1215,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // make sure that `h` is ready before the copy is actually done. CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); TFE_TensorHandle* output = node->dst(); - // Note that calling Add makes `node` accessible by the TFE_Executor thread. - // So further accesses need to be thread-safe. + // Note that calling Add makes `node` accessible by the EagerExecutor + // thread. So further accesses need to be thread-safe. ctx->executor.Add(node); return output; } else { @@ -1356,137 +1357,6 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } } // namespace tensorflow -TFE_Node::TFE_Node(tensorflow::uint64 id) : id(id) {} - -TFE_Executor::~TFE_Executor() { - tensorflow::mutex_lock l(node_queue_mutex_); - thread_done_ = true; - nodes_pending_.notify_all(); -} - -tensorflow::uint64 TFE_Executor::NextId() { - tensorflow::mutex_lock l(next_id_mutex_); - return next_id_++; -} - -void TFE_Executor::EnableAsync() { - tensorflow::mutex_lock l(node_queue_mutex_); - if (thread_ == nullptr) { - thread_.reset(tensorflow::Env::Default()->StartThread( - tensorflow::ThreadOptions(), "eager_async_executor", - std::bind(&TFE_Executor::Run, this))); - } -} - -void TFE_Executor::Add(TFE_Node* node) { - tensorflow::mutex_lock l(node_queue_mutex_); - DCHECK(thread_) << "EnableAsync should have been called before Add"; - if (!status_.ok()) { - delete node; - return; - } - int qlen = node_queue_.size(); - if (qlen > 0) { - if (node_queue_.back()->id >= node->id) { - status_ = tensorflow::errors::InvalidArgument( - "Inserting TFE_Node with non-increasing ids:", node_queue_.back()->id, - " vs ", node->id); - delete node; - return; - } - node_queue_.push(node); - } else { - node_queue_.push(node); - nodes_pending_.notify_all(); - } -} - -tensorflow::Status TFE_Executor::WaitFor(tensorflow::uint64 node_id) { - return WaitImpl(false, node_id); -} - -tensorflow::Status TFE_Executor::WaitForAllPendingNodes() { - return WaitImpl(true, 0); -} - -tensorflow::Status TFE_Executor::WaitImpl(bool wait_all, - tensorflow::uint64 node_id) { - tensorflow::condition_variable cond; - tensorflow::mutex_lock l(node_queue_mutex_); - // Don't wait if an error is already set. - if (!status_.ok()) return status_; - if (node_queue_.empty()) return tensorflow::Status::OK(); - if (wait_all) { - node_id = node_queue_.back()->id; - } else if (node_id < node_queue_.front()->id) { - // Note that we are relying on the ops being dispatched sequentially from - // the queue. - return tensorflow::Status::OK(); - } - node_done_notifications_.insert(std::make_pair(node_id, &cond)); - cond.wait(l); - // Note that we could be woken up if an error occurs, even though the node has - // not actually executed. - return status_; -} - -void TFE_Executor::ClearError() { - tensorflow::mutex_lock l(node_queue_mutex_); - if (status_.ok()) return; - // If an error was set, node_done_notifications_ and node_queue_ should have - // been cleared, and no new entries should have been added since. - DCHECK(node_done_notifications_.empty()); - DCHECK(node_queue_.empty()); - status_ = tensorflow::Status::OK(); - nodes_pending_.notify_all(); -} - -tensorflow::Status TFE_Executor::status() { - tensorflow::mutex_lock l(node_queue_mutex_); - return status_; -} - -void TFE_Executor::Run() { - while (true) { - std::unique_ptr curr_node; - { - tensorflow::mutex_lock l(node_queue_mutex_); - while (node_queue_.empty() || !status_.ok()) { - if (thread_done_) return; - nodes_pending_.wait(l); - } - curr_node.reset(node_queue_.front()); - } - tensorflow::Status status = curr_node->Run(); - const bool ok = status.ok(); - tensorflow::mutex_lock l(node_queue_mutex_); - node_queue_.pop(); - if (!ok) { - status_ = status; - // TODO(agarwal): mark all affected handles as corrupted before clearing - // this queue. - // We remove any pending ops so that we don't try to execute them if - // ClearError is called. - for (int i = 0; i < node_queue_.size(); ++i) { - delete node_queue_.front(); - node_queue_.pop(); - } - } - if (!node_done_notifications_.empty()) { - tensorflow::uint64 node_id = curr_node->id; - // Note that we notify all waiting threads in case an error has occurred. - // These calling threads are responsible for checking status_ before - // proceeding. - const auto range = ok ? node_done_notifications_.equal_range(node_id) - : make_pair(node_done_notifications_.begin(), - node_done_notifications_.end()); - for (auto it = range.first; it != range.second; ++it) { - it->second->notify_all(); - } - node_done_notifications_.erase(range.first, range.second); - } - } -} bool TFE_Context::Async() const { tensorflow::mutex_lock l(async_map_mu); @@ -1502,7 +1372,7 @@ bool TFE_TensorHandle::IsReady() { tensorflow::Status TFE_TensorHandle::WaitReady() { if (node_id == 0) return tensorflow::Status::OK(); - TFE_Executor* executor = nullptr; + tensorflow::EagerExecutor* executor = nullptr; { tensorflow::mutex_lock l(ctx_mutex_); if (ctx_ == nullptr) return tensorflow::Status::OK(); diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 8dba12f47b..1edbe81992 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -40,101 +41,6 @@ limitations under the License. #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" -// A unit of execution for the TFE_Executor class below. Example subclasses -// encapsulate execution of a TFE_Op, or copying a TFE_TensorHandle from one -// device to another. -class TFE_Node { - public: - explicit TFE_Node(tensorflow::uint64 id); - - virtual ~TFE_Node() {} - - // Runs the computation corresponding to this node and blocks till the - // execution is done. - virtual tensorflow::Status Run() = 0; - - // An id unique to the TFE_Context under which this node is created. Allocated - // monotonically. - const tensorflow::uint64 id; -}; - -// A class for handling async execution (see TFE_ContextSetAsync). -// Note that this class is thread-safe. -// TODO(agarwal): TFE_OpAddInput may currently block if it tries to access the -// device of the input handle. Fix that. -// TODO(agarwal): On error, mark all affected handles as corrupted. -// TODO(agarwal): Implement support for control dependencies. -// TODO(agarwal): Support out-of-order execution and dispatching multiple -// TFE_Node in parallel. -// TODO(agarwal): Implement optimizations over TFE_Node traces. -class TFE_Executor { - public: - ~TFE_Executor(); - - // This is called whenever async mode is enabled. Note that it may be called - // multiple times as different calling threads may switch async mode on or off - // independently. - void EnableAsync(); - - // Helper function to create monotonically increasing ids unique to this - // object. - tensorflow::uint64 NextId(); - - // Schedules `node` for execution. - // Note that Add must be called in monotonically increasing order of node->id. - void Add(TFE_Node* node); - - // Causes the caller to block till node with id `node_id` has finished - // execution. - tensorflow::Status WaitFor(tensorflow::uint64 node_id); - - // Blocks till all currently pending ops are done. - tensorflow::Status WaitForAllPendingNodes(); - - // Clears all currently set errors which re-enables async execution. - void ClearError(); - - // Returns Status based on any errors that occurred during async execution. - tensorflow::Status status(); - - private: - // Starts execution of pending TFE_Nodes. This function loops till - // thread_done_ is set to true. If any errors are encontered, these are set - // inside `status_`. The loop blocks anytime there are no pending nodes, or if - // `status_` is not ok. - void Run(); - - tensorflow::Status WaitImpl(bool wait_all, tensorflow::uint64 node_id); - - tensorflow::mutex node_queue_mutex_; - - // Used to signal that some TFE_Nodes are pending execution. - tensorflow::condition_variable nodes_pending_ GUARDED_BY(node_queue_mutex_); - - // Queue of pending TFE_Nodes. - std::queue node_queue_ GUARDED_BY(node_queue_mutex_); - - // `status_` is set based on any errors raised during execution of a TFE_Node. - // It remains set until ClearError is called. - tensorflow::Status status_ GUARDED_BY(node_queue_mutex_); - - // Map from id of a TFE_Node to condition_variables (not owned by the map). - // These condition_variables are notified and removed when that TFE_Node is - // done executing, or if an error is found in execution of any TFE_Node. - std::multimap - node_done_notifications_ GUARDED_BY(node_queue_mutex_); - - // Thread object that calls the `Run` method. Currently we use only one thread - // for executing the TFE_Nodes one-by-one. - std::unique_ptr thread_ GUARDED_BY(node_queue_mutex_); - - // Indicates that `thread_` should stop as soon as it is done executing the - // current TFE_Node. - bool thread_done_ GUARDED_BY(node_queue_mutex_) = false; - - tensorflow::mutex next_id_mutex_; - tensorflow::uint64 next_id_ GUARDED_BY(next_id_mutex_) = 1; -}; struct TFE_ContextOptions { TF_SessionOptions session_options; @@ -203,8 +109,8 @@ struct TFE_Context { tensorflow::mutex metadata_mu; tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); const bool log_device_placement; - // TFE_Executor for async execution. - TFE_Executor executor; + // EagerExecutor for async execution. + tensorflow::EagerExecutor executor; // True if running in asynchronous mode. bool Async() const; @@ -263,13 +169,13 @@ struct TFE_TensorHandle : public tensorflow::core::RefCounted { private: // If the contents of the Tensor pointed to by this handle is yet to be - // computed by a TFE_Node, this function will block till that compuatation is + // computed by a EagerNode, this function will block till that compuatation is // done and the handle is "ready". tensorflow::Status WaitReady(); bool IsReady(); - // Id for the TFE_Node that will compute the value pointed to by this handle. + // Id for the EagerNode that will compute the value pointed to by this handle. // If the value is 0, the handle is already ready, but not vice-versa. const tensorflow::uint64 node_id; diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index df44857185..cf29444065 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -793,6 +793,7 @@ tf_cuda_library( hdrs = [ "common_runtime/device.h", "common_runtime/device_factory.h", + "common_runtime/eager/eager_executor.h", "common_runtime/optimization_registry.h", "common_runtime/shape_refiner.h", "graph/algorithm.h", @@ -2141,6 +2142,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/stats_publisher_interface.h", "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", + "common_runtime/eager/eager_executor.h", "graph/gradients.h", "graph/quantize_training.h", ] + if_mkl(["graph/mkl_graph_util.h"]) @@ -2160,6 +2162,7 @@ tf_cuda_library( "common_runtime/device_factory.cc", "common_runtime/device_mgr.cc", "common_runtime/device_set.cc", + "common_runtime/eager/eager_executor.cc", "common_runtime/executor.cc", "common_runtime/function.cc", "common_runtime/graph_optimizer.cc", diff --git a/tensorflow/core/common_runtime/eager/eager_executor.cc b/tensorflow/core/common_runtime/eager/eager_executor.cc new file mode 100644 index 0000000000..b699036e96 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/eager_executor.cc @@ -0,0 +1,152 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/eager_executor.h" + +namespace tensorflow { + +EagerNode::EagerNode(tensorflow::uint64 id) : id(id) {} + +EagerExecutor::~EagerExecutor() { + tensorflow::mutex_lock l(node_queue_mutex_); + thread_done_ = true; + nodes_pending_.notify_all(); +} + +tensorflow::uint64 EagerExecutor::NextId() { + tensorflow::mutex_lock l(next_id_mutex_); + return next_id_++; +} + +void EagerExecutor::EnableAsync() { + tensorflow::mutex_lock l(node_queue_mutex_); + if (thread_ == nullptr) { + thread_.reset(tensorflow::Env::Default()->StartThread( + tensorflow::ThreadOptions(), "eager_async_executor", + std::bind(&EagerExecutor::Run, this))); + } +} + +void EagerExecutor::Add(EagerNode* node) { + tensorflow::mutex_lock l(node_queue_mutex_); + DCHECK(thread_) << "EnableAsync should have been called before Add"; + if (!status_.ok()) { + delete node; + return; + } + int64 qlen = node_queue_.size(); + if (qlen > 0) { + if (node_queue_.back()->id >= node->id) { + status_ = tensorflow::errors::InvalidArgument( + "Inserting EagerNode with non-increasing ids:", + node_queue_.back()->id, " vs ", node->id); + delete node; + return; + } + node_queue_.push(node); + } else { + node_queue_.push(node); + nodes_pending_.notify_all(); + } +} + +tensorflow::Status EagerExecutor::WaitFor(tensorflow::uint64 node_id) { + return WaitImpl(false, node_id); +} + +tensorflow::Status EagerExecutor::WaitForAllPendingNodes() { + return WaitImpl(true, 0); +} + +tensorflow::Status EagerExecutor::WaitImpl(bool wait_all, + tensorflow::uint64 node_id) { + tensorflow::condition_variable cond; + tensorflow::mutex_lock l(node_queue_mutex_); + // Don't wait if an error is already set. + if (!status_.ok()) return status_; + if (node_queue_.empty()) return tensorflow::Status::OK(); + if (wait_all) { + node_id = node_queue_.back()->id; + } else if (node_id < node_queue_.front()->id) { + // Note that we are relying on the ops being dispatched sequentially from + // the queue. + return tensorflow::Status::OK(); + } + node_done_notifications_.insert(std::make_pair(node_id, &cond)); + cond.wait(l); + // Note that we could be woken up if an error occurs, even though the node has + // not actually executed. + return status_; +} + +void EagerExecutor::ClearError() { + tensorflow::mutex_lock l(node_queue_mutex_); + if (status_.ok()) return; + // If an error was set, node_done_notifications_ and node_queue_ should have + // been cleared, and no new entries should have been added since. + DCHECK(node_done_notifications_.empty()); + DCHECK(node_queue_.empty()); + status_ = tensorflow::Status::OK(); + nodes_pending_.notify_all(); +} + +tensorflow::Status EagerExecutor::status() { + tensorflow::mutex_lock l(node_queue_mutex_); + return status_; +} + +void EagerExecutor::Run() { + while (true) { + std::unique_ptr curr_node; + { + tensorflow::mutex_lock l(node_queue_mutex_); + while (node_queue_.empty() || !status_.ok()) { + if (thread_done_) return; + nodes_pending_.wait(l); + } + curr_node.reset(node_queue_.front()); + } + tensorflow::Status status = curr_node->Run(); + const bool ok = status.ok(); + tensorflow::mutex_lock l(node_queue_mutex_); + node_queue_.pop(); + if (!ok) { + status_ = status; + // TODO(agarwal): mark all affected handles as corrupted before clearing + // this queue. + // We remove any pending ops so that we don't try to execute them if + // ClearError is called. + for (int i = 0; i < node_queue_.size(); ++i) { + delete node_queue_.front(); + node_queue_.pop(); + } + } + if (!node_done_notifications_.empty()) { + tensorflow::uint64 node_id = curr_node->id; + // Note that we notify all waiting threads in case an error has occurred. + // These calling threads are responsible for checking status_ before + // proceeding. + const auto range = ok ? node_done_notifications_.equal_range(node_id) + : make_pair(node_done_notifications_.begin(), + node_done_notifications_.end()); + for (auto it = range.first; it != range.second; ++it) { + it->second->notify_all(); + } + node_done_notifications_.erase(range.first, range.second); + } + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/eager_executor.h b/tensorflow/core/common_runtime/eager/eager_executor.h new file mode 100644 index 0000000000..021daeb21d --- /dev/null +++ b/tensorflow/core/common_runtime/eager/eager_executor.h @@ -0,0 +1,138 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_EXECUTOR_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_EXECUTOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +// A unit of execution for the EagerExecutor class below. Example subclasses +// encapsulate execution of a TFE_Op, or copying a TFE_TensorHandle from one +// device to another. +class EagerNode { + public: + explicit EagerNode(uint64 id); + + virtual ~EagerNode() {} + + // Runs the computation corresponding to this node and blocks till the + // execution is done. + virtual Status Run() = 0; + + // An id unique to the TFE_Context under which this node is created. Allocated + // monotonically. + const uint64 id; +}; + +// A class for handling async execution (see TFE_ContextSetAsync). +// Note that this class is thread-safe. +// TODO(agarwal): TFE_OpAddInput may currently block if it tries to access the +// device of the input handle. Fix that. +// TODO(agarwal): On error, mark all affected handles as corrupted. +// TODO(agarwal): Implement support for control dependencies. +// TODO(agarwal): Support out-of-order execution and dispatching multiple +// EagerNode in parallel. +// TODO(agarwal): Implement optimizations over EagerNode traces. +class EagerExecutor { + public: + ~EagerExecutor(); + + // This is called whenever async mode is enabled. Note that it may be called + // multiple times as different calling threads may switch async mode on or off + // independently. + void EnableAsync(); + + // Helper function to create monotonically increasing ids unique to this + // object. + uint64 NextId(); + + // Schedules `node` for execution. + // Note that Add must be called in monotonically increasing order of node->id. + void Add(EagerNode* node); + + // Causes the caller to block till node with id `node_id` has finished + // execution. + Status WaitFor(uint64 node_id); + + // Blocks till all currently pending ops are done. + Status WaitForAllPendingNodes(); + + // Clears all currently set errors which re-enables async execution. + void ClearError(); + + // Returns Status based on any errors that occurred during async execution. + Status status(); + + private: + // Starts execution of pending EagerNodes. This function loops till + // thread_done_ is set to true. If any errors are encontered, these are set + // inside `status_`. The loop blocks anytime there are no pending nodes, or if + // `status_` is not ok. + void Run(); + + Status WaitImpl(bool wait_all, uint64 node_id); + + mutex node_queue_mutex_; + + // Used to signal that some EagerNodes are pending execution. + condition_variable nodes_pending_ GUARDED_BY(node_queue_mutex_); + + // Queue of pending EagerNodes. + std::queue node_queue_ GUARDED_BY(node_queue_mutex_); + + // `status_` is set based on any errors raised during execution of a + // EagerNode. It remains set until ClearError is called. + Status status_ GUARDED_BY(node_queue_mutex_); + + // Map from id of a EagerNode to condition_variables (not owned by the map). + // These condition_variables are notified and removed when that EagerNode is + // done executing, or if an error is found in execution of any EagerNode. + std::multimap node_done_notifications_ + GUARDED_BY(node_queue_mutex_); + + // Thread object that calls the `Run` method. Currently we use only one thread + // for executing the EagerNodes one-by-one. + std::unique_ptr thread_ GUARDED_BY(node_queue_mutex_); + + // Indicates that `thread_` should stop as soon as it is done executing the + // current EagerNode. + bool thread_done_ GUARDED_BY(node_queue_mutex_) = false; + + mutex next_id_mutex_; + uint64 next_id_ GUARDED_BY(next_id_mutex_) = 1; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EAGER_EXECUTOR_H_ -- GitLab From a80fb2b1cad1bb9c868222b8c25f162d69a509e6 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 19 Mar 2018 13:44:23 -0700 Subject: [PATCH 190/960] Automated g4 rollback of changelist 189416074 PiperOrigin-RevId: 189634491 --- tensorflow/python/client/session.py | 25 ++++++++++++++++++ tensorflow/python/client/session_test.py | 32 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 924d62992a..6e9ce9b080 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -21,6 +21,7 @@ from __future__ import print_function import functools import re import threading +import warnings import numpy as np @@ -1624,6 +1625,9 @@ class InteractiveSession(BaseSession): ``` """ + _count_lock = threading.Lock() + _active_session_count = 0 # GUARDED_BY(_count_lock) + def __init__(self, target='', graph=None, config=None): """Creates a new interactive TensorFlow session. @@ -1652,6 +1656,19 @@ class InteractiveSession(BaseSession): config.graph_options.place_pruned_graph = True super(InteractiveSession, self).__init__(target, graph, config) + with InteractiveSession._count_lock: + if InteractiveSession._active_session_count > 0: + warnings.warn('An interactive session is already active. This can ' + 'cause out-of-memory errors in some cases. You must ' + 'explicitly call `InteractiveSession.close()` to release ' + 'resources held by the other session(s).') + InteractiveSession._active_session_count += 1 + # NOTE(mrry): We do not use `Session._closed` here because it has unhelpful + # semantics (in particular, it is not set to true if `Session.close()` is + # called on a session that has not been "opened" by running a step) and we + # cannot change those semantics without breaking existing code. + self._explicitly_closed = False + self._default_session = self.as_default() self._default_session.enforce_nesting = False self._default_session.__enter__() @@ -1664,6 +1681,14 @@ class InteractiveSession(BaseSession): def close(self): """Closes an `InteractiveSession`.""" super(InteractiveSession, self).close() + with InteractiveSession._count_lock: + if not self._explicitly_closed: + InteractiveSession._active_session_count -= 1 + self._explicitly_closed = True + else: + return if self._explicit_graph is not None: self._default_graph.__exit__(None, None, None) + self._default_graph = None self._default_session.__exit__(None, None, None) + self._default_session = None diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 3bf2a9e4dd..44ff440cc5 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -22,6 +22,7 @@ import os import sys import threading import time +import warnings import numpy as np import six @@ -65,6 +66,10 @@ ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape) # @test_util.with_c_api class SessionTest(test_util.TensorFlowTestCase): + def setUp(self): + super(SessionTest, self).setUp() + warnings.simplefilter('always') + def testUseExistingGraph(self): with ops.Graph().as_default() as g, ops.device('/cpu:0'): a = constant_op.constant(6.0, shape=[1, 1]) @@ -1190,6 +1195,33 @@ class SessionTest(test_util.TensorFlowTestCase): self.assertAllEqual([[24.0]], e.eval()) sess.close() + def testMultipleInteractiveSessionsWarning(self): + # Reinitialize the global state to ensure that the expected warnings will + # be emitted. + session.InteractiveSession._active_session_count = 0 # pylint: disable=protected-access + + sess = session.InteractiveSession() + sess.run(constant_op.constant(4.0)) # Run so that the session is "opened". + sess.close() + # Opening and closing interactive sessions serially should not warn. + with warnings.catch_warnings(record=True) as w: + sess = session.InteractiveSession() + sess.close() + self.assertEqual(0, len(w)) + + with warnings.catch_warnings(record=True) as w: + sess = session.InteractiveSession() + self.assertEqual(0, len(w)) + with warnings.catch_warnings(record=True) as w: + sess2 = session.InteractiveSession() + self.assertEqual(1, len(w)) + self.assertTrue('An interactive session is already active. This can cause ' + 'out-of-memory errors in some cases. You must explicitly ' + 'call `InteractiveSession.close()` to release resources ' + 'held by the other session(s).' in str(w[0].message)) + sess2.close() + sess.close() + def testInteractivePlacePrunedGraph(self): sess = session.InteractiveSession() -- GitLab From e6affeb79ee0cfda24b76368e3e788a7ec23df32 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 14:09:52 -0700 Subject: [PATCH 191/960] Add a helper that allows constructing simple expression ASTs from string. Useful to simplify the representation of composite symbols, e.g. 'py2tf.foo'. PiperOrigin-RevId: 189638901 --- tensorflow/contrib/py2tf/pyct/parser.py | 22 ++++++++++++++++++-- tensorflow/contrib/py2tf/pyct/parser_test.py | 17 +++++++++------ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/parser.py b/tensorflow/contrib/py2tf/pyct/parser.py index dc7df883b3..c961efa892 100644 --- a/tensorflow/contrib/py2tf/pyct/parser.py +++ b/tensorflow/contrib/py2tf/pyct/parser.py @@ -29,12 +29,30 @@ from tensorflow.python.util import tf_inspect def parse_entity(entity): - """Return the AST of given entity.""" + """Returns the AST of given entity.""" source = tf_inspect.getsource(entity) source = textwrap.dedent(source) return parse_str(source), source def parse_str(src): - """Return the AST of given piece of code.""" + """Returns the AST of given piece of code.""" return gast.parse(src) + + +def parse_expression(src): + """Returns the AST of given identifier. + + Args: + src: A piece of code that represents a single Python expression + Returns: + A gast.AST object. + Raises: + ValueError: if src does not consist of a single Expression. + """ + node = parse_str(src) + assert isinstance(node, gast.Module) + if len(node.body) != 1 and not isinstance(node.body[0], gast.Expr): + raise ValueError( + 'Expected a single expression, found instead %s' % node.body) + return node.body[0].value diff --git a/tensorflow/contrib/py2tf/pyct/parser_test.py b/tensorflow/contrib/py2tf/pyct/parser_test.py index f35dfa04c7..c58ffc7e0c 100644 --- a/tensorflow/contrib/py2tf/pyct/parser_test.py +++ b/tensorflow/contrib/py2tf/pyct/parser_test.py @@ -24,24 +24,29 @@ from tensorflow.contrib.py2tf.pyct import parser from tensorflow.python.platform import test -def f(x): - return x + 1 - - class ParserTest(test.TestCase): def test_parse_entity(self): + + def f(x): + return x + 1 + mod, _ = parser.parse_entity(f) self.assertEqual('f', mod.body[0].name) def test_parse_str(self): mod = parser.parse_str( textwrap.dedent(""" - def f(x): - return x + 1 + def f(x): + return x + 1 """)) self.assertEqual('f', mod.body[0].name) + def test_parse_expression(self): + node = parser.parse_expression('a.b') + self.assertEqual('a', node.value.id) + self.assertEqual('b', node.attr) + if __name__ == '__main__': test.main() -- GitLab From eaa61ab7514c56b9ce219bb5f5f38a5b3ad78657 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 19 Mar 2018 14:24:00 -0700 Subject: [PATCH 192/960] Turned on gradient optimization by default PiperOrigin-RevId: 189641300 --- .../grappler/optimizers/function_optimizer.cc | 24 +++++++++++-------- tensorflow/python/BUILD | 1 + 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 3f2afdeef1..97effae8c8 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -220,21 +220,27 @@ Status InlineSymbolicGradient(const NodeDef& node, inlined_node.set_name(node.name()); for (int i = 0; i < inlined_node.input_size(); ++i) { inlined_node.set_input( - i, strings::StrCat(node.name(), "/", inlined_node.input(i))); + i, AddPrefixToNodeName(inlined_node.input(i), node.name())); } } else if (inlined_node.name() == "FunctionInputs") { inlined_node.set_name( - strings::StrCat(node.name(), "/", inlined_node.name())); + AddPrefixToNodeName(inlined_node.name(), node.name())); inlined_node.clear_input(); for (int i = 0; i < node.input_size(); ++i) { inlined_node.add_input(node.input(i)); } } else { inlined_node.set_name( - strings::StrCat(node.name(), "/", inlined_node.name())); + AddPrefixToNodeName(inlined_node.name(), node.name())); for (int i = 0; i < inlined_node.input_size(); ++i) { inlined_node.set_input( - i, strings::StrCat(node.name(), "/", inlined_node.input(i))); + i, AddPrefixToNodeName(inlined_node.input(i), node.name())); + } + // If the node has no input, hook it up to the function input node to make + // sure it runs in the same frame as the other nodes of the function body. + if (inlined_node.input_size() == 0) { + *inlined_node.add_input() = AsControlDependency( + AddPrefixToNodeName("FunctionInputs", node.name())); } } inlined_node.set_device(node.device()); @@ -275,12 +281,10 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph->mutable_versions() = item.graph.versions(); for (const NodeDef& node : item.graph.node()) { - if (opt_level_ == RewriterConfig::AGGRESSIVE) { - if (node.op() == "SymbolicGradient") { - TF_RETURN_IF_ERROR(InlineSymbolicGradient(node, item.graph.library(), - optimized_graph)); - continue; - } + if (node.op() == "SymbolicGradient") { + TF_RETURN_IF_ERROR( + InlineSymbolicGradient(node, item.graph.library(), optimized_graph)); + continue; } auto it = functions.find(node.op()); if (it == functions.end()) { diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9a29986c3b..a029ecd4d0 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1033,6 +1033,7 @@ cuda_py_tests( "//third_party/py/numpy", "//tensorflow/core:protos_all_py", ], + shard_count = 10, ) py_test( -- GitLab From e613e0844a95814457f3530eedb9baf812cf1e87 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 14:27:06 -0700 Subject: [PATCH 193/960] Enable stack push removal optimization by default. PiperOrigin-RevId: 189641729 --- .../grappler/optimizers/loop_optimizer.cc | 36 +++++++++++-------- .../optimizers/loop_optimizer_test.cc | 24 ++++++++----- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 2 +- tensorflow/python/kernel_tests/BUILD | 2 ++ 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 244653504d..f78036d78c 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -45,8 +45,9 @@ namespace tensorflow { namespace grappler { namespace { -std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, - int stack_node_idx) { +std::vector GetStackPushNodesToConvert( + const SimpleGraphView& graph_view, + const std::unordered_set& nodes_to_preserve, int stack_node_idx) { VLOG(1) << "Stack node: " << graph_view.graph()->node(stack_node_idx).name(); const std::unordered_set op_types_to_traverse( {"Stack", "StackV2", "Enter", "RefEnter", "Switch", "RefSwitch", @@ -64,7 +65,9 @@ std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, op_types_to_traverse.end()) { continue; } else if (!IsStackPopOp(fanout_node) || - !graph_view.outputs(fanout_idx).empty()) { + (!graph_view.outputs(fanout_idx).empty() || + nodes_to_preserve.find(fanout_node.name()) != + nodes_to_preserve.end())) { // The node is either a stack pop with consumers or something unexpected // so we leave the graph alone. nodes_to_convert.clear(); @@ -74,15 +77,17 @@ std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, return nodes_to_convert; } -Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { +Status RemoveStackOps(const GrapplerItem& item, GraphDef* optimized_graph) { + const std::unordered_set nodes_to_preserve = item.NodesToPreserve(); + const GraphDef& graph = item.graph; *optimized_graph = graph; NodeMap node_map(optimized_graph); SimpleGraphView graph_view; TF_RETURN_IF_ERROR(graph_view.Initialize(graph)); for (int node_idx = 0; node_idx < graph.node_size(); ++node_idx) { if (IsStackOp(graph.node(node_idx))) { - for (int push_node_idx : - GetStackPushNodesToConvert(graph_view, node_idx)) { + for (int push_node_idx : GetStackPushNodesToConvert( + graph_view, nodes_to_preserve, node_idx)) { // We found push nodes without corresponding pops. Convert them to // Identity passing the data through and add a control dependency from // the op supplying the stack handle. @@ -463,17 +468,18 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { - TF_RETURN_IF_ERROR(RemoveStackOps(item.graph, optimized_graph)); + TF_RETURN_IF_ERROR(RemoveStackOps(item, optimized_graph)); - optimized_graph_ = optimized_graph; - - // Set up helper data structures. - node_map_.reset(new NodeMap(optimized_graph_)); - int num_frames; - TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, - &frame_map_, &num_frames)); + if (opt_level_ == RewriterConfig::AGGRESSIVE) { + optimized_graph_ = optimized_graph; + // Set up helper data structures. + node_map_.reset(new NodeMap(optimized_graph_)); + int num_frames; + TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, + &frame_map_, &num_frames)); + TF_RETURN_IF_ERROR(LoopInvariantNodeMotion()); + } - TF_RETURN_IF_ERROR(LoopInvariantNodeMotion()); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index 0d45ba9b56..a0bd335197 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -81,7 +81,7 @@ TEST_F(LoopOptimizerTest, Basic) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -128,7 +128,7 @@ TEST_F(LoopOptimizerTest, Const) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -175,7 +175,7 @@ TEST_F(LoopOptimizerTest, ControlOutput) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -235,7 +235,7 @@ TEST_F(LoopOptimizerTest, NestedLoop1) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -302,7 +302,7 @@ TEST_F(LoopOptimizerTest, NestedLoop2) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -365,7 +365,7 @@ TEST_F(LoopOptimizerTest, NestedLoopConst1) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -429,7 +429,7 @@ TEST_F(LoopOptimizerTest, NestedLoopConst2) { GrapplerItem item; item.graph = graph; - LoopOptimizer optimizer; + LoopOptimizer optimizer(RewriterConfig::AGGRESSIVE); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -502,6 +502,7 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { AddSimpleNode("stack3", "StackV2", {}, &graph); AddSimpleNode("push3", "StackPushV2", {"stack3", "c"}, &graph); AddSimpleNode("stop", "StopGradient", {"stack3"}, &graph); + LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -525,12 +526,19 @@ TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { AddSimpleNode("stack3", "StackV2", {}, &graph); AddSimpleNode("push3", "StackPushV2", {"stack3", "c"}, &graph); AddSimpleNode("pop3", "StackPopV2", {"stack3"}, &graph); + // Push for a Pop without consumer that is fetched should not be removed. + AddSimpleNode("stack4", "StackV2", {}, &graph); + AddSimpleNode("push4", "StackPushV2", {"stack4", "c"}, &graph); + AddSimpleNode("pop4", "StackPopV2", {"stack4"}, &graph); + + item.fetch.push_back("pop4"); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(10, output.node_size()); + + EXPECT_EQ(13, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); if (node.name() == "push1") { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7b2e7a1fe0..6eb2bbc547 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -106,7 +106,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.loop_optimization() == RewriterConfig::ON) { + if (cfg_.loop_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( new LoopOptimizer(cfg_.loop_optimization()))); } @@ -234,7 +234,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.function_optimization() != RewriterConfig::OFF || cfg.constant_folding() != RewriterConfig::OFF || cfg.arithmetic_optimization() != RewriterConfig::OFF || - cfg.loop_optimization() == RewriterConfig::ON || + cfg.loop_optimization() != RewriterConfig::OFF || cfg.dependency_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index b1fceaacf4..fdf16aa1da 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -42,7 +42,7 @@ message RewriterConfig { // Control dependency optimizations (default is ON). // Remove redundant control dependencies, which may enable other optimization. Toggle dependency_optimization = 8; - // Loop optimizations (default is OFF). + // Loop optimizations (default is ON). Toggle loop_optimization = 9; // Function optimizations (default is ON). Toggle function_optimization = 10; diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5b0c38fa5d..d9571fa2be 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -393,6 +393,7 @@ tf_py_test( "//tensorflow/python:nn_ops", "//tensorflow/python:nn_ops_gen", ], + shard_count = 5, ) tf_py_test( @@ -408,6 +409,7 @@ tf_py_test( "//tensorflow/python:nn_ops", "//tensorflow/python:nn_ops_gen", ], + shard_count = 5, ) tf_py_test( -- GitLab From ff43dff34ab525dd333128c73ebfb0f9723c34c0 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 19 Mar 2018 14:27:52 -0700 Subject: [PATCH 194/960] TFLite Delegate: Add an `allow_dynamic_tensors` parameter. PiperOrigin-RevId: 189641833 --- tensorflow/contrib/lite/BUILD | 1 + tensorflow/contrib/lite/interpreter.cc | 80 +++++++++++-- tensorflow/contrib/lite/interpreter.h | 22 +++- tensorflow/contrib/lite/interpreter_test.cc | 118 +++++++++++++++++--- 4 files changed, 191 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 5cfbb544b7..dafe6f136e 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -170,6 +170,7 @@ cc_test( deps = [ ":framework", ":string_util", + "//tensorflow/contrib/lite/kernels:kernel_util", "//tensorflow/contrib/lite/kernels/internal:tensor_utils", "//tensorflow/contrib/lite/schema:schema_fbs", "//tensorflow/contrib/lite/testing:util", diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index cee57bba5e..937c185b0a 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -356,7 +356,11 @@ TfLiteStatus Interpreter::AllocateTensors() { } TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); - invokable_ = true; + if (state_ == kStateUninvokable) { + state_ = kStateInvokable; + } + TF_LITE_ENSURE(&context_, state_ == kStateInvokable || + state_ == kStateInvokableAndImmutable); return kTfLiteOk; } @@ -364,7 +368,12 @@ TfLiteStatus Interpreter::AddNodeWithParameters( const std::vector& inputs, const std::vector& outputs, const char* init_data, size_t init_data_size, void* builtin_data, const TfLiteRegistration* registration, int* node_index) { - invokable_ = false; + if (state_ == kStateInvokableAndImmutable) { + ReportError(&context_, + "AddNodeWithParameters is disallowed when graph is immutable."); + return kTfLiteError; + } + state_ = kStateUninvokable; std::unique_ptr builtin_data_deleter(builtin_data, free); @@ -420,12 +429,17 @@ TfLiteStatus Interpreter::AddNodeWithParameters( TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, const std::vector& dims) { + if (state_ == kStateInvokableAndImmutable) { + ReportError(&context_, + "ResizeInputTensor is disallowed when graph is immutable."); + return kTfLiteError; + } + state_ = kStateUninvokable; + // TODO(aselle): All bounds checks can be implemented as one-sided bounds // checks by casting to unsigned for efficiency. Profile before doing this. - TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); - invokable_ = false; TfLiteIntArray* dims_lite = ConvertVectorToTfLiteIntArray(dims); return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); } @@ -490,7 +504,7 @@ TfLiteStatus Interpreter::Invoke() { ReportError(&context_, "Invoke called on model that is not consistent."); return kTfLiteError; } - if (!invokable_) { + if (state_ == kStateUninvokable) { ReportError(&context_, "Invoke called on model that is not ready."); return kTfLiteError; } @@ -622,6 +636,13 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( int tensor_index, TfLiteType type, const char* name, const int rank, const int* dims, TfLiteQuantizationParams quantization, const char* buffer, size_t bytes, const Allocation* allocation) { + if (state_ == kStateInvokableAndImmutable) { + ReportError( + &context_, + "SetTensorParametersReadOnly is disallowed when graph is immutable."); + return kTfLiteError; + } + TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); // For most tensors we know exactly how much memory is necessary so we can @@ -645,7 +666,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( tensor.allocation_type = kTfLiteMmapRo; tensor.allocation = allocation; } else { - invokable_ = false; + state_ = kStateUninvokable; TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims), quantization, const_cast(buffer), bytes, kTfLiteMmapRo, allocation, &tensor); @@ -660,7 +681,12 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( TfLiteStatus Interpreter::SetTensorParametersReadWrite( int tensor_index, TfLiteType type, const char* name, const int rank, const int* dims, TfLiteQuantizationParams quantization) { - invokable_ = false; + if (state_ == kStateInvokableAndImmutable) { + ReportError( + &context_, + "SetTensorParametersReadWrite is disallowed when graph is immutable."); + return kTfLiteError; + } TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); size_t required_bytes = 0; @@ -738,19 +764,55 @@ void Interpreter::SetNumThreads(int num_threads) { context_.recommended_num_threads = num_threads; } -TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { +TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate, + bool allow_dynamic_tensors) { + if (!allow_dynamic_tensors) { + int last_execution_plan_index_prepared; + TF_LITE_ENSURE_OK(&context_, PrepareOpsStartingAt( + 0, &last_execution_plan_index_prepared)); + + bool has_dynamic_tensors = true; + // Dynamic tensors exist if not all nodes can be prepared. + if (last_execution_plan_index_prepared + 1 == execution_plan_.size()) { + // If all the nodes can be prepared, check if the last node has dynamic + // tensors. + int node_index = execution_plan_[last_execution_plan_index_prepared]; + TfLiteNode& node = nodes_and_registration_[node_index].first; + if (!HasDynamicTensor(context_, node.outputs)) { + has_dynamic_tensors = false; + } + } + if (has_dynamic_tensors) { + ReportError(&context_, "Attempting to resize a fixed-size tensor."); + return kTfLiteError; + } + } + // TODO(aselle): Consider if it is worth storing pointers to delegates. - // Setup additional context interface + // Setup additional context interface. context_.GetNodeAndRegistration = GetNodeAndRegistration; context_.ReplaceSubgraphsWithDelegateKernels = ReplaceSubgraphsWithDelegateKernels; context_.GetExecutionPlan = GetExecutionPlan; TfLiteStatus status = delegate->Prepare(&context_, delegate); + // Remove additional context info. SetForbiddenContextFunction(&context_.GetNodeAndRegistration); SetForbiddenContextFunction(&context_.ReplaceSubgraphsWithDelegateKernels); SetForbiddenContextFunction(&context_.GetExecutionPlan); + + TF_LITE_ENSURE_OK(&context_, status); + + if (!allow_dynamic_tensors) { + TF_LITE_ENSURE_OK(&context_, AllocateTensors()); + TF_LITE_ENSURE(&context_, state_ == kStateInvokable || + state_ == kStateInvokableAndImmutable); + // After using a delegate which doesn't support dynamic tensors, make the + // entire graph immutable. + state_ = kStateInvokableAndImmutable; + } + return status; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index af143370ee..788546fd60 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -272,7 +272,9 @@ class Interpreter { // Allow a delegate to look at the graph and modify the graph to handle // parts of the graph themselves. After this is called, the graph may // contain new nodes that replace 1 more nodes. - TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate); + // WARNING: This is an experimental API and subject to change. + TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate, + bool allow_dynamic_tensors = false); // Ensure the data in `tensor.data` is readable. In case delegate is used, // it might require to copy the data from delegate buffer to raw memory. @@ -447,6 +449,20 @@ class Interpreter { } } + // The state of the Interpreter. + enum State { + // The interpreter isn't ready to be invoked. + // `AllocateTensor` need to be called to enter an invokable state. + kStateUninvokable = 0, + // The interpreter is ready to be invoked. + kStateInvokable, + // The interpreter is ready to be invoked, and graph can't be further + // modified. The interpreter will enter this state when calling + // `ModifyGraphWithDelegate` with `allow_dynamic_tensors=false`. + kStateInvokableAndImmutable, + }; + State state_ = kStateUninvokable; + // A pure C data structure used to communicate with the pure C plugin // interface. To avoid copying tensor metadata, this is also the definitive // structure to store tensors. @@ -462,10 +478,6 @@ class Interpreter { // the tensor array. bool consistent_ = true; - // Whether the model is safe to invoke (if any errors occurred this - // will be false). - bool invokable_ = false; - // Array of indices representing the tensors that are inputs to the // interpreter. std::vector inputs_; diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 7a029c7df8..efb29d5c9d 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -17,9 +17,11 @@ limitations under the License. #include #include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" #include "tensorflow/contrib/lite/string_util.h" #include "tensorflow/contrib/lite/testing/util.h" + namespace tflite { namespace { @@ -439,12 +441,12 @@ TEST(BasicInterpreter, ThreeStepAllocate) { // String-in String-out node. TfLiteRegistration reg_copy = {nullptr, nullptr, nullptr, nullptr}; reg_copy.invoke = [](TfLiteContext* context, TfLiteNode* node) { - TfLiteTensor* a0 = &context->tensors[node->inputs->data[0]]; - TfLiteTensor* a1 = &context->tensors[node->outputs->data[0]]; + TfLiteTensor* input = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; DynamicBuffer buf; - StringRef str_ref = GetString(a0, 0); + StringRef str_ref = GetString(input, 0); buf.AddString(str_ref); - buf.WriteToTensor(a1); + buf.WriteToTensor(output); return kTfLiteOk; }; @@ -778,13 +780,17 @@ TfLiteRegistration AddOpRegistration() { reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { // Set output size to input size - TfLiteTensor* tensor0 = &context->tensors[node->inputs->data[0]]; - TfLiteTensor* tensor1 = &context->tensors[node->inputs->data[1]]; - TfLiteTensor* tensor2 = &context->tensors[node->outputs->data[0]]; - TfLiteIntArray* newSize = TfLiteIntArrayCopy(tensor0->dims); - TfLiteIntArray* newSizeOther = TfLiteIntArrayCopy(tensor1->dims); - TF_LITE_ENSURE_EQ(context, newSize->size, newSizeOther->size); - TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, tensor2, newSize)); + TfLiteTensor* input1 = &context->tensors[node->inputs->data[0]]; + TfLiteTensor* input2 = &context->tensors[node->inputs->data[1]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + + TF_LITE_ENSURE_EQ(context, input1->dims->size, input2->dims->size); + for (int i = 0; i < input1->dims->size; ++i) { + TF_LITE_ENSURE_EQ(context, input1->dims->data[i], input2->dims->data[i]); + } + + TF_LITE_ENSURE_STATUS(context->ResizeTensor( + context, output, TfLiteIntArrayCopy(input1->dims))); return kTfLiteOk; }; @@ -818,6 +824,8 @@ class TestDelegate : public ::testing::Test { quant); interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, quant); + interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3}, + quant); TfLiteRegistration reg = AddOpRegistration(); interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, ®); interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, ®); @@ -916,7 +924,6 @@ class TestDelegate : public ::testing::Test { }; TEST_F(TestDelegate, BasicDelegate) { - interpreter_->Invoke(); delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); @@ -944,7 +951,6 @@ TEST_F(TestDelegate, BasicDelegate) { } TEST_F(TestDelegate, ComplexDeligate) { - interpreter_->Invoke(); delegate_ = std::unique_ptr(new SimpleDelegate({1, 2})); interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); @@ -959,7 +965,6 @@ TEST_F(TestDelegate, ComplexDeligate) { } TEST_F(TestDelegate, SetBufferHandleToInput) { - interpreter_->Invoke(); delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); interpreter_->ModifyGraphWithDelegate(delegate); @@ -978,7 +983,6 @@ TEST_F(TestDelegate, SetBufferHandleToInput) { } TEST_F(TestDelegate, SetBufferHandleToOutput) { - interpreter_->Invoke(); delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); interpreter_->ModifyGraphWithDelegate(delegate); @@ -1002,7 +1006,7 @@ TEST_F(TestDelegate, SetInvalidHandleToTensor) { interpreter_->Invoke(); delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); - interpreter_->ModifyGraphWithDelegate(delegate); + interpreter_->ModifyGraphWithDelegate(delegate, true); SimpleDelegate another_simple_delegate({0, 1, 2}); @@ -1023,6 +1027,88 @@ TEST_F(TestDelegate, SetInvalidHandleToTensor) { EXPECT_EQ(tensor->buffer_handle, kTfLiteNullBufferHandle); } +TEST_F(TestDelegate, ResizeInputWithNonDynamicDelegateShouldFail) { + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 2}), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(1, {1, 2}), kTfLiteOk); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()), + kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 2}), kTfLiteError); +} + +class TestDelegateWithDynamicTensors : public ::testing::Test { + protected: + void SetUp() override { + interpreter_.reset(new Interpreter); + + interpreter_->AddTensors(2); + interpreter_->SetInputs({0}); + interpreter_->SetOutputs({1}); + TfLiteQuantizationParams quant; + interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + TfLiteRegistration reg = DynamicCopyOpRegistration(); + interpreter_->AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, ®); + + delegate_.Prepare = [](TfLiteContext* context, + TfLiteDelegate* delegate) -> TfLiteStatus { + // In this test, the delegate replaces all the nodes if this function is + // called. + TfLiteIntArray* execution_plan; + TF_LITE_ENSURE_STATUS( + context->GetExecutionPlan(context, &execution_plan)); + context->ReplaceSubgraphsWithDelegateKernels( + context, DelegateRegistration(), execution_plan, delegate); + return kTfLiteOk; + }; + } + + static TfLiteRegistration DynamicCopyOpRegistration() { + TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output = &context->tensors[node->outputs->data[0]]; + SetTensorToDynamic(output); + return kTfLiteOk; + }; + + reg.invoke = [](TfLiteContext* context, TfLiteNode* node) { + // Not implemented since this isn't required in testing. + return kTfLiteOk; + }; + return reg; + } + + static TfLiteRegistration DelegateRegistration() { + TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + return reg; + } + + std::unique_ptr interpreter_; + TfLiteDelegate delegate_; +}; + +TEST_F(TestDelegateWithDynamicTensors, DisallowDynamicTensors) { + interpreter_->ModifyGraphWithDelegate(&delegate_, false); + + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + // The interpreter should not call delegate's `Prepare` when dynamic tensors + // exist. So the node ID isn't changed. + ASSERT_EQ(interpreter_->execution_plan()[0], 0); +} + +TEST_F(TestDelegateWithDynamicTensors, AllowDynamicTensors) { + interpreter_->ModifyGraphWithDelegate(&delegate_, true); + + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + // The node should be replaced because dynamic tensors are allowed. Therefore + // only node ID in the execution plan is changed from 0 to 1. + ASSERT_EQ(interpreter_->execution_plan()[0], 1); +} + } // namespace } // namespace tflite -- GitLab From e69a3e1ac1cd0c31a8e8078982212cca1fbf988e Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Mon, 19 Mar 2018 14:48:23 -0700 Subject: [PATCH 195/960] Fix build breakage with downloadable clang and -fopenmp. By disabling openmp when building with clang. If we want to enable openmp with clang, we'll probably have to have libomp as an explicit dependency. This fixes a breakage found by OS CI: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu-clang/215/ PiperOrigin-RevId: 189644968 --- third_party/mkl_dnn/mkldnn.BUILD | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD index 58bb7a6a5d..752a0d8498 100644 --- a/third_party/mkl_dnn/mkldnn.BUILD +++ b/third_party/mkl_dnn/mkldnn.BUILD @@ -1,5 +1,13 @@ exports_files(["LICENSE"]) +config_setting( + name = "clang_linux_x86_64", + values = { + "cpu": "k8", + "define": "using_cuda_clang=true", + }, +) + cc_library( name = "mkl_dnn", srcs = glob([ @@ -9,8 +17,11 @@ cc_library( hdrs = glob(["include/*"]), copts = ["-fexceptions"] + select({ "@org_tensorflow//tensorflow:linux_x86_64": [ - "-fopenmp", + "-fopenmp", # only works with gcc ], + # TODO(ibiryukov): enable openmp with clang by including libomp as a + # dependency. + ":clang_linux_x86_64": [], "//conditions:default": [], }), includes = [ -- GitLab From 8dec85d39480ce19130bac56ebb54c00b53085ce Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 19 Mar 2018 14:57:09 -0700 Subject: [PATCH 196/960] Maintain an updateable map of devices in the eager context. PiperOrigin-RevId: 189646358 --- tensorflow/c/eager/c_api.cc | 9 +++++++-- tensorflow/c/eager/c_api_internal.h | 11 ++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 4e5703ffe0..a23015c99e 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -346,8 +346,13 @@ void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { - status->status = op->ctx->device_manager->LookupDevice(device_name, &d); - if (!status->status.ok()) return; + auto it = op->ctx->devices_map.find(device_name); + if (it == op->ctx->devices_map.end()) { + status->status = + tensorflow::errors::InvalidArgument(device_name, " unknown device."); + return; + } + d = it->second; } op->device = d; } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 1edbe81992..cc5ed48b48 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -34,6 +34,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" @@ -70,6 +71,10 @@ struct TFE_Context { opts.session_options.options.config.log_device_placement()), async_default(opts.async) { if (async_default) executor.EnableAsync(); + + for (auto* device : devices) { + devices_map[tensorflow::StringPiece(device->name())] = device; + } } const bool soft_placement; @@ -83,7 +88,11 @@ struct TFE_Context { std::unique_ptr device_manager; // Devices owned by device_manager - const std::vector devices; + std::vector devices; + // All devices are not owned. + tensorflow::gtl::FlatMap + devices_map; tensorflow::Rendezvous* const rendezvous; tensorflow::mutex functions_mu; -- GitLab From dee1bc350ac0826822161f211f7fa8a1e1ae62f0 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 19 Mar 2018 15:06:40 -0700 Subject: [PATCH 197/960] Allowing the FunctionBufferingResource to be passed in thread_pool_size=0 in which case we wouldn't pass in a runner to the FLR::Run call and rely on the underlying device threadpool instead. PiperOrigin-RevId: 189648051 --- .../data/kernels/prefetching_kernels.cc | 24 ++++++++++++------- .../data/python/ops/prefetching_ops.py | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index 1baac3ea52..2f986f2bb1 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -49,16 +49,18 @@ class FunctionBufferingResource : public ResourceBase { source_device_(source_device), target_device_(target_device), func_args_(func_args), - thread_pool_(new thread::ThreadPool(Env::Default(), ThreadOptions(), - "buffer_resource", thread_pool_size, - false /* low_latency_hint */)), handle_(kInvalidHandle), is_buffering_(false), end_of_sequence_(false), cancelled_(false) { - runner_ = [this](std::function c) { - thread_pool_->Schedule(std::move(c)); - }; + if (thread_pool_size > 0) { + thread_pool_ = new thread::ThreadPool(Env::Default(), ThreadOptions(), + "buffer_resource", thread_pool_size, + false /* low_latency_hint */); + runner_ = [this](std::function c) { + thread_pool_->Schedule(std::move(c)); + }; + } } ~FunctionBufferingResource() override { @@ -69,7 +71,9 @@ class FunctionBufferingResource : public ResourceBase { cond_var_.wait(l); } } - delete thread_pool_; + if (thread_pool_ != nullptr) { + delete thread_pool_; + } } string DebugString() override { @@ -175,7 +179,9 @@ class FunctionBufferingResource : public ResourceBase { FunctionLibraryRuntime::Options opts; // Copied from CapturedFunction::generate_step_id(); opts.step_id = -std::abs(static_cast(random::New64())); - opts.runner = &runner_; + if (runner_ != nullptr) { + opts.runner = &runner_; + } opts.source_device = source_device_; AllocatorAttributes arg_alloc_attr; arg_alloc_attr.set_on_host(true); @@ -231,7 +237,7 @@ class FunctionBufferingResource : public ResourceBase { const string source_device_; const string target_device_; const std::vector func_args_; - thread::ThreadPool* thread_pool_; + thread::ThreadPool* thread_pool_ = nullptr; FunctionLibraryRuntime::Handle handle_ GUARDED_BY(mu_); std::deque buffer_ GUARDED_BY(mu_); std::deque requests_ GUARDED_BY(mu_); diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 7059b358f3..b16f12c4ee 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -27,7 +27,7 @@ def function_buffering_resource(string_arg, target_device, f, buffer_size, - thread_pool_size=1, + thread_pool_size=0, container="", shared_name=None, name=None): -- GitLab From 12baea6c9a2ccb15f24ca79f18bcdd639b149592 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 15:09:23 -0700 Subject: [PATCH 198/960] Use fully-qualified function names and avoid the need to replace attributes. PiperOrigin-RevId: 189648496 --- .../py2tf/converters/logical_expressions.py | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions.py b/tensorflow/contrib/py2tf/converters/logical_expressions.py index 10192e6a03..e0abf74ebc 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions.py +++ b/tensorflow/contrib/py2tf/converters/logical_expressions.py @@ -24,6 +24,7 @@ from __future__ import print_function import gast from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer @@ -44,17 +45,18 @@ class LogicalExpressionTransformer(transformer.Base): def __init__(self, context): super(LogicalExpressionTransformer, self).__init__(context) # TODO(mdan): Look into replacing with bitwise operators instead. + # TODO(mdan): Skip replacing if the function is trivial. self.op_mapping = { - gast.And: 'logical_and', - gast.Eq: 'equal', - gast.Gt: 'greater', - gast.GtE: 'greater_equal', - gast.Lt: 'less', - gast.LtE: 'less_equal', - gast.Not: 'logical_not', - gast.NotEq: 'not_equal', - gast.Or: 'logical_or', - gast.USub: 'negative', + gast.And: 'tf.logical_and', + gast.Eq: 'tf.equal', + gast.Gt: 'tf.greater', + gast.GtE: 'tf.greater_equal', + gast.Lt: 'tf.less', + gast.LtE: 'tf.less_equal', + gast.Not: 'tf.logical_not', + gast.NotEq: 'tf.not_equal', + gast.Or: 'tf.logical_or', + gast.USub: 'tf.negative', gast.Is: 'py2tf_utils.dynamic_is', gast.IsNot: 'py2tf_utils.dynamic_is_not' } @@ -70,27 +72,19 @@ class LogicalExpressionTransformer(transformer.Base): '"a.x or b"; for a workaround, assign the expression to a local ' 'variable and use that instead, for example "tmp = a.x", "tmp or b"') - def _matching_tf_op(self, operator): + def _matching_func(self, operator): op_type = type(operator) mapped_op = self.op_mapping.get(op_type) if not mapped_op: raise NotImplementedError('operator %s is not yet supported' % op_type) return mapped_op - def _inline_tf_op(self, op_name, args): - if 'py2tf_utils' in op_name: - # TODO(alexbw): explicitly spelling out the attribute function name - # until fix for issue highlighted in cl/188931581 lands. - template = """ - py2tf_utils.op_name(args) + def _as_function(self, func_name, args): + template = """ + func_name(args) """ - op_name = op_name.replace('py2tf_utils.', '') - else: - template = """ - tf.op_name(args) - """ replacement = templates.replace_as_expression( - template, op_name=op_name, args=args) + template, func_name=parser.parse_expression(func_name), args=args) anno.setanno(replacement, SAFE_BOOLEAN_OPERAND, True) return replacement @@ -104,14 +98,14 @@ class LogicalExpressionTransformer(transformer.Base): # a < b < c -> a < b and b < c while ops_and_comps: op, right = ops_and_comps.pop(0) - binary_comparison = self._inline_tf_op(self._matching_tf_op(op), - (left, right)) + binary_comparison = self._as_function( + self._matching_func(op), (left, right)) if isinstance(left, gast.Name) and isinstance(right, gast.Name): anno.setanno(binary_comparison, SAFE_BOOLEAN_OPERAND, True) if op_tree: self._expect_simple_symbol(right) - op_tree = self._inline_tf_op('logical_and', - (binary_comparison, op_tree)) + op_tree = self._as_function('tf.logical_and', + (binary_comparison, op_tree)) else: op_tree = binary_comparison left = right @@ -120,7 +114,7 @@ class LogicalExpressionTransformer(transformer.Base): def visit_UnaryOp(self, node): node = self.generic_visit(node) - return self._inline_tf_op(self._matching_tf_op(node.op), node.operand) + return self._as_function(self._matching_func(node.op), node.operand) def visit_BoolOp(self, node): node = self.generic_visit(node) @@ -130,7 +124,7 @@ class LogicalExpressionTransformer(transformer.Base): while node_values: left = node_values.pop() self._expect_simple_symbol(left) - right = self._inline_tf_op(self._matching_tf_op(node.op), (left, right)) + right = self._as_function(self._matching_func(node.op), (left, right)) return right -- GitLab From 2d6176e66b48956946b34d595c1dacedd2703fff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 15:13:53 -0700 Subject: [PATCH 199/960] Run flatbuffer verifier before reading a TFLITE file into toco. PiperOrigin-RevId: 189649236 --- tensorflow/contrib/lite/toco/tflite/import.cc | 11 ++++ .../contrib/lite/toco/tflite/import_test.cc | 62 ++++++++++++++----- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/lite/toco/tflite/import.cc b/tensorflow/contrib/lite/toco/tflite/import.cc index e16784fd21..867395e881 100644 --- a/tensorflow/contrib/lite/toco/tflite/import.cc +++ b/tensorflow/contrib/lite/toco/tflite/import.cc @@ -162,8 +162,19 @@ void ImportIOTensors(const ::tflite::Model& input_model, } } +namespace { +bool Verify(const void* buf, size_t len) { + ::flatbuffers::Verifier verifier(static_cast(buf), len); + return ::tflite::VerifyModelBuffer(verifier); +} +} // namespace + std::unique_ptr Import(const ModelFlags& model_flags, const string& input_file_contents) { + if (!Verify(input_file_contents.data(), input_file_contents.size())) { + LOG(FATAL) << "Invalid flatbuffer."; + } + const ::tflite::Model* input_model = ::tflite::GetModel(input_file_contents.data()); diff --git a/tensorflow/contrib/lite/toco/tflite/import_test.cc b/tensorflow/contrib/lite/toco/tflite/import_test.cc index f25b170876..937a291cf7 100644 --- a/tensorflow/contrib/lite/toco/tflite/import_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/import_test.cc @@ -66,15 +66,43 @@ class ImportTest : public ::testing::Test { } Offset>> BuildOpCodes() { - auto c1 = - ::tflite::CreateOperatorCode(builder_, ::tflite::BuiltinOperator_CUSTOM, - builder_.CreateString("custom_op_one")); + auto c1 = ::tflite::CreateOperatorCode( + builder_, ::tflite::BuiltinOperator_MAX_POOL_2D, 0); auto c2 = ::tflite::CreateOperatorCode( builder_, ::tflite::BuiltinOperator_CONV_2D, 0); return builder_.CreateVector( std::vector>({c1, c2})); } + Offset>> BuildOperators() { + auto is = builder_.CreateVector({0}); + auto os = builder_.CreateVector({1}); + auto op = ::tflite::CreateOperator( + builder_, 0, is, os, ::tflite::BuiltinOptions_Conv2DOptions, + ::tflite::CreateConv2DOptions(builder_, ::tflite::Padding_VALID, 1, 1, + ::tflite::ActivationFunctionType_NONE) + .Union(), + /*custom_options=*/0, ::tflite::CustomOptionsFormat_FLEXBUFFERS); + + return builder_.CreateVector(std::vector>({op})); + } + + Offset>> BuildSubGraphs( + Offset>> tensors, + Offset>> operators, + int num_sub_graphs = 1) { + std::vector inputs = {0}; + std::vector outputs = {1}; + std::vector> v; + for (int i = 0; i < num_sub_graphs; ++i) { + v.push_back(::tflite::CreateSubGraph( + builder_, tensors, builder_.CreateVector(inputs), + builder_.CreateVector(outputs), operators, + builder_.CreateString("subgraph"))); + } + return builder_.CreateVector(v); + } + // This is a very simplistic model. We are not interested in testing all the // details here, since tf.mini's testing framework will be exercising all the // conversions multiple times, and the conversion of operators is tested by @@ -83,14 +111,13 @@ class ImportTest : public ::testing::Test { auto buffers = BuildBuffers(); auto tensors = BuildTensors(); auto opcodes = BuildOpCodes(); - - auto subgraph = ::tflite::CreateSubGraph(builder_, tensors, 0, 0, 0); - std::vector> subgraph_vector( - {subgraph}); - auto subgraphs = builder_.CreateVector(subgraph_vector); + auto operators = BuildOperators(); + auto subgraphs = BuildSubGraphs(tensors, operators); auto s = builder_.CreateString(""); - builder_.Finish(::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, - opcodes, subgraphs, s, buffers)); + + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, + opcodes, subgraphs, s, buffers)); input_model_ = ::tflite::GetModel(builder_.GetBufferPointer()); } @@ -99,7 +126,6 @@ class ImportTest : public ::testing::Test { builder_.GetSize()); } flatbuffers::FlatBufferBuilder builder_; - // const uint8_t* buffer_ = nullptr; const ::tflite::Model* input_model_ = nullptr; }; @@ -116,7 +142,7 @@ TEST_F(ImportTest, LoadOperatorsTable) { details::OperatorsTable operators; details::LoadOperatorsTable(*input_model_, &operators); - EXPECT_THAT(operators, ElementsAre("custom_op_one", "CONV_2D")); + EXPECT_THAT(operators, ElementsAre("MAX_POOL_2D", "CONV_2D")); } TEST_F(ImportTest, Tensors) { @@ -143,13 +169,17 @@ TEST_F(ImportTest, Tensors) { EXPECT_EQ(100, q->zero_point); } -TEST_F(ImportTest, NoSubGraphs) { +TEST_F(ImportTest, MultipleSubGraphs) { auto buffers = BuildBuffers(); + auto tensors = BuildTensors(); auto opcodes = BuildOpCodes(); - auto subgraphs = 0; // no subgraphs in this model + auto operators = BuildOperators(); + auto subgraphs = BuildSubGraphs(tensors, operators, 2); auto comment = builder_.CreateString(""); - builder_.Finish(::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, - opcodes, subgraphs, comment, buffers)); + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs, comment, buffers)); + input_model_ = ::tflite::GetModel(builder_.GetBufferPointer()); EXPECT_DEATH(Import(ModelFlags(), InputModelAsString()), -- GitLab From 7cabd979a46febafdb90a83865cd743233d4449f Mon Sep 17 00:00:00 2001 From: Alan Du Date: Mon, 19 Mar 2018 18:42:08 -0400 Subject: [PATCH 200/960] Don't use NCHW or NHCW in tf.layers.conv1d (#17455) * Don't use NCHW or NHCW for conv1d Fixes deprecation warning when using tf.layers.conv1d --- tensorflow/python/ops/nn_ops.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 9b2aaa4c1c..a74de39eab 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -150,14 +150,12 @@ class _NonAtrousConvolution(object): conv_dims)) if conv_dims == 1: # conv1d uses the 2-d data format names - if data_format is None or data_format == "NWC": - data_format_2d = "NHWC" - elif data_format == "NCW": - data_format_2d = "NCHW" - else: + if data_format is None: + data_format = "NWC" + elif data_format not in {"NCW", "NWC", "NCHW", "NHWC"}: raise ValueError("data_format must be \"NWC\" or \"NCW\".") self.strides = strides[0] - self.data_format = data_format_2d + self.data_format = data_format self.conv_op = self._conv1d elif conv_dims == 2: if data_format is None or data_format == "NHWC": -- GitLab From 41781e61d04763dbc1ebca77292410b8136c2adb Mon Sep 17 00:00:00 2001 From: Xiaoqiang Zheng Date: Mon, 19 Mar 2018 15:40:37 -0700 Subject: [PATCH 201/960] A few changes to improve the real data performance: * Turn off the force_gpu_compatible by default. * Move the cast operator within the processing operator. * Have the map_and_batch operator produce gpu_compatible output. * Add an option to produce fp16 tensors for network transfer by default. On DGX-1 V100, with resnet50, I got 5050 images/sec on real data, 5395 images/sec on synthetic data. With trivial model, I got 13000+ images/sec on real data. PiperOrigin-RevId: 189653575 --- tensorflow/core/kernels/data/map_and_batch_dataset_op.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 9ce263732f..e22200f758 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -183,7 +183,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { TensorShape component_shape( batch_results_[current_batch_index_].output[i].shape()); component_shape.set_dim(0, num_elements); - Tensor component(ctx->allocator({}), output[i].dtype(), + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + Tensor component(ctx->allocator(attr), output[i].dtype(), component_shape); TF_RETURN_IF_ERROR( CopyPartialBatch(&component, output[i], num_elements)); @@ -255,7 +257,9 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { for (size_t i = 0; i < num_components; ++i) { TensorShape component_shape({dataset()->batch_size_}); component_shape.AppendShape(return_values[i].shape()); - Tensor component(ctx->allocator({}), return_values[i].dtype(), + AllocatorAttributes attr; + attr.set_gpu_compatible(true); + Tensor component(ctx->allocator(attr), return_values[i].dtype(), component_shape); batch_result->output.emplace_back(std::move(component)); } -- GitLab From 448d65c673980d167fbd97206334bec641d118e6 Mon Sep 17 00:00:00 2001 From: Zhixian Yan Date: Mon, 19 Mar 2018 15:50:02 -0700 Subject: [PATCH 202/960] Disable lstm test in generated_example due to state non-definitive init. PiperOrigin-RevId: 189654943 --- tensorflow/contrib/lite/testing/BUILD | 1 - .../contrib/lite/testing/generated_examples_zip_test.cc | 4 ---- 2 files changed, 5 deletions(-) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 631601656d..f1b18ad30f 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -35,7 +35,6 @@ gen_zipped_test_files( "l2norm.zip", "local_response_norm.zip", "log_softmax.zip", - "lstm.zip", "max_pool.zip", "mean.zip", "mul.zip", diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 88c5aaa099..5e76e7c510 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -88,9 +88,6 @@ std::map kBrokenTests = { // Transpose only supports 1D-4D input tensors. {R"(^\/transpose.*input_shape=\[.,.,.,.,.\])", "71545879"}, - - // Lstm kernel gets different results on tsan, asan, msan. - {R"(^\/lstmdtype=tf.float32.*)", "73830845"}, }; // Allows test data to be unzipped into a temporary directory and makes @@ -250,7 +247,6 @@ INSTANTIATE_TESTS(l2_pool) INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(log_softmax) -INSTANTIATE_TESTS(lstm) INSTANTIATE_TESTS(max_pool) INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) -- GitLab From 8fded7872fe0921e0f90fac1891cda0c46a26855 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 19 Mar 2018 16:35:29 -0700 Subject: [PATCH 203/960] [update TensorRT converter] (#17772) * [update TensorRT converter] fixed FusedBatchNorm to support broadcast; remove fp16 conversion for type int const add Snapshot in conversion (treated as identity) * [TensorRT converter batchnorm code cleaning] * TRT batchnorm code cleaning --- .../contrib/tensorrt/convert/convert_graph.cc | 1 + .../contrib/tensorrt/convert/convert_nodes.cc | 245 +++++++++--------- 2 files changed, 118 insertions(+), 128 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index eea8c8efa2..90447ee666 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -55,6 +55,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // Split it into a registration for each kernel. static const std::set candidate_ops = { "Identity", + "Snapshot", "Const", "Conv2D", "MaxPool", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4c00630cfe..7f4b57f9f4 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -346,11 +346,10 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } case tensorflow::DataType::DT_HALF: { - Reorder2( - {k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); + Reorder2({k, c}, static_cast(iweights.GetValues()), + istrides, static_cast( + const_cast(oweights->GetValues())), + ostrides); break; } default: @@ -998,9 +997,7 @@ enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, - std::vector* outputs, - int group // group ==0 specifies depthwise conv -) { + std::vector* outputs, int group) { const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); @@ -1134,9 +1131,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1447,62 +1444,23 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - TTypes::Flat half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - switch (dtype) { - case (tensorflow::DT_INT32): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT16): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_UINT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - default: - return tensorflow::errors::InvalidArgument( - "Datatype " + tensorflow::DataTypeString(dtype) + - " for FP16 conversion"); - break; - }; - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val() - .end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } + // we should not have converted //if (ctx.isFP16()) { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); + len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val().end()); // make a local copy first to flatten + // doesn't have to be contigous + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { + // obsolete method. + // After optimization path, we do not see weights in this format. + // fp16 conversion technically should be needed here. VLOG(2) << "TENSOR!!!" << node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1784,8 +1742,6 @@ tensorflow::Status ConvertConcat(Converter& ctx, TRT_ShapedWeights axis = inputs.at(input_size).weights(); TFAttrs attrs(node_def); - // auto attr_size = attrs.at("N")->i(); - // auto data_type = attrs.get("T"); auto index_type = attrs.get("Tidx"); // TODO(jie): handle data type @@ -1875,71 +1831,103 @@ tensorflow::Status ConvertFusedBatchNorm( "only is_training=false is supported, at " + node_def.name()); } nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); - TRT_ShapedWeights variance_weights = inputs.at(4).weights(); - TRT_ShapedWeights dummy_power_weights(scale_weights.type_); - TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(scale_weights); - TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(offset_weights); - size_t nweight = scale_weights.count(); - if ((scale_weights.type_ == offset_weights.type_) && - (mean_weights.type_ == variance_weights.type_) && - (scale_weights.type_ == variance_weights.type_)) { - if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && - (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + + // Check parameter types + auto parameter_type = inputs.at(1).weights().type_; + if ((parameter_type != tensorflow::DataType::DT_FLOAT) && + (parameter_type != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + tensorflow::DataTypeString(parameter_type)); + } + for (int i = 1; i < 5; i++) { + if (inputs.at(i).weights().type_ != parameter_type) { return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + - tensorflow::DataTypeString(scale_weights.type_)); + "Inconsistent parameter type for batchnormis not supported, at: " + + node_def.name()); } - if (scale_weights.type_ == tensorflow::DT_FLOAT) { - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast(scale_weights.GetValues()))[i]; - float offset = - (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = - (static_cast(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast( - static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( - static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; - } - } else { - const Eigen::half* scale_vals = - (static_cast(scale_weights.GetValues())); - const Eigen::half* off_vals = - (static_cast(offset_weights.GetValues())); - const Eigen::half* mean_vals = - (static_cast(mean_weights.GetValues())); - const Eigen::half* variance_vals = - (static_cast(variance_weights.GetValues())); - Eigen::half* comb_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - Eigen::half* comb_off_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - for (size_t i = 0; i < nweight; ++i) { - float scale(scale_vals[i]); - float offset(off_vals[i]); - float mean(mean_vals[i]); - float variance(variance_vals[i]); - float combined_scale_ref = scale / sqrtf(variance + epsilon); - comb_scale_vals[i] = Eigen::half(combined_scale_ref); - float combined_offset_ref = offset - mean * combined_scale_ref; - comb_off_vals[i] = Eigen::half(combined_offset_ref); + } + + TRT_ShapedWeights dummy_power_weights(parameter_type); + size_t nweight = 0; + for (int i = 1; i < 5; i++) { + nweight = std::max(nweight, (size_t)inputs.at(i).weights().count()); + } + TRT_ShapedWeights* ptr_shape_weights = nullptr; + for (int i = 1; i < 5; i++) { + if (inputs.at(i).weights().count() == nweight) { + ptr_shape_weights = + const_cast(&(inputs.at(i).weights())); + } else if (inputs.at(i).weights().count() != 1) { + return tensorflow::errors::InvalidArgument( + "Inconsistent batchnorm parameter count, at: " + node_def.name()); + } + } + // We could technically have two weights with different shape. + // that requires two addScale op, arguably less performant + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(*ptr_shape_weights); + TRT_ShapedWeights combined_offset_weights = + ctx.get_temp_weights_like(*ptr_shape_weights); + + const Eigen::half* cast_vals_array[4]; + const float* vals_array[4]; + for (int j = 0; j < 4; j++) { + cast_vals_array[j] = + static_cast(inputs.at(j + 1).weights().GetValues()); + vals_array[j] = + static_cast(inputs.at(j + 1).weights().GetValues()); + } + Eigen::half* cast_combined_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* cast_combined_offset_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + float* combined_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + float* combined_offset_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + + for (size_t i = 0; i < nweight; ++i) { + float batchnorm_data[4]; + for (int j = 0; j < 4; j++) { + if (inputs.at(j + 1).weights().count() != 1) { + if (parameter_type == tensorflow::DT_FLOAT) { + batchnorm_data[j] = vals_array[j][i]; + } else if (parameter_type == tensorflow::DT_HALF) { + batchnorm_data[j] = + Eigen::half_impl::half_to_float(cast_vals_array[j][i]); + } + } else { + if (parameter_type == tensorflow::DT_FLOAT) { + batchnorm_data[j] = vals_array[j][0]; + } else if (parameter_type == tensorflow::DT_HALF) { + batchnorm_data[j] = + Eigen::half_impl::half_to_float(cast_vals_array[j][0]); + } } } + float scale = batchnorm_data[0]; + float offset = batchnorm_data[1]; + float mean = batchnorm_data[2]; + float variance = batchnorm_data[3]; + float combined_scale_val = scale / sqrtf(variance + epsilon); + float combined_offset_val = offset - mean * combined_scale_val; + if (parameter_type == tensorflow::DT_FLOAT) { + combined_scale_vals[i] = combined_scale_val; + combined_offset_vals[i] = combined_offset_val; + } else if (parameter_type == tensorflow::DT_HALF) { + cast_combined_scale_vals[i] = Eigen::half(combined_scale_val); + cast_combined_offset_vals[i] = Eigen::half(combined_offset_val); + } } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); + + nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM + : nvinfer1::ScaleMode::kCHANNEL; + nvinfer1::IScaleLayer* layer = + ctx.network()->addScale(*const_cast(tensor), mode, + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -2050,6 +2038,7 @@ void Converter::register_op_converters() { op_registry_["Const"] = ConvertConst; // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed + op_registry_["Snapshot"] = ConvertIdentity; // Snapshot should be removed // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; -- GitLab From b1208ba0197547e75c3860b385d036e3909f8ea9 Mon Sep 17 00:00:00 2001 From: Lukasz Kaiser Date: Mon, 19 Mar 2018 16:56:44 -0700 Subject: [PATCH 204/960] Automated g4 rollback of changelist 188440916 PiperOrigin-RevId: 189664854 --- tensorflow/core/kernels/pad_op.cc | 124 ++---------------- tensorflow/python/kernel_tests/pad_op_test.py | 25 ---- 2 files changed, 11 insertions(+), 138 deletions(-) diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index 04c71e384b..a7238ef67b 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -104,144 +104,42 @@ class PadOp : public OpKernel { return; } - TensorShape collapsed_input_shape; - TensorShape collapsed_output_shape; - Tensor collapsed_paddings; - if (fixed_dims > 1 && - CollapseAdjacentNonPaddedDimensions( - in0.shape(), in1, output_shape, &collapsed_input_shape, - &collapsed_paddings, &collapsed_output_shape)) { - Tensor collapsed_input; - CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape)); - Tensor collapsed_output; - OP_REQUIRES_OK(context, context->allocate_temp(collapsed_input.dtype(), - collapsed_output_shape, - &collapsed_output)); - const Tensor& collapsed_paddings_ref = collapsed_paddings; - typename TTypes::ConstMatrix collapsed_paddings_matrix = - collapsed_paddings_ref.matrix(); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - OperateWithVariableRank(context, collapsed_input_shape.dims(), - collapsed_input, collapsed_paddings_matrix, - pad_value, &collapsed_output); - - Tensor output; - CHECK(output.CopyFrom(collapsed_output, output_shape)); - context->set_output(0, output); - } else { - Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, output_shape, &output)); - OperateWithVariableRank(context, fixed_dims, in0, paddings, pad_value, - output); - } - } - - private: - // Collapses adjacent dimensions that are not padded to one dimension for - // speed. Returns true if any two dimensions are collapsed. For example, - // - // Pad(input_shape=[8, 28, 28, 3], - // paddings=[[0, 0], [0, 0], [0, 0], [0, 1]] - // is equivalent to - // Pad(input_shape=[6272, 3], - // paddings=[[0, 0], [0, 1]]) - // - // input_shape: the original input shape. - // paddings_as_tensor: the original paddings. - // output_shape: the original output shape. - // collapsed_input_shape: the input shape after collapsing. - // collapsed_paddings_as_tensor: the paddings after collapsing. - // collapsed_output_shape: the output shape after collapsing. - static bool CollapseAdjacentNonPaddedDimensions( - const TensorShape& input_shape, const Tensor& paddings_as_tensor, - const TensorShape& output_shape, TensorShape* collapsed_input_shape, - Tensor* collapsed_paddings_as_tensor, - TensorShape* collapsed_output_shape) { - bool collapsed = false; - typename TTypes::ConstMatrix paddings = - paddings_as_tensor.matrix(); - std::vector> collapsed_paddings; - int i = 0; - while (i < paddings.dimension(0)) { - if (paddings(i, 0) != 0 || paddings(i, 1) != 0) { - // If padded, copy the original dimension over. - collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), - input_shape.dim_size(i)); - collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), - output_shape.dim_size(i)); - collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)}); - ++i; - } else { - // If not padded, find the next dimension that is padded and collapse - // all dimensions in between to one dimension. - int64 collapsed_input_dim_size = input_shape.dim_size(i); - int64 collapsed_output_dim_size = output_shape.dim_size(i); - ++i; - while (i < paddings.dimension(0) && paddings(i, 0) == 0 && - paddings(i, 1) == 0) { - collapsed = true; - collapsed_input_dim_size *= input_shape.dim_size(i); - collapsed_output_dim_size *= output_shape.dim_size(i); - ++i; - } - collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), - collapsed_input_dim_size); - collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), - collapsed_output_dim_size); - collapsed_paddings.push_back({0, 0}); - } - } - - // Copy collapsed_paddings to collapsed_paddings_as_tensor. - *collapsed_paddings_as_tensor = - Tensor(paddings_as_tensor.dtype(), - TensorShape({static_cast(collapsed_paddings.size()), 2})); - auto collapsed_paddings_as_matrix = - collapsed_paddings_as_tensor->matrix(); - for (size_t i = 0; i < collapsed_paddings.size(); ++i) { - collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first; - collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second; - } - return collapsed; - } - - void OperateWithVariableRank(OpKernelContext* context, int fixed_dims, - const Tensor& input, - typename TTypes::ConstMatrix paddings, - T pad_value, Tensor* output) { // Invoke the dims-specific implementation. switch (fixed_dims) { case 0: - Operate<0>(context, input.tensor(), paddings, pad_value, output); + Operate<0>(context, in0.tensor(), paddings, pad_value, output); break; case 1: // TODO(irving): Once Pad doesn't need a scalar special case, // change flat to tensor. That is, once !allow_legacy_scalars(). - Operate<1>(context, input.flat(), paddings, pad_value, output); + Operate<1>(context, in0.flat(), paddings, pad_value, output); break; case 2: - Operate<2>(context, input.tensor(), paddings, pad_value, output); + Operate<2>(context, in0.tensor(), paddings, pad_value, output); break; case 3: - Operate<3>(context, input.tensor(), paddings, pad_value, output); + Operate<3>(context, in0.tensor(), paddings, pad_value, output); break; case 4: - Operate<4>(context, input.tensor(), paddings, pad_value, output); + Operate<4>(context, in0.tensor(), paddings, pad_value, output); break; case 5: - Operate<5>(context, input.tensor(), paddings, pad_value, output); + Operate<5>(context, in0.tensor(), paddings, pad_value, output); break; case 6: - Operate<6>(context, input.tensor(), paddings, pad_value, output); + Operate<6>(context, in0.tensor(), paddings, pad_value, output); break; default: OP_REQUIRES(context, false, errors::InvalidArgument("Only ranks up to 6 supported: ", - input.shape().DebugString())); + in0.shape().DebugString())); } } + private: template void Operate(OpKernelContext* context, typename TTypes::ConstTensor input, diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index 14632ec29a..9ed5947aae 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -336,30 +336,5 @@ class PadOpTest(test.TestCase): self.assertAllEqual(inp, out) self.assertShapeEqual(inp, tf_val) - def testCollapseAdjacentNonPaddedDimensions(self): - # pyformat: disable - for paddings_value in [[[0, 0], [0, 0], [0, 0], [0, 1]], - [[0, 0], [2, 3], [0, 0], [0, 0]], - [[0, 0], [0, 0], [0, 0], [0, 0]]]: - # pyformat: enable - inp = constant_op.constant(1.0, shape=[8, 28, 28, 3]) - paddings = constant_op.constant(paddings_value, dtype=dtypes.int32) - padded = array_ops.pad(inp, paddings) - middle = array_ops.slice(padded, [row[0] for row in paddings_value], - [dim.value for dim in inp.shape.dims]) - left = array_ops.slice(padded, [0, 0, 0, 0], - [row[0] for row in paddings_value]) - right = array_ops.slice( - padded, - [paddings_value[i][0] + inp.shape.dims[i].value for i in range(4)], - [-1, -1, -1, -1]) - with self.test_session(use_gpu=True): - self.assertAllEqual(inp.eval(), middle.eval()) - self.assertAllEqual( - np.zeros([row[0] for row in paddings_value]), left.eval()) - self.assertAllEqual( - np.zeros([row[1] for row in paddings_value]), right.eval()) - - if __name__ == "__main__": test.main() -- GitLab From 4f5b7b42e2f8cb6b6e6730b6ada0edbee67dbfe3 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Mon, 19 Mar 2018 17:04:28 -0700 Subject: [PATCH 205/960] Fix test failure PiperOrigin-RevId: 189666053 --- tensorflow/python/layers/normalization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 8b79a92cc4..11daf01670 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -364,8 +364,9 @@ class BatchNormalization(base.Layer): [variable, value, momentum]) as scope: with ops.colocate_with(variable): decay = ops.convert_to_tensor(1.0 - momentum, name='decay') - update_delta = math_ops.multiply( - math_ops.subtract(variable.read_value(), value), decay) + if decay.dtype != variable.dtype.base_dtype: + decay = math_ops.cast(decay, variable.dtype.base_dtype) + update_delta = (variable - value) * decay return state_ops.assign_sub(variable, update_delta, name=scope) def _fused_batch_norm(self, inputs, training): -- GitLab From 2bd7f5e190db4ad2d111f824163855c3dfcb9566 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 19 Mar 2018 17:13:11 -0700 Subject: [PATCH 206/960] [tf.data] Combine implementations of FlatMapDataset, InterleaveDataset and ParallelInterleaveDataset. PiperOrigin-RevId: 189667086 --- tensorflow/python/data/ops/dataset_ops.py | 54 ++------------------ tensorflow/python/data/ops/readers.py | 60 +++-------------------- 2 files changed, 10 insertions(+), 104 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index a0c5a43a45..c0a6283be4 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1950,47 +1950,13 @@ class FlatMapDataset(Dataset): return self._output_types -class InterleaveDataset(Dataset): +class InterleaveDataset(FlatMapDataset): """A `Dataset` that maps a function over its input and interleaves the result. """ def __init__(self, input_dataset, map_func, cycle_length, block_length): """See `Dataset.interleave()` for details.""" - super(InterleaveDataset, self).__init__() - self._input_dataset = input_dataset - - @function.Defun(*nest.flatten( - sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes))) - def tf_map_func(*args): - """A wrapper for Defun that facilitates shape inference.""" - # Pass in shape information from the input_dataset. - dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) - for arg, shape in zip(args, nest.flatten(dense_shapes)): - arg.set_shape(shape) - - nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types, input_dataset.output_shapes, - input_dataset.output_classes) - if _should_unpack_args(nested_args): - dataset = map_func(*nested_args) - else: - dataset = map_func(nested_args) - - if not isinstance(dataset, Dataset): - raise TypeError("`map_func` must return a `Dataset` object.") - - self._output_classes = dataset.output_classes - self._output_types = dataset.output_types - self._output_shapes = dataset.output_shapes - - return dataset._as_variant_tensor() # pylint: disable=protected-access - - self._map_func = tf_map_func - self._map_func.add_to_graph(ops.get_default_graph()) - + super(InterleaveDataset, self).__init__(input_dataset, map_func) self._cycle_length = ops.convert_to_tensor( cycle_length, dtype=dtypes.int64, name="cycle_length") self._block_length = ops.convert_to_tensor( @@ -1999,27 +1965,15 @@ class InterleaveDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.interleave_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._map_func.captured_inputs, + self._map_func.captured_inputs, # pylint: disable=protected-access self._cycle_length, self._block_length, - f=self._map_func, + f=self._map_func, # pylint: disable=protected-access output_types=nest.flatten( sparse.as_dense_types(self.output_types, self.output_classes)), output_shapes=nest.flatten( sparse.as_dense_shapes(self.output_shapes, self.output_classes))) - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - class FilterDataset(Dataset): """A `Dataset` that filters its input according to a predicate function.""" diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index 6c493d8163..fe033f5546 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -22,7 +22,6 @@ from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops @@ -121,51 +120,14 @@ class _TFRecordDataset(dataset_ops.Dataset): return dtypes.string -class ParallelInterleaveDataset(dataset_ops.Dataset): +class ParallelInterleaveDataset(dataset_ops.InterleaveDataset): """A `Dataset` that maps a function over its input and flattens the result.""" def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.contrib.data.parallel_interleave()` for details.""" - super(ParallelInterleaveDataset, self).__init__() - self._input_dataset = input_dataset - - @function.Defun(*nest.flatten( - sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes))) - def tf_map_func(*args): - """A wrapper for Defun that facilitates shape inference.""" - # Pass in shape information from the input_dataset. - dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) - for arg, shape in zip(args, nest.flatten(dense_shapes)): - arg.set_shape(shape) - - nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types, input_dataset.output_shapes, - input_dataset.output_classes) - if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access - dataset = map_func(*nested_args) - else: - dataset = map_func(nested_args) - - if not isinstance(dataset, dataset_ops.Dataset): - raise TypeError("`map_func` must return a `Dataset` object.") - - self._output_classes = dataset.output_classes - self._output_types = dataset.output_types - self._output_shapes = dataset.output_shapes - - return dataset._as_variant_tensor() # pylint: disable=protected-access - - self._map_func = tf_map_func - self._map_func.add_to_graph(ops.get_default_graph()) - - self._cycle_length = ops.convert_to_tensor( - cycle_length, dtype=dtypes.int64, name="cycle_length") - self._block_length = ops.convert_to_tensor( - block_length, dtype=dtypes.int64, name="block_length") + super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func, + cycle_length, block_length) self._sloppy = ops.convert_to_tensor( sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( @@ -178,8 +140,9 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): argument_default=2 * cycle_length) def _as_variant_tensor(self): + # pylint: disable=protected-access return gen_dataset_ops.parallel_interleave_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._input_dataset._as_variant_tensor(), self._map_func.captured_inputs, self._cycle_length, self._block_length, @@ -191,18 +154,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): sparse.as_dense_types(self.output_types, self.output_classes)), output_shapes=nest.flatten( sparse.as_dense_shapes(self.output_shapes, self.output_classes))) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types + # pylint: enable=protected-access @tf_export("data.TFRecordDataset") -- GitLab From 41335abb46f80ca644b5738550daef6136ba5476 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 17:23:20 -0700 Subject: [PATCH 207/960] Improve flatbuffer verification. PiperOrigin-RevId: 189668634 --- tensorflow/contrib/lite/toco/tflite/BUILD | 2 + tensorflow/contrib/lite/toco/tflite/import.cc | 7 +- .../contrib/lite/toco/tflite/import_test.cc | 106 +++++++++++++++--- tensorflow/contrib/lite/tools/verifier.cc | 71 ++++++++++-- tensorflow/contrib/lite/tools/verifier.h | 15 +++ .../contrib/lite/tools/verifier_test.cc | 4 +- 6 files changed, 175 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD index a2b8145a67..9d3e1daf12 100644 --- a/tensorflow/contrib/lite/toco/tflite/BUILD +++ b/tensorflow/contrib/lite/toco/tflite/BUILD @@ -115,9 +115,11 @@ cc_library( deps = [ ":operator", ":types", + "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/schema:schema_fbs", "//tensorflow/contrib/lite/toco:model", "//tensorflow/contrib/lite/toco:tooling_util", + "//tensorflow/contrib/lite/tools:verifier", "@flatbuffers", ], ) diff --git a/tensorflow/contrib/lite/toco/tflite/import.cc b/tensorflow/contrib/lite/toco/tflite/import.cc index 867395e881..c0e7ab2ef5 100644 --- a/tensorflow/contrib/lite/toco/tflite/import.cc +++ b/tensorflow/contrib/lite/toco/tflite/import.cc @@ -15,10 +15,12 @@ limitations under the License. #include "tensorflow/contrib/lite/toco/tflite/import.h" #include "flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" #include "tensorflow/contrib/lite/toco/tflite/operator.h" #include "tensorflow/contrib/lite/toco/tflite/types.h" #include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/contrib/lite/tools/verifier.h" namespace toco { @@ -171,10 +173,11 @@ bool Verify(const void* buf, size_t len) { std::unique_ptr Import(const ModelFlags& model_flags, const string& input_file_contents) { - if (!Verify(input_file_contents.data(), input_file_contents.size())) { + ::tflite::AlwaysTrueResolver r; + if (!::tflite::Verify(input_file_contents.data(), input_file_contents.size(), + r, ::tflite::DefaultErrorReporter())) { LOG(FATAL) << "Invalid flatbuffer."; } - const ::tflite::Model* input_model = ::tflite::GetModel(input_file_contents.data()); diff --git a/tensorflow/contrib/lite/toco/tflite/import_test.cc b/tensorflow/contrib/lite/toco/tflite/import_test.cc index 937a291cf7..edd22f783f 100644 --- a/tensorflow/contrib/lite/toco/tflite/import_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/import_test.cc @@ -36,12 +36,13 @@ class ImportTest : public ::testing::Test { return builder_.CreateVector(reinterpret_cast(data.data()), sizeof(T) * data.size()); } + Offset>> BuildBuffers() { auto buf0 = ::tflite::CreateBuffer(builder_, CreateDataVector({})); - auto buf1 = - ::tflite::CreateBuffer(builder_, CreateDataVector({1.0f, 2.0f})); + auto buf1 = ::tflite::CreateBuffer( + builder_, CreateDataVector({1.0f, 2.0f, 3.0f, 4.0f})); auto buf2 = - ::tflite::CreateBuffer(builder_, CreateDataVector({3.0f})); + ::tflite::CreateBuffer(builder_, CreateDataVector({3.0f, 4.0f})); return builder_.CreateVector( std::vector>({buf0, buf1, buf2})); } @@ -53,10 +54,10 @@ class ImportTest : public ::testing::Test { /*max=*/builder_.CreateVector({0.2f}), /*scale=*/builder_.CreateVector({0.3f}), /*zero_point=*/builder_.CreateVector({100ll})); - auto t1 = ::tflite::CreateTensor(builder_, - builder_.CreateVector({1, 2, 3, 4}), - ::tflite::TensorType_FLOAT32, 1, - builder_.CreateString("tensor_one"), q); + auto t1 = + ::tflite::CreateTensor(builder_, builder_.CreateVector({1, 2, 2}), + ::tflite::TensorType_FLOAT32, 1, + builder_.CreateString("tensor_one"), q); auto t2 = ::tflite::CreateTensor(builder_, builder_.CreateVector({2, 1}), ::tflite::TensorType_FLOAT32, 2, @@ -65,18 +66,26 @@ class ImportTest : public ::testing::Test { std::vector>({t1, t2})); } + Offset>> BuildOpCodes( + std::initializer_list<::tflite::BuiltinOperator> op_codes) { + std::vector> op_codes_vector; + for (auto op : op_codes) { + op_codes_vector.push_back(::tflite::CreateOperatorCode(builder_, op, 0)); + } + return builder_.CreateVector(op_codes_vector); + } + Offset>> BuildOpCodes() { - auto c1 = ::tflite::CreateOperatorCode( - builder_, ::tflite::BuiltinOperator_MAX_POOL_2D, 0); - auto c2 = ::tflite::CreateOperatorCode( - builder_, ::tflite::BuiltinOperator_CONV_2D, 0); - return builder_.CreateVector( - std::vector>({c1, c2})); + return BuildOpCodes({::tflite::BuiltinOperator_MAX_POOL_2D, + ::tflite::BuiltinOperator_CONV_2D}); } - Offset>> BuildOperators() { - auto is = builder_.CreateVector({0}); - auto os = builder_.CreateVector({1}); + Offset>> BuildOperators( + std::initializer_list inputs, std::initializer_list outputs) { + auto is = builder_.CreateVector(inputs); + if (inputs.size() == 0) is = 0; + auto os = builder_.CreateVector(outputs); + if (outputs.size() == 0) os = 0; auto op = ::tflite::CreateOperator( builder_, 0, is, os, ::tflite::BuiltinOptions_Conv2DOptions, ::tflite::CreateConv2DOptions(builder_, ::tflite::Padding_VALID, 1, 1, @@ -87,6 +96,10 @@ class ImportTest : public ::testing::Test { return builder_.CreateVector(std::vector>({op})); } + Offset>> BuildOperators() { + return BuildOperators({0}, {1}); + } + Offset>> BuildSubGraphs( Offset>> tensors, Offset>> operators, @@ -154,9 +167,9 @@ TEST_F(ImportTest, Tensors) { Array& a1 = model->GetArray("tensor_one"); EXPECT_EQ(ArrayDataType::kFloat, a1.data_type); EXPECT_THAT(a1.GetBuffer().data, - ElementsAre(1.0f, 2.0f)); + ElementsAre(1.0f, 2.0f, 3.0f, 4.0f)); ASSERT_TRUE(a1.has_shape()); - EXPECT_THAT(a1.shape().dims(), ElementsAre(1, 2, 3, 4)); + EXPECT_THAT(a1.shape().dims(), ElementsAre(1, 2, 2)); const auto& mm = a1.minmax; ASSERT_TRUE(mm.get()); @@ -169,6 +182,63 @@ TEST_F(ImportTest, Tensors) { EXPECT_EQ(100, q->zero_point); } +TEST_F(ImportTest, NoBuffers) { + auto buffers = 0; + auto tensors = BuildTensors(); + auto opcodes = BuildOpCodes(); + auto operators = BuildOperators(); + auto subgraphs = BuildSubGraphs(tensors, operators); + auto comment = builder_.CreateString(""); + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs, comment, buffers)); + EXPECT_DEATH(Import(ModelFlags(), InputModelAsString()), + "Missing 'buffers' section."); +} + +TEST_F(ImportTest, NoInputs) { + auto buffers = BuildBuffers(); + auto tensors = BuildTensors(); + auto opcodes = BuildOpCodes(); + auto operators = BuildOperators({}, {1}); + auto subgraphs = BuildSubGraphs(tensors, operators); + auto comment = builder_.CreateString(""); + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs, comment, buffers)); + EXPECT_DEATH(Import(ModelFlags(), InputModelAsString()), + "Missing 'inputs' for operator."); +} + +TEST_F(ImportTest, NoOutputs) { + auto buffers = BuildBuffers(); + auto tensors = BuildTensors(); + auto opcodes = BuildOpCodes(); + auto operators = BuildOperators({0}, {}); + auto subgraphs = BuildSubGraphs(tensors, operators); + auto comment = builder_.CreateString(""); + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs, comment, buffers)); + EXPECT_DEATH(Import(ModelFlags(), InputModelAsString()), + "Missing 'outputs' for operator."); +} + +TEST_F(ImportTest, InvalidOpCode) { + auto buffers = BuildBuffers(); + auto tensors = BuildTensors(); + auto opcodes = BuildOpCodes({static_cast<::tflite::BuiltinOperator>(-1), + ::tflite::BuiltinOperator_CONV_2D}); + auto operators = BuildOperators(); + auto subgraphs = BuildSubGraphs(tensors, operators); + auto comment = builder_.CreateString(""); + ::tflite::FinishModelBuffer( + builder_, ::tflite::CreateModel(builder_, TFLITE_SCHEMA_VERSION, opcodes, + subgraphs, comment, buffers)); + EXPECT_DEATH(Import(ModelFlags(), InputModelAsString()), + "Operator id '-1' is out of range."); +} + TEST_F(ImportTest, MultipleSubGraphs) { auto buffers = BuildBuffers(); auto tensors = BuildTensors(); diff --git a/tensorflow/contrib/lite/tools/verifier.cc b/tensorflow/contrib/lite/tools/verifier.cc index 59c74205f0..8818a7dc85 100644 --- a/tensorflow/contrib/lite/tools/verifier.cc +++ b/tensorflow/contrib/lite/tools/verifier.cc @@ -148,11 +148,52 @@ bool VerifyNumericTensorBuffer(const Tensor& tensor, const Buffer& buffer, // TODO(yichengfan): verify quantized tensors. } +using flatbuffers::Offset; +using flatbuffers::Vector; + +bool VerifyOperators(const Vector>& operators, + ErrorReporter* error_reporter) { + for (const auto& op : operators) { + if (!op->inputs()) { + ReportError(error_reporter, "Missing 'inputs' for operator."); + return false; + } + if (!op->outputs()) { + ReportError(error_reporter, "Missing 'outputs' for operator."); + return false; + } + } + return true; +} + +bool VerifySubGraphs(const Model& model, ErrorReporter* error_reporter) { + if (!model.subgraphs()) { + ReportError(error_reporter, "Missing 'subgraphs' section."); + return false; + } + for (const auto& subgraph : *model.subgraphs()) { + if (!subgraph->operators()) { + ReportError(error_reporter, "Missing 'operators' section in subgraph."); + return false; + } + + if (!VerifyOperators(*subgraph->operators(), error_reporter)) { + return false; + } + } + return true; +} + // Verifies tensors have valid properties and legit buffer if set. bool VerifyTensors(const Model& model, ErrorReporter* error_reporter) { if (!model.subgraphs()) { return true; } + if (!model.buffers()) { + ReportError(error_reporter, "Missing 'buffers' section."); + return false; + } + for (const auto& subgraph : *model.subgraphs()) { if (!subgraph->tensors()) { continue; @@ -167,19 +208,23 @@ bool VerifyTensors(const Model& model, ErrorReporter* error_reporter) { return false; } auto* buffer = model.buffers()->Get(tensor->buffer()); - if (!buffer || !buffer->data()) { + if (!buffer) { ReportError(error_reporter, "Tensor buffer %d not set", tensor->buffer()); return false; } - if (tensor->type() == TensorType_STRING) { - if (!VerifyStringTensorBuffer(*buffer, error_reporter)) { - return false; - } - } else { - if (!VerifyNumericTensorBuffer(*tensor, *buffer, error_reporter)) { - return false; + // Many transient tensors don't have data in the flatbuffer. Their + // buffers will be allocated by the interpreter at run-time. + if (buffer->data()) { + if (tensor->type() == TensorType_STRING) { + if (!VerifyStringTensorBuffer(*buffer, error_reporter)) { + return false; + } + } else { + if (!VerifyNumericTensorBuffer(*tensor, *buffer, error_reporter)) { + return false; + } } } } @@ -193,6 +238,13 @@ bool VerifyOps(const Model& model, const OpResolver& resolver, return true; } for (const auto& opcode : *model.operator_codes()) { + if (opcode->builtin_code() < BuiltinOperator_MIN || + opcode->builtin_code() > BuiltinOperator_MAX) { + ReportError(error_reporter, "Operator id '%d' is out of range.", + opcode->builtin_code()); + return false; + } + if (opcode->builtin_code() == BuiltinOperator_CUSTOM) { if (!resolver.FindOp(opcode->custom_code()->c_str())) { ReportError(error_reporter, "Unsupported custom op: %s", @@ -223,6 +275,9 @@ bool Verify(const void* buf, size_t len, const OpResolver& resolver, ReportError(error_reporter, "Invalid model version %d", model->version()); return false; } + if (!VerifySubGraphs(*model, error_reporter)) { + return false; + } if (!VerifyTensors(*model, error_reporter)) { return false; } diff --git a/tensorflow/contrib/lite/tools/verifier.h b/tensorflow/contrib/lite/tools/verifier.h index c2ee11215c..b7ce4e8305 100644 --- a/tensorflow/contrib/lite/tools/verifier.h +++ b/tensorflow/contrib/lite/tools/verifier.h @@ -23,6 +23,21 @@ limitations under the License. namespace tflite { +class AlwaysTrueResolver : public OpResolver { + public: + AlwaysTrueResolver() {} + TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override { + static TfLiteRegistration null_registration = {nullptr, nullptr, nullptr, + nullptr}; + return &null_registration; + } + TfLiteRegistration* FindOp(const char* op) const override { + static TfLiteRegistration null_registration = {nullptr, nullptr, nullptr, + nullptr}; + return &null_registration; + } +}; + // Verifies the integrity of a Tensorflow Lite flatbuffer model file. // Currently, it verifies: // * The file is following a legit flatbuffer schema. diff --git a/tensorflow/contrib/lite/tools/verifier_test.cc b/tensorflow/contrib/lite/tools/verifier_test.cc index b3e611f999..03b93afe3e 100644 --- a/tensorflow/contrib/lite/tools/verifier_test.cc +++ b/tensorflow/contrib/lite/tools/verifier_test.cc @@ -113,8 +113,8 @@ TEST(VerifyModel, TestEmptyModel) { /*description=*/0, /*buffers=*/0); ::tflite::FinishModelBuffer(builder, model); - ASSERT_TRUE(Verify(builder.GetBufferPointer(), builder.GetSize(), - MutableOpResolver{}, DefaultErrorReporter())); + ASSERT_FALSE(Verify(builder.GetBufferPointer(), builder.GetSize(), + MutableOpResolver{}, DefaultErrorReporter())); } TEST(VerifyModel, TestSimpleModel) { -- GitLab From b6b4ec642a632af9abaf3ca7a2b1348ab2e94bef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 17:29:19 -0700 Subject: [PATCH 208/960] Add a clif build rule for saved_model. PiperOrigin-RevId: 189669509 --- tensorflow/core/BUILD | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index cf29444065..1d283e240d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1405,6 +1405,13 @@ tf_pyclif_proto_library( visibility = ["//visibility:public"], ) +tf_pyclif_proto_library( + name = "protobuf/device_properties_pyclif", + proto_lib = ":protos_all_cc", + proto_srcfile = "protobuf/device_properties.proto", + visibility = ["//visibility:public"], +) + tf_pyclif_proto_library( name = "protobuf/meta_graph_pyclif", proto_lib = ":protos_all_cc", @@ -1413,9 +1420,9 @@ tf_pyclif_proto_library( ) tf_pyclif_proto_library( - name = "protobuf/device_properties_pyclif", + name = "protobuf/saved_model_pyclif", proto_lib = ":protos_all_cc", - proto_srcfile = "protobuf/device_properties.proto", + proto_srcfile = "protobuf/saved_model.proto", visibility = ["//visibility:public"], ) -- GitLab From 2714c07c93c2fd84480f816e0da44030a0a2bd45 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 19 Mar 2018 17:34:47 -0700 Subject: [PATCH 209/960] Make _USE_C_API = True and_USE_C_SHAPES = False work with import_graph_def. Without this change, shapes wouldn't be correctly computed for operations created via import_graph_def. PiperOrigin-RevId: 189670312 --- tensorflow/python/client/session_test.py | 3 +- tensorflow/python/framework/importer.py | 34 ++++++++++--------- tensorflow/python/framework/importer_test.py | 4 +-- .../python/framework/meta_graph_test.py | 3 +- tensorflow/python/framework/ops.py | 29 +++++++++++----- tensorflow/python/training/saver_test.py | 3 +- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 44ff440cc5..6e2640efd1 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -62,8 +62,7 @@ from tensorflow.python.util import compat ops.RegisterShape('ConstructionFails')(common_shapes.unknown_shape) -# TODO(skyewm): reenable when this works with _USE_C_SHAPES=False -# @test_util.with_c_api +@test_util.with_c_api class SessionTest(test_util.TensorFlowTestCase): def setUp(self): diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 783e9259ad..a9e399f59b 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -489,23 +489,25 @@ def import_graph_def(graph_def, # Convert to ValueError for backwards compatibility. raise ValueError(str(e)) - _ProcessNewOps(graph) + # Create _DefinedFunctions for any imported functions. + # + # We do this by creating _DefinedFunctions directly from `graph_def`, and + # adding them to `graph`. Adding an existing function to a TF_Graph is a + # no-op, so this only has the effect of updating the Python state (usually + # _DefinedFunction.add_to_graph also adds the function to the TF_Graph). + # + # TODO(skyewm): fetch the TF_Functions directly from the TF_Graph + # TODO(skyewm): avoid sending serialized FunctionDefs back to the TF_Graph + # TODO(b/74620627): move this after _ProcessNewOps outside the lock once + # _USE_C_SHAPES is removed. + if graph_def.library and graph_def.library.function: + # pylint: disable=protected-access + functions = function._from_library(graph_def.library) + for f in functions: + f.add_to_graph(graph) + # pylint: enable=protected-access - # Create _DefinedFunctions for any imported functions. - # - # We do this by creating _DefinedFunctions directly from `graph_def`, and - # adding them to `graph`. Adding an existing function to a TF_Graph is a - # no-op, so this only has the effect of updating the Python state (usually - # _DefinedFunction.add_to_graph also adds the function to the TF_Graph). - # - # TODO(skyewm): fetch the TF_Functions directly from the TF_Graph - # TODO(skyewm): avoid sending serialized FunctionDefs back to the TF_Graph - if graph_def.library and graph_def.library.function: - # pylint: disable=protected-access - functions = function._from_library(graph_def.library) - for f in functions: - f.add_to_graph(graph) - # pylint: enable=protected-access + _ProcessNewOps(graph) # Treat input mappings that don't appear in the graph as an error, because # they are likely to be due to a typo. diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index c39191e6d9..bf5d9fe093 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import function from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import test_ops # pylint: disable=unused-import +from tensorflow.python.framework import test_util from tensorflow.python.framework import versions from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -43,8 +44,7 @@ import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test -# TODO(skyewm): reenable when this works with _USE_C_SHAPES=False -# @test_util.with_c_api +@test_util.with_c_api class ImportGraphDefTest(test.TestCase): def _MakeGraphDef(self, diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 06cec504e4..21963d0bee 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -285,8 +285,7 @@ class SimpleMetaGraphTest(test.TestCase): self.assertIs(global_vars[0], trainable_vars[0]) -# TODO(skyewm): reenable when this works with _USE_C_SHAPES=False -# @test_util.with_c_api +@test_util.with_c_api class ScopedMetaGraphTest(test.TestCase): def _testScopedExport(self, test_dir, exported_filenames): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index f1cd341d66..4be2e2c15d 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3303,6 +3303,20 @@ class Graph(object): input_types=input_types, original_op=self._default_original_op, op_def=op_def) + + # TODO(vrv): Instead of eagerly filling in shape property for every op, + # only populate the shape when requested. + # + # TODO(skyewm): unlike in the original Python implementation, the C API + # always computes shape information (even for function calls, which the + # original Python shape inference code doesn't handle). Deprecate the + # compute_shapes argument. + # + # TODO(b/74620627): move this back to _create_op_helper once _USE_C_SHAPES + # is removed + if (ret._c_op and _USE_C_SHAPES) or compute_shapes: # pylint: disable=protected-access + set_shapes_for_outputs(ret) + self._create_op_helper(ret, compute_shapes=compute_shapes, compute_device=compute_device) return ret @@ -3336,15 +3350,6 @@ class Graph(object): def _create_op_helper(self, op, compute_shapes=True, compute_device=True): """Common logic for creating an op in this graph.""" - # TODO(vrv): Instead of eagerly filling in shape property for every op, only - # populate the shape when requested. - # - # TODO(skyewm): unlike in the original Python implementation, the C API - # always computes shape information (even for function calls, which the - # original Python shape inference code doesn't handle). Deprecate the - # compute_shapes argument. - if (op._c_op and _USE_C_SHAPES) or compute_shapes: # pylint: disable=protected-access - set_shapes_for_outputs(op) # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed. self._add_op(op) @@ -3449,6 +3454,12 @@ class Graph(object): ] for op in new_ops: + # The Python shape inference code does not support imported functions. It + # also needs access to op.inputs, which is why we call it here. + # TODO(b/74620627): move this back to _create_op_helper once _USE_C_SHAPES + # is removed. + if not self._is_function(op.type) or _USE_C_SHAPES: + set_shapes_for_outputs(op) new_control_inputs = self._control_dependencies_for_inputs(op.inputs) # pylint: disable=protected-access op._add_control_inputs(new_control_inputs) diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 787582ae70..7de778f298 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -1739,8 +1739,7 @@ class CheckpointStateTest(test.TestCase): os.path.join(save_dir, "./model.ckpt-687529")) -# TODO(skyewm): reenable when this works with _USE_C_SHAPES=False -# @test_util.with_c_api +@test_util.with_c_api class MetaGraphTest(test.TestCase): def _get_test_dir(self, dirname): -- GitLab From e2c90615a4ab9033a36111299f6a0d4485f4f16a Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Mon, 19 Mar 2018 17:41:25 -0700 Subject: [PATCH 210/960] Standardize bib references and Examples subsection in docstrings. Recipe: + Write a #### Examples subsection below Args/Returns/Raises to illustrate examples. If the docstring's last line is a ``` closing a code snippet, add an empty line before closing the docstring with """. This properly displays the code snippet. + Write a #### References subsection at the bottom of any docstring with citations. Enumerate all references in alphabetical order. Individual bibentries use ICLR?s bibliography style, which borrows from icml2010.bst and which itself borrows from plainnl.bst. Add a link to the paper if the publication is open source (ideally, arXiv). PiperOrigin-RevId: 189670932 --- .../python/ops/autoregressive.py | 24 +-- .../python/ops/bijectors/affine.py | 2 +- .../ops/bijectors/batch_normalization.py | 31 ++-- .../ops/bijectors/cholesky_outer_product.py | 2 +- .../ops/bijectors/masked_autoregressive.py | 99 +++++----- .../python/ops/bijectors/real_nvp.py | 82 +++++---- .../python/ops/bijectors/square.py | 2 +- .../distributions/python/ops/kumaraswamy.py | 10 +- .../distributions/python/ops/moving_stats.py | 20 ++- .../contrib/distributions/python/ops/shape.py | 169 +++++++++--------- .../python/ops/vector_diffeomixture.py | 13 +- 11 files changed, 251 insertions(+), 203 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py index 852298bf33..69f3d57ff0 100644 --- a/tensorflow/contrib/distributions/python/ops/autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py @@ -36,7 +36,8 @@ class Autoregressive(distribution_lib.Distribution): "Autoregressive models decompose the joint density as a product of conditionals, and model each conditional in turn. Normalizing flows transform a base density (e.g. a standard Gaussian) into the target density - by an invertible transformation with tractable Jacobian." [1] + by an invertible transformation with tractable Jacobian." [(Papamakarios et + al., 2016)][1] In other words, the "autoregressive property" is equivalent to the decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided @@ -45,17 +46,18 @@ class Autoregressive(distribution_lib.Distribution): Practically speaking the autoregressive property means that there exists a permutation of the event coordinates such that each coordinate is a - diffeomorphic function of only preceding coordinates. [2] + diffeomorphic function of only preceding coordinates + [(van den Oord et al., 2016)][2]. #### Mathematical Details - The probability function is, + The probability function is ```none prob(x; fn, n) = fn(x).prob(x) ``` - And a sample is generated by, + And a sample is generated by ```none x = fn(...fn(fn(x0).sample()).sample()).sample() @@ -93,13 +95,15 @@ class Autoregressive(distribution_lib.Distribution): ``` - [1]: "Masked Autoregressive Flow for Density Estimation." - George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017. - https://arxiv.org/abs/1705.07057 + #### References - [2]: "Conditional Image Generation with PixelCNN Decoders." - Aaron van den Oord, Nal Kalchbrenner, Oriol Vinyals, Lasse Espeholt, Alex - Graves, Koray Kavukcuoglu. Arxiv, 2016. + [1]: George Papamakarios, Theo Pavlakou, and Iain Murray. Masked + Autoregressive Flow for Density Estimation. In _Neural Information + Processing Systems_, 2017. https://arxiv.org/abs/1705.07057 + + [2]: Aaron van den Oord, Nal Kalchbrenner, Oriol Vinyals, Lasse Espeholt, + Alex Graves, and Koray Kavukcuoglu. Conditional Image Generation with + PixelCNN Decoders. In _Neural Information Processing Systems_, 2016. https://arxiv.org/abs/1606.05328 """ diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py index 7fe73ada44..bef7bbb49b 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py @@ -62,7 +62,7 @@ class Affine(bijector.Bijector): matrices, i.e., the matmul is [matrix-free]( https://en.wikipedia.org/wiki/Matrix-free_methods) when possible. - Examples: + #### Examples ```python # Y = X diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py index be72ff3081..33fdd32d7a 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py @@ -76,15 +76,16 @@ def _undo_batch_normalization(x, class BatchNormalization(bijector.Bijector): """Compute `Y = g(X) s.t. X = g^-1(Y) = (Y - mean(Y)) / std(Y)`. - Applies Batch Normalization [1] to samples from a data distribution. This can - be used to stabilize training of normalizing flows [2, 3]. + Applies Batch Normalization [(Ioffe and Szegedy, 2015)][1] to samples from a + data distribution. This can be used to stabilize training of normalizing + flows ([Papamakarios et al., 2016][3]; [Dinh et al., 2017][2]) When training Deep Neural Networks (DNNs), it is common practice to normalize or whiten features by shifting them to have zero mean and scaling them to have unit variance. - The `inverse()` method of the BatchNorm bijector, which is used in the - log-likelihood computation of data samples, implements the normalization + The `inverse()` method of the `BatchNormalization` bijector, which is used in + the log-likelihood computation of data samples, implements the normalization procedure (shift-and-scale) using the mean and standard deviation of the current minibatch. @@ -92,7 +93,6 @@ class BatchNormalization(bijector.Bijector): `X*std(Y) + mean(Y)` with the running-average mean and standard deviation computed at training-time. De-normalization is useful for sampling. - ```python dist = tfd.TransformedDistribution( @@ -112,19 +112,20 @@ class BatchNormalization(bijector.Bijector): `BatchNorm.forward(BatchNorm.inverse(...))` will be identical when `training=False` but may be different when `training=True`. - [1]: "Batch Normalization: Accelerating Deep Network Training by Reducing - Internal Covariate Shift." - Sergey Ioffe, Christian Szegedy. Arxiv. 2015. - https://arxiv.org/abs/1502.03167 + #### References - [2]: "Density Estimation using Real NVP." - Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio. ICLR. 2017. - https://arxiv.org/abs/1605.08803 + [1]: Sergey Ioffe and Christian Szegedy. Batch Normalization: Accelerating + Deep Network Training by Reducing Internal Covariate Shift. In + _International Conference on Machine Learning_, 2015. + https://arxiv.org/abs/1502.03167 - [3]: "Masked Autoregressive Flow for Density Estimation." - George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017. - https://arxiv.org/abs/1705.07057 + [2]: Laurent Dinh, Jascha Sohl-Dickstein, and Samy Bengio. Density Estimation + using Real NVP. In _International Conference on Learning + Representations_, 2017. https://arxiv.org/abs/1605.08803 + [3]: George Papamakarios, Theo Pavlakou, and Iain Murray. Masked + Autoregressive Flow for Density Estimation. In _Neural Information + Processing Systems_, 2017. https://arxiv.org/abs/1705.07057 """ def __init__(self, diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index 43208ff088..8f09e16058 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -57,7 +57,7 @@ class CholeskyOuterProduct(bijector.Bijector): that, if `I = L_3 @ L_3.T`, with L_3 being lower-triangular with positive- diagonal, then `L_3 = I`. Thus, `L_1 = L_2`, proving injectivity of g. - Examples: + #### Examples ```python bijector.CholeskyOuterProduct().forward(x=[[1., 0], [2, 1]]) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py index 5251dbcb57..84b2340c75 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/masked_autoregressive.py @@ -45,14 +45,15 @@ __all__ = [ class MaskedAutoregressiveFlow(bijector_lib.Bijector): """Affine MaskedAutoregressiveFlow bijector for vector-valued events. - The affine autoregressive flow [1] provides a relatively simple framework for - user-specified (deep) architectures to learn a distribution over vector-valued - events. Regarding terminology, + The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a + relatively simple framework for user-specified (deep) architectures to learn + a distribution over vector-valued events. Regarding terminology, "Autoregressive models decompose the joint density as a product of conditionals, and model each conditional in turn. Normalizing flows transform a base density (e.g. a standard Gaussian) into the target density - by an invertible transformation with tractable Jacobian." [1] + by an invertible transformation with tractable Jacobian." + [(Papamakarios et al., 2016)][3] In other words, the "autoregressive property" is equivalent to the decomposition, `p(x) = prod{ p(x[i] | x[0:i]) : i=0, ..., d }`. The provided @@ -75,26 +76,26 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): Given a `shift_and_log_scale_fn`, the forward and inverse transformations are (a sequence of) affine transformations. A "valid" `shift_and_log_scale_fn` - must compute each `shift` (aka `loc` or "mu" [2]) and `log(scale)` (aka - "alpha" [2]) such that each are broadcastable with the arguments to `forward` - and `inverse`, i.e., such that the calculations in `forward`, `inverse` - [below] are possible. + must compute each `shift` (aka `loc` or "mu" in [Germain et al. (2015)][1]) + and `log(scale)` (aka "alpha" in [Germain et al. (2015)][1]) such that each + are broadcastable with the arguments to `forward` and `inverse`, i.e., such + that the calculations in `forward`, `inverse` [below] are possible. For convenience, `masked_autoregressive_default_template` is offered as a possible `shift_and_log_scale_fn` function. It implements the MADE - architecture [2]. MADE is a feed-forward network that computes a `shift` and - `log(scale)` using `masked_dense` layers in a deep neural network. Weights are - masked to ensure the autoregressive property. It is possible that this - architecture is suboptimal for your task. To build alternative networks, - either change the arguments to `masked_autoregressive_default_template`, use - the `masked_dense` function to roll-out your own, or use some other - architecture, e.g., using `tf.layers`. + architecture [(Germain et al., 2015)][1]. MADE is a feed-forward network that + computes a `shift` and `log(scale)` using `masked_dense` layers in a deep + neural network. Weights are masked to ensure the autoregressive property. It + is possible that this architecture is suboptimal for your task. To build + alternative networks, either change the arguments to + `masked_autoregressive_default_template`, use the `masked_dense` function to + roll-out your own, or use some other architecture, e.g., using `tf.layers`. Warning: no attempt is made to validate that the `shift_and_log_scale_fn` enforces the "autoregressive property". Assuming `shift_and_log_scale_fn` has valid shape and autoregressive - semantics, the forward transformation is, + semantics, the forward transformation is ```python def forward(x): @@ -106,7 +107,7 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): return y ``` - and the inverse transformation is, + and the inverse transformation is ```python def inverse(y): @@ -121,7 +122,7 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): the "last" `y` used to compute `shift`, `log_scale`. (Roughly speaking, this also proves the transform is bijective.) - #### Example Use + #### Examples ```python tfd = tf.contrib.distributions @@ -142,7 +143,8 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): maf.log_prob(x) # Almost free; uses Bijector caching. maf.log_prob(0.) # Cheap; no `tf.while_loop` despite no Bijector caching. - # [1] also describes an "Inverse Autoregressive Flow", e.g., + # [Papamakarios et al. (2016)][3] also describe an Inverse Autoregressive + # Flow [(Kingma et al., 2016)][2]: iaf = tfd.TransformedDistribution( distribution=tfd.Normal(loc=0., scale=1.), bijector=tfb.Invert(tfb.MaskedAutoregressiveFlow( @@ -168,14 +170,20 @@ class MaskedAutoregressiveFlow(bijector_lib.Bijector): event_shape=[dims]) ``` - [1]: "Masked Autoregressive Flow for Density Estimation." - George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017. - https://arxiv.org/abs/1705.07057 + #### References - [2]: "MADE: Masked Autoencoder for Distribution Estimation." - Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015. - https://arxiv.org/abs/1502.03509 + [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE: + Masked Autoencoder for Distribution Estimation. In _International + Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509 + [2]: Diederik P. Kingma, Tim Salimans, Rafal Jozefowicz, Xi Chen, Ilya + Sutskever, and Max Welling. Improving Variational Inference with Inverse + Autoregressive Flow. In _Neural Information Processing Systems_, 2016. + https://arxiv.org/abs/1606.04934 + + [3]: George Papamakarios, Theo Pavlakou, and Iain Murray. Masked + Autoregressive Flow for Density Estimation. In _Neural Information + Processing Systems_, 2017. https://arxiv.org/abs/1705.07057 """ def __init__(self, @@ -329,11 +337,7 @@ def masked_dense(inputs, **kwargs): """A autoregressively masked dense layer. Analogous to `tf.layers.dense`. - See [1] for detailed explanation. - - [1]: "MADE: Masked Autoencoder for Distribution Estimation." - Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015. - https://arxiv.org/abs/1502.03509 + See [Germain et al. (2015)][1] for detailed explanation. Arguments: inputs: Tensor input. @@ -358,6 +362,12 @@ def masked_dense(inputs, Raises: NotImplementedError: if rightmost dimension of `inputs` is unknown prior to graph execution. + + #### References + + [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE: + Masked Autoencoder for Distribution Estimation. In _International + Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509 """ # TODO(b/67594795): Better support of dynamic shape. input_depth = inputs.shape.with_rank_at_least(1)[-1].value @@ -398,23 +408,24 @@ def masked_autoregressive_default_template( name=None, *args, **kwargs): - """Build the MADE Model [1]. + """Build the Masked Autoregressive Density Estimator (Germain et al., 2015). This will be wrapped in a make_template to ensure the variables are only - created once. It takes the input and returns the `loc` ("mu" [1]) and - `log_scale` ("alpha" [1]) from the MADE network. + created once. It takes the input and returns the `loc` ("mu" in [Germain et + al. (2015)][1]) and `log_scale` ("alpha" in [Germain et al. (2015)][1]) from + the MADE network. Warning: This function uses `masked_dense` to create randomly initialized `tf.Variables`. It is presumed that these will be fit, just as you would any other neural architecture which uses `tf.layers.dense`. - #### About Hidden Layers: + #### About Hidden Layers Each element of `hidden_layers` should be greater than the `input_depth` (i.e., `input_depth = tf.shape(input)[-1]` where `input` is the input to the neural network). This is necessary to ensure the autoregressivity property. - #### About Clipping: + #### About Clipping This function also optionally clips the `log_scale` (but possibly not its gradient). This is useful because if `log_scale` is too small/large it might @@ -427,11 +438,7 @@ def masked_autoregressive_default_template( `grad[exp(clip(x))] = grad[x] exp(clip(x))` rather than the usual `grad[clip(x)] exp(clip(x))`. - [1]: "MADE: Masked Autoencoder for Distribution Estimation." - Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015. - https://arxiv.org/abs/1502.03509 - - Arguments: + Args: hidden_layers: Python `list`-like of non-negative integer, scalars indicating the number of units in each hidden layer. Default: `[512, 512]. shift_only: Python `bool` indicating if only the `shift` term shall be @@ -450,12 +457,20 @@ def masked_autoregressive_default_template( **kwargs: `tf.layers.dense` keyword arguments. Returns: - shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]). - log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]). + shift: `Float`-like `Tensor` of shift terms (the "mu" in + [Germain et al. (2015)][1]). + log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in + [Germain et al. (2015)][1]). Raises: NotImplementedError: if rightmost dimension of `inputs` is unknown prior to graph execution. + + #### References + + [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE: + Masked Autoencoder for Distribution Estimation. In _International + Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509 """ with ops.name_scope(name, "masked_autoregressive_default_template", diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py index 2840f52e74..71ab369d01 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/real_nvp.py @@ -38,7 +38,7 @@ class RealNVP(bijector_lib.Bijector): """RealNVP "affine coupling layer" for vector-valued events. Real NVP models a normalizing flow on a `D`-dimensional distribution via a - single `D-d`-dimensional conditional distribution [1]: + single `D-d`-dimensional conditional distribution [(Dinh et al., 2017)][1]: `y[d:D] = y[d:D] * math_ops.exp(log_scale_fn(y[d:D])) + shift_fn(y[d:D])` `y[0:d] = x[0:d]` @@ -51,31 +51,34 @@ class RealNVP(bijector_lib.Bijector): Masking is currently only supported for base distributions with `event_ndims=1`. For more sophisticated masking schemes like checkerboard or - channel-wise masking [2], use the `tfb.Permute` bijector to re-order desired - masked units into the first `d` units. For base distributions with - `event_ndims > 1`, use the `tfb.Reshape` bijector to flatten the event shape. - - Recall that the MAF bijector [2] implements a normalizing flow via an - autoregressive transformation. MAF and IAF have opposite computational - tradeoffs - MAF can train all units in parallel but must sample units - sequentially, while IAF must train units sequentially but can sample in - parallel. In contrast, Real NVP can compute both forward and inverse - computations in parallel. However, the lack of an autoregressive + channel-wise masking [(Papamakarios et al., 2016)[4], use the `tfb.Permute` + bijector to re-order desired masked units into the first `d` units. For base + distributions with `event_ndims > 1`, use the `tfb.Reshape` bijector to + flatten the event shape. + + Recall that the MAF bijector [(Papamakarios et al., 2016)][4] implements a + normalizing flow via an autoregressive transformation. MAF and IAF have + opposite computational tradeoffs - MAF can train all units in parallel but + must sample units sequentially, while IAF must train units sequentially but + can sample in parallel. In contrast, Real NVP can compute both forward and + inverse computations in parallel. However, the lack of an autoregressive transformations makes it less expressive on a per-bijector basis. A "valid" `shift_and_log_scale_fn` must compute each `shift` (aka `loc` or - "mu" [2]) and `log(scale)` (aka "alpha" [2]) such that each are broadcastable - with the arguments to `forward` and `inverse`, i.e., such that the - calculations in `forward`, `inverse` [below] are possible. For convenience, + "mu" in [Papamakarios et al. (2016)][4]) and `log(scale)` (aka "alpha" in + [Papamakarios et al. (2016)][4]) such that each are broadcastable with the + arguments to `forward` and `inverse`, i.e., such that the calculations in + `forward`, `inverse` [below] are possible. For convenience, `real_nvp_default_nvp` is offered as a possible `shift_and_log_scale_fn` function. - NICE [3] is a special case of the Real NVP bijector which discards the scale - transformation, resulting in a constant-time inverse-log-determinant-Jacobian. - To use a NICE bijector instead of Real NVP, `shift_and_log_scale_fn` should - return `(shift, None)`, and `is_constant_jacobian` should be set to `True` in - the `RealNVP` constructor. Calling `real_nvp_default_template` with - `shift_only=True` returns one such NICE-compatible `shift_and_log_scale_fn`. + NICE [(Dinh et al., 2014)][2] is a special case of the Real NVP bijector + which discards the scale transformation, resulting in a constant-time + inverse-log-determinant-Jacobian. To use a NICE bijector instead of Real + NVP, `shift_and_log_scale_fn` should return `(shift, None)`, and + `is_constant_jacobian` should be set to `True` in the `RealNVP` constructor. + Calling `real_nvp_default_template` with `shift_only=True` returns one such + NICE-compatible `shift_and_log_scale_fn`. Caching: the scalar input depth `D` of the base distribution is not known at construction time. The first call to any of `forward(x)`, `inverse(x)`, @@ -103,23 +106,24 @@ class RealNVP(bijector_lib.Bijector): nvp.log_prob(0.) ``` - For more examples, see [4]. + For more examples, see [Jang (2018)][3]. - [1]: "Density Estimation using Real NVP." - Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio. ICLR. 2017. - https://arxiv.org/abs/1605.08803 + #### References - [2]: "Masked Autoregressive Flow for Density Estimation." - George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017. - https://arxiv.org/abs/1705.07057 + [1]: Laurent Dinh, Jascha Sohl-Dickstein, and Samy Bengio. Density Estimation + using Real NVP. In _International Conference on Learning + Representations_, 2017. https://arxiv.org/abs/1605.08803 - [3]: "NICE: Non-linear Independent Components Estimation." - Laurent Dinh, David Krueger, Yoshua Bengio. ICLR. 2015. - https://arxiv.org/abs/1410.8516 + [2]: Laurent Dinh, David Krueger, and Yoshua Bengio. NICE: Non-linear + Independent Components Estimation. _arXiv preprint arXiv:1410.8516_, + 2014. https://arxiv.org/abs/1410.8516 - [4]: "Normalizing Flows Tutorial, Part 2: Modern Normalizing Flows." - Eric Jang. Blog post. January 2018. - http://blog.evjang.com/2018/01/nf2.html + [3]: Eric Jang. Normalizing Flows Tutorial, Part 2: Modern Normalizing Flows. + _Technical Report_, 2018. http://blog.evjang.com/2018/01/nf2.html + + [4]: George Papamakarios, Theo Pavlakou, and Iain Murray. Masked + Autoregressive Flow for Density Estimation. In _Neural Information + Processing Systems_, 2017. https://arxiv.org/abs/1705.07057 """ def __init__(self, @@ -250,12 +254,20 @@ def real_nvp_default_template( **kwargs: `tf.layers.dense` keyword arguments. Returns: - shift: `Float`-like `Tensor` of shift terms (the "mu" in [2]). - log_scale: `Float`-like `Tensor` of log(scale) terms (the "alpha" in [2]). + shift: `Float`-like `Tensor` of shift terms ("mu" in + [Papamakarios et al. (2016)][1]). + log_scale: `Float`-like `Tensor` of log(scale) terms ("alpha" in + [Papamakarios et al. (2016)][1]). Raises: NotImplementedError: if rightmost dimension of `inputs` is unknown prior to graph execution. + + #### References + + [1]: George Papamakarios, Theo Pavlakou, and Iain Murray. Masked + Autoregressive Flow for Density Estimation. In _Neural Information + Processing Systems_, 2017. https://arxiv.org/abs/1705.07057 """ with ops.name_scope(name, "real_nvp_default_template"): diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/square.py b/tensorflow/contrib/distributions/python/ops/bijectors/square.py index 2831a92df8..1e9dbf3509 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/square.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/square.py @@ -37,7 +37,7 @@ class Square(bijector.Bijector): g is a bijection between the non-negative real numbers (R_+) and the non-negative real numbers. - Examples: + #### Examples ```python bijector.Square().forward(x=[[1., 0], [2, 1]]) diff --git a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py index 120b38db3c..192dede6ff 100644 --- a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py @@ -44,18 +44,16 @@ _kumaraswamy_sample_note = """Note: `x` must have dtype `self.dtype` and be in def _harmonic_number(x): """Compute the harmonic number from its analytic continuation. - Derivation from [1] and Euler's constant [2]. - [1] - - https://en.wikipedia.org/wiki/Digamma_function#Relation_to_harmonic_numbers - [2] - https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant - + Derivation from [here]( + https://en.wikipedia.org/wiki/Digamma_function#Relation_to_harmonic_numbers) + and [Euler's constant]( + https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant). Args: x: input float. Returns: z: The analytic continuation of the harmonic number for the input. - """ one = array_ops.ones([], dtype=x.dtype) return math_ops.digamma(x + one) - math_ops.digamma(one) diff --git a/tensorflow/contrib/distributions/python/ops/moving_stats.py b/tensorflow/contrib/distributions/python/ops/moving_stats.py index 20f85643b9..87d40805a3 100644 --- a/tensorflow/contrib/distributions/python/ops/moving_stats.py +++ b/tensorflow/contrib/distributions/python/ops/moving_stats.py @@ -47,9 +47,7 @@ def assign_moving_mean_variance( Note: `mean_var` is updated *after* `variance_var`, i.e., `variance_var` uses the lag-1 mean. - For derivation justification, see equation 143 of: - T. Finch, Feb 2009. "Incremental calculation of weighted mean and variance". - http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf + For derivation justification, see [Finch (2009; Eq. 143)][1]. Args: mean_var: `float`-like `Variable` representing the exponentially weighted @@ -72,6 +70,12 @@ def assign_moving_mean_variance( TypeError: if `mean_var` does not have float type `dtype`. TypeError: if `mean_var`, `variance_var`, `value`, `decay` have different `base_dtype`. + + #### References + + [1]: Tony Finch. Incremental calculation of weighted mean and variance. + _Technical Report_, 2009. + http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf """ with ops.name_scope(name, "assign_moving_mean_variance", [variance_var, mean_var, value, decay]): @@ -183,9 +187,7 @@ def moving_mean_variance(value, decay, collections=None, name=None): Note: `mean_var` is updated *after* `variance_var`, i.e., `variance_var` uses the lag-`1` mean. - For derivation justification, see equation 143 of: - T. Finch, Feb 2009. "Incremental calculation of weighted mean and variance". - http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf + For derivation justification, see [Finch (2009; Eq. 143)][1]. Unlike `assign_moving_mean_variance`, this function handles variable creation. @@ -208,6 +210,12 @@ def moving_mean_variance(value, decay, collections=None, name=None): Raises: TypeError: if `value_var` does not have float type `dtype`. TypeError: if `value`, `decay` have different `base_dtype`. + + #### References + + [1]: Tony Finch. Incremental calculation of weighted mean and variance. + _Technical Report_, 2009. + http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf """ if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] diff --git a/tensorflow/contrib/distributions/python/ops/shape.py b/tensorflow/contrib/distributions/python/ops/shape.py index 5fb6f0c7ea..bac0b79d59 100644 --- a/tensorflow/contrib/distributions/python/ops/shape.py +++ b/tensorflow/contrib/distributions/python/ops/shape.py @@ -32,45 +32,50 @@ from tensorflow.python.ops.distributions import util as distribution_util class _DistributionShape(object): """Manage and manipulate `Distribution` shape. - Terminology: - Recall that a `Tensor` has: - - `shape`: size of `Tensor` dimensions, - - `ndims`: size of `shape`; number of `Tensor` dimensions, - - `dims`: indexes into `shape`; useful for transpose, reduce. - - `Tensor`s sampled from a `Distribution` can be partitioned by `sample_dims`, - `batch_dims`, and `event_dims`. To understand the semantics of these - dimensions, consider when two of the three are fixed and the remaining - is varied: - - `sample_dims`: indexes independent draws from identical - parameterizations of the `Distribution`. - - `batch_dims`: indexes independent draws from non-identical - parameterizations of the `Distribution`. - - `event_dims`: indexes event coordinates from one sample. - - The `sample`, `batch`, and `event` dimensions constitute the entirety of a - `Distribution` `Tensor`'s shape. - - The dimensions are always in `sample`, `batch`, `event` order. - - Purpose: - This class partitions `Tensor` notions of `shape`, `ndims`, and `dims` into - `Distribution` notions of `sample,` `batch,` and `event` dimensions. That - is, it computes any of: + #### Terminology - ``` - sample_shape batch_shape event_shape - sample_dims batch_dims event_dims - sample_ndims batch_ndims event_ndims - ``` + Recall that a `Tensor` has: + - `shape`: size of `Tensor` dimensions, + - `ndims`: size of `shape`; number of `Tensor` dimensions, + - `dims`: indexes into `shape`; useful for transpose, reduce. + + `Tensor`s sampled from a `Distribution` can be partitioned by `sample_dims`, + `batch_dims`, and `event_dims`. To understand the semantics of these + dimensions, consider when two of the three are fixed and the remaining + is varied: + - `sample_dims`: indexes independent draws from identical + parameterizations of the `Distribution`. + - `batch_dims`: indexes independent draws from non-identical + parameterizations of the `Distribution`. + - `event_dims`: indexes event coordinates from one sample. + + The `sample`, `batch`, and `event` dimensions constitute the entirety of a + `Distribution` `Tensor`'s shape. + + The dimensions are always in `sample`, `batch`, `event` order. + + #### Purpose + + This class partitions `Tensor` notions of `shape`, `ndims`, and `dims` into + `Distribution` notions of `sample,` `batch,` and `event` dimensions. That + is, it computes any of: + + ``` + sample_shape batch_shape event_shape + sample_dims batch_dims event_dims + sample_ndims batch_ndims event_ndims + ``` - for a given `Tensor`, e.g., the result of - `Distribution.sample(sample_shape=...)`. + for a given `Tensor`, e.g., the result of + `Distribution.sample(sample_shape=...)`. - For a given `Tensor`, this class computes the above table using minimal - information: `batch_ndims` and `event_ndims`. + For a given `Tensor`, this class computes the above table using minimal + information: `batch_ndims` and `event_ndims`. + + #### Examples + + We show examples of distribution shape semantics. - Examples of `Distribution` `shape` semantics: - Sample dimensions: Computing summary statistics, i.e., the average is a reduction over sample dimensions. @@ -111,52 +116,54 @@ class _DistributionShape(object): tf.div(1., tf.reduce_prod(x, event_dims)) ``` - Examples using this class: - Write `S, B, E` for `sample_shape`, `batch_shape`, and `event_shape`. - - ```python - # 150 iid samples from one multivariate Normal with two degrees of freedom. - mu = [0., 0] - sigma = [[1., 0], - [0, 1]] - mvn = MultivariateNormal(mu, sigma) - rand_mvn = mvn.sample(sample_shape=[3, 50]) - shaper = DistributionShape(batch_ndims=0, event_ndims=1) - S, B, E = shaper.get_shape(rand_mvn) - # S = [3, 50] - # B = [] - # E = [2] - - # 12 iid samples from one Wishart with 2x2 events. - sigma = [[1., 0], - [2, 1]] - wishart = Wishart(df=5, scale=sigma) - rand_wishart = wishart.sample(sample_shape=[3, 4]) - shaper = DistributionShape(batch_ndims=0, event_ndims=2) - S, B, E = shaper.get_shape(rand_wishart) - # S = [3, 4] - # B = [] - # E = [2, 2] - - # 100 iid samples from two, non-identical trivariate Normal distributions. - mu = ... # shape(2, 3) - sigma = ... # shape(2, 3, 3) - X = MultivariateNormal(mu, sigma).sample(shape=[4, 25]) - # S = [4, 25] - # B = [2] - # E = [3] - ``` - - Argument Validation: - When `validate_args=False`, checks that cannot be done during - graph construction are performed at graph execution. This may result in a - performance degradation because data must be switched from GPU to CPU. - - For example, when `validate_args=False` and `event_ndims` is a - non-constant `Tensor`, it is checked to be a non-negative integer at graph - execution. (Same for `batch_ndims`). Constant `Tensor`s and non-`Tensor` - arguments are always checked for correctness since this can be done for - "free," i.e., during graph construction. + We show examples using this class. + + Write `S, B, E` for `sample_shape`, `batch_shape`, and `event_shape`. + + ```python + # 150 iid samples from one multivariate Normal with two degrees of freedom. + mu = [0., 0] + sigma = [[1., 0], + [0, 1]] + mvn = MultivariateNormal(mu, sigma) + rand_mvn = mvn.sample(sample_shape=[3, 50]) + shaper = DistributionShape(batch_ndims=0, event_ndims=1) + S, B, E = shaper.get_shape(rand_mvn) + # S = [3, 50] + # B = [] + # E = [2] + + # 12 iid samples from one Wishart with 2x2 events. + sigma = [[1., 0], + [2, 1]] + wishart = Wishart(df=5, scale=sigma) + rand_wishart = wishart.sample(sample_shape=[3, 4]) + shaper = DistributionShape(batch_ndims=0, event_ndims=2) + S, B, E = shaper.get_shape(rand_wishart) + # S = [3, 4] + # B = [] + # E = [2, 2] + + # 100 iid samples from two, non-identical trivariate Normal distributions. + mu = ... # shape(2, 3) + sigma = ... # shape(2, 3, 3) + X = MultivariateNormal(mu, sigma).sample(shape=[4, 25]) + # S = [4, 25] + # B = [2] + # E = [3] + ``` + + #### Argument Validation + + When `validate_args=False`, checks that cannot be done during + graph construction are performed at graph execution. This may result in a + performance degradation because data must be switched from GPU to CPU. + + For example, when `validate_args=False` and `event_ndims` is a + non-constant `Tensor`, it is checked to be a non-negative integer at graph + execution. (Same for `batch_ndims`). Constant `Tensor`s and non-`Tensor` + arguments are always checked for correctness since this can be done for + "free," i.e., during graph construction. """ def __init__(self, diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 3208ecdf64..971d65c4a6 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -248,11 +248,7 @@ class VectorDiffeomixture(distribution_lib.Distribution): The default quadrature scheme chooses `z_{N, n}` as `N` midpoints of the quantiles of `p(z)` (generalized quantiles if `K > 2`). - See [1] for more details. - - [1]. "Quadrature Compound: An approximating family of distributions" - Joshua Dillon, Ian Langmore, arXiv preprints - https://arxiv.org/abs/1801.03080 + See [Dillon and Langmore (2018)][1] for more details. #### About `Vector` distributions in TensorFlow. @@ -313,6 +309,13 @@ class VectorDiffeomixture(distribution_lib.Distribution): is_positive_definite=True), ], validate_args=True) + ``` + + #### References + + [1]: Joshua Dillon and Ian Langmore. Quadrature Compound: An approximating + family of distributions. _arXiv preprint arXiv:1801.03080_, 2018. + https://arxiv.org/abs/1801.03080 """ def __init__(self, -- GitLab From 4e330dcdeaafc92944a713a950355700f906ecfc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 17:45:10 -0700 Subject: [PATCH 211/960] add option to save trace table to model directory's profile plugin subdirectory. PiperOrigin-RevId: 189671290 --- .../tpu/profiler/capture_tpu_profile.cc | 19 +++++++++++++++---- .../contrib/tpu/profiler/tpu_profiler.proto | 9 ++++++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index b1ef9fde37..f86aff47e1 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -29,6 +29,9 @@ limitations under the License. #include "tensorflow/contrib/tpu/profiler/version.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/util/command_line_flags.h" @@ -62,10 +65,13 @@ Status ValidateHostPortPair(const string& host_port) { } ProfileResponse Profile(const string& service_addr, int duration_ms, + const string& repository_root, const string& session_id, const ProfileOptions& opts) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); + request.set_repository_root(repository_root); + request.set_session_id(session_id); request.add_tools("input_pipeline"); request.add_tools("overview_page"); *request.mutable_opts() = opts; @@ -137,10 +143,17 @@ int main(int argc, char** argv) { opts.set_include_dataset_ops(FLAGS_include_dataset_ops); tensorflow::ProfileResponse response; + // Use the current timestamp as the run name. + tensorflow::string session_id = + tensorflow::tpu::GetCurrentTimeStampAsString(); + constexpr char kProfilePluginDirectory[] = "plugins/profile/"; + string repository_root = + ::tensorflow::io::JoinPath(FLAGS_logdir, kProfilePluginDirectory); while (true) { std::cout << "Starting to profile TPU traces for " << duration_ms << " ms. " << "Remaining attempt(s): " << remaining_attempts-- << std::endl; - response = tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms, opts); + response = tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms, + repository_root, session_id, opts); if (remaining_attempts <= 0 || !response.encoded_trace().empty()) break; std::cout << "No trace event is collected. Automatically retrying." << std::endl @@ -158,10 +171,8 @@ int main(int argc, char** argv) { return 0; } - // Use the current timestamp as the run name. - tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString(); TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile( - FLAGS_logdir, run, response, &std::cout)); + FLAGS_logdir, session_id, response, &std::cout)); // Print this at the end so that it's not buried in irrelevant LOG messages. std::cout << "NOTE: using the trace duration " << duration_ms << "ms." << std::endl diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index f3f3302ceb..cddc3cd1b4 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -36,10 +36,17 @@ message ProfileRequest { // Optional profiling options that control how a TF session will be profiled. ProfileOptions opts = 4; + // The place where we will dump profile data. We will normally use + // MODEL_DIR/plugin/profile/ as our repository root. + string repository_root = 5; + + // The user provided profile session identifier. + string session_id = 6; + // In future, the caller will indicate which TF session is being profiled, and // only data relating to that program will be returned. For now, we assume // all activity during the profiling period is relevant. - // next-field: 5 + // next-field: 7 } message ProfileToolData { -- GitLab From c8d0b125f62a3b8785494f53d013809f8e7c8c29 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 19 Mar 2018 17:50:05 -0700 Subject: [PATCH 212/960] Register gradient for argmin (cf. #15278). PiperOrigin-RevId: 189671974 --- tensorflow/python/ops/math_grad.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index eb33687cb5..02e07dc7b1 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -41,6 +41,12 @@ def _ArgMaxGrad(op, grad): return [None, None] +@ops.RegisterGradient("ArgMin") +def _ArgMinGrad(op, grad): + del op, grad + return [None, None] + + @ops.RegisterGradient("Sum") def _SumGrad(op, grad): """Gradient for Sum.""" -- GitLab From d548cb4e811fc8a04dd10370c576441fc56b03f2 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 19 Mar 2018 17:56:02 -0700 Subject: [PATCH 213/960] Add docstring pointing to tf.contrib.quantize. PiperOrigin-RevId: 189672549 --- tensorflow/python/training/quantize_training.i | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/training/quantize_training.i b/tensorflow/python/training/quantize_training.i index 17ffcd6e07..fb5e47efa0 100644 --- a/tensorflow/python/training/quantize_training.i +++ b/tensorflow/python/training/quantize_training.i @@ -56,6 +56,11 @@ PyObject* DoQuantizeTrainingOnGraphDefHelper( %insert("python") %{ def do_quantize_training_on_graphdef(input_graph, num_bits): + """A general quantization scheme is being developed in @{tf.contrib.quantize}. + + Consider using that instead, though since it is in the tf.contrib namespace, + it is not subject to backward compatibility guarantees. + """ from tensorflow.core.framework.graph_pb2 import GraphDef from tensorflow.python.framework import errors with errors.raise_exception_on_not_ok_status() as status: -- GitLab From 331bbe2886712fffc96ed9a7fb33fc9f09600240 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 18:20:12 -0700 Subject: [PATCH 214/960] Support general permutation. PiperOrigin-RevId: 189675019 --- tensorflow/core/kernels/data_format_ops.cc | 64 +++++----- tensorflow/core/kernels/data_format_ops.h | 131 ++------------------- 2 files changed, 42 insertions(+), 153 deletions(-) diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc index bea3af98eb..39ef8ee3ac 100644 --- a/tensorflow/core/kernels/data_format_ops.cc +++ b/tensorflow/core/kernels/data_format_ops.cc @@ -28,15 +28,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -namespace { -inline functor::DataFormat FormatNameToEnum(const string& name) { - if (name == "NHWC") return functor::DataFormat::NHWC; - if (name == "NCHW") return functor::DataFormat::NCHW; - if (name == "HWNC") return functor::DataFormat::HWNC; - return functor::DataFormat::UNKNOWN; -} -} // namespace - template class DataFormatDimMapOp : public OpKernel { public: @@ -76,17 +67,8 @@ class DataFormatVecPermuteOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("src_format", &src_format)); string dst_format; OP_REQUIRES_OK(context, context->GetAttr("dst_format", &dst_format)); - OP_REQUIRES(context, - (src_format == "NHWC" && dst_format == "NCHW") || - (src_format == "NCHW" && dst_format == "NHWC") || - (src_format == "NHWC" && dst_format == "HWNC") || - (src_format == "HWNC" && dst_format == "NHWC"), - errors::InvalidArgument(strings::StrCat( - "Current implementation only supports NHWC<->NCHW and " - "NHWC<->HWNC conversion; got source format ", - src_format, " and destination format ", dst_format))); - src_format_ = FormatNameToEnum(src_format); - dst_format_ = FormatNameToEnum(dst_format); + src_format_ = src_format; + dst_format_ = dst_format; } void Compute(OpKernelContext* context) override { @@ -116,14 +98,34 @@ class DataFormatVecPermuteOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); - functor::DataFormatVecPermute()( - context->eigen_device(), input.flat(), output->flat(), - src_format_, dst_format_); + // Support 1D and 2D cases. + Eigen::DSizes dst_idx; + ComputeDstIndex(input.dims(), &dst_idx); + + functor::DataFormatVecPermute()(context->eigen_device(), + input.flat(), + output->flat(), dst_idx); } private: - functor::DataFormat src_format_; - functor::DataFormat dst_format_; + // Finds out the destination index. Support 1D and 2D cases. + // Example: HWNC --> NHWC + // 1D: dst = [1, 2, 0, 3], + // 2D: dst = [2, 3, 4, 5, 0, 1, 6, 7] + void ComputeDstIndex(int num_dim, Eigen::DSizes* dst) { + for (int i = 0; i < src_format_.size(); ++i) { + for (int j = 0; j < dst_format_.size(); ++j) { + if (dst_format_[j] != src_format_[i]) continue; + // Found the dst index. Set output based on the number of dims. + for (int k = 0; k < num_dim; ++k) { + (*dst)[i * num_dim + k] = j * num_dim + k; + } + } + } + } + + string src_format_; + string dst_format_; }; #define REGISTER_KERNEL(T) \ @@ -156,12 +158,12 @@ TF_CALL_int32(DECLARE_GPU_SPECS); TF_CALL_int64(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPEC -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void DataFormatVecPermute::operator()( \ - const GPUDevice& d, typename TTypes::ConstFlat x, \ - typename TTypes::Vec y, const DataFormat src_format, \ - const DataFormat dst_format); \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void DataFormatVecPermute::operator()( \ + const GPUDevice& d, typename TTypes::ConstFlat x, \ + typename TTypes::Vec y, \ + const Eigen::DSizes& dst_idx); \ extern template struct DataFormatVecPermute; #define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T); TF_CALL_int32(DECLARE_GPU_SPECS); diff --git a/tensorflow/core/kernels/data_format_ops.h b/tensorflow/core/kernels/data_format_ops.h index d27415ed91..2ccc919586 100644 --- a/tensorflow/core/kernels/data_format_ops.h +++ b/tensorflow/core/kernels/data_format_ops.h @@ -23,13 +23,6 @@ limitations under the License. namespace tensorflow { namespace functor { -enum class DataFormat { - UNKNOWN = 0, - NHWC, - NCHW, - HWNC, -}; - // Functor used by DataFormatDimMapOP to do the computations. template struct DataFormatDimMap { @@ -47,65 +40,8 @@ struct DataFormatDimMap { }; template -struct VecPermuteNHWCToNCHW { - Eigen::DSizes dimensions( - typename TTypes::ConstFlat input) const { - Eigen::DSizes result; - result[0] = input.dimension(0); - return result; - } - template - void eval(typename TTypes::ConstFlat input, Output& output, - const Device& d) const { - if (input.size() == 8) { - output.template chip<0>(0).device(d) = input.template chip<0>(0); - output.template chip<0>(1).device(d) = input.template chip<0>(1); - output.template chip<0>(2).device(d) = input.template chip<0>(6); - output.template chip<0>(3).device(d) = input.template chip<0>(7); - output.template chip<0>(4).device(d) = input.template chip<0>(2); - output.template chip<0>(5).device(d) = input.template chip<0>(3); - output.template chip<0>(6).device(d) = input.template chip<0>(4); - output.template chip<0>(7).device(d) = input.template chip<0>(5); - } else { - output.template chip<0>(0).device(d) = input.template chip<0>(0); - output.template chip<0>(1).device(d) = input.template chip<0>(3); - output.template chip<0>(2).device(d) = input.template chip<0>(1); - output.template chip<0>(3).device(d) = input.template chip<0>(2); - } - } -}; - -template -struct VecPermuteNCHWToNHWC { - Eigen::DSizes dimensions( - typename TTypes::ConstFlat input) const { - Eigen::DSizes result; - result[0] = input.dimension(0); - return result; - } - template - void eval(typename TTypes::ConstFlat input, Output& output, - const Device& d) const { - if (input.size() == 8) { - output.template chip<0>(0).device(d) = input.template chip<0>(0); - output.template chip<0>(1).device(d) = input.template chip<0>(1); - output.template chip<0>(2).device(d) = input.template chip<0>(4); - output.template chip<0>(3).device(d) = input.template chip<0>(5); - output.template chip<0>(4).device(d) = input.template chip<0>(6); - output.template chip<0>(5).device(d) = input.template chip<0>(7); - output.template chip<0>(6).device(d) = input.template chip<0>(2); - output.template chip<0>(7).device(d) = input.template chip<0>(3); - } else { - output.template chip<0>(0).device(d) = input.template chip<0>(0); - output.template chip<0>(1).device(d) = input.template chip<0>(2); - output.template chip<0>(2).device(d) = input.template chip<0>(3); - output.template chip<0>(3).device(d) = input.template chip<0>(1); - } - } -}; - -template -struct VecPermuteNHWCToHWNC { +struct VecPermute { + VecPermute(const Eigen::DSizes& dst) : dst_(dst) {} Eigen::DSizes dimensions( typename TTypes::ConstFlat input) const { Eigen::DSizes result; @@ -115,71 +51,22 @@ struct VecPermuteNHWCToHWNC { template void eval(typename TTypes::ConstFlat input, Output& output, const Device& d) const { - if (input.size() == 8) { - output.template chip<0>(0).device(d) = input.template chip<0>(2); - output.template chip<0>(1).device(d) = input.template chip<0>(3); - output.template chip<0>(2).device(d) = input.template chip<0>(4); - output.template chip<0>(3).device(d) = input.template chip<0>(5); - output.template chip<0>(4).device(d) = input.template chip<0>(0); - output.template chip<0>(5).device(d) = input.template chip<0>(1); - output.template chip<0>(6).device(d) = input.template chip<0>(6); - output.template chip<0>(7).device(d) = input.template chip<0>(7); - } else { - output.template chip<0>(0).device(d) = input.template chip<0>(1); - output.template chip<0>(1).device(d) = input.template chip<0>(2); - output.template chip<0>(2).device(d) = input.template chip<0>(0); - output.template chip<0>(3).device(d) = input.template chip<0>(3); + for (int i = 0; i < input.size(); ++i) { + output.template chip<0>(dst_[i]).device(d) = input.template chip<0>(i); } } -}; -template -struct VecPermuteHWNCToNHWC { - Eigen::DSizes dimensions( - typename TTypes::ConstFlat input) const { - Eigen::DSizes result; - result[0] = input.dimension(0); - return result; - } - template - void eval(typename TTypes::ConstFlat input, Output& output, - const Device& d) const { - if (input.size() == 8) { - output.template chip<0>(0).device(d) = input.template chip<0>(4); - output.template chip<0>(1).device(d) = input.template chip<0>(5); - output.template chip<0>(2).device(d) = input.template chip<0>(0); - output.template chip<0>(3).device(d) = input.template chip<0>(1); - output.template chip<0>(4).device(d) = input.template chip<0>(2); - output.template chip<0>(5).device(d) = input.template chip<0>(3); - output.template chip<0>(6).device(d) = input.template chip<0>(6); - output.template chip<0>(7).device(d) = input.template chip<0>(7); - } else { - output.template chip<0>(0).device(d) = input.template chip<0>(2); - output.template chip<0>(1).device(d) = input.template chip<0>(0); - output.template chip<0>(2).device(d) = input.template chip<0>(1); - output.template chip<0>(3).device(d) = input.template chip<0>(3); - } - } + private: + Eigen::DSizes dst_; }; // Functor used by DataFormatVecPermuteOp to do the computations. template struct DataFormatVecPermute { void operator()(const Device& d, typename TTypes::ConstFlat x, - typename TTypes::Flat y, const DataFormat src_format, - const DataFormat dst_format) { - if (src_format == DataFormat::NHWC && dst_format == DataFormat::NCHW) { - y.device(d) = x.customOp(VecPermuteNHWCToNCHW()); - } else if (src_format == DataFormat::NCHW && - dst_format == DataFormat::NHWC) { - y.device(d) = x.customOp(VecPermuteNCHWToNHWC()); - } else if (src_format == DataFormat::NHWC && - dst_format == DataFormat::HWNC) { - y.device(d) = x.customOp(VecPermuteNHWCToHWNC()); - } else if (src_format == DataFormat::HWNC && - dst_format == DataFormat::NHWC) { - y.device(d) = x.customOp(VecPermuteHWNCToNHWC()); - } + typename TTypes::Flat y, + const Eigen::DSizes& dst) { + y.device(d) = x.customOp(VecPermute(dst)); } }; -- GitLab From 28a6a8b235dafd6610e95dc05676d5b64fa5a404 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 19 Mar 2018 18:32:13 -0700 Subject: [PATCH 215/960] Export tf.GradientTape tf.GradientTape can be used both for eager execution and graph construction to compute gradients (unlike tf.gradients, which works only for graph construction). PiperOrigin-RevId: 189676004 --- tensorflow/python/BUILD | 1 + tensorflow/python/eager/backprop.py | 4 +++- tensorflow/python/ops/gradients.py | 2 ++ tensorflow/python/ops/standard_ops.py | 1 + tensorflow/python/training/training.py | 1 + .../golden/tensorflow.-gradient-tape.pbtxt | 21 +++++++++++++++++++ tensorflow/tools/api/golden/tensorflow.pbtxt | 4 ++++ 7 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.-gradient-tape.pbtxt diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a029ecd4d0..ec67f43190 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1804,6 +1804,7 @@ py_library( ":platform", ":spectral_grad", ":util", + "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:tape", "//third_party/py/numpy", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 9b997fed30..06e11f6ef9 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -40,6 +40,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect +from tensorflow.python.util.tf_export import tf_export _op_attr_type_cache = {} @@ -637,13 +638,14 @@ _default_vspace = imperative_grad.VSpace( ones=_ones) +@tf_export("GradientTape") class GradientTape(object): """Record operations for automatic differentiation. Operations are recorded if they are executed within this context manager and at least one of their inputs is being "watched". - Variables (created by @{tf.contrib.eager.Variable} or @{tf.get_variable}) + Variables (created by `tf.contrib.eager.Variable` or @{tf.get_variable}) are automatically watched. Tensors can be manually watched by invoking the `watch` method on this context manager. diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py index 63d9a23222..2668e8f60c 100644 --- a/tensorflow/python/ops/gradients.py +++ b/tensorflow/python/ops/gradients.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import +from tensorflow.python.eager.backprop import GradientTape from tensorflow.python.ops.custom_gradient import custom_gradient from tensorflow.python.ops.gradients_impl import AggregationMethod from tensorflow.python.ops.gradients_impl import gradients @@ -29,6 +30,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # TODO(drpng): find a good place to reference this. "AggregationMethod", + "GradientTape", "custom_gradient", "gradients", # tf.gradients.gradients. "hessians", # tf.gradients.hessians diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index 60a98aca7f..230b7ef937 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -218,6 +218,7 @@ _allowed_symbols_gradients = [ # Documented in training.py: # Not importing training.py to avoid complex graph dependencies. "AggregationMethod", + "GradientTape", "custom_gradient", "gradients", # tf.gradients = gradients.gradients "hessians", diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index 6880cfc4db..b759b156d7 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -31,6 +31,7 @@ See the @{$python/train} guide. @@custom_gradient @@gradients @@AggregationMethod +@@GradientTape @@stop_gradient @@hessians @@clip_by_value diff --git a/tensorflow/tools/api/golden/tensorflow.-gradient-tape.pbtxt b/tensorflow/tools/api/golden/tensorflow.-gradient-tape.pbtxt new file mode 100644 index 0000000000..7405202b89 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.-gradient-tape.pbtxt @@ -0,0 +1,21 @@ +path: "tensorflow.GradientTape" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'persistent\'], varargs=None, keywords=None, defaults=[\'False\'], " + } + member_method { + name: "gradient" + argspec: "args=[\'self\', \'target\', \'sources\', \'output_gradients\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "watch" + argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "watched_variables" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 99e09c3759..55b82dd765 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -84,6 +84,10 @@ tf_module { name: "GRAPH_DEF_VERSION_MIN_PRODUCER" mtype: "" } + member { + name: "GradientTape" + mtype: "" + } member { name: "Graph" mtype: "" -- GitLab From 48adc7ba73177f2a9331918b160bc3d0775985b8 Mon Sep 17 00:00:00 2001 From: Surya Bhupatiraju Date: Mon, 19 Mar 2018 18:51:06 -0700 Subject: [PATCH 216/960] Make L2 norm computation more stable. Avoids the potentially numerically instable square root in the linalg_ops.norm() function because we 'undo' that operation with a math_ops.square() operation anyway. PiperOrigin-RevId: 189677716 --- .../gan/python/eval/python/classifier_metrics_impl.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index 323cbe6e76..7e86d10b64 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -563,7 +563,8 @@ def mean_only_frechet_classifier_distance_from_activations( m_w = math_ops.reduce_mean(generated_activations, 0) # Next the distance between means. - mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. + mean = math_ops.reduce_sum( + math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable. mofid = mean if activations_dtype != dtypes.float64: mofid = math_ops.cast(mofid, activations_dtype) @@ -637,7 +638,8 @@ def diagonal_only_frechet_classifier_distance_from_activations( (var + var_w) - 2.0 * math_ops.sqrt(math_ops.multiply(var, var_w))) # Next the distance between means. - mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. + mean = math_ops.reduce_sum( + math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable. dofid = trace + mean if activations_dtype != dtypes.float64: dofid = math_ops.cast(dofid, activations_dtype) @@ -718,7 +720,8 @@ def frechet_classifier_distance_from_activations(real_activations, trace = math_ops.trace(sigma + sigma_w) - 2.0 * sqrt_trace_component # Next the distance between means. - mean = math_ops.square(linalg_ops.norm(m - m_w)) # This uses the L2 norm. + mean = math_ops.reduce_sum( + math_ops.squared_difference(m, m_w)) # Equivalent to L2 but more stable. fid = trace + mean if activations_dtype != dtypes.float64: fid = math_ops.cast(fid, activations_dtype) -- GitLab From df9fdc7a74ab5ce786a91c7c62f6ad0d36b24f42 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 19:24:24 -0700 Subject: [PATCH 217/960] Update GraphProperties comments PiperOrigin-RevId: 189680477 --- tensorflow/core/grappler/costs/graph_properties.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 93a722f038..8ff572fe4f 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -29,9 +29,12 @@ namespace grappler { class SymbolicShapeRefiner; class TopoQueue; -// A TensorFlow model to optimize. -// Models are represented by the combination of a graph, one of more fetch -// nodes, and potentially a set of nodes to feed. +// Infer OpInfo::TensorProperties for graph nodes inputs/outputs. +// +// Typical use case, is to infer tensor properties from a graph, before doing +// optimization pass. Nodes modified during optimization pass have to be +// invalidated, to prevent further incorrect optimizations based on wrong shape +// and data type properties. class GraphProperties { public: explicit GraphProperties(const GrapplerItem& item) : item_(item) {} @@ -64,6 +67,9 @@ class GraphProperties { const string& node_name) const; const std::vector& GetOutputProperties( const string& node_name) const; + // Invalidate input/output properties for nodes modified during graph + // optimization pass, to prevent potential optimizations, based on incorrect + // shape information. void ClearInputProperties(const string& node_name); void ClearOutputProperties(const string& node_name); -- GitLab From 886df46bdfe89af739ee4f4a81b4c88bbe572c64 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 19 Mar 2018 19:24:26 -0700 Subject: [PATCH 218/960] Disable freeze_bn_delay by default. PiperOrigin-RevId: 189680481 --- .../contrib/quantize/python/quantize_graph.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index be4fc39651..d0fb55da74 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -99,16 +99,7 @@ def create_training_graph(input_graph=None, quant_delay=0): # TODO(raghuramank) Need to have freeze_bn_delay be a function of batch size # Currently the values below are hardcoded for mobilenetV1 on imagenet # Please use the experimental API if you need to tune these values. - if quant_delay == 0: - # Corresponds to case of restoring from a floating point checkpoint - # In this case, we can freeze the moving mean and variance early on and - # switch to using them during training. Therefore, freeze_bn_delay is set to - # 2e5. - freeze_bn_delay = int(2e5) - else: - # If training from scratch, set freeze_bn_delay to 100 epochs after quant - # delay. With a batch size of 64, this corresponds to 20000*100=2M steps. - freeze_bn_delay = quant_delay + int(2e6) + freeze_bn_delay = None _create_graph( input_graph=input_graph, @@ -142,7 +133,7 @@ def experimental_create_training_graph(input_graph=None, weight_bits=8, activation_bits=8, quant_delay=0, - freeze_bn_delay=int(2e5)): + freeze_bn_delay=None): """Rewrites a training input_graph in place for simulated quantization. Variables added by the rewrite get added to the global variables collection. -- GitLab From fea994f9dd2bcb15eba0515c8c051aac9aed0399 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 19:30:23 -0700 Subject: [PATCH 219/960] Avoid attaching fqn annotations to live values that don't have a `__name__`. PiperOrigin-RevId: 189680937 --- .../py2tf/pyct/static_analysis/live_values.py | 6 ++++-- .../pyct/static_analysis/live_values_test.py | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py index 0388be5d25..ac5697900a 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py @@ -55,11 +55,13 @@ class LiveValueResolver(transformer.Base): if not symbol_is_local and not symbol_is_param: if node.id in self.literals: anno.setanno(node, 'live_val', self.literals[node.id]) - # TODO(mdan): Could live values have FQNs? i.e. 'a'.join() elif node.id in self.context.namespace: obj = self.context.namespace[node.id] anno.setanno(node, 'live_val', obj) - anno.setanno(node, 'fqn', (obj.__name__,)) + if hasattr(obj, '__name__'): + # If the symbol value is for example a primitive, then it will not + # have a name. + anno.setanno(node, 'fqn', (obj.__name__,)) else: pass # TODO(mdan): Should we raise an error here? diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py index c133a455b3..a56dff824e 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py @@ -57,13 +57,26 @@ class LiveValuesResolverTest(test.TestCase): def test_literals(self): + a = None + def test_fn(): - return Foo # pylint: disable=undefined-variable + return a - node = self._parse_and_analyze(test_fn, {}, {'Foo': 'bar'}) + node = self._parse_and_analyze(test_fn, {}, literals={'a': 'bar'}) retval_node = node.body[0].body[0].value self.assertEquals('bar', anno.getanno(retval_node, 'live_val')) + def test_primitive_values(self): + + a = None + + def test_fn(): + return a + + node = self._parse_and_analyze(test_fn, {'a': True}) + retval_node = node.body[0].body[0].value + self.assertFalse(anno.hasanno(retval_node, 'fqn')) + def test_namespace(self): def foo(): -- GitLab From a2e0f8c24776f63b04a29fad9c66bf3d66e94f4d Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 19 Mar 2018 19:52:06 -0700 Subject: [PATCH 220/960] Handle non-broadcastables shapes in eager assert_equal Before this change assert_equal would fail when producing an error message for non-equal shapes because array_ops.boolean_mask only works for equal shapes. This part of the error message is fairly confusing in presence of non-equal shapes. This change removes it. PiperOrigin-RevId: 189682518 --- .../python/kernel_tests/check_ops_test.py | 6 +++ tensorflow/python/ops/check_ops.py | 39 ++++++++++--------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 26d3df9e63..5a83ec8d30 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -212,6 +212,12 @@ First 2 elements of y: out = array_ops.identity(small) self.evaluate(out) + @test_util.run_in_graph_and_eager_modes() + def test_raises_when_not_equal_and_broadcastable_shapes(self): + cond = constant_op.constant([True, False], name="small") + with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): + check_ops.assert_equal(cond, False, message="fail") + @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_both_empty(self): larry = constant_op.constant([]) diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index d6d75e4ef9..9cea3e91f7 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -363,27 +363,30 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): (x_sum, x_np[:x_sum], y_sum, y_np[:y_sum])) - # Get the values that actually differed and their indices. - mask = math_ops.logical_not(eq) - indices = array_ops.where(mask) - indices_np = indices.numpy() - x_vals = array_ops.boolean_mask(x, mask) - y_vals = array_ops.boolean_mask(y, mask) - summarize = min(summarize, indices_np.shape[0]) + index_and_values_str = '' + if x.shape == y.shape: + # If the shapes of x and y are the same, + # Get the values that actually differed and their indices. + # If shapes are different this information is more confusing + # than useful. + mask = math_ops.logical_not(eq) + indices = array_ops.where(mask) + indices_np = indices.numpy() + x_vals = array_ops.boolean_mask(x, mask) + y_vals = array_ops.boolean_mask(y, mask) + summarize = min(summarize, indices_np.shape[0]) + index_and_values_str = ( + 'Indices of first %s different values:\n%s\n' + 'Corresponding x values:\n%s\n' + 'Corresponding y values:\n%s\n' % + (summarize, indices_np[:summarize], + x_vals.numpy().reshape((-1,))[:summarize], + y_vals.numpy().reshape((-1,))[:summarize])) raise errors.InvalidArgumentError( node_def=None, op=None, - message=('%s\nCondition x == y did not hold.\n' - 'Indices of first %s different values:\n%s\n' - 'Corresponding x values:\n%s\n' - 'Corresponding y values:\n%s\n' - '%s' - % - (message or '', - summarize, indices_np[:summarize], - x_vals.numpy().reshape((-1,))[:summarize], - y_vals.numpy().reshape((-1,))[:summarize], - summary_msg))) + message=('%s\nCondition x == y did not hold.\n%s%s' % + (message or '', index_and_values_str, summary_msg))) return if data is None: -- GitLab From 79d06a6261a523866ace67f7b831d7f617d550e6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 19:58:03 -0700 Subject: [PATCH 221/960] Apply output_min/output_max to the result in the NEON implementation of Add operator. Both non-NEON and reference implementation have this, but it's missing from NEON version. PiperOrigin-RevId: 189682984 --- .../lite/kernels/internal/optimized/optimized_ops.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index edd65c9170..004433498d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1583,6 +1583,8 @@ inline void Add(int left_shift, const uint8* input1_data, TFLITE_DCHECK_LT(input1_offset, 256); TFLITE_DCHECK_LT(input2_offset, 256); #ifdef USE_NEON + const auto output_activation_min_vector = vdup_n_u8(output_activation_min); + const auto output_activation_max_vector = vdup_n_u8(output_activation_max); for (; i <= size - 8; i += 8) { const auto input1_val_original = vld1_u8(input1_data + i); const auto input2_val_original = vld1_u8(input2_data + i); @@ -1628,7 +1630,10 @@ inline void Add(int left_shift, const uint8* input1_data, const auto s2_narrowed = vmovn_s32(s2); const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(output_offset)); - vst1_u8(output_data + i, vqmovun_s16(s)); + const auto clamped = + vmax_u8(output_activation_min_vector, + vmin_u8(output_activation_max_vector, vqmovun_s16(s))); + vst1_u8(output_data + i, clamped); } #endif // NEON -- GitLab From 56555d0604c029e8b92fcd354de3bf32b63b62d8 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 19 Mar 2018 20:06:26 -0700 Subject: [PATCH 222/960] Adds final partial batch support for TPUEstimator.predict. PiperOrigin-RevId: 189683528 --- tensorflow/contrib/tpu/BUILD | 11 + .../contrib/tpu/python/tpu/tpu_estimator.py | 212 +++++++++---- .../python/tpu/tpu_estimator_signals_test.py | 291 ++++++++++++++++++ 3 files changed, 458 insertions(+), 56 deletions(-) create mode 100644 tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index ed930e44e8..eea19e9465 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -271,6 +271,17 @@ tf_py_test( ], ) +tf_py_test( + name = "tpu_estimator_signals_test", + size = "small", + srcs = ["python/tpu/tpu_estimator_signals_test.py"], + additional_deps = [ + ":tpu_estimator", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 32f15e60cd..5a8fa04e7c 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -49,6 +49,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -62,6 +63,7 @@ from tensorflow.python.training import evaluation from tensorflow.python.training import session_run_hook from tensorflow.python.training import training from tensorflow.python.training import training_util +from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect _INITIAL_LOSS = 1e7 @@ -678,8 +680,11 @@ def generate_per_host_enqueue_ops_fn_for_host( raise TypeError( 'For mode PREDICT, `input_fn` must return `Dataset` instead of ' '`features` and `labels`.') + if batch_axis is not None: + raise TypeError('For mode PREDICT, batch_axis is not supported yet.') inputs = _InputsWithStoppingSignals( - dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn) + dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn, + add_padding=True) if is_dataset: hooks.append(inputs.dataset_initializer_hook()) @@ -1620,11 +1625,6 @@ class TPUEstimator(estimator_lib.Estimator): 2. `input_fn` must return a `Dataset` instance rather than `features`. In fact, .train() and .evaluate() also support Dataset as return value. - 3. Each batch returned by `Dataset`'s iterator must have the *same static* - shape. This means two things: - - batch_size cannot be `None` - - the final batch must be padded by user to a full batch. - Example (MNIST): ---------------- ``` @@ -1639,41 +1639,9 @@ class TPUEstimator(estimator_lib.Estimator): [total_examples, height, width, 3], minval=-1, maxval=1) dataset = tf.data.Dataset.from_tensor_slices(images) - dataset = dataset.batch(batch_size) dataset = dataset.map(lambda images: {'image': images}) - def pad(tensor, missing_count): - # Pads out the batch dimension to the complete batch_size. - rank = len(tensor.shape) - assert rank > 0 - padding = tf.stack([[0, missing_count]] + [[0, 0]] * (rank - 1)) - padded_shape = (batch_size,) + tuple(tensor.shape[1:]) - padded_tensor = tf.pad(tensor, padding) - padded_tensor.set_shape(padded_shape) - return padded_tensor - - def pad_batch_if_incomplete(batch_features): - # Pads out the batch dimension for all features. - real_batch_size = tf.shape(batch_features["image"])[0] - - missing_count = tf.constant(batch_size, tf.int32) - real_batch_size - - padded_features = { - key: pad(tensor, missing_count) - for key, tensor in batch_features.iteritems() - } - padding_mask = tf.concat( - [ - tf.zeros((real_batch_size, 1), dtype=tf.int32), - tf.ones((missing_count, 1), dtype=tf.int32) - ], - axis=0) - padding_mask.set_shape((batch_size, 1)) - padded_features["is_padding"] = padding_mask - return padded_features - - dataset = dataset.map(pad_batch_if_incomplete) - + dataset = dataset.batch(batch_size) return dataset def model_fn(features, labels, params, mode): @@ -2089,12 +2057,14 @@ class TPUEstimator(estimator_lib.Estimator): predictions, message=( 'The estimated size for TPUEstimatorSpec.predictions is too ' 'large.')) - stopping_signals = host_call_ret['signals'] + signals = host_call_ret['signals'] with ops.control_dependencies(host_ops): host_ops = [] # Empty, we do do not need it anymore. scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal( - stopping_signals) + signals) + predictions = _PaddingSignals.slice_tensor_or_dict( + predictions, signals) hooks = [ _StoppingPredictHook(scalar_stopping_signal), @@ -2389,20 +2359,19 @@ class _Inputs(object): return self._dataset -# TODO(xiejw): Extend this to support final partial batch. class _InputsWithStoppingSignals(_Inputs): """Inputs with `_StopSignals` inserted into the dataset.""" - def __init__(self, dataset, batch_size): + def __init__(self, dataset, batch_size, add_padding=False): assert dataset is not None user_provided_dataset = dataset.map( _InputsWithStoppingSignals.insert_stopping_signal( - stop=False, batch_size=batch_size)) + stop=False, batch_size=batch_size, add_padding=add_padding)) final_batch_dataset = dataset.take(1).map( _InputsWithStoppingSignals.insert_stopping_signal( - stop=True, batch_size=batch_size)) + stop=True, batch_size=batch_size, add_padding=add_padding)) dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2) super(_InputsWithStoppingSignals, self).__init__(dataset=dataset) @@ -2432,7 +2401,7 @@ class _InputsWithStoppingSignals(_Inputs): return signals @staticmethod - def insert_stopping_signal(stop, batch_size): + def insert_stopping_signal(stop, batch_size, add_padding=False): """Inserts stopping_signal into dataset via _map_fn. Here we change the data structure in the dataset, such that the return value @@ -2443,6 +2412,7 @@ class _InputsWithStoppingSignals(_Inputs): Args: stop: bool, state of current stopping signals. batch_size: int, batch size. + add_padding: bool, whether to pad the tensor to full batch size. Returns: A map_fn passed to dataset.map API. @@ -2456,11 +2426,25 @@ class _InputsWithStoppingSignals(_Inputs): args = args[0] features, labels = _Inputs._parse_inputs(args) new_input_dict = {} - new_input_dict['features'] = features - if labels is not None: - new_input_dict['labels'] = labels + + if add_padding: + padding_mask, features, labels = ( + _PaddingSignals.pad_features_and_labels( + features, labels, batch_size)) + + new_input_dict['features'] = features + if labels is not None: + new_input_dict['labels'] = labels + + else: + new_input_dict['features'] = features + if labels is not None: + new_input_dict['labels'] = labels + padding_mask = None + new_input_dict['signals'] = _StopSignals( - stop=stop, batch_size=batch_size).as_dict() + stop=stop, batch_size=batch_size, padding_mask=padding_mask).as_dict() + return new_input_dict return _map_fn @@ -2469,23 +2453,28 @@ class _InputsWithStoppingSignals(_Inputs): class _StopSignals(object): """Signals class holding all logic to handle TPU stopping condition.""" - NON_STOPPING_SIGNAL = 0.0 - STOPPING_SIGNAL = 1.0 + NON_STOPPING_SIGNAL = False + STOPPING_SIGNAL = True - def __init__(self, stop, batch_size): + def __init__(self, stop, batch_size, padding_mask=None): self._stop = stop self._batch_size = batch_size + self._padding_mask = padding_mask def as_dict(self): + """Returns the signals as Python dict.""" shape = [self._batch_size, 1] - dtype = dtypes.float32 + dtype = dtypes.bool if self._stop: stopping = array_ops.ones(shape=shape, dtype=dtype) else: stopping = array_ops.zeros(shape=shape, dtype=dtype) - return {'stopping': stopping} + signals = {'stopping': stopping} + if self._padding_mask is not None: + signals['padding_mask'] = self._padding_mask + return signals @staticmethod def as_scalar_stopping_signal(signals): @@ -2493,7 +2482,118 @@ class _StopSignals(object): @staticmethod def should_stop(scalar_stopping_signal): - return scalar_stopping_signal >= _StopSignals.STOPPING_SIGNAL + if isinstance(scalar_stopping_signal, ops.Tensor): + # STOPPING_SIGNAL is a constant True. Here, the logical_and is just the TF + # way to express the bool check whether scalar_stopping_signal is True. + return math_ops.logical_and( + scalar_stopping_signal, _StopSignals.STOPPING_SIGNAL) + else: + # For non Tensor case, it is used in SessionRunHook. So, we cannot modify + # the graph anymore. Here, we use pure Python. + return bool(scalar_stopping_signal) + + +class _PaddingSignals(object): + """Signals class holding all logic to handle padding.""" + + @staticmethod + def pad_features_and_labels(features, labels, batch_size): + """Pads out the batch dimension of features and labels.""" + real_batch_size = array_ops.shape( + _PaddingSignals._find_any_tensor(features))[0] + + batch_size_tensor = constant_op.constant(batch_size, dtypes.int32) + + check_greater = check_ops.assert_greater_equal( + batch_size_tensor, real_batch_size, + data=(batch_size_tensor, real_batch_size), + message='The real batch size should not be greater than batch_size.') + + with ops.control_dependencies([check_greater]): + missing_count = batch_size_tensor - real_batch_size + + def pad_single_tensor(tensor): + """Pads out the batch dimension of a tensor to the complete batch_size.""" + rank = len(tensor.shape) + assert rank > 0 + padding = array_ops.stack([[0, missing_count]] + [[0, 0]] * (rank - 1)) + padded_shape = (batch_size,) + tuple(tensor.shape[1:]) + padded_tensor = array_ops.pad(tensor, padding) + padded_tensor.set_shape(padded_shape) + return padded_tensor + + def nest_pad(tensor_or_dict): + return nest.map_structure(pad_single_tensor, tensor_or_dict) + + features = nest_pad(features) + if labels is not None: + labels = nest_pad(labels) + + padding_mask = _PaddingSignals._padding_mask( + real_batch_size, missing_count, batch_size) + + return padding_mask, features, labels + + @staticmethod + def slice_tensor_or_dict(tensor_or_dict, signals): + """Slice the real Tensors according to padding mask in signals.""" + + padding_mask = signals['padding_mask'] + batch_size = array_ops.shape(padding_mask)[0] + + def verify_batch_size(tensor): + check_batch_size = math_ops.equal(batch_size, tensor.shape[0]) + with ops.control_dependencies([check_batch_size]): + return array_ops.identity(tensor) + + def slice_single_tensor(tensor): + rank = len(tensor.shape) + assert rank > 0 + real_batch_size = batch_size - math_ops.reduce_sum(padding_mask) + return verify_batch_size(tensor)[0:real_batch_size] + + # As we split the Tensors to all TPU cores and concat them back, it is + # important to ensure the real data is placed before padded ones, i.e., + # order is preserved. By that, the sliced padding mask should have all 0's. + # If this assertion failed, # the slice logic here would not hold. + sliced_padding_mask = slice_single_tensor(padding_mask) + assert_padding_mask = math_ops.equal( + math_ops.reduce_sum(sliced_padding_mask), 0) + + with ops.control_dependencies([assert_padding_mask]): + should_stop = _StopSignals.should_stop( + _StopSignals.as_scalar_stopping_signal(signals)) + + is_full_batch = math_ops.equal(math_ops.reduce_sum(padding_mask), 0) + + def slice_fn(tensor): + # If the current batch is full batch or part of stopping signals, we do + # not need to slice to save performance. + return control_flow_ops.cond( + math_ops.logical_or(should_stop, is_full_batch), + (lambda: verify_batch_size(tensor)), + (lambda: slice_single_tensor(tensor))) + + return nest.map_structure(slice_fn, tensor_or_dict) + + @staticmethod + def _find_any_tensor(batch_features): + tensors = [x for x in nest.flatten(batch_features) + if isinstance(x, ops.Tensor)] + if not tensors: + raise ValueError('Cannot find any Tensor in features dict.') + return tensors[0] + + @staticmethod + def _padding_mask(real_batch_size, missing_count, batch_size): + padding_mask = array_ops.concat( + [ + array_ops.zeros((real_batch_size,), dtype=dtypes.int32), + array_ops.ones((missing_count,), dtype=dtypes.int32) + ], + axis=0) + padding_mask.set_shape((batch_size,)) + return padding_mask class _SignalsHelper(object): diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py new file mode 100644 index 0000000000..3e90957e6d --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py @@ -0,0 +1,291 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU Estimator Signalling Tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tpu.python.tpu import tpu_estimator +from tensorflow.python import data as dataset_lib +from tensorflow.python.client import session +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.platform import test + + +def make_input_fn(num_samples): + a = np.linspace(0, 100.0, num=num_samples) + b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1)) + + def input_fn(params): + batch_size = params['batch_size'] + da1 = dataset_lib.Dataset.from_tensor_slices(a) + da2 = dataset_lib.Dataset.from_tensor_slices(b) + + dataset = dataset_lib.Dataset.zip((da1, da2)) + dataset = dataset.map(lambda fa, fb: {'a': fa, 'b': fb}) + dataset = dataset.batch(batch_size) + return dataset + return input_fn, (a, b) + + +def make_input_fn_with_labels(num_samples): + a = np.linspace(0, 100.0, num=num_samples) + b = np.reshape(np.array(a, dtype=np.float32), (len(a), 1)) + + def input_fn(params): + batch_size = params['batch_size'] + da1 = dataset_lib.Dataset.from_tensor_slices(a) + da2 = dataset_lib.Dataset.from_tensor_slices(b) + + dataset = dataset_lib.Dataset.zip((da1, da2)) + dataset = dataset.map(lambda fa, fb: ({'a': fa}, fb)) + dataset = dataset.batch(batch_size) + return dataset + return input_fn, (a, b) + + +class TPUEstimatorStoppingSignalsTest(test.TestCase): + + def test_normal_output_without_signals(self): + num_samples = 4 + batch_size = 2 + + params = {'batch_size': batch_size} + input_fn, (a, b) = make_input_fn(num_samples=num_samples) + + with ops.Graph().as_default(): + dataset = input_fn(params) + features = dataset.make_one_shot_iterator().get_next() + + # With tf.data.Dataset.batch, the batch is None, i.e., dynamic shape. + self.assertIsNone(features['a'].shape.as_list()[0]) + + with session.Session() as sess: + result = sess.run(features) + self.assertAllEqual(a[:batch_size], result['a']) + self.assertAllEqual(b[:batch_size], result['b']) + + # This run should work as num_samples / batch_size = 2. + result = sess.run(features) + self.assertAllEqual(a[batch_size:num_samples], result['a']) + self.assertAllEqual(b[batch_size:num_samples], result['b']) + + with self.assertRaises(errors.OutOfRangeError): + # Given num_samples and batch_size, this run should fail. + sess.run(features) + + def test_output_with_stopping_signals(self): + num_samples = 4 + batch_size = 2 + + params = {'batch_size': batch_size} + input_fn, (a, b) = make_input_fn(num_samples=num_samples) + + with ops.Graph().as_default(): + dataset = input_fn(params) + inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size) + hook = inputs.dataset_initializer_hook() + features, _ = inputs.features_and_labels() + signals = inputs.signals() + + # With tf.data.Dataset.batch, the batch is None, i.e., dynamic shape. + self.assertIsNone(features['a'].shape.as_list()[0]) + + with session.Session() as sess: + hook.begin() + hook.after_create_session(sess, coord=None) + + result, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual(a[:batch_size], result['a']) + self.assertAllEqual(b[:batch_size], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + + # This run should work as num_samples / batch_size = 2. + result, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual(a[batch_size:num_samples], result['a']) + self.assertAllEqual(b[batch_size:num_samples], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + + # This run should work, *but* see STOP ('1') as signals + _, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(features) + + +class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): + + def test_num_samples_divisible_by_batch_size(self): + num_samples = 4 + batch_size = 2 + + params = {'batch_size': batch_size} + input_fn, (a, b) = make_input_fn(num_samples=num_samples) + + with ops.Graph().as_default(): + dataset = input_fn(params) + inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, + add_padding=True) + hook = inputs.dataset_initializer_hook() + features, _ = inputs.features_and_labels() + signals = inputs.signals() + + # With padding, all shapes are static now. + self.assertEqual(batch_size, features['a'].shape.as_list()[0]) + + with session.Session() as sess: + hook.begin() + hook.after_create_session(sess, coord=None) + + result, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual(a[:batch_size], result['a']) + self.assertAllEqual(b[:batch_size], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + self.assertAllEqual([0.] * batch_size, + evaluated_signals['padding_mask']) + + # This run should work as num_samples / batch_size = 2. + result, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual(a[batch_size:num_samples], result['a']) + self.assertAllEqual(b[batch_size:num_samples], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + self.assertAllEqual([0.] * batch_size, + evaluated_signals['padding_mask']) + + # This run should work, *but* see STOP ('1') as signals + _, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(features) + + def test_num_samples_not_divisible_by_batch_size(self): + num_samples = 5 + batch_size = 2 + + params = {'batch_size': batch_size} + input_fn, (a, b) = make_input_fn_with_labels(num_samples=num_samples) + + with ops.Graph().as_default(): + dataset = input_fn(params) + inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, + add_padding=True) + hook = inputs.dataset_initializer_hook() + features, labels = inputs.features_and_labels() + signals = inputs.signals() + + # With padding, all shapes are static. + self.assertEqual(batch_size, features['a'].shape.as_list()[0]) + + with session.Session() as sess: + hook.begin() + hook.after_create_session(sess, coord=None) + + evaluated_features, evaluated_labels, evaluated_signals = ( + sess.run([features, labels, signals])) + self.assertAllEqual(a[:batch_size], evaluated_features['a']) + self.assertAllEqual(b[:batch_size], evaluated_labels) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + self.assertAllEqual([0.] * batch_size, + evaluated_signals['padding_mask']) + + # This run should work as num_samples / batch_size >= 2. + evaluated_features, evaluated_labels, evaluated_signals = ( + sess.run([features, labels, signals])) + self.assertAllEqual(a[batch_size:2*batch_size], evaluated_features['a']) + self.assertAllEqual(b[batch_size:2*batch_size], evaluated_labels) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + self.assertAllEqual([0.] * batch_size, + evaluated_signals['padding_mask']) + + # This is the final partial batch. + evaluated_features, evaluated_labels, evaluated_signals = ( + sess.run([features, labels, signals])) + real_batch_size = num_samples % batch_size + + # Assert the real part. + self.assertAllEqual(a[2*batch_size:num_samples], + evaluated_features['a'][:real_batch_size]) + self.assertAllEqual(b[2*batch_size:num_samples], + evaluated_labels[:real_batch_size]) + # Assert the padded part. + self.assertAllEqual([0.0] * (batch_size - real_batch_size), + evaluated_features['a'][real_batch_size:]) + self.assertAllEqual([[0.0]] * (batch_size - real_batch_size), + evaluated_labels[real_batch_size:]) + + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + + padding = ([.0] * real_batch_size + + [1.] * (batch_size - real_batch_size)) + self.assertAllEqual(padding, evaluated_signals['padding_mask']) + + # This run should work, *but* see STOP ('1') as signals + _, evaluated_signals = sess.run([features, signals]) + self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(features) + + def test_slice(self): + num_samples = 3 + batch_size = 2 + + params = {'batch_size': batch_size} + input_fn, (a, b) = make_input_fn(num_samples=num_samples) + + with ops.Graph().as_default(): + dataset = input_fn(params) + inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, + add_padding=True) + hook = inputs.dataset_initializer_hook() + features, _ = inputs.features_and_labels() + signals = inputs.signals() + + sliced_features = ( + tpu_estimator._PaddingSignals.slice_tensor_or_dict( + features, signals)) + + with session.Session() as sess: + hook.begin() + hook.after_create_session(sess, coord=None) + + result, evaluated_signals = sess.run([sliced_features, signals]) + self.assertAllEqual(a[:batch_size], result['a']) + self.assertAllEqual(b[:batch_size], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + + # This is the final partial batch. + result, evaluated_signals = sess.run([sliced_features, signals]) + self.assertEqual(1, len(result['a'])) + self.assertAllEqual(a[batch_size:num_samples], result['a']) + self.assertAllEqual(b[batch_size:num_samples], result['b']) + self.assertAllEqual([[0.]] * batch_size, evaluated_signals['stopping']) + + # This run should work, *but* see STOP ('1') as signals + _, evaluated_signals = sess.run([sliced_features, signals]) + self.assertAllEqual([[1.]] * batch_size, evaluated_signals['stopping']) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(sliced_features) + + +if __name__ == '__main__': + test.main() -- GitLab From 88334807a5beb8b61a967d21e534ed238e7916c0 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 19 Mar 2018 20:21:45 -0700 Subject: [PATCH 223/960] Always imports the contrib summary ops when importing tensorflow. Fixes #17802 PiperOrigin-RevId: 189684619 --- tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/summary/BUILD | 8 +- tensorflow/contrib/summary/summary_ops.py | 2 +- tensorflow/core/BUILD | 1 + .../base_api/api_def_CloseSummaryWriter.pbtxt | 4 + .../api_def_CreateSummaryDbWriter.pbtxt | 4 + .../api_def_CreateSummaryFileWriter.pbtxt | 4 + .../base_api/api_def_FlushSummaryWriter.pbtxt | 4 + .../base_api/api_def_ImportEvent.pbtxt | 4 + .../base_api/api_def_SummaryWriter.pbtxt | 4 + .../base_api/api_def_WriteAudioSummary.pbtxt | 4 + .../base_api/api_def_WriteGraphSummary.pbtxt | 4 + .../api_def_WriteHistogramSummary.pbtxt | 4 + .../base_api/api_def_WriteImageSummary.pbtxt | 4 + .../base_api/api_def_WriteScalarSummary.pbtxt | 4 + .../base_api/api_def_WriteSummary.pbtxt | 4 + .../api_def_CloseSummaryWriter.pbtxt | 4 + .../api_def_CreateSummaryDbWriter.pbtxt | 4 + .../api_def_CreateSummaryFileWriter.pbtxt | 4 + .../api_def_FlushSummaryWriter.pbtxt | 4 + .../python_api/api_def_ImportEvent.pbtxt | 4 + .../python_api/api_def_SummaryWriter.pbtxt | 4 + .../api_def_WriteAudioSummary.pbtxt | 4 + .../api_def_WriteGraphSummary.pbtxt | 4 + .../api_def_WriteHistogramSummary.pbtxt | 4 + .../api_def_WriteImageSummary.pbtxt | 4 + .../api_def_WriteScalarSummary.pbtxt | 4 + .../python_api/api_def_WriteSummary.pbtxt | 4 + tensorflow/core/ops/summary_ops.cc | 191 ++---------------- tensorflow/python/BUILD | 7 + tensorflow/python/summary/summary.py | 2 + 31 files changed, 121 insertions(+), 189 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_CloseSummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CreateSummaryDbWriter.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CreateSummaryFileWriter.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_FlushSummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ImportEvent.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_SummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteAudioSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteGraphSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteHistogramSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteImageSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteScalarSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WriteSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CloseSummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CreateSummaryDbWriter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CreateSummaryFileWriter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FlushSummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ImportEvent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SummaryWriter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteAudioSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteGraphSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteHistogramSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteImageSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteScalarSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_WriteSummary.pbtxt diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index b730ebd3ba..1e354bf212 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -348,6 +348,7 @@ GENERATE_PYTHON_OP_LIB("state_ops") GENERATE_PYTHON_OP_LIB("sparse_ops") GENERATE_PYTHON_OP_LIB("spectral_ops") GENERATE_PYTHON_OP_LIB("string_ops") +GENERATE_PYTHON_OP_LIB("summary_ops") GENERATE_PYTHON_OP_LIB("user_ops") GENERATE_PYTHON_OP_LIB("training_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py) @@ -419,8 +420,6 @@ GENERATE_PYTHON_OP_LIB("stateless_random_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/stateless/gen_stateless_random_ops.py) GENERATE_PYTHON_OP_LIB("debug_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/debug/ops/gen_debug_ops.py) -GENERATE_PYTHON_OP_LIB("summary_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/summary/gen_summary_ops.py) add_custom_target(tf_python_ops SOURCES ${tf_python_ops_generated_files} ${PYTHON_PROTO_GENFILES}) add_dependencies(tf_python_ops tf_python_op_gen_main) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index b58c83fdaf..80563c5e15 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -10,12 +10,6 @@ load( "tf_gen_op_wrapper_py", ) -tf_gen_op_wrapper_py( - name = "gen_summary_ops", - out = "gen_summary_ops.py", - deps = ["//tensorflow/core:summary_ops_op_lib"], -) - py_test( name = "summary_ops_test", srcs = ["summary_ops_test.py"], @@ -61,7 +55,6 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ - ":gen_summary_ops", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", @@ -72,6 +65,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:summary_op_util", + "//tensorflow/python:summary_ops_gen", "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python/eager:context", diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index c1724c6e43..bc763fe655 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -26,7 +26,6 @@ import time import six -from tensorflow.contrib.summary import gen_summary_ops from tensorflow.core.framework import graph_pb2 from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -35,6 +34,7 @@ from tensorflow.python.framework import ops from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_summary_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import summary_op_util diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1d283e240d..8124280914 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -722,6 +722,7 @@ cc_library( ":sendrecv_ops_op_lib", ":set_ops_op_lib", ":sparse_ops_op_lib", + ":summary_ops_op_lib", ":spectral_ops_op_lib", ":state_ops_op_lib", ":stateless_random_ops_op_lib", diff --git a/tensorflow/core/api_def/base_api/api_def_CloseSummaryWriter.pbtxt b/tensorflow/core/api_def/base_api/api_def_CloseSummaryWriter.pbtxt new file mode 100644 index 0000000000..f6fd7d9316 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CloseSummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CloseSummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_CreateSummaryDbWriter.pbtxt b/tensorflow/core/api_def/base_api/api_def_CreateSummaryDbWriter.pbtxt new file mode 100644 index 0000000000..28da46a0f8 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CreateSummaryDbWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CreateSummaryDbWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_CreateSummaryFileWriter.pbtxt b/tensorflow/core/api_def/base_api/api_def_CreateSummaryFileWriter.pbtxt new file mode 100644 index 0000000000..2ce2c4d37e --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CreateSummaryFileWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CreateSummaryFileWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_FlushSummaryWriter.pbtxt b/tensorflow/core/api_def/base_api/api_def_FlushSummaryWriter.pbtxt new file mode 100644 index 0000000000..3ada43c9b8 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_FlushSummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FlushSummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_ImportEvent.pbtxt b/tensorflow/core/api_def/base_api/api_def_ImportEvent.pbtxt new file mode 100644 index 0000000000..d8813b58f3 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ImportEvent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ImportEvent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_SummaryWriter.pbtxt b/tensorflow/core/api_def/base_api/api_def_SummaryWriter.pbtxt new file mode 100644 index 0000000000..1fe57ecf19 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteAudioSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteAudioSummary.pbtxt new file mode 100644 index 0000000000..520952cd41 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteAudioSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteAudioSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteGraphSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteGraphSummary.pbtxt new file mode 100644 index 0000000000..3653477b20 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteGraphSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteGraphSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteHistogramSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteHistogramSummary.pbtxt new file mode 100644 index 0000000000..26e1482630 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteHistogramSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteHistogramSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteImageSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteImageSummary.pbtxt new file mode 100644 index 0000000000..78db8700f0 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteImageSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteImageSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteScalarSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteScalarSummary.pbtxt new file mode 100644 index 0000000000..7bae8638d2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteScalarSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteScalarSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_WriteSummary.pbtxt b/tensorflow/core/api_def/base_api/api_def_WriteSummary.pbtxt new file mode 100644 index 0000000000..db86883e21 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_WriteSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CloseSummaryWriter.pbtxt b/tensorflow/core/api_def/python_api/api_def_CloseSummaryWriter.pbtxt new file mode 100644 index 0000000000..f6fd7d9316 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CloseSummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CloseSummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CreateSummaryDbWriter.pbtxt b/tensorflow/core/api_def/python_api/api_def_CreateSummaryDbWriter.pbtxt new file mode 100644 index 0000000000..28da46a0f8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CreateSummaryDbWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CreateSummaryDbWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CreateSummaryFileWriter.pbtxt b/tensorflow/core/api_def/python_api/api_def_CreateSummaryFileWriter.pbtxt new file mode 100644 index 0000000000..2ce2c4d37e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CreateSummaryFileWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CreateSummaryFileWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FlushSummaryWriter.pbtxt b/tensorflow/core/api_def/python_api/api_def_FlushSummaryWriter.pbtxt new file mode 100644 index 0000000000..3ada43c9b8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FlushSummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FlushSummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ImportEvent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ImportEvent.pbtxt new file mode 100644 index 0000000000..d8813b58f3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ImportEvent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ImportEvent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SummaryWriter.pbtxt b/tensorflow/core/api_def/python_api/api_def_SummaryWriter.pbtxt new file mode 100644 index 0000000000..1fe57ecf19 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SummaryWriter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SummaryWriter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteAudioSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteAudioSummary.pbtxt new file mode 100644 index 0000000000..520952cd41 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteAudioSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteAudioSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteGraphSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteGraphSummary.pbtxt new file mode 100644 index 0000000000..3653477b20 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteGraphSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteGraphSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteHistogramSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteHistogramSummary.pbtxt new file mode 100644 index 0000000000..26e1482630 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteHistogramSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteHistogramSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteImageSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteImageSummary.pbtxt new file mode 100644 index 0000000000..78db8700f0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteImageSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteImageSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteScalarSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteScalarSummary.pbtxt new file mode 100644 index 0000000000..7bae8638d2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteScalarSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteScalarSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_WriteSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_WriteSummary.pbtxt new file mode 100644 index 0000000000..db86883e21 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_WriteSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "WriteSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index aa7458f903..742a221adc 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ b/tensorflow/core/ops/summary_ops.cc @@ -22,15 +22,7 @@ REGISTER_OP("SummaryWriter") .Output("writer: resource") .Attr("shared_name: string = ''") .Attr("container: string = ''") - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Returns a handle to be used to access a summary writer. - -The summary writer is an in-graph resource which can be used by ops to write -summaries to event files. - -writer: the summary writer resource. Scalar handle. -)doc"); + .SetShapeFn(shape_inference::ScalarShape); REGISTER_OP("CreateSummaryFileWriter") .Input("writer: resource") @@ -38,17 +30,7 @@ REGISTER_OP("CreateSummaryFileWriter") .Input("max_queue: int32") .Input("flush_millis: int32") .Input("filename_suffix: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Creates a summary file writer accessible by the given resource handle. - -writer: A handle to the summary writer resource -logdir: Directory where the event file will be written. -max_queue: Size of the queue of pending events and summaries. -flush_millis: How often, in milliseconds, to flush the pending events and - summaries to disk. -filename_suffix: Every event file's name is suffixed with this suffix. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("CreateSummaryDbWriter") .Input("writer: resource") @@ -56,47 +38,15 @@ REGISTER_OP("CreateSummaryDbWriter") .Input("experiment_name: string") .Input("run_name: string") .Input("user_name: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Creates summary database writer accessible by given resource handle. - -This can be used to write tensors from the execution graph directly -to a database. Only SQLite is supported right now. This function -will create the schema if it doesn't exist. Entries in the Users, -Experiments, and Runs tables will be created automatically if they -don't already exist. - -writer: Handle to SummaryWriter resource to overwrite. -db_uri: For example "file:/tmp/foo.sqlite". -experiment_name: Can't contain ASCII control characters or <>. Case - sensitive. If empty, then the Run will not be associated with any - Experiment. -run_name: Can't contain ASCII control characters or <>. Case sensitive. - If empty, then each Tag will not be associated with any Run. -user_name: Must be valid as both a DNS label and Linux username. If - empty, then the Experiment will not be associated with any User. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("FlushSummaryWriter") .Input("writer: resource") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"( -Flushes the writer's unwritten events. - -writer: A handle to the summary writer resource. -)"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("CloseSummaryWriter") .Input("writer: resource") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"( -Flushes and closes the summary writer. - -Also removes it from the resource manager. To reopen, use another -CreateSummaryFileWriter op. - -writer: A handle to the summary writer resource. -)"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteSummary") .Input("writer: resource") @@ -105,31 +55,12 @@ REGISTER_OP("WriteSummary") .Input("tag: string") .Input("summary_metadata: string") .Attr("T: type") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Outputs a `Summary` protocol buffer with a tensor. - -writer: A handle to a summary writer. -step: The step to write the summary for. -tensor: A tensor to serialize. -tag: The summary's tag. -summary_metadata: Serialized SummaryMetadata protocol buffer containing - plugin-related metadata for this summary. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("ImportEvent") .Input("writer: resource") .Input("event: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Outputs a `tf.Event` protocol buffer. - -When CreateSummaryDbWriter is being used, this op can be useful for -importing data from event logs. - -writer: A handle to a summary writer. -event: A string containing a binary-encoded tf.Event proto. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteScalarSummary") .Input("writer: resource") @@ -137,17 +68,7 @@ REGISTER_OP("WriteScalarSummary") .Input("tag: string") .Input("value: T") .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Writes a `Summary` protocol buffer with scalar values. - -The input `tag` and `value` must have the scalars. - -writer: A handle to a summary writer. -step: The step to write the summary for. -tag: Tag for the summary. -value: Value for the summary. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteHistogramSummary") .Input("writer: resource") @@ -155,21 +76,7 @@ REGISTER_OP("WriteHistogramSummary") .Input("tag: string") .Input("values: T") .Attr("T: realnumbertype = DT_FLOAT") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Writes a `Summary` protocol buffer with a histogram. - -The generated -[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -has one summary value containing a histogram for `values`. - -This op reports an `InvalidArgument` error if any value is not finite. - -writer: A handle to a summary writer. -step: The step to write the summary for. -tag: Scalar. Tag to use for the `Summary.Value`. -values: Any shape. Values to use to build the histogram. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteImageSummary") .Input("writer: resource") @@ -179,52 +86,7 @@ REGISTER_OP("WriteImageSummary") .Input("bad_color: uint8") .Attr("max_images: int >= 1 = 3") .Attr("T: {uint8, float, half} = DT_FLOAT") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Writes a `Summary` protocol buffer with images. - -The summary has up to `max_images` summary values containing images. The -images are built from `tensor` which must be 4-D with shape `[batch_size, -height, width, channels]` and where `channels` can be: - -* 1: `tensor` is interpreted as Grayscale. -* 3: `tensor` is interpreted as RGB. -* 4: `tensor` is interpreted as RGBA. - -The images have the same number of channels as the input tensor. For float -input, the values are normalized one image at a time to fit in the range -`[0, 255]`. `uint8` values are unchanged. The op uses two different -normalization algorithms: - -* If the input values are all positive, they are rescaled so the largest one - is 255. - -* If any input value is negative, the values are shifted so input value 0.0 - is at 127. They are then rescaled so that either the smallest value is 0, - or the largest one is 255. - -The `tag` argument is a scalar `Tensor` of type `string`. It is used to -build the `tag` of the summary values: - -* If `max_images` is 1, the summary value tag is '*tag*/image'. -* If `max_images` is greater than 1, the summary value tags are - generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. - -The `bad_color` argument is the color to use in the generated images for -non-finite input values. It is a `unit8` 1-D tensor of length `channels`. -Each element must be in the range `[0, 255]` (It represents the value of a -pixel in the output image). Non-finite values in the input tensor are -replaced by this tensor in the output image. The default value is the color -red. - -writer: A handle to a summary writer. -step: The step to write the summary for. -tag: Scalar. Used to build the `tag` attribute of the summary values. -tensor: 4-D of shape `[batch_size, height, width, channels]` where - `channels` is 1, 3, or 4. -max_images: Max number of batch elements to generate images for. -bad_color: Color to use for pixels with non-finite values. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteAudioSummary") .Input("writer: resource") @@ -233,41 +95,12 @@ REGISTER_OP("WriteAudioSummary") .Input("tensor: float") .Input("sample_rate: float") .Attr("max_outputs: int >= 1 = 3") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Writes a `Summary` protocol buffer with audio. - -The summary has up to `max_outputs` summary values containing audio. The -audio is built from `tensor` which must be 3-D with shape `[batch_size, -frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. - -The `tag` argument is a scalar `Tensor` of type `string`. It is used to -build the `tag` of the summary values: - -* If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -* If `max_outputs` is greater than 1, the summary value tags are - generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. - -writer: A handle to a summary writer. -step: The step to write the summary for. -tag: Scalar. Used to build the `tag` attribute of the summary values. -tensor: 2-D of shape `[batch_size, frames]`. -sample_rate: The sample rate of the signal in hertz. -max_outputs: Max number of batch elements to generate audio for. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("WriteGraphSummary") .Input("writer: resource") .Input("step: int64") .Input("tensor: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Writes a `GraphDef` protocol buffer to a `SummaryWriter`. - -writer: Handle of `SummaryWriter`. -step: The step to write the summary for. -tensor: A scalar string of the serialized tf.GraphDef proto. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); } // namespace tensorflow diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index ec67f43190..7ece482ea7 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1358,6 +1358,12 @@ tf_gen_op_wrapper_private_py( ], ) +tf_gen_op_wrapper_private_py( + name = "summary_ops_gen", + visibility = ["//tensorflow:__subpackages__"], + deps = ["//tensorflow/core:summary_ops_op_lib"], +) + tf_gen_op_wrapper_private_py( name = "audio_ops_gen", require_shape_functions = True, @@ -4110,6 +4116,7 @@ py_library( ":pywrap_tensorflow", ":summary_op_util", ":summary_ops", + ":summary_ops_gen", ":util", "//tensorflow/python/eager:context", "//third_party/py/numpy", diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index f1b2be0a1a..97f2ddfdfc 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -48,10 +48,12 @@ from tensorflow.core.util.event_pb2 import SessionLog from tensorflow.core.util.event_pb2 import TaggedRunMetadata # pylint: enable=unused-import + from tensorflow.python.eager import context as _context from tensorflow.python.framework import dtypes as _dtypes from tensorflow.python.framework import ops as _ops from tensorflow.python.ops import gen_logging_ops as _gen_logging_ops +from tensorflow.python.ops import gen_summary_ops as _gen_summary_ops # pylint: disable=unused-import from tensorflow.python.ops import summary_op_util as _summary_op_util # exports tensor-related summaries -- GitLab From 1f4ee9d3d705a9c64af69e51e9fb5c738e145802 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 19 Mar 2018 20:42:00 -0700 Subject: [PATCH 224/960] Quantize bypasses after activations. PiperOrigin-RevId: 189686219 --- .../contrib/quantize/python/quantize.py | 84 +++++++++++++++++-- .../contrib/quantize/python/quantize_test.py | 29 +++++++ 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 6cc097b20e..9780e6dbcc 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -123,10 +123,47 @@ def Quantize(graph, vars_collection=vars_collection, bits=activation_bits) + if layer_match.post_activation_bypass_op is not None: + _InsertQuantOp( + add_context, + 'post_activation_bypass_quant', + layer_match.post_activation_bypass_op, + input_to_ops_map.ConsumerOperations( + layer_match.post_activation_bypass_op), + is_training, + moving_avg=True, + ema_decay=ema_decay, + quant_delay=quant_delay, + vars_collection=vars_collection, + bits=activation_bits) + def _FindLayersToQuantize(graph): """Matches layers in graph to quantize. + The following patterns get matched. Nodes surrounded by [] will be + optionally matched: + + weight|folded_weight + / + conv|fc + | + [post_conv_correction] + | + biasadd|folded_bias + | + [bypass] + | + activation + | + [post_activation_bypass] + + Match replacements: + If weight_folded_weight is found, FakeQuant is added afterwards. + If bypass is found, FakeQuant is added before and after. + If activation is found, FakeQuant is added afterwards. + If post_activation_bypass is found, FakeQuant is added afterwards. + Args: graph: Graph to perform match on. @@ -179,7 +216,7 @@ def _FindLayersToQuantize(graph): [bias_add_pattern, folded_bias_add_pattern]) ]) - # The input to the activation can come from bias add, fold bias add or the + # The input to the activation can come from bias add, fold bias add, the # bypasses. activation_pattern = graph_matcher.OpTypePattern( '|'.join(_ACTIVATION_TYPES), @@ -190,7 +227,16 @@ def _FindLayersToQuantize(graph): ]) ]) - layer_matcher = graph_matcher.GraphMatcher(activation_pattern) + post_activation_bypass_pattern_a = graph_matcher.OpTypePattern( + 'Add', inputs=['*', activation_pattern]) + post_activation_bypass_pattern_b = graph_matcher.OpTypePattern( + 'Add', inputs=[activation_pattern, '*']) + + layer_matcher = graph_matcher.GraphMatcher( + graph_matcher.OneofPattern([ + post_activation_bypass_pattern_a, post_activation_bypass_pattern_b, + activation_pattern + ])) for match_result in layer_matcher.match_graph(graph): layer_op = match_result.get_op(layer_pattern) weight_tensor = match_result.get_tensor(weight_pattern) @@ -203,8 +249,19 @@ def _FindLayersToQuantize(graph): bypass_op = match_result.get_op(bypass_pattern_a) if bypass_op is None: bypass_op = match_result.get_op(bypass_pattern_b) + post_activation_bypass_op = match_result.get_op( + post_activation_bypass_pattern_a) + if post_activation_bypass_op is None: + post_activation_bypass_op = match_result.get_op( + post_activation_bypass_pattern_b) + # If we don't find a post_activation_bypass_op but activation_op has a + # bypass following it, then we need to skip this match, since there will be + # another match that includes post_activation_bypass_op. + if post_activation_bypass_op is None and _HasPostActivationBypass( + activation_op): + continue yield _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, - bias_add_op) + post_activation_bypass_op, bias_add_op) # Match the final layer, where there will not be an activation and instead # the output of the final BiasAdd must be quantized, so we treat it as the @@ -215,19 +272,32 @@ def _FindLayersToQuantize(graph): for match_result in final_layer_matcher.match_graph(graph): layer_op = match_result.get_op(layer_pattern) weight_tensor = match_result.get_tensor(weight_pattern) + if weight_tensor is None: + weight_tensor = match_result.get_tensor(folded_weight_pattern) activation_op = match_result.get_op(bias_add_pattern) - yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None) + if activation_op is None: + activation_op = match_result.get_op(folded_bias_add_pattern) + yield _LayerMatch(layer_op, weight_tensor, activation_op, None, None, None) + + +def _HasPostActivationBypass(activation_op): + for activation_tensor in activation_op.outputs: + for output_op in activation_tensor.consumers(): + if output_op.type == 'Add': + return True + return False class _LayerMatch(object): """Contains all information related to a matched Layer.""" def __init__(self, layer_op, weight_tensor, activation_op, bypass_op, - bias_add_op): + post_activation_bypass_op, bias_add_op): self._layer_op = layer_op self._weight_tensor = weight_tensor self._activation_op = activation_op self._bypass_op = bypass_op + self._post_activation_bypass_op = post_activation_bypass_op self._bias_add_op = bias_add_op @property @@ -246,6 +316,10 @@ class _LayerMatch(object): def bypass_op(self): return self._bypass_op + @property + def post_activation_bypass_op(self): + return self._post_activation_bypass_op + @property def bias_add_op(self): return self._bias_add_op diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index ef59475167..8e60f4b661 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -135,6 +135,35 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertTrue('FakeQuantWithMinMaxVars' in [op.type for op in bias_add_op.outputs[0].consumers()]) + def testPostActivationBypassQuantized(self): + self._RunTestOverParameters(self._TestPostActivationBypassQuantized) + + def _TestPostActivationBypassQuantized(self, is_training): + graph = ops.Graph() + with graph.as_default(): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + input2 = array_ops.zeros((batch_size, height / 2, width / 2, 32)) + conv = conv2d( + input1, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=array_ops.identity, + scope='test/test') + bypass_tensor = math_ops.add(conv, input2, name='test/add') + _ = array_ops.identity(bypass_tensor, name='test/output') + + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) + + # Ensure that the bypass node is preceded and followed by + # FakeQuantWithMinMaxVars operations. + self.assertTrue('FakeQuantWithMinMaxVars' in + [c.type for c in bypass_tensor.consumers()]) + self.assertTrue('FakeQuantWithMinMaxVars' in + [i.op.type for i in bypass_tensor.op.inputs]) + def _WeightInit(self, stddev): """Returns truncated normal variable initializer. -- GitLab From 63fee8ee24dc86d4a008ae153505ff838fb38849 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 20:55:51 -0700 Subject: [PATCH 225/960] Add `ostream<<` to `tensorflow::TensorShapeBase`. Reason: Allow `LOG(ERROR) << shape` (currently disallowed). PiperOrigin-RevId: 189687162 --- tensorflow/core/framework/tensor_shape.h | 8 +++++++- tensorflow/core/framework/tensor_shape_test.cc | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index fe2ba375aa..be7e740c33 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -271,6 +271,12 @@ class TensorShapeBase : public TensorShapeRep { friend Status MakeShapeHelper(const T*, int64, S*); }; +/// Outputs `TensorShapeBase` to `std::ostream`. +template +std::ostream& operator<<(std::ostream& os, const TensorShapeBase& tsb) { + return os << tsb.DebugString(); +} + /// Represents the shape of a Tensor. /// /// A tensor's shape is denoted by its number of dimensions and a size for each diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc index d7517bb311..6329aa6d8e 100644 --- a/tensorflow/core/framework/tensor_shape_test.cc +++ b/tensorflow/core/framework/tensor_shape_test.cc @@ -198,6 +198,13 @@ TEST(TensorShapeTest, DataType) { EXPECT_EQ(TensorShapeTestHelper::data_type(&s2), DT_INVALID); } +TEST(TensorShapeTest, ostream) { + TensorShape s({10, 5, 4}); + std::stringstream ss; + ss << s; + EXPECT_EQ(ss.str(), "[10,5,4]"); +} + // ----------------------------------------------------------------------- // An old implementation of TensorShape using a different representation, // preserved here in the unittest to allow us to have a randomized unittest -- GitLab From ea1718feb535d4dfc47c136b5cb59cf18b77259b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 21:18:06 -0700 Subject: [PATCH 226/960] Update ops-related pbtxt files. PiperOrigin-RevId: 189688675 --- .../core/ops/compat/ops_history.v1.pbtxt | 305 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 305 ++++++++++++++++++ 2 files changed, 610 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 85dd1a423a..992e943966 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10867,6 +10867,14 @@ op { } } } +op { + name: "CloseSummaryWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "CompareAndBitpack" input_arg { @@ -12822,6 +12830,54 @@ op { } } } +op { + name: "CreateSummaryDbWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "db_uri" + type: DT_STRING + } + input_arg { + name: "experiment_name" + type: DT_STRING + } + input_arg { + name: "run_name" + type: DT_STRING + } + input_arg { + name: "user_name" + type: DT_STRING + } + is_stateful: true +} +op { + name: "CreateSummaryFileWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "logdir" + type: DT_STRING + } + input_arg { + name: "max_queue" + type: DT_INT32 + } + input_arg { + name: "flush_millis" + type: DT_INT32 + } + input_arg { + name: "filename_suffix" + type: DT_STRING + } + is_stateful: true +} op { name: "CropAndResize" input_arg { @@ -19468,6 +19524,14 @@ op { } } } +op { + name: "FlushSummaryWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "FractionalAvgPool" input_arg { @@ -21770,6 +21834,18 @@ op { type: "string" } } +op { + name: "ImportEvent" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "event" + type: DT_STRING + } + is_stateful: true +} op { name: "InTopK" input_arg { @@ -62152,6 +62228,28 @@ op { } } } +op { + name: "SummaryWriter" + output_arg { + name: "writer" + type: DT_RESOURCE + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} op { name: "Svd" input_arg { @@ -66477,6 +66575,39 @@ op { } is_stateful: true } +op { + name: "WriteAudioSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "tensor" + type: DT_FLOAT + } + input_arg { + name: "sample_rate" + type: DT_FLOAT + } + attr { + name: "max_outputs" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "WriteFile" input_arg { @@ -66488,6 +66619,180 @@ op { type: DT_STRING } } +op { + name: "WriteGraphSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tensor" + type: DT_STRING + } + is_stateful: true +} +op { + name: "WriteHistogramSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "values" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + is_stateful: true +} +op { + name: "WriteImageSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "tensor" + type_attr: "T" + } + input_arg { + name: "bad_color" + type: DT_UINT8 + } + attr { + name: "max_images" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_UINT8 + type: DT_FLOAT + type: DT_HALF + } + } + } + is_stateful: true +} +op { + name: "WriteScalarSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "value" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + is_stateful: true +} +op { + name: "WriteSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tensor" + type_attr: "T" + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "summary_metadata" + type: DT_STRING + } + attr { + name: "T" + type: "type" + } + is_stateful: true +} op { name: "ZerosLike" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 3faa4eeada..3beebdc6d4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -4384,6 +4384,14 @@ op { } } } +op { + name: "CloseSummaryWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "CompareAndBitpack" input_arg { @@ -5473,6 +5481,54 @@ op { } } } +op { + name: "CreateSummaryDbWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "db_uri" + type: DT_STRING + } + input_arg { + name: "experiment_name" + type: DT_STRING + } + input_arg { + name: "run_name" + type: DT_STRING + } + input_arg { + name: "user_name" + type: DT_STRING + } + is_stateful: true +} +op { + name: "CreateSummaryFileWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "logdir" + type: DT_STRING + } + input_arg { + name: "max_queue" + type: DT_INT32 + } + input_arg { + name: "flush_millis" + type: DT_INT32 + } + input_arg { + name: "filename_suffix" + type: DT_STRING + } + is_stateful: true +} op { name: "CropAndResize" input_arg { @@ -8800,6 +8856,14 @@ op { } } } +op { + name: "FlushSummaryWriter" + input_arg { + name: "writer" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "FractionalAvgPool" input_arg { @@ -10367,6 +10431,18 @@ op { type: "string" } } +op { + name: "ImportEvent" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "event" + type: DT_STRING + } + is_stateful: true +} op { name: "InTopK" input_arg { @@ -28659,6 +28735,28 @@ op { } } } +op { + name: "SummaryWriter" + output_arg { + name: "writer" + type: DT_RESOURCE + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} op { name: "Svd" input_arg { @@ -31562,6 +31660,39 @@ op { } is_stateful: true } +op { + name: "WriteAudioSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "tensor" + type: DT_FLOAT + } + input_arg { + name: "sample_rate" + type: DT_FLOAT + } + attr { + name: "max_outputs" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "WriteFile" input_arg { @@ -31573,6 +31704,180 @@ op { type: DT_STRING } } +op { + name: "WriteGraphSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tensor" + type: DT_STRING + } + is_stateful: true +} +op { + name: "WriteHistogramSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "values" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + is_stateful: true +} +op { + name: "WriteImageSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "tensor" + type_attr: "T" + } + input_arg { + name: "bad_color" + type: DT_UINT8 + } + attr { + name: "max_images" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_UINT8 + type: DT_FLOAT + type: DT_HALF + } + } + } + is_stateful: true +} +op { + name: "WriteScalarSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "value" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + is_stateful: true +} +op { + name: "WriteSummary" + input_arg { + name: "writer" + type: DT_RESOURCE + } + input_arg { + name: "step" + type: DT_INT64 + } + input_arg { + name: "tensor" + type_attr: "T" + } + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "summary_metadata" + type: DT_STRING + } + attr { + name: "T" + type: "type" + } + is_stateful: true +} op { name: "ZerosLike" input_arg { -- GitLab From 407ddd1c0539cfc5d33ab2629230eab5a958b7d4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 21:45:45 -0700 Subject: [PATCH 227/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 189690096 --- tensorflow/go/op/wrappers.go | 12047 ++++++++++++++++----------------- 1 file changed, 5827 insertions(+), 6220 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 469d1e9adb..e5256af1e8 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -38,188 +38,6 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in return list, start + size, nil } -// WriteImageSummaryAttr is an optional argument to WriteImageSummary. -type WriteImageSummaryAttr func(optionalAttr) - -// WriteImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} - -// Writes a `Summary` protocol buffer with images. -// -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: -// -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. -// -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: -// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. -// -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. -// -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. -// -// Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. -// bad_color: Color to use for pixels with non-finite values. -// -// Returns the created operation. -func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "WriteImageSummary", - Input: []tf.Input{ - writer, step, tag, tensor, bad_color, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Outputs a `tf.Event` protocol buffer. -// -// When CreateSummaryDbWriter is being used, this op can be useful for -// importing data from event logs. -// -// Arguments: -// writer: A handle to a summary writer. -// event: A string containing a binary-encoded tf.Event proto. -// -// Returns the created operation. -func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ImportEvent", - Input: []tf.Input{ - writer, event, - }, - } - return scope.AddOperation(opspec) -} - -// Outputs a `Summary` protocol buffer with a tensor. -// -// Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tensor: A tensor to serialize. -// tag: The summary's tag. -// summary_metadata: Serialized SummaryMetadata protocol buffer containing -// plugin-related metadata for this summary. -// -// Returns the created operation. -func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WriteSummary", - Input: []tf.Input{ - writer, step, tensor, tag, summary_metadata, - }, - } - return scope.AddOperation(opspec) -} - -// Creates summary database writer accessible by given resource handle. -// -// This can be used to write tensors from the execution graph directly -// to a database. Only SQLite is supported right now. This function -// will create the schema if it doesn't exist. Entries in the Users, -// Experiments, and Runs tables will be created automatically if they -// don't already exist. -// -// Arguments: -// writer: Handle to SummaryWriter resource to overwrite. -// db_uri: For example "file:/tmp/foo.sqlite". -// experiment_name: Can't contain ASCII control characters or <>. Case -// sensitive. If empty, then the Run will not be associated with any -// Experiment. -// run_name: Can't contain ASCII control characters or <>. Case sensitive. -// If empty, then each Tag will not be associated with any Run. -// user_name: Must be valid as both a DNS label and Linux username. If -// empty, then the Experiment will not be associated with any User. -// -// Returns the created operation. -func CreateSummaryDbWriter(scope *Scope, writer tf.Output, db_uri tf.Output, experiment_name tf.Output, run_name tf.Output, user_name tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CreateSummaryDbWriter", - Input: []tf.Input{ - writer, db_uri, experiment_name, run_name, user_name, - }, - } - return scope.AddOperation(opspec) -} - -// Creates a summary file writer accessible by the given resource handle. -// -// Arguments: -// writer: A handle to the summary writer resource -// logdir: Directory where the event file will be written. -// max_queue: Size of the queue of pending events and summaries. -// flush_millis: How often, in milliseconds, to flush the pending events and -// summaries to disk. -// filename_suffix: Every event file's name is suffixed with this suffix. -// -// Returns the created operation. -func CreateSummaryFileWriter(scope *Scope, writer tf.Output, logdir tf.Output, max_queue tf.Output, flush_millis tf.Output, filename_suffix tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CreateSummaryFileWriter", - Input: []tf.Input{ - writer, logdir, max_queue, flush_millis, filename_suffix, - }, - } - return scope.AddOperation(opspec) -} - // FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient. type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) @@ -509,63 +327,317 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua return op.Output(0) } -// Replaces the contents of the table with the specified keys and values. +// Scatter `updates` into a new (initially zero) tensor according to `indices`. // -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. +// Creates a new tensor by applying sparse `updates` to individual +// values or slices within a zero tensor of the given `shape` according to +// indices. This operator is the inverse of the @{tf.gather_nd} operator which +// extracts values or slices from a given tensor. +// +// **WARNING**: The order in which updates are applied is nondeterministic, so the +// output will be nondeterministic if `indices` contains duplicates. +// +// `indices` is an integer tensor containing indices into a new tensor of shape +// `shape`. The last dimension of `indices` can be at most the rank of `shape`: +// +// indices.shape[-1] <= shape.rank +// +// The last dimension of `indices` corresponds to indices into elements +// (if `indices.shape[-1] = shape.rank`) or slices +// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +// `shape`. `updates` is a tensor with shape +// +// indices.shape[:-1] + shape[indices.shape[-1]:] +// +// The simplest form of scatter is to insert individual elements in a tensor by +// index. For example, say we want to insert 4 scattered elements in a rank-1 +// tensor with 8 elements. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// shape = tf.constant([8]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [0, 11, 0, 10, 9, 0, 0, 12] +// +// We can also, insert entire slices of a higher rank tensor all at once. For +// example, if we wanted to insert two slices in the first dimension of a +// rank-3 tensor with two matrices of new values. +// +//
+// +//
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[0], [2]]) +// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]], +// [[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]]]) +// shape = tf.constant([4, 4, 4]) +// scatter = tf.scatter_nd(indices, updates, shape) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` +// +// The resulting tensor would look like this: +// +// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], +// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. +// indices: Index tensor. +// updates: Updates to scatter into output. +// shape: 1-D. The shape of the resulting tensor. // -// Returns the created operation. -func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// Returns A new tensor with the given shape and updates applied according +// to the indices. +func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableImportV2", + Type: "ScatterNd", Input: []tf.Input{ - table_handle, keys, values, + indices, updates, shape, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MapPeekAttr is an optional argument to MapPeek. -type MapPeekAttr func(optionalAttr) +// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2. +type QuantizeAndDequantizeV2Attr func(optionalAttr) -// MapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value. // -// REQUIRES: value >= 0 -func MapPeekCapacity(value int64) MapPeekAttr { +// value: If the quantization is signed or unsigned. +// If not specified, defaults to true +func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["capacity"] = value + m["signed_input"] = value } } -// MapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value. // -// REQUIRES: value >= 0 -func MapPeekMemoryLimit(value int64) MapPeekAttr { +// value: The bitwidth of the quantization. +// If not specified, defaults to 8 +func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["memory_limit"] = value + m["num_bits"] = value } } -// MapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapPeekContainer(value string) MapPeekAttr { +// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value. +// +// value: If the range is given or should be computed from the tensor. +// If not specified, defaults to false +func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["container"] = value + m["range_given"] = value } } -// MapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" +// Quantizes then dequantizes a tensor. +// +// This op simulates the precision loss from the quantized forward pass by: +// 1. Quantizing the tensor to fixed point numbers, which should match the target +// quantization method when it is used in inference. +// 2. Dequantizing it back to floating point numbers for the following ops, most +// likely matmul. +// +// There are different ways to quantize. This version does not use the full range +// of the output type, choosing to elide the lowest possible value for symmetry +// (e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit +// quantization), so that 0.0 maps to 0. +// +// To perform this op, we first find the range of values in our tensor. The range +// we use is always centered on 0, so we find m such that +// +// 1. m = max(abs(input_min), abs(input_max)) if range_given is true, +// 2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise. +// +// Our input tensor range is then [-m, m]. +// +// Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed]. +// If signed_input is true, this is +// +// [min_fixed, max_fixed ] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]. +// +// Otherwise, if signed_input is false, the fixed-point range is +// +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]. +// +// From this we compute our scaling factor, s: +// +// s = (max_fixed - min_fixed) / (2 * m). +// +// Now we can quantize and dequantize the elements of our tensor. An element e +// is transformed into e': +// +// e' = (e * s).round_to_nearest() / s. +// +// Note that we have a different number of buckets in the signed vs. unsigned +// cases. For example, if num_bits == 8, we get 254 buckets in the signed case +// vs. 255 in the unsigned case. +// +// For example, suppose num_bits = 8 and m = 1. Then +// +// [min_fixed, max_fixed] = [-127, 127], and +// s = (127 + 127) / 2 = 127. +// +// Given the vector {-1, -0.5, 0, 0.3}, this is quantized to +// {-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}. +// +// Arguments: +// input: Tensor to quantize and then dequantize. +// input_min: If range_given, this is the min of the range, otherwise this input +// will be ignored. +// input_max: If range_given, this is the max of the range, otherwise this input +// will be ignored. +func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizeAndDequantizeV2", + Input: []tf.Input{ + input, input_min, input_max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Bitcasts a tensor from one type to another without copying data. +// +// Given a tensor `input`, this operation returns a tensor that has the same buffer +// data as `input` with datatype `type`. +// +// If the input datatype `T` is larger than the output datatype `type` then the +// shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)]. +// +// If `T` is smaller than `type`, the operator requires that the rightmost +// dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from +// [..., sizeof(`type`)/sizeof(`T`)] to [...]. +// +// *NOTE*: Bitcast is implemented as a low-level cast, so machines with different +// endian orderings will give different results. +func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"type": type_} + opspec := tf.OpSpec{ + Type: "Bitcast", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Extract `patches` from `images` and put them in the "depth" output dimension. +// +// Arguments: +// images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. +// ksizes: The size of the sliding window for each dimension of `images`. +// strides: 1-D of length 4. How far the centers of two consecutive patches are in +// the images. Must be: `[1, stride_rows, stride_cols, 1]`. +// rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the +// input stride, specifying how far two consecutive patch samples are in the +// input. Equivalent to extracting patches with +// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by +// subsampling them spatially by a factor of `rates`. This is equivalent to +// `rate` in dilated (a.k.a. Atrous) convolutions. +// padding: The type of padding algorithm to use. +// +// We specify the size-related attributes as: +// +// ```python +// ksizes = [1, ksize_rows, ksize_cols, 1] +// strides = [1, strides_rows, strides_cols, 1] +// rates = [1, rates_rows, rates_cols, 1] +// ``` +// +// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * +// ksize_cols * depth]` containing image patches with size +// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note +// `out_rows` and `out_cols` are the dimensions of the output patches. +func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding} + opspec := tf.OpSpec{ + Type: "ExtractImagePatches", + Input: []tf.Input{ + images, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MapPeekAttr is an optional argument to MapPeek. +type MapPeekAttr func(optionalAttr) + +// MapPeekCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapPeekCapacity(value int64) MapPeekAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapPeekMemoryLimit(value int64) MapPeekAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapPeekContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapPeekContainer(value string) MapPeekAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" func MapPeekSharedName(value string) MapPeekAttr { return func(m optionalAttr) { m["shared_name"] = value @@ -1645,6 +1717,54 @@ func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// BiasAddAttr is an optional argument to BiasAdd. +type BiasAddAttr func(optionalAttr) + +// BiasAddDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// The tensor will be added to "in_channels", the third-to-the-last +// dimension. +// If not specified, defaults to "NHWC" +func BiasAddDataFormat(value string) BiasAddAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Adds `bias` to `value`. +// +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. +// +// Arguments: +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. +// +// Returns Broadcasted sum of `value` and `bias`. +func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "BiasAdd", + Input: []tf.Input{ + value, bias, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. type SparseReduceSumSparseAttr func(optionalAttr) @@ -1698,109 +1818,13 @@ func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values t return op.Output(0), op.Output(1), op.Output(2) } -// BiasAddAttr is an optional argument to BiasAdd. -type BiasAddAttr func(optionalAttr) - -// BiasAddDataFormat sets the optional data_format attribute to value. +// Returns x + y element-wise. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddDataFormat(value string) BiasAddAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Adds `bias` to `value`. -// -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. -// -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. -// -// Returns Broadcasted sum of `value` and `bias`. -func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BiasAdd", - Input: []tf.Input{ - value, bias, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BiasAddGradAttr is an optional argument to BiasAddGrad. -type BiasAddGradAttr func(optionalAttr) - -// BiasAddGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddGradDataFormat(value string) BiasAddGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// The backward operation for "BiasAdd" on the "bias" tensor. -// -// It accumulates all the values from out_backprop into the feature dimension. -// For NHWC data format, the feature dimension is the last. For NCHW data format, -// the feature dimension is the third-to-last. -// -// Arguments: -// out_backprop: Any number of dimensions. -// -// Returns 1-D with size the feature dimension of `out_backprop`. -func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BiasAddGrad", - Input: []tf.Input{ - out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x + y element-wise. -// -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return +// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } opspec := tf.OpSpec{ Type: "AddV2", @@ -2278,125 +2302,6 @@ func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) return op.Output(0) } -// Computes inverse hyperbolic cosine of x element-wise. -func Acosh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acosh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SerializeManySparseAttr is an optional argument to SerializeManySparse. -type SerializeManySparseAttr func(optionalAttr) - -// SerializeManySparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. -// -// The `SparseTensor` must have rank `R` greater than 1, and the first dimension -// is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The serialized -// `SparseTensor` objects going into each row of `serialized_sparse` will have -// rank `R-1`. -// -// The minibatch size `N` is extracted from `sparse_shape[0]`. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeManySparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV2Attr is an optional argument to TensorArrayV2. -type TensorArrayV2Attr func(optionalAttr) - -// TensorArrayV2ElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. -// If not specified, defaults to false -func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } -} - -// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. -// If not specified, defaults to true -func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} - -// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. -// If not specified, defaults to "" -func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { - return func(m optionalAttr) { - m["tensor_array_name"] = value - } -} - -// Deprecated. Use TensorArrayV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayV3 -func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayV2", - Input: []tf.Input{ - size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes the mean along sparse segments of a tensor. // // Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is @@ -3197,30 +3102,6 @@ func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_in return op.Output(0), op.Output(1), op.Output(2) } -// Updates the table to associates keys with values. -// -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. -// -// Returns the created operation. -func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableInsertV2", - Input: []tf.Input{ - table_handle, keys, values, - }, - } - return scope.AddOperation(opspec) -} - // FractionalAvgPoolAttr is an optional argument to FractionalAvgPool. type FractionalAvgPoolAttr func(optionalAttr) @@ -3802,152 +3683,17 @@ func IsNan(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. -type FractionalAvgPoolGradAttr func(optionalAttr) - -// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` +// Computes rectified linear gradients for a Relu operation. // -// `value 20 5 16 3 7` +// Arguments: +// gradients: The backpropagated gradients to the corresponding Relu operation. +// features: The features passed as input to the corresponding Relu operation, OR +// the outputs of that operation (both work equivalently). // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. -// If not specified, defaults to false -func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { - return func(m optionalAttr) { - m["overlapping"] = value - } -} - -// Computes gradient of the FractionalAvgPool function. -// -// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for -// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of -// out_backprop to those indices that form the same pooling cell. Therefore, we -// just need to know the shape of original input tensor, instead of the whole -// tensor. -// -// Arguments: -// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_avg_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. -// -// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. -func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FractionalAvgPoolGrad", - Input: []tf.Input{ - orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for the exponential linear (Elu) operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Elu operation. -// outputs: The outputs of the corresponding Elu operation. -// -// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, -// `gradients` otherwise. -func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "EluGrad", - Input: []tf.Input{ - gradients, outputs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. -// -// Note that the hash function may change from time to time. -// This functionality will be deprecated and it's recommended to use -// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. -// -// Arguments: -// -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucket", - Input: []tf.Input{ - string_tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be taken. A value of `-1` indicates that all of `input_dataset` -// is taken. -// -// -func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "TakeDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes rectified linear gradients for a Relu operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Relu operation. -// features: The features passed as input to the corresponding Relu operation, OR -// the outputs of that operation (both work equivalently). -// -// Returns `gradients * (features > 0)`. -func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return +// Returns `gradients * (features > 0)`. +func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return } opspec := tf.OpSpec{ Type: "ReluGrad", @@ -4273,44 +4019,6 @@ func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padd return op.Output(0) } -// Bucketizes 'input' based on 'boundaries'. -// -// For example, if the inputs are -// boundaries = [0, 10, 100] -// input = [[-5, 10000] -// [150, 10] -// [5, 100]] -// -// then the output will be -// output = [[0, 3] -// [3, 2] -// [1, 3]] -// -// Arguments: -// input: Any shape of Tensor contains with int or float type. -// boundaries: A sorted list of floats gives the boundary of the buckets. -// -// Returns Same shape with 'input', each value of input replaced with bucket index. -// -// @compatibility(numpy) -// Equivalent to np.digitize. -// @end_compatibility -func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"boundaries": boundaries} - opspec := tf.OpSpec{ - Type: "Bucketize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes gradients of the maxpooling function. // // Arguments: @@ -4717,45 +4425,47 @@ func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output return scope.AddOperation(opspec) } -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) +// MaxPoolGradAttr is an optional argument to MaxPoolGrad. +type MaxPoolGradAttr func(optionalAttr) -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. +// MaxPoolGradDataFormat sets the optional data_format attribute to value. // -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradDataFormat(value string) MaxPoolGradAttr { return func(m optionalAttr) { - m["element_shape"] = value + m["data_format"] = value } } -// Gather specific elements from the TensorArray into output `value`. -// -// All elements selected by `indices` must have the same shape. +// Computes gradients of the maxpooling function. // // Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). -func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// Returns Gradients w.r.t. the input to `max_pool`. +func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "MaxPoolGrad", Input: []tf.Input{ - handle, indices, flow_in, + orig_input, orig_output, grad, }, Attrs: attrs, } @@ -4763,333 +4473,310 @@ func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. -// -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. +// CropAndResizeAttr is an optional argument to CropAndResize. +type CropAndResizeAttr func(optionalAttr) + +// CropAndResizeMethod sets the optional method attribute to value. // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeMethod(value string) CropAndResizeAttr { + return func(m optionalAttr) { + m["method"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. // -// *NOTE*: `Maximum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return +// value: Value used for extrapolation, when applicable. +// If not specified, defaults to 0 +func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { + return func(m optionalAttr) { + m["extrapolation_value"] = value } - opspec := tf.OpSpec{ - Type: "Maximum", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Outputs all keys and values in the table. +// Extracts crops from the input image tensor and bilinearly resizes them (possibly // -// Arguments: -// table_handle: Handle to the table. +// with aspect ratio change) to a common output size specified by `crop_size`. This +// is more general than the `crop_to_bounding_box` op which extracts a fixed size +// slice from the input image and does not allow resizing or aspect ratio change. // +// Returns a tensor with `crops` from the input `image` at positions defined at the +// bounding box locations in `boxes`. The cropped boxes are all resized (with +// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The +// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The +// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the +// method will give identical results to using `tf.image.resize_bilinear()` +// with `align_corners=True`. // +// Arguments: +// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +// Both `image_height` and `image_width` need to be positive. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All +// cropped image patches are resized to this size. The aspect ratio of the image +// content is not preserved. Both `crop_height` and `crop_width` need to be +// positive. // -// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. -func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { +// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LookupTableExportV2", + Type: "CropAndResize", Input: []tf.Input{ - table_handle, + image, boxes, box_ind, crop_size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Real-valued fast Fourier transform. +// Fills empty rows in the input 2-D `SparseTensor` with a default value. // -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. +// The input `SparseTensor` is represented via the tuple of inputs +// (`indices`, `values`, `dense_shape`). The output `SparseTensor` has the +// same `dense_shape` but with indices `output_indices` and values +// `output_values`. // -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. +// This op inserts a single entry for every row that doesn't have any values. +// The index is created as `[row, 0, ..., 0]` and the inserted value +// is `default_value`. // -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: // -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. +// [0, 1]: a +// [0, 3]: b +// [2, 0]: c +// [3, 1]: d // -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. +// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: // -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RFFT", - Input: []tf.Input{ - input, fft_length, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) - -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Converts two real numbers to a complex number. +// [0, 1]: a +// [0, 3]: b +// [1, 0]: default_value +// [2, 0]: c +// [3, 1]: d +// [4, 0]: default_value // -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. +// The output `SparseTensor` will be in row-major order and will have the +// same shape as the input. // -// The input tensors `real` and `imag` must have the same shape. +// This op also returns an indicator vector shaped `[dense_shape[0]]` such that // -// For example: +// empty_row_indicator[i] = True iff row i was an empty row. // -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { +// And a reverse index map vector shaped `[indices.shape[0]]` that is used during +// backpropagation, +// +// reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :] +// +// Arguments: +// indices: 2-D. the indices of the sparse tensor. +// values: 1-D. the values of the sparse tensor. +// dense_shape: 1-D. the shape of the sparse tensor. +// default_value: 0-D. default value to insert into location `[row, 0, ..., 0]` +// for rows missing from the input sparse tensor. +// output indices: 2-D. the indices of the filled sparse tensor. +// +// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the +// input sparse tensor.1-D. a map from the input indices to the output indices. +func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Complex", + Type: "SparseFillEmptyRows", Input: []tf.Input{ - real, imag, + indices, values, dense_shape, default_value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) - -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { - return func(m optionalAttr) { - m["Tout"] = value - } + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Returns the imaginary part of a complex number. +// Reverses specific dimensions of a tensor. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. +// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions +// of `tensor`, this operation reverses each dimension i of `tensor` where +// `dims[i]` is `True`. +// +// `tensor` can have up to 8 dimensions. The number of dimensions +// of `tensor` must equal the number of elements in `dims`. In other words: +// +// `rank(tensor) = size(dims)` // // For example: // // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] +// # tensor 't' is [[[[ 0, 1, 2, 3], +// # [ 4, 5, 6, 7], +// # [ 8, 9, 10, 11]], +// # [[12, 13, 14, 15], +// # [16, 17, 18, 19], +// # [20, 21, 22, 23]]]] +// # tensor 't' shape is [1, 2, 3, 4] +// +// # 'dims' is [False, False, False, True] +// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], +// [ 7, 6, 5, 4], +// [ 11, 10, 9, 8]], +// [[15, 14, 13, 12], +// [19, 18, 17, 16], +// [23, 22, 21, 20]]]] +// +// # 'dims' is [False, True, False, False] +// reverse(t, dims) ==> [[[[12, 13, 14, 15], +// [16, 17, 18, 19], +// [20, 21, 22, 23] +// [[ 0, 1, 2, 3], +// [ 4, 5, 6, 7], +// [ 8, 9, 10, 11]]]] +// +// # 'dims' is [False, False, True, False] +// reverse(t, dims) ==> [[[[8, 9, 10, 11], +// [4, 5, 6, 7], +// [0, 1, 2, 3]] +// [[20, 21, 22, 23], +// [16, 17, 18, 19], +// [12, 13, 14, 15]]]] // ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { +// +// Arguments: +// tensor: Up to 8-D. +// dims: 1-D. The dimensions to reverse. +// +// Returns The same shape as `tensor`. +func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Imag", + Type: "Reverse", Input: []tf.Input{ - input, + tensor, dims, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). +// Computes log softmax activations. // -// The Hurwitz zeta function is defined as: +// For each batch `i` and class `j` we have // +// logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) // -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { +// Arguments: +// logits: 2-D with shape `[batch_size, num_classes]`. +// +// Returns Same shape as `logits`. +func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Zeta", + Type: "LogSoftmax", Input: []tf.Input{ - x, q, + logits, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// LRNGradAttr is an optional argument to LRNGrad. -type LRNGradAttr func(optionalAttr) - -// LRNGradDepthRadius sets the optional depth_radius attribute to value. +// Computes the inverse permutation of a tensor. // -// value: A depth radius. -// If not specified, defaults to 5 -func LRNGradDepthRadius(value int64) LRNGradAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNGradBias sets the optional bias attribute to value. +// This operation computes the inverse of an index permutation. It takes a 1-D +// integer tensor `x`, which represents the indices of a zero-based array, and +// swaps each value with its index position. In other words, for an output tensor +// `y` and an input tensor `x`, this operation computes the following: // -// value: An offset (usually > 0 to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNGradBias(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNGradAlpha sets the optional alpha attribute to value. +// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` // -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNGradAlpha(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// LRNGradBeta sets the optional beta attribute to value. +// The values must include 0. There can be no duplicate values or negative values. // -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNGradBeta(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["beta"] = value - } -} - -// Gradients for Local Response Normalization. +// For example: +// +// ``` +// # tensor `x` is [3, 4, 0, 2, 1] +// invert_permutation(x) ==> [2, 4, 3, 0, 1] +// ``` // // Arguments: -// input_grads: 4-D with shape `[batch, height, width, channels]`. -// input_image: 4-D with shape `[batch, height, width, channels]`. -// output_image: 4-D with shape `[batch, height, width, channels]`. +// x: 1-D. // -// Returns The gradients for LRN. -func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { +// Returns 1-D. +func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "LRNGrad", + Type: "InvertPermutation", Input: []tf.Input{ - input_grads, input_image, output_image, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AnyAttr is an optional argument to Any. -type AnyAttr func(optionalAttr) - -// AnyKeepDims sets the optional keep_dims attribute to value. +// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AnyKeepDims(value bool) AnyAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the "logical or" of elements across dimensions of a tensor. +// This operation folds the padded areas of `input` by `MirrorPad` according to the +// `paddings` you specify. `paddings` must be the same as `paddings` argument +// given to the corresponding `MirrorPad` op. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// The folded size of each dimension D of the output is: +// +// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. +// # 'paddings' is [[0, 1]], [0, 1]]. +// # 'mode' is SYMMETRIC. +// # rank of 't' is 2. +// pad(t, paddings) ==> [[ 1, 5] +// [11, 28]] +// ``` // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// input: The input tensor to be folded. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// mode: The mode used in the `MirrorPad` op. // -// Returns The reduced tensor. -func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { +// Returns The folded tensor. +func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"mode": mode} opspec := tf.OpSpec{ - Type: "Any", + Type: "MirrorPadGrad", Input: []tf.Input{ - input, axis, + input, paddings, }, Attrs: attrs, } @@ -5097,41 +4784,36 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou return op.Output(0) } -// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. -type ResourceApplyFtrlAttr func(optionalAttr) +// BiasAddGradAttr is an optional argument to BiasAddGrad. +type BiasAddGradAttr func(optionalAttr) -// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// BiasAddGradDataFormat sets the optional data_format attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// The tensor will be added to "in_channels", the third-to-the-last +// dimension. +// If not specified, defaults to "NHWC" +func BiasAddGradDataFormat(value string) BiasAddGradAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["data_format"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// The backward operation for "BiasAdd" on the "bias" tensor. // -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// It accumulates all the values from out_backprop into the feature dimension. +// For NHWC data format, the feature dimension is the last. For NCHW data format, +// the feature dimension is the third-to-last. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 regulariation. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. +// out_backprop: Any number of dimensions. // -// Returns the created operation. -func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { +// Returns 1-D with size the feature dimension of `out_backprop`. +func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -5140,237 +4822,272 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf. a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrl", + Type: "BiasAddGrad", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, lr_power, + out_backprop, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) +// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. +type FusedBatchNormV2Attr func(optionalAttr) -// RandomUniformSeed sets the optional seed attribute to value. +// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr { return func(m optionalAttr) { - m["seed"] = value + m["epsilon"] = value } } -// RandomUniformSeed2 sets the optional seed2 attribute to value. +// FusedBatchNormV2DataFormat sets the optional data_format attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["data_format"] = value } } -// Outputs random values from a uniform distribution. +// FusedBatchNormV2IsTraining sets the optional is_training attribute to value. // -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. +// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. // -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { +// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomUniform", + Type: "FusedBatchNormV2", Input: []tf.Input{ - shape, + x, scale, offset, mean, variance, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) -// AssertSummarize sets the optional summarize attribute to value. +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. // -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { return func(m optionalAttr) { - m["summarize"] = value + m["element_shape"] = value } } -// Asserts that the given condition is true. +// Gather specific elements from the TensorArray into output `value`. // -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. +// All elements selected by `indices` must have the same shape. // // Arguments: -// condition: The condition to evaluate. -// data: The tensors to print out when condition is false. +// handle: The handle to a TensorArray. +// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Returns the created operation. -func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). +func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Assert", + Type: "TensorArrayGatherV3", Input: []tf.Input{ - condition, tf.OutputList(data), + handle, indices, flow_in, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). +// Converts each string in the input Tensor to its hash mod by a number of buckets. // -// For each entry in `x`, calculates the number of `1` (on) bits in the binary -// representation of that entry. +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. // -// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into -// `int32` or `int64` and perform the bitcount on the result, than to feed in -// 8- or 16-bit inputs and then aggregate the resulting counts. -func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "PopulationCount", + Type: "StringToHashBucketFast", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Split a `SparseTensor` into `num_split` tensors along one dimension. -// -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. -// For example, if `split_dim = 1` and `num_split = 2` and the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] -// -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. +// Returns the max of x and y (i.e. x > y ? x : y) element-wise. // -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { +// *NOTE*: `Maximum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "SparseSplit", + Type: "Maximum", Input: []tf.Input{ - split_dim, indices, values, shape, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape + return op.Output(0) } -// Returns the truth value of (x < y) element-wise. +// Real-valued fast Fourier transform. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. +// +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Less", + Type: "RFFT", Input: []tf.Input{ - x, y, + input, fft_length, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedReluXAttr is an optional argument to QuantizedReluX. -type QuantizedReluXAttr func(optionalAttr) +// LRNGradAttr is an optional argument to LRNGrad. +type LRNGradAttr func(optionalAttr) -// QuantizedReluXOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { +// LRNGradDepthRadius sets the optional depth_radius attribute to value. +// +// value: A depth radius. +// If not specified, defaults to 5 +func LRNGradDepthRadius(value int64) LRNGradAttr { return func(m optionalAttr) { - m["out_type"] = value + m["depth_radius"] = value } } -// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` +// LRNGradBias sets the optional bias attribute to value. // -// Arguments: +// value: An offset (usually > 0 to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNGradBias(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["bias"] = value + } +} + +// LRNGradAlpha sets the optional alpha attribute to value. // +// value: A scale factor, usually positive. +// If not specified, defaults to 1 +func LRNGradAlpha(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNGradBeta sets the optional beta attribute to value. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNGradBeta(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Gradients for Local Response Normalization. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Arguments: +// input_grads: 4-D with shape `[batch, height, width, channels]`. +// input_image: 4-D with shape `[batch, height, width, channels]`. +// output_image: 4-D with shape `[batch, height, width, channels]`. +// +// Returns The gradients for LRN. +func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -5379,39 +5096,43 @@ func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_f a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedReluX", + Type: "LRNGrad", Input: []tf.Input{ - features, max_value, min_features, max_features, + input_grads, input_image, output_image, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) - -// RandomPoissonSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// AnyAttr is an optional argument to Any. +type AnyAttr func(optionalAttr) -// RandomPoissonSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed2(value int64) RandomPoissonAttr { +// AnyKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AnyKeepDims(value bool) AnyAttr { return func(m optionalAttr) { - m["seed2"] = value + m["keep_dims"] = value } } -// Use RandomPoissonV2 instead. +// Computes the "logical or" of elements across dimensions of a tensor. // -// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 -func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -5420,9 +5141,9 @@ func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...Ra a(attrs) } opspec := tf.OpSpec{ - Type: "RandomPoisson", + Type: "Any", Input: []tf.Input{ - shape, rate, + input, axis, }, Attrs: attrs, } @@ -5430,28 +5151,25 @@ func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...Ra return op.Output(0) } -// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. -type ResourceSparseApplyFtrlV2Attr func(optionalAttr) +// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. +type ResourceApplyFtrlAttr func(optionalAttr) -// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. // // value: If `True`, updating of the var and accum tensors will be protected // by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { +func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// Update '*var' according to the Ftrl-proximal scheme. // -// That is for rows we have grad for, we update var, accum and linear as follows: -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// accum_new = accum + grad * grad +// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var // quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 // var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 // accum = accum_new @@ -5461,15 +5179,13 @@ func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2At // accum: Should be from a Variable(). // linear: Should be from a Variable(). // grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. // lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. -// +// l1: L1 regulariation. Must be a scalar. +// l2: L2 regulariation. Must be a scalar. // lr_power: Scaling factor. Must be a scalar. // // Returns the created operation. -func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { +func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -5478,92 +5194,93 @@ func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, li a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrlV2", + Type: "ResourceApplyFtrl", Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, + var_, accum, linear, grad, lr, l1, l2, lr_power, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Associates the given iterator with the given statistics aggregator. +// RandomUniformAttr is an optional argument to RandomUniform. +type RandomUniformAttr func(optionalAttr) + +// RandomUniformSeed sets the optional seed attribute to value. // -// Returns the created operation. -func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IteratorSetStatsAggregator", - Input: []tf.Input{ - iterator_handle, stats_aggregator_handle, - }, +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformSeed(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed"] = value } - return scope.AddOperation(opspec) } -// Returns element-wise smallest integer in not less than x. -func Ceil(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Ceil", - Input: []tf.Input{ - x, - }, +// RandomUniformSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformSeed2(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the number of elements in the given table. +// Outputs random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. // // Arguments: -// table_handle: Handle to the table. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns Scalar that contains number of elements in the table. -func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { +// Returns A tensor of the specified shape filled with uniform random values. +func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LookupTableSizeV2", + Type: "RandomUniform", Input: []tf.Input{ - table_handle, + shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. -type ResizeBilinearGradAttr func(optionalAttr) +// AssertAttr is an optional argument to Assert. +type AssertAttr func(optionalAttr) -// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. +// AssertSummarize sets the optional summarize attribute to value. // -// value: If true, rescale grads by (orig_height - 1) / (height - 1), which -// exactly aligns the 4 corners of grads and original_image. If false, rescale by -// orig_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { +// value: Print this many entries of each tensor. +// If not specified, defaults to 3 +func AssertSummarize(value int64) AssertAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["summarize"] = value } } -// Computes the gradient of bilinear interpolation. +// Asserts that the given condition is true. +// +// If `condition` evaluates to false, print the list of tensors in `data`. +// `summarize` determines how many entries of the tensors to print. // // Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. +// condition: The condition to evaluate. +// data: The tensors to print out when condition is false. // -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { +// Returns the created operation. +func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -5572,71 +5289,126 @@ func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeBilinearGrad", + Type: "Assert", Input: []tf.Input{ - grads, original_image, + condition, tf.OutputList(data), }, Attrs: attrs, } + return scope.AddOperation(opspec) +} + +// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). +// +// For each entry in `x`, calculates the number of `1` (on) bits in the binary +// representation of that entry. +// +// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into +// `int32` or `int64` and perform the bitcount on the result, than to feed in +// 8- or 16-bit inputs and then aggregate the resulting counts. +func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "PopulationCount", + Input: []tf.Input{ + x, + }, + } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// Split a `SparseTensor` into `num_split` tensors along one dimension. // -// N is the size of the segment being reduced. +// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices +// `[0 : shape[split_dim] % num_split]` gets one extra dimension. +// For example, if `split_dim = 1` and `num_split = 2` and the input is // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] // -// Arguments: +// Graphically the output tensors are: // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// output_tensor[0] = shape = [2, 4] +// [ a ] +// [b c ] // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// output_tensor[1] = shape = [2, 3] +// [ d e ] +// [ ] +// +// Arguments: +// split_dim: 0-D. The dimension along which to split. Must be in the range +// `[0, rank(shape))`. +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// num_split: The number of ways to split. +// +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", + Type: "SparseSplit", Input: []tf.Input{ - data, indices, segment_ids, + split_dim, indices, values, shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + return output_indices, output_values, output_shape } -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. -type StatelessTruncatedNormalAttr func(optionalAttr) +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { +// RandomPoissonSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed(value int64) RandomPoissonAttr { return func(m optionalAttr) { - m["dtype"] = value + m["seed"] = value } } -// Outputs deterministic pseudorandom values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// RandomPoissonSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed2(value int64) RandomPoissonAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Use RandomPoissonV2 instead. // -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 +func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -5645,9 +5417,9 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "RandomPoisson", Input: []tf.Input{ - shape, seed, + shape, rate, }, Attrs: attrs, } @@ -5655,94 +5427,111 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt return op.Output(0) } -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) +// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. +type ResourceSparseApplyFtrlV2Attr func(optionalAttr) -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. +// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. // -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { return func(m optionalAttr) { - m["preferred_shard"] = value + m["use_locking"] = value } } -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. +// Update relevant entries in '*var' according to the Ftrl-proximal scheme. // -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. +// That is for rows we have grad for, we update var, accum and linear as follows: +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // // Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. // -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RestoreSlice", + Type: "ResourceSparseApplyFtrlV2", Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, + var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) +// Associates the given iterator with the given statistics aggregator. +// +// Returns the created operation. +func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IteratorSetStatsAggregator", + Input: []tf.Input{ + iterator_handle, stats_aggregator_handle, + }, + } + return scope.AddOperation(opspec) +} -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { +// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. +type DataFormatVecPermuteAttr func(optionalAttr) + +// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. +// +// value: source data format. +// If not specified, defaults to "NHWC" +func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { return func(m optionalAttr) { - m["out_idx"] = value + m["src_format"] = value } } -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. // -// For example: +// value: destination data format. +// If not specified, defaults to "NCHW" +func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { + return func(m optionalAttr) { + m["dst_format"] = value + } +} + +// Returns the permuted vector/tensor in the destination data format given the // -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` +// one in the source data format. // // Arguments: -// x: 1-D. +// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. // -// Returns 1-D.1-D.1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { +// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. +func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { if scope.Err() != nil { return } @@ -5751,32 +5540,77 @@ func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAtt a(attrs) } opspec := tf.OpSpec{ - Type: "UniqueWithCounts", + Type: "DataFormatVecPermute", Input: []tf.Input{ x, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. -type StatelessRandomNormalAttr func(optionalAttr) +// Computes tan of x element-wise. +func Tan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tan", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// StatelessRandomNormalDtype sets the optional dtype attribute to value. +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// +// N is the size of the segment being reduced. +// +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtN", + Input: []tf.Input{ + data, indices, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) + +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. // // value: The type of the output. // If not specified, defaults to DT_FLOAT -func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { return func(m optionalAttr) { m["dtype"] = value } } -// Outputs deterministic pseudorandom values from a normal distribution. +// Outputs deterministic pseudorandom values from a truncated normal distribution. // -// The generated values will have mean 0 and standard deviation 1. +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // // The outputs are a deterministic function of `shape` and `seed`. // @@ -5785,7 +5619,7 @@ func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { // seed: 2 seeds (shape [2]). // // Returns Random values with specified shape. -func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -5794,7 +5628,7 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessRandomNormal", + Type: "StatelessTruncatedNormal", Input: []tf.Input{ shape, seed, }, @@ -5804,49 +5638,287 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option return op.Output(0) } -// Reshapes a quantized tensor as per the Reshape op. +// RestoreSliceAttr is an optional argument to RestoreSlice. +type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. // -// ``` +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. // -// Arguments: +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. // -// shape: Defines the shape of the output tensor. -// input_min: The minimum value of the input. -// input_max: The maximum value of the input. +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. // -// Returns This value is copied from input_min.This value is copied from input_max. -func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "QuantizedReshape", + Type: "RestoreSlice", Input: []tf.Input{ - tensor, shape, input_min, input_max, + file_pattern, tensor_name, shape_and_slice, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// GatherAttr is an optional argument to Gather. -type GatherAttr func(optionalAttr) +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) -// GatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func GatherValidateIndices(value bool) GatherAttr { +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["Tout"] = value } } -// Gather slices from `params` according to `indices`. +// Returns the imaginary part of a complex number. // -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. // -// ```python -// # Scalar indices +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Imag", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) + +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Converts two real numbers to a complex number. +// +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. +// +// The input tensors `real` and `imag` must have the same shape. +// +// For example: +// +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Complex", + Input: []tf.Input{ + real, imag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) + +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { + return func(m optionalAttr) { + m["out_idx"] = value + } +} + +// Finds unique elements in a 1-D tensor. +// +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: +// +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// +// For example: +// +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns 1-D.1-D.1-D. +func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UniqueWithCounts", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. +type StatelessRandomNormalAttr func(optionalAttr) + +// StatelessRandomNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessRandomNormal", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Reshapes a quantized tensor as per the Reshape op. +// +// ``` +// +// Arguments: +// +// shape: Defines the shape of the output tensor. +// input_min: The minimum value of the input. +// input_max: The maximum value of the input. +// +// Returns This value is copied from input_min.This value is copied from input_max. +func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QuantizedReshape", + Input: []tf.Input{ + tensor, shape, input_min, input_max, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// GatherAttr is an optional argument to Gather. +type GatherAttr func(optionalAttr) + +// GatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func GatherValidateIndices(value bool) GatherAttr { + return func(m optionalAttr) { + m["validate_indices"] = value + } +} + +// Gather slices from `params` according to `indices`. +// +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// +// ```python +// # Scalar indices // output[:, ..., :] = params[indices, :, ... :] // // # Vector indices @@ -6008,44 +6080,33 @@ func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional .. return op.Output(0), op.Output(1), op.Output(2) } -// WriteAudioSummaryAttr is an optional argument to WriteAudioSummary. -type WriteAudioSummaryAttr func(optionalAttr) +// ResizeBilinearAttr is an optional argument to ResizeBilinear. +type ResizeBilinearAttr func(optionalAttr) -// WriteAudioSummaryMaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 +// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. // -// REQUIRES: value >= 1 -func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr { +// value: If true, rescale input by (new_height - 1) / (height - 1), which +// exactly aligns the 4 corners of images and resized images. If false, rescale +// by new_height / height. Treat similarly the width dimension. +// If not specified, defaults to false +func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { return func(m optionalAttr) { - m["max_outputs"] = value + m["align_corners"] = value } } -// Writes a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: +// Resize `images` to `size` using bilinear interpolation. // -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// Input images can be of different types but output images are always float. // // Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns the created operation. -func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { if scope.Err() != nil { return } @@ -6054,103 +6115,14 @@ func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "WriteAudioSummary", + Type: "ResizeBilinear", Input: []tf.Input{ - writer, step, tag, tensor, sample_rate, + images, size, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// ProdAttr is an optional argument to Prod. -type ProdAttr func(optionalAttr) - -// ProdKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func ProdKeepDims(value bool) ProdAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the product of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Prod", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeBilinearAttr is an optional argument to ResizeBilinear. -type ResizeBilinearAttr func(optionalAttr) - -// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using bilinear interpolation. -// -// Input images can be of different types but output images are always float. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBilinear", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) + op := scope.AddOperation(opspec) + return op.Output(0) } // Computes softsign: `features / (abs(features) + 1)`. @@ -6482,6 +6454,83 @@ func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...Fix return op.Output(0) } +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process. +// +// Note that the hash function may change from time to time. +// This functionality will be deprecated and it's recommended to use +// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. +// +// Arguments: +// +// num_buckets: The number of buckets. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_buckets": num_buckets} + opspec := tf.OpSpec{ + Type: "StringToHashBucket", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes gradients for the exponential linear (Elu) operation. +// +// Arguments: +// gradients: The backpropagated gradients to the corresponding Elu operation. +// outputs: The outputs of the corresponding Elu operation. +// +// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, +// `gradients` otherwise. +func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EluGrad", + Input: []tf.Input{ + gradients, outputs, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that contains `count` elements from the `input_dataset`. +// +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be taken. A value of `-1` indicates that all of `input_dataset` +// is taken. +// +// +func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "TakeDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // The gradient operator for the SparseAdd op. // // The SparseAdd op calculates A + B, where A, B, and the sum are all represented @@ -6667,72 +6716,6 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso return scope.AddOperation(opspec) } -// SummaryWriterAttr is an optional argument to SummaryWriter. -type SummaryWriterAttr func(optionalAttr) - -// SummaryWriterSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func SummaryWriterSharedName(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// SummaryWriterContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func SummaryWriterContainer(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// Returns a handle to be used to access a summary writer. -// -// The summary writer is an in-graph resource which can be used by ops to write -// summaries to event files. -// -// Returns the summary writer resource. Scalar handle. -func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SummaryWriter", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Applies softmax to a batched N-D `SparseTensor`. // // The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` @@ -6886,33 +6869,82 @@ func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.O return scope.AddOperation(opspec) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. +type ResourceApplyPowerSignAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. +// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, perform exclusive cumprod. +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. // If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["use_locking"] = value } } -// CumprodReverse sets the optional reverse attribute to value. +// Update '*var' according to the AddSign update. // -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Compute the cumulative product of the tensor `x` along `axis`. +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +// variable <- variable - lr_t * update // -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// logbase: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyPowerSign", + Input: []tf.Input{ + var_, m, lr, logbase, sign_decay, beta, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumprodReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. +// +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: // // ```python // tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] @@ -7214,27 +7246,6 @@ func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.Data return op.Output(0) } -// Writes a `GraphDef` protocol buffer to a `SummaryWriter`. -// -// Arguments: -// writer: Handle of `SummaryWriter`. -// step: The step to write the summary for. -// tensor: A scalar string of the serialized tf.GraphDef proto. -// -// Returns the created operation. -func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WriteGraphSummary", - Input: []tf.Input{ - writer, step, tensor, - }, - } - return scope.AddOperation(opspec) -} - // ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. type ResourceSparseApplyAdagradAttr func(optionalAttr) @@ -8379,6 +8390,136 @@ func Erf(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// OneHotAttr is an optional argument to OneHot. +type OneHotAttr func(optionalAttr) + +// OneHotAxis sets the optional axis attribute to value. +// +// value: The axis to fill (default: -1, a new inner-most axis). +// If not specified, defaults to -1 +func OneHotAxis(value int64) OneHotAttr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// Returns a one-hot tensor. +// +// The locations represented by indices in `indices` take value `on_value`, +// while all other locations take value `off_value`. +// +// If the input `indices` is rank `N`, the output will have rank `N+1`, +// The new axis is created at dimension `axis` (default: the new axis is +// appended at the end). +// +// If `indices` is a scalar the output shape will be a vector of length `depth`. +// +// If `indices` is a vector of length `features`, the output shape will be: +// ``` +// features x depth if axis == -1 +// depth x features if axis == 0 +// ``` +// +// If `indices` is a matrix (batch) with shape `[batch, features]`, +// the output shape will be: +// ``` +// batch x features x depth if axis == -1 +// batch x depth x features if axis == 1 +// depth x batch x features if axis == 0 +// ``` +// +// +// Examples +// ========= +// +// Suppose that +// +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 5.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[4 x 3]`: +// +// ```output = +// [5.0 0.0 0.0] // one_hot(0) +// [0.0 0.0 5.0] // one_hot(2) +// [0.0 0.0 0.0] // one_hot(-1) +// [0.0 5.0 0.0] // one_hot(1) +// ``` +// +// Suppose that +// +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 0.0 +// off_value = 3.0 +// axis = 0 +// ``` +// +// Then output is `[3 x 4]`: +// +// ```output = +// [0.0 3.0 3.0 3.0] +// [3.0 3.0 3.0 0.0] +// [3.0 3.0 3.0 3.0] +// [3.0 0.0 3.0 3.0] +// // ^ one_hot(0) +// // ^ one_hot(2) +// // ^ one_hot(-1) +// // ^ one_hot(1) +// ``` +// Suppose that +// +// ``` +// indices = [[0, 2], [1, -1]] +// depth = 3 +// on_value = 1.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[2 x 2 x 3]`: +// +// ```output = +// [ +// [1.0, 0.0, 0.0] // one_hot(0) +// [0.0, 0.0, 1.0] // one_hot(2) +// ][ +// [0.0, 1.0, 0.0] // one_hot(1) +// [0.0, 0.0, 0.0] // one_hot(-1) +// ]``` +// +// Arguments: +// indices: A tensor of indices. +// depth: A scalar defining the depth of the one hot dimension. +// on_value: A scalar defining the value to fill in output when `indices[j] = i`. +// off_value: A scalar defining the value to fill in output when `indices[j] != i`. +// +// Returns The one-hot tensor. +func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OneHot", + Input: []tf.Input{ + indices, depth, on_value, off_value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Reads the value of a variable. // // The tensor returned by this operation is immutable. @@ -8691,269 +8832,36 @@ func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) return op.Output(0) } -// Fills empty rows in the input 2-D `SparseTensor` with a default value. -// -// The input `SparseTensor` is represented via the tuple of inputs -// (`indices`, `values`, `dense_shape`). The output `SparseTensor` has the -// same `dense_shape` but with indices `output_indices` and values -// `output_values`. -// -// This op inserts a single entry for every row that doesn't have any values. -// The index is created as `[row, 0, ..., 0]` and the inserted value -// is `default_value`. -// -// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: -// -// [0, 1]: a -// [0, 3]: b -// [2, 0]: c -// [3, 1]: d -// -// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: -// -// [0, 1]: a -// [0, 3]: b -// [1, 0]: default_value -// [2, 0]: c -// [3, 1]: d -// [4, 0]: default_value -// -// The output `SparseTensor` will be in row-major order and will have the -// same shape as the input. -// -// This op also returns an indicator vector shaped `[dense_shape[0]]` such that -// -// empty_row_indicator[i] = True iff row i was an empty row. +// Computes softmax cross entropy cost and gradients to backpropagate. // -// And a reverse index map vector shaped `[indices.shape[0]]` that is used during -// backpropagation, +// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +// a matrix of label probabilities, but rather a single label per row +// of features. This label is considered to have probability 1.0 for the +// given row. // -// reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :] +// Inputs are the logits, not probabilities. // // Arguments: -// indices: 2-D. the indices of the sparse tensor. -// values: 1-D. the values of the sparse tensor. -// dense_shape: 1-D. the shape of the sparse tensor. -// default_value: 0-D. default value to insert into location `[row, 0, ..., 0]` -// for rows missing from the input sparse tensor. -// output indices: 2-D. the indices of the filled sparse tensor. +// features: batch_size x num_classes matrix +// labels: batch_size vector with values in [0, num_classes). +// This is the label for the given minibatch entry. // -// Returns 1-D. the values of the filled sparse tensor.1-D. whether the dense row was missing in the -// input sparse tensor.1-D. a map from the input indices to the output indices. -func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) { +// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). +func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseFillEmptyRows", + Type: "SparseSoftmaxCrossEntropyWithLogits", Input: []tf.Input{ - indices, values, dense_shape, default_value, + features, labels, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) + return op.Output(0), op.Output(1) } -// Reverses specific dimensions of a tensor. -// -// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions -// of `tensor`, this operation reverses each dimension i of `tensor` where -// `dims[i]` is `True`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions -// of `tensor` must equal the number of elements in `dims`. In other words: -// -// `rank(tensor) = size(dims)` -// -// For example: -// -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] -// -// # 'dims' is [False, False, False, True] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] -// -// # 'dims' is [False, True, False, False] -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] -// -// # 'dims' is [False, False, True, False] -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` -// -// Arguments: -// tensor: Up to 8-D. -// dims: 1-D. The dimensions to reverse. -// -// Returns The same shape as `tensor`. -func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reverse", - Input: []tf.Input{ - tensor, dims, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes log softmax activations. -// -// For each batch `i` and class `j` we have -// -// logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) -// -// Arguments: -// logits: 2-D with shape `[batch_size, num_classes]`. -// -// Returns Same shape as `logits`. -func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogSoftmax", - Input: []tf.Input{ - logits, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the inverse permutation of a tensor. -// -// This operation computes the inverse of an index permutation. It takes a 1-D -// integer tensor `x`, which represents the indices of a zero-based array, and -// swaps each value with its index position. In other words, for an output tensor -// `y` and an input tensor `x`, this operation computes the following: -// -// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` -// -// The values must include 0. There can be no duplicate values or negative values. -// -// For example: -// -// ``` -// # tensor `x` is [3, 4, 0, 2, 1] -// invert_permutation(x) ==> [2, 4, 3, 0, 1] -// ``` -// -// Arguments: -// x: 1-D. -// -// Returns 1-D. -func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InvertPermutation", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. -// -// This operation folds the padded areas of `input` by `MirrorPad` according to the -// `paddings` you specify. `paddings` must be the same as `paddings` argument -// given to the corresponding `MirrorPad` op. -// -// The folded size of each dimension D of the output is: -// -// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. -// # 'paddings' is [[0, 1]], [0, 1]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[ 1, 5] -// [11, 28]] -// ``` -// -// Arguments: -// input: The input tensor to be folded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: The mode used in the `MirrorPad` op. -// -// Returns The folded tensor. -func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPadGrad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softmax cross entropy cost and gradients to backpropagate. -// -// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept -// a matrix of label probabilities, but rather a single label per row -// of features. This label is considered to have probability 1.0 for the -// given row. -// -// Inputs are the logits, not probabilities. -// -// Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size vector with values in [0, num_classes). -// This is the label for the given minibatch entry. -// -// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). -func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmaxCrossEntropyWithLogits", - Input: []tf.Input{ - features, labels, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Fast Fourier transform. +// Fast Fourier transform. // // Computes the 1-dimensional discrete Fourier transform over the inner-most // dimension of `input`. @@ -9367,173 +9275,37 @@ func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtyp return op.Output(0) } -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) - -// HashTableV2Container sets the optional container attribute to value. +// Inverse 2D fast Fourier transform. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// HashTableV2SharedName sets the optional shared_name attribute to value. +// Computes the inverse 2-dimensional discrete Fourier transform over the +// inner-most 2 dimensions of `input`. // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// Arguments: +// input: A complex64 tensor. // -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value +// Returns A complex64 tensor of the same shape as `input`. The inner-most 2 +// dimensions of `input` are replaced with their inverse 2D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft2 +// @end_compatibility +func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT2D", + Input: []tf.Input{ + input, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates a non-initialized hash table. +// Creates a tensor filled with a scalar value. // -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. -func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) - -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns a random (key, value) -// -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", - Input: []tf.Input{ - indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return - } - return key, values -} - -// Inverse 2D fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform over the -// inner-most 2 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their inverse 2D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft2 -// @end_compatibility -func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT2D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a tensor filled with a scalar value. -// -// This operation creates a tensor of shape `dims` and fills it with `value`. +// This operation creates a tensor of shape `dims` and fills it with `value`. // // For example: // @@ -9940,55 +9712,53 @@ func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values return op.Output(0), op.Output(1) } -// Transforms a Tensor into a serialized TensorProto proto. +// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). // -// Arguments: -// tensor: A Tensor of type `T`. +// The Hurwitz zeta function is defined as: // -// Returns A serialized TensorProto proto of the input tensor. -func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { +// +// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) +func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SerializeTensor", + Type: "Zeta", Input: []tf.Input{ - tensor, + x, q, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) +// ProdAttr is an optional argument to Prod. +type ProdAttr func(optionalAttr) -// MatrixSolveAdjoint sets the optional adjoint attribute to value. +// ProdKeepDims sets the optional keep_dims attribute to value. // -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. +// value: If true, retain reduced dimensions with length 1. // If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { +func ProdKeepDims(value bool) ProdAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["keep_dims"] = value } } -// Solves systems of linear equations. +// Computes the product of elements across dimensions of a tensor. // -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { +// Returns The reduced tensor. +func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9997,9 +9767,9 @@ func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...Matr a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixSolve", + Type: "Prod", Input: []tf.Input{ - matrix, rhs, + input, axis, }, Attrs: attrs, } @@ -10007,65 +9777,162 @@ func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...Matr return op.Output(0) } -// Looks up keys in a table, outputs the corresponding values. +// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. +type FusedResizeAndPadConv2DAttr func(optionalAttr) + +// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. // -// The tensor `keys` must of the same type as the keys of the table. -// The output `values` is of the type of the table values. +// value: If true, rescale input by (new_height - 1) / (height - 1), +// which exactly aligns the 4 corners of images and resized images. If false, rescale +// by new_height / height. Treat similarly the width dimension. +// If not specified, defaults to false +func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { + return func(m optionalAttr) { + m["resize_align_corners"] = value + } +} + +// Performs a resize and padding as a preprocess during a convolution. // -// The scalar `default_value` is the value output for keys not present in the -// table. It must also be of the same type as the table values. +// It's often possible to do spatial transformations more efficiently as part of +// the packing stage of a convolution, so this op allows for an optimized +// implementation where these stages are fused together. This prevents the need to +// write out the intermediate results as whole tensors, reducing memory pressure, +// and we can get some latency gains by merging the transformation calculations. +// The data_format attribute for Conv2D isn't supported by this op, and defaults to +// 'NHWC' order. +// Internally this op uses a single per-graph scratch buffer, which means that it +// will block if multiple versions are being run in parallel. This is because this +// operator is primarily an optimization to minimize memory usage. // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. // -// Returns Same shape as `keys`. Values found in the table, or `default_values` -// for missing keys. -func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. Must be in the same order as the dimension specified with format. +// padding: The type of padding algorithm to use. +func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LookupTableFindV2", + Type: "FusedResizeAndPadConv2D", Input: []tf.Input{ - table_handle, keys, default_value, + input, size, paddings, filter, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. +// Transforms a Tensor into a serialized TensorProto proto. // // Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// tensor: A Tensor of type `T`. // -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A serialized TensorProto proto of the input tensor. +func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IFFT3D", + Type: "SerializeTensor", Input: []tf.Input{ - input, + tensor, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Adds `bias` to `value`. -// +// MatrixSolveAttr is an optional argument to MatrixSolve. +type MatrixSolveAttr func(optionalAttr) + +// MatrixSolveAdjoint sets the optional adjoint attribute to value. +// +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// If not specified, defaults to false +func MatrixSolveAdjoint(value bool) MatrixSolveAttr { + return func(m optionalAttr) { + m["adjoint"] = value + } +} + +// Solves systems of linear equations. +// +// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is +// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `True` then each output matrix satisfies +// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. +// +// Arguments: +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. +// +// Returns Shape is `[..., M, K]`. +func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolve", + Input: []tf.Input{ + matrix, rhs, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse 3D fast Fourier transform. +// +// Computes the inverse 3-dimensional discrete Fourier transform over the +// inner-most 3 dimensions of `input`. +// +// Arguments: +// input: A complex64 tensor. +// +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifftn with 3 dimensions. +// @end_compatibility +func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT3D", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Adds `bias` to `value`. +// // This is a deprecated version of BiasAdd and will be soon removed. // // This is a special case of `tf.add` where `bias` is restricted to be 1-D. @@ -11123,174 +10990,157 @@ func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_s return op.Output(0) } -// Flushes the writer's unwritten events. -// -// Arguments: -// writer: A handle to the summary writer resource. +// Computes sigmoid of `x` element-wise. // -// Returns the created operation. -func FlushSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { +// Specifically, `y = 1 / (1 + exp(-x))`. +func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FlushSummaryWriter", + Type: "Sigmoid", Input: []tf.Input{ - writer, + x, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) +// FusedBatchNormAttr is an optional argument to FusedBatchNorm. +type FusedBatchNormAttr func(optionalAttr) -// QuantizeV2Mode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { +// FusedBatchNormEpsilon sets the optional epsilon attribute to value. +// +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { return func(m optionalAttr) { - m["mode"] = value + m["epsilon"] = value } } -// QuantizeV2RoundMode sets the optional round_mode attribute to value. -// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { +// FusedBatchNormDataFormat sets the optional data_format attribute to value. +// +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { return func(m optionalAttr) { - m["round_mode"] = value + m["data_format"] = value } } -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8, out[i] -= (range(T) + 1) / 2.0 -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. -// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` -// -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. -// -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. -// -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` -// -// Our input tensor range is then `[-m, m]`. -// -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. -// If T is signed, this is -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` -// -// Otherwise, if T is unsigned, the fixed-point range is -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` -// -// From this we compute our scaling factor, s: -// ```c++ -// s = (max_fixed - min_fixed) / (2 * m) -// ``` +// FusedBatchNormIsTraining sets the optional is_training attribute to value. // -// Now we can quantize the elements of our tensor: -// ```c++ -// result = round(input * s) -// ``` +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Batch normalization. // -// One thing to watch out for is that the operator may choose to adjust the -// requested minimum and maximum values slightly during the quantization process, -// so you should always use the output ports as the range for further calculations. -// For example, if the requested minimum and maximum values are close to equal, -// they will be separated by a small epsilon value to prevent ill-formed quantized -// buffers from being created. Otherwise, you can end up with buffers where all the -// quantized values map to the same float value, which causes problems for -// operations that have to perform further calculations on them. +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. +// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. // -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. -// -// -// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. -func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeV2", + Type: "FusedBatchNorm", Input: []tf.Input{ - input, min_range, max_range, + x, scale, offset, mean, variance, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Component-wise divides a SparseTensor by a dense Tensor. -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. +// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. +type RandomStandardNormalAttr func(optionalAttr) + +// RandomStandardNormalSeed sets the optional seed attribute to value. // -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. +// +// Arguments: +// shape: The shape of the output tensor. +// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with random normal values. +func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomStandardNormal", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Component-wise divides a SparseTensor by a dense Tensor. +// +// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. +// +// Arguments: +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. // sp_shape: 1-D. Shape of the input SparseTensor. // dense: `R`-D. The dense Tensor operand. // @@ -11309,6 +11159,89 @@ func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output return op.Output(0) } +// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. +type FractionalAvgPoolGradAttr func(optionalAttr) + +// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. +// +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: +// +// `index 0 1 2 3 4` +// +// `value 20 5 16 3 7` +// +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [41/3, 26/3] for fractional avg pooling. +// If not specified, defaults to false +func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { + return func(m optionalAttr) { + m["overlapping"] = value + } +} + +// Computes gradient of the FractionalAvgPool function. +// +// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for +// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of +// out_backprop to those indices that form the same pooling cell. Therefore, we +// just need to know the shape of original input tensor, instead of the whole +// tensor. +// +// Arguments: +// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` +// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients +// w.r.t. the output of `fractional_avg_pool`. +// row_pooling_sequence: row pooling sequence, form pooling region with +// col_pooling_sequence. +// col_pooling_sequence: column pooling sequence, form pooling region with +// row_pooling sequence. +// +// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. +func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FractionalAvgPoolGrad", + Input: []tf.Input{ + orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Concatenates tensors along one dimension. +// +// Arguments: +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Concat", + Input: []tf.Input{ + concat_dim, tf.OutputList(values), + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. type ResourceApplyMomentumAttr func(optionalAttr) @@ -11417,327 +11350,86 @@ func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, return op.Output(0) } -// Returns the truth value of (x >= y) element-wise. +// Returns element-wise integer closest to x. // -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// If the result is midway between two representable values, +// the even representable is chosen. +// For example: +// +// ``` +// rint(-1.5) ==> -2.0 +// rint(0.5000001) ==> 1.0 +// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] +// ``` +func Rint(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "GreaterEqual", + Type: "Rint", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Conv3DAttr is an optional argument to Conv3D. -type Conv3DAttr func(optionalAttr) +// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. +type OrderedMapUnstageNoKeyAttr func(optionalAttr) -// Conv3DDataFormat sets the optional data_format attribute to value. +// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DDataFormat(value string) Conv3DAttr { +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { return func(m optionalAttr) { - m["data_format"] = value + m["capacity"] = value } } -// Conv3DDilations sets the optional dilations attribute to value. +// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DDilations(value []int64) Conv3DAttr { +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { return func(m optionalAttr) { - m["dilations"] = value + m["memory_limit"] = value } } -// Computes a 3-D convolution given 5-D `input` and `filter` tensors. -// -// In signal processing, cross-correlation is a measure of similarity of -// two waveforms as a function of a time-lag applied to one of them. This -// is also known as a sliding dot product or sliding inner-product. -// -// Our Conv3D implements a form of cross-correlation. +// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns the (key, value) element with the smallest // -// Arguments: -// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. -// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, -// out_channels]`. `in_channels` must match between `input` and `filter`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { +// key from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. +func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3D", + Type: "OrderedMapUnstageNoKey", Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds up a SparseTensor and a dense Tensor, using these special rules: -// -// (1) Broadcasts the dense side to have the same shape as the sparse side, if -// eligible; -// (2) Then, only the dense values pointed to by the indices of the SparseTensor -// participate in the cwise addition. -// -// By these rules, the result is a logical SparseTensor with exactly the same -// indices and shape, but possibly with different non-zero values. The output of -// this Op is the resultant non-zero values. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. -// -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseAdd", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Read an element from the TensorArray into output `value`. -// -// Arguments: -// handle: The handle to a TensorArray. -// -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. -// -// Returns The tensor that is read from the TensorArray. -func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "TensorArrayReadV3", - Input: []tf.Input{ - handle, index, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodePngAttr is an optional argument to EncodePng. -type EncodePngAttr func(optionalAttr) - -// EncodePngCompression sets the optional compression attribute to value. -// -// value: Compression level. -// If not specified, defaults to -1 -func EncodePngCompression(value int64) EncodePngAttr { - return func(m optionalAttr) { - m["compression"] = value - } -} - -// PNG-encode an image. -// -// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` -// where `channels` is: -// -// * 1: for grayscale. -// * 2: for grayscale + alpha. -// * 3: for RGB. -// * 4: for RGBA. -// -// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder -// default or a value from 0 to 9. 9 is the highest compression level, generating -// the smallest output, but is slower. -// -// Arguments: -// image: 3-D with shape `[height, width, channels]`. -// -// Returns 0-D. PNG-encoded image. -func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EncodePng", - Input: []tf.Input{ - image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. -type DataFormatVecPermuteAttr func(optionalAttr) - -// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["src_format"] = value - } -} - -// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["dst_format"] = value - } -} - -// Returns the permuted vector/tensor in the destination data format given the -// -// one in the source data format. -// -// Arguments: -// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. -// -// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. -func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DataFormatVecPermute", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise integer closest to x. -// -// If the result is midway between two representable values, -// the even representable is chosen. -// For example: -// -// ``` -// rint(-1.5) ==> -2.0 -// rint(0.5000001) ==> 1.0 -// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] -// ``` -func Rint(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rint", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. -type OrderedMapUnstageNoKeyAttr func(optionalAttr) - -// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the (key, value) element with the smallest -// -// key from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapUnstageNoKey", - Input: []tf.Input{ - indices, + indices, }, Attrs: attrs, } @@ -11909,35 +11601,154 @@ func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.Dat return outputs } -// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. -type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) +// SerializeManySparseAttr is an optional argument to SerializeManySparse. +type SerializeManySparseAttr func(optionalAttr) -// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. +// SerializeManySparseOutType sets the optional out_type attribute to value. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { return func(m optionalAttr) { - m["seed"] = value + m["out_type"] = value } } -// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. +// The `SparseTensor` must have rank `R` greater than 1, and the first dimension +// is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. The serialized +// `SparseTensor` objects going into each row of `serialized_sparse` will have +// rank `R-1`. // -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. +// The minibatch size `N` is extracted from `sparse_shape[0]`. +// +// Arguments: +// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SerializeManySparse", + Input: []tf.Input{ + sparse_indices, sparse_values, sparse_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes inverse hyperbolic cosine of x element-wise. +func Acosh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Acosh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorArrayV2Attr is an optional argument to TensorArrayV2. +type TensorArrayV2Attr func(optionalAttr) + +// TensorArrayV2ElementShape sets the optional element_shape attribute to value. +// If not specified, defaults to +func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { + return func(m optionalAttr) { + m["element_shape"] = value + } +} + +// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. +// If not specified, defaults to false +func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { + return func(m optionalAttr) { + m["dynamic_size"] = value + } +} + +// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. +// If not specified, defaults to true +func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value + } +} + +// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. +// If not specified, defaults to "" +func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { + return func(m optionalAttr) { + m["tensor_array_name"] = value + } +} + +// Deprecated. Use TensorArrayV3 +// +// DEPRECATED at GraphDef version 26: Use TensorArrayV3 +func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorArrayV2", + Input: []tf.Input{ + size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. +type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) + +// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a learned unigram distribution. +// +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. // // For each batch, this op picks a single set of sampled candidate labels. // @@ -12455,60 +12266,106 @@ func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { return op.Output(0) } -// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. -type SparseTensorDenseMatMulAttr func(optionalAttr) +// Computes gradients for SparseSegmentMean. +// +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. +// +// Arguments: +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentMeanGrad", + Input: []tf.Input{ + grad, indices, segment_ids, output_dim0, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. +// Returns the truth value of (x >= y) element-wise. // -// value: Use the adjoint of A in the matrix multiply. If A is complex, this -// is transpose(conj(A)). Otherwise it's transpose(A). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { +// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "GreaterEqual", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Conv3DAttr is an optional argument to Conv3D. +type Conv3DAttr func(optionalAttr) + +// Conv3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DDataFormat(value string) Conv3DAttr { return func(m optionalAttr) { - m["adjoint_a"] = value + m["data_format"] = value } } -// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. +// Conv3DDilations sets the optional dilations attribute to value. // -// value: Use the adjoint of B in the matrix multiply. If B is complex, this -// is transpose(conj(B)). Otherwise it's transpose(B). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DDilations(value []int64) Conv3DAttr { return func(m optionalAttr) { - m["adjoint_b"] = value + m["dilations"] = value } } -// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". +// Computes a 3-D convolution given 5-D `input` and `filter` tensors. // -// No validity checking is performed on the indices of A. However, the following -// input format is recommended for optimal behavior: +// In signal processing, cross-correlation is a measure of similarity of +// two waveforms as a function of a time-lag applied to one of them. This +// is also known as a sliding dot product or sliding inner-product. // -// if adjoint_a == false: -// A should be sorted in lexicographically increasing order. Use SparseReorder -// if you're not sure. -// if adjoint_a == true: -// A should be sorted in order of increasing dimension 1 (i.e., "column major" -// order instead of "row major" order). +// Our Conv3D implements a form of cross-correlation. // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. -// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. -// b: 2-D. A dense Matrix. -func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { +// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. +// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, +// out_channels]`. `in_channels` must match between `input` and `filter`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseTensorDenseMatMul", + Type: "Conv3D", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + input, filter, }, Attrs: attrs, } @@ -12516,235 +12373,254 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp return op.Output(0) } -// Deserialize and concatenate `SparseTensors` from a serialized minibatch. +// Adds up a SparseTensor and a dense Tensor, using these special rules: // -// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where -// `N` is the minibatch size and the rows correspond to packed outputs of -// `SerializeSparse`. The ranks of the original `SparseTensor` objects -// must all match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension). +// (1) Broadcasts the dense side to have the same shape as the sparse side, if +// eligible; +// (2) Then, only the dense values pointed to by the indices of the SparseTensor +// participate in the cwise addition. // -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. +// By these rules, the result is a logical SparseTensor with exactly the same +// indices and shape, but possibly with different non-zero values. The output of +// this Op is the resultant non-zero values. // -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and +// Arguments: +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseDenseCwiseAdd", + Input: []tf.Input{ + sp_indices, sp_values, sp_shape, dense, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Read an element from the TensorArray into output `value`. // -// then the final deserialized `SparseTensor` will be: +// Arguments: +// handle: The handle to a TensorArray. // -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Arguments: -// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. -// Must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// Returns The tensor that is read from the TensorArray. +func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "DeserializeManySparse", + Type: "TensorArrayReadV3", Input: []tf.Input{ - serialized_sparse, + handle, index, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// StringJoinAttr is an optional argument to StringJoin. -type StringJoinAttr func(optionalAttr) +// QuantizeV2Attr is an optional argument to QuantizeV2. +type QuantizeV2Attr func(optionalAttr) -// StringJoinSeparator sets the optional separator attribute to value. -// -// value: string, an optional join separator. -// If not specified, defaults to "" -func StringJoinSeparator(value string) StringJoinAttr { +// QuantizeV2Mode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func QuantizeV2Mode(value string) QuantizeV2Attr { return func(m optionalAttr) { - m["separator"] = value + m["mode"] = value } } -// Joins the strings in the given list of string tensors into one tensor; +// QuantizeV2RoundMode sets the optional round_mode attribute to value. +// If not specified, defaults to "HALF_AWAY_FROM_ZERO" +func QuantizeV2RoundMode(value string) QuantizeV2Attr { + return func(m optionalAttr) { + m["round_mode"] = value + } +} + +// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. // -// with the given separator (default is an empty separator). +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. The +// 'round_mode' attribute controls which rounding tie-breaking algorithm is used +// when rounding float values to their quantized equivalents. +// +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: +// +// ``` +// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +// if T == qint8, out[i] -= (range(T) + 1) / 2.0 +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// Assume the input is type float and has a possible range of [0.0, 6.0] and the +// output type is quint8 ([0, 255]). The min_range and max_range values should be +// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +// value of the input by 255/6 and cast to quint8. +// +// If the output type was qint8 ([-128, 127]), the operation will additionally +// subtract each value by 128 prior to casting, so that the range of values aligns +// with the range of qint8. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ``` +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = num_discrete_values / range +// quantized = round(input * range_scale) - round(range_min * range_scale) + +// numeric_limits::min() +// quantized = max(quantized, numeric_limits::min()) +// quantized = min(quantized, numeric_limits::max()) +// ``` +// +// The biggest difference between this and MIN_COMBINED is that the minimum range +// is rounded first, before it's subtracted from the rounded value. With +// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +// and dequantizing will introduce a larger and larger error. +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. +// +// We first find the range of values in our tensor. The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. +// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (max_fixed - min_fixed) / (2 * m) +// ``` +// +// Now we can quantize the elements of our tensor: +// ```c++ +// result = round(input * s) +// ``` +// +// One thing to watch out for is that the operator may choose to adjust the +// requested minimum and maximum values slightly during the quantization process, +// so you should always use the output ports as the range for further calculations. +// For example, if the requested minimum and maximum values are close to equal, +// they will be separated by a small epsilon value to prevent ill-formed quantized +// buffers from being created. Otherwise, you can end up with buffers where all the +// quantized values map to the same float value, which causes problems for +// operations that have to perform further calculations on them. // // Arguments: -// inputs: A list of string tensors. The tensors must all have the same shape, -// or be scalars. Scalars may be mixed in; these will be broadcast to the shape -// of non-scalar inputs. -func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { +// +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +// +// +// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. +func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StringJoin", + Type: "QuantizeV2", Input: []tf.Input{ - tf.OutputList(inputs), + input, min_range, max_range, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Returns immutable tensor from memory region. -// -// The current implementation memmaps the tensor from a file. +// Returns the truth value of (x < y) element-wise. // -// Arguments: -// dtype: Type of the returned tensor. -// shape: Shape of the returned tensor. -// memory_region_name: Name of readonly memory region used by the tensor, see -// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. -func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { +// *NOTE*: `Less` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} opspec := tf.OpSpec{ - Type: "ImmutableConst", - - Attrs: attrs, + Type: "Less", + Input: []tf.Input{ + x, y, + }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// QuantizedReluXAttr is an optional argument to QuantizedReluX. +type QuantizedReluXAttr func(optionalAttr) + +// QuantizedReluXOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. // -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IRFFT", - Input: []tf.Input{ - input, fft_length, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concatenates a list of `SparseTensor` along the specified dimension. -// -// Concatenation is with respect to the dense versions of these sparse tensors. -// It is assumed that each input is a `SparseTensor` whose elements are ordered -// along increasing dimension number. -// -// All inputs' shapes must match, except for the concat dimension. The -// `indices`, `values`, and `shapes` lists must have the same length. -// -// The output shape is identical to the inputs', except along the concat -// dimension, where it is the sum of the inputs' sizes along that dimension. -// -// The output elements will be resorted to preserve the sort order along -// increasing dimension number. -// -// This op runs in `O(M log M)` time, where `M` is the total number of non-empty -// values across all inputs. This is due to the need for an internal sort in -// order to concatenate efficiently across an arbitrary dimension. -// -// For example, if `concat_dim = 1` and the inputs are -// -// sp_inputs[0]: shape = [2, 3] -// [0, 2]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// sp_inputs[1]: shape = [2, 4] -// [0, 1]: "d" -// [0, 2]: "e" -// -// then the output will be -// -// shape = [2, 7] -// [0, 2]: "a" -// [0, 4]: "d" -// [0, 5]: "e" -// [1, 0]: "b" -// [1, 1]: "c" -// -// Graphically this is equivalent to doing -// -// [ a] concat [ d e ] = [ a d e ] -// [b c ] [ ] [b c ] -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. Non-empty values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), -// where rank is the number of dimensions in each input `SparseTensor`. +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. // -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. -func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. +func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"concat_dim": concat_dim} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseConcat", + Type: "QuantizedReluX", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), + features, max_value, min_features, max_features, }, Attrs: attrs, } @@ -12752,118 +12628,38 @@ func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes return op.Output(0), op.Output(1), op.Output(2) } -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// hashed_output: If true, returns the hash of the cross instead of the string. -// This will allow us avoiding string manipulations. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` -// function to combine the crosses fingerprints. -// -// +// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2. +type WholeFileReaderV2Attr func(optionalAttr) + +// WholeFileReaderV2Container sets the optional container attribute to value. // -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. -func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} - opspec := tf.OpSpec{ - Type: "SparseCross", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), - }, - Attrs: attrs, +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) } -// ListDiffAttr is an optional argument to ListDiff. -type ListDiffAttr func(optionalAttr) - -// ListDiffOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func ListDiffOutIdx(value tf.DataType) ListDiffAttr { +// WholeFileReaderV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr { return func(m optionalAttr) { - m["out_idx"] = value + m["shared_name"] = value } } -// Computes the difference between two lists of numbers or strings. -// -// Given a list `x` and a list `y`, this operation returns a list `out` that -// represents all values that are in `x` but not in `y`. The returned list `out` -// is sorted in the same order that the numbers appear in `x` (duplicates are -// preserved). This operation also returns a list `idx` that represents the -// position of each `out` element in `x`. In other words: -// -// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` -// -// For example, given this input: -// -// ``` -// x = [1, 2, 3, 4, 5, 6] -// y = [1, 3, 5] -// ``` -// -// This operation would return: -// -// ``` -// out ==> [2, 4, 6] -// idx ==> [1, 3, 5] -// ``` +// A Reader that outputs the entire contents of a file as a value. // -// Arguments: -// x: 1-D. Values to keep. -// y: 1-D. Values to remove. +// To use, enqueue filenames in a Queue. The output of ReaderRead will +// be a filename (key) and the contents of that file (value). // -// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`. -func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) { +// Returns The handle to reference the Reader. +func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } @@ -12872,108 +12668,140 @@ func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "ListDiff", - Input: []tf.Input{ - x, y, - }, + Type: "WholeFileReaderV2", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. -// -// This Op does not require `a_indices` be sorted in standard lexicographic order. +// Transforms a tf.Example proto (as a string) into typed tensors. // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. -// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. -// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. -// b: `ndims`-D Tensor. With shape `a_shape`. -func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { +// serialized: A vector containing a batch of binary serialized Example protos. +// dense_defaults: A list of Tensors (some may be empty), whose length matches +// the length of `dense_keys`. dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. +// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// num_sparse: The number of sparse features to be parsed from the example. This +// must match the lengths of `sparse_keys` and `sparse_types`. +// sparse_keys: A list of `num_sparse` strings. +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: The keys expected in the Examples' features associated with dense +// values. +// sparse_types: A list of `num_sparse` types; the data types of data in each +// Feature given in sparse_keys. +// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: The shapes of data in each Feature given in dense_keys. +// The length of this list must match the length of `dense_keys`. The +// number of elements in the Feature corresponding to dense_key[j] must +// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == +// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] +// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, +// ..., DN), the shape of the output Tensor dense_values[j] will be (M, +// D1, .., DN), where M is the number of blocks of elements of length +// D1 * .... * DN, in the input. +func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} opspec := tf.OpSpec{ - Type: "SparseTensorDenseAdd", + Type: "ParseSingleExample", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + serialized, tf.OutputList(dense_defaults), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + return sparse_indices, sparse_values, sparse_shapes, dense_values } -// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. -type SparseToSparseSetOperationAttr func(optionalAttr) +// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. +type QuantizedConv2DAttr func(optionalAttr) -// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { +// QuantizedConv2DOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["out_type"] = value } } -// Applies set operation along last dimension of 2 `SparseTensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the -// order and range of `set1` and `set2` indices. -// -// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, -// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same -// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. +// QuantizedConv2DDilations sets the optional dilations attribute to value. // -// If `validate_indices` is `True`, this op validates the order and range of `set1` -// and `set2` indices. +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes a 2D convolution given quantized 4D input and filter tensors. // -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. +// The inputs are quantized tensors where the lowest value represents the real +// number of the associated minimum, and the highest represents the maximum. +// This means that you can only interpret the quantized output in the same way, by +// taking the returned minimum and maximum values into account. // // Arguments: -// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must -// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the -// max set size across `0...n-1` dimensions. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the -// max set size across `0...n-1` dimensions. // +// filter: filter's input_depth dimension must match input's depth dimensions. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// min_filter: The float value that the lowest quantized filter value represents. +// max_filter: The float value that the highest quantized filter value represents. +// strides: The stride of the sliding window for each dimension of the input +// tensor. +// padding: The type of padding algorithm to use. // -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseToSparseSetOperation", + Type: "QuantizedConv2D", Input: []tf.Input{ - set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, + input, filter, min_input, max_input, min_filter, max_filter, }, Attrs: attrs, } @@ -12981,66 +12809,44 @@ func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_value return op.Output(0), op.Output(1), op.Output(2) } -// Computes numerical negative value element-wise. -// -// I.e., \\(y = -x\\). -func Neg(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Neg", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} +// ResourceGatherAttr is an optional argument to ResourceGather. +type ResourceGatherAttr func(optionalAttr) -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { +// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { return func(m optionalAttr) { - m["narrow_range"] = value + m["validate_indices"] = value } } -// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` +// Gather slices from the variable pointed to by `resource` according to `indices`. // -// and `max` to 'outputs' tensor of same shape as `inputs`. +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: // -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] // -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] +// +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` +func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", + Type: "ResourceGather", Input: []tf.Input{ - inputs, min, max, + resource, indices, }, Attrs: attrs, } @@ -13048,29 +12854,23 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max return op.Output(0) } -// Writes a `Summary` protocol buffer with a histogram. -// -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. +// Delete the TensorArray from its resource container. // -// This op reports an `InvalidArgument` error if any value is not finite. +// This enables the user to close and release the resource in the middle +// of a step/run. // // Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). // // Returns the created operation. -func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { +func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "WriteHistogramSummary", + Type: "TensorArrayCloseV3", Input: []tf.Input{ - writer, step, tag, values, + handle, }, } return scope.AddOperation(opspec) @@ -13375,59 +13175,6 @@ func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, n return op.Output(0), op.Output(1), op.Output(2) } -// Returns the element-wise min of two SparseTensors. -// -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. -// -// Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. -// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. -// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. -// -// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. -func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSparseMinimum", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Constructs a tensor by tiling a given tensor. -// -// This operation creates a new tensor by replicating `input` `multiples` times. -// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements, -// and the values of `input` are replicated `multiples[i]` times along the 'i'th -// dimension. For example, tiling `[a b c d]` by `[2]` produces -// `[a b c d a b c d]`. -// -// Arguments: -// input: 1-D or higher. -// multiples: 1-D. Length must be the same as the number of dimensions in `input` -func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tile", - Input: []tf.Input{ - input, multiples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Saves the input tensors to disk. // // The size of `tensor_names` must match the number of tensors in `data`. `data[i]` @@ -13476,40 +13223,75 @@ func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap. -type TakeManySparseFromTensorsMapAttr func(optionalAttr) +// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. +type SparseTensorDenseMatMulAttr func(optionalAttr) -// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value. +// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. // -// value: The container name for the `SparseTensorsMap` read by this op. -// If not specified, defaults to "" -func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr { +// value: Use the adjoint of A in the matrix multiply. If A is complex, this +// is transpose(conj(A)). Otherwise it's transpose(A). +// If not specified, defaults to false +func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["container"] = value + m["adjoint_a"] = value } } -// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value. +// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. // -// value: The shared name for the `SparseTensorsMap` read by this op. -// It should not be blank; rather the `shared_name` or unique Operation name -// of the Op that created the original `SparseTensorsMap` should be used. -// If not specified, defaults to "" -func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr { +// value: Use the adjoint of B in the matrix multiply. If B is complex, this +// is transpose(conj(B)). Otherwise it's transpose(B). +// If not specified, defaults to false +func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["adjoint_b"] = value } } -// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. +// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". // -// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where -// `N` is the minibatch size and the rows correspond to the output handles of -// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. The ranks of the -// original `SparseTensor` objects that went into the given input ops must all -// match. When the final `SparseTensor` is created, it has rank one +// No validity checking is performed on the indices of A. However, the following +// input format is recommended for optimal behavior: +// +// if adjoint_a == false: +// A should be sorted in lexicographically increasing order. Use SparseReorder +// if you're not sure. +// if adjoint_a == true: +// A should be sorted in order of increasing dimension 1 (i.e., "column major" +// order instead of "row major" order). +// +// Arguments: +// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. +// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. +// b: 2-D. A dense Matrix. +func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SparseTensorDenseMatMul", + Input: []tf.Input{ + a_indices, a_values, a_shape, b, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deserialize and concatenate `SparseTensors` from a serialized minibatch. +// +// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where +// `N` is the minibatch size and the rows correspond to packed outputs of +// `SerializeSparse`. The ranks of the original `SparseTensor` objects +// must all match. When the final `SparseTensor` is created, it has rank one // higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension on the left). +// (they have been concatenated along a new row dimension). // // The output `SparseTensor` object's shape values for all dimensions but the // first are the max across the input `SparseTensor` objects' shape values @@ -13520,29 +13302,24 @@ func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTens // standard lexicographic order. If this is not the case, after this // step run `SparseReorder` to restore index ordering. // -// For example, if the handles represent an input, which is a `[2, 3]` matrix -// representing two original `SparseTensor` objects: +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: // -// ``` // index = [ 0] // [10] // [20] // values = [1, 2, 3] // shape = [50] -// ``` // // and // -// ``` // index = [ 2] // [10] // values = [4, 5] // shape = [30] -// ``` // -// then the final `SparseTensor` will be: +// then the final deserialized `SparseTensor` will be: // -// ``` // index = [0 0] // [0 10] // [0 20] @@ -13550,27 +13327,20 @@ func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTens // [1 10] // values = [1, 2, 3, 4, 5] // shape = [2 50] -// ``` // // Arguments: -// sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. -// Shape: `[N]`. -// dtype: The `dtype` of the `SparseTensor` objects stored in the -// `SparseTensorsMap`. -// -// Returns 2-D. The `indices` of the minibatch `SparseTensor`.1-D. The `values` of the minibatch `SparseTensor`.1-D. The `shape` of the minibatch `SparseTensor`. -func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. +// Must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. +func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TakeManySparseFromTensorsMap", + Type: "DeserializeManySparse", Input: []tf.Input{ - sparse_handles, + serialized_sparse, }, Attrs: attrs, } @@ -13578,240 +13348,290 @@ func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype return op.Output(0), op.Output(1), op.Output(2) } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let +// StringJoinAttr is an optional argument to StringJoin. +type StringJoinAttr func(optionalAttr) + +// StringJoinSeparator sets the optional separator attribute to value. // -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, +// value: string, an optional join separator. +// If not specified, defaults to "" +func StringJoinSeparator(value string) StringJoinAttr { + return func(m optionalAttr) { + m["separator"] = value + } +} + +// Joins the strings in the given list of string tensors into one tensor; // -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// with the given separator (default is an empty separator). // // Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { +// inputs: A list of string tensors. The tensors must all have the same shape, +// or be scalars. Scalars may be mixed in; these will be broadcast to the shape +// of non-scalar inputs. +func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "InTopKV2", + Type: "StringJoin", Input: []tf.Input{ - predictions, targets, k, + tf.OutputList(inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Assigns a new value to a variable. +// Returns immutable tensor from memory region. // -// Any ReadVariableOp with a control dependency on this op is guaranteed to return -// this value or a subsequent newer value of the variable. +// The current implementation memmaps the tensor from a file. // // Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value to set the new tensor to use. -// -// Returns the created operation. -func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +// dtype: Type of the returned tensor. +// shape: Shape of the returned tensor. +// memory_region_name: Name of readonly memory region used by the tensor, see +// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. +func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} opspec := tf.OpSpec{ - Type: "AssignVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} + Type: "ImmutableConst", -// Returns a tensor of ones with the same shape and type as x. -// -// Arguments: -// x: a tensor of type T. -// -// Returns a tensor of the same shape and type as x but filled with ones. -func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OnesLike", - Input: []tf.Input{ - x, - }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// The gradient of SparseFillEmptyRows. +// Inverse real-valued fast Fourier transform. // -// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, -// shaped `[N_full]`, where `N_full >= N` and copies data into either -// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and -// `d_default_value` is a scalar. +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. // -// d_values[j] = grad_values[reverse_index_map[j]] -// d_default_value = sum_{k : 0 .. N_full - 1} ( -// grad_values[k] * 1{k not in reverse_index_map}) +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. // -// Arguments: -// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. -// grad_values: 1-D. The gradients from backprop. +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. // -// Returns 1-D. The backprop into values.0-D. The backprop into default_value. -func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseFillEmptyRowsGrad", - Input: []tf.Input{ - reverse_index_map, grad_values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. // -// if < 0, `scale * features` otherwise. +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. // -// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) -func Selu(scope *Scope, features tf.Output) (activations tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Selu", + Type: "IRFFT", Input: []tf.Input{ - features, + input, fft_length, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SetSizeAttr is an optional argument to SetSize. -type SetSizeAttr func(optionalAttr) - -// SetSizeValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SetSizeValidateIndices(value bool) SetSizeAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Number of unique elements along last dimension of input `set`. +// Concatenates a list of `SparseTensor` along the specified dimension. // -// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, -// and `set_shape`. The last dimension contains values in a set, duplicates are -// allowed but ignored. +// Concatenation is with respect to the dense versions of these sparse tensors. +// It is assumed that each input is a `SparseTensor` whose elements are ordered +// along increasing dimension number. // -// If `validate_indices` is `True`, this op validates the order and range of `set` -// indices. +// All inputs' shapes must match, except for the concat dimension. The +// `indices`, `values`, and `shapes` lists must have the same length. +// +// The output shape is identical to the inputs', except along the concat +// dimension, where it is the sum of the inputs' sizes along that dimension. +// +// The output elements will be resorted to preserve the sort order along +// increasing dimension number. +// +// This op runs in `O(M log M)` time, where `M` is the total number of non-empty +// values across all inputs. This is due to the need for an internal sort in +// order to concatenate efficiently across an arbitrary dimension. +// +// For example, if `concat_dim = 1` and the inputs are +// +// sp_inputs[0]: shape = [2, 3] +// [0, 2]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// sp_inputs[1]: shape = [2, 4] +// [0, 1]: "d" +// [0, 2]: "e" +// +// then the output will be +// +// shape = [2, 7] +// [0, 2]: "a" +// [0, 4]: "d" +// [0, 5]: "e" +// [1, 0]: "b" +// [1, 1]: "c" +// +// Graphically this is equivalent to doing +// +// [ a] concat [ d e ] = [ a d e ] +// [b c ] [ ] [b c ] // // Arguments: -// set_indices: 2D `Tensor`, indices of a `SparseTensor`. -// set_values: 1D `Tensor`, values of a `SparseTensor`. -// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. Non-empty values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), +// where rank is the number of dimensions in each input `SparseTensor`. // -// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st -// `n-1` dimensions as `set`. Each value is the number of unique elements in -// the corresponding `[0...n-1]` dimension of `set`. -func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { +// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. +func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"concat_dim": concat_dim} opspec := tf.OpSpec{ - Type: "SetSize", + Type: "SparseConcat", Input: []tf.Input{ - set_indices, set_values, set_shape, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes the sign and the log of the absolute value of the determinant of +// Generates sparse cross from a list of sparse and dense tensors. // -// one or more square matrices. +// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +// representing features of one feature column. It outputs a 2D `SparseTensor` with +// the batchwise crosses of these features. // -// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions -// form square matrices. The outputs are two tensors containing the signs and -// absolute values of the log determinants for all N input submatrices -// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). -// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU -// is the LU decomposition of the input and P is the corresponding -// permutation matrix. +// For example, if the inputs are +// +// inputs[0]: SparseTensor with shape = [2, 2] +// [0, 0]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// inputs[1]: SparseTensor with shape = [2, 1] +// [0, 0]: "d" +// [1, 0]: "e" +// +// inputs[2]: Tensor [["f"], ["g"]] +// +// then the output will be +// +// shape = [2, 2] +// [0, 0]: "a_X_d_X_f" +// [1, 0]: "b_X_e_X_g" +// [1, 1]: "c_X_e_X_g" +// +// if hashed_output=true then the output will be +// +// shape = [2, 2] +// [0, 0]: FingerprintCat64( +// Fingerprint64("f"), FingerprintCat64( +// Fingerprint64("d"), Fingerprint64("a"))) +// [1, 0]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("b"))) +// [1, 1]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("c"))) // // Arguments: -// input: Shape is `[N, M, M]`. +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// dense_inputs: 2-D. Columns represented by dense `Tensor`. +// hashed_output: If true, returns the hash of the cross instead of the string. +// This will allow us avoiding string manipulations. +// num_buckets: It is used if hashed_output is true. +// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. +// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` +// function to combine the crosses fingerprints. // -// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants -// of the N input matrices. Shape is `[N]`. -func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { +// +// +// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed +// `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. +func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} opspec := tf.OpSpec{ - Type: "LogMatrixDeterminant", + Type: "SparseCross", Input: []tf.Input{ - input, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// SumAttr is an optional argument to Sum. -type SumAttr func(optionalAttr) +// ListDiffAttr is an optional argument to ListDiff. +type ListDiffAttr func(optionalAttr) -// SumKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SumKeepDims(value bool) SumAttr { +// ListDiffOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func ListDiffOutIdx(value tf.DataType) ListDiffAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["out_idx"] = value } } -// Computes the sum of elements across dimensions of a tensor. +// Computes the difference between two lists of numbers or strings. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// Given a list `x` and a list `y`, this operation returns a list `out` that +// represents all values that are in `x` but not in `y`. The returned list `out` +// is sorted in the same order that the numbers appear in `x` (duplicates are +// preserved). This operation also returns a list `idx` that represents the +// position of each `out` element in `x`. In other words: +// +// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` +// +// For example, given this input: +// +// ``` +// x = [1, 2, 3, 4, 5, 6] +// y = [1, 3, 5] +// ``` +// +// This operation would return: +// +// ``` +// out ==> [2, 4, 6] +// idx ==> [1, 3, 5] +// ``` // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// x: 1-D. Values to keep. +// y: 1-D. Values to remove. // -// Returns The reduced tensor. -func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { +// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`. +func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) { if scope.Err() != nil { return } @@ -13820,81 +13640,163 @@ func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (ou a(attrs) } opspec := tf.OpSpec{ - Type: "Sum", + Type: "ListDiff", Input: []tf.Input{ - input, axis, + x, y, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Delete the tensor specified by its handle in the session. +// Concatenates quantized tensors along one dimension. // // Arguments: -// handle: The handle for a tensor stored in the session state. +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// input_mins: The minimum scalar values for each of the input tensors. +// input_maxes: The maximum scalar values for each of the input tensors. // -// Returns the created operation. -func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) { +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. +func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DeleteSessionTensor", + Type: "QuantizedConcat", Input: []tf.Input{ - handle, + concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// L2 Loss. +// Slice a `SparseTensor` based on the `start` and `size`. // -// Computes half the L2 norm of a tensor without the `sqrt`: +// For example, if the input is // -// output = sum(t ** 2) / 2 +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] +// +// Graphically the output tensors are: +// +// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] +// [ a ] +// [b c ] +// +// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] +// [ d e ] +// [ ] // // Arguments: -// t: Typically 2-D, but may have any dimensions. +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// start: 1-D. tensor represents the start of the slice. +// size: 1-D. tensor represents the size of the slice. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. // -// Returns 0-D. -func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "L2Loss", + Type: "SparseSlice", Input: []tf.Input{ - t, + indices, values, shape, start, size, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. -type DenseToSparseSetOperationAttr func(optionalAttr) - -// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { +// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// +// This Op does not require `a_indices` be sorted in standard lexicographic order. +// +// Arguments: +// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. +// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. +// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. +// b: `ndims`-D Tensor. With shape `a_shape`. +func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseTensorDenseAdd", + Input: []tf.Input{ + a_indices, a_values, a_shape, b, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the set of files matching one or more glob patterns. +// +// Note that this routine only supports wildcard characters in the +// basename portion of the pattern, not in the directory portion. +// +// Arguments: +// pattern: Shell wildcard pattern(s). Scalar or vector of type string. +// +// Returns A vector of matching filenames. +func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatchingFiles", + Input: []tf.Input{ + pattern, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. +type SparseToSparseSetOperationAttr func(optionalAttr) + +// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { return func(m optionalAttr) { m["validate_indices"] = value } } -// Applies set operation along last dimension of `Tensor` and `SparseTensor`. +// Applies set operation along last dimension of 2 `SparseTensor` inputs. // // See SetOperationOp::SetOperationFromContext for values of `set_operation`. // +// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the +// order and range of `set1` and `set2` indices. +// +// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, +// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same +// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// // Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, // and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same // as `set1`. Dimension `n` contains values in a set, duplicates are allowed but // ignored. // -// If `validate_indices` is `True`, this op validates the order and range of `set2` -// indices. +// If `validate_indices` is `True`, this op validates the order and range of `set1` +// and `set2` indices. // // Output `result` is a `SparseTensor` represented by `result_indices`, // `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this @@ -13903,21 +13805,26 @@ func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperat // `[0...n-1]` dimension of `set`. // // Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must +// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the +// max set size across `0...n-1` dimensions. // set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major // order. // set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major // order. // set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the -// max set size across `n-1` dimensions. +// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the +// max set size across `0...n-1` dimensions. // // // Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is // the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` // is the max result set size across all `0...n-1` dimensions. -func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } @@ -13926,9 +13833,9 @@ func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "DenseToSparseSetOperation", + Type: "SparseToSparseSetOperation", Input: []tf.Input{ - set1, set2_indices, set2_values, set2_shape, + set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } @@ -13936,58 +13843,66 @@ func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Out return op.Output(0), op.Output(1), op.Output(2) } -// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. -type FusedResizeAndPadConv2DAttr func(optionalAttr) - -// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. +// Computes numerical negative value element-wise. // -// value: If true, rescale input by (new_height - 1) / (height - 1), -// which exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. +// I.e., \\(y = -x\\). +func Neg(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Neg", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. +type FakeQuantWithMinMaxVarsAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. // If not specified, defaults to false -func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { +func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { return func(m optionalAttr) { - m["resize_align_corners"] = value + m["narrow_range"] = value } } -// Performs a resize and padding as a preprocess during a convolution. +// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` // -// It's often possible to do spatial transformations more efficiently as part of -// the packing stage of a convolution, so this op allows for an optimized -// implementation where these stages are fused together. This prevents the need to -// write out the intermediate results as whole tensors, reducing memory pressure, -// and we can get some latency gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and defaults to -// 'NHWC' order. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. +// and `max` to 'outputs' tensor of same shape as `inputs`. // -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. +// `[min; max]` define the clamping range for the `inputs` data. +// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. // -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. -func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { +// This operation has a gradient and thus allows for training `min` and `max` +// values. +func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedResizeAndPadConv2D", + Type: "FakeQuantWithMinMaxVars", Input: []tf.Input{ - input, size, paddings, filter, + inputs, min, max, }, Attrs: attrs, } @@ -13995,132 +13910,154 @@ func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, padd return op.Output(0) } -// Subtracts a value from the current value of a variable. -// -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// Returns the element-wise min of two SparseTensors. // -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. +// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. // // Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. +// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, in the canonical lexicographic ordering. +// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +// a_shape: 1-D. Shape of the input SparseTensor. +// b_indices: counterpart to `a_indices` for the other operand. +// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. // -// Returns the created operation. -func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. +func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AssignSubVariableOp", + Type: "SparseSparseMinimum", Input: []tf.Input{ - resource, value, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, }, } - return scope.AddOperation(opspec) -} - -// RestoreAttr is an optional argument to Restore. -type RestoreAttr func(optionalAttr) - -// RestorePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. -// If not specified, defaults to -1 -func RestorePreferredShard(value int64) RestoreAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Restores a tensor from checkpoint files. -// -// Reads a tensor stored in one or several files. If there are several files (for -// instance because a tensor was saved as slices), `file_pattern` may contain -// wildcard symbols (`*` and `?`) in the filename portion only, not in the -// directory portion. -// -// If a `file_pattern` matches several files, `preferred_shard` can be used to hint -// in which file the requested tensor is likely to be found. This op will first -// open the file at index `preferred_shard` in the list of matching files and try -// to restore tensors from that file. Only if some tensors or tensor slices are -// not found in that first file, then the Op opens all the files. Setting -// `preferred_shard` to match the value passed as the `shard` input -// of a matching `Save` Op may speed up Restore. This attribute only affects -// performance, not correctness. The default value -1 means files are processed in -// order. +// Constructs a tensor by tiling a given tensor. // -// See also `RestoreSlice`. +// This operation creates a new tensor by replicating `input` `multiples` times. +// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements, +// and the values of `input` are replicated `multiples[i]` times along the 'i'th +// dimension. For example, tiling `[a b c d]` by `[2]` produces +// `[a b c d a b c d]`. // // Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { +// input: 1-D or higher. +// multiples: 1-D. Length must be the same as the number of dimensions in `input` +func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Restore", + Type: "Tile", Input: []tf.Input{ - file_pattern, tensor_name, + input, multiples, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. -type QuantizedResizeBilinearAttr func(optionalAttr) +// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap. +type TakeManySparseFromTensorsMapAttr func(optionalAttr) -// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value. // -// value: If true, rescale input by (new_height - 1) / (height - 1), which -// exactly aligns the 4 corners of images and resized images. If false, rescale -// by new_height / height. Treat similarly the width dimension. -// If not specified, defaults to false -func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { +// value: The container name for the `SparseTensorsMap` read by this op. +// If not specified, defaults to "" +func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["container"] = value } } -// Resize quantized `images` to `size` using quantized bilinear interpolation. +// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value. // -// Input images and output images must be quantized types. +// value: The shared name for the `SparseTensorsMap` read by this op. +// It should not be blank; rather the `shared_name` or unique Operation name +// of the Op that created the original `SparseTensorsMap` should be used. +// If not specified, defaults to "" +func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. // -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where +// `N` is the minibatch size and the rows correspond to the output handles of +// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. The ranks of the +// original `SparseTensor` objects that went into the given input ops must all +// match. When the final `SparseTensor` is created, it has rank one +// higher than the ranks of the incoming `SparseTensor` objects +// (they have been concatenated along a new row dimension on the left). +// +// The output `SparseTensor` object's shape values for all dimensions but the +// first are the max across the input `SparseTensor` objects' shape values +// for the corresponding dimensions. Its first shape value is `N`, the minibatch +// size. // +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. // +// For example, if the handles represent an input, which is a `[2, 3]` matrix +// representing two original `SparseTensor` objects: // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { +// ``` +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] +// ``` +// +// and +// +// ``` +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] +// ``` +// +// then the final `SparseTensor` will be: +// +// ``` +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] +// ``` +// +// Arguments: +// sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. +// Shape: `[N]`. +// dtype: The `dtype` of the `SparseTensor` objects stored in the +// `SparseTensorsMap`. +// +// Returns 2-D. The `indices` of the minibatch `SparseTensor`.1-D. The `values` of the minibatch `SparseTensor`.1-D. The `shape` of the minibatch `SparseTensor`. +func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedResizeBilinear", + Type: "TakeManySparseFromTensorsMap", Input: []tf.Input{ - images, size, min, max, + sparse_handles, }, Attrs: attrs, } @@ -14128,182 +14065,164 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min return op.Output(0), op.Output(1), op.Output(2) } -// Computes the minimum along segments of a tensor. +// Says whether the targets are in the top `K` predictions. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. // -// Computes a tensor such that -// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such -// that `segment_ids[j] == i`. +// More formally, let // -// If the min is empty for a given segment ID `i`, `output[i] = 0`. +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, // -//
-// -//
+// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ // // Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns Computed precision at `k` as a `bool Tensor`. +func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMin", + Type: "InTopKV2", Input: []tf.Input{ - data, segment_ids, + predictions, targets, k, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. -type SdcaOptimizerAttr func(optionalAttr) - -// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. +// Assigns a new value to a variable. // -// value: Whether to use Adapative SDCA for the inner loop. -// If not specified, defaults to false -func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { - return func(m optionalAttr) { - m["adaptative"] = value +// Any ReadVariableOp with a control dependency on this op is guaranteed to return +// this value or a subsequent newer value of the variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value to set the new tensor to use. +// +// Returns the created operation. +func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignVariableOp", + Input: []tf.Input{ + resource, value, + }, } + return scope.AddOperation(opspec) } -// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for -// -// linear models with L1 + L2 regularization. As global optimization objective is -// strongly-convex, the optimizer optimizes the dual objective at each step. The -// optimizer applies each update one example at a time. Examples are sampled -// uniformly, and the optimizer is learning rate free and enjoys linear convergence -// rate. +// Returns a tensor of ones with the same shape and type as x. // -// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
-// Shai Shalev-Shwartz, Tong Zhang. 2012 +// Arguments: +// x: a tensor of type T. // -// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ +// Returns a tensor of the same shape and type as x but filled with ones. +func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "OnesLike", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// The gradient of SparseFillEmptyRows. // -// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, -// Peter Richtarik, Martin Takac. 2015 +// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, +// shaped `[N_full]`, where `N_full >= N` and copies data into either +// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and +// `d_default_value` is a scalar. // -// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 +// d_values[j] = grad_values[reverse_index_map[j]] +// d_default_value = sum_{k : 0 .. N_full - 1} ( +// grad_values[k] * 1{k not in reverse_index_map}) // // Arguments: -// sparse_example_indices: a list of vectors which contain example indices. -// sparse_feature_indices: a list of vectors which contain feature indices. -// sparse_feature_values: a list of vectors which contains feature value -// associated with each feature group. -// dense_features: a list of matrices which contains the dense feature values. -// example_weights: a vector which contains the weight associated with each -// example. -// example_labels: a vector which contains the label/target associated with each -// example. -// sparse_indices: a list of vectors where each value is the indices which has -// corresponding weights in sparse_weights. This field maybe omitted for the -// dense approach. -// sparse_weights: a list of vectors where each value is the weight associated with -// a sparse feature group. -// dense_weights: a list of vectors where the values are the weights associated -// with a dense feature group. -// example_state_data: a list of vectors containing the example state data. -// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, -// squared and hinge losses. -// l1: Symmetric l1 regularization strength. -// l2: Symmetric l2 regularization strength. -// num_loss_partitions: Number of partitions of the global loss function. -// num_inner_iterations: Number of iterations per mini-batch. +// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. +// grad_values: 1-D. The gradients from backprop. // -// Returns a list of vectors containing the updated example state -// data.a list of vectors where each value is the delta -// weights associated with a sparse feature group.a list of vectors where the values are the delta -// weights associated with a dense feature group. -func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { +// Returns 1-D. The backprop into values.0-D. The backprop into default_value. +func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "SdcaOptimizer", + Type: "SparseFillEmptyRowsGrad", Input: []tf.Input{ - tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, + reverse_index_map, grad_values, }, - Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` +// +// if < 0, `scale * features` otherwise. +// +// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) +func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - out_example_state_data = op.Output(idx) - if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights -} - -// SparseMatMulAttr is an optional argument to SparseMatMul. -type SparseMatMulAttr func(optionalAttr) - -// SparseMatMulTransposeA sets the optional transpose_a attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeA(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// SparseMatMulTransposeB sets the optional transpose_b attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeB(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value + opspec := tf.OpSpec{ + Type: "Selu", + Input: []tf.Input{ + features, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["a_is_sparse"] = value - } -} +// SetSizeAttr is an optional argument to SetSize. +type SetSizeAttr func(optionalAttr) -// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { +// SetSizeValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SetSizeValidateIndices(value bool) SetSizeAttr { return func(m optionalAttr) { - m["b_is_sparse"] = value + m["validate_indices"] = value } } -// Multiply matrix "a" by matrix "b". +// Number of unique elements along last dimension of input `set`. // -// The inputs must be two-dimensional matrices and the inner dimension of "a" must -// match the outer dimension of "b". This op is optimized for the case where at -// least one of "a" or "b" is sparse. The breakeven for using this versus a dense -// matrix multiply on one platform was 30% zero values in the sparse matrix. +// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, +// and `set_shape`. The last dimension contains values in a set, duplicates are +// allowed but ignored. // -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. -func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { +// If `validate_indices` is `True`, this op validates the order and range of `set` +// indices. +// +// Arguments: +// set_indices: 2D `Tensor`, indices of a `SparseTensor`. +// set_values: 1D `Tensor`, values of a `SparseTensor`. +// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// +// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st +// `n-1` dimensions as `set`. Each value is the number of unique elements in +// the corresponding `[0...n-1]` dimension of `set`. +func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { if scope.Err() != nil { return } @@ -14312,9 +14231,9 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM a(attrs) } opspec := tf.OpSpec{ - Type: "SparseMatMul", + Type: "SetSize", Input: []tf.Input{ - a, b, + set_indices, set_values, set_shape, }, Attrs: attrs, } @@ -14322,52 +14241,64 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM return op.Output(0) } -// Computes the power of one value to another. +// Computes the sign and the log of the absolute value of the determinant of // -// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for -// corresponding elements in `x` and `y`. For example: +// one or more square matrices. // -// ``` -// # tensor 'x' is [[2, 2]], [3, 3]] -// # tensor 'y' is [[8, 16], [2, 3]] -// tf.pow(x, y) ==> [[256, 65536], [9, 27]] -// ``` -func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +// form square matrices. The outputs are two tensors containing the signs and +// absolute values of the log determinants for all N input submatrices +// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +// is the LU decomposition of the input and P is the corresponding +// permutation matrix. +// +// Arguments: +// input: Shape is `[N, M, M]`. +// +// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants +// of the N input matrices. Shape is `[N]`. +func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Pow", + Type: "LogMatrixDeterminant", Input: []tf.Input{ - x, y, + input, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// ShapeAttr is an optional argument to Shape. -type ShapeAttr func(optionalAttr) +// SumAttr is an optional argument to Sum. +type SumAttr func(optionalAttr) -// ShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func ShapeOutType(value tf.DataType) ShapeAttr { +// SumKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SumKeepDims(value bool) SumAttr { return func(m optionalAttr) { - m["out_type"] = value + m["keep_dims"] = value } } -// Returns the shape of a tensor. +// Computes the sum of elements across dimensions of a tensor. // -// This operation returns a 1-D integer tensor representing the shape of `input`. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // -// For example: +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { +// Returns The reduced tensor. +func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -14376,9 +14307,9 @@ func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "Shape", + Type: "Sum", Input: []tf.Input{ - input, + input, axis, }, Attrs: attrs, } @@ -14386,203 +14317,191 @@ func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Outp return op.Output(0) } -// Computes fingerprints of the input strings. +// Delete the tensor specified by its handle in the session. // // Arguments: -// input: vector of strings to compute fingerprints on. +// handle: The handle for a tensor stored in the session state. // -// Returns a (N,2) shaped matrix where N is the number of elements in the input -// vector. Each row contains the low and high parts of the fingerprint. -func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { +// Returns the created operation. +func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SdcaFprint", + Type: "DeleteSessionTensor", Input: []tf.Input{ - input, + handle, }, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. -type RandomPoissonV2Attr func(optionalAttr) - -// RandomPoissonV2Seed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// RandomPoissonV2Dtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT64 -func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["dtype"] = value - } + return scope.AddOperation(opspec) } -// Outputs random values from the Poisson distribution(s) described by rate. +// L2 Loss. // -// This op uses two algorithms, depending on rate. If rate >= 10, then -// the algorithm by Hormann is used to acquire samples via -// transformation-rejection. -// See http://www.sciencedirect.com/science/article/pii/0167668793909974. +// Computes half the L2 norm of a tensor without the `sqrt`: // -// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform -// random variables. -// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer -// Programming, Volume 2. Addison Wesley +// output = sum(t ** 2) / 2 // // Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in rate. -// rate: A tensor in which each scalar is a "rate" parameter describing the -// associated poisson distribution. +// t: Typically 2-D, but may have any dimensions. // -// Returns A tensor with shape `shape + shape(rate)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `rate[i0, i1, ...iN]`. -func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { +// Returns 0-D. +func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RandomPoissonV2", + Type: "L2Loss", Input: []tf.Input{ - shape, rate, + t, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. -type MatrixTriangularSolveAttr func(optionalAttr) +// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. +type DenseToSparseSetOperationAttr func(optionalAttr) -// MatrixTriangularSolveLower sets the optional lower attribute to value. -// -// value: Boolean indicating whether the innermost matrices in `matrix` are -// lower or upper triangular. +// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. // If not specified, defaults to true -func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { +func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { return func(m optionalAttr) { - m["lower"] = value + m["validate_indices"] = value } } -// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. +// Applies set operation along last dimension of `Tensor` and `SparseTensor`. // -// @compatibility(numpy) -// Equivalent to np.linalg.triangular_solve -// @end_compatibility -// If not specified, defaults to false -func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Solves systems of linear equations with upper or lower triangular matrices by +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. // -// backsubstitution. +// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. // -// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form -// square matrices. If `lower` is `True` then the strictly upper triangular part -// of each inner-most matrix is assumed to be zero and not accessed. -// If `lower` is False then the strictly lower triangular part of each inner-most -// matrix is assumed to be zero and not accessed. -// `rhs` is a tensor of shape `[..., M, K]`. +// If `validate_indices` is `True`, this op validates the order and range of `set2` +// indices. // -// The output is a tensor of shape `[..., M, K]`. If `adjoint` is -// `True` then the innermost matrices in `output` satisfy matrix equations -// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `False` then the strictly then the innermost matrices in -// `output` satisfy matrix equations -// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. +// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must +// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the +// max set size across `n-1` dimensions. // -// Returns Shape is `[..., M, K]`. -func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixTriangularSolve", + Type: "DenseToSparseSetOperation", Input: []tf.Input{ - matrix, rhs, + set1, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { +// Subtracts a value from the current value of a variable. +// +// Any ReadVariableOp which depends directly or indirectly on this assign is +// guaranteed to see the incremented value or a subsequent newer one. +// +// Outputs the incremented value, which can be used to totally order the +// increments to this variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Asinh", + Type: "AssignSubVariableOp", Input: []tf.Input{ - x, + resource, value, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates a dataset with a range of values. Corresponds to python's xrange. +// RestoreAttr is an optional argument to Restore. +type RestoreAttr func(optionalAttr) + +// RestorePreferredShard sets the optional preferred_shard attribute to value. // -// Arguments: -// start: corresponds to start in python's xrange(). -// stop: corresponds to stop in python's xrange(). -// step: corresponds to step in python's xrange(). +// value: Index of file to open first if multiple files match +// `file_pattern`. +// If not specified, defaults to -1 +func RestorePreferredShard(value int64) RestoreAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. // +// Reads a tensor stored in one or several files. If there are several files (for +// instance because a tensor was saved as slices), `file_pattern` may contain +// wildcard symbols (`*` and `?`) in the filename portion only, not in the +// directory portion. // -func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// If a `file_pattern` matches several files, `preferred_shard` can be used to hint +// in which file the requested tensor is likely to be found. This op will first +// open the file at index `preferred_shard` in the list of matching files and try +// to restore tensors from that file. Only if some tensors or tensor slices are +// not found in that first file, then the Op opens all the files. Setting +// `preferred_shard` to match the value passed as the `shard` input +// of a matching `Save` Op may speed up Restore. This attribute only affects +// performance, not correctness. The default value -1 means files are processed in +// order. +// +// See also `RestoreSlice`. +// +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RangeDataset", + Type: "Restore", Input: []tf.Input{ - start, stop, step, + file_pattern, tensor_name, }, Attrs: attrs, } @@ -14590,212 +14509,229 @@ func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, return op.Output(0) } -// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. -type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) +// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. +type QuantizedResizeBilinearAttr func(optionalAttr) -// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. +// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { +// value: If true, rescale input by (new_height - 1) / (height - 1), which +// exactly aligns the 4 corners of images and resized images. If false, rescale +// by new_height / height. Treat similarly the width dimension. +// If not specified, defaults to false +func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { return func(m optionalAttr) { - m["data_format"] = value + m["align_corners"] = value } } -// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. +// Resize quantized `images` to `size` using quantized bilinear interpolation. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the input. +// Input images and output images must be quantized types. // // Arguments: -// input_sizes: An integer vector representing the shape of `input`, based -// on `data_format`. For example, if `data_format` is 'NHWC' then -// `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns 4-D with shape according to `data_format`. For example, if -// `data_format` is 'NHWC', output shape is `[batch, in_height, -// in_width, in_channels]`. Gradient w.r.t. the input of the -// convolution. -func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { +// +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropInput", + Type: "QuantizedResizeBilinear", Input: []tf.Input{ - input_sizes, filter, out_backprop, + images, size, min, max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Adds sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] += updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] += updates[i, ...] +// Computes the minimum along segments of a tensor. // -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. // -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. +// Computes a tensor such that +// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such +// that `segment_ids[j] == i`. // -// Requires `updates.shape = indices.shape + ref.shape[1:]`. +// If the min is empty for a given segment ID `i`, `output[i] = 0`. // //
-// +// //
// // Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. // -// Returns the created operation. -func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterAdd", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Computes the gradient for the inverse of `x` wrt its input. +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. // -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. -func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReciprocalGrad", + Type: "SegmentMin", Input: []tf.Input{ - y, dy, + data, segment_ids, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. +// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. +type SdcaOptimizerAttr func(optionalAttr) + +// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. // -// *NOTE*: `Minimum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Minimum", +// value: Whether to use Adapative SDCA for the inner loop. +// If not specified, defaults to false +func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { + return func(m optionalAttr) { + m["adaptative"] = value + } +} + +// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for +// +// linear models with L1 + L2 regularization. As global optimization objective is +// strongly-convex, the optimizer optimizes the dual objective at each step. The +// optimizer applies each update one example at a time. Examples are sampled +// uniformly, and the optimizer is learning rate free and enjoys linear convergence +// rate. +// +// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
+// Shai Shalev-Shwartz, Tong Zhang. 2012 +// +// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ +// +// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
+// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, +// Peter Richtarik, Martin Takac. 2015 +// +// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
+// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 +// +// Arguments: +// sparse_example_indices: a list of vectors which contain example indices. +// sparse_feature_indices: a list of vectors which contain feature indices. +// sparse_feature_values: a list of vectors which contains feature value +// associated with each feature group. +// dense_features: a list of matrices which contains the dense feature values. +// example_weights: a vector which contains the weight associated with each +// example. +// example_labels: a vector which contains the label/target associated with each +// example. +// sparse_indices: a list of vectors where each value is the indices which has +// corresponding weights in sparse_weights. This field maybe omitted for the +// dense approach. +// sparse_weights: a list of vectors where each value is the weight associated with +// a sparse feature group. +// dense_weights: a list of vectors where the values are the weights associated +// with a dense feature group. +// example_state_data: a list of vectors containing the example state data. +// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, +// squared and hinge losses. +// l1: Symmetric l1 regularization strength. +// l2: Symmetric l2 regularization strength. +// num_loss_partitions: Number of partitions of the global loss function. +// num_inner_iterations: Number of iterations per mini-batch. +// +// Returns a list of vectors containing the updated example state +// data.a list of vectors where each value is the delta +// weights associated with a sparse feature group.a list of vectors where the values are the delta +// weights associated with a dense feature group. +func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SdcaOptimizer", Input: []tf.Input{ - x, y, + tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + out_example_state_data = op.Output(idx) + if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { + scope.UpdateErr("SdcaOptimizer", err) + return + } + if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { + scope.UpdateErr("SdcaOptimizer", err) + return + } + return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights } -// MfccAttr is an optional argument to Mfcc. -type MfccAttr func(optionalAttr) +// SparseMatMulAttr is an optional argument to SparseMatMul. +type SparseMatMulAttr func(optionalAttr) -// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. -// -// value: The highest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 4000 -func MfccUpperFrequencyLimit(value float32) MfccAttr { +// SparseMatMulTransposeA sets the optional transpose_a attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeA(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["upper_frequency_limit"] = value + m["transpose_a"] = value } } -// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. -// -// value: The lowest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 20 -func MfccLowerFrequencyLimit(value float32) MfccAttr { +// SparseMatMulTransposeB sets the optional transpose_b attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeB(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["lower_frequency_limit"] = value + m["transpose_b"] = value } } -// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. -// -// value: Resolution of the Mel bank used internally. -// If not specified, defaults to 40 -func MfccFilterbankChannelCount(value int64) MfccAttr { +// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["filterbank_channel_count"] = value + m["a_is_sparse"] = value } } -// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. -// -// value: How many output channels to produce per time slice. -// If not specified, defaults to 13 -func MfccDctCoefficientCount(value int64) MfccAttr { +// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["dct_coefficient_count"] = value + m["b_is_sparse"] = value } } -// Transforms a spectrogram into a form that's useful for speech recognition. +// Multiply matrix "a" by matrix "b". // -// Mel Frequency Cepstral Coefficients are a way of representing audio data that's -// been effective as an input feature for machine learning. They are created by -// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the -// higher frequencies that are less significant to the human ear. They have a long -// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -// is a good resource to learn more. +// The inputs must be two-dimensional matrices and the inner dimension of "a" must +// match the outer dimension of "b". This op is optimized for the case where at +// least one of "a" or "b" is sparse. The breakeven for using this versus a dense +// matrix multiply on one platform was 30% zero values in the sparse matrix. // -// Arguments: -// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared -// set to true. -// sample_rate: How many samples per second the source audio used. -func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { +// The gradient computation of this operation will only take advantage of sparsity +// in the input gradient when that gradient comes from a Relu. +func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -14804,9 +14740,9 @@ func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional . a(attrs) } opspec := tf.OpSpec{ - Type: "Mfcc", + Type: "SparseMatMul", Input: []tf.Input{ - spectrogram, sample_rate, + a, b, }, Attrs: attrs, } @@ -14814,244 +14750,259 @@ func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional . return op.Output(0) } -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. -// -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. +// MultinomialAttr is an optional argument to Multinomial. +type MultinomialAttr func(optionalAttr) + +// MultinomialSeed sets the optional seed attribute to value. // -// Returns a `Tensor` of same shape and type as the elements of `inputs`. +// value: If either seed or seed2 is set to be non-zero, the internal random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func MultinomialSeed(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// MultinomialSeed2 sets the optional seed2 attribute to value. // -// Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { - if scope.Err() != nil { - return +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func MultinomialSeed2(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed2"] = value } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "AccumulateNV2", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, +} + +// MultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Convert the quantized 'input' tensor into a lower-precision 'output', using the -// -// actual distribution of the values to maximize the usage of the lower bit depth -// and adjusting the output min and max ranges accordingly. -// -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. -// -// This operator tries to squeeze as much precision as possible into an output with -// a lower bit depth by calculating the actual min and max values found in the -// data. For example, maybe that quint16 input has no values lower than 16,384 and -// none higher than 49,152. That means only half the range is actually needed, all -// the float interpretations are between -0.5f and 0.5f, so if we want to compress -// the data into a quint8 output, we can use that range rather than the theoretical -// -1.0f to 1.0f that is suggested by the input min and max. -// -// In practice, this is most useful for taking output from operations like -// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -// may have large potential output ranges, but in practice have a distribution of -// input values that only uses a small fraction of the possible range. By feeding -// that output into this operator, we can reduce it from 32 bits down to 8 with -// minimal loss of accuracy. +// Draws samples from a multinomial distribution. // // Arguments: +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. -// -// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. -func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "QuantizeDownAndShrinkRange", + Type: "Multinomial", Input: []tf.Input{ - input, input_min, input_max, + logits, num_samples, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) +// EncodeJpegAttr is an optional argument to EncodeJpeg. +type EncodeJpegAttr func(optionalAttr) -// RandomGammaSeed sets the optional seed attribute to value. +// EncodeJpegFormat sets the optional format attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { +// value: Per pixel image format. +// If not specified, defaults to "" +func EncodeJpegFormat(value string) EncodeJpegAttr { return func(m optionalAttr) { - m["seed"] = value + m["format"] = value } } -// RandomGammaSeed2 sets the optional seed2 attribute to value. +// EncodeJpegQuality sets the optional quality attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { +// value: Quality of the compression from 0 to 100 (higher is better and slower). +// If not specified, defaults to 95 +func EncodeJpegQuality(value int64) EncodeJpegAttr { return func(m optionalAttr) { - m["seed2"] = value + m["quality"] = value } } -// Outputs random values from the Gamma distribution(s) described by alpha. -// -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 +// EncodeJpegProgressive sets the optional progressive attribute to value. // -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. +// value: If True, create a JPEG that loads progressively (coarse to fine). +// If not specified, defaults to false +func EncodeJpegProgressive(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["progressive"] = value + } +} + +// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. // -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. -func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { - if scope.Err() != nil { - return +// value: If True, spend CPU/RAM to reduce size with no quality change. +// If not specified, defaults to false +func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["optimize_size"] = value } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +} + +// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. +// +// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. +// If not specified, defaults to true +func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["chroma_downsampling"] = value } - opspec := tf.OpSpec{ - Type: "RandomGamma", - Input: []tf.Input{ - shape, alpha, - }, - Attrs: attrs, +} + +// EncodeJpegDensityUnit sets the optional density_unit attribute to value. +// +// value: Unit used to specify `x_density` and `y_density`: +// pixels per inch (`'in'`) or centimeter (`'cm'`). +// If not specified, defaults to "in" +func EncodeJpegDensityUnit(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["density_unit"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. -type QuantizedConv2DAttr func(optionalAttr) +// EncodeJpegXDensity sets the optional x_density attribute to value. +// +// value: Horizontal pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegXDensity(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["x_density"] = value + } +} -// QuantizedConv2DOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { +// EncodeJpegYDensity sets the optional y_density attribute to value. +// +// value: Vertical pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegYDensity(value int64) EncodeJpegAttr { return func(m optionalAttr) { - m["out_type"] = value + m["y_density"] = value } } -// QuantizedConv2DDilations sets the optional dilations attribute to value. +// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { +// value: If not empty, embed this XMP metadata in the image header. +// If not specified, defaults to "" +func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { return func(m optionalAttr) { - m["dilations"] = value + m["xmp_metadata"] = value } } -// Computes a 2D convolution given quantized 4D input and filter tensors. +// JPEG-encode an image. // -// The inputs are quantized tensors where the lowest value represents the real -// number of the associated minimum, and the highest represents the maximum. -// This means that you can only interpret the quantized output in the same way, by -// taking the returned minimum and maximum values into account. +// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. // -// Arguments: +// The attr `format` can be used to override the color format of the encoded +// output. Values can be: // -// filter: filter's input_depth dimension must match input's depth dimensions. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_filter: The float value that the lowest quantized filter value represents. -// max_filter: The float value that the highest quantized filter value represents. -// strides: The stride of the sliding window for each dimension of the input -// tensor. -// padding: The type of padding algorithm to use. +// * `''`: Use a default format based on the number of channels in the image. +// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension +// of `image` must be 1. +// * `rgb`: Output an RGB JPEG image. The `channels` dimension +// of `image` must be 3. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// If `format` is not specified or is the empty string, a default format is picked +// in function of the number of channels in `image`: +// +// * 1: Output a grayscale image. +// * 3: Output an RGB image. +// +// Arguments: +// image: 3-D with shape `[height, width, channels]`. +// +// Returns 0-D. JPEG-encoded image. +func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedConv2D", + Type: "EncodeJpeg", Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, + image, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ResourceGatherAttr is an optional argument to ResourceGather. -type ResourceGatherAttr func(optionalAttr) +// Computes the power of one value to another. +// +// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for +// corresponding elements in `x` and `y`. For example: +// +// ``` +// # tensor 'x' is [[2, 2]], [3, 3]] +// # tensor 'y' is [[8, 16], [2, 3]] +// tf.pow(x, y) ==> [[256, 65536], [9, 27]] +// ``` +func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Pow", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { +// ShapeAttr is an optional argument to Shape. +type ShapeAttr func(optionalAttr) + +// ShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func ShapeOutType(value tf.DataType) ShapeAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["out_type"] = value } } -// Gather slices from the variable pointed to by `resource` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: +// Returns the shape of a tensor. // -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] +// This operation returns a 1-D integer tensor representing the shape of `input`. // -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] +// For example: // -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] // ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] +// ``` +func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceGather", + Type: "Shape", Input: []tf.Input{ - resource, indices, + input, }, Attrs: attrs, } @@ -15059,70 +15010,82 @@ func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype t return op.Output(0) } -// Delete the TensorArray from its resource container. -// -// This enables the user to close and release the resource in the middle -// of a step/run. +// Computes fingerprints of the input strings. // // Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// input: vector of strings to compute fingerprints on. // -// Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { +// Returns a (N,2) shaped matrix where N is the number of elements in the input +// vector. Each row contains the low and high parts of the fingerprint. +func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", + Type: "SdcaFprint", Input: []tf.Input{ - handle, + input, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomUniformIntAttr is an optional argument to RandomUniformInt. -type RandomUniformIntAttr func(optionalAttr) +// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. +type RandomPoissonV2Attr func(optionalAttr) -// RandomUniformIntSeed sets the optional seed attribute to value. +// RandomPoissonV2Seed sets the optional seed attribute to value. // // value: If either `seed` or `seed2` are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomUniformIntSeed(value int64) RandomUniformIntAttr { +func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { m["seed"] = value } } -// RandomUniformIntSeed2 sets the optional seed2 attribute to value. +// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. // // value: A second seed to avoid seed collision. // If not specified, defaults to 0 -func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { +func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { m["seed2"] = value } } -// Outputs random integers from a uniform distribution. +// RandomPoissonV2Dtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT64 +func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs random values from the Poisson distribution(s) described by rate. // -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. +// This op uses two algorithms, depending on rate. If rate >= 10, then +// the algorithm by Hormann is used to acquire samples via +// transformation-rejection. +// See http://www.sciencedirect.com/science/article/pii/0167668793909974. // -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). +// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +// random variables. +// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +// Programming, Volume 2. Addison Wesley // // Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in rate. +// rate: A tensor in which each scalar is a "rate" parameter describing the +// associated poisson distribution. // -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { +// Returns A tensor with shape `shape + shape(rate)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `rate[i0, i1, ...iN]`. +func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -15131,9 +15094,9 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf a(attrs) } opspec := tf.OpSpec{ - Type: "RandomUniformInt", + Type: "RandomPoissonV2", Input: []tf.Input{ - shape, minval, maxval, + shape, rate, }, Attrs: attrs, } @@ -15141,98 +15104,109 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf return op.Output(0) } -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) +// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. +type MatrixTriangularSolveAttr func(optionalAttr) -// SkipgramWindowSize sets the optional window_size attribute to value. +// MatrixTriangularSolveLower sets the optional lower attribute to value. // -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { +// value: Boolean indicating whether the innermost matrices in `matrix` are +// lower or upper triangular. +// If not specified, defaults to true +func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { return func(m optionalAttr) { - m["window_size"] = value + m["lower"] = value } } -// SkipgramMinCount sets the optional min_count attribute to value. +// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. // -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["min_count"] = value - } -} - -// SkipgramSubsample sets the optional subsample attribute to value. +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. // -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. -// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { +// @compatibility(numpy) +// Equivalent to np.linalg.triangular_solve +// @end_compatibility +// If not specified, defaults to false +func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { return func(m optionalAttr) { - m["subsample"] = value + m["adjoint"] = value } } -// Parses a text file and creates a batch of examples. +// Solves systems of linear equations with upper or lower triangular matrices by // -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result +// backsubstitution. +// +// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +// square matrices. If `lower` is `True` then the strictly upper triangular part +// of each inner-most matrix is assumed to be zero and not accessed. +// If `lower` is False then the strictly lower triangular part of each inner-most +// matrix is assumed to be zero and not accessed. +// `rhs` is a tensor of shape `[..., M, K]`. +// +// The output is a tensor of shape `[..., M, K]`. If `adjoint` is +// `True` then the innermost matrices in `output` satisfy matrix equations +// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `False` then the strictly then the innermost matrices in +// `output` satisfy matrix equations +// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. // // Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { +// Returns Shape is `[..., M, K]`. +func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Skipgram", - + Type: "MatrixTriangularSolve", + Input: []tf.Input{ + matrix, rhs, + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) + return op.Output(0) } -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value +// Computes inverse hyperbolic sine of x element-wise. +func Asinh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Asinh", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Converts each string in the input Tensor to the specified numeric type. +// Creates a dataset with a range of values. Corresponds to python's xrange. // -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) +// Arguments: +// start: corresponds to start in python's xrange(). +// stop: corresponds to stop in python's xrange(). +// step: corresponds to step in python's xrange(). // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { +// +func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "StringToNumber", + Type: "RangeDataset", Input: []tf.Input{ - string_tensor, + start, stop, step, }, Attrs: attrs, } @@ -15240,271 +15214,253 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN return op.Output(0) } -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) +// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. +type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["data_format"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. // -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of depthwise convolution with respect to the input. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. +// input_sizes: An integer vector representing the shape of `input`, based +// on `data_format`. For example, if `data_format` is 'NHWC' then +// `input` is a 4-D `[batch, height, width, channels]` tensor. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. // -// Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +// Returns 4-D with shape according to `data_format`. For example, if +// `data_format` is 'NHWC', output shape is `[batch, in_height, +// in_width, in_channels]`. Gradient w.r.t. the input of the +// convolution. +func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "DepthwiseConv2dNativeBackpropInput", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + input_sizes, filter, out_backprop, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. +// Adds sparse updates to the variable referenced by `resource`. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// This operation computes // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a truncated normal distribution. +// # Scalar indices +// ref[indices, ...] += updates[...] // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// # Vector indices (for each i) +// ref[indices[i], ...] += updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]`. +// +//
+// +//
// // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. // -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { +// Returns the created operation. +func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TruncatedNormal", + Type: "ResourceScatterAdd", Input: []tf.Input{ - shape, + resource, indices, updates, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. +// Computes the gradient for the inverse of `x` wrt its input. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value +// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +// is the corresponding input gradient. +func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } -} - -// RandomShuffleSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value + opspec := tf.OpSpec{ + Type: "ReciprocalGrad", + Input: []tf.Input{ + y, dy, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: -// -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` -// -// Arguments: -// value: The tensor to be shuffled. +// Returns the min of x and y (i.e. x < y ? x : y) element-wise. // -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. -func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { +// *NOTE*: `Minimum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RandomShuffle", + Type: "Minimum", Input: []tf.Input{ - value, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. -type OrderedMapIncompleteSizeAttr func(optionalAttr) +// MfccAttr is an optional argument to Mfcc. +type MfccAttr func(optionalAttr) -// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. // -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { +// value: The highest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 4000 +func MfccUpperFrequencyLimit(value float32) MfccAttr { return func(m optionalAttr) { - m["capacity"] = value + m["upper_frequency_limit"] = value } } -// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. // -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { +// value: The lowest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 20 +func MfccLowerFrequencyLimit(value float32) MfccAttr { return func(m optionalAttr) { - m["memory_limit"] = value + m["lower_frequency_limit"] = value } } -// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { +// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. +// +// value: Resolution of the Mel bank used internally. +// If not specified, defaults to 40 +func MfccFilterbankChannelCount(value int64) MfccAttr { return func(m optionalAttr) { - m["container"] = value + m["filterbank_channel_count"] = value } } -// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { +// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. +// +// value: How many output channels to produce per time slice. +// If not specified, defaults to 13 +func MfccDctCoefficientCount(value int64) MfccAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["dct_coefficient_count"] = value } } -// Op returns the number of incomplete elements in the underlying container. -func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { +// Transforms a spectrogram into a form that's useful for speech recognition. +// +// Mel Frequency Cepstral Coefficients are a way of representing audio data that's +// been effective as an input feature for machine learning. They are created by +// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the +// higher frequencies that are less significant to the human ear. They have a long +// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum +// is a good resource to learn more. +// +// Arguments: +// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared +// set to true. +// sample_rate: How many samples per second the source audio used. +func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapIncompleteSize", - + Type: "Mfcc", + Input: []tf.Input{ + spectrogram, sample_rate, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodeRawAttr is an optional argument to DecodeRaw. -type DecodeRawAttr func(optionalAttr) - -// DecodeRawLittleEndian sets the optional little_endian attribute to value. +// Returns the element-wise sum of a list of tensors. // -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. +// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +// wait for all of its inputs to be ready before beginning to sum. This can +// save memory if inputs are ready at different times, since minimum temporary +// storage is proportional to the output size rather than the inputs size. // -// Arguments: -// bytes: All the elements must have the same length. +// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. // +// Returns a `Tensor` of same shape and type as the elements of `inputs`. // -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { +// Arguments: +// inputs: A list of `Tensor` objects, each with same shape and type. +// shape: Shape of elements of `inputs`. +func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "DecodeRaw", + Type: "AccumulateNV2", Input: []tf.Input{ - bytes, + tf.OutputList(inputs), }, Attrs: attrs, } @@ -15512,99 +15468,95 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } -// Copy a tensor setting everything outside a central band in each innermost matrix +// Convert the quantized 'input' tensor into a lower-precision 'output', using the // -// to zero. +// actual distribution of the values to maximize the usage of the lower bit depth +// and adjusting the output min and max ranges accordingly. // -// The `band` part is computed as follows: -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor with the same shape where +// [input_min, input_max] are scalar floats that specify the range for the float +// interpretation of the 'input' data. For example, if input_min is -1.0f and +// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. // -// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. +// This operator tries to squeeze as much precision as possible into an output with +// a lower bit depth by calculating the actual min and max values found in the +// data. For example, maybe that quint16 input has no values lower than 16,384 and +// none higher than 49,152. That means only half the range is actually needed, all +// the float interpretations are between -0.5f and 0.5f, so if we want to compress +// the data into a quint8 output, we can use that range rather than the theoretical +// -1.0f to 1.0f that is suggested by the input min and max. // -// The indicator function +// In practice, this is most useful for taking output from operations like +// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and +// may have large potential output ranges, but in practice have a distribution of +// input values that only uses a small fraction of the possible range. By feeding +// that output into this operator, we can reduce it from 32 bits down to 8 with +// minimal loss of accuracy. // -// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && -// (num_upper < 0 || (n-m) <= num_upper)`. +// Arguments: // -// For example: -// -// ``` -// # if 'input' is [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [-2, -1, 0, 1] -// [-3, -2, -1, 0]], -// -// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [ 0, -1, 0, 1] -// [ 0, 0, -1, 0]], -// -// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] -// [-1, 0, 1, 0] -// [-2, -1, 0, 1] -// [ 0, -2, -1, 0]] -// ``` -// -// Useful special cases: -// -// ``` -// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. -// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. -// tf.matrix_band_part(input, 0, 0) ==> Diagonal. -// ``` -// -// Arguments: -// input: Rank `k` tensor. -// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire -// lower triangle. -// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep -// entire upper triangle. +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// out_type: The type of the output. Should be a lower bit depth than Tinput. // -// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. -func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { +// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. +func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "MatrixBandPart", + Type: "QuantizeDownAndShrinkRange", Input: []tf.Input{ - input, num_lower, num_upper, + input, input_min, input_max, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// DecodeCompressedAttr is an optional argument to DecodeCompressed. -type DecodeCompressedAttr func(optionalAttr) +// RandomGammaAttr is an optional argument to RandomGamma. +type RandomGammaAttr func(optionalAttr) -// DecodeCompressedCompressionType sets the optional compression_type attribute to value. +// RandomGammaSeed sets the optional seed attribute to value. // -// value: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// If not specified, defaults to "" -func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomGammaSeed(value int64) RandomGammaAttr { return func(m optionalAttr) { - m["compression_type"] = value + m["seed"] = value } } -// Decompress strings. +// RandomGammaSeed2 sets the optional seed2 attribute to value. // -// This op decompresses each element of the `bytes` input `Tensor`, which -// is assumed to be compressed using the given `compression_type`. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomGammaSeed2(value int64) RandomGammaAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from the Gamma distribution(s) described by alpha. // -// The `output` is a string `Tensor` of the same shape as `bytes`, -// each element containing the decompressed data from the corresponding -// element in `bytes`. +// This op uses the algorithm by Marsaglia et al. to acquire samples via +// transformation-rejection from pairs of uniform and normal random variables. +// See http://dl.acm.org/citation.cfm?id=358414 // // Arguments: -// bytes: A Tensor of string which is compressed. +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in alpha. +// alpha: A tensor in which each scalar is a "shape" parameter describing the +// associated gamma distribution. // -// Returns A Tensor with the same shape as input `bytes`, uncompressed -// from bytes. -func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { +// Returns A tensor with shape `shape + shape(alpha)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. +func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15613,9 +15565,9 @@ func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompresse a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeCompressed", + Type: "RandomGamma", Input: []tf.Input{ - bytes, + shape, alpha, }, Attrs: attrs, } @@ -15623,38 +15575,48 @@ func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompresse return op.Output(0) } -// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2. -type WholeFileReaderV2Attr func(optionalAttr) +// RandomUniformIntAttr is an optional argument to RandomUniformInt. +type RandomUniformIntAttr func(optionalAttr) -// WholeFileReaderV2Container sets the optional container attribute to value. +// RandomUniformIntSeed sets the optional seed attribute to value. // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformIntSeed(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["container"] = value + m["seed"] = value } } -// WholeFileReaderV2SharedName sets the optional shared_name attribute to value. +// RandomUniformIntSeed2 sets the optional seed2 attribute to value. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["seed2"] = value } } -// A Reader that outputs the entire contents of a file as a value. +// Outputs random integers from a uniform distribution. // -// To use, enqueue filenames in a Queue. The output of ReaderRead will -// be a filename (key) and the contents of that file (value). +// The generated values are uniform integers in the range `[minval, maxval)`. +// The lower bound `minval` is included in the range, while the upper bound +// `maxval` is excluded. // -// Returns The handle to reference the Reader. -func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) { +// The random integers are slightly biased unless `maxval - minval` is an exact +// power of two. The bias is small for values of `maxval - minval` significantly +// smaller than the range of the output (either `2^32` or `2^64`). +// +// Arguments: +// shape: The shape of the output tensor. +// minval: 0-D. Inclusive lower bound on the generated integers. +// maxval: 0-D. Exclusive upper bound on the generated integers. +// +// Returns A tensor of the specified shape filled with uniform random integers. +func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15663,165 +15625,280 @@ func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_ a(attrs) } opspec := tf.OpSpec{ - Type: "WholeFileReaderV2", - + Type: "RandomUniformInt", + Input: []tf.Input{ + shape, minval, maxval, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a tf.Example proto (as a string) into typed tensors. +// SkipgramAttr is an optional argument to Skipgram. +type SkipgramAttr func(optionalAttr) + +// SkipgramWindowSize sets the optional window_size attribute to value. +// +// value: The number of words to predict to the left and right of the target. +// If not specified, defaults to 5 +func SkipgramWindowSize(value int64) SkipgramAttr { + return func(m optionalAttr) { + m["window_size"] = value + } +} + +// SkipgramMinCount sets the optional min_count attribute to value. +// +// value: The minimum number of word occurrences for it to be included in the +// vocabulary. +// If not specified, defaults to 5 +func SkipgramMinCount(value int64) SkipgramAttr { + return func(m optionalAttr) { + m["min_count"] = value + } +} + +// SkipgramSubsample sets the optional subsample attribute to value. +// +// value: Threshold for word occurrence. Words that appear with higher +// frequency will be randomly down-sampled. Set to 0 to disable. +// If not specified, defaults to 0.001 +func SkipgramSubsample(value float32) SkipgramAttr { + return func(m optionalAttr) { + m["subsample"] = value + } +} + +// Parses a text file and creates a batch of examples. +// +// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result // // Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// dense_defaults: A list of Tensors (some may be empty), whose length matches -// the length of `dense_keys`. dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse features to be parsed from the example. This -// must match the lengths of `sparse_keys` and `sparse_types`. -// sparse_keys: A list of `num_sparse` strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: The keys expected in the Examples' features associated with dense -// values. -// sparse_types: A list of `num_sparse` types; the data types of data in each -// Feature given in sparse_keys. -// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: The shapes of data in each Feature given in dense_keys. -// The length of this list must match the length of `dense_keys`. The -// number of elements in the Feature corresponding to dense_key[j] must -// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == -// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] -// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, -// ..., DN), the shape of the output Tensor dense_values[j] will be (M, -// D1, .., DN), where M is the number of blocks of elements of length -// D1 * .... * DN, in the input. -func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { +// filename: The corpus's text file name. +// batch_size: The size of produced batch. +// +// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. +func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} + attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ParseSingleExample", - Input: []tf.Input{ - serialized, tf.OutputList(dense_defaults), - }, + Type: "Skipgram", + Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) +} + +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) + +// StringToNumberOutType sets the optional out_type attribute to value. +// +// value: The numeric type to interpret each string in `string_tensor` as. +// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { + return func(m optionalAttr) { + m["out_type"] = value } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleExample", err) +} + +// Converts each string in the input Tensor to the specified numeric type. +// +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { + if scope.Err() != nil { return } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return + opspec := tf.OpSpec{ + Type: "StringToNumber", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) + +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value } - return sparse_indices, sparse_values, sparse_shapes, dense_values } -// Computes acos of x element-wise. -func Acos(scope *Scope, x tf.Output) (y tf.Output) { +// Update '*var' according to the Ftrl-proximal scheme. +// +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Acos", + Type: "ResourceApplyFtrlV2", Input: []tf.Input{ - x, + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) +// TruncatedNormalAttr is an optional argument to TruncatedNormal. +type TruncatedNormalAttr func(optionalAttr) -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { +// TruncatedNormalSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func TruncatedNormalSeed(value int64) TruncatedNormalAttr { return func(m optionalAttr) { - m["Targmax"] = value + m["seed"] = value } } -// Performs max pooling on the input and outputs both max values and indices. +// TruncatedNormalSeed2 sets the optional seed2 attribute to value. // -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a truncated normal distribution. // -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // // Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { +// Returns A tensor of the specified shape filled with random truncated normal +// values. +func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", + Type: "TruncatedNormal", Input: []tf.Input{ - input, + shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) + +// RandomShuffleSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Randomly shuffles a tensor along its first dimension. +// +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: +// +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` // // Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. +// value: The tensor to be shuffled. // -// Returns A Tensor of type `out_type`. -func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. +func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ParseTensor", + Type: "RandomShuffle", Input: []tf.Input{ - serialized, + value, }, Attrs: attrs, } @@ -15829,49 +15906,47 @@ func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (outp return op.Output(0) } -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) +// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. +type OrderedMapIncompleteSizeAttr func(optionalAttr) -// MapClearCapacity sets the optional capacity attribute to value. +// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { +func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { return func(m optionalAttr) { m["capacity"] = value } } -// MapClearMemoryLimit sets the optional memory_limit attribute to value. +// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { +func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { return func(m optionalAttr) { m["memory_limit"] = value } } -// MapClearContainer sets the optional container attribute to value. +// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. // If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { +func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { return func(m optionalAttr) { m["container"] = value } } -// MapClearSharedName sets the optional shared_name attribute to value. +// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. // If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { +func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { return func(m optionalAttr) { m["shared_name"] = value } } -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { +// Op returns the number of incomplete elements in the underlying container. +func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { if scope.Err() != nil { return } @@ -15880,167 +15955,161 @@ func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o * a(attrs) } opspec := tf.OpSpec{ - Type: "MapClear", + Type: "OrderedMapIncompleteSize", Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) - -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. -// -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["field_delim"] = value - } -} +// DecodeRawAttr is an optional argument to DecodeRaw. +type DecodeRawAttr func(optionalAttr) -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. +// DecodeRawLittleEndian sets the optional little_endian attribute to value. // -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). +// value: Whether the input `bytes` are in little-endian order. +// Ignored for `out_type` values that are stored in a single byte like +// `uint8`. // If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { +func DecodeRawLittleEndian(value bool) DecodeRawAttr { return func(m optionalAttr) { - m["use_quote_delim"] = value + m["little_endian"] = value } } -// DecodeCSVNaValue sets the optional na_value attribute to value. -// -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value - } -} - -// Convert CSV records to tensors. Each column maps to one tensor. -// -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. +// Reinterpret the bytes of a string as a vector of numbers. // // Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. -// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or empty if the column is required. +// bytes: All the elements must have the same length. // -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { +// +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. +func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"out_type": out_type} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeCSV", + Type: "DecodeRaw", Input: []tf.Input{ - records, tf.OutputList(record_defaults), + bytes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return - } - return output + return op.Output(0) } -// Returns the rank of a tensor. +// Copy a tensor setting everything outside a central band in each innermost matrix // -// This operation returns an integer representing the rank of `input`. +// to zero. +// +// The `band` part is computed as follows: +// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +// tensor with the same shape where +// +// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. +// +// The indicator function +// +// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && +// (num_upper < 0 || (n-m) <= num_upper)`. // // For example: // // ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// # shape of tensor 't' is [2, 2, 3] -// rank(t) ==> 3 +// # if 'input' is [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [-2, -1, 0, 1] +// [-3, -2, -1, 0]], +// +// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [ 0, -1, 0, 1] +// [ 0, 0, -1, 0]], +// +// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] +// [-1, 0, 1, 0] +// [-2, -1, 0, 1] +// [ 0, -2, -1, 0]] // ``` // -// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank -// of a tensor is the number of indices required to uniquely select each element -// of the tensor. Rank is also known as "order", "degree", or "ndims." -func Rank(scope *Scope, input tf.Output) (output tf.Output) { +// Useful special cases: +// +// ``` +// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. +// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. +// tf.matrix_band_part(input, 0, 0) ==> Diagonal. +// ``` +// +// Arguments: +// input: Rank `k` tensor. +// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire +// lower triangle. +// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep +// entire upper triangle. +// +// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. +func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Rank", + Type: "MatrixBandPart", Input: []tf.Input{ - input, + input, num_lower, num_upper, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// DecodeCompressedAttr is an optional argument to DecodeCompressed. +type DecodeCompressedAttr func(optionalAttr) -// Makes its input available to the next iteration. -// -// Arguments: -// data: The tensor to be made available to the next iteration. +// DecodeCompressedCompressionType sets the optional compression_type attribute to value. // -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NextIteration", - Input: []tf.Input{ - data, - }, +// value: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// If not specified, defaults to "" +func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { + return func(m optionalAttr) { + m["compression_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. +// Decompress strings. // -// Arguments: +// This op decompresses each element of the `bytes` input `Tensor`, which +// is assumed to be compressed using the given `compression_type`. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. +// The `output` is a string `Tensor` of the same shape as `bytes`, +// each element containing the decompressed data from the corresponding +// element in `bytes`. // +// Arguments: +// bytes: A Tensor of string which is compressed. // -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns A Tensor with the same shape as input `bytes`, uncompressed +// from bytes. +func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SkipDataset", + Type: "DecodeCompressed", Input: []tf.Input{ - input_dataset, count, + bytes, }, Attrs: attrs, } @@ -16048,13 +16117,13 @@ func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_ return op.Output(0) } -// Computes hyperbolic tangent of `x` element-wise. -func Tanh(scope *Scope, x tf.Output) (y tf.Output) { +// Computes acos of x element-wise. +func Acos(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Tanh", + Type: "Acos", Input: []tf.Input{ x, }, @@ -16063,71 +16132,37 @@ func Tanh(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Computes the maximum along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such -// that `segment_ids[j] == i`. -// -// If the max is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMax", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. -type AvgPoolGradAttr func(optionalAttr) +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) -// AvgPoolGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { return func(m optionalAttr) { - m["data_format"] = value + m["Targmax"] = value } } -// Computes gradients of the average pooling function. +// Performs max pooling on the input and outputs both max values and indices. +// +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. +// +// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. // // Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. // padding: The type of padding algorithm to use. // -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. -func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { +// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. +func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { if scope.Err() != nil { return } @@ -16136,50 +16171,74 @@ func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPoolGrad", + Type: "MaxPoolWithArgmax", Input: []tf.Input{ - orig_input_shape, grad, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// StageClearAttr is an optional argument to StageClear. -type StageClearAttr func(optionalAttr) - -// StageClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// +// Arguments: +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. +// +// Returns A Tensor of type `out_type`. +func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "ParseTensor", + Input: []tf.Input{ + serialized, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MapClearAttr is an optional argument to MapClear. +type MapClearAttr func(optionalAttr) + +// MapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // // REQUIRES: value >= 0 -func StageClearCapacity(value int64) StageClearAttr { +func MapClearCapacity(value int64) MapClearAttr { return func(m optionalAttr) { m["capacity"] = value } } -// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// MapClearMemoryLimit sets the optional memory_limit attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func StageClearMemoryLimit(value int64) StageClearAttr { +func MapClearMemoryLimit(value int64) MapClearAttr { return func(m optionalAttr) { m["memory_limit"] = value } } -// StageClearContainer sets the optional container attribute to value. +// MapClearContainer sets the optional container attribute to value. // If not specified, defaults to "" -func StageClearContainer(value string) StageClearAttr { +func MapClearContainer(value string) MapClearAttr { return func(m optionalAttr) { m["container"] = value } } -// StageClearSharedName sets the optional shared_name attribute to value. +// MapClearSharedName sets the optional shared_name attribute to value. // If not specified, defaults to "" -func StageClearSharedName(value string) StageClearAttr { +func MapClearSharedName(value string) MapClearAttr { return func(m optionalAttr) { m["shared_name"] = value } @@ -16188,7 +16247,7 @@ func StageClearSharedName(value string) StageClearAttr { // Op removes all elements in the underlying container. // // Returns the created operation. -func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { +func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -16197,135 +16256,167 @@ func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "StageClear", + Type: "MapClear", Attrs: attrs, } return scope.AddOperation(opspec) } -// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. -type ComputeAccidentalHitsAttr func(optionalAttr) +// DecodeCSVAttr is an optional argument to DecodeCSV. +type DecodeCSVAttr func(optionalAttr) -// ComputeAccidentalHitsSeed sets the optional seed attribute to value. +// DecodeCSVFieldDelim sets the optional field_delim attribute to value. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { +// value: char delimiter to separate fields in a record. +// If not specified, defaults to "," +func DecodeCSVFieldDelim(value string) DecodeCSVAttr { return func(m optionalAttr) { - m["seed"] = value + m["field_delim"] = value } } -// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { +// value: If false, treats double quotation marks as regular +// characters inside of the string fields (ignoring RFC 4180, Section 2, +// Bullet 5). +// If not specified, defaults to true +func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_quote_delim"] = value } } -// Computes the ids of the positions in sampled_candidates that match true_labels. +// DecodeCSVNaValue sets the optional na_value attribute to value. // -// When doing log-odds NCE, the result of this op should be passed through a -// SparseToDense op, then added to the logits of the sampled candidates. This has -// the effect of 'removing' the sampled labels that match the true labels by -// making the classifier sure that they are sampled labels. +// value: Additional string to recognize as NA/NaN. +// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["na_value"] = value + } +} + +// Convert CSV records to tensors. Each column maps to one tensor. +// +// RFC 4180 format is expected for the CSV records. +// (https://tools.ietf.org/html/rfc4180) +// Note that we allow leading and trailing spaces with int or float field. // // Arguments: -// true_classes: The true_classes output of UnpackSparseLabels. -// sampled_candidates: The sampled_candidates output of CandidateSampler. -// num_true: Number of true labels per context. +// records: Each string is a record/row in the csv and all records should have +// the same format. +// record_defaults: One tensor per column of the input record, with either a +// scalar default value for that column or empty if the column is required. // -// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label -// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element -// is -FLOAT_MAX. -func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { +// Returns Each tensor will have the same shape as records. +func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ComputeAccidentalHits", + Type: "DecodeCSV", Input: []tf.Input{ - true_classes, sampled_candidates, + records, tf.OutputList(record_defaults), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("DecodeCSV", err) + return + } + return output } -// Computes sigmoid of `x` element-wise. +// Returns the rank of a tensor. // -// Specifically, `y = 1 / (1 + exp(-x))`. -func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { +// This operation returns an integer representing the rank of `input`. +// +// For example: +// +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// # shape of tensor 't' is [2, 2, 3] +// rank(t) ==> 3 +// ``` +// +// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank +// of a tensor is the number of indices required to uniquely select each element +// of the tensor. Rank is also known as "order", "degree", or "ndims." +func Rank(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Sigmoid", + Type: "Rank", Input: []tf.Input{ - x, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. -type RandomStandardNormalAttr func(optionalAttr) - -// RandomStandardNormalSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { - return func(m optionalAttr) { - m["seed"] = value +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Fact", } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. +// Makes its input available to the next iteration. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value +// Arguments: +// data: The tensor to be made available to the next iteration. +// +// Returns The same tensor as `data`. +func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NextIteration", + Input: []tf.Input{ + data, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Outputs random values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. +// Creates a dataset that skips `count` elements from the `input_dataset`. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. // -// Returns A tensor of the specified shape filled with random normal values. -func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. +// +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "RandomStandardNormal", + Type: "SkipDataset", Input: []tf.Input{ - shape, + input_dataset, count, }, Attrs: attrs, } @@ -16333,343 +16424,320 @@ func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, opti return op.Output(0) } -// FusedBatchNormAttr is an optional argument to FusedBatchNorm. -type FusedBatchNormAttr func(optionalAttr) - -// FusedBatchNormEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { - return func(m optionalAttr) { - m["epsilon"] = value +// Computes hyperbolic tangent of `x` element-wise. +func Tanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// FusedBatchNormDataFormat sets the optional data_format attribute to value. -// -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { - return func(m optionalAttr) { - m["data_format"] = value + opspec := tf.OpSpec{ + Type: "Tanh", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FusedBatchNormIsTraining sets the optional is_training attribute to value. +// Computes the maximum along segments of a tensor. // -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Batch normalization. +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. // -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// Computes a tensor such that +// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such +// that `segment_ids[j] == i`. +// +// If the max is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
// // Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. // -// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "FusedBatchNorm", + Type: "SegmentMax", Input: []tf.Input{ - x, scale, offset, mean, variance, + data, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0) } -// Computes tan of x element-wise. -func Tan(scope *Scope, x tf.Output) (y tf.Output) { +// AvgPoolGradAttr is an optional argument to AvgPoolGrad. +type AvgPoolGradAttr func(optionalAttr) + +// AvgPoolGradDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes gradients of the average pooling function. +// +// Arguments: +// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. +// the output of `avg_pool`. +// ksize: The size of the sliding window for each dimension of the input. +// strides: The stride of the sliding window for each dimension of the input. +// padding: The type of padding algorithm to use. +// +// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. +func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Tan", + Type: "AvgPoolGrad", Input: []tf.Input{ - x, + orig_input_shape, grad, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. -type FusedBatchNormV2Attr func(optionalAttr) +// StageClearAttr is an optional argument to StageClear. +type StageClearAttr func(optionalAttr) -// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value. +// StageClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr { +// REQUIRES: value >= 0 +func StageClearCapacity(value int64) StageClearAttr { return func(m optionalAttr) { - m["epsilon"] = value + m["capacity"] = value } } -// FusedBatchNormV2DataFormat sets the optional data_format attribute to value. +// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr { +// REQUIRES: value >= 0 +func StageClearMemoryLimit(value int64) StageClearAttr { return func(m optionalAttr) { - m["data_format"] = value + m["memory_limit"] = value } } -// FusedBatchNormV2IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr { +// StageClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageClearContainer(value string) StageClearAttr { return func(m optionalAttr) { - m["is_training"] = value + m["container"] = value } } -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. +// StageClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageClearSharedName(value string) StageClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. // -// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { +// Returns the created operation. +func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNormV2", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, + Type: "StageClear", + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return scope.AddOperation(opspec) } -// MultinomialAttr is an optional argument to Multinomial. -type MultinomialAttr func(optionalAttr) +// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. +type ComputeAccidentalHitsAttr func(optionalAttr) -// MultinomialSeed sets the optional seed attribute to value. +// ComputeAccidentalHitsSeed sets the optional seed attribute to value. // -// value: If either seed or seed2 is set to be non-zero, the internal random number -// generator is seeded by the given seed. Otherwise, a random seed is used. +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. // If not specified, defaults to 0 -func MultinomialSeed(value int64) MultinomialAttr { +func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { return func(m optionalAttr) { m["seed"] = value } } -// MultinomialSeed2 sets the optional seed2 attribute to value. +// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. // -// value: A second seed to avoid seed collision. +// value: An second seed to avoid seed collision. // If not specified, defaults to 0 -func MultinomialSeed2(value int64) MultinomialAttr { +func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { return func(m optionalAttr) { m["seed2"] = value } } -// MultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// Draws samples from a multinomial distribution. +// Computes the ids of the positions in sampled_candidates that match true_labels. +// +// When doing log-odds NCE, the result of this op should be passed through a +// SparseToDense op, then added to the logits of the sampled candidates. This has +// the effect of 'removing' the sampled labels that match the true labels by +// making the classifier sure that they are sampled labels. // // Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. +// true_classes: The true_classes output of UnpackSparseLabels. +// sampled_candidates: The sampled_candidates output of CandidateSampler. +// num_true: Number of true labels per context. // -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { +// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label +// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element +// is -FLOAT_MAX. +func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_true": num_true} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Multinomial", + Type: "ComputeAccidentalHits", Input: []tf.Input{ - logits, num_samples, + true_classes, sampled_candidates, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// EncodeJpegAttr is an optional argument to EncodeJpeg. -type EncodeJpegAttr func(optionalAttr) - -// EncodeJpegFormat sets the optional format attribute to value. +// Looks up keys in a table, outputs the corresponding values. // -// value: Per pixel image format. -// If not specified, defaults to "" -func EncodeJpegFormat(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["format"] = value - } -} - -// EncodeJpegQuality sets the optional quality attribute to value. +// The tensor `keys` must of the same type as the keys of the table. +// The output `values` is of the type of the table values. // -// value: Quality of the compression from 0 to 100 (higher is better and slower). -// If not specified, defaults to 95 -func EncodeJpegQuality(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["quality"] = value +// The scalar `default_value` is the value output for keys not present in the +// table. It must also be of the same type as the table values. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// +// +// Returns Same shape as `keys`. Values found in the table, or `default_values` +// for missing keys. +func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableFindV2", + Input: []tf.Input{ + table_handle, keys, default_value, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeJpegProgressive sets the optional progressive attribute to value. +// Bucketizes 'input' based on 'boundaries'. // -// value: If True, create a JPEG that loads progressively (coarse to fine). -// If not specified, defaults to false -func EncodeJpegProgressive(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["progressive"] = value - } -} - -// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. +// For example, if the inputs are +// boundaries = [0, 10, 100] +// input = [[-5, 10000] +// [150, 10] +// [5, 100]] // -// value: If True, spend CPU/RAM to reduce size with no quality change. -// If not specified, defaults to false -func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["optimize_size"] = value - } -} - -// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. +// then the output will be +// output = [[0, 3] +// [3, 2] +// [1, 3]] // -// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. -// If not specified, defaults to true -func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["chroma_downsampling"] = value - } -} - -// EncodeJpegDensityUnit sets the optional density_unit attribute to value. +// Arguments: +// input: Any shape of Tensor contains with int or float type. +// boundaries: A sorted list of floats gives the boundary of the buckets. // -// value: Unit used to specify `x_density` and `y_density`: -// pixels per inch (`'in'`) or centimeter (`'cm'`). -// If not specified, defaults to "in" -func EncodeJpegDensityUnit(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["density_unit"] = value - } -} - -// EncodeJpegXDensity sets the optional x_density attribute to value. +// Returns Same shape with 'input', each value of input replaced with bucket index. // -// value: Horizontal pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegXDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["x_density"] = value +// @compatibility(numpy) +// Equivalent to np.digitize. +// @end_compatibility +func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// EncodeJpegYDensity sets the optional y_density attribute to value. -// -// value: Vertical pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegYDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["y_density"] = value + attrs := map[string]interface{}{"boundaries": boundaries} + opspec := tf.OpSpec{ + Type: "Bucketize", + Input: []tf.Input{ + input, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. +// EncodePngAttr is an optional argument to EncodePng. +type EncodePngAttr func(optionalAttr) + +// EncodePngCompression sets the optional compression attribute to value. // -// value: If not empty, embed this XMP metadata in the image header. -// If not specified, defaults to "" -func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { +// value: Compression level. +// If not specified, defaults to -1 +func EncodePngCompression(value int64) EncodePngAttr { return func(m optionalAttr) { - m["xmp_metadata"] = value + m["compression"] = value } } -// JPEG-encode an image. -// -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. -// -// The attr `format` can be used to override the color format of the encoded -// output. Values can be: +// PNG-encode an image. // -// * `''`: Use a default format based on the number of channels in the image. -// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension -// of `image` must be 1. -// * `rgb`: Output an RGB JPEG image. The `channels` dimension -// of `image` must be 3. +// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` +// where `channels` is: // -// If `format` is not specified or is the empty string, a default format is picked -// in function of the number of channels in `image`: +// * 1: for grayscale. +// * 2: for grayscale + alpha. +// * 3: for RGB. +// * 4: for RGBA. // -// * 1: Output a grayscale image. -// * 3: Output an RGB image. +// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder +// default or a value from 0 to 9. 9 is the highest compression level, generating +// the smallest output, but is slower. // // Arguments: // image: 3-D with shape `[height, width, channels]`. // -// Returns 0-D. JPEG-encoded image. -func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { +// Returns 0-D. PNG-encoded image. +func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { if scope.Err() != nil { return } @@ -16678,7 +16746,7 @@ func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (cont a(attrs) } opspec := tf.OpSpec{ - Type: "EncodeJpeg", + Type: "EncodePng", Input: []tf.Input{ image, }, @@ -16688,164 +16756,91 @@ func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (cont return op.Output(0) } -// MaxPoolGradAttr is an optional argument to MaxPoolGrad. -type MaxPoolGradAttr func(optionalAttr) - -// MaxPoolGradDataFormat sets the optional data_format attribute to value. +// Updates the table to associates keys with values. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradDataFormat(value string) MaxPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. // -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) { +// Returns the created operation. +func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPoolGrad", + Type: "LookupTableInsertV2", Input: []tf.Input{ - orig_input, orig_output, grad, + table_handle, keys, values, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// CropAndResizeAttr is an optional argument to CropAndResize. -type CropAndResizeAttr func(optionalAttr) - -// CropAndResizeMethod sets the optional method attribute to value. -// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeMethod(value string) CropAndResizeAttr { - return func(m optionalAttr) { - m["method"] = value +// Returns element-wise smallest integer in not less than x. +func Ceil(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. -// -// value: Value used for extrapolation, when applicable. -// If not specified, defaults to 0 -func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { - return func(m optionalAttr) { - m["extrapolation_value"] = value + opspec := tf.OpSpec{ + Type: "Ceil", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Extracts crops from the input image tensor and bilinearly resizes them (possibly -// -// with aspect ratio change) to a common output size specified by `crop_size`. This -// is more general than the `crop_to_bounding_box` op which extracts a fixed size -// slice from the input image and does not allow resizing or aspect ratio change. -// -// Returns a tensor with `crops` from the input `image` at positions defined at the -// bounding box locations in `boxes`. The cropped boxes are all resized (with -// bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The -// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The -// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the -// method will give identical results to using `tf.image.resize_bilinear()` -// with `align_corners=True`. +// Computes the number of elements in the given table. // // Arguments: -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All -// cropped image patches are resized to this size. The aspect ratio of the image -// content is not preserved. Both `crop_height` and `crop_width` need to be -// positive. +// table_handle: Handle to the table. // -// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { +// Returns Scalar that contains number of elements in the table. +func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "CropAndResize", + Type: "LookupTableSizeV2", Input: []tf.Input{ - image, boxes, box_ind, crop_size, + table_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. -type ResourceApplyPowerSignAttr func(optionalAttr) +// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. +type ResizeBilinearGradAttr func(optionalAttr) -// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. +// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. // -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: If true, rescale grads by (orig_height - 1) / (height - 1), which +// exactly aligns the 4 corners of grads and original_image. If false, rescale by +// orig_height / height. Treat similarly the width dimension. // If not specified, defaults to false -func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { +func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["align_corners"] = value } } -// Update '*var' according to the AddSign update. -// -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g -// variable <- variable - lr_t * update +// Computes the gradient of bilinear interpolation. // // Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// logbase: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. -// grad: The gradient. +// grads: 4-D with shape `[batch, height, width, channels]`. +// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. // -// Returns the created operation. -func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. +func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -16854,110 +16849,57 @@ func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyPowerSign", + Type: "ResizeBilinearGrad", Input: []tf.Input{ - var_, m, lr, logbase, sign_decay, beta, grad, + grads, original_image, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. -type MutableHashTableV2Attr func(optionalAttr) - -// MutableHashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableV2Container(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. +// Outputs all keys and values in the table. // // Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// table_handle: Handle to the table. // -// Returns Handle to a table. -func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableHashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Disallowed in GraphDef version >= 2. // -// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead -func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { +// +// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. +func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "AdjustContrast", + Type: "LookupTableExportV2", Input: []tf.Input{ - images, contrast_factor, min_value, max_value, + table_handle, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Table initializer that takes two tensors for keys and values respectively. +// Replaces the contents of the table with the specified keys and values. +// +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. // // Arguments: -// table_handle: Handle to a table which will be initialized. -// keys: Keys of type Tkey. -// values: Values of type Tval. +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. // // Returns the created operation. -func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "InitializeTableV2", + Type: "LookupTableImportV2", Input: []tf.Input{ table_handle, keys, values, }, @@ -16965,263 +16907,629 @@ func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, val return scope.AddOperation(opspec) } -// PrintAttr is an optional argument to Print. -type PrintAttr func(optionalAttr) +// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. +type MapUnstageNoKeyAttr func(optionalAttr) -// PrintMessage sets the optional message attribute to value. +// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: A string, prefix of the error message. -// If not specified, defaults to "" -func PrintMessage(value string) PrintAttr { +// REQUIRES: value >= 0 +func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["message"] = value + m["capacity"] = value } } -// PrintFirstN sets the optional first_n attribute to value. +// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// value: Only log `first_n` number of times. -1 disables logging. -// If not specified, defaults to -1 -func PrintFirstN(value int64) PrintAttr { +// REQUIRES: value >= 0 +func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["first_n"] = value + m["memory_limit"] = value } } -// PrintSummarize sets the optional summarize attribute to value. -// -// value: Only print this many entries of each tensor. -// If not specified, defaults to 3 -func PrintSummarize(value int64) PrintAttr { +// MapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["summarize"] = value + m["container"] = value } } -// Prints a list of tensors. -// -// Passes `input` through to `output` and prints `data` when evaluating. -// -// Arguments: -// input: The tensor passed to `output` -// data: A list of tensors to print out when op is evaluated. +// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns a random (key, value) // -// Returns = The unmodified `input` tensor -func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { +// from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. +func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Print", + Type: "MapUnstageNoKey", Input: []tf.Input{ - input, tf.OutputList(data), + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. -// -// Arguments: -// tag: A string attached to this summary. Used for organization in TensorBoard. -// tensor: A tensor to serialize. -// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin -// data. -func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "TensorSummaryV2", - Input: []tf.Input{ - tag, tensor, serialized_summary_metadata, - }, + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstageNoKey", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return key, values } -// Creates a dataset that asynchronously prefetches elements from `input_dataset`. -// -// Arguments: -// -// buffer_size: The maximum number of elements to buffer in an iterator over -// this dataset. -// +// HashTableV2Attr is an optional argument to HashTableV2. +type HashTableV2Attr func(optionalAttr) + +// HashTableV2Container sets the optional container attribute to value. // -func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "PrefetchDataset", - Input: []tf.Input{ - input_dataset, buffer_size, - }, - Attrs: attrs, +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func HashTableV2Container(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// TensorSummaryAttr is an optional argument to TensorSummary. -type TensorSummaryAttr func(optionalAttr) - -// TensorSummaryDescription sets the optional description attribute to value. +// HashTableV2SharedName sets the optional shared_name attribute to value. // -// value: A json-encoded SummaryDescription proto. +// value: If non-empty, this table is shared under the given name across +// multiple sessions. // If not specified, defaults to "" -func TensorSummaryDescription(value string) TensorSummaryAttr { +func HashTableV2SharedName(value string) HashTableV2Attr { return func(m optionalAttr) { - m["description"] = value + m["shared_name"] = value } } -// TensorSummaryLabels sets the optional labels attribute to value. +// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. // -// value: An unused list of strings. -// If not specified, defaults to <> -func TensorSummaryLabels(value []string) TensorSummaryAttr { +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { return func(m optionalAttr) { - m["labels"] = value + m["use_node_name_sharing"] = value } } -// TensorSummaryDisplayName sets the optional display_name attribute to value. +// Creates a non-initialized hash table. // -// value: An unused string. +// This op creates a hash table, specifying the type of its keys and values. +// Before using the table you will have to initialize it. After initialization the +// table will be immutable. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "HashTableV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. +type MutableHashTableV2Attr func(optionalAttr) + +// MutableHashTableV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. // If not specified, defaults to "" -func TensorSummaryDisplayName(value string) TensorSummaryAttr { +func MutableHashTableV2Container(value string) MutableHashTableV2Attr { return func(m optionalAttr) { - m["display_name"] = value + m["container"] = value } } -// Outputs a `Summary` protocol buffer with a tensor. +// MutableHashTableV2SharedName sets the optional shared_name attribute to value. // -// This op is being phased out in favor of TensorSummaryV2, which lets callers pass -// a tag as well as a serialized SummaryMetadata proto string that contains -// plugin-specific data. We will keep this op to maintain backwards compatibility. +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// Creates an empty hash table. +// +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a scalar. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // // Arguments: -// tensor: A tensor to serialize. -func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorSummary", - Input: []tf.Input{ - tensor, - }, + Type: "MutableHashTableV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradient for the tanh of `x` wrt its input. +// DequantizeAttr is an optional argument to Dequantize. +type DequantizeAttr func(optionalAttr) + +// DequantizeMode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func DequantizeMode(value string) DequantizeAttr { + return func(m optionalAttr) { + m["mode"] = value + } +} + +// Dequantize the 'input' tensor into a float Tensor. // -// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` -// is the corresponding input gradient. -func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. +// +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: +// +// ``` +// if T == qint8, in[i] += (range(T) + 1)/ 2.0 +// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// If the input comes from a QuantizedRelu6, the output type is +// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is +// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. +// Dequantize on quint8 will take each value, cast to float, and multiply +// by 6 / 255. +// Note that if quantizedtype is qint8, the operation will additionally add +// each value by 128 prior to casting. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ```c++ +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = range / num_discrete_values +// const double offset_input = static_cast(input) - lowest_quantized; +// result = range_min + ((input - numeric_limits::min()) * range_scale) +// ``` +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. +// +// We first find the range of values in our tensor. The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. +// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (2 * m) / (max_fixed - min_fixed) +// ``` +// +// Now we can dequantize the elements of our tensor: +// ```c++ +// result = input * s +// ``` +// +// Arguments: +// +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TanhGrad", + Type: "Dequantize", Input: []tf.Input{ - y, dy, + input, min_range, max_range, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs a `Summary` protocol buffer with scalar values. -// -// The input `tags` and `values` must have the same shape. The generated summary -// has a summary value for each tag-value pair in `tags` and `values`. -// -// Arguments: -// tags: Tags for the summary. -// values: Same shape as `tags. Values for the summary. +// Flips all bits elementwise. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { +// The result will have exactly those bits set, that are not set in `x`. The +// computation is performed on the underlying representation of x. +func Invert(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ScalarSummary", + Type: "Invert", Input: []tf.Input{ - tags, values, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs a `Summary` protocol buffer with a histogram. -// -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. -// -// This op reports an `InvalidArgument` error if any value is not finite. -// -// Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. +// Deprecated. Disallowed in GraphDef version >= 2. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { +// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead +func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "HistogramSummary", + Type: "AdjustContrast", Input: []tf.Input{ - tag, values, + images, contrast_factor, min_value, max_value, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the number of elements in the given queue. +// Table initializer that takes two tensors for keys and values respectively. // // Arguments: -// handle: The handle to a queue. +// table_handle: Handle to a table which will be initialized. +// keys: Keys of type Tkey. +// values: Values of type Tval. // -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { +// Returns the created operation. +func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QueueSizeV2", + Type: "InitializeTableV2", Input: []tf.Input{ - handle, + table_handle, keys, values, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ImageSummaryAttr is an optional argument to ImageSummary. -type ImageSummaryAttr func(optionalAttr) +// PrintAttr is an optional argument to Print. +type PrintAttr func(optionalAttr) + +// PrintMessage sets the optional message attribute to value. +// +// value: A string, prefix of the error message. +// If not specified, defaults to "" +func PrintMessage(value string) PrintAttr { + return func(m optionalAttr) { + m["message"] = value + } +} + +// PrintFirstN sets the optional first_n attribute to value. +// +// value: Only log `first_n` number of times. -1 disables logging. +// If not specified, defaults to -1 +func PrintFirstN(value int64) PrintAttr { + return func(m optionalAttr) { + m["first_n"] = value + } +} + +// PrintSummarize sets the optional summarize attribute to value. +// +// value: Only print this many entries of each tensor. +// If not specified, defaults to 3 +func PrintSummarize(value int64) PrintAttr { + return func(m optionalAttr) { + m["summarize"] = value + } +} + +// Prints a list of tensors. +// +// Passes `input` through to `output` and prints `data` when evaluating. +// +// Arguments: +// input: The tensor passed to `output` +// data: A list of tensors to print out when op is evaluated. +// +// Returns = The unmodified `input` tensor +func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Print", + Input: []tf.Input{ + input, tf.OutputList(data), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. +// +// Arguments: +// tag: A string attached to this summary. Used for organization in TensorBoard. +// tensor: A tensor to serialize. +// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin +// data. +func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorSummaryV2", + Input: []tf.Input{ + tag, tensor, serialized_summary_metadata, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that asynchronously prefetches elements from `input_dataset`. +// +// Arguments: +// +// buffer_size: The maximum number of elements to buffer in an iterator over +// this dataset. +// +// +func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "PrefetchDataset", + Input: []tf.Input{ + input_dataset, buffer_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorSummaryAttr is an optional argument to TensorSummary. +type TensorSummaryAttr func(optionalAttr) + +// TensorSummaryDescription sets the optional description attribute to value. +// +// value: A json-encoded SummaryDescription proto. +// If not specified, defaults to "" +func TensorSummaryDescription(value string) TensorSummaryAttr { + return func(m optionalAttr) { + m["description"] = value + } +} + +// TensorSummaryLabels sets the optional labels attribute to value. +// +// value: An unused list of strings. +// If not specified, defaults to <> +func TensorSummaryLabels(value []string) TensorSummaryAttr { + return func(m optionalAttr) { + m["labels"] = value + } +} + +// TensorSummaryDisplayName sets the optional display_name attribute to value. +// +// value: An unused string. +// If not specified, defaults to "" +func TensorSummaryDisplayName(value string) TensorSummaryAttr { + return func(m optionalAttr) { + m["display_name"] = value + } +} + +// Outputs a `Summary` protocol buffer with a tensor. +// +// This op is being phased out in favor of TensorSummaryV2, which lets callers pass +// a tag as well as a serialized SummaryMetadata proto string that contains +// plugin-specific data. We will keep this op to maintain backwards compatibility. +// +// Arguments: +// tensor: A tensor to serialize. +func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorSummary", + Input: []tf.Input{ + tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the gradient for the tanh of `x` wrt its input. +// +// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` +// is the corresponding input gradient. +func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TanhGrad", + Input: []tf.Input{ + y, dy, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with scalar values. +// +// The input `tags` and `values` must have the same shape. The generated summary +// has a summary value for each tag-value pair in `tags` and `values`. +// +// Arguments: +// tags: Tags for the summary. +// values: Same shape as `tags. Values for the summary. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ScalarSummary", + Input: []tf.Input{ + tags, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a histogram. +// +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. +// +// This op reports an `InvalidArgument` error if any value is not finite. +// +// Arguments: +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "HistogramSummary", + Input: []tf.Input{ + tag, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the number of elements in the given queue. +// +// Arguments: +// handle: The handle to a queue. +// +// Returns The number of elements in the given queue. +func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QueueSizeV2", + Input: []tf.Input{ + handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ImageSummaryAttr is an optional argument to ImageSummary. +type ImageSummaryAttr func(optionalAttr) // ImageSummaryMaxImages sets the optional max_images attribute to value. // @@ -17533,31 +17841,7 @@ func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values return op.Output(0) } -// Writes a `Summary` protocol buffer with scalar values. -// -// The input `tag` and `value` must have the scalars. -// -// Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Tag for the summary. -// value: Value for the summary. -// -// Returns the created operation. -func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WriteScalarSummary", - Input: []tf.Input{ - writer, step, tag, value, - }, - } - return scope.AddOperation(opspec) -} - -// Computes the matrix exponential of one or more square matrices: +// Computes the matrix exponential of one or more square matrices: // // exp(A) = \sum_{n=0}^\infty A^n/n! // @@ -18841,29 +19125,6 @@ func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { return op.Output(0) } -// Returns the set of files matching one or more glob patterns. -// -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. -// -// Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. -// -// Returns A vector of matching filenames. -func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatchingFiles", - Input: []tf.Input{ - pattern, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. type ResizeBicubicGradAttr func(optionalAttr) @@ -20681,77 +20942,6 @@ func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf return op.Output(0) } -// Slice a `SparseTensor` based on the `start` and `size`. -// -// For example, if the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] -// [ a ] -// [b c ] -// -// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// start: 1-D. tensor represents the start of the slice. -// size: 1-D. tensor represents the size of the slice. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSlice", - Input: []tf.Input{ - indices, values, shape, start, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Concatenates quantized tensors along one dimension. -// -// Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// input_mins: The minimum scalar values for each of the input tensors. -// input_maxes: The maximum scalar values for each of the input tensors. -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. -func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QuantizedConcat", - Input: []tf.Input{ - concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Gradients for batch normalization. // // DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() @@ -22596,28 +22786,6 @@ func Abs(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Flushes and closes the summary writer. -// -// Also removes it from the resource manager. To reopen, use another -// CreateSummaryFileWriter op. -// -// Arguments: -// writer: A handle to the summary writer resource. -// -// Returns the created operation. -func CloseSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CloseSummaryWriter", - Input: []tf.Input{ - writer, - }, - } - return scope.AddOperation(opspec) -} - // StackV2Attr is an optional argument to StackV2. type StackV2Attr func(optionalAttr) @@ -24972,101 +25140,6 @@ func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// Scatter `updates` into a new (initially zero) tensor according to `indices`. -// -// Creates a new tensor by applying sparse `updates` to individual -// values or slices within a zero tensor of the given `shape` according to -// indices. This operator is the inverse of the @{tf.gather_nd} operator which -// extracts values or slices from a given tensor. -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// shape = tf.constant([8]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [0, 11, 0, 10, 9, 0, 0, 12] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -//
-// -//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// shape = tf.constant([4, 4, 4]) -// scatter = tf.scatter_nd(indices, updates, shape) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], -// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] -// -// Arguments: -// indices: Index tensor. -// updates: Updates to scatter into output. -// shape: 1-D. The shape of the resulting tensor. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNd", - Input: []tf.Input{ - indices, updates, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) @@ -25917,31 +25990,6 @@ func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf return op.Output(0) } -// Concatenates tensors along one dimension. -// -// Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Concat", - Input: []tf.Input{ - concat_dim, tf.OutputList(values), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Compute the lower regularized incomplete Gamma function `Q(a, x)`. // // The lower regularized incomplete Gamma function is defined as: @@ -26131,129 +26179,122 @@ func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Flips all bits elementwise. +// QuantizedInstanceNormAttr is an optional argument to QuantizedInstanceNorm. +type QuantizedInstanceNormAttr func(optionalAttr) + +// QuantizedInstanceNormOutputRangeGiven sets the optional output_range_given attribute to value. // -// The result will have exactly those bits set, that are not set in `x`. The -// computation is performed on the underlying representation of x. -func Invert(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Invert", - Input: []tf.Input{ - x, - }, +// value: If True, `given_y_min` and `given_y_min` +// and `given_y_max` are used as the output range. Otherwise, +// the implementation computes the output range. +// If not specified, defaults to false +func QuantizedInstanceNormOutputRangeGiven(value bool) QuantizedInstanceNormAttr { + return func(m optionalAttr) { + m["output_range_given"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// DequantizeAttr is an optional argument to Dequantize. -type DequantizeAttr func(optionalAttr) - -// DequantizeMode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func DequantizeMode(value string) DequantizeAttr { +// QuantizedInstanceNormGivenYMin sets the optional given_y_min attribute to value. +// +// value: Output in `y_min` if `output_range_given` is True. +// If not specified, defaults to 0 +func QuantizedInstanceNormGivenYMin(value float32) QuantizedInstanceNormAttr { return func(m optionalAttr) { - m["mode"] = value + m["given_y_min"] = value } } -// Dequantize the 'input' tensor into a float Tensor. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// if T == qint8, in[i] += (range(T) + 1)/ 2.0 -// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// If the input comes from a QuantizedRelu6, the output type is -// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. -// Dequantize on quint8 will take each value, cast to float, and multiply -// by 6 / 255. -// Note that if quantizedtype is qint8, the operation will additionally add -// each value by 128 prior to casting. +// QuantizedInstanceNormGivenYMax sets the optional given_y_max attribute to value. // -// If the mode is 'MIN_FIRST', then this approach is used: +// value: Output in `y_max` if `output_range_given` is True. +// If not specified, defaults to 0 +func QuantizedInstanceNormGivenYMax(value float32) QuantizedInstanceNormAttr { + return func(m optionalAttr) { + m["given_y_max"] = value + } +} + +// QuantizedInstanceNormVarianceEpsilon sets the optional variance_epsilon attribute to value. // -// ```c++ -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = range / num_discrete_values -// const double offset_input = static_cast(input) - lowest_quantized; -// result = range_min + ((input - numeric_limits::min()) * range_scale) -// ``` +// value: A small float number to avoid dividing by 0. +// If not specified, defaults to 1e-05 +func QuantizedInstanceNormVarianceEpsilon(value float32) QuantizedInstanceNormAttr { + return func(m optionalAttr) { + m["variance_epsilon"] = value + } +} + +// QuantizedInstanceNormMinSeparation sets the optional min_separation attribute to value. // -// *SCALED mode Example* +// value: Minimum value of `y_max - y_min` +// If not specified, defaults to 0.001 +func QuantizedInstanceNormMinSeparation(value float32) QuantizedInstanceNormAttr { + return func(m optionalAttr) { + m["min_separation"] = value + } +} + +// Quantized Instance normalization. // -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. +// Arguments: +// x: A 4D input Tensor. +// x_min: The value represented by the lowest quantized input. +// x_max: The value represented by the highest quantized input. // -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. +// Returns A 4D Tensor.The value represented by the lowest quantized output.The value represented by the highest quantized output. +func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf.Output, optional ...QuantizedInstanceNormAttr) (y tf.Output, y_min tf.Output, y_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedInstanceNorm", + Input: []tf.Input{ + x, x_min, x_max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the diagonal part of the tensor. // -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` +// This operation returns a tensor with the `diagonal` part +// of the `input`. The `diagonal` part is computed as follows: // -// Our input tensor range is then `[-m, m]`. +// Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a +// tensor of rank `k` with dimensions `[D1,..., Dk]` where: // -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. -// If T is signed, this is -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` +// `diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`. // -// Otherwise, if T is unsigned, the fixed-point range is -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` +// For example: // -// From this we compute our scaling factor, s: -// ```c++ -// s = (2 * m) / (max_fixed - min_fixed) // ``` +// # 'input' is [[1, 0, 0, 0] +// [0, 2, 0, 0] +// [0, 0, 3, 0] +// [0, 0, 0, 4]] // -// Now we can dequantize the elements of our tensor: -// ```c++ -// result = input * s +// tf.diag_part(input) ==> [1, 2, 3, 4] // ``` // // Arguments: +// input: Rank k tensor where k is even and not zero. // -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. -func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { +// Returns The extracted diagonal. +func DiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Dequantize", + Type: "DiagPart", Input: []tf.Input{ - input, min_range, max_range, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -26876,39 +26917,53 @@ func TileGrad(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Out return op.Output(0) } -// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. -type DataFormatDimMapAttr func(optionalAttr) +// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. +type QuantizeAndDequantizeAttr func(optionalAttr) -// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { +// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { return func(m optionalAttr) { - m["src_format"] = value + m["signed_input"] = value } } -// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { +// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { return func(m optionalAttr) { - m["dst_format"] = value + m["num_bits"] = value } } -// Returns the dimension index in the destination data format given the one in -// -// the source data format. -// -// Arguments: -// x: A Tensor with each element as a dimension index in source data format. -// Must be in the range [-4, 4). +// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to false +func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["range_given"] = value + } +} + +// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_min"] = value + } +} + +// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_max"] = value + } +} + +// Use QuantizeAndDequantizeV2 instead. // -// Returns A Tensor with each element as a dimension index in destination data format. -func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { +// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 +func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -26917,9 +26972,9 @@ func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAtt a(attrs) } opspec := tf.OpSpec{ - Type: "DataFormatDimMap", + Type: "QuantizeAndDequantize", Input: []tf.Input{ - x, + input, }, Attrs: attrs, } @@ -26927,29 +26982,212 @@ func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAtt return op.Output(0) } -// Return the shape of s0 op s1 with broadcast. -// -// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the -// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. -func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BroadcastArgs", - Input: []tf.Input{ - s0, s1, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. +type QueueDequeueV2Attr func(optionalAttr) -// Return the reduction indices for computing gradients of s0 op s1 with broadcast. +// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. // -// This is typically used by gradient computations for a broadcasting operation. -func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output, r1 tf.Output) { - if scope.Err() != nil { +// value: If the queue is empty, this operation will block for up to +// timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { + return func(m optionalAttr) { + m["timeout_ms"] = value + } +} + +// Dequeues a tuple of one or more tensors from the given queue. +// +// This operation has k outputs, where k is the number of components +// in the tuples stored in the given queue, and output i is the ith +// component of the dequeued tuple. +// +// N.B. If the queue is empty, this operation will block until an element +// has been dequeued (or 'timeout_ms' elapses, if specified). +// +// Arguments: +// handle: The handle to a queue. +// component_types: The type of each component in a tuple. +// +// Returns One or more tensors that were dequeued as a tuple. +func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QueueDequeueV2", + Input: []tf.Input{ + handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueV2", err) + return + } + return components +} + +// Returns locations of nonzero / true values in a tensor. +// +// This operation returns the coordinates of true elements in `condition`. The +// coordinates are returned in a 2-D tensor where the first dimension (rows) +// represents the number of true elements, and the second dimension (columns) +// represents the coordinates of the true elements. Keep in mind, the shape of +// the output tensor can vary depending on how many true values there are in +// `condition`. Indices are output in row-major order. +// +// For example: +// +// ``` +// # 'input' tensor is [[True, False] +// # [True, False]] +// # 'input' has two true values, so output has two coordinates. +// # 'input' has rank of 2, so coordinates have two indices. +// where(input) ==> [[0, 0], +// [1, 0]] +// +// # `condition` tensor is [[[True, False] +// # [True, False]] +// # [[False, True] +// # [False, True]] +// # [[False, False] +// # [False, True]]] +// # 'input' has 5 true values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// +// # `condition` tensor is [[[1.5, 0.0] +// # [-0.5, 0.0]] +// # [[0.0, 0.25] +// # [0.0, 0.75]] +// # [[0.0, 0.0] +// # [0.0, 0.01]]] +// # 'input' has 5 nonzero values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// +// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.5j, 0.0 + 0.0j]] +// # [[0.0 + 0.0j, 0.25 + 1.5j] +// # [0.0 + 0.0j, 0.75 + 0.0j]] +// # [[0.0 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.0j, 0.01 + 0.0j]]] +// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// ``` +func Where(scope *Scope, condition tf.Output) (index tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Where", + Input: []tf.Input{ + condition, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. +type DataFormatDimMapAttr func(optionalAttr) + +// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. +// +// value: source data format. +// If not specified, defaults to "NHWC" +func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { + return func(m optionalAttr) { + m["src_format"] = value + } +} + +// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. +// +// value: destination data format. +// If not specified, defaults to "NCHW" +func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { + return func(m optionalAttr) { + m["dst_format"] = value + } +} + +// Returns the dimension index in the destination data format given the one in +// +// the source data format. +// +// Arguments: +// x: A Tensor with each element as a dimension index in source data format. +// Must be in the range [-4, 4). +// +// Returns A Tensor with each element as a dimension index in destination data format. +func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DataFormatDimMap", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Return the shape of s0 op s1 with broadcast. +// +// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the +// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. +func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BroadcastArgs", + Input: []tf.Input{ + s0, s1, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Return the reduction indices for computing gradients of s0 op s1 with broadcast. +// +// This is typically used by gradient computations for a broadcasting operation. +func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output, r1 tf.Output) { + if scope.Err() != nil { return } opspec := tf.OpSpec{ @@ -27293,134 +27531,24 @@ func SpaceToBatchND(scope *Scope, input tf.Output, block_shape tf.Output, paddin return op.Output(0) } -// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2. -type QuantizeAndDequantizeV2Attr func(optionalAttr) - -// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value. +// SpaceToBatch for 4-D tensors of type T. // -// value: If the quantization is signed or unsigned. -// If not specified, defaults to true -func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value. +// This is a legacy version of the more general SpaceToBatchND. // -// value: The bitwidth of the quantization. -// If not specified, defaults to 8 -func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value. +// Zero-pads and then rearranges (permutes) blocks of spatial data into batch. +// More specifically, this op outputs a copy of the input tensor where values from +// the `height` and `width` dimensions are moved to the `batch` dimension. After +// the zero-padding, both `height` and `width` of the input must be divisible by the +// block size. // -// value: If the range is given or should be computed from the tensor. -// If not specified, defaults to false -func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// Quantizes then dequantizes a tensor. +// Arguments: +// input: 4-D with shape `[batch, height, width, depth]`. +// paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies +// the padding of the input with zeros across the spatial dimensions as follows: // -// This op simulates the precision loss from the quantized forward pass by: -// 1. Quantizing the tensor to fixed point numbers, which should match the target -// quantization method when it is used in inference. -// 2. Dequantizing it back to floating point numbers for the following ops, most -// likely matmul. +// paddings = [[pad_top, pad_bottom], [pad_left, pad_right]] // -// There are different ways to quantize. This version does not use the full range -// of the output type, choosing to elide the lowest possible value for symmetry -// (e.g., output range is -127 to 127, not -128 to 127 for signed 8 bit -// quantization), so that 0.0 maps to 0. -// -// To perform this op, we first find the range of values in our tensor. The range -// we use is always centered on 0, so we find m such that -// -// 1. m = max(abs(input_min), abs(input_max)) if range_given is true, -// 2. m = max(abs(min_elem(input)), abs(max_elem(input))) otherwise. -// -// Our input tensor range is then [-m, m]. -// -// Next, we choose our fixed-point quantization buckets, [min_fixed, max_fixed]. -// If signed_input is true, this is -// -// [min_fixed, max_fixed ] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1]. -// -// Otherwise, if signed_input is false, the fixed-point range is -// -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]. -// -// From this we compute our scaling factor, s: -// -// s = (max_fixed - min_fixed) / (2 * m). -// -// Now we can quantize and dequantize the elements of our tensor. An element e -// is transformed into e': -// -// e' = (e * s).round_to_nearest() / s. -// -// Note that we have a different number of buckets in the signed vs. unsigned -// cases. For example, if num_bits == 8, we get 254 buckets in the signed case -// vs. 255 in the unsigned case. -// -// For example, suppose num_bits = 8 and m = 1. Then -// -// [min_fixed, max_fixed] = [-127, 127], and -// s = (127 + 127) / 2 = 127. -// -// Given the vector {-1, -0.5, 0, 0.3}, this is quantized to -// {-127, -63, 0, 38}, and dequantized to {-1, -63.0/127, 0, 38.0/127}. -// -// Arguments: -// input: Tensor to quantize and then dequantize. -// input_min: If range_given, this is the min of the range, otherwise this input -// will be ignored. -// input_max: If range_given, this is the max of the range, otherwise this input -// will be ignored. -func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV2", - Input: []tf.Input{ - input, input_min, input_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SpaceToBatch for 4-D tensors of type T. -// -// This is a legacy version of the more general SpaceToBatchND. -// -// Zero-pads and then rearranges (permutes) blocks of spatial data into batch. -// More specifically, this op outputs a copy of the input tensor where values from -// the `height` and `width` dimensions are moved to the `batch` dimension. After -// the zero-padding, both `height` and `width` of the input must be divisible by the -// block size. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, depth]`. -// paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies -// the padding of the input with zeros across the spatial dimensions as follows: -// -// paddings = [[pad_top, pad_bottom], [pad_left, pad_right]] -// -// The effective spatial dimensions of the zero-padded input tensor will be: +// The effective spatial dimensions of the zero-padded input tensor will be: // // height_pad = pad_top + height + pad_bottom // width_pad = pad_left + width + pad_right @@ -27549,734 +27677,213 @@ func UnpackAxis(value int64) UnpackAttr { // Arguments: // value: 1-D or higher, with `axis` dimension size equal to `num`. // -// -// Returns The list of tensors unpacked from `value`. -func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num": num} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unpack", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Unpack", err) - return - } - return output -} - -// Increments variable pointed to by 'resource' until it reaches 'limit'. -// -// Arguments: -// resource: Should be from a scalar `Variable` node. -// limit: If incrementing ref would bring it above limit, instead generates an -// 'OutOfRange' error. -// -// -// Returns A copy of the input before increment. If nothing else modifies the -// input, the values produced will all be distinct. -func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"limit": limit, "T": T} - opspec := tf.OpSpec{ - Type: "ResourceCountUpTo", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Delete the stack from its resource container. -// -// Arguments: -// handle: The handle to a stack. -// -// Returns the created operation. -func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StackCloseV2", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// BatchToSpace for N-D tensors of type T. -// -// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape -// `block_shape + [batch]`, interleaves these blocks back into the grid defined by -// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as -// the input. The spatial dimensions of this intermediate result are then -// optionally cropped according to `crops` to produce the output. This is the -// reverse of SpaceToBatch. See below for a precise description. -// -// Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has M dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// crops: 2-D with shape `[M, 2]`, all values must be >= 0. -// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input -// dimension `i + 1`, which corresponds to spatial dimension `i`. It is -// required that -// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. -// -// This operation is equivalent to the following steps: -// -// 1. Reshape `input` to `reshaped` of shape: -// [block_shape[0], ..., block_shape[M-1], -// batch / prod(block_shape), -// input_shape[1], ..., input_shape[N-1]] -// -// 2. Permute dimensions of `reshaped` to produce `permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1], block_shape[0], -// ..., -// input_shape[M], block_shape[M-1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// 3. Reshape `permuted` to produce `reshaped_permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0], -// ..., -// input_shape[M] * block_shape[M-1], -// -// input_shape[M+1], -// ..., -// input_shape[N-1]] -// -// 4. Crop the start and end of dimensions `[1, ..., M]` of -// `reshaped_permuted` according to `crops` to produce the output of shape: -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], -// ..., -// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// The output tensor has shape `[1, 4, 4, 1]` and value: -// -// ``` -// x = [[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [2, 0]]`: -// -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` -// -// The output tensor has shape `[2, 2, 4, 1]` and value: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BatchToSpaceND", - Input: []tf.Input{ - input, block_shape, crops, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Extract `patches` from `images` and put them in the "depth" output dimension. -// -// Arguments: -// images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. -// ksizes: The size of the sliding window for each dimension of `images`. -// strides: 1-D of length 4. How far the centers of two consecutive patches are in -// the images. Must be: `[1, stride_rows, stride_cols, 1]`. -// rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the -// input stride, specifying how far two consecutive patch samples are in the -// input. Equivalent to extracting patches with -// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by -// subsampling them spatially by a factor of `rates`. This is equivalent to -// `rate` in dilated (a.k.a. Atrous) convolutions. -// padding: The type of padding algorithm to use. -// -// We specify the size-related attributes as: -// -// ```python -// ksizes = [1, ksize_rows, ksize_cols, 1] -// strides = [1, strides_rows, strides_cols, 1] -// rates = [1, rates_rows, rates_cols, 1] -// ``` -// -// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * -// ksize_cols * depth]` containing image patches with size -// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note -// `out_rows` and `out_cols` are the dimensions of the output patches. -func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "ExtractImagePatches", - Input: []tf.Input{ - images, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Bitcasts a tensor from one type to another without copying data. -// -// Given a tensor `input`, this operation returns a tensor that has the same buffer -// data as `input` with datatype `type`. -// -// If the input datatype `T` is larger than the output datatype `type` then the -// shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)]. -// -// If `T` is smaller than `type`, the operator requires that the rightmost -// dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from -// [..., sizeof(`type`)/sizeof(`T`)] to [...]. -// -// *NOTE*: Bitcast is implemented as a low-level cast, so machines with different -// endian orderings will give different results. -func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "Bitcast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OneHotAttr is an optional argument to OneHot. -type OneHotAttr func(optionalAttr) - -// OneHotAxis sets the optional axis attribute to value. -// -// value: The axis to fill (default: -1, a new inner-most axis). -// If not specified, defaults to -1 -func OneHotAxis(value int64) OneHotAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Returns a one-hot tensor. -// -// The locations represented by indices in `indices` take value `on_value`, -// while all other locations take value `off_value`. -// -// If the input `indices` is rank `N`, the output will have rank `N+1`, -// The new axis is created at dimension `axis` (default: the new axis is -// appended at the end). -// -// If `indices` is a scalar the output shape will be a vector of length `depth`. -// -// If `indices` is a vector of length `features`, the output shape will be: -// ``` -// features x depth if axis == -1 -// depth x features if axis == 0 -// ``` -// -// If `indices` is a matrix (batch) with shape `[batch, features]`, -// the output shape will be: -// ``` -// batch x features x depth if axis == -1 -// batch x depth x features if axis == 1 -// depth x batch x features if axis == 0 -// ``` -// -// -// Examples -// ========= -// -// Suppose that -// -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 5.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[4 x 3]`: -// -// ```output = -// [5.0 0.0 0.0] // one_hot(0) -// [0.0 0.0 5.0] // one_hot(2) -// [0.0 0.0 0.0] // one_hot(-1) -// [0.0 5.0 0.0] // one_hot(1) -// ``` -// -// Suppose that -// -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 0.0 -// off_value = 3.0 -// axis = 0 -// ``` -// -// Then output is `[3 x 4]`: -// -// ```output = -// [0.0 3.0 3.0 3.0] -// [3.0 3.0 3.0 0.0] -// [3.0 3.0 3.0 3.0] -// [3.0 0.0 3.0 3.0] -// // ^ one_hot(0) -// // ^ one_hot(2) -// // ^ one_hot(-1) -// // ^ one_hot(1) -// ``` -// Suppose that -// -// ``` -// indices = [[0, 2], [1, -1]] -// depth = 3 -// on_value = 1.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[2 x 2 x 3]`: -// -// ```output = -// [ -// [1.0, 0.0, 0.0] // one_hot(0) -// [0.0, 0.0, 1.0] // one_hot(2) -// ][ -// [0.0, 1.0, 0.0] // one_hot(1) -// [0.0, 0.0, 0.0] // one_hot(-1) -// ]``` -// -// Arguments: -// indices: A tensor of indices. -// depth: A scalar defining the depth of the one hot dimension. -// on_value: A scalar defining the value to fill in output when `indices[j] = i`. -// off_value: A scalar defining the value to fill in output when `indices[j] != i`. -// -// Returns The one-hot tensor. -func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OneHot", - Input: []tf.Input{ - indices, depth, on_value, off_value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. -type QueueDequeueV2Attr func(optionalAttr) - -// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is empty, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Dequeues a tuple of one or more tensors from the given queue. -// -// This operation has k outputs, where k is the number of components -// in the tuples stored in the given queue, and output i is the ith -// component of the dequeued tuple. -// -// N.B. If the queue is empty, this operation will block until an element -// has been dequeued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// component_types: The type of each component in a tuple. -// -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueDequeueV2", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) - return - } - return components -} - -// Returns locations of nonzero / true values in a tensor. -// -// This operation returns the coordinates of true elements in `condition`. The -// coordinates are returned in a 2-D tensor where the first dimension (rows) -// represents the number of true elements, and the second dimension (columns) -// represents the coordinates of the true elements. Keep in mind, the shape of -// the output tensor can vary depending on how many true values there are in -// `condition`. Indices are output in row-major order. -// -// For example: -// -// ``` -// # 'input' tensor is [[True, False] -// # [True, False]] -// # 'input' has two true values, so output has two coordinates. -// # 'input' has rank of 2, so coordinates have two indices. -// where(input) ==> [[0, 0], -// [1, 0]] -// -// # `condition` tensor is [[[True, False] -// # [True, False]] -// # [[False, True] -// # [False, True]] -// # [[False, False] -// # [False, True]]] -// # 'input' has 5 true values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `condition` tensor is [[[1.5, 0.0] -// # [-0.5, 0.0]] -// # [[0.0, 0.25] -// # [0.0, 0.75]] -// # [[0.0, 0.0] -// # [0.0, 0.01]]] -// # 'input' has 5 nonzero values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.5j, 0.0 + 0.0j]] -// # [[0.0 + 0.0j, 0.25 + 1.5j] -// # [0.0 + 0.0j, 0.75 + 0.0j]] -// # [[0.0 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.0j, 0.01 + 0.0j]]] -// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// ``` -func Where(scope *Scope, condition tf.Output) (index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Where", - Input: []tf.Input{ - condition, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. -type QuantizeAndDequantizeAttr func(optionalAttr) - -// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to false -func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_min"] = value - } -} - -// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_max"] = value - } -} - -// Use QuantizeAndDequantizeV2 instead. -// -// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 -func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { +// +// Returns The list of tensors unpacked from `value`. +func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num": num} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantize", + Type: "Unpack", Input: []tf.Input{ - input, + value, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("Unpack", err) + return + } + return output } -// Returns the diagonal part of the tensor. -// -// This operation returns a tensor with the `diagonal` part -// of the `input`. The `diagonal` part is computed as follows: -// -// Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a -// tensor of rank `k` with dimensions `[D1,..., Dk]` where: -// -// `diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`. -// -// For example: -// -// ``` -// # 'input' is [[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]] -// -// tf.diag_part(input) ==> [1, 2, 3, 4] -// ``` +// Increments variable pointed to by 'resource' until it reaches 'limit'. // // Arguments: -// input: Rank k tensor where k is even and not zero. +// resource: Should be from a scalar `Variable` node. +// limit: If incrementing ref would bring it above limit, instead generates an +// 'OutOfRange' error. // -// Returns The extracted diagonal. -func DiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { +// +// Returns A copy of the input before increment. If nothing else modifies the +// input, the values produced will all be distinct. +func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"limit": limit, "T": T} opspec := tf.OpSpec{ - Type: "DiagPart", + Type: "ResourceCountUpTo", Input: []tf.Input{ - input, + resource, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedInstanceNormAttr is an optional argument to QuantizedInstanceNorm. -type QuantizedInstanceNormAttr func(optionalAttr) - -// QuantizedInstanceNormOutputRangeGiven sets the optional output_range_given attribute to value. -// -// value: If True, `given_y_min` and `given_y_min` -// and `given_y_max` are used as the output range. Otherwise, -// the implementation computes the output range. -// If not specified, defaults to false -func QuantizedInstanceNormOutputRangeGiven(value bool) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["output_range_given"] = value - } -} - -// QuantizedInstanceNormGivenYMin sets the optional given_y_min attribute to value. +// Delete the stack from its resource container. // -// value: Output in `y_min` if `output_range_given` is True. -// If not specified, defaults to 0 -func QuantizedInstanceNormGivenYMin(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["given_y_min"] = value - } -} - -// QuantizedInstanceNormGivenYMax sets the optional given_y_max attribute to value. +// Arguments: +// handle: The handle to a stack. // -// value: Output in `y_max` if `output_range_given` is True. -// If not specified, defaults to 0 -func QuantizedInstanceNormGivenYMax(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["given_y_max"] = value +// Returns the created operation. +func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return } -} - -// QuantizedInstanceNormVarianceEpsilon sets the optional variance_epsilon attribute to value. -// -// value: A small float number to avoid dividing by 0. -// If not specified, defaults to 1e-05 -func QuantizedInstanceNormVarianceEpsilon(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["variance_epsilon"] = value + opspec := tf.OpSpec{ + Type: "StackCloseV2", + Input: []tf.Input{ + handle, + }, } + return scope.AddOperation(opspec) } -// QuantizedInstanceNormMinSeparation sets the optional min_separation attribute to value. +// BatchToSpace for N-D tensors of type T. // -// value: Minimum value of `y_max - y_min` -// If not specified, defaults to 0.001 -func QuantizedInstanceNormMinSeparation(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["min_separation"] = value - } -} - -// Quantized Instance normalization. +// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape +// `block_shape + [batch]`, interleaves these blocks back into the grid defined by +// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as +// the input. The spatial dimensions of this intermediate result are then +// optionally cropped according to `crops` to produce the output. This is the +// reverse of SpaceToBatch. See below for a precise description. // // Arguments: -// x: A 4D input Tensor. -// x_min: The value represented by the lowest quantized input. -// x_max: The value represented by the highest quantized input. +// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, +// where spatial_shape has M dimensions. +// block_shape: 1-D with shape `[M]`, all values must be >= 1. +// crops: 2-D with shape `[M, 2]`, all values must be >= 0. +// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input +// dimension `i + 1`, which corresponds to spatial dimension `i`. It is +// required that +// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. // -// Returns A 4D Tensor.The value represented by the lowest quantized output.The value represented by the highest quantized output. -func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf.Output, optional ...QuantizedInstanceNormAttr) (y tf.Output, y_min tf.Output, y_max tf.Output) { +// This operation is equivalent to the following steps: +// +// 1. Reshape `input` to `reshaped` of shape: +// [block_shape[0], ..., block_shape[M-1], +// batch / prod(block_shape), +// input_shape[1], ..., input_shape[N-1]] +// +// 2. Permute dimensions of `reshaped` to produce `permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1], block_shape[0], +// ..., +// input_shape[M], block_shape[M-1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// 3. Reshape `permuted` to produce `reshaped_permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0], +// ..., +// input_shape[M] * block_shape[M-1], +// +// input_shape[M+1], +// ..., +// input_shape[N-1]] +// +// 4. Crop the start and end of dimensions `[1, ..., M]` of +// `reshaped_permuted` according to `crops` to produce the output of shape: +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], +// ..., +// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// Some examples: +// +// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 1]` and value: +// +// ``` +// x = [[[[1], [2]], [[3], [4]]]] +// ``` +// +// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 3]` and value: +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// x = [[[[1], [3]], [[9], [11]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +// +// The output tensor has shape `[1, 4, 4, 1]` and value: +// +// ``` +// x = [[[1], [2], [3], [4]], +// [[5], [6], [7], [8]], +// [[9], [10], [11], [12]], +// [[13], [14], [15], [16]]] +// ``` +// +// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [2, 0]]`: +// +// ``` +// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], +// [[[0], [2], [4]]], [[[0], [10], [12]]], +// [[[0], [5], [7]]], [[[0], [13], [15]]], +// [[[0], [6], [8]]], [[[0], [14], [16]]]] +// ``` +// +// The output tensor has shape `[2, 2, 4, 1]` and value: +// +// ``` +// x = [[[[1], [2], [3], [4]], +// [[5], [6], [7], [8]]], +// [[[9], [10], [11], [12]], +// [[13], [14], [15], [16]]]] +// ``` +func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizedInstanceNorm", + Type: "BatchToSpaceND", Input: []tf.Input{ - x, x_min, x_max, + input, block_shape, crops, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -- GitLab From 1a8258e0593270a8e2370517dff8faafce40a687 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 19 Mar 2018 22:29:33 -0700 Subject: [PATCH 228/960] Added infeed support for experimental C APIs associated with TPU graph rewrite. This initial design of the C API is different from (and mostly higher level than) the python API counterparts for infeed, in that the python API has explicit graph construction APIs for generating infeed enqueue/dequeue ops (e.g. split_inputs_and_generate_enqueue_ops() and generate_dequeue_op()), while the C API takes an input graph and redirects all input nodes to feed the infeed enqueue. One requirement/restriction is that the input nodes in the TF graph (e.g. Placeholder) must specify their tensor shapes, for infeed enqueue and dequeue nodes to properly compile with XLA. The API for more general shape support will be designed and implemented later. PiperOrigin-RevId: 189693028 --- tensorflow/c/c_api_experimental.cc | 204 +++++++++++++++++++++-------- tensorflow/c/c_api_experimental.h | 13 +- tensorflow/c/c_test_util.cc | 10 +- tensorflow/c/c_test_util.h | 3 +- 4 files changed, 172 insertions(+), 58 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index f6d8949bb0..eb17e16d3e 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -26,6 +26,7 @@ using tensorflow::Node; using tensorflow::NodeBuilder; using tensorflow::NodeDef; using tensorflow::Status; +using tensorflow::string; namespace { @@ -38,12 +39,28 @@ TF_Operation* ToTF_Operation(Node* node) { // Graph rewrite algorithm (modeled after the python TPU graph rewrite path): // -// 1. For each input node I, feed it to a new TPUReplicatedInput node, which in -// turn feeds a new Identity node N, and store the mapping I->N. +// 1. For each input node I, with C being the consumer node of I's output: // -// 2. Rewrite all existing graph nodes by adding a attribute on TPU cluster. For -// each node reading some input node I, rewire it to read from N instead based -// on the I->N mapping in step #1. +// a) When infeed is not specified, feed I to a new TPUReplicatedInput node +// (both running on CPU), which in turn feeds a new Identity node N, and N feeds +// C (both running on TPU). +// +// b) Otherwise, feed I to a new InfeedEnqueueTuple node IE, both running on +// CPU. Also set an InfeedDequeueTuple node ID to feed C, both running on +// TPU. +// +// In case b), if we have multiple input nodes, they all feed into the same +// InfeedEnqueueTuple node, so that the graph has a single pair of infeed +// enqueue and dequeue nodes. The list of output tensors from the dequeue node +// can go to different consumer nodes. For example, say the original graph has +// input nodes I1 and I2 respectively feeding nodes C1 and C2. After the rewrite +// with infeed ops, we will have: I1 and I2 feed a single infeed enqueue node +// IE, and a corresponding infeed dequeue node ID produces a list of two +// tensors, respectively feeding C1 and C2. +// +// 2. Rewrite all existing graph nodes by adding an attribute on TPU +// cluster. For each node C reading some input node I, rewire it to read from a +// new input node generated in step #1 above. // // 3. For each output node O, feed it to a new Identity node, which in turn // feeds a new TPUReplicatedOutput node, which in turn feeds a new Identity node @@ -66,7 +83,8 @@ class GraphRewriter { for (int i = 0; i < num_input_nodes; ++i) { // Will fill in the value part later when we create the associated new // input node. - input_node_map_[input_nodes[i].oper->node.name()] = nullptr; + input_node_map_[input_nodes[i].oper->node.name()] = + NodeBuilder::NodeOut(nullptr, -1); } // Grab all existing nodes for the upcoming rewrite, before mutating the @@ -84,19 +102,24 @@ class GraphRewriter { // On success, sets `config_op` and `shutdown_op` to the corresponding // "ConfigureDistributedTPU" and "ShutdownDistributedTPU" nodes added to the // graph. - tensorflow::Status Rewrite(TF_Output* new_output_nodes, TF_Output* config_op, - TF_Output* shutdown_op) + tensorflow::Status Rewrite(TF_Output* new_output_nodes, + TF_Operation** infeed_enqueue_node, + TF_Output* config_op, TF_Output* shutdown_op) EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { - TF_RETURN_IF_ERROR(ProcessInputNodes()); + TF_RETURN_IF_ERROR(ProcessInputNodes(infeed_enqueue_node)); return RewriteGraphAndAddOutputNodes(new_output_nodes, config_op, shutdown_op); } private: - // Synthensizes new nodes for the input nodes, and creates a replicated - // metadata node. - tensorflow::Status ProcessInputNodes() EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { + // Synthesizes new graph nodes (infeed enqueue or TPU replicated input + // nodes) for the input nodes, and creates a replicated metadata node. + // + // When `infeed_enqueue_node` is non-NULL and there are some input nodes, + // also adds the infeed dequeue node. + tensorflow::Status ProcessInputNodes(TF_Operation** infeed_enqueue_node) + EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { Node* metadata_node; TF_RETURN_IF_ERROR( NodeBuilder(metadata_node_name_.c_str(), "TPUReplicateMetadata") @@ -104,34 +127,85 @@ class GraphRewriter { .Attr("_tpu_replicate", cluster_name_.c_str()) .Finalize(&graph_->graph, &metadata_node)); - for (int i = 0; i < input_node_map_.size(); ++i) { - VLOG(1) << "Handling input node " << input_nodes_[i].oper->node.name(); - Node* replicated_input_node; - { - std::string replicated_input_name("TPUReplicate/input" + - std::to_string(i)); - NodeBuilder::NodeOut input(&input_nodes_[i].oper->node, - input_nodes_[i].index); - std::vector input_list; - input_list.push_back(input); + Node* dequeue_node = nullptr; + // Be deterministic in the corner case where `use_infeed` below is false. + if (infeed_enqueue_node) *infeed_enqueue_node = nullptr; + const bool use_infeed = + infeed_enqueue_node != nullptr && !input_node_map_.empty(); + if (use_infeed) { + std::vector new_input_list; + new_input_list.reserve(input_node_map_.size()); + std::vector input_dtypes; + input_dtypes.reserve(input_node_map_.size()); + std::vector input_shapes; + input_shapes.reserve(input_node_map_.size()); + for (int i = 0; i < input_node_map_.size(); ++i) { + Node& input_node = input_nodes_[i].oper->node; + new_input_list.push_back( + NodeBuilder::NodeOut(&input_node, input_nodes_[i].index)); + input_dtypes.push_back(input_node.output_type(input_nodes_[i].index)); + tensorflow::TensorShapeProto shape; TF_RETURN_IF_ERROR( - NodeBuilder(replicated_input_name.c_str(), "TPUReplicatedInput") - // This op requires an input list. - .Input(input_list) - .Finalize(&graph_->graph, &replicated_input_node)); + tensorflow::GetNodeAttr(input_node.attrs(), "shape", &shape)); + VLOG(1) << "Input node " << i << " has shape " << shape.DebugString(); + input_shapes.push_back(shape); } + // Enqueue always runs on CPU. + Node* enqueue_node; + TF_RETURN_IF_ERROR(NodeBuilder("InfeedEnqueueTuple", "InfeedEnqueueTuple") + .Input(new_input_list) + .Device("/device:CPU:0") + .Attr("device_ordinal", 0) + .Attr("dtypes", input_dtypes) + .Attr("shapes", input_shapes) + .Finalize(&graph_->graph, &enqueue_node)); + *infeed_enqueue_node = ToTF_Operation(enqueue_node); + // The dequeue node should be put onto the "_tpu_replicate" cluster. + TF_RETURN_IF_ERROR( + NodeBuilder("TPUReplicate/InfeedDequeueTuple", "InfeedDequeueTuple") + .ControlInput(metadata_node) + .Attr("_tpu_replicate", cluster_name_.c_str()) + .Attr("dtypes", input_dtypes) + .Attr("shapes", input_shapes) + .Finalize(&graph_->graph, &dequeue_node)); + } - { - Node* new_input_node; - const std::string new_input_name("TPUReplicate/replicated_input_" + - std::to_string(i)); - TF_RETURN_IF_ERROR(NodeBuilder(new_input_name.c_str(), "Identity") - .Input(replicated_input_node, 0) - .ControlInput(metadata_node) - .Attr("_tpu_replicate", cluster_name_.c_str()) - .Finalize(&graph_->graph, &new_input_node)); - DCHECK_GT(input_node_map_.count(input_nodes_[i].oper->node.name()), 0); - input_node_map_[input_nodes_[i].oper->node.name()] = new_input_node; + for (int i = 0; i < input_node_map_.size(); ++i) { + VLOG(1) << "Handling input node " << input_nodes_[i].oper->node.name(); + if (use_infeed) { + DCHECK(dequeue_node); + input_node_map_[input_nodes_[i].oper->node.name()] = + NodeBuilder::NodeOut(dequeue_node, i); + } else { + Node* replicated_input_node; + { + std::string replicated_input_name("TPUReplicate/input" + + std::to_string(i)); + NodeBuilder::NodeOut input(&input_nodes_[i].oper->node, + input_nodes_[i].index); + std::vector input_list; + input_list.push_back(input); + TF_RETURN_IF_ERROR( + NodeBuilder(replicated_input_name.c_str(), "TPUReplicatedInput") + // This op requires an input list. + .Input(input_list) + .Finalize(&graph_->graph, &replicated_input_node)); + } + + { + Node* new_input_node; + const std::string new_input_name("TPUReplicate/replicated_input_" + + std::to_string(i)); + TF_RETURN_IF_ERROR(NodeBuilder(new_input_name.c_str(), "Identity") + .Input(replicated_input_node, 0) + .ControlInput(metadata_node) + .Attr("_tpu_replicate", cluster_name_.c_str()) + .Finalize(&graph_->graph, &new_input_node)); + DCHECK_GT(input_node_map_.count(input_nodes_[i].oper->node.name()), + 0); + input_node_map_[input_nodes_[i].oper->node.name()] = + NodeBuilder::NodeOut(new_input_node, 0); + } } } return Status::OK(); @@ -163,7 +237,9 @@ class GraphRewriter { } const NodeDef& old_def = n->def(); - Node* new_node; + // Let node C be the consumer of `n`'s output in the original graph. + // This new node will feed into C in the rewritten graph. + NodeBuilder::NodeOut new_node; if (input_node_map_.count(n->name())) { new_node = input_node_map_[n->name()]; } else { @@ -173,10 +249,19 @@ class GraphRewriter { new_def.set_name(new_node_name); new_def.clear_input(); for (int i = 0; i < old_def.input_size(); ++i) { - const std::string& old_input_name = old_def.input(i); - const std::string new_input_name = + const string old_input_name = old_def.input(i); + // When there are multiple input nodes that get mapped to the same + // infeed dequeue node, use different output ports of the dequeue + // node. e.g. Say in the original graph, input I1 feeds C1, and I2 + // feeds C2. After the rewrite, I1 and I2 both feed a new infeed + // enqueue node, and the corresponding dequeue node has its output + // port 0 feeding C1, and output port 1 feeding C2. Note C1 and C2 + // could be the same node (e.g. an Add that takes 2 inputs). + const string new_input_name = input_node_map_.count(old_input_name) > 0 - ? std::string(input_node_map_[old_input_name]->name()) + ? tensorflow::strings::StrCat( + input_node_map_[old_input_name].node->name(), ":", + input_node_map_[old_input_name].index) : "TPUReplicate/" + old_input_name; new_def.add_input(new_input_name); } @@ -192,11 +277,12 @@ class GraphRewriter { } tensorflow::AddNodeAttr("_tpu_replicate", cluster_name_.c_str(), &new_def); - new_node = graph_->graph.AddNode(new_def, &s); + new_node = NodeBuilder::NodeOut(graph_->graph.AddNode(new_def, &s), 0); if (!s.ok()) { return s; } - VLOG(1) << "The rewritten node node is " << new_node->DebugString(); + VLOG(1) << "The rewritten node node is " + << new_node.node->DebugString(); } if (output_node_map_.count(n->name()) > 0) { @@ -206,7 +292,17 @@ class GraphRewriter { const PortIndexPair& pair = it->second; Node* out_identity_node; { - VLOG(1) << "Handling its output port " << pair.port + // If this output node is also an input, use the input_node_map_'s + // stored port, which would also work for an infeed dequeue op. + // Otherwise use pair.port. + // An example of the former: Say the graph has input nodes I1 and + // I2, and the output nodes are also I1 and I2. In the rewritten + // graph with infeed, the 2 output nodes will both come from a + // single infeed dequeue node ID, with output ports respectively + // set to 0 and 1. + const int output_port = + input_node_map_.count(n->name()) ? new_node.index : pair.port; + VLOG(1) << "Handling its output port " << output_port << " at output index " << pair.index; std::string output_node_name = "TPUReplicate/Identity"; if (pair.index > 0) { @@ -214,7 +310,7 @@ class GraphRewriter { } TF_RETURN_IF_ERROR( NodeBuilder(output_node_name.c_str(), "Identity") - .Input(new_node, pair.port) + .Input(new_node.node, output_port) .Device(!old_def.device().empty() ? old_def.device() : tensorflow::strings::StrCat( @@ -289,16 +385,18 @@ class GraphRewriter { // Keep mappings from the current input nodes to newly created input nodes, // which we will use to rewrite existing nodes that read these // inputs. e.g. A node that reads input node PlaceHolder could be rewired to - // read the created TPUReplicate/replicated_input_0 node. - std::unordered_map input_node_map_; + // read the created TPUReplicate/replicated_input_0 node or some output port + // of the created TPUReplicate/InfeedDequeueTuple node. Because of the latter + // case, we the map entries store NodeBuilder::NodeOut, and not just Node*. + std::unordered_map input_node_map_; std::vector nodes_to_rewrite_; // Map from name to set{(output port, output tensor idx)}. - // e.g. Say ther are 3 output tensors, respectively produced by (node 0, + // e.g. Say there are 3 output tensors, respectively produced by (node 0, // port 0), (node 0, port 1), (node 1, port 0). Then the mapping entries // are: node 0 -> {(port 0, idx 0), (port 1, idx 1)} node 1 -> {(port 0, idx - // 2)} Based on these mappings, we will generated 3 new output nodes. + // 2)} Based on these mappings, we will generate 3 new output nodes. struct PortIndexPair { int port; int index; @@ -331,7 +429,9 @@ TF_Output TF_SetupTPUExecution(TF_Session* session, int num_input_nodes, const TF_Output* input_nodes, int num_output_nodes, const TF_Output* output_nodes, - TF_Output* new_output_nodes, TF_Status* status) { + TF_Output* new_output_nodes, + TF_Operation** infeed_enqueue_node, + TF_Status* status) { TF_Output config_op, shutdown_op; { auto graph = session->graph; @@ -341,8 +441,8 @@ TF_Output TF_SetupTPUExecution(TF_Session* session, int num_input_nodes, << graph->graph.ToGraphDefDebug().DebugString(); GraphRewriter rewriter(graph, num_input_nodes, input_nodes, num_output_nodes, output_nodes); - status->status = - rewriter.Rewrite(new_output_nodes, &config_op, &shutdown_op); + status->status = rewriter.Rewrite(new_output_nodes, infeed_enqueue_node, + &config_op, &shutdown_op); if (!status->status.ok()) { return shutdown_op; } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index af65123131..2bad278d63 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -63,7 +63,15 @@ TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options, // Sets up TPU execution, by rewriting the graph accordingly, and initializing // TPU system. // -// On success, returns a shutdown node to be used in a subsequent +// When `infeed_enqueue_node` is non-NULL and there are input tensors, rewrites +// the graph by adding the relevant infeed enqueue/dequeue ops, and returns the +// enqueue op in `infeed_enqueue_node` on success, so that user can run that +// node and feed input tensors. When there are no input tensors, +// `infeed_enqueue_node` is ignored, and user should not run that node later. +// TODO(hongm): In this case, we currently only support input tensors of dim 0 +// shape. Lift that constraint. +// +// On success, also returns a shutdown node to be used in a subsequent // TF_ShutdownTPUExecution(), and sets the new output nodes in // `new_output_nodes` for caller to fetch from. Must be called exactly once // before TF_SessionRun(). @@ -76,7 +84,8 @@ TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options, TF_CAPI_EXPORT extern TF_Output TF_SetupTPUExecution( TF_Session* session, int num_input_nodes, const TF_Output* input_nodes, int num_output_nodes, const TF_Output* output_nodes, - TF_Output* new_output_nodes, TF_Status* status); + TF_Output* new_output_nodes, TF_Operation** infeed_enqueue_node, + TF_Status* status); // Shuts down TPU system. For any `session` where TF_SetupTPUExecution() has // been successfully called, this call must be made exactly once before the diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 22f77e7b87..f3b28c1708 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -94,18 +94,22 @@ TF_Tensor* FloatTensor(float v) { // one cannot call ASSERT_* methods in non-void-returning functions (when // exceptions are disabled during compilation) void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, - TF_DataType dtype, TF_Operation** op) { + TF_DataType dtype, const std::vector& dims, + TF_Operation** op) { TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name); TF_SetAttrType(desc, "dtype", dtype); + if (!dims.empty()) { + TF_SetAttrShape(desc, "shape", dims.data(), dims.size()); + } *op = TF_FinishOperation(desc, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); ASSERT_NE(*op, nullptr); } TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name, - TF_DataType dtype) { + TF_DataType dtype, const std::vector& dims) { TF_Operation* op; - PlaceholderHelper(graph, s, name, dtype, &op); + PlaceholderHelper(graph, s, name, dtype, dims, &op); return op; } diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index d87c57fd51..cd19cf8d62 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -48,7 +48,8 @@ TF_Tensor* FloatTensor(float v); TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name = "feed", - TF_DataType dtype = TF_INT32); + TF_DataType dtype = TF_INT32, + const std::vector& dims = {}); TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name = "const"); -- GitLab From 2311e9ced599d08f705afd631ee45cf027d05618 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 19 Mar 2018 22:56:01 -0700 Subject: [PATCH 229/960] Predictions have to be updated for exported output signatures PiperOrigin-RevId: 189694707 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/python/estimator/extenders.py | 13 +++++- .../python/estimator/extenders_test.py | 45 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 26d6bc5ae6..2f7ed7cd73 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -142,6 +142,7 @@ py_test( deps = [ ":extenders", "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/predictor", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py index 2b6881b814..266ae93305 100644 --- a/tensorflow/contrib/estimator/python/estimator/extenders.py +++ b/tensorflow/contrib/estimator/python/estimator/extenders.py @@ -23,6 +23,7 @@ import six from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import util as estimator_util +from tensorflow.python.estimator.export.export_output import PredictOutput from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.ops import clip_ops @@ -233,7 +234,17 @@ def forward_features(estimator, keys=None): 'argument of forward_features to filter unwanted features. Type of ' 'features[{}] is {}.'.format(key, key, type(feature))) predictions[key] = feature - return spec._replace(predictions=predictions) + spec = spec._replace(predictions=predictions) + if spec.export_outputs: + for ekey in ['predict', 'serving_default']: + if (ekey in spec.export_outputs and + isinstance(spec.export_outputs[ekey], + PredictOutput)): + export_outputs = spec.export_outputs[ekey].outputs + for key in get_keys(features): + export_outputs[key] = predictions[key] + + return spec return estimator_lib.Estimator( model_fn=new_model_fn, diff --git a/tensorflow/contrib/estimator/python/estimator/extenders_test.py b/tensorflow/contrib/estimator/python/estimator/extenders_test.py index ad1a8ef152..407af2deaf 100644 --- a/tensorflow/contrib/estimator/python/estimator/extenders_test.py +++ b/tensorflow/contrib/estimator/python/estimator/extenders_test.py @@ -18,20 +18,27 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import tempfile import numpy as np from tensorflow.contrib.estimator.python.estimator import extenders +from tensorflow.contrib.predictor import from_saved_model from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.canned import linear from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import variables +from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.training import training +from tensorflow.python.util import compat def get_input_fn(x, y): @@ -177,6 +184,44 @@ class ForwardFeaturesTest(test.TestCase): self.assertIn('id', predictions) self.assertEqual(101, predictions['id']) + def test_forward_in_exported(self): + + def serving_input_fn(): + features_ph = { + 'x': array_ops.placeholder(dtypes.float32, [None]), + 'id': array_ops.placeholder(dtypes.int32, [None]) + } + features = { + key: array_ops.expand_dims(tensor, -1) + for key, tensor in features_ph.items() + } + return estimator_lib.export.ServingInputReceiver(features, features_ph) + def input_fn(): + return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]] + # create estimator + feature_columns = [fc.numeric_column('x')] + estimator = linear.LinearRegressor(feature_columns) + estimator.train(input_fn=input_fn, steps=1) + estimator = extenders.forward_features(estimator, 'id') + + # export saved model + tmpdir = tempfile.mkdtemp() + export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + export_dir = estimator.export_savedmodel(export_dir_base, serving_input_fn) + self.assertTrue(gfile.Exists(export_dir)) + + # restore model + predict_fn = from_saved_model(export_dir, signature_def_key='predict') + predictions = predict_fn({'x': [3], 'id': [101]}) + + # verify that 'id' exists in predictions + self.assertIn('id', predictions) + self.assertEqual(101, predictions['id']) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + def test_forward_list(self): def input_fn(): -- GitLab From e2e67c528316be8ea4f624af8757e80d7f00b5b6 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 19 Mar 2018 23:15:42 -0700 Subject: [PATCH 230/960] Fix some edge cases around scalar indices in the gather expander I discovered these when changing the tf2xla bridge to directly emit gather operations. - DeScalarizeGatherIndices was assuming that gather_indices must be of at least rank 1. Fix this to be more general. - We were passing in the wrong version of gather indices to ExpandFirstDimIntoNDims. We don't strictly need to pass in transposed_gather_indices (since if transposed_gather_indices is rank 1 then the transpose has to be an identity transpose), passing in descalarized_gather_indices would also have been fine, but transposed_gather_indices seems more uniform. - ExpandGatherDimsInAccumulator was assuming that gather_indices must be of at least rank 1 (by calling CollapseFirstNDims). Fix this to be more general. - We were trying to go through with emitting zero sized gather operations. I don't think it is worth dealing with all of the edge cases this would expose so now we just punt to ZeroSizedHloElimination. PiperOrigin-RevId: 189696444 --- .../compiler/xla/service/gather_expander.cc | 44 +++++++++---- .../xla/service/hlo_creation_utils.cc | 18 ------ .../compiler/xla/service/hlo_creation_utils.h | 10 --- .../xla/tests/gather_operation_test.cc | 62 +++++++++++++++++++ 4 files changed, 93 insertions(+), 41 deletions(-) diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index 58c62d8ce9..488bed35fe 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -53,9 +53,14 @@ static StatusOr DeScalarizeGatherIndices( return gather_indices; } - int64 last_index = gather_indices_shape.dimensions( - gather_indices_shape.dimensions_size() - 1); - return ExpandLastDimIntoNDims(gather_indices, {last_index, 1}); + DCHECK_EQ(index_vector_dim, gather_indices_shape.dimensions_size()); + + std::vector result_shape_dims; + c_copy(gather_indices_shape.dimensions(), + std::back_inserter(result_shape_dims)); + result_shape_dims.push_back(1); + + return MakeReshapeHlo(result_shape_dims, gather_indices); } // Canonicalizes the gather_indices tensors so that we only have deal with some @@ -81,16 +86,17 @@ static StatusOr CanonicalizeGatherIndices( // all of the non-index-vector dimensions. const Shape& shape = transposed_gather_indices->shape(); if (shape.dimensions_size() == 1) { - return ExpandFirstDimIntoNDims(gather_indices, {1, shape.dimensions(0)}); + return ExpandFirstDimIntoNDims(transposed_gather_indices, + {1, shape.dimensions(0)}); } else { return CollapseFirstNDims(transposed_gather_indices, shape.dimensions_size() - 1); } } -// Expands out the gather dimensions in the accumulator produced by the while -// loop. -static StatusOr ExpandGatherDimsInAccumulator( +// Expands out or contracts away the gather dimensions in the accumulator +// produced by the while loop. +static StatusOr AdjustGatherDimsInAccumulator( const Shape& gather_indices_shape, HloInstruction* accumulator, int64 index_vector_dim) { std::vector output_gather_dim_bounds; @@ -103,9 +109,14 @@ static StatusOr ExpandGatherDimsInAccumulator( if (output_gather_dim_bounds.empty()) { // If output_gather_dim_bounds is empty we must be lowering a (effectively) - // dynamic-slice. + // dynamic-slice. In that case, there is a leading degenerate gather + // dimension that we added to make this special case play well with the + // general while loop which we need to remove now. CHECK_EQ(accumulator->shape().dimensions(0), 1); - return CollapseFirstNDims(accumulator, 2); + ArraySlice reshaped_dim_sizes = + AsInt64Slice(accumulator->shape().dimensions()); + reshaped_dim_sizes.remove_prefix(1); + return MakeReshapeHlo(reshaped_dim_sizes, accumulator); } return ExpandFirstDimIntoNDims(accumulator, output_gather_dim_bounds); @@ -290,6 +301,8 @@ static StatusOr PermuteGatherAndWindowDims( StatusOr GatherExpander::ExpandGather( HloInstruction* gather_instr) { + CHECK(!ShapeUtil::HasZeroElements(gather_instr->shape())); + HloComputation* computation = gather_instr->parent(); HloInstruction* operand = gather_instr->mutable_operand(0); HloInstruction* gather_indices = gather_instr->mutable_operand(1); @@ -331,7 +344,7 @@ StatusOr GatherExpander::ExpandGather( TF_ASSIGN_OR_RETURN( HloInstruction * accumulator_with_output_gather_dims_decanonicalized, - ExpandGatherDimsInAccumulator(gather_indices->shape(), + AdjustGatherDimsInAccumulator(gather_indices->shape(), accumulator_with_window_dims_elided, dim_numbers.index_vector_dim())); @@ -341,12 +354,17 @@ StatusOr GatherExpander::ExpandGather( } StatusOr GatherExpander::Run(HloModule* module) { + auto is_nontrivial_gather = [](HloInstruction* inst) { + return inst->opcode() == HloOpcode::kGather && + // Avoid expanding gather ops that produce zero sized tensors, + // instead punt these to ZeroSizedHloElimination. + !ShapeUtil::HasZeroElements(inst->shape()); + }; + std::vector gather_instrs; for (HloComputation* computation : module->MakeNonfusionComputations()) { c_copy_if(computation->instructions(), std::back_inserter(gather_instrs), - [](HloInstruction* inst) { - return inst->opcode() == HloOpcode::kGather; - }); + is_nontrivial_gather); } for (HloInstruction* inst : gather_instrs) { diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index fbe71f8d5b..b186767ce7 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -201,24 +201,6 @@ StatusOr ExpandFirstDimIntoNDims( return MakeReshapeHlo(new_shape, operand); } -StatusOr ExpandLastDimIntoNDims( - HloInstruction* operand, ArraySlice expanded_dims) { - CHECK_GT(operand->shape().dimensions_size(), 0); - CHECK_EQ(operand->shape().dimensions(operand->shape().dimensions_size() - 1), - Product(expanded_dims)); - - std::vector expanded_shape_dim_bounds; - expanded_shape_dim_bounds.reserve(expanded_dims.size() + - operand->shape().dimensions_size() - 1); - std::copy(operand->shape().dimensions().begin(), - operand->shape().dimensions().end() - 1, - std::back_inserter(expanded_shape_dim_bounds)); - c_copy(expanded_dims, std::back_inserter(expanded_shape_dim_bounds)); - Shape new_shape = ShapeUtil::MakeShape(operand->shape().element_type(), - expanded_shape_dim_bounds); - return MakeReshapeHlo(new_shape, operand); -} - StatusOr ElideDegenerateDims(HloInstruction* operand, ArraySlice dims_to_elide) { CHECK(c_is_sorted(dims_to_elide)); diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h index 6032ebab74..d99e32a737 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.h +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -119,16 +119,6 @@ StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n); StatusOr ExpandFirstDimIntoNDims( HloInstruction* operand, tensorflow::gtl::ArraySlice expanded_dims); -// Expands (via reshape) the last (logical) dimension of `operand` into a -// sequence of `expanded_dims` dimensions. `operand` must at least be of rank 1 -// and the number of elements in its last dimension must be equal to the -// product of `expanded_dims`. -// -// For instance if `operand` has shape f32[9,7,200] and expanded_dims is -// {2,5,20} the result is `operand` reshaped to [9,7,2,5,20]. -StatusOr ExpandLastDimIntoNDims( - HloInstruction* operand, tensorflow::gtl::ArraySlice expanded_dims); - // Elides (via reshape) a set of degenerate dimensions (dimensions containing // exactly one element), `dims_to_elide` from `operand`. Every dimension in // `dims_to_elide` must be a degenerate dimension. `dims_to_elide` must be diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 0830e9c8f0..8ba91946c0 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -335,5 +335,67 @@ ENTRY main { {operand.get(), gather_indices.get(), in_bounds_mask.get()}); } +XLA_TEST_F(GatherOperationTest, OneScalarIndex) { + const char* hlo_text = R"( +HloModule OneScalarIndex + +ENTRY main { + operand = s32[2,3,2]{2,1,0} parameter(0) + index = s32[] parameter(1) + ROOT gather = s32[1,3,2]{2,1,0} gather(operand, index), + output_window_dims={0,1,2}, + elided_window_dims={}, + gather_dims_to_operand_dims={0}, + index_vector_dim=0, + window_bounds={1,3,2} +} +)"; + std::unique_ptr operand = Literal::CreateR3( + {{{1, 2}, {3, 4}, {5, 6}}, {{7, 8}, {9, 10}, {11, 12}}}); + std::unique_ptr gather_indices = Literal::CreateR0(1); + RunTest(hlo_text, operand.get(), gather_indices.get()); +} + +XLA_TEST_F(GatherOperationTest, ScalarResult) { + const char* hlo_text = R"( +HloModule ScalarResult + +ENTRY main { + operand = s32[4]{0} parameter(0) + index = s32[] parameter(1) + ROOT gather = s32[] gather(operand, index), + output_window_dims={}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=0, + window_bounds={1} +} +)"; + std::unique_ptr operand = Literal::CreateR1({1, 2, 3, 4}); + std::unique_ptr gather_indices = Literal::CreateR0(1); + RunTest(hlo_text, operand.get(), gather_indices.get()); +} + +XLA_TEST_F(GatherOperationTest, ZeroSizedResult) { + const string hlo_text = R"( +HloModule ZeroSizedResult + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[0] parameter(1) + ROOT gather = s32[0,3] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1, 3} +} +)"; + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR1({}); + RunTest(hlo_text, operand.get(), gather_indices.get()); +} + } // namespace } // namespace xla -- GitLab From 28db3a7eae4986e3e662de16188cf7a03be33768 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 02:26:31 -0700 Subject: [PATCH 231/960] Fix bug PiperOrigin-RevId: 189712233 --- tensorflow/compiler/jit/xla_launch_util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 076cbd2084..bb7316c60c 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -169,7 +169,7 @@ void XlaComputationLaunchContext::PopulateOutputs( int output_num = 0; for (int i = 0; i < ctx->num_outputs(); ++i) { AllocatorAttributes alloc_attrs = ctx->output_alloc_attr(i); - Allocator* allocator = ctx->device()->GetAllocator(alloc_attrs); + Allocator* allocator = ctx->device()->GetAllocator({}); if (tensor_info_manager_ && !alloc_attrs.on_host()) { allocator = tensor_info_manager_; } -- GitLab From 163bb675579bbc3a115c0caac9b42891f629bfd4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 03:11:32 -0700 Subject: [PATCH 232/960] - Added support for data to be specified in RNN classes as large tensors with time folded into the batch dimension instead of lists of tensors - Significant refactoring of RNN classes - Fixed a bunch of issues in the LayerCollection docstrings, especially around the 'reuse' argument. PiperOrigin-RevId: 189716331 --- .../kernel_tests/fisher_factors_test.py | 12 +- .../contrib/kfac/python/ops/fisher_blocks.py | 344 +++++++++--------- .../contrib/kfac/python/ops/fisher_factors.py | 65 ++-- .../kfac/python/ops/layer_collection.py | 163 ++++++--- tensorflow/contrib/kfac/python/ops/utils.py | 3 + 5 files changed, 314 insertions(+), 273 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 16f02f1199..e007f70939 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -862,8 +862,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') - tensor_list = [tensor] - factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + factor = ff.FullyConnectedMultiKF((tensor,), has_bias=False) factor.instantiate_cov_variables() self.assertEqual([3, 3], factor.get_cov().get_shape().as_list()) @@ -872,8 +871,7 @@ class FullyConnectedMultiKFTest(test.TestCase): dtype = dtypes.float64_ref random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - tensor_list = [tensor] - factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + factor = ff.FullyConnectedMultiKF((tensor,), has_bias=False) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -883,8 +881,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - tensor_list = [tensor] - factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=True) + factor = ff.FullyConnectedMultiKF((tensor,), has_bias=True) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -895,8 +892,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - tensor_list = [tensor] - factor = ff.FullyConnectedMultiKF((tensor_list,)) + factor = ff.FullyConnectedMultiKF((tensor,)) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 79d0424dca..f517e3148f 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -106,55 +106,6 @@ def _make_partitionedtensors_grads(grads_list): return tuple(utils.PartitionedTensor(grads) for grads in grads_list) -def _make_partitionedtensors_multi_inputs(inputs): - """Constructs PartitionedTensors for inputs. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - This version of this function is for use with FisherBlocks that deal with - multiple uses or time-steps. One PartitionedTensor is created for each - use/time-step. The FisherBlock will be responsible for concatenating - (or doing whatever else it wants) with the resulting lists. - - Args: - inputs: a 2-D list of Tensors. First index is tower/mini-batch, second is - use/time-step. - - Returns: - A tuple of PartitionedTensor's, one per use/time-step. - """ - num_uses = len(inputs[0]) - assert all(len(input_) == num_uses for input_ in inputs) - - return tuple(utils.PartitionedTensor(input_) for input_ in zip(*inputs)) - - -def _make_partitionedtensors_multi_grads(grads_list): - """Constructs PartitionedTensors for grads_list. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - This version of this function is for use with FisherBlocks that deal with - multiple uses or time-steps. One PartitionedTensor is created for each - use/time-step. The FisherBlock will be responsible for concatenating - (or doing whatever else it wants) with the resulting lists. - - Args: - grads_list: 3-D list of Tensors. First index is for source, second is for - tower, third is for use/time-step. - - Returns: - 2-D tuple of PartitionedTensors. First index is for source, second is for - use/time-step. - """ - num_uses = len(grads_list[0][0]) - assert all(len(grad) == num_uses for grads in grads_list for grad in grads) - return tuple(tuple(utils.PartitionedTensor(grad) - for grad in zip(*grads)) for grads in grads_list) - - def normalize_damping(damping, num_replications): """Normalize damping after adjusting scale by NORMALIZE_DAMPING_POWER.""" if NORMALIZE_DAMPING_POWER: @@ -662,7 +613,7 @@ class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): class KroneckerProductFB(FisherBlock): - """A base class for FisherBlocks with separate input and output factors. + """A base class for blocks with separate input and output Kronecker factors. The Fisher block is approximated as a Kronecker product of the input and output factors. @@ -783,67 +734,6 @@ class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): self._setup_damping(damping) -class EmbeddingKFACMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): - """K-FAC FisherBlock for embedding layers used multiple times in the graph. - - Similar to EmbeddingKFACFB except that this version supports multiple uses - of the parameter within a single model. These uses could correspond to - "time-steps", but they don't have to. - - Does not support bias parameters. - """ - - def __init__(self, layer_collection, vocab_size): - """Creates a EmbeddingKFACMultiIndepFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - vocab_size: int. Size of vocabulary for this embedding layer. - """ - self._vocab_size = vocab_size - - super(EmbeddingKFACMultiIndepFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - """Instantiate Kronecker Factors for this FisherBlock. - - Args: - grads_list: List of list of list of Tensors. grads_list[i][j][k] is the - gradient of the loss with respect to 'outputs' from source 'i', - tower/mini-batch 'j', and use/time-step 'k'. Each Tensor has shape - [tower_minibatch_size, output_size]. - damping: 0-D Tensor or float. 'damping' * identity is approximately added - to this FisherBlock's Fisher approximation. - """ - inputs = self._inputs - self._num_uses = num_uses = len(inputs[0]) - - # Check that all mini-batches/towers have the same number of uses - assert all(len(input_) == num_uses for input_ in inputs) - # Do the same for grads_list - assert all(len(grad) == num_uses for grad in grads for grads in grads_list) - # Merge uses and towers/minibatches dimensions together so we can handle - # it using a non-multi factor. - inputs = nest.flatten(inputs) - - # Note that we call the multi version of make_partitionedtensors only for - # grads_list here. - inputs = _make_partitionedtensors_inputs(inputs) - grads_list = _make_partitionedtensors_multi_grads(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.EmbeddingInputKroneckerFactor, - (inputs, self._vocab_size)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list,)) - self._setup_damping(damping, normalization=num_uses) - - @property - def _renorm_coeff(self): - return self._num_uses - - class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. @@ -1232,7 +1122,70 @@ def num_conv_locations(input_shape, strides): return spatial_input_locations // spatial_strides_divisor -class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): +class InputOutputMultiMinibatchMultiUse(InputOutputMultiMinibatch): + """Adds methods for multi-use/time-step case to InputOutputMultiMinibatch.""" + + def __init__(self, num_uses=None, *args, **kwargs): + self._num_uses = num_uses + super(InputOutputMultiMinibatchMultiUse, self).__init__(*args, **kwargs) + + def _process_data(self, grads_list): + """Process temporal/multi-use data into a standard format.""" + + inputs = self._inputs + + # The first possible data format is where inputs is a list of tensors, + # one for each use/time-step. + if isinstance(inputs[0], (list, tuple)): + # The first index is tower/minibatch, the second is use/time-step + num_uses = len(inputs[0]) + if self._num_uses is not None and self._num_uses != num_uses: + raise ValueError("num_uses argument doesn't match length of inputs.") + else: + self._num_uses = num_uses + + # Check that all mini-batches/towers have the same number of uses + if not all(len(input_) == num_uses for input_ in inputs): + raise ValueError("Length of inputs argument is inconsistent across " + "mini-batches/towers.") + # Fold uses/time-step and towers/minibatches dimensions together + inputs = nest.flatten(inputs) + + inputs = _make_partitionedtensors_inputs(inputs) + # If inputs is not a tuple then we assume that inputs is a tensor + # with 'uses' folded into the batch dimension. (And grads_list is a list + # across sources of such Tensors.) This is the native format that the + # factor will take as arguments. + + # Now we perform the analogous processing for grads_list + if isinstance(grads_list[0][0], (list, tuple)): + num_uses = len(grads_list[0][0]) + if self._num_uses is not None and self._num_uses != num_uses: + raise ValueError("num_uses argument doesn't match length of outputs, " + "or length of outputs is inconsistent with length of " + "inputs.") + else: + self._num_uses = num_uses + + if not all(len(grad) == num_uses for grads in grads_list + for grad in grads): + raise ValueError("Length of outputs argument is inconsistent across " + "mini-batches/towers.") + + grads_list = tuple(nest.flatten(grads) for grads in grads_list) + grads_list = _make_partitionedtensors_grads(grads_list) + + if self._num_uses is None: + raise ValueError("You must supply a value for the num_uses argument if " + "the number of uses cannot be inferred from inputs or " + "outputs arguments (e.g. if they are both given in the " + "single Tensor format, instead of as lists of Tensors.") + + return inputs, grads_list + + +class FullyConnectedMultiIndepFB(InputOutputMultiMinibatchMultiUse, + KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters. This class implements the "independence across time" approximation from the @@ -1240,42 +1193,43 @@ class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): https://openreview.net/pdf?id=HyMTkQZAb """ - def __init__(self, layer_collection, has_bias=False): + def __init__(self, layer_collection, has_bias=False, num_uses=None): """Creates a FullyConnectedMultiIndepFB block. Args: layer_collection: LayerCollection instance. has_bias: bool. If True, estimates Fisher with respect to a bias parameter as well as the layer's parameters. + num_uses: int or None. Number of uses of the layer in the model's graph. + Only required if the data is formatted with uses/time folded into the + batch dimension (instead of uses/time being a list dimension). + (Default: None) """ self._has_bias = has_bias - super(FullyConnectedMultiIndepFB, self).__init__(layer_collection) + super(FullyConnectedMultiIndepFB, self).__init__( + layer_collection=layer_collection, + num_uses=num_uses) def instantiate_factors(self, grads_list, damping): - - self._num_uses = float(len(self._inputs[0])) - inputs = _make_partitionedtensors_multi_inputs(self._inputs) - grads_list = _make_partitionedtensors_multi_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, - ((inputs,), self._has_bias)) + ((inputs,), self._num_uses, self._has_bias)) self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list,)) + fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) self._setup_damping(damping, normalization=self._num_uses) @property def _renorm_coeff(self): - return self._num_uses - - def tensors_to_compute_grads(self): - return self._outputs + return float(self._num_uses) -class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): +class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatchMultiUse, + KroneckerProductFB): """FisherBlock for 2D convolutional layers using the basic KFC approx. Similar to ConvKFCBasicFB except that this version supports multiple @@ -1291,7 +1245,8 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): strides=None, dilation_rate=None, data_format=None, - extract_patches_fn=None): + extract_patches_fn=None, + num_uses=None): """Creates a ConvKFCBasicMultiIndepFB block. Args: @@ -1312,6 +1267,10 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): extract_patches_fn: str or None. Name of function that extracts image patches. One of "extract_convolution_patches", "extract_image_patches", "extract_pointwise_conv2d_patches". + num_uses: int or None. Number of uses of the layer in the model's graph. + Only required if the data is formatted with uses/time folded into the + batch dimension (instead of uses/time being a list dimension). + (Default: None) """ self._padding = padding self._strides = maybe_tuple(strides) @@ -1323,28 +1282,16 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): fltr = params[0] if self._has_bias else params self._filter_shape = tuple(fltr.shape.as_list()) - super(ConvKFCBasicMultiIndepFB, self).__init__(layer_collection) + super(ConvKFCBasicMultiIndepFB, self).__init__( + layer_collection=layer_collection, + num_uses=num_uses) def instantiate_factors(self, grads_list, damping): - # Infer number of locations upon which convolution is applied. - self._num_locations = num_locations = num_conv_locations( - self._inputs[0][0].shape.as_list(), self._strides) - - # The first index is tower/minibatch, the second is use/time-step - inputs = self._inputs - self._num_uses = num_uses = len(inputs[0]) - - # Check that all mini-batches/towers have the same number of uses - assert all(len(input_) == num_uses for input_ in inputs) - assert all(len(grad) == num_uses for grads in grads_list for grad in grads) - - # Fold uses/time-step and towers/minibatches dimensions together - inputs = nest.flatten(inputs) - # And do the same for grads_list - grads_list = tuple(nest.flatten(grads) for grads in grads_list) + inputs, grads_list = self._process_data(grads_list) - inputs = _make_partitionedtensors_inputs(inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + # Infer number of locations upon which convolution is applied. + self._num_locations = num_conv_locations(inputs.shape.as_list(), + self._strides) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, @@ -1354,20 +1301,75 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): self._output_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) - self._setup_damping(damping, normalization=(num_locations * num_uses)) + self._setup_damping(damping, normalization= + (self._num_locations * self._num_uses)) @property def _renorm_coeff(self): return self._num_locations * self._num_uses +class EmbeddingKFACMultiIndepFB(InputOutputMultiMinibatchMultiUse, + KroneckerProductFB): + """K-FAC FisherBlock for embedding layers used multiple times in the graph. + + Similar to EmbeddingKFACFB except that this version supports multiple uses + of the parameter within a single model. These uses could correspond to time + steps in an RNN architecture, but they don't have to. + + Does not support bias parameters. + """ + + def __init__(self, layer_collection, vocab_size, num_uses): + """Creates a EmbeddingKFACMultiIndepFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + vocab_size: int. Size of vocabulary for this embedding layer. + num_uses: int or None. Number of uses of the layer in the model's graph. + Only required if the data is formatted with time folded into the batch + dimension (instead of time being a list dimension). (Default: None) + """ + self._vocab_size = vocab_size + + super(EmbeddingKFACMultiIndepFB, self).__init__( + layer_collection=layer_collection, + num_uses=num_uses) + + def instantiate_factors(self, grads_list, damping): + """Instantiate Kronecker Factors for this FisherBlock. + + Args: + grads_list: List of list of list of Tensors. grads_list[i][j][k] is the + gradient of the loss with respect to 'outputs' from source 'i', + tower/mini-batch 'j', and use/time-step 'k'. Each Tensor has shape + [tower_minibatch_size, output_size]. + damping: 0-D Tensor or float. 'damping' * identity is approximately added + to this FisherBlock's Fisher approximation. + """ + inputs, grads_list = self._process_data(grads_list) + + self._input_factor = self._layer_collection.make_or_get_factor( + fisher_factors.EmbeddingInputKroneckerFactor, + (inputs, self._vocab_size)) + self._output_factor = self._layer_collection.make_or_get_factor( + fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) + self._setup_damping(damping, normalization=self._num_uses) + + @property + def _renorm_coeff(self): + return float(self._num_uses) + + class SeriesFBApproximation(enum.IntEnum): """See FullyConnectedSeriesFB.__init__ for description and usage.""" option1 = 1 option2 = 2 -class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): +class FullyConnectedSeriesFB(InputOutputMultiMinibatchMultiUse, + KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters across time. This class implements the "Option 1" and "Option 2" approximation from the @@ -1383,6 +1385,7 @@ class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): def __init__(self, layer_collection, has_bias=False, + num_uses=None, option=SeriesFBApproximation.option2): """Constructs a new `FullyConnectedSeriesFB`. @@ -1390,6 +1393,10 @@ class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): layer_collection: The collection of all layers in the K-FAC approximate Fisher information matrix to which this FisherBlock belongs. has_bias: Whether the layer includes a bias parameter. + num_uses: int or None. Number of time-steps over which the layer + is used. Only required if the data is formatted with time folded into + the batch dimension (instead of time being a list dimension). + (Default: None) option: A `SeriesFBApproximation` specifying the simplifying assumption to be used in this block. `option1` approximates the cross-covariance over time as a symmetric matrix, while `option2` makes @@ -1400,39 +1407,33 @@ class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): self._has_bias = has_bias self._option = option - super(FullyConnectedSeriesFB, self).__init__(layer_collection) + super(FullyConnectedSeriesFB, self).__init__( + layer_collection=layer_collection, + num_uses=num_uses) - def instantiate_factors(self, grads_list, damping): + @property + def _num_timesteps(self): + return self._num_uses - self._num_timesteps = len(self._inputs[0]) - assert len(grads_list[0][0]) == self._num_timesteps + @property + def _renorm_coeff(self): + # This should no longer be used since the multiply_X functions from the base + # class have been overridden + assert False - inputs = _make_partitionedtensors_multi_inputs(self._inputs) - grads_list = _make_partitionedtensors_multi_grads(grads_list) + def instantiate_factors(self, grads_list, damping): + inputs, grads_list = self._process_data(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, ((inputs,), self._has_bias)) + fisher_factors.FullyConnectedMultiKF, + ((inputs,), self._num_uses, self._has_bias)) self._input_factor.register_cov_dt1() self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list,)) + fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) self._output_factor.register_cov_dt1() - def compute_damping(): - normalized_damping = normalize_damping(damping, self._num_timesteps) - return compute_pi_adjusted_damping(self._input_factor.get_cov(), - self._output_factor.get_cov(), - normalized_damping**0.5) - - damping_id = ("compute_pi_adjusted_damping", - "cov", self._input_factor.name, - "cov", self._output_factor.name, - "normalize_damping", - damping, self._num_timesteps, "power", 0.5) - self._input_damping_func = _package_func(lambda: compute_damping()[0], - damping_id + ("ref", 0)) - self._output_damping_func = _package_func(lambda: compute_damping()[1], - damping_id + ("ref", 1)) + self._setup_damping(damping, normalization=self._num_uses) def register_matpower(self, exp): if exp != -1: @@ -1562,6 +1563,3 @@ class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): return utils.mat2d_to_layer_params(vector, Z) # pylint: enable=invalid-name - - def tensors_to_compute_grads(self): - return self._outputs diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 6fc163e232..f521363536 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -35,7 +35,6 @@ from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import moving_averages -from tensorflow.python.util import nest # Whether to initialize covariance estimators at a zero matrix (or the identity # matrix). @@ -1227,27 +1226,24 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): return compute_cov(reshaped_tensor) -class FullyConnectedMultiKF(InverseProvidingFactor): +class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): """Kronecker factor for a fully connected layer used multiple times.""" def __init__(self, - tensor_lists, + tensors, + num_uses=None, has_bias=False): """Constructs a new `FullyConnectedMultiKF`. Args: - tensor_lists: 2D array (list of lists) of Tensors of shape - [batch_size, n]. Each of these tensors is usually a layer's inputs or - its output's gradients. The first dimension of the array is the source, - and the second is the use in the graph (which is sometimes a - "time-step"). + tensors: List of Tensors of shape, each of shape [batch_size, n]. Each of + these tensors is usually a layer's inputs or its output's gradients. + The list is over sources. + num_uses: int. The number of time-steps / uses. has_bias: bool. If True, '1' is appended to each row. """ - self._tensor_lists = tensor_lists - self._has_bias = has_bias - self._num_timesteps = len(tensor_lists[0]) - self._tensors = [None] * len(tensor_lists) + self._num_uses = num_uses self._cov_dt1 = None self._make_cov_dt1 = False @@ -1256,20 +1252,17 @@ class FullyConnectedMultiKF(InverseProvidingFactor): self._option1quants_registrations = set() self._option2quants_registrations = set() - super(FullyConnectedMultiKF, self).__init__() - - @property - def _var_scope(self): - return "ff_fc_multi_" + scope_string_from_params( - tuple(nest.flatten(self._tensor_lists)) + (self._has_bias,)) + super(FullyConnectedMultiKF, self).__init__(tensors=tensors, + has_bias=has_bias) @property - def _num_sources(self): - return len(self._tensor_lists) + def _num_timesteps(self): + return self._num_uses @property - def _dtype(self): - return self._tensor_lists[0][0].dtype + def _var_scope(self): + return "ff_fc_multi_" + scope_string_from_params( + tuple(self._tensors) + (self._num_timesteps, self._has_bias,)) def make_covariance_update_op(self, ema_decay): @@ -1291,36 +1284,28 @@ class FullyConnectedMultiKF(InverseProvidingFactor): return op - def _compute_new_cov(self, idx=0): - # Concatenate across time/replications - tensor = array_ops.concat(self._tensor_lists[idx], 0) + def _compute_new_cov_dt1(self, idx=0): # pylint: disable=missing-docstring + tensor = self._tensors[idx] if self._has_bias: + # This appending is technically done twice (the other time is for + # _compute_new_cov()) tensor = append_homog(tensor) - # We save these so they can be used by _compute_new_cov_dt1 - self._tensors[idx] = tensor - return compute_cov(tensor) - def _compute_new_cov_dt1(self, idx=0): # pylint: disable=missing-docstring - tensor = self._tensors[idx] - batch_size = array_ops.shape(self._tensor_lists[idx][0])[0] - # Is there a more elegant way to do this computation? + total_len = array_ops.shape(tensor)[0] + batch_size = total_len // self._num_timesteps + tensor_present = tensor[:-batch_size, :] tensor_future = tensor[batch_size:, :] + # We specify a normalizer for this computation to ensure a PSD Fisher # block estimate. This is equivalent to padding with zeros, as was done # in Section B.2 of the appendix. - normalizer = self._num_timesteps * batch_size return compute_cov( - tensor_future, tensor_right=tensor_present, normalizer=normalizer) - - @property - def _cov_shape(self): - size = self._tensor_lists[0][0].shape[1] + self._has_bias - return [size, size] + tensor_future, tensor_right=tensor_present, normalizer=total_len) @property def _vec_shape(self): - size = self._tensor_lists[0][0].shape[1] + self._has_bias + size = self._tensors[0].shape[1] + self._has_bias return [size] def get_option1quants(self, damping_func): diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 00eae8b399..7727c607db 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -572,13 +572,15 @@ class LayerCollection(object): params: Embedding matrix of shape [vocab_size, embedding_size]. inputs: Tensor of shape [batch_size, input_size] and dtype int32. Indices into embedding matrix. - outputs: Tensor of shape [batch_size, output_size]. Outputs + outputs: Tensor of shape [batch_size, embedding_size]. Outputs produced by layer. approx: str or None. If not None must be "kron". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -616,9 +618,11 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -665,9 +669,11 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -743,9 +749,11 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -796,9 +804,11 @@ class LayerCollection(object): data_format: str or None. Format of data. approx: str or None. If not None must "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -862,9 +872,11 @@ class LayerCollection(object): approx: str or None. If not None must be one of "kron" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'inputs' and 'outputs' as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -905,9 +917,10 @@ class LayerCollection(object): approx: str or None. It not None, must be one of "full" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'batch_size' to the total + mini-batch size use when estimating the Fisher block for this layer + (which must have already been registered). If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -924,7 +937,8 @@ class LayerCollection(object): self._add_uses(params, float("inf")) def register_fully_connected_multi(self, params, inputs, outputs, - approx=None, reuse=VARIABLE_SCOPE): + num_uses=None, approx=None, + reuse=VARIABLE_SCOPE): """Register fully connected layers with shared parameters. This can handle general fully-connected layers with shared parameters, but @@ -935,19 +949,31 @@ class LayerCollection(object): params: Tensor or 2-tuple of Tensors corresponding to weight and bias of this layer. Weight matrix should have shape [input_size, output_size]. Bias should have shape [output_size]. - inputs: A list of tensors, each of shape [batch_size, input_size]. Inputs + inputs: A list of Tensors, each of shape [batch_size, input_size]. Inputs to layer. The list indexes each use in the graph (which might - correspond to a "time-step" in an RNN). - outputs: A list of tensors, the same length as 'inputs', each of shape + correspond to a "time-step" in an RNN). OR, can be single Tensor, of + shape [batch_size * num_uses, input_size], which is a reshaped version + of a Tensor of shape [batch_size, num_uses, input_size]. + outputs: A list of Tensors, the same length as 'inputs', each of shape [batch_size, output_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an - RNN). Needs to correspond with the order used in 'inputs'. + RNN). Needs to correspond with the order used in 'inputs'. OR, can be + a single Tensor of shape [batch_size * num_uses, output_size], which is + a reshaped version of a Tensor of shape [batch_size, num_uses, + output_size]. + num_uses: int or None. The number uses/time-steps in the graph where the + layer appears. Only needed if both inputs and outputs are given in the + single Tensor format. (Default: None) approx: str or None. If not None, must be of "kron_indep", "kron_series_1" or "kron_series_2". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds inputs and outputs as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the + word 'use' here has a completely different meaning to "use in the graph" + as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -960,7 +986,8 @@ class LayerCollection(object): # should be added back in here (and for the other block types, arguably). has_bias = isinstance(params, (tuple, list)) - block = self.register_block(params, block_type(self, has_bias=has_bias), + block = self.register_block(params, block_type(self, has_bias=has_bias, + num_uses=num_uses), reuse=reuse) block.register_additional_minibatch(inputs, outputs) @@ -973,6 +1000,7 @@ class LayerCollection(object): padding, inputs, outputs, + num_uses=None, data_format=None, dilations=None, approx=None, @@ -988,19 +1016,32 @@ class LayerCollection(object): padding: string. see tf.nn.conv2d for valid values. inputs: A list of Tensors, each of shape [batch_size, height, width, in_channels]. Inputs to layer. The list indexes each use in the graph - (which might correspond to a "time-step" in an RNN). + (which might correspond to a "time-step" in an RNN). OR, can be single + Tensor, of shape [batch_size * num_uses, height, width, in_channels], + which is a reshaped version of a Tensor of shape [batch_size, num_uses, + height, width, in_channels]. outputs: A list of Tensors, each of shape [batch_size, height, width, out_channels]. Output produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). - Needs to correspond with the order used in 'inputs'. + Needs to correspond with the order used in 'inputs'. OR, can be a + single Tensor, of shape [batch_size*num_uses, height, width, + out_channels], which is a reshaped version of a Tensor of shape + [batch_size, num_uses, height, width, out_channels]. + num_uses: int or None. The number uses/time-steps in the graph where the + layer appears. Only needed if both inputs and outputs are given in the + single Tensor format. (Default: None) data_format: str or None. Format of data. dilations: List of 4 ints. Dilations along each dimension. approx: str or None. If not None must by "kron_indep". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds inputs and outputs as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the + word 'use' here has a completely different meaning to "use in the graph" + as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -1020,7 +1061,8 @@ class LayerCollection(object): strides=strides, data_format=data_format, dilation_rate=dilations, - extract_patches_fn="extract_image_patches"), + extract_patches_fn="extract_image_patches", + num_uses=num_uses), reuse=reuse) block.register_additional_minibatch(inputs, outputs) @@ -1036,6 +1078,7 @@ class LayerCollection(object): params, inputs, outputs, + num_uses=None, approx=None, reuse=VARIABLE_SCOPE): """Registers embedding layers with shared parameters. @@ -1045,16 +1088,29 @@ class LayerCollection(object): inputs: A list of Tensors, each of shape [batch_size, input_size] and dtype int32. Indices into embedding matrix. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). - outputs: A list of Tensors, each of shape [batch_size, output_size]. + OR, can be single Tensor, of shape [batch_size * num_uses, input_size], + which is a reshaped version of a Tensor of shape [batch_size, num_uses, + input_size]. + outputs: A list of Tensors, each of shape [batch_size, embedding_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). Needs to - correspond with the order used in 'inputs'. + correspond with the order used in 'inputs'. OR, can be a + single Tensor, of shape [batch_size*num_uses, embedding_size], which + is a reshaped version of a Tensor of shape [batch_size, num_uses, + embedding_size]. + num_uses: int or None. The number uses/time-steps in the graph where the + layer appears. Only needed if both inputs and outputs are given in the + single Tensor format. (Default: None) approx: str or None. If not None must by "kron_indep". The Fisher approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, reuse an existing FisherBlock. If False, - create a new FisherBlock. If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds inputs and outputs as an + additional mini-batch/tower of data to use when estimating the Fisher + block for this layer (which must have already been registered). If + "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the + word 'use' here has a completely different meaning to "use in the graph" + as it perturns to the 'inputs', 'outputs', and 'num_uses' arguments.) + (Default: "VARIABLE_SCOPE") Raises: ValueError: For improper value to 'approx'. @@ -1070,7 +1126,7 @@ class LayerCollection(object): vocab_size = int(params.shape[0]) block = self.register_block( - params, block_type(self, vocab_size), reuse=reuse) + params, block_type(self, vocab_size, num_uses=num_uses), reuse=reuse) block.register_additional_minibatch(inputs, outputs) self._add_uses(params, len(inputs)) @@ -1093,9 +1149,10 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. - If False, create a new FisherBlock. If VARIABLE_SCOPE, use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'logits' as an additional + mini-batch/tower of inputs to the loss-function/predictive distribution + (which must have already been registered). If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") """ loss = lf.CategoricalLogitsNegativeLogProbLoss(logits, targets=targets, seed=seed) @@ -1126,9 +1183,10 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. - If False, create a new FisherBlock. If VARIABLE_SCOPE, use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'mean' and 'var' as an additional + mini-batch/tower of inputs to the loss-function/predictive distribution + (which must have already been registered). If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") """ loss = lf.NormalMeanNegativeLogProbLoss(mean, var, targets=targets, seed=seed) @@ -1154,9 +1212,10 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) - reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. - If False, create a new FisherBlock. If VARIABLE_SCOPE, use - tf.get_variable_scope().reuse. + reuse: bool or str. If True, this adds 'logits' as an additional + mini-batch/tower of inputs to the loss-function/predictive distribution + (which must have already been registered). If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") """ loss = lf.MultiBernoulliNegativeLogProbLoss(logits, targets=targets, seed=seed) diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index c589b18193..c9de0c7270 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -667,6 +667,9 @@ class PartitionedTensor(object): def __ne__(self, other): return not self == other # pylint: disable=g-comparison-negation + def __getitem__(self, key): + return self.as_tensor()[key] + def as_tensor(self, dtype=None, name=None, as_ref=False): with ops.name_scope(name, "PartitionedTensor.as_tensor", self.tensors): assert not as_ref -- GitLab From 9183c33884c4492589f2d8648178d00645c30691 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 20 Mar 2018 03:48:38 -0700 Subject: [PATCH 233/960] Don't spin in a loop when we're not waiting on any GPU events. PiperOrigin-RevId: 189719711 --- .../core/common_runtime/gpu/gpu_event_mgr.cc | 53 ++++++++++--------- .../core/common_runtime/gpu/gpu_event_mgr.h | 3 +- tensorflow/core/protobuf/config.proto | 4 +- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc index 2452efc779..af6a59a85d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc @@ -30,10 +30,6 @@ EventMgr::EventMgr(gpu::StreamExecutor* se, const GPUOptions& gpu_options) polling_active_delay_usecs_(gpu_options.polling_active_delay_usecs() ? gpu_options.polling_active_delay_usecs() : 10), - polling_inactive_delay_msecs_( - gpu_options.polling_inactive_delay_msecs() - ? gpu_options.polling_inactive_delay_msecs() - : 1), accumulated_stream_(nullptr), accumulated_tensors_(new TensorReferenceVector), accumulated_tensor_bytes_(0), @@ -78,16 +74,22 @@ EventMgr::~EventMgr() { void EventMgr::StartPollingLoop() { CHECK(polling_stopped_ == nullptr); - stop_polling_.reset(new Notification); + { + mutex_lock l(mu_); + stop_polling_ = false; + } polling_stopped_.reset(new Notification); threadpool_.Schedule([this]() { PollLoop(); }); } void EventMgr::StopPollingLoop() { - if (stop_polling_) { - stop_polling_->Notify(); + if (polling_stopped_) { + { + mutex_lock l(mu_); + stop_polling_ = true; + events_pending_.notify_all(); + } polling_stopped_->WaitForNotification(); - stop_polling_.reset(nullptr); polling_stopped_.reset(nullptr); } } @@ -121,28 +123,31 @@ void EventMgr::FlushAccumulatedTensors() { accumulated_stream_ = nullptr; } -// A polling loop to detect completion of GPU events. There's a -// tradeoff between achieving low latency detection, which argues for -// little delay between calls, and minimizing CPU use and lock -// contention, which argue for longer delay. The current strategy is -// to poll frequently when the queue is non-empty, and infrequently -// otherwise. +// A polling loop to detect completion of GPU events. +// +// While one or more events is outstanding, poll for completed events. When no +// events are outstanding, we sleep until one is enqueued. void EventMgr::PollLoop() { - bool queue_empty = false; - while (!stop_polling_->HasBeenNotified()) { - if (queue_empty) { - mutex_lock l(mu_); - WaitForMilliseconds(&l, &events_pending_, polling_inactive_delay_msecs_); - } else { - Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_); - } - ToFreeVector to_free; + ToFreeVector to_free; + while (true) { + bool events_still_pending; { mutex_lock l(mu_); + if (stop_polling_) { + break; + } + if (used_events_.empty()) { + events_pending_.wait(l); + } PollEvents(true, &to_free); - queue_empty = used_events_.empty(); + events_still_pending = !used_events_.empty(); } FreeMemory(to_free); + to_free.clear(); + + if (events_still_pending) { + Env::Default()->SleepForMicroseconds(polling_active_delay_usecs_); + } } polling_stopped_->Notify(); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h index 9692b24084..d23898e1f2 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h +++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h @@ -94,7 +94,6 @@ class EventMgr { perftools::gputools::StreamExecutor* const exec_; const int64 deferred_bytes_threshold_; const int32 polling_active_delay_usecs_; - const int32 polling_inactive_delay_msecs_; mutex mu_; condition_variable events_pending_ GUARDED_BY(mu_); @@ -180,7 +179,7 @@ class EventMgr { // A FIFO queue of InUse events and associated tensors. std::deque used_events_ GUARDED_BY(mu_); - std::unique_ptr stop_polling_; + bool stop_polling_ GUARDED_BY(mu_); std::unique_ptr polling_stopped_; // The main PollLoop for the event manager runs in this threadpool. diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index abbbe392aa..a3557e4721 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -67,9 +67,7 @@ message GPUOptions { // set or set to 0, gets set to a non-zero default. int32 polling_active_delay_usecs = 6; - // In the event polling loop sleep this many millisconds between - // PollEvents calls, when the queue is empty. If value is not - // set or set to 0, gets set to a non-zero default. + // This field is deprecated and ignored. int32 polling_inactive_delay_msecs = 7; // Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow, -- GitLab From 198bf16225b3224e9af6bafd9f1b4c1433557281 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Tue, 20 Mar 2018 13:18:13 +0200 Subject: [PATCH 234/960] Fix dataset resampling bug introduced by a bug in datasets itself. Fixes github issue #16606. The core issue is that in the case of certain random Tensors, the following two lines aren't the same: ``` rand_0s_and_1s_ds = ... gather_ds = rand_0s_and_1s_ds.map(lambda i: tf.gather([0, 1], i)) tup_ds = tf.data.Dataset.zip(gather_ds, rand_0s_and_1s_ds) ``` ``` rand_0s_and_1s_ds = ... tup_ds = rand_0s_and_1s_ds.map(lambda i: (tf.gather([0, 1], i), i)) ``` Note that this does NOT fix the underlying issue of drawing multiple sampes from the underlying distribution. Tested: With the new test, `bazel test :resample_test` fails before and succeeds after. --- .../data/python/kernel_tests/resample_test.py | 36 +++++++++++++++++++ .../contrib/data/python/ops/resampling.py | 10 +++--- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index c16207fa48..a76c6b1e39 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -21,8 +21,11 @@ import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import resampling +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.ops import random_ops from tensorflow.python.ops import string_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -68,6 +71,39 @@ class ResampleTest(test.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + def testRandomClasses(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + num_samples = 100 # We don't need many samples to test a dirac-delta target distribution + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Apply a random mapping that preserves the data distribution. + def _remap_fn(_): + return math_ops.cast(random_ops.random_uniform([1]) * num_classes, dtypes.int32)[0] + dataset = dataset.map(_remap_fn) + + # Reshape distribution. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + + classes, _ = zip(*returned) + bincount = np.bincount(np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) + + self.assertAllClose(target_dist, bincount, atol=1e-2) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 56f526a330..c99cdd3ff2 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -101,11 +101,11 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): initial_dist_ds)) .map(maybe_warn_on_large_rejection)) - current_probabilities_ds = dataset_ops.Dataset.zip( - (acceptance_dist_ds, class_values_ds)).map(array_ops.gather) - filtered_ds = ( - dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds, - dataset)) + def _gather_and_copy(class_val, acceptance_prob, data): + return (class_val, array_ops.gather(acceptance_prob, class_val), data) + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + filtered_ds = (current_probabilities_and_class_and_data_ds\ .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) -- GitLab From 87b715325a74d34f1331d14d8df640308ec10d12 Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Tue, 20 Mar 2018 06:25:19 -0700 Subject: [PATCH 235/960] Adds float64 support for avg pool and its gradient. Eigen NumTraits is modified to directly use std::numeric_limits, which resolves a broken test caused by inconsistency between the host and devices values of Eigen::NumTraits::highest(). This returns +inf on device, due to third_party/eigen3/Eigen/src/Core/util/Meta.h, and __DBL_MAX__ (1.7976931348623157e+308) on host, making the behavior for doubles (on device) inconsistent with both the behavior of floats Eigen::NumTraits::highest() and the behavior of std::numeric_limits::max() PiperOrigin-RevId: 189731521 --- tensorflow/core/kernels/avgpooling_op.cc | 20 +++++++++++++++++++ .../core/kernels/avgpooling_op_gpu.cu.cc | 7 +++++++ tensorflow/core/kernels/eigen_pooling.h | 11 +++++----- .../python/kernel_tests/pooling_ops_test.py | 7 +++++-- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index ec9cbc2a9b..a763f1321f 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -102,6 +102,9 @@ class AvgPoolingOp : public UnaryOp { TensorFormat data_format_; }; +REGISTER_KERNEL_BUILDER( + Name("AvgPool").Device(DEVICE_CPU).TypeConstraint("T"), + AvgPoolingOp); REGISTER_KERNEL_BUILDER( Name("AvgPool").Device(DEVICE_CPU).TypeConstraint("T"), AvgPoolingOp); @@ -153,11 +156,13 @@ class AvgPoolingOp : public UnaryOp { TensorShape output_shape = params.forward_output_shape(); if (data_format_ == FORMAT_NCHW) { + LOG(INFO) << "DnnPoolingOp"; DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, tensor_in, output_shape, /*propagate_nans=*/false); } else { + LOG(INFO) << "SpatialAvgPooling"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); @@ -189,6 +194,7 @@ namespace functor { DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor @@ -198,6 +204,9 @@ REGISTER_KERNEL_BUILDER( REGISTER_KERNEL_BUILDER( Name("AvgPool").Device(DEVICE_GPU).TypeConstraint("T"), AvgPoolingOp); +REGISTER_KERNEL_BUILDER( + Name("AvgPool").Device(DEVICE_GPU).TypeConstraint("T"), + AvgPoolingOp); #endif // GOOGLE_CUDA // The operation to compute AvgPool gradients. @@ -423,6 +432,12 @@ class AvgPoolingGradOp : public OpKernel { TensorFormat data_format_; }; +REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("orig_input_shape") + .Label("cudnn"), + AvgPoolingGradOp); REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") .Device(DEVICE_GPU) .TypeConstraint("T") @@ -553,6 +568,11 @@ REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") .TypeConstraint("T") .HostMemory("orig_input_shape"), AvgPoolingGradOpCustomGPUKernel); +REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("orig_input_shape"), + AvgPoolingGradOpCustomGPUKernel); REGISTER_KERNEL_BUILDER(Name("AvgPoolGrad") .Device(DEVICE_GPU) .TypeConstraint("T") diff --git a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc index 6537b42f1e..35511d5c31 100644 --- a/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/avgpooling_op_gpu.cu.cc @@ -35,6 +35,7 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_KERNELS(Eigen::half) DEFINE_GPU_KERNELS(float) +DEFINE_GPU_KERNELS(double) #undef DEFINE_GPU_KERNELS @@ -99,6 +100,12 @@ bool RunAvePoolBackwardNHWC(const T* const top_diff, const int num, return d.ok(); } +template bool RunAvePoolBackwardNHWC( + const double* const top_diff, const int num, const int height, + const int width, const int channels, const int pooled_height, + const int pooled_width, const int kernel_h, const int kernel_w, + const int stride_h, const int stride_w, const int pad_t, const int pad_l, + double* const bottom_diff, const GPUDevice& d); template bool RunAvePoolBackwardNHWC( const float* const top_diff, const int num, const int height, const int width, const int channels, const int pooled_height, diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h index 896c995761..2f83780525 100644 --- a/tensorflow/core/kernels/eigen_pooling.h +++ b/tensorflow/core/kernels/eigen_pooling.h @@ -334,7 +334,8 @@ struct AvgPoolMeanReducer { } template - void reducePacketWithType(T, const Packet& p, Packet* accum) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType( + T, const Packet& p, Packet* accum) { Packet skip_mask = pequal(p, pset1(-Eigen::NumTraits::highest())); (*accum) = padd(*accum, psel(p, pset1(0), skip_mask)); @@ -480,11 +481,9 @@ SpatialAvgPooling(const Input& input, DenseIndex patchRows, Eigen::type2index<3> > >::type reduction_dims; #endif return input - .extract_image_patches( - patchRows, patchCols, strideRows, strideCols, in_strideRows, - in_strideCols, padding_type, - -Eigen::NumTraits::Scalar>::type>::highest()) + .extract_image_patches(patchRows, patchCols, strideRows, strideCols, + in_strideRows, in_strideCols, padding_type, + -Eigen::NumTraits::highest()) .reduce(reduction_dims, mean_with_nan) .reshape(post_reduce_dims); } diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 2f3bea5825..ed44a1a4d1 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -123,8 +123,9 @@ class PoolingTest(test.TestCase): if input_sizes[-1] % 4 != 0: tf_logging.info("Skipping test for depth %d", input_sizes[-1]) return - tf_logging.info("Running %s test. %r %r %d %r %r %r", data_format, v2, - input_sizes, total_size, pool_func, ksize, strides) + tf_logging.info("Running %s test. %r %r %d %r %r %r %s", data_format, v2, + input_sizes, total_size, pool_func, ksize, strides, + data_type) # Initializes the input tensor with array containing incrementing # numbers from 1, wrapping round to -127 after 127 to support int8. x = [((f + 128) % 255) - 127 for f in range(total_size)] @@ -193,6 +194,8 @@ class PoolingTest(test.TestCase): self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, data_format, dtypes.float32, expected, use_gpu, v2) + self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, + data_format, dtypes.float64, expected, use_gpu, v2) if not use_gpu or test_util.CudaSupportsHalfMatMulAndConv(): self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, -- GitLab From e28a79eae228be8e65b5dff8bb8aa5ee2f41f70a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 06:28:09 -0700 Subject: [PATCH 236/960] Make TensorSpec and BoundedTensorSpec serializable. PiperOrigin-RevId: 189731737 --- tensorflow/python/framework/tensor_spec.py | 6 ++++++ tensorflow/python/framework/tensor_spec_test.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/python/framework/tensor_spec.py b/tensorflow/python/framework/tensor_spec.py index 546c48adba..6676cfcaa3 100644 --- a/tensorflow/python/framework/tensor_spec.py +++ b/tensorflow/python/framework/tensor_spec.py @@ -110,6 +110,9 @@ class TensorSpec(object): def __ne__(self, other): return not self == other + def __reduce__(self): + return TensorSpec, (self._shape, self._dtype, self._name) + class BoundedTensorSpec(TensorSpec): """A `TensorSpec` that specifies minimum and maximum values. @@ -210,4 +213,7 @@ class BoundedTensorSpec(TensorSpec): return (tensor_spec_eq and np.allclose(self.minimum, other.minimum) and np.allclose(self.maximum, other.maximum)) + def __reduce__(self): + return BoundedTensorSpec, (self._shape, self._dtype, self._minimum, + self._maximum, self._name) diff --git a/tensorflow/python/framework/tensor_spec_test.py b/tensorflow/python/framework/tensor_spec_test.py index b33d769d86..2e9e43e122 100644 --- a/tensorflow/python/framework/tensor_spec_test.py +++ b/tensorflow/python/framework/tensor_spec_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import pickle + import numpy as np from tensorflow.python.framework import constant_op @@ -143,6 +145,10 @@ class TensorSpecTest(test_util.TensorFlowTestCase): unbounded_spec = tensor_spec.TensorSpec((1, 2), dtypes.int32) self.assertFalse(unbounded_spec.is_bounded()) + def testSerialization(self): + desc = tensor_spec.TensorSpec([1, 5], dtypes.float32, "test") + self.assertEqual(pickle.loads(pickle.dumps(desc)), desc) + class BoundedTensorSpecTest(test_util.TensorFlowTestCase): @@ -243,6 +249,10 @@ class BoundedTensorSpecTest(test_util.TensorFlowTestCase): self.assertEqual(spec.dtype.max, bounded_spec.maximum) self.assertEqual(spec.name, bounded_spec.name) + def testSerialization(self): + desc = tensor_spec.BoundedTensorSpec([1, 5], dtypes.float32, -1, 1, "test") + self.assertEqual(pickle.loads(pickle.dumps(desc)), desc) + if __name__ == "__main__": googletest.main() -- GitLab From a40c8024f9beec346c2c1d98e9238c5d48ea0dca Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 07:27:16 -0700 Subject: [PATCH 237/960] Drop name_scope from operation names during quantization to avoid doubling it up. PiperOrigin-RevId: 189737746 --- tensorflow/contrib/quantize/python/common.py | 8 +++++++ .../contrib/quantize/python/quantize.py | 6 +++++ .../contrib/quantize/python/quantize_test.py | 24 +++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/tensorflow/contrib/quantize/python/common.py b/tensorflow/contrib/quantize/python/common.py index 3138149468..bf648e158e 100644 --- a/tensorflow/contrib/quantize/python/common.py +++ b/tensorflow/contrib/quantize/python/common.py @@ -123,3 +123,11 @@ def CreateOrGetQuantizationStep(): # normal variables to return a tensor of the same name. return array_ops.identity( state_ops.assign_add(quantization_step_tensor, 1)) + + +def DropStringPrefix(s, prefix): + """If the string starts with this prefix, drops it.""" + if s.startswith(prefix): + return s[len(prefix):] + else: + return s diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 9780e6dbcc..2b5b877e8e 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -367,6 +367,12 @@ def _InsertQuantOp(context, consumer operation. """ name_prefix = _AddContextToName(context, name) + # This is needed on TPU where name_scope == 'TPUReplicate/loop', and + # name_prefix starts with 'TPUReplicate/loop/'; without dropping it + # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which + # breaks things later. + name_prefix = common.DropStringPrefix(name_prefix, ops.get_name_scope() + '/') + inputs = producer.outputs[0] if moving_avg: quant = ( diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index 8e60f4b661..216310abe4 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -164,6 +164,30 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertTrue('FakeQuantWithMinMaxVars' in [i.op.type for i in bypass_tensor.op.inputs]) + def testWithNameScope(self): + self._RunTestOverParameters(self._TestWithNameScope) + + def _TestWithNameScope(self, is_training): + graph = ops.Graph() + with graph.as_default(): + with graph.name_scope('name_scope'): + batch_size, height, width, depth = 5, 128, 128, 3 + input1 = array_ops.zeros((batch_size, height, width, depth)) + _ = conv2d( + input1, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + scope='test') + + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) + + for op in graph.get_operations(): + self.assertTrue(not op.name.startswith('name_scope/name_scope/'), + 'Broken op: %s' % op.name) + def _WeightInit(self, stddev): """Returns truncated normal variable initializer. -- GitLab From 222c2b858fb1722f9aaf45e780b08e505e845665 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 20 Mar 2018 08:08:48 -0700 Subject: [PATCH 238/960] tfdbg: Curses CLI: Fill line-end whitespace with default color pair to prevent spurious color pairs from appearing in certain text terminal environments. RELNOTES: Bug fix: tfdbg curses CLI: fix spurious background colors in some text terminals. PiperOrigin-RevId: 189742433 --- tensorflow/python/debug/cli/curses_ui.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py index bb52f90512..f66cefb427 100644 --- a/tensorflow/python/debug/cli/curses_ui.py +++ b/tensorflow/python/debug/cli/curses_ui.py @@ -1185,6 +1185,22 @@ class CursesUI(base_ui.BaseUI): self._main_menu = None self._main_menu_pad = None + def _pad_line_end_with_whitespace(self, pad, row, line_end_x): + """Pad the whitespace at the end of a line with the default color pair. + + Prevents spurious color pairs from appearing at the end of the lines in + certain text terimnals. + + Args: + pad: The curses pad object to operate on. + row: (`int`) row index. + line_end_x: (`int`) column index of the end of the line (beginning of + the whitespace). + """ + if line_end_x < self._max_x - 2: + pad.addstr(row, line_end_x, " " * (self._max_x - 3 - line_end_x), + self._default_color_pair) + def _screen_add_line_to_output_pad(self, pad, row, txt, color_segments=None): """Render a line in a text pad. @@ -1208,6 +1224,7 @@ class CursesUI(base_ui.BaseUI): if not color_segments: pad.addstr(row, 0, txt, self._default_color_pair) + self._pad_line_end_with_whitespace(pad, row, len(txt)) return if not isinstance(color_segments, list): @@ -1248,6 +1265,8 @@ class CursesUI(base_ui.BaseUI): for segment, color_pair in zip(all_segments, all_color_pairs): if segment[1] < self._max_x: pad.addstr(row, segment[0], txt[segment[0]:segment[1]], color_pair) + if all_segments: + self._pad_line_end_with_whitespace(pad, row, all_segments[-1][1]) def _screen_scroll_output_pad(self, pad, viewport_top, viewport_left, screen_location_top, screen_location_left, -- GitLab From 4a4c13788634e73f3c1bd01abd142a607c2fd253 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 20 Mar 2018 08:32:30 -0700 Subject: [PATCH 239/960] Fixed the bug that the export code triggers the TPU validation. PiperOrigin-RevId: 189745966 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 5a8fa04e7c..f61f6bb52e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1044,8 +1044,8 @@ class _ModelFnWrapper(object): self._params = params self._ctx = ctx - def call_without_tpu(self, features, labels): - return self._call_model_fn(features, labels) + def call_without_tpu(self, features, labels, is_export_mode): + return self._call_model_fn(features, labels, is_export_mode=is_export_mode) def convert_to_single_tpu_train_step(self, dequeue_fn): """Converts user provided model_fn` as a single train step on TPU. @@ -1204,7 +1204,7 @@ class _ModelFnWrapper(object): return predict_step, host_calls, captured_scaffold_fn - def _call_model_fn(self, features, labels, is_export_mode=True): + def _call_model_fn(self, features, labels, is_export_mode=False): """Calls the model_fn with required parameters.""" model_fn_args = util.fn_args(self._model_fn) kwargs = {} @@ -1230,7 +1230,11 @@ class _ModelFnWrapper(object): 'required by TPUEstimator to pass batch size as ' 'params[\'batch_size\']'.format(self._model_fn)) - batch_size_for_model_fn = self._ctx.batch_size_for_model_fn + if is_export_mode: + batch_size_for_model_fn = None + else: + batch_size_for_model_fn = self._ctx.batch_size_for_model_fn + if batch_size_for_model_fn is not None: params[_BATCH_SIZE_KEY] = batch_size_for_model_fn @@ -1778,6 +1782,8 @@ class TPUEstimator(estimator_lib.Estimator): eval_batch_size, predict_batch_size, use_tpu) + self._is_input_fn_invoked = None + def _create_global_step(self, graph): """Creates a global step suitable for TPUs. @@ -1860,6 +1866,9 @@ class TPUEstimator(estimator_lib.Estimator): if 'mode' in input_fn_args: kwargs['mode'] = mode + # Records the fact input_fn has been invoked. + self._is_input_fn_invoked = True + with self._ctx.with_mode(mode) as ctx: # Setting the batch size in params first. This helps user to have same # input_fn for use_tpu=True/False. @@ -1907,15 +1916,24 @@ class TPUEstimator(estimator_lib.Estimator): with self._ctx.with_mode(mode) as ctx: model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx) - # For export_savedmodel, input_fn is never passed to Estimator. So, - # if features is callable, it means it is the input_fn passed by - # TPUEstimator._call_input_fn. Then we can know if the mode == PREDICT, - # it implies, it is the .predict API, not export_savedmodel API. - is_export_mode = not callable(features) + if mode != model_fn_lib.ModeKeys.PREDICT: + is_export_mode = False + else: + # For export_savedmodel, input_fn is never passed to Estimator. So, by + # checking the self._is_input_fn_invoked bit, we can know, given the + # mode == PREDICT, it is the .predict API, not export_savedmodel API. + if self._is_input_fn_invoked: + is_export_mode = False + else: + is_export_mode = True + + # Clear the bit. + self._is_input_fn_invoked = None if ctx.is_running_on_cpu(is_export_mode=is_export_mode): logging.info('Running %s on CPU', mode) - return model_fn_wrapper.call_without_tpu(features, labels) + return model_fn_wrapper.call_without_tpu( + features, labels, is_export_mode=is_export_mode) assert labels is None, '`labels` passed to `model_fn` must be `None`.' # TPUEstimator._call_input_fn passes `input_fn` as features to here. -- GitLab From 6a16e22421626ceffecb025b1cd80722c36aea0d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 09:27:24 -0700 Subject: [PATCH 240/960] Revise the main API for more consistent notation and add a supplemental shortcut to mark functions as "run in py_func". This is an intermediate step to simplifying the execution of plotting code. PiperOrigin-RevId: 189753509 --- tensorflow/contrib/py2tf/__init__.py | 7 +- tensorflow/contrib/py2tf/impl/BUILD | 1 + tensorflow/contrib/py2tf/impl/api.py | 107 +++++++++++----------- tensorflow/contrib/py2tf/impl/api_test.py | 38 +++++--- tensorflow/contrib/py2tf/utils/py_func.py | 40 +++++++- 5 files changed, 122 insertions(+), 71 deletions(-) diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py index 6531183cb5..a4b62a0976 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/py2tf/__init__.py @@ -24,15 +24,16 @@ from __future__ import print_function from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl.api import convert from tensorflow.contrib.py2tf.impl.api import converted_call -from tensorflow.contrib.py2tf.impl.api import graph_ready +from tensorflow.contrib.py2tf.impl.api import do_not_convert +from tensorflow.contrib.py2tf.impl.api import RunMode from tensorflow.contrib.py2tf.impl.api import to_code from tensorflow.contrib.py2tf.impl.api import to_graph from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'to_graph', 'to_code', 'convert', 'graph_ready', 'converted_call', 'utils', - 'PyFlowParseError' + 'utils', 'convert', 'converted_call', 'do_not_convert', 'RunMode', + 'to_code', 'to_graph', 'PyFlowParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/impl/BUILD b/tensorflow/contrib/py2tf/impl/BUILD index 90ffabbc9b..cc49d71b78 100644 --- a/tensorflow/contrib/py2tf/impl/BUILD +++ b/tensorflow/contrib/py2tf/impl/BUILD @@ -42,6 +42,7 @@ py_test( ":impl", "//tensorflow/contrib/py2tf/utils", "//tensorflow/python:client_testlib", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 883b304089..3a40729e5a 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -20,8 +20,12 @@ from __future__ import print_function from functools import wraps +from enum import Enum + +# pylint:disable=g-bad-import-order import gast import six +# pylint:enable=g-bad-import-order from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import conversion @@ -29,6 +33,7 @@ from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.contrib.py2tf.utils import py_func from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -37,55 +42,6 @@ from tensorflow.python.util import tf_inspect # (currently we require (module + class name, type)) -def graph_ready(f): - """No-op decorator that explicitly marks a function as graph-ready. - - Graph-ready functions are assumed to not need any conversion. - - Args: - f: Any callable. - Returns: - f itself. - """ - setattr(f, '__pyct_is_compile_decorator', True) - return f - - -def convert_inline(f, *args, **kwargs): - """Shorthand to convert and call a function. - - For example, the following two statements are equivalent: - - @convert() - def foo(): - ... - foo(bar) - - def foo(): - ... - convert_inline(foo, bar) - - Args: - f: Function to convert. Only this call will be converted. - *args: Passed through to f. - **kwargs: Passed through to f, with the following exceptions: - * arg_value_hints: A dict mapping parameter names to objects that can - hint at the type of those parameters. - - Returns: - The result of the converted f applied to args and kwargs. - """ - if 'arg_value_hints' in kwargs: - arg_value_hints = kwargs['arg_value_hints'] - del kwargs['arg_value_hints'] - else: - arg_value_hints = None - if tf_inspect.ismethod(f): - # When converting methods, the result is still an unbound function. - args = (f.__self__,) + args - return convert(arg_value_hints)(f)(*args, **kwargs) - - def convert(recursive=False, verbose=False, arg_types=None): """Decorator that compiles a function to graph mode. @@ -122,6 +78,55 @@ def convert(recursive=False, verbose=False, arg_types=None): return decorator +class RunMode(Enum): + GRAPH = 1 + PY_FUNC = 2 + + +def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None): + """Decorator that suppresses compilation of a function. + + Args: + run_as: RunMode value. Whether to run the function as-is, or wrap it into + a py_func. + return_dtypes: See py2tf.utils.py_func.wrap_py_func. Setting to None or + empty list or tuple will create a dummy return value that can be used + to set control dependencies. + + Returns: + A decorator that wraps the original function. + """ + def decorator(f): + """Decorator implementation.""" + + @wraps(f) + def graph_wrapper(*args, **kwargs): + return f(*args, **kwargs) + + @wraps(f) + def py_func_wrapper(*args, **kwargs): + if kwargs: + raise NotImplementedError( + 'RunMode.PY_FUNC does not yet support kwargs') + # TODO(mdan): Add support for kwargs. + return py_func.wrap_py_func( + f, return_dtypes, args, use_dummy_return=not return_dtypes) + + if run_as == RunMode.GRAPH: + wrapper = graph_wrapper + elif run_as == RunMode.PY_FUNC: + wrapper = py_func_wrapper + else: + raise ValueError('unknown value for run_as: %s' % run_as) + + # Sometimes the decorator is just desugared, making it impossible to detect. + # This attribute makes detection easier. + setattr(wrapper, '__pyct_is_compile_decorator', True) + return wrapper + + return decorator + + def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): """Compiles a function call inline.""" # TODO(mdan): This needs cleanup. @@ -227,7 +232,7 @@ def to_graph(e, """ conversion_map = conversion.ConversionMap( recursive=recursive, - nocompile_decorators=(convert, graph_ready, convert_inline), + nocompile_decorators=(convert, do_not_convert, converted_call), partial_types=partial_types, api_module=tf_inspect.getmodule(to_graph)) _, name = conversion.entity_to_graph(e, conversion_map, arg_values, arg_types) @@ -274,7 +279,7 @@ def to_code(e, """ conversion_map = conversion.ConversionMap( recursive=recursive, - nocompile_decorators=(convert, graph_ready, convert_inline), + nocompile_decorators=(convert, do_not_convert, converted_call), partial_types=partial_types, api_module=tf_inspect.getmodule(to_graph)) conversion.entity_to_graph(e, conversion_map, arg_values, arg_types) diff --git a/tensorflow/contrib/py2tf/impl/api_test.py b/tensorflow/contrib/py2tf/impl/api_test.py index 13f8e66018..a7b1aba852 100644 --- a/tensorflow/contrib/py2tf/impl/api_test.py +++ b/tensorflow/contrib/py2tf/impl/api_test.py @@ -18,10 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import api from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.utils import py_func from tensorflow.python.framework import constant_op from tensorflow.python.platform import test @@ -81,11 +84,11 @@ class ApiTest(test.TestCase): constant_op.constant(-2)) self.assertListEqual([0, 1], sess.run(x).tolist()) - def test_decorator_calls_converted(self): + def test_decorator_calls_unconverted_graph(self): class TestClass(object): - @api.graph_ready + @api.do_not_convert(api.RunMode.GRAPH) def called_member(self, a): return tf.negative(a) @@ -102,20 +105,23 @@ class ApiTest(test.TestCase): constant_op.constant(-2)) self.assertListEqual([0, 1], sess.run(x).tolist()) - def test_decorator_calls_decorated(self): + def test_decorator_calls_unconverted_py_func(self): class TestClass(object): - @api.convert() + @api.do_not_convert( + api.RunMode.PY_FUNC, return_dtypes=py_func.MatchDType(1)) def called_member(self, a): - if a < 0: - a = -a - return a + return np.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): while tf.reduce_sum(x) > s: - x //= self.called_member(a) + y = self.called_member(a) + # set_shape works around while_loop's limitations. + # TODO(mdan): Allow specifying shapes (or ShapeLike) instead. + y.set_shape(a.shape) + x //= y return x tc = TestClass() @@ -125,10 +131,11 @@ class ApiTest(test.TestCase): constant_op.constant(-2)) self.assertListEqual([0, 1], sess.run(x).tolist()) - def test_convert_call_site_decorator(self): + def test_decorator_calls_decorated(self): class TestClass(object): + @api.convert() def called_member(self, a): if a < 0: a = -a @@ -137,7 +144,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): while tf.reduce_sum(x) > s: - x //= api.convert_inline(self.called_member, a) + x //= self.called_member(a) return x tc = TestClass() @@ -147,17 +154,20 @@ class ApiTest(test.TestCase): constant_op.constant(-2)) self.assertListEqual([0, 1], sess.run(x).tolist()) - def test_graph_ready_call_site_decorator(self): + def test_convert_call_site_decorator(self): class TestClass(object): def called_member(self, a): - return tf.negative(a) + if a < 0: + a = -a + return a @api.convert(recursive=True) def test_method(self, x, s, a): while tf.reduce_sum(x) > s: - x //= api.graph_ready(self.called_member(a)) + x //= api.converted_call(self.called_member, False, False, {}, self, + a) return x tc = TestClass() @@ -168,6 +178,7 @@ class ApiTest(test.TestCase): self.assertListEqual([0, 1], sess.run(x).tolist()) def test_to_graph_basic(self): + def test_fn(x, s): while tf.reduce_sum(x) > s: x //= 2 @@ -180,6 +191,7 @@ class ApiTest(test.TestCase): self.assertListEqual([1, 2], sess.run(x).tolist()) def test_to_code_basic(self): + def test_fn(x, s): while tf.reduce_sum(x) > s: x /= 2 diff --git a/tensorflow/contrib/py2tf/utils/py_func.py b/tensorflow/contrib/py2tf/utils/py_func.py index 838872d092..79920cd841 100644 --- a/tensorflow/contrib/py2tf/utils/py_func.py +++ b/tensorflow/contrib/py2tf/utils/py_func.py @@ -18,11 +18,23 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple + from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import script_ops +class MatchDType(namedtuple('MatchDType', ('arg_number',))): + """Allows matching the dtype of an argument. + + Used in conjunction with function calls. For example, MatchDType(0) will + match the DType of the first argument. + """ + + pass + + def wrap_py_func(f, return_dtypes, arguments, use_dummy_return=False): """Helper that wraps a callable to py_func. @@ -34,10 +46,12 @@ def wrap_py_func(f, return_dtypes, arguments, use_dummy_return=False): Args: f: Callable - return_dtypes: DType, tuple, list or None, the data type for each of f's - return value. None if f has no return values or use_dummy_return is - True. - arguments: Arguments for f + return_dtypes: None, individual of tuple/list of DType or MatchDType, the + data type for each of f's return value(s). Set to None if f has no + return values or use_dummy_return is True. Use MatchDType to define a + dtype identical to that of `i`th argument (argument 0 is the first); + an argument must of Tensor type if it is to be used with MatchDType. + arguments: Arguments for f, as list or tuple. use_dummy_return: If True, the function will return a dummy value of 1 and discard its actual return value. Returns: @@ -58,6 +72,24 @@ def wrap_py_func(f, return_dtypes, arguments, use_dummy_return=False): if arg_is_tensor[j]: i += 1 + def match_argument(arg_number): + arg = arguments[arg_number] + if not arg_is_tensor[arg_number]: + raise ValueError( + 'argument %d was used with MatchDType and must be a tf.Tensor, but ' + 'was %s instead' % (arg_number, type(arg))) + return arg.dtype + + if return_dtypes: + if isinstance(return_dtypes, MatchDType): + return_dtypes = match_argument(return_dtypes.arg_number) + elif isinstance(return_dtypes, (list, tuple)): + return_dtypes = tuple( + match_argument(a.arg_number) if isinstance(a, MatchDType) else a + for a in return_dtypes) + else: + assert isinstance(return_dtypes, dtypes.DType) + def f_wrapper(*tensor_args): f_args = tuple(tensor_args[index_in_tensor_list[i]] if arg_is_tensor[i] else arguments[i] for i in range(n)) -- GitLab From 1a28fe156080d2c4a1986026c5bbb776a99b4ec9 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Tue, 20 Mar 2018 10:39:51 -0700 Subject: [PATCH 241/960] Mark sparse wrap test as no-pip --- tensorflow/contrib/image/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 2924aef815..8ba51a1d83 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -243,6 +243,7 @@ cuda_py_test( name = "sparse_image_warp_test", size = "medium", srcs = ["python/kernel_tests/sparse_image_warp_test.py"], + tags = ["no_pip"], additional_deps = [ ":sparse_image_warp_py", "//third_party/py/numpy", -- GitLab From e543ae60a73bb8137227b417ed4f9a80f10f63a1 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Tue, 20 Mar 2018 11:03:51 -0700 Subject: [PATCH 242/960] Fix BUILD file formatting --- tensorflow/contrib/image/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 8ba51a1d83..79eb3762ed 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -243,7 +243,6 @@ cuda_py_test( name = "sparse_image_warp_test", size = "medium", srcs = ["python/kernel_tests/sparse_image_warp_test.py"], - tags = ["no_pip"], additional_deps = [ ":sparse_image_warp_py", "//third_party/py/numpy", @@ -260,6 +259,7 @@ cuda_py_test( "//tensorflow/core:protos_all_py", ], data = [":sparse_image_warp_test_data"], + tags = ["no_pip"], ) filegroup( -- GitLab From 27a4b79704f03569fc6edd5b3a30ff6cf599310d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 11:04:44 -0700 Subject: [PATCH 243/960] Add support for automatically wrapping NumPy functions based on a whitelist. PiperOrigin-RevId: 189771575 --- .../contrib/py2tf/converters/call_trees.py | 31 ++++++++++++++++++- .../py2tf/converters/call_trees_test.py | 18 +++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index ca8726f916..74fbf80677 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -22,6 +22,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple import types import gast @@ -34,6 +35,16 @@ from tensorflow.contrib.py2tf.pyct import transformer from tensorflow.python.util import tf_inspect +class FunctionInfo(namedtuple('FunctionInfo', ('dtype',))): + pass + + +# TODO(mdan): Move this to config.py. +KNOWN_NUMPY_FUNCTIONS = { + ('numpy', 'random', 'binomial'): FunctionInfo(dtype='tf.int64'), +} + + class FunctionNamer(object): """Describes the interface for CallTreeTransformer's namer.""" @@ -185,6 +196,18 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) + def _wrap_to_py_func_single_return(self, node, fqn): + # TODO(mdan): Properly handle varargs, kwargs, etc. + template = """ + py2tf_utils.wrap_py_func(func, dtype, (original_args,), False) + """ + dtype = KNOWN_NUMPY_FUNCTIONS[fqn].dtype + return templates.replace_as_expression( + template, + func=node.func, + dtype=parser.parse_expression(dtype), + original_args=node.args) + def _insert_dynamic_conversion(self, node): """Inlines a dynamic conversion for a dynamic function.""" # TODO(mdan): Pass information on the statically compiled functions. @@ -248,10 +271,16 @@ class CallTreeTransformer(transformer.Base): self.generic_visit(node) if anno.hasanno(node.func, 'live_val'): target_entity = anno.getanno(node.func, 'live_val') + if anno.hasanno(node.func, 'fqn'): + target_fqn = anno.getanno(node.func, 'fqn') if self._function_is_compilable(target_entity): node = self._rename_compilable_function(node) + elif target_fqn in KNOWN_NUMPY_FUNCTIONS: + # TODO(mdan): Should we replace these with equivalent TF ops instead? + node = self._wrap_to_py_func_single_return(node, target_fqn) else: - raise NotImplementedError('py_func with return values') + raise NotImplementedError( + 'py_func with return values (unknown function)') else: if self.context.recursive: node = self._insert_dynamic_conversion(node) diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index d482a9ef78..1106432da6 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -18,9 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.contrib.py2tf.converters import call_trees from tensorflow.contrib.py2tf.converters import converter_test_base from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -105,6 +109,20 @@ class CallTreesTest(converter_test_base.TestCase): sess.run(sess.graph.get_operations()[0]) self.assertEquals('bar', a.foo) + def test_py_func_wrap_known_function(self): + + def test_fn(): + return np.random.binomial(2, 0.5) + + node = self.parse_and_analyze(test_fn, {'np': np}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node, dtypes.int64) as result: + result.np = np + with self.test_session() as sess: + self.assertTrue(isinstance(result.test_fn(), ops.Tensor)) + self.assertIn(sess.run(result.test_fn()), (0, 1, 2)) + def test_uncompiled_modules(self): def test_fn(a): -- GitLab From 3bca4298aacd9f89de2ac532bb7fedcdec1a5bb6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 11:12:47 -0700 Subject: [PATCH 244/960] Replace std::clock with random::New64() for setting random seed PiperOrigin-RevId: 189773399 --- tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.cc | 4 ++-- tensorflow/contrib/tensor_forest/kernels/v4/input_data.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.cc b/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.cc index da600d34ea..63d4d9ba50 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.cc +++ b/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.cc @@ -19,6 +19,7 @@ #include "tensorflow/contrib/tensor_forest/kernels/tree_utils.h" #include "tensorflow/contrib/tensor_forest/kernels/v4/stat_utils.h" #include "tensorflow/core/lib/random/distribution_sampler.h" +#include "tensorflow/core/lib/random/random.h" namespace tensorflow { namespace tensorforest { @@ -122,9 +123,8 @@ ClassificationStats::ClassificationStats(const TensorForestParams& params, right_gini_.reset(new RunningGiniScores()); } - uint64 time_seed = static_cast(std::clock()); single_rand_ = std::unique_ptr( - new random::PhiloxRandom(time_seed)); + new random::PhiloxRandom(random::New64())); rng_ = std::unique_ptr( new random::SimplePhilox(single_rand_.get())); } diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h b/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h index b991e6339f..95f75b4d7e 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/input_data.h @@ -23,6 +23,7 @@ #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/random/simple_philox.h" namespace tensorflow { @@ -56,9 +57,8 @@ class TensorDataSet { // Set up the random number generator. if (split_sampling_random_seed_ == 0) { - uint64 time_seed = static_cast(std::clock()); single_rand_ = std::unique_ptr( - new random::PhiloxRandom(time_seed)); + new random::PhiloxRandom(random::New64())); } else { single_rand_ = std::unique_ptr( new random::PhiloxRandom(split_sampling_random_seed_)); -- GitLab From 1c4e42b39fd9ae2da14d7eb323bedc144a6e659b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 20 Mar 2018 11:13:48 -0700 Subject: [PATCH 245/960] Use 32 bit induction variable in gather expander Right now this is unconditional (and we fail with Unimplemented() if a 32 bit induction variable is not large enough), but eventually we may want to be smarter about this. PiperOrigin-RevId: 189773581 --- tensorflow/compiler/xla/service/BUILD | 12 ++++ .../compiler/xla/service/gather_expander.cc | 19 +++++- .../xla/service/gather_expander_test.cc | 51 ++++++++++++++++ tensorflow/compiler/xla/service/while_util.cc | 21 +++---- tensorflow/compiler/xla/service/while_util.h | 2 +- tensorflow/compiler/xla/tests/BUILD | 2 + .../xla/tests/gather_operation_test.cc | 60 +++++++++++++++++++ tensorflow/compiler/xla/util.h | 9 +++ 8 files changed, 163 insertions(+), 13 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gather_expander_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 43c56484ea..d4d67872cf 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1276,6 +1276,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "gather_expander_test", + srcs = ["gather_expander_test.cc"], + deps = [ + ":gather_expander", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:test_macros_header", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + ], +) + cc_library( name = "conditional_simplifier", srcs = ["conditional_simplifier.cc"], diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index 488bed35fe..221ff7900f 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -306,18 +306,33 @@ StatusOr GatherExpander::ExpandGather( HloComputation* computation = gather_instr->parent(); HloInstruction* operand = gather_instr->mutable_operand(0); HloInstruction* gather_indices = gather_instr->mutable_operand(1); + const Shape& gather_indices_shape = gather_indices->shape(); const Shape& output_shape = gather_instr->shape(); int64 output_rank = output_shape.dimensions_size(); const GatherDimensionNumbers& dim_numbers = gather_instr->gather_dimension_numbers(); + int64 gather_loop_trip_count = 1; + for (int64 i = 0, e = gather_indices_shape.dimensions_size(); i < e; i++) { + if (i != dim_numbers.index_vector_dim()) { + gather_loop_trip_count *= gather_indices_shape.dimensions(i); + } + } + + if (!IsInt32(gather_loop_trip_count)) { + return Unimplemented( + "Gather operations with more than 2147483647 gather indices are not " + "supported. This error occurred for %s.", + gather_instr->ToString().c_str()); + } + TF_ASSIGN_OR_RETURN(HloInstruction * canonical_gather_indices, CanonicalizeGatherIndices( gather_indices, dim_numbers.index_vector_dim())); - const int64 gather_loop_trip_count = - canonical_gather_indices->shape().dimensions(0); + CHECK_EQ(gather_loop_trip_count, + canonical_gather_indices->shape().dimensions(0)); TF_ASSIGN_OR_RETURN( HloInstruction * accumulator_init, diff --git a/tensorflow/compiler/xla/service/gather_expander_test.cc b/tensorflow/compiler/xla/service/gather_expander_test.cc new file mode 100644 index 0000000000..ba41ee8428 --- /dev/null +++ b/tensorflow/compiler/xla/service/gather_expander_test.cc @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gather_expander.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" + +namespace xla { +namespace { +TEST(GatherExpanderTest, ErrorStatusOnTooManyIndices) { + const string hlo_text = R"( +HloModule TensorFlowGatherMultipleBatchDims + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2147483647,5] parameter(1) + ROOT gather = s32[2147483647,3,5] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={1}, + gather_dims_to_operand_dims={1}, + index_vector_dim=2, + window_bounds={3, 1} +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_text)); + + Status status = GatherExpander{}.Run(module.get()).status(); + EXPECT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); + + ASSERT_THAT( + status.error_message(), + ::testing::HasSubstr("Gather operations with more than 2147483647 gather " + "indices are not supported.")); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/while_util.cc b/tensorflow/compiler/xla/service/while_util.cc index 8cd5882f32..bd07941843 100644 --- a/tensorflow/compiler/xla/service/while_util.cc +++ b/tensorflow/compiler/xla/service/while_util.cc @@ -142,23 +142,23 @@ WhileUtil::MakeInstructionsLiveIn( static StatusOr> MakeCountedLoopConditionComputation(const Shape& loop_state_shape, - int64 trip_count) { + int32 trip_count) { Shape scalar_pred = ShapeUtil::MakeShape(PRED, {}); - Shape scalar_s64 = ShapeUtil::MakeShape(S64, {}); TF_ASSIGN_OR_RETURN(std::unique_ptr cond_computation, CreateComputationWithSignature( {&loop_state_shape}, scalar_pred, "while_cond")); HloInstruction* trip_count_constant = cond_computation->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(trip_count))); + HloInstruction::CreateConstant(Literal::CreateR0(trip_count))); HloInstruction* param = cond_computation->parameter_instruction(0); - TF_ASSIGN_OR_RETURN(HloInstruction * counter, + TF_ASSIGN_OR_RETURN(HloInstruction * indvar, MakeGetTupleElementHlo(param, 0)); + TF_ASSIGN_OR_RETURN( HloInstruction * compare, - MakeBinaryHlo(HloOpcode::kLt, counter, trip_count_constant)); + MakeBinaryHlo(HloOpcode::kLt, indvar, trip_count_constant)); cond_computation->set_root_instruction(compare); return std::move(cond_computation); } @@ -171,8 +171,7 @@ static StatusOr> MakeCountedLoopBodyComputation( CreateComputationWithSignature( {&loop_state_shape}, loop_state_shape, "while_body")); HloInstruction* one = body_computation->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))); - + HloInstruction::CreateConstant(Literal::CreateR0(1))); HloInstruction* param = body_computation->parameter_instruction(0); TF_ASSIGN_OR_RETURN(HloInstruction * indvar, MakeGetTupleElementHlo(param, 0)); @@ -200,7 +199,7 @@ static StatusOr MakeInitTupleFromInitValues( std::vector init_values_with_indvar; init_values_with_indvar.reserve(init_values.size() + 1); HloInstruction* zero = computation->AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))); + HloInstruction::CreateConstant(Literal::CreateR0(0))); init_values_with_indvar.push_back(zero); c_copy(init_values, std::back_inserter(init_values_with_indvar)); return computation->AddInstruction( @@ -210,16 +209,18 @@ static StatusOr MakeInitTupleFromInitValues( static Shape MakeLoopStateShape(const WhileUtil::LoopStateTy& init_values) { std::vector loop_state_shape_components; loop_state_shape_components.reserve(init_values.size() + 1); - loop_state_shape_components.push_back(ShapeUtil::MakeShape(S64, {})); + loop_state_shape_components.push_back(ShapeUtil::MakeShape(S32, {})); c_transform(init_values, std::back_inserter(loop_state_shape_components), [](HloInstruction* instr) { return instr->shape(); }); return ShapeUtil::MakeTupleShape(loop_state_shape_components); } /*static*/ StatusOr WhileUtil::MakeCountedLoop( - HloComputation* computation, int64 trip_count, + HloComputation* computation, int32 trip_count, const WhileUtil::LoopStateTy& init_values, const WhileUtil::LoopBodyGeneratorTy& loop_body_generator) { + CHECK_GE(trip_count, 0); + Shape loop_state_shape = MakeLoopStateShape(init_values); TF_ASSIGN_OR_RETURN( std::unique_ptr cond, diff --git a/tensorflow/compiler/xla/service/while_util.h b/tensorflow/compiler/xla/service/while_util.h index 80f7e16e64..1688d46742 100644 --- a/tensorflow/compiler/xla/service/while_util.h +++ b/tensorflow/compiler/xla/service/while_util.h @@ -71,7 +71,7 @@ class WhileUtil { // return loop_state; // } static StatusOr MakeCountedLoop( - HloComputation* computation, int64 trip_count, + HloComputation* computation, int32 trip_count, const LoopStateTy& init_values, const LoopBodyGeneratorTy& loop_body_generator); }; diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 025ac129d7..04a9c1ef79 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -676,7 +676,9 @@ xla_test( name = "gather_operation_test", srcs = ["gather_operation_test.cc"], deps = [ + ":client_library_test_base", ":hlo_test_base", + "//tensorflow/compiler/xla:execution_options_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/gather_operation_test.cc b/tensorflow/compiler/xla/tests/gather_operation_test.cc index 8ba91946c0..9db68ff7a6 100644 --- a/tensorflow/compiler/xla/tests/gather_operation_test.cc +++ b/tensorflow/compiler/xla/tests/gather_operation_test.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/compiler/xla/execution_options_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" @@ -397,5 +399,63 @@ ENTRY main { RunTest(hlo_text, operand.get(), gather_indices.get()); } +class GatherClientLibraryTest : public ClientLibraryTestBase {}; + +// TODO(b/30671675): Asynchronous execution on stream is not yet supported on +// GPU and CPU_PARALLEL. +XLA_TEST_F(GatherClientLibraryTest, + DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(Basic))) { + // We create this HLO, but using the ComputationBuilder API. + // + // ENTRY main { + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // ROOT gather = s32[2,3] gather(operand, indices), + // output_window_dims={1}, + // elided_window_dims={0}, + // gather_dims_to_operand_dims={0}, + // index_vector_dim=1, + // window_bounds={1, 3} + // } + + ComputationBuilder builder(client_, "gather_basic"); + + Shape operand_shape = ShapeUtil::MakeShape(S32, {3, 3}); + Shape indices_shape = ShapeUtil::MakeShape(S32, {2}); + + auto operand = builder.Parameter(0, operand_shape, "operand"); + auto indices = builder.Parameter(1, indices_shape, "indices"); + GatherDimensionNumbers dim_numbers; + dim_numbers.add_output_window_dims(1); + dim_numbers.add_elided_window_dims(0); + dim_numbers.add_gather_dims_to_operand_dims(0); + dim_numbers.set_index_vector_dim(1); + builder.Gather(operand, indices, dim_numbers, {1, 3}); + + std::vector expected = {}; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr operand_arg, + client_->TransferToServer(*Literal::CreateR2( + {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}))); + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr indices_arg, + client_->TransferToServer(*Literal::CreateR1({0, 2}))); + TF_ASSERT_OK_AND_ASSIGN(std::vector devices, + client_->GetDeviceHandles(1)); + xla::ExecutionOptions execution_options = CreateDefaultExecutionOptions(); + *execution_options.add_device_handles() = devices[0]; + TF_ASSERT_OK_AND_ASSIGN(Computation computation, builder.Build()); + std::vector computation_instances = { + {computation, + {operand_arg.get(), indices_arg.get()}, + execution_options, + /*execution_profile=*/nullptr}}; + TF_ASSERT_OK_AND_ASSIGN( + std::vector> result_data, + client_->ExecuteParallel(computation_instances)); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr result_literal, + client_->Transfer(*(result_data[0]))); + LiteralTestUtil::ExpectEqual( + *result_literal, *Literal::CreateR2({{1, 2, 3}, {7, 8, 9}})); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index ff99d3728d..2da9f9ed6f 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -519,6 +519,15 @@ int64 FindIndex(const C& c, Value&& value) { auto it = c_find(c, std::forward(value)); return std::distance(c.begin(), it); } + +// Returns true if `x` fits in 32-bits. +template +bool IsInt32(T x) { + // Following conversion rules: "the value is unchanged if it can be + // represented in the destination type (and bit-field width); otherwise, the + // value is implementation-defined." + return static_cast(x) == x; +} } // namespace xla #define XLA_LOG_LINES(SEV, STRING) \ -- GitLab From beaf17d4b2b2e79e97b08b0382b302771ae6081e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 11:21:40 -0700 Subject: [PATCH 246/960] Update version of nsync used by TensorFlow. The primary change is that on Linux, the C++11 build of nsync will now use underlying system primitives to implement a semaphore instead of the C++11 primitives, which are currently surprisingly slow on Linux. PiperOrigin-RevId: 189775201 --- tensorflow/contrib/cmake/external/nsync.cmake | 2 +- tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt | 8 +++++++- tensorflow/contrib/makefile/compile_nsync.sh | 5 +++-- tensorflow/workspace.bzl | 8 ++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/cmake/external/nsync.cmake b/tensorflow/contrib/cmake/external/nsync.cmake index f3a37ff508..b9d1dd88d4 100644 --- a/tensorflow/contrib/cmake/external/nsync.cmake +++ b/tensorflow/contrib/cmake/external/nsync.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(nsync_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/nsync/public) set(nsync_URL https://github.com/google/nsync) -set(nsync_TAG 8502189abfa44c249c01c2cad64e6ed660a9a668) +set(nsync_TAG 0559ce013feac8db639ee1bf776aca0325d28777) set(nsync_BUILD ${CMAKE_CURRENT_BINARY_DIR}/nsync/src/nsync) set(nsync_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/nsync/install) diff --git a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt index aaae18a313..6f059c7225 100644 --- a/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt +++ b/tensorflow/contrib/cmake/patches/nsync/CMakeLists.txt @@ -42,7 +42,6 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") include_directories ("${PROJECT_SOURCE_DIR}/platform/c++11") add_definitions ("-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11") set (NSYNC_OS_CPP_SRC - "platform/c++11/src/nsync_semaphore_mutex.cc" "platform/c++11/src/per_thread_waiter.cc" "platform/c++11/src/yield.cc" "platform/c++11/src/time_rep_timespec.cc" @@ -52,6 +51,7 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") include_directories ("${PROJECT_SOURCE_DIR}/platform/win32") add_compile_options ("/TP") set (NSYNC_OS_SRC + "platform/c++11/src/nsync_semaphore_mutex.cc" "platform/win32/src/clock_gettime.c" "platform/win32/src/pthread_key_win32.cc" ${NSYNC_OS_CPP_SRC} @@ -68,6 +68,7 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC ${NSYNC_OS_CPP_SRC} + "platform/c++11/src/nsync_semaphore_mutex.cc" "platform/posix/src/clock_gettime.c" "platform/posix/src/nsync_semaphore_mutex.c" ) @@ -75,9 +76,11 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") "platform/posix/src/start_thread.c" ) elseif ("${CMAKE_SYSTEM_NAME}X" STREQUAL "LinuxX") + include_directories (BEFORE "${PROJECT_SOURCE_DIR}/platform/c++11.futex") include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC + "platform/linux/src/nsync_semaphore_futex.c" ${NSYNC_OS_CPP_SRC} ) set (NSYNC_TEST_OS_SRC @@ -87,6 +90,7 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC + "platform/c++11/src/nsync_semaphore_mutex.cc" ${NSYNC_OS_CPP_SRC} ) set (NSYNC_TEST_OS_SRC @@ -96,6 +100,7 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC + "platform/c++11/src/nsync_semaphore_mutex.cc" ${NSYNC_OS_CPP_SRC} ) set (NSYNC_TEST_OS_SRC @@ -105,6 +110,7 @@ if ("${NSYNC_LANGUAGE}X" STREQUAL "c++11X") include_directories ("${PROJECT_SOURCE_DIR}/platform/posix") add_compile_options ("-std=c++11") set (NSYNC_OS_SRC + "platform/c++11/src/nsync_semaphore_mutex.cc" ${NSYNC_OS_CPP_SRC} ) set (NSYNC_TEST_OS_SRC diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index 7927997678..e8c6edd7ba 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -109,17 +109,18 @@ for arch in $archs; do linux) makefile=' CC=${CC_PREFIX} g++ PLATFORM_CPPFLAGS=-DNSYNC_USE_CPP11_TIMEPOINT -DNSYNC_ATOMIC_CPP11 \ + -I../../platform/c++11.futex \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread PLATFORM_CFLAGS=-std=c++11 -Werror -Wall -Wextra -pedantic PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 - PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ + PLATFORM_C=../../platform/linux/src/nsync_semaphore_futex.c \ ../../platform/c++11/src/per_thread_waiter.cc \ ../../platform/c++11/src/yield.cc \ ../../platform/c++11/src/time_rep_timespec.cc \ ../../platform/c++11/src/nsync_panic.cc - PLATFORM_OBJS=nsync_semaphore_mutex.o per_thread_waiter.o yield.o \ + PLATFORM_OBJS=nsync_semaphore_futex.o per_thread_waiter.o yield.o \ time_rep_timespec.o nsync_panic.o TEST_PLATFORM_C=../../platform/c++11/src/start_thread.cc TEST_PLATFORM_OBJS=start_thread.o diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index db70e4515b..cf1611a883 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -388,11 +388,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "nsync", urls = [ - "https://mirror.bazel.build/github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz", - "https://github.com/google/nsync/archive/8502189abfa44c249c01c2cad64e6ed660a9a668.tar.gz", + "https://mirror.bazel.build/github.com/google/nsync/archive/0559ce013feac8db639ee1bf776aca0325d28777.tar.gz", + "https://github.com/google/nsync/archive/0559ce013feac8db639ee1bf776aca0325d28777.tar.gz", ], - sha256 = "51f81ff4202bbb820cdbedc061bd2eb6765f2b5c06489e7a8694bedac329e8f8", - strip_prefix = "nsync-8502189abfa44c249c01c2cad64e6ed660a9a668", + sha256 = "6284454c5cd8b1dae2eeb8cf5eb63004de930b5427ed5f6b1aa793513df6b361", + strip_prefix = "nsync-0559ce013feac8db639ee1bf776aca0325d28777", ) tf_http_archive( -- GitLab From 4e5900eb874668e569cfa1b75c463a9f0b15738f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 11:27:54 -0700 Subject: [PATCH 247/960] The Quantized BroadcastSub portion of #17123 PiperOrigin-RevId: 189776376 --- .../internal/optimized/optimized_ops.h | 59 ++++++++++ .../internal/reference/reference_ops.h | 59 ++++++++++ tensorflow/contrib/lite/kernels/sub.cc | 56 ++++++++++ tensorflow/contrib/lite/kernels/sub_test.cc | 101 ++++++++++++++++++ 4 files changed, 275 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 004433498d..f7840258ec 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2280,6 +2280,65 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sub = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sub, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 527276f7bd..472ddc60df 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1461,6 +1461,65 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sub = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sub, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index c15a7a50a4..66b06aeaec 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -106,6 +106,59 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, #undef TF_LITE_SUB } +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + const int left_shift = 20; + const double twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + input1->params.scale / twice_max_input_scale; + const double real_input2_multiplier = + input2->params.scale / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << left_shift) * output->params.scale); + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, + &input2_shift); + int32 output_multiplier; + int output_shift; + QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_SUB(type, opname) \ + type::opname(left_shift, GetTensorData(input1), \ + GetTensorDims(input1), input1_offset, input1_multiplier, \ + input1_shift, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, input2_multiplier, \ + input2_shift, output_offset, output_multiplier, output_shift, \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)); + // The quantized version of Sub doesn't support activations, so we + // always use BroadcastSub. + if (kernel_type == kReference) { + TF_LITE_SUB(reference_ops, BroadcastSub); + } else { + TF_LITE_SUB(optimized_ops, BroadcastSub); + } +#undef TF_LITE_SUB +} + template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); @@ -117,6 +170,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteFloat32) { EvalFloat(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalQuantized(context, node, params, data, input1, input2, + output); } else { context->ReportError(context, "Inputs and outputs not all float types."); return kTfLiteError; diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc index fdbb4243bb..ff07aeec49 100644 --- a/tensorflow/contrib/lite/kernels/sub_test.cc +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -52,6 +52,23 @@ class FloatSubOpModel : public BaseSubOpModel { std::vector GetOutput() { return ExtractVector(output_); } }; +class QuantizedSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// for quantized Sub, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + TEST(FloatSubOpModel, NoActivation) { FloatSubOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, @@ -108,6 +125,90 @@ TEST(FloatSubOpModel, WithBroadcast) { } } +TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = { + {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = { + {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.2}, {0.6, 0.4, -0.18, 0.5}}; + std::vector> results = { + {-0.5, -0.2, 0.0, 0.3}, + {-0.8, -0.2, -0.1, 0.9}, + {-0.61, -0.2, 0.88, -0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.5}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.3}}; + std::vector> results = {{-1.0, -0.2, 0.0, 1.0}, + {-1.0, -0.2, 1.0, 0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_RELU_N1_TO_1); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, kQuantizedTolerance))) + << "With shape number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.7}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, kQuantizedTolerance))) + << "With shape number " << i; + } +} + } // namespace } // namespace tflite int main(int argc, char** argv) { -- GitLab From f57f7d09eeb7402f2455564fafbcebf7ac4b8fe3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 11:36:07 -0700 Subject: [PATCH 248/960] Don't run tensorflow/python:function_test under asan. It gets flaky timeouts. PiperOrigin-RevId: 189777986 --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7ece482ea7..11195b3565 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1034,6 +1034,7 @@ cuda_py_tests( "//tensorflow/core:protos_all_py", ], shard_count = 10, + tags = ["noasan"], ) py_test( -- GitLab From 15d6e8310e1f2ffaa901110903ce7403717b4d2b Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Tue, 20 Mar 2018 11:45:23 -0700 Subject: [PATCH 249/960] Improved accuracy of op_level_cost_estimator (QuantizeV2, Dequantize, Gather). PiperOrigin-RevId: 189779691 --- .../grappler/costs/op_level_cost_estimator.cc | 81 ++++++++++++++----- .../grappler/costs/op_level_cost_estimator.h | 16 ++-- .../costs/op_level_cost_estimator_test.cc | 35 +++++++- 3 files changed, 103 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 29ef317e46..84ad8a3e84 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/clusters/utils.h" namespace tensorflow { @@ -46,6 +47,7 @@ constexpr char kShape[] = "Shape"; constexpr char kSize[] = "Size"; constexpr char kStopGradient[] = "StopGradient"; constexpr char kPreventGradient[] = "PreventGradient"; +constexpr char kGather[] = "Gather"; static const Costs::Duration kMinComputeTime(1); @@ -167,6 +169,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kNoOp, wrap(&OpLevelCostEstimator::PredictNoOp)}, + {kGather, wrap(&OpLevelCostEstimator::PredictGather)}, + {kPlaceholder, wrap(&OpLevelCostEstimator::PredictIdentity)}, {kIdentity, wrap(&OpLevelCostEstimator::PredictIdentity)}, {kRefIdentity, wrap(&OpLevelCostEstimator::PredictIdentity)}, @@ -184,6 +188,17 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kShape, wrap(&OpLevelCostEstimator::PredictMetadata)}, {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}}; + // Quantize = apply min and max bounds, multiply by scale factor and round. + const int quantize_v2_cost = + Eigen::internal::functor_traits< + Eigen::internal::scalar_product_op>::Cost + + Eigen::internal::functor_traits< + Eigen::internal::scalar_max_op>::Cost + + Eigen::internal::functor_traits< + Eigen::internal::scalar_min_op>::Cost + + Eigen::internal::functor_traits< + Eigen::internal::scalar_round_op>::Cost; + elementwise_ops_ = { // Unary ops alphabetically sorted {"Acos", Eigen::internal::functor_traits< @@ -200,6 +215,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { Eigen::internal::scalar_ceil_op>::Cost}, {"Cos", Eigen::internal::functor_traits< Eigen::internal::scalar_cos_op>::Cost}, + {"Dequantize", Eigen::internal::functor_traits< + Eigen::internal::scalar_product_op>::Cost}, {"Erf", 1}, {"Erfc", 1}, {"Exp", Eigen::internal::functor_traits< @@ -218,6 +235,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { Eigen::internal::scalar_log1p_op>::Cost}, {"Neg", Eigen::internal::functor_traits< Eigen::internal::scalar_opposite_op>::Cost}, + {"QuantizeV2", quantize_v2_cost}, {"Reciprocal", Eigen::internal::functor_traits< Eigen::internal::scalar_inverse_op>::Cost}, {"Rint", 1}, @@ -411,28 +429,33 @@ Costs OpLevelCostEstimator::PredictCostOfAnUnknownOp( } Costs OpLevelCostEstimator::PredictOpCountBasedCost( - double operations, const OpInfo& op_features) const { - DeviceInfo device_perf = GetDeviceInfo(op_features.device()); - if (device_perf.gigaops <= 0 || device_perf.gb_per_sec <= 0) { - VLOG(1) << "BAD DEVICE. Op:" << op_features.op() - << " device type:" << op_features.device().type() - << " device model:" << op_features.device().model(); - } + double operations, const OpInfo& op_info) const { + bool unknown_shapes = false; + const double input_size = CalculateInputSize(op_info, &unknown_shapes); + const double output_size = CalculateOutputSize(op_info, &unknown_shapes); + const double total_io_bytes = input_size + output_size; + Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info); + costs.inaccurate = unknown_shapes; + costs.max_memory = output_size; + return costs; +} - Costs::NanoSeconds compute_cost(std::ceil(operations / device_perf.gigaops)); - VLOG(1) << "Op:" << op_features.op() << " GOps:" << operations / 1e9 - << " Execution Time (ns):" << compute_cost.count(); +Costs OpLevelCostEstimator::PredictOpCountBasedCost( + double operations, double total_io_bytes, const OpInfo& op_info) const { + const DeviceInfo device_info = GetDeviceInfo(op_info.device()); + if (device_info.gigaops <= 0 || device_info.gb_per_sec <= 0) { + VLOG(1) << "BAD DEVICE. Op:" << op_info.op() + << " device type:" << op_info.device().type() + << " device model:" << op_info.device().model(); + } - bool found_unknown_shapes = false; - const double total_input_size = - CalculateInputSize(op_features, &found_unknown_shapes); - const double total_output_size = - CalculateOutputSize(op_features, &found_unknown_shapes); - const double total_io_size = total_input_size + total_output_size; + Costs::NanoSeconds compute_cost(std::ceil(operations / device_info.gigaops)); + VLOG(1) << "Op:" << op_info.op() << " GOps:" << operations / 1e9 + << " Compute Time (ns):" << compute_cost.count(); Costs::NanoSeconds memory_cost( - std::ceil(total_io_size / device_perf.gb_per_sec)); - VLOG(1) << "Op:" << op_features.op() << " Size (KB):" << (total_io_size) / 1e3 + std::ceil(total_io_bytes / device_info.gb_per_sec)); + VLOG(1) << "Op:" << op_info.op() << " Size (KB):" << (total_io_bytes) / 1e3 << " Memory Time (ns):" << memory_cost.count(); Costs costs; @@ -443,8 +466,6 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( } else { costs.execution_time = compute_cost + memory_cost; } - costs.inaccurate = found_unknown_shapes; - costs.max_memory = total_output_size; return costs; } @@ -867,7 +888,7 @@ int64 OpLevelCostEstimator::CountConv2DBackpropFilterOperations( int64 OpLevelCostEstimator::CalculateTensorElementCount( const OpInfo::TensorProperties& tensor, bool* found_unknown_shapes) const { - VLOG(2) << " with " << tensor.dtype() << " tensor of shape " + VLOG(2) << " with " << DataTypeString(tensor.dtype()) << " tensor of shape " << tensor.shape().DebugString(); int64 tensor_size = 1; int num_dims = std::max(1, tensor.shape().dim_size()); @@ -1028,5 +1049,23 @@ Costs OpLevelCostEstimator::PredictMetadata(const OpContext& op_context) const { return costs; } +Costs OpLevelCostEstimator::PredictGather(const OpContext& op_context) const { + // Gather op can have a very large input, but only the size of the output + // matters, because indices may select only a very small subset of input. + + const auto& op_info = op_context.op_info; + + bool unknown_shapes = false; + const int64 op_count = + CalculateTensorElementCount(op_info.outputs(0), &unknown_shapes); + const double output_size = CalculateOutputSize(op_info, &unknown_shapes); + const double total_io = 2 * output_size; + Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info); + costs.inaccurate = unknown_shapes; + costs.max_memory = output_size; + + return costs; +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index 7bb530fe31..e5dd31a7a2 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -51,10 +51,15 @@ class OpLevelCostEstimator { // Predict cost of an op for which no accurate estimator is defined. Costs PredictCostOfAnUnknownOp(const OpContext& op_context) const; - // Naive cost estimate based on operations divided by device ops/sec, - // and input/output tensor sizes. - Costs PredictOpCountBasedCost(double operations, - const OpInfo& op_features) const; + // Naive cost estimate based on the given operations count and total + // input/output tensor sizes of the given op_info combined. + Costs PredictOpCountBasedCost(double operations, const OpInfo& op_info) const; + + // Naive cost estimate based on the given operations count and the given total + // io size in bytes. Sizes of op_info inputs and outputs are not taken into + // consideration. + Costs PredictOpCountBasedCost(double operations, double total_io_bytes, + const OpInfo& op_info) const; // This family of routines counts the number of operations to perform the // specified TensorFlow Op. @@ -125,7 +130,7 @@ class OpLevelCostEstimator { // implementation just divides the operations to // perform the op (from the "Count" routines, // above) by the device peak operations per - // second. Override to supply a better estimate. + // second. // Implementation of costs other than // execution_time is optional, depending on the // device. @@ -139,6 +144,7 @@ class OpLevelCostEstimator { Costs PredictVariable(const OpContext& op_context) const; Costs PredictBatchMatMul(const OpContext& op_context) const; Costs PredictMetadata(const OpContext& op_context) const; + Costs PredictGather(const OpContext& op_context) const; // Utility function for safe division. Returns 0 // if rhs is 0 or negative. diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 4790b9bab2..d5360cba24 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -75,8 +75,8 @@ OpContext DescribeMatMulUnknownShape() { // Wrangles the minimum number of proto fields to set up an input of // arbitrary rank and type. void DescribeArbitraryRankInput(const std::vector& dims, DataType dtype, - OpInfo* op_features) { - auto input = op_features->add_inputs(); + OpInfo* op_info) { + auto input = op_info->add_inputs(); input->set_dtype(dtype); auto shape = input->mutable_shape(); for (auto d : dims) { @@ -84,6 +84,18 @@ void DescribeArbitraryRankInput(const std::vector& dims, DataType dtype, } } +// Wrangles the minimum number of proto fields to set up an output of +// arbitrary rank and type. +void DescribeArbitraryRankOutput(const std::vector& dims, DataType dtype, + OpInfo* op_info) { + auto output = op_info->add_outputs(); + output->set_dtype(dtype); + auto shape = output->mutable_shape(); + for (auto d : dims) { + shape->add_dim()->set_size(d); + } +} + // Returns an OpInfo for a BatchMatMul OpContext DescribeBatchMatMul(const std::vector& dims_a, const std::vector& dims_b) { @@ -200,6 +212,23 @@ class OpLevelCostEstimatorTest : public ::testing::Test { OpLevelCostEstimator estimator_; }; +TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op("Gather"); + + // Huge first input shouldn't affect Gather execution and memory costs. + DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); + DescribeArbitraryRankInput({16}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); + + auto cost = estimator_.PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(128), cost.memory_time); + EXPECT_EQ(Costs::Duration(16), cost.compute_time); + EXPECT_EQ(Costs::Duration(144), cost.execution_time); + EXPECT_FALSE(cost.inaccurate); +} + TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) { auto cost = PredictCosts(DescribeBiasAdd(1000, 10)); EXPECT_EQ(Costs::Duration(8400), cost.memory_time); @@ -354,7 +383,7 @@ TEST_F(OpLevelCostEstimatorTest, GetTensorShapeProtoFromTensorProto) { TensorProto tensor_proto; TensorShapeProto tensor_shape_proto; - // Dimention larger than max value; should fail while converting to Tensor + // Dimension larger than max value; should fail while converting to Tensor // class. tensor_proto.mutable_tensor_shape()->add_dim()->set_size(255); EXPECT_FALSE( -- GitLab From 98c955ee73e95591b00793f8fe9de5b1d588a0ea Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 20 Mar 2018 12:45:54 -0700 Subject: [PATCH 250/960] improve fp16 tftrt prediction (#17857) delay fp32 to fp16 conversion to reduce accumulated rounding error --- .../contrib/tensorrt/convert/convert_nodes.cc | 63 ++++++++++--------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 7f4b57f9f4..979b5648c2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -547,6 +547,19 @@ class Converter { } }; +TRT_ShapedWeights ConvertFP32ToFP16(Converter& ctx, + const TRT_ShapedWeights& weights_src) { + auto dtype_new = tensorflow::DataType::DT_HALF; + TRT_ShapedWeights weights = + ctx.get_temp_weights(dtype_new, weights_src.shape_); + const float* src = static_cast(weights_src.GetValues()); + Eigen::half* dst = const_cast( + static_cast(weights.GetValues())); + for (int64_t i = 0; i < weights_src.count(); i++) { + dst[i] = Eigen::half_impl::float_to_half_rtne(src[i]); + } + return weights; +} // **************************************************************************** // Constant folding functions // TODO(jie): once optimizer kicks in, we should have done constant folding @@ -956,6 +969,10 @@ tensorflow::Status BinaryTensorOpWeight( } } + if (ctx.isFP16()) { + weights = ConvertFP32ToFP16(ctx, weights); + } + // prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); @@ -1022,6 +1039,10 @@ tensorflow::Status ConvertConv2DHelper( VLOG(2) << "groups count: " << num_groups; TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + if (ctx.isFP16()) { + weights_rsck = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); + } + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); TRT_ShapedWeights biases(weights.type_); @@ -1292,8 +1313,11 @@ tensorflow::Status ConvertScale(Converter& ctx, // Implement tensor binaryOp weight [channel wise] for now; const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); + if (ctx.isFP16()) { + weights = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); + } + TRT_ShapedWeights empty_weights(weights.type_); TFAttrs attrs(node_def); @@ -1388,33 +1412,16 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - auto half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - half_tensor.device(defd) = - tensor.flat().template cast(); - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } else if (!weights_tensor.int_val().empty()) { VLOG(2) << "int!!!" << node_def.name(); nvinfer1::Dims scalar_shape; -- GitLab From be7adf828e5a23cdd883b0d43756b7f123c4088a Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 20 Mar 2018 12:51:50 -0700 Subject: [PATCH 251/960] [TF:XLA] Bump open source llvm revision to r327958 PiperOrigin-RevId: 189792132 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index cf1611a883..675acbe5f6 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/cfb3cd346a75b17856c4e2ba6365e15d9ab0c763.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/cfb3cd346a75b17856c4e2ba6365e15d9ab0c763.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/1c3cdea2f181d8e14ee184466c5fb237f1b4cda8.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/1c3cdea2f181d8e14ee184466c5fb237f1b4cda8.tar.gz", ], - sha256 = "2cf79b1891926b7af6173c1031d040fc07b2682ff66039c5822e074566c48956", - strip_prefix = "llvm-cfb3cd346a75b17856c4e2ba6365e15d9ab0c763", + sha256 = "1efbb9b05af88368be984d2f6526061d4a857181ef10f8841889a3a46869bb01", + strip_prefix = "llvm-1c3cdea2f181d8e14ee184466c5fb237f1b4cda8", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 13b993095f155bd4dd7fc3b057a7b5043ef0a06c Mon Sep 17 00:00:00 2001 From: Mingxing Tan Date: Tue, 20 Mar 2018 12:54:01 -0700 Subject: [PATCH 252/960] Add broadcasting support for fused add or sub. PiperOrigin-RevId: 189792542 --- .../fuse_binary_into_preceding_affine.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc index 5b57178b18..76c6be00d4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc @@ -50,7 +50,17 @@ void FuseAddOrSubParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, // TODO(b/62904716): Bias array should become 1-D when padding removed. const int depth = bias_shape.dims(bias_shape.dimensions_count() - 1); - CHECK_EQ(depth, operand_shape.dims(operand_shape.dimensions_count() - 1)); + int operand_channel_increment = 0; + if (operand_shape.dimensions_count() >= 1 && + operand_shape.dims(operand_shape.dimensions_count() - 1) == + bias_shape.dims(bias_shape.dimensions_count() - 1)) { + operand_channel_increment = 1; + } else if (operand_shape.dimensions_count() == 0 || + operand_shape.dims(operand_shape.dimensions_count() - 1) == 1) { + operand_channel_increment = 0; + } else { + LOG(FATAL) << "Operand shape mismatch."; + } enum class OpType { BiasPlusOperand, BiasMinusOperand, OperandMinusBias }; @@ -60,9 +70,10 @@ void FuseAddOrSubParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, ? OpType::BiasMinusOperand : OpType::OperandMinusBias; + int operand_channel = 0; for (int i = 0; i < depth; i++) { float& bias_val = bias_data[i]; - const float operand_val = operand_data[i]; + const float operand_val = operand_data[operand_channel]; if (optype == OpType::BiasPlusOperand) { bias_val += operand_val; } else if (optype == OpType::BiasMinusOperand) { @@ -72,6 +83,7 @@ void FuseAddOrSubParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, } else { LOG(FATAL) << "Should not get here."; } + operand_channel += operand_channel_increment; } } -- GitLab From e4313551d184932c9a135d4edacf42711e5b3483 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 12:56:51 -0700 Subject: [PATCH 253/960] TFBT: Pass label_keys to the head class constructor. PiperOrigin-RevId: 189793004 --- tensorflow/contrib/boosted_trees/estimator_batch/estimator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index 01752416b3..70454aa6db 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -81,7 +81,8 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): n_classes=n_classes, weight_column_name=weight_column_name, enable_centered_bias=False, - loss_fn=loss_fn) + loss_fn=loss_fn, + label_keys=label_keys) if learner_config.num_classes == 0: learner_config.num_classes = n_classes elif learner_config.num_classes != n_classes: -- GitLab From 278ead7d06e427df09f910031cb9195c8a4da559 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 13:02:36 -0700 Subject: [PATCH 254/960] In allocate_transient_arrays.cc, was not handling the case where the same array occurs more than once in the list of inputs or outputs of a node. PiperOrigin-RevId: 189794090 --- .../lite/toco/allocate_transient_arrays.cc | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc index 49cc1fc2aa..621fbcb98d 100644 --- a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc +++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc @@ -248,29 +248,49 @@ void AllocateTransientArrays(Model* model, op_index++) { const auto& op = model->operators[op_index]; // Allocate those arrays whose lifespan starts exactly here. + std::vector arrays_to_allocate; for (const auto& input : op->inputs) { if (StartsAt(array_lifespans[input], op_index)) { - AllocateTransientArray(*model, input, &allocator, - transient_data_alignment); + if (std::find(arrays_to_allocate.begin(), arrays_to_allocate.end(), + input) == arrays_to_allocate.end()) { + arrays_to_allocate.push_back(input); + } } } for (const auto& output : op->outputs) { if (StartsAt(array_lifespans[output], op_index)) { - AllocateTransientArray(*model, output, &allocator, - transient_data_alignment); + if (std::find(arrays_to_allocate.begin(), arrays_to_allocate.end(), + output) == arrays_to_allocate.end()) { + arrays_to_allocate.push_back(output); + } } } + for (const string& array : arrays_to_allocate) { + AllocateTransientArray(*model, array, &allocator, + transient_data_alignment); + } + // Deallocate those arrays whose lifespan ends exactly here. + std::vector arrays_to_deallocate; for (const auto& input : op->inputs) { if (EndsAt(array_lifespans[input], op_index)) { - DeallocateTransientArray(*model, input, &allocator); + if (std::find(arrays_to_deallocate.begin(), arrays_to_deallocate.end(), + input) == arrays_to_deallocate.end()) { + arrays_to_deallocate.push_back(input); + } } } for (const auto& output : op->outputs) { if (EndsAt(array_lifespans[output], op_index)) { - DeallocateTransientArray(*model, output, &allocator); + if (std::find(arrays_to_deallocate.begin(), arrays_to_deallocate.end(), + output) == arrays_to_deallocate.end()) { + arrays_to_deallocate.push_back(output); + } } } + for (const string& array : arrays_to_deallocate) { + DeallocateTransientArray(*model, array, &allocator); + } } // Just out of curiosity (not used in the actual allocation process) -- GitLab From d3e3b78c631a975df498ed8cee65d505ddbe9aac Mon Sep 17 00:00:00 2001 From: joel-shor Date: Tue, 20 Mar 2018 22:06:21 +0200 Subject: [PATCH 255/960] Adjust indentations to conform to pylint. --- .../data/python/kernel_tests/resample_test.py | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index a76c6b1e39..897815656a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -72,38 +72,38 @@ class ResampleTest(test.TestCase): self.assertAllClose(target_dist, returned_dist, atol=1e-2) def testRandomClasses(self): - init_dist = [0.25, 0.25, 0.25, 0.25] - target_dist = [0.0, 0.0, 0.0, 1.0] - num_classes = len(init_dist) - num_samples = 100 # We don't need many samples to test a dirac-delta target distribution - data_np = np.random.choice(num_classes, num_samples, p=init_dist) - - dataset = dataset_ops.Dataset.from_tensor_slices(data_np) - - # Apply a random mapping that preserves the data distribution. - def _remap_fn(_): - return math_ops.cast(random_ops.random_uniform([1]) * num_classes, dtypes.int32)[0] - dataset = dataset.map(_remap_fn) - - # Reshape distribution. - dataset = dataset.apply( - resampling.rejection_resample( - class_func=lambda x: x, - target_dist=target_dist, - initial_dist=init_dist)) - - get_next = dataset.make_one_shot_iterator().get_next() - - with self.test_session() as sess: - returned = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - returned.append(sess.run(get_next)) - - classes, _ = zip(*returned) - bincount = np.bincount(np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) - - self.assertAllClose(target_dist, bincount, atol=1e-2) + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + num_samples = 100 # We don't need many samples to test a dirac-delta target distribution + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Apply a random mapping that preserves the data distribution. + def _remap_fn(_): + return math_ops.cast(random_ops.random_uniform([1]) * num_classes, dtypes.int32)[0] + dataset = dataset.map(_remap_fn) + + # Reshape distribution. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + + classes, _ = zip(*returned) + bincount = np.bincount(np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) + + self.assertAllClose(target_dist, bincount, atol=1e-2) if __name__ == "__main__": test.main() -- GitLab From 5483b5894fb06ffeae49af74e573c114b2e3b787 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Tue, 20 Mar 2018 22:12:01 +0200 Subject: [PATCH 256/960] Fix indent mistake. --- tensorflow/contrib/data/python/ops/resampling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 44de42e94d..e440d4a35f 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -102,10 +102,11 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): .map(maybe_warn_on_large_rejection)) def _gather_and_copy(class_val, acceptance_prob, data): - return (class_val, array_ops.gather(acceptance_prob, class_val), data) + return (class_val, array_ops.gather(acceptance_prob, class_val), data) current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) - filtered_ds = (current_probabilities_and_class_and_data_ds\ + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + filtered_ds = ( + current_probabilities_and_class_and_data_ds .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) -- GitLab From 13ae129449cdeb7afbad98bc8a00ad5c82a0ca31 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 20 Mar 2018 13:34:02 -0700 Subject: [PATCH 257/960] Improved the performance of the function optimizer. PiperOrigin-RevId: 189799697 --- .../grappler/optimizers/function_optimizer.cc | 78 +++++++++++++------ 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 97effae8c8..2a6b8a325f 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -140,19 +140,53 @@ class FakeCPUDevice : public Device { Status Sync() override { return Status::OK(); } }; -Status InlineSymbolicGradient(const NodeDef& node, - const FunctionDefLibrary& library, - GraphDef* inlined_graph) { - Env* env = Env::Default(); - DeviceAttributes attr; - attr.set_name("/device:CPU:0"); - attr.set_device_type("CPU"); - FakeCPUDevice* dev = new FakeCPUDevice(env, attr); - std::vector devices; - devices.push_back(dev); - DeviceMgr dvc_mgr(devices); - FunctionLibraryDefinition function_library(OpRegistry::Global(), library); +class SymbolicGradientEnv { + public: + SymbolicGradientEnv(int graph_version, const FunctionDefLibrary& library) + : graph_version_(graph_version), library_(library) {} + + FunctionLibraryDefinition* function_library() { + InitializeIfNeeded(); + return fld_.get(); + } + FunctionLibraryRuntime* function_library_runtime() { + InitializeIfNeeded(); + return flr_; + } + + private: + // This initialization is expensive. Do it lazily to avoid paying for it + // unless it's needed. + void InitializeIfNeeded() { + if (flr_) { + return; + } + Env* env = Env::Default(); + DeviceAttributes attr; + attr.set_name("/device:CPU:0"); + attr.set_device_type("CPU"); + FakeCPUDevice* dev = new FakeCPUDevice(env, attr); + std::vector devices; + devices.push_back(dev); + dvc_mgr_.reset(new DeviceMgr(devices)); + fld_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), library_)); + OptimizerOptions optimizer_opts; + optimizer_opts.set_do_function_inlining(true); + pflr_.reset(new ProcessFunctionLibraryRuntime( + dvc_mgr_.get(), env, graph_version_, fld_.get(), optimizer_opts)); + flr_ = pflr_->GetFLR(dev->name()); + } + + const int graph_version_; + const FunctionDefLibrary& library_; + std::unique_ptr dvc_mgr_; + std::unique_ptr fld_; + std::unique_ptr pflr_; + FunctionLibraryRuntime* flr_ = nullptr; +}; +Status InlineSymbolicGradient(const NodeDef& node, SymbolicGradientEnv* env, + GraphDef* inlined_graph) { GraphDef graph_def; // Create a node to anchor the gradient inputs @@ -186,24 +220,18 @@ Status InlineSymbolicGradient(const NodeDef& node, } // Convert the graphdef to a graph - OptimizerOptions optimizer_opts; - optimizer_opts.set_do_function_inlining(true); - ProcessFunctionLibraryRuntime pflr(&dvc_mgr, env, - inlined_graph->versions().producer(), - &function_library, optimizer_opts); - FunctionLibraryRuntime* flr = pflr.GetFLR(dev->name()); - CHECK(flr); GraphConstructorOptions graph_ctor_opts; graph_ctor_opts.allow_internal_ops = true; graph_ctor_opts.expect_device_spec = false; - Graph graph(function_library); + Graph graph(env->function_library()); TF_RETURN_IF_ERROR( ConvertGraphDefToGraph(graph_ctor_opts, graph_def, &graph)); // Recursively inline the functions until there is nothing more to inline. We // should at least expand one function. int counter = 0; - while (counter < 50 && ExpandInlineFunctions(flr, &graph)) { + while (counter < 50 && + ExpandInlineFunctions(env->function_library_runtime(), &graph)) { ++counter; } @@ -279,11 +307,12 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, return Status::OK(); } - *optimized_graph->mutable_versions() = item.graph.versions(); + SymbolicGradientEnv env(item.graph.versions().producer(), + item.graph.library()); + for (const NodeDef& node : item.graph.node()) { if (node.op() == "SymbolicGradient") { - TF_RETURN_IF_ERROR( - InlineSymbolicGradient(node, item.graph.library(), optimized_graph)); + TF_RETURN_IF_ERROR(InlineSymbolicGradient(node, &env, optimized_graph)); continue; } auto it = functions.find(node.op()); @@ -299,6 +328,7 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // inlined based on the context in which they're instantiated. // TODO(bsteiner): trim the library to remove unused function definitions + *optimized_graph->mutable_versions() = item.graph.versions(); *optimized_graph->mutable_library() = item.graph.library(); return Status::OK(); -- GitLab From 4a6ab2cb8c2f33ffb6b64d61bd09f006e75982c8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 13:38:09 -0700 Subject: [PATCH 258/960] Build tflite interpreter from buffer in python interface PiperOrigin-RevId: 189800400 --- tensorflow/contrib/lite/python/interpreter.py | 26 ++++++--- .../contrib/lite/python/interpreter_test.py | 53 ++++++++++--------- .../interpreter_wrapper.cc | 9 +++- .../interpreter_wrapper/interpreter_wrapper.h | 6 ++- 4 files changed, 62 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index 5b5a7c3199..accdd04671 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -23,19 +23,33 @@ from tensorflow.contrib.lite.python.interpreter_wrapper import tensorflow_wrap_i class Interpreter(object): """Interpreter inferace for TF-Lite Models.""" - def __init__(self, model_path): + def __init__(self, model_path=None, model_content=None): """Constructor. Args: model_path: Path to TF-Lite Flatbuffer file. + model_content: Content of model. Raises: - ValueError: If the interpreter was unable to open the model. + ValueError: If the interpreter was unable to create. """ - self._interpreter = ( - interpreter_wrapper.InterpreterWrapper_CreateWrapperCPP(model_path)) - if not self._interpreter: - raise ValueError('Failed to open {}'.format(model_path)) + if model_path and not model_content: + self._interpreter = ( + interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromFile( + model_path)) + if not self._interpreter: + raise ValueError('Failed to open {}'.format(model_path)) + elif model_content and not model_path: + self._interpreter = ( + interpreter_wrapper.InterpreterWrapper_CreateWrapperCPPFromBuffer( + model_content, len(model_content))) + if not self._interpreter: + raise ValueError( + 'Failed to create model from {} bytes'.format(len(model_content))) + elif not model_path and not model_path: + raise ValueError('`model_path` or `model_content` must be specified.') + else: + raise ValueError('Can\'t both provide `model_path` and `model_content`') def allocate_tensors(self): if not self._interpreter.AllocateTensors(): diff --git a/tensorflow/contrib/lite/python/interpreter_test.py b/tensorflow/contrib/lite/python/interpreter_test.py index e0215b721c..e85390c56c 100644 --- a/tensorflow/contrib/lite/python/interpreter_test.py +++ b/tensorflow/contrib/lite/python/interpreter_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import io import numpy as np from tensorflow.contrib.lite.python import interpreter as interpreter_wrapper @@ -29,7 +30,8 @@ class InterpreterTest(test_util.TensorFlowTestCase): def testFloat(self): interpreter = interpreter_wrapper.Interpreter( - resource_loader.get_path_to_datafile('testdata/permute_float.tflite')) + model_path=resource_loader.get_path_to_datafile( + 'testdata/permute_float.tflite')) interpreter.allocate_tensors() input_details = interpreter.get_input_details() @@ -53,29 +55,32 @@ class InterpreterTest(test_util.TensorFlowTestCase): self.assertTrue((expected_output == output_data).all()) def testUint8(self): - interpreter = interpreter_wrapper.Interpreter( - resource_loader.get_path_to_datafile('testdata/permute_uint8.tflite')) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual('input', input_details[0]['name']) - self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 4] == input_details[0]['shape']).all()) - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual('output', output_details[0]['name']) - self.assertEqual(np.uint8, output_details[0]['dtype']) - self.assertTrue(([1, 4] == output_details[0]['shape']).all()) - - test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) - expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) - interpreter.set_tensor(input_details[0]['index'], test_input) - interpreter.invoke() - - output_data = interpreter.get_tensor(output_details[0]['index']) - self.assertTrue((expected_output == output_data).all()) + model_path = resource_loader.get_path_to_datafile( + 'testdata/permute_uint8.tflite') + with io.open(model_path, 'rb') as model_file: + data = model_file.read() + interpreter = interpreter_wrapper.Interpreter(model_content=data) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('input', input_details[0]['name']) + self.assertEqual(np.uint8, input_details[0]['dtype']) + self.assertTrue(([1, 4] == input_details[0]['shape']).all()) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('output', output_details[0]['name']) + self.assertEqual(np.uint8, output_details[0]['dtype']) + self.assertTrue(([1, 4] == output_details[0]['shape']).all()) + + test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) + expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) + interpreter.set_tensor(input_details[0]['index'], test_input) + interpreter.invoke() + + output_data = interpreter.get_tensor(output_details[0]['index']) + self.assertTrue((expected_output == output_data).all()) if __name__ == '__main__': diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index f30067de94..14e1190c80 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -302,12 +302,19 @@ PyObject* InterpreterWrapper::GetTensor(int i) const { return PyArray_Return(reinterpret_cast(np_array)); } -InterpreterWrapper* InterpreterWrapper::CreateWrapperCPP( +InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromFile( const char* model_path) { std::unique_ptr model = tflite::FlatBufferModel::BuildFromFile(model_path); return model ? new InterpreterWrapper(std::move(model)) : nullptr; } +InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( + const char* data, size_t len) { + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromBuffer(data, len); + return model ? new InterpreterWrapper(std::move(model)) : nullptr; +} + } // namespace interpreter_wrapper } // namespace tflite diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index dea71ca879..63bdb30f79 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -37,7 +37,11 @@ namespace interpreter_wrapper { class InterpreterWrapper { public: // SWIG caller takes ownership of pointer. - static InterpreterWrapper* CreateWrapperCPP(const char* model_path); + static InterpreterWrapper* CreateWrapperCPPFromFile(const char* model_path); + + // SWIG caller takes ownership of pointer. + static InterpreterWrapper* CreateWrapperCPPFromBuffer(const char* data, + size_t len); ~InterpreterWrapper(); bool AllocateTensors(); -- GitLab From b697da432aada697f2485734827b7bed5dbf2599 Mon Sep 17 00:00:00 2001 From: Terry Koo Date: Tue, 20 Mar 2018 13:39:42 -0700 Subject: [PATCH 259/960] Makes protobuf dep in tf.contrib.data conditional using if_static(). In non-monolithic builds, adding it unconditionally would duplicate of protobuf symbols among tf.contrib op libraries. Guarding it with if_static() restricts the dep to monolithic builds, which should be able to dedupe the symbols at link time. PiperOrigin-RevId: 189800612 --- tensorflow/contrib/data/BUILD | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 5ba2297e7f..d787ed8a1a 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -9,6 +9,10 @@ load( "tf_custom_op_library", "tf_gen_op_libs", ) +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "if_static", +) py_library( name = "data", @@ -29,10 +33,11 @@ py_library( tf_custom_op_library( name = "_dataset_ops.so", srcs = ["ops/dataset_ops.cc"], - deps = [ - "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/core:lib_proto_parsing", - ], + deps = ["//tensorflow/contrib/data/kernels:dataset_kernels"] + + if_static( + extra_deps = ["//tensorflow/core:lib_proto_parsing"], + otherwise = [], + ), ) tf_gen_op_libs( -- GitLab From 46321876c6ece27677f6c51400b799a9a8540324 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Tue, 20 Mar 2018 22:54:22 +0200 Subject: [PATCH 260/960] Fix more lint errors. --- tensorflow/contrib/data/python/kernel_tests/resample_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 7f9a16430c..99e56e9a31 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -82,7 +82,8 @@ class ResampleTest(test.TestCase): # Apply a random mapping that preserves the data distribution. def _remap_fn(_): - return math_ops.cast(random_ops.random_uniform([1]) * num_classes, dtypes.int32)[0] + return math_ops.cast(random_ops.random_uniform([1]) * num_classes, + dtypes.int32)[0] dataset = dataset.map(_remap_fn) # Reshape distribution. -- GitLab From 9a24e8acfcd8c9046e1abaac9dbf5e146186f4c2 Mon Sep 17 00:00:00 2001 From: Piotr Czapla Date: Tue, 20 Mar 2018 22:20:20 +0100 Subject: [PATCH 261/960] Add training parameter to dropout to make it work (#16133) * Add training parameter to dropout to make it work I think that without this parameter set dropout is disabled all the time. At least this is what I read in the documentation, besides adding this improves training. * Removing redundant if statement around dropout * Fix linter error: line longer than 80. --- tensorflow/examples/learn/mnist.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/learn/mnist.py b/tensorflow/examples/learn/mnist.py index 98819b20bf..3ead8614b6 100644 --- a/tensorflow/examples/learn/mnist.py +++ b/tensorflow/examples/learn/mnist.py @@ -61,8 +61,10 @@ def conv_model(features, labels, mode): # Densely connected layer with 1024 neurons. h_fc1 = tf.layers.dense(h_pool2_flat, 1024, activation=tf.nn.relu) - if mode == tf.estimator.ModeKeys.TRAIN: - h_fc1 = tf.layers.dropout(h_fc1, rate=0.5) + h_fc1 = tf.layers.dropout( + h_fc1, + rate=0.5, + training=(mode == tf.estimator.ModeKeys.TRAIN)) # Compute logits (1 per class) and compute loss. logits = tf.layers.dense(h_fc1, N_DIGITS, activation=None) -- GitLab From 4d7d62d5101f069017c8714c53299be022b4ff74 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 20 Mar 2018 14:23:39 -0700 Subject: [PATCH 262/960] Internal Change. PiperOrigin-RevId: 189809845 --- tensorflow/python/framework/test_util.py | 6 +++--- tensorflow/python/keras/BUILD | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index e9e86e452b..d8f8569939 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -902,9 +902,9 @@ class TensorFlowTestCase(googletest.TestCase): Use the `use_gpu` and `force_gpu` options to control where ops are run. If `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if - `use_gpu` - is True, TensorFlow tries to run as many ops on the GPU as possible. If both - `force_gpu and `use_gpu` are False, all ops are pinned to the CPU. + `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as + possible. If both `force_gpu and `use_gpu` are False, all ops are pinned to + the CPU. Example: ```python diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index eef91e9c5b..3180b9f410 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -8,6 +8,7 @@ exports_files(["LICENSE"]) package(default_visibility = ["//visibility:public"]) load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test") config_setting( name = "empty_condition", @@ -656,16 +657,17 @@ py_test( ], ) -py_test( +cuda_py_test( name = "multi_gpu_utils_test", - size = "medium", srcs = ["_impl/keras/utils/multi_gpu_utils_test.py"], - srcs_version = "PY2AND3", - tags = ["multi_gpu"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], + tags = [ + "guitar", + "multi_gpu", ], ) -- GitLab From a0e07f998b388f0ecc7b7cf2256522f28482b285 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 20 Mar 2018 14:30:36 -0700 Subject: [PATCH 263/960] [tf.data] Improve docstring for `tf.contrib.data.Counter`. PiperOrigin-RevId: 189811108 --- tensorflow/contrib/data/python/ops/counter.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/counter.py b/tensorflow/contrib/data/python/ops/counter.py index 63226fe781..6ef65f9624 100644 --- a/tensorflow/contrib/data/python/ops/counter.py +++ b/tensorflow/contrib/data/python/ops/counter.py @@ -25,7 +25,7 @@ from tensorflow.python.framework import ops def Counter(start=0, step=1, dtype=dtypes.int64): - """Creates a `Dataset` of a `step`-separated count startin from `start`. + """Creates a `Dataset` that counts from `start` in steps of size `step`. For example: @@ -38,12 +38,13 @@ def Counter(start=0, step=1, dtype=dtypes.int64): ``` Args: - start: starting value for count. - step: step size. - dtype: counter data type. + start: (Optional.) The starting value for the counter. Defaults to 0. + step: (Optional.) The step size for the counter. Defaults to 1. + dtype: (Optional.) The data type for counter elements. Defaults to + `tf.int64`. Returns: - A `Dataset` of scalar elements. + A `Dataset` of scalar `dtype` elements. """ with ops.name_scope("counter"): start = ops.convert_to_tensor(start, dtype=dtype, name="start") -- GitLab From 546d1d467372a176f337f2614165c6d754a386da Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 14:33:19 -0700 Subject: [PATCH 264/960] [XLA] Simplify the HLO proto: don't nest the fusion computation in an fusion HloInstructionProto. PiperOrigin-RevId: 189811729 --- tensorflow/compiler/xla/service/hlo.proto | 3 +- .../compiler/xla/service/hlo_computation.cc | 18 ++++++------ .../compiler/xla/service/hlo_computation.h | 10 +------ .../compiler/xla/service/hlo_instruction.cc | 28 ++++++++++--------- .../compiler/xla/service/hlo_instruction.h | 7 +---- tensorflow/compiler/xla/service/hlo_module.cc | 22 ++------------- 6 files changed, 30 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index bf903d6a39..b86fbd821b 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -38,6 +38,8 @@ option cc_enable_arenas = true; message HloInstructionProto { reserved 10; reserved "parameter_name"; + reserved 12; + reserved "fused_instructions_computation"; string name = 1; string opcode = 2; @@ -58,7 +60,6 @@ message HloInstructionProto { // Fusion state, only present for kFusion. string fusion_kind = 11; - HloComputationProto fused_instructions_computation = 12; // Index for kGetTupleElement. int64 tuple_index = 13; diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index f99c7cf5e4..4e852190a8 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -406,18 +406,15 @@ HloComputationProto HloComputation::ToProto() const { /* static */ StatusOr> HloComputation::CreateFromProto( HloModule* module, const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map, - const std::function)>& - add_fused_computation, - HloInstruction* fusion_instruction) { + const tensorflow::gtl::FlatMap& computation_map) { std::vector> instructions; tensorflow::gtl::FlatMap instruction_map; int64 parameter_count = 0; for (const HloInstructionProto& instruction_proto : proto.instructions()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr instruction, - HloInstruction::CreateFromProto( - module, instruction_proto, instruction_map, - computation_map, add_fused_computation)); + TF_ASSIGN_OR_RETURN( + std::unique_ptr instruction, + HloInstruction::CreateFromProto(module, instruction_proto, + instruction_map, computation_map)); if (instruction->opcode() == HloOpcode::kParameter) { parameter_count++; } @@ -429,8 +426,9 @@ HloComputation::CreateFromProto( TF_RET_CHECK(!proto.root_name().empty()); TF_RET_CHECK(ContainsKey(instruction_map, proto.root_name())); HloInstruction* root = instruction_map.at(proto.root_name()); - return WrapUnique(new HloComputation( - proto.name(), parameter_count, &instructions, root, fusion_instruction)); + return WrapUnique(new HloComputation(proto.name(), parameter_count, + &instructions, root, + /*fusion_instruction=*/nullptr)); } void HloComputation::FuseInstructionsInto( diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index dd9d346999..630d3675de 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -163,17 +163,9 @@ class HloComputation { // computation_map: a map from computation name to HloComputation*. This map // must contain all computations which the newly constructed computation // calls. - // add_fused_computation: A function to call to add a fused - // computation. Used only when the instruction is a fusion instruction. - // fusion_instruction: if non-null then the newly created computation will - // be constructed as a fused computation with this instruction as its - // fusion parent. static StatusOr> CreateFromProto( HloModule* module, const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map, - const std::function)>& - add_fused_computation, - HloInstruction* fusion_instruction = nullptr); + const tensorflow::gtl::FlatMap& computation_map); // Gets the instructions in this computation. // diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index d33add23d0..83fcc5da6d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" @@ -52,9 +53,7 @@ using ::tensorflow::strings::StrCat; StatusOr> HloInstruction::CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map, - const std::function)>& - add_fused_computation) { + const tensorflow::gtl::FlatMap& computation_map) { TF_RET_CHECK(!proto.opcode().empty()); TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode())); TF_RET_CHECK(proto.has_shape()); @@ -76,17 +75,20 @@ StatusOr> HloInstruction::CreateFromProto( // HloInstructionProto and do not appear as an HloComputationProto within the // HloModuleProto. if (instruction->opcode() == HloOpcode::kFusion) { - TF_RET_CHECK(proto.has_fused_instructions_computation()); TF_RET_CHECK(!proto.fusion_kind().empty()); TF_ASSIGN_OR_RETURN(instruction->fusion_kind_, StringToFusionKind(proto.fusion_kind())); - TF_ASSIGN_OR_RETURN(std::unique_ptr fused_computation, - HloComputation::CreateFromProto( - module, proto.fused_instructions_computation(), - computation_map, add_fused_computation, - /*fusion_instruction=*/instruction.get())); - instruction->called_computations_.push_back(fused_computation.get()); - add_fused_computation(std::move(fused_computation)); + + // Find the fused computation and set its fusion instruction. + TF_RET_CHECK(proto.called_computation_names_size() == 1) + << "Expect 1 called computation for fusion instruction, but sees " + << proto.called_computation_names_size(); + const string& fusion_name = proto.called_computation_names(0); + auto* fused_computation = FindPtrOrNull(computation_map, fusion_name); + TF_RET_CHECK(fused_computation != nullptr) + << "No fusion computation named " << fusion_name; + fused_computation->SetFusionInstruction(instruction.get()); + instruction->called_computations_.push_back(fused_computation); } else { for (const string& computation_name : proto.called_computation_names()) { TF_RET_CHECK(ContainsKey(computation_map, computation_name)) @@ -2330,8 +2332,8 @@ HloInstructionProto HloInstruction::ToProto() const { proto.set_parameter_number(parameter_number_); if (opcode() == HloOpcode::kFusion) { proto.set_fusion_kind(xla::ToString(fusion_kind())); - *proto.mutable_fused_instructions_computation() = - fused_instructions_computation()->ToProto(); + *proto.add_called_computation_names() = + fused_instructions_computation()->name(); } else { for (const HloComputation* computation : called_computations_) { *proto.add_called_computation_names() = computation->name(); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e4c86214c2..a111e1e4a6 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -184,15 +184,10 @@ class HloInstruction { // computation_map: a map from computation name to HloComputation*. This map // must contain all computations which the newly constructed instruction // calls. - // add_fused_computation: A function to call to add a fused - // computation. Used (clearly) when the instruction is a fusion - // instruction. static StatusOr> CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map, - const std::function)>& - add_fused_computation); + const tensorflow::gtl::FlatMap& computation_map); // Creates a parameter-retrieving instruction. static std::unique_ptr CreateParameter(int64 parameter_number, diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index cdea3d5978..4091ebbfd3 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -207,11 +207,6 @@ HloModuleProto HloModule::ToProto() const { proto.set_name(name_); proto.set_entry_computation_name(entry_computation_->name()); for (const HloComputation* computation : MakeComputationPostOrder()) { - // Fusion computations are added when the fusion instructions are created by - // HloInstruction::CreateFromProto. - if (computation->IsFusionComputation()) { - continue; - } HloComputationProto computation_proto = computation->ToProto(); if (computation->name() == entry_computation_->name()) { *proto.mutable_program_shape() = computation_proto.program_shape(); @@ -256,16 +251,9 @@ StatusOr> HloModule::CreateFromProto( tensorflow::gtl::FlatMap computation_map; for (const HloComputationProto& computation_proto : proto.computations()) { - TF_ASSIGN_OR_RETURN( - std::unique_ptr computation, - HloComputation::CreateFromProto( - module.get(), computation_proto, computation_map, - /*add_fused_computation=*/ - [&module](std::unique_ptr fused_computation) { - module->AddComputationInternal(std::move(fused_computation), - /*is_entry=*/false, - /*uniquify_names=*/false); - })); + TF_ASSIGN_OR_RETURN(std::unique_ptr computation, + HloComputation::CreateFromProto( + module.get(), computation_proto, computation_map)); CHECK_NE(computation.get(), nullptr); TF_RET_CHECK(!ContainsKey(computation_map, computation->name())); string computation_name = computation->name(); @@ -283,10 +271,6 @@ StatusOr> HloModule::CreateFromProto( tensorflow::gtl::FlatSet computation_names; tensorflow::gtl::FlatSet instruction_names; for (HloComputation* computation : module->computations()) { - if (computation->IsFusionComputation()) { - continue; - } - TF_RET_CHECK(!ContainsKey(computation_names, computation->name())) << "Computation name is not unique: " << computation->name(); computation_names.insert(computation->name()); -- GitLab From 38a5c2dba2806951ae4defba0f1392469ae422de Mon Sep 17 00:00:00 2001 From: joel-shor Date: Tue, 20 Mar 2018 23:46:39 +0200 Subject: [PATCH 265/960] Hopefully final indent fix. --- .../contrib/data/python/kernel_tests/resample_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 99e56e9a31..0e3131b725 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -88,10 +88,10 @@ class ResampleTest(test.TestCase): # Reshape distribution. dataset = dataset.apply( - resampling.rejection_resample( - class_func=lambda x: x, - target_dist=target_dist, - initial_dist=init_dist)) + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) get_next = dataset.make_one_shot_iterator().get_next() @@ -102,7 +102,8 @@ class ResampleTest(test.TestCase): returned.append(sess.run(get_next)) classes, _ = zip(*returned) - bincount = np.bincount(np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) + bincount = np.bincount( + np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) self.assertAllClose(target_dist, bincount, atol=1e-2) -- GitLab From c3ed1c402ff5d21f2e46b931e95f87991f2c3099 Mon Sep 17 00:00:00 2001 From: Terry Koo Date: Tue, 20 Mar 2018 14:52:58 -0700 Subject: [PATCH 266/960] Revert "Adds missing protobuf dep to tf.contrib.data ops. (#17840)" (#17864) * Revert "Adds missing protobuf dep to tf.contrib.data ops. (#17840)" This reverts commit 36ec749ec79c2313924666a1c5324620e493d0c4. * Protect lib_proto_parsing dep with if_static(). --- tensorflow/contrib/data/BUILD | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 5ba2297e7f..d787ed8a1a 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -9,6 +9,10 @@ load( "tf_custom_op_library", "tf_gen_op_libs", ) +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "if_static", +) py_library( name = "data", @@ -29,10 +33,11 @@ py_library( tf_custom_op_library( name = "_dataset_ops.so", srcs = ["ops/dataset_ops.cc"], - deps = [ - "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/core:lib_proto_parsing", - ], + deps = ["//tensorflow/contrib/data/kernels:dataset_kernels"] + + if_static( + extra_deps = ["//tensorflow/core:lib_proto_parsing"], + otherwise = [], + ), ) tf_gen_op_libs( -- GitLab From b7f42a4ed5e363660562e9f020875d79ce3c1300 Mon Sep 17 00:00:00 2001 From: joel-shor Date: Wed, 21 Mar 2018 00:04:13 +0200 Subject: [PATCH 267/960] Really the last indendation bug fix. --- .../contrib/data/python/kernel_tests/resample_test.py | 3 ++- tensorflow/contrib/data/python/ops/resampling.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 0e3131b725..38efcd3cba 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -103,7 +103,8 @@ class ResampleTest(test.TestCase): classes, _ = zip(*returned) bincount = np.bincount( - np.array(classes), minlength=num_classes).astype(np.float32) / len(classes) + np.array(classes), + minlength=num_classes).astype(np.float32) / len(classes) self.assertAllClose(target_dist, bincount, atol=1e-2) diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index e440d4a35f..0e127f72cd 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -104,10 +104,10 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): def _gather_and_copy(class_val, acceptance_prob, data): return (class_val, array_ops.gather(acceptance_prob, class_val), data) current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) + current_probabilities_and_class_and_data_ds + .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) return _apply_fn -- GitLab From 8030be47e2eee7a43a55a349ca034e0c80abcc0b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 15:10:07 -0700 Subject: [PATCH 268/960] Tweak statistical testing test to avoid making a zillion TF session.run calls. PiperOrigin-RevId: 189819449 --- tensorflow/contrib/distributions/BUILD | 6 +-- .../kernel_tests/statistical_testing_test.py | 40 ++++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index e9c827a618..4ddec73ec8 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -486,11 +486,7 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], - tags = [ - "manual", - "noasan", - "noguitar", - ], + tags = ["noasan"], # Was found to time out in asan ) cuda_py_test( diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index 3548ac1807..fc071c273d 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -31,30 +31,34 @@ class StatisticalTestingTest(test.TestCase): def test_dkwm_design_mean_one_sample_soundness(self): numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] + def check_soundness(ff, fp): + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + return check_ops.assert_less_equal(detectable_d, numbers) with self.test_session() as sess: - for ff in rates: - for fp in rates: - sufficient_n = st.min_num_samples_for_dkwm_mean_test( - numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( - sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + sess.run([check_soundness(ff, fp) + for ff in rates + for fp in rates]) def test_dkwm_design_mean_two_sample_soundness(self): numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] + def check_soundness(ff, fp): + (sufficient_n1, + sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( + numbers, 0., 1., 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample + detectable_d = d_fn( + sufficient_n1, 0., 1., sufficient_n2, 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + return check_ops.assert_less_equal(detectable_d, numbers) with self.test_session() as sess: - for ff in rates: - for fp in rates: - (sufficient_n1, - sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( - numbers, 0., 1., 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample - detectable_d = d_fn( - sufficient_n1, 0., 1., sufficient_n2, 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + sess.run([check_soundness(ff, fp) + for ff in rates + for fp in rates]) def test_true_mean_confidence_interval_by_dkwm_one_sample(self): rng = np.random.RandomState(seed=0) -- GitLab From 44c55c0fbf531043368866683fa01bba42b9a1d0 Mon Sep 17 00:00:00 2001 From: Ankit Gupta Date: Tue, 20 Mar 2018 15:41:45 -0700 Subject: [PATCH 269/960] LoggingTensorHook to read from runconfig in Estimator (#17157) * got loggingtensor to read from runconfig * updated pydoc --- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/run_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 5245a050a1..6a4132bca2 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -849,7 +849,7 @@ class Estimator(object): 'loss': estimator_spec.loss, 'step': global_step_tensor }, - every_n_iter=100) + every_n_iter=self._config.log_step_count_steps) ]) worker_hooks.extend(estimator_spec.training_hooks) diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 62f035bce5..820fda7765 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -423,7 +423,7 @@ class RunConfig(object): to be saved. The default value of 10,000 hours effectively disables the feature. log_step_count_steps: The frequency, in number of global steps, that the - global step/sec will be logged during training. + global step/sec and the loss will be logged during training. Raises: -- GitLab From 0bd851b38810540034069d92a2f76a026429bced Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 20 Mar 2018 16:11:23 -0700 Subject: [PATCH 270/960] [XLA] Make HLO memory schedulers pluggable. Introduce a typedef MemorySchedulerAlgorithm which is a function instead of an enum to allow experimentation with non-standard schedulers. Refactoring only; no functional changes to the scheduling itself. PiperOrigin-RevId: 189830685 --- .../xla/service/hlo_rematerialization.cc | 2 +- .../xla/service/hlo_rematerialization.h | 6 +- .../xla/service/hlo_rematerialization_test.cc | 16 ++--- .../compiler/xla/service/hlo_scheduling.cc | 61 +++++++++++-------- .../compiler/xla/service/hlo_scheduling.h | 43 +++++++++---- .../xla/service/hlo_scheduling_test.cc | 3 +- 6 files changed, 80 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 98b8d34be1..b063244893 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1320,7 +1320,7 @@ StatusOr HloRematerialization::Run( /* static */ StatusOr HloRematerialization::RematerializeAndSchedule( const HloRematerialization::ShapeSizeFunction& size_function, int64 memory_limit_bytes, HloModule* hlo_module, - SchedulerAlgorithm scheduler_algorithm, + MemorySchedulerAlgorithm scheduler_algorithm, SequentialHloOrdering::HloModuleSequence* sequence, RematerializationSizes* sizes) { HloRematerialization remat(scheduler_algorithm, size_function); diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h index 5255343903..2ee2dd0571 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.h +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h @@ -66,12 +66,12 @@ class HloRematerialization { // code generation. static StatusOr RematerializeAndSchedule( const ShapeSizeFunction& size_function, int64 memory_limit_bytes, - HloModule* hlo_module, SchedulerAlgorithm scheduler_algorithm, + HloModule* hlo_module, MemorySchedulerAlgorithm scheduler_algorithm, SequentialHloOrdering::HloModuleSequence* sequence, RematerializationSizes* sizes = nullptr); protected: - HloRematerialization(SchedulerAlgorithm scheduler_algorithm, + HloRematerialization(MemorySchedulerAlgorithm scheduler_algorithm, const ShapeSizeFunction& size_function) : scheduler_algorithm_(scheduler_algorithm), size_function_(size_function) {} @@ -108,7 +108,7 @@ class HloRematerialization { const HloInstruction* instruction) const; // Selects an algorithm to use for HLO scheduling. - SchedulerAlgorithm scheduler_algorithm_; + MemorySchedulerAlgorithm scheduler_algorithm_; // Function which computes the size of the top-level buffer of a shape. const ShapeSizeFunction size_function_; diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc index 1b7d26dde5..83de54f3fa 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization_test.cc @@ -162,7 +162,7 @@ TEST_F(HloRematerializationTest, SingleComputation) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/14 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // Root should not have changed. @@ -195,7 +195,7 @@ TEST_F(HloRematerializationTest, SingleComputationNoRematerialization) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/20 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); // No instructions should have been materialized. EXPECT_FALSE(changed); @@ -236,7 +236,7 @@ TEST_F(HloRematerializationTest, RematerializeAroundWhile) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/17 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // Only the entry computation should have a rematerialized instruction added. @@ -272,7 +272,7 @@ TEST_F(HloRematerializationTest, RematerializeEntryAndWhileBody) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/15 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // Both computations should have a rematerialized instruction added. @@ -314,7 +314,7 @@ TEST_F(HloRematerializationTest, RematerializeNestedComputations) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/13 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // All computations should have a rematerialized instruction added. @@ -385,7 +385,7 @@ TEST_F(HloRematerializationTest, RngNotRematerialized) { bool changed, HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/4 * ByteSizeOf(vec1024_shape_), - module.get(), SchedulerAlgorithm::kAuto, &sequence)); + module.get(), DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // The rng should not have been rematerialized. EXPECT_EQ(count_rngs(entry_computation), 1); @@ -480,7 +480,7 @@ TEST_F(HloRematerializationTest, InstructionRematerializedMultipleTimes) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/22 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); EXPECT_TRUE(changed); // The broadcast should have been rematerialized 3 times. @@ -577,7 +577,7 @@ TEST_P(IndirectUseTest, IndirectUseNotRematerialized) { HloRematerialization::RematerializeAndSchedule( ByteSizeOf, /*memory_limit_bytes=*/22 * 1024, module.get(), - SchedulerAlgorithm::kAuto, &sequence)); + DefaultMemoryScheduler, &sequence)); // Rematerialization should only occur if the rematerializable instruction has // no indirect uses. if (indirectly_used) { diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 099dd8dd8e..1a767628f6 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -340,7 +340,33 @@ int64 SumLogicalBufferSizes( return size; } -StatusOr> RunDFSMemoryScheduler( +StatusOr MinimumMemoryForComputation( + const HloComputation& computation, + const std::vector& sequence, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function) { + TF_ASSIGN_OR_RETURN( + HeapSimulator::Result result, + HeapSimulator::Run(MakeUnique(), computation, + sequence, points_to_analysis, size_function)); + return result.heap_size; +} + +StatusOr> CreateMemoryMinimizingSequence( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function, + const MemorySchedulerAlgorithm& algorithm) { + VLOG(2) << "Computation: " << computation.name(); + if (algorithm) { + return algorithm(computation, points_to_analysis, size_function); + } + return DefaultMemoryScheduler(computation, points_to_analysis, size_function); +} + +} // namespace + +StatusOr> DFSMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function) { @@ -397,32 +423,17 @@ StatusOr> RunDFSMemoryScheduler( return sequence; } -StatusOr MinimumMemoryForComputation( +StatusOr> ListMemoryScheduler( const HloComputation& computation, - const std::vector& sequence, const TuplePointsToAnalysis& points_to_analysis, const LogicalBuffer::SizeFunction& size_function) { - TF_ASSIGN_OR_RETURN( - HeapSimulator::Result result, - HeapSimulator::Run(MakeUnique(), computation, - sequence, points_to_analysis, size_function)); - return result.heap_size; + return ListScheduler::Run(computation, points_to_analysis, size_function); } -StatusOr> CreateMemoryMinimizingSequence( +StatusOr> DefaultMemoryScheduler( const HloComputation& computation, const TuplePointsToAnalysis& points_to_analysis, - const LogicalBuffer::SizeFunction& size_function, - SchedulerAlgorithm algorithm) { - VLOG(2) << "Computation: " << computation.name(); - if (algorithm == SchedulerAlgorithm::kListSchedule) { - return ListScheduler::Run(computation, points_to_analysis, size_function); - } - if (algorithm == SchedulerAlgorithm::kDfsSchedule) { - return RunDFSMemoryScheduler(computation, points_to_analysis, - size_function); - } - + const LogicalBuffer::SizeFunction& size_function) { // We try both a list-scheduler based ordering and a DFS based ordering, and // choose whichever returns a lower min-memory, not accounting for // fragmentation. @@ -432,7 +443,7 @@ StatusOr> CreateMemoryMinimizingSequence( // within the caller's context. But it's good enough for now. TF_ASSIGN_OR_RETURN( std::vector list_sequence, - ListScheduler::Run(computation, points_to_analysis, size_function)); + ListMemoryScheduler(computation, points_to_analysis, size_function)); TF_ASSIGN_OR_RETURN( const int64 list_memory, MinimumMemoryForComputation(computation, list_sequence, @@ -441,7 +452,7 @@ StatusOr> CreateMemoryMinimizingSequence( TF_ASSIGN_OR_RETURN( std::vector dfs_sequence, - RunDFSMemoryScheduler(computation, points_to_analysis, size_function)); + DFSMemoryScheduler(computation, points_to_analysis, size_function)); TF_ASSIGN_OR_RETURN( const int64 dfs_memory, MinimumMemoryForComputation(computation, dfs_sequence, points_to_analysis, @@ -459,12 +470,10 @@ StatusOr> CreateMemoryMinimizingSequence( } } -} // namespace - StatusOr CreateMemoryMinimizingSequence(const HloModule& module, const LogicalBuffer::SizeFunction& size_function, - SchedulerAlgorithm algorithm) { + const MemorySchedulerAlgorithm& algorithm) { SequentialHloOrdering::HloModuleSequence sequence; TF_ASSIGN_OR_RETURN(std::unique_ptr points_to_analysis, TuplePointsToAnalysis::Run(&module)); @@ -480,7 +489,7 @@ CreateMemoryMinimizingSequence(const HloModule& module, StatusOr> CreateMemoryMinimizingSequence( const HloComputation& computation, const LogicalBuffer::SizeFunction& size_function, - SchedulerAlgorithm algorithm) { + const MemorySchedulerAlgorithm& algorithm) { CHECK(!computation.IsFusionComputation()); TF_ASSIGN_OR_RETURN(std::unique_ptr points_to_analysis, TuplePointsToAnalysis::Run(computation.parent())); diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.h b/tensorflow/compiler/xla/service/hlo_scheduling.h index 1d1eb1e064..068e68383d 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.h +++ b/tensorflow/compiler/xla/service/hlo_scheduling.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_ordering.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" +#include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -33,28 +34,48 @@ StatusOr MinimumMemoryForSequence( const SequentialHloOrdering::HloModuleSequence& module_sequence, const LogicalBuffer::SizeFunction& size_function); -enum class SchedulerAlgorithm { - kListSchedule, - kDfsSchedule, +// A memory scheduler computes an execution sequence for the HLO instructions in +// 'computation' that minimizes peak memory, given a points-to analysis result +// that describes buffer aliasing, together with a target-specific size function +// that maps a tensor's logical size to its padded size. +typedef std::function>( + const HloComputation&, const TuplePointsToAnalysis&, + const LogicalBuffer::SizeFunction&)> + MemorySchedulerAlgorithm; - // Selects the available scheduler algorithm that had the minimum memory in - // the resulting sequence (a la MinimumMemoryForSequence). - kAuto, -}; +// List scheduler +StatusOr> ListMemoryScheduler( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function); + +// DFS-order scheduler +StatusOr> DFSMemoryScheduler( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function); + +// The default scheduling algorithm. Runs both the list scheduler +// and the DFS scheduler, and chooses whichever returns a lower min-memory, +// not accounting for fragmentation. +StatusOr> DefaultMemoryScheduler( + const HloComputation& computation, + const TuplePointsToAnalysis& points_to_analysis, + const LogicalBuffer::SizeFunction& size_function); // Returns an HloModuleSequence which seeks to minimize the memory required for // the computation. size_function is the function returning the number of bytes // required for a LogicalBuffer. StatusOr -CreateMemoryMinimizingSequence( - const HloModule& module, const LogicalBuffer::SizeFunction& size_function, - SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto); +CreateMemoryMinimizingSequence(const HloModule& module, + const LogicalBuffer::SizeFunction& size_function, + const MemorySchedulerAlgorithm& algorithm = {}); // Overload of above that computes the sequence for a single computation. StatusOr> CreateMemoryMinimizingSequence( const HloComputation& computation, const LogicalBuffer::SizeFunction& size_function, - SchedulerAlgorithm algorithm = SchedulerAlgorithm::kAuto); + const MemorySchedulerAlgorithm& algorithm = {}); } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index 2dd6e43851..74544c4a67 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -165,8 +165,7 @@ ENTRY root { }; TF_ASSERT_OK_AND_ASSIGN( SequentialHloOrdering::HloModuleSequence sequence, - CreateMemoryMinimizingSequence(*module, size_fn, - SchedulerAlgorithm::kListSchedule)); + CreateMemoryMinimizingSequence(*module, size_fn, ListMemoryScheduler)); // Verify that all instructions are in the sequence. EXPECT_EQ(module->entry_computation()->instruction_count(), sequence.at(module->entry_computation()).size()); -- GitLab From 49ee96a60bea1b595cff3cb550cfc8d2ade5ed8b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 16:13:58 -0700 Subject: [PATCH 271/960] [XLA] Use IDs instead of names to represent the edges of HLO graph in hlo.proto. PiperOrigin-RevId: 189831057 --- .../xla/client/xla_client/xla_builder.cc | 17 +++--- tensorflow/compiler/xla/service/hlo.proto | 34 +++++++----- .../compiler/xla/service/hlo_computation.cc | 27 +++++----- .../compiler/xla/service/hlo_computation.h | 18 ++++--- .../compiler/xla/service/hlo_instruction.cc | 54 ++++++++++--------- .../compiler/xla/service/hlo_instruction.h | 8 +-- tensorflow/compiler/xla/service/hlo_module.cc | 18 +++++-- 7 files changed, 104 insertions(+), 72 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 6328a4f350..8829fc6cca 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -99,16 +99,17 @@ StatusOr XlaBuilder::Build() { // Not all instructions can be roots. Walk backwards from the last added // instruction until a valid root is found. + entry.set_root_id(-1); for (int64 i = instructions_.size() - 1; i >= 0; i--) { TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(instructions_[i].opcode())); if (CanBeRoot(opcode)) { - entry.set_root_name(instructions_[i].name()); + entry.set_root_id(instructions_[i].id()); *program_shape->mutable_result() = instructions_[i].shape(); break; } } - if (entry.root_name().empty()) { + if (entry.root_id() == -1) { return FailedPrecondition("no root instruction was found"); } @@ -141,7 +142,9 @@ StatusOr XlaBuilder::Build() { XlaComputation computation(id); HloModuleProto* module = computation.mutable_proto(); module->set_name(entry.name()); + module->set_id(entry.id()); module->set_entry_computation_name(entry.name()); + module->set_entry_computation_id(entry.id()); *module->mutable_program_shape() = entry.program_shape(); for (auto& e : embedded_) { module->add_computations()->Swap(&e.second); @@ -162,8 +165,8 @@ XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, ShapeInference::InferBinaryOpShape( HloOpcode::kAdd, lhs_instr->shape(), rhs_instr->shape(), broadcast_dimensions)); - instr.add_operand_names(lhs_instr->name()); - instr.add_operand_names(rhs_instr->name()); + instr.add_operand_ids(lhs_instr->id()); + instr.add_operand_ids(rhs_instr->id()); return AddInstruction(std::move(instr)); }; return NoteErrorOrReturn(op()); @@ -195,11 +198,12 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, // Add input operands. for (const auto& operand : operands) { TF_ASSIGN_OR_RETURN(auto operand_instr, LookUpInstruction(operand)); - instr.add_operand_names(operand_instr->name()); + instr.add_operand_ids(operand_instr->id()); } // Add called computation. - *instr.add_called_computation_names() = computation.proto().name(); + instr.add_called_computation_ids( + computation.proto().entry_computation_id()); for (const HloComputationProto& e : computation.proto().computations()) { embedded_.insert({e.id(), e}); } @@ -229,6 +233,7 @@ XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr) { const int64 handle = instructions_.size(); + instr.set_id(handle); if (instr.name().empty()) { instr.set_name(StrCat(instr.opcode(), ".", handle)); } else { diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index b86fbd821b..406feadfd4 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -13,13 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// DO NOT USE THESE PROTO MESSAGES FOR ANYTHING OTHER THAN DEBUGGING. -// -// Don't use these protos in the real compilation or execution codepaths. The -// data format is meant for debugging only, and may change without notice. +// This proto file defines messages which represent the HLO module. This is a +// full fidelity serialization of the c++ HLO constructs. // // Many of the protos below are simple 1-to-1 serializations of the -// corresponding C++ classes. +// corresponding C++ classes, e.g., HloModule, HloComputation, and +// HloInstruction. // // FIELD NAMES ARE IMPORTANT // @@ -40,16 +39,17 @@ message HloInstructionProto { reserved "parameter_name"; reserved 12; reserved "fused_instructions_computation"; + reserved 4; + reserved "operand_names"; + reserved 5; + reserved "control_predecessor_names"; + reserved 6; + reserved "called_computation_names"; string name = 1; string opcode = 2; xla.Shape shape = 3; - // TODO(b/67782397): Replace instruction names with HloInstruction ids. - repeated string operand_names = 4; - repeated string control_predecessor_names = 5; - repeated string called_computation_names = 6; - xla.OpMetadata metadata = 7; // Literal, only present for kConstant. @@ -137,30 +137,38 @@ message HloInstructionProto { // The id of this instruction. int64 id = 35; + + repeated int64 operand_ids = 36; + repeated int64 control_predecessor_ids = 37; + repeated int64 called_computation_ids = 38; } // Serialization of HloComputation. message HloComputationProto { + reserved 3; + reserved "root_name"; + string name = 1; // The array of instructions is always in a valid dependency order, where // operands appear before their users. repeated HloInstructionProto instructions = 2; - // The name of the root of the computation. - string root_name = 3; - // The program shape (with layout) of this computation. xla.ProgramShape program_shape = 4; // The id of this computation. int64 id = 5; + + // The id of the root of the computation. + int64 root_id = 6; } // Serialization of HloModule. message HloModuleProto { string name = 1; string entry_computation_name = 2; + int64 entry_computation_id = 6; // The array of computations is always in a valid dependency order, where // callees appear before their callers. diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 4e852190a8..6f983d0b95 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -65,6 +65,7 @@ HloComputation::HloComputation( std::vector>* instructions, HloInstruction* root_instruction, HloInstruction* fusion_instruction) : name_(name), + unique_id_(-1), root_instruction_(root_instruction), fusion_instruction_(fusion_instruction) { param_instructions_.resize(parameter_count, nullptr); @@ -101,7 +102,7 @@ HloInstruction* HloComputation::AddInstructionInternal( instruction->UniquifyName(&parent()->instruction_name_uniquer()); instruction->SetUniqueId(parent()->NewUniqueInstructionId()); } - Reparent(instruction.get()); + instruction->set_parent(this); HloInstruction* pinst = instruction.get(); instruction_iterators_[pinst] = instructions_.insert(instructions_.end(), std::move(instruction)); @@ -158,10 +159,6 @@ Status HloComputation::RemoveParameter(int64 param_no) { return Status::OK(); } -void HloComputation::Reparent(HloInstruction* instruction) { - instruction->set_parent(this); -} - bool HloComputation::IsRemovable(const HloInstruction* instruction) { // If the instruction has control predecessors or successors then we cannot // remove the instruction without violating ordering constraints (added, for @@ -393,12 +390,16 @@ string HloComputation::ToString(const HloPrintOptions& options) const { HloComputationProto HloComputation::ToProto() const { HloComputationProto proto; + CHECK(unique_id_ != -1) + << "This computation does not have a valid id. Please make sure the " + "computation is inside a module before dumping it."; + proto.set_id(unique_id_); proto.set_name(name_); for (const HloInstruction* instruction : MakeInstructionPostOrder()) { HloInstructionProto instruction_proto = instruction->ToProto(); proto.add_instructions()->Swap(&instruction_proto); } - proto.set_root_name(root_instruction()->name()); + proto.set_root_id(root_instruction()->unique_id()); *proto.mutable_program_shape() = ComputeProgramShape(); return proto; } @@ -406,9 +407,9 @@ HloComputationProto HloComputation::ToProto() const { /* static */ StatusOr> HloComputation::CreateFromProto( HloModule* module, const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map) { + const tensorflow::gtl::FlatMap& computation_map) { std::vector> instructions; - tensorflow::gtl::FlatMap instruction_map; + tensorflow::gtl::FlatMap instruction_map; int64 parameter_count = 0; for (const HloInstructionProto& instruction_proto : proto.instructions()) { TF_ASSIGN_OR_RETURN( @@ -418,14 +419,14 @@ HloComputation::CreateFromProto( if (instruction->opcode() == HloOpcode::kParameter) { parameter_count++; } - TF_RET_CHECK(!ContainsKey(instruction_map, instruction->name())); - instruction_map[instruction->name()] = instruction.get(); + TF_RET_CHECK(!ContainsKey(instruction_map, instruction_proto.id())); + instruction_map[instruction_proto.id()] = instruction.get(); instructions.push_back(std::move(instruction)); } - TF_RET_CHECK(!proto.root_name().empty()); - TF_RET_CHECK(ContainsKey(instruction_map, proto.root_name())); - HloInstruction* root = instruction_map.at(proto.root_name()); + TF_RET_CHECK(proto.root_id() != -1); + TF_RET_CHECK(ContainsKey(instruction_map, proto.root_id())); + HloInstruction* root = instruction_map.at(proto.root_id()); return WrapUnique(new HloComputation(proto.name(), parameter_count, &instructions, root, /*fusion_instruction=*/nullptr)); diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 630d3675de..9d3f6e9a2c 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -160,12 +160,12 @@ class HloComputation { // module: the module which will contain the computation. The newly created // computation is *not* added to the module, however. // proto: the proto to convert from. - // computation_map: a map from computation name to HloComputation*. This map + // computation_map: a map from computation id to HloComputation*. This map // must contain all computations which the newly constructed computation // calls. static StatusOr> CreateFromProto( HloModule* module, const HloComputationProto& proto, - const tensorflow::gtl::FlatMap& computation_map); + const tensorflow::gtl::FlatMap& computation_map); // Gets the instructions in this computation. // @@ -334,6 +334,15 @@ class HloComputation { fusion_instruction_ = fusion_instruction; } + // The id of this computation should be unique within the module. + void SetUniqueId(int64 id) { + CHECK_EQ(unique_id_, -1); + CHECK_GE(id, 0); + unique_id_ = id; + } + + int64 unique_id() const { return unique_id_; } + private: explicit HloComputation( const string& name, int parameter_count, @@ -344,10 +353,6 @@ class HloComputation { HloInstruction* AddInstructionInternal( std::unique_ptr instruction); - // Helper for setting the parent of instructions that are added to this - // computation. - void Reparent(HloInstruction* instruction); - // Fuses HLOs in instructions_to_fuse into fusion_instruction. // // Pre-condition: fusion_instruction's opcode is kFusion. @@ -365,6 +370,7 @@ class HloComputation { std::vector CollectUnreachableRoots() const; string name_; + int64 unique_id_; HloInstruction* root_instruction_; // If this computation is a fusion computation, this field points to the diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 83fcc5da6d..a2a2c1e615 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -52,22 +52,22 @@ using ::tensorflow::strings::StrCat; /* static */ StatusOr> HloInstruction::CreateFromProto( HloModule* module, const HloInstructionProto& proto, - const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map) { + const tensorflow::gtl::FlatMap& instruction_map, + const tensorflow::gtl::FlatMap& computation_map) { TF_RET_CHECK(!proto.opcode().empty()); TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode())); TF_RET_CHECK(proto.has_shape()); auto instruction = WrapUnique(new HloInstruction(opcode, proto.shape())); - for (const string& operand_name : proto.operand_names()) { - TF_RET_CHECK(ContainsKey(instruction_map, operand_name)) - << "No instruction named " << operand_name; - instruction->AppendOperand(instruction_map.at(operand_name)); - } - for (const string& predecessor_name : proto.control_predecessor_names()) { - TF_RET_CHECK(ContainsKey(instruction_map, predecessor_name)) - << "No instruction named " << predecessor_name; - TF_RETURN_IF_ERROR(instruction_map.at(predecessor_name) + for (const int64 operand_id : proto.operand_ids()) { + TF_RET_CHECK(ContainsKey(instruction_map, operand_id)) + << "No instruction with id " << operand_id; + instruction->AppendOperand(instruction_map.at(operand_id)); + } + for (const int64 predecessor_id : proto.control_predecessor_ids()) { + TF_RET_CHECK(ContainsKey(instruction_map, predecessor_id)) + << "No instruction with id " << predecessor_id; + TF_RETURN_IF_ERROR(instruction_map.at(predecessor_id) ->AddControlDependencyTo(instruction.get())); } @@ -80,21 +80,21 @@ StatusOr> HloInstruction::CreateFromProto( StringToFusionKind(proto.fusion_kind())); // Find the fused computation and set its fusion instruction. - TF_RET_CHECK(proto.called_computation_names_size() == 1) + TF_RET_CHECK(proto.called_computation_ids_size() == 1) << "Expect 1 called computation for fusion instruction, but sees " - << proto.called_computation_names_size(); - const string& fusion_name = proto.called_computation_names(0); - auto* fused_computation = FindPtrOrNull(computation_map, fusion_name); + << proto.called_computation_ids_size(); + const int64 fusion_id = proto.called_computation_ids(0); + auto* fused_computation = FindPtrOrNull(computation_map, fusion_id); TF_RET_CHECK(fused_computation != nullptr) - << "No fusion computation named " << fusion_name; + << "No fusion computation with id " << fusion_id; fused_computation->SetFusionInstruction(instruction.get()); instruction->called_computations_.push_back(fused_computation); } else { - for (const string& computation_name : proto.called_computation_names()) { - TF_RET_CHECK(ContainsKey(computation_map, computation_name)) - << "No computation named " << computation_name; + for (const int64 computation_id : proto.called_computation_ids()) { + TF_RET_CHECK(ContainsKey(computation_map, computation_id)) + << "No computation with id " << computation_id; instruction->called_computations_.push_back( - computation_map.at(computation_name)); + computation_map.at(computation_id)); } } @@ -2315,14 +2315,18 @@ string HloInstruction::ToShortString() const { HloInstructionProto HloInstruction::ToProto() const { HloInstructionProto proto; + CHECK(unique_id_ != -1) + << "This instruction does not have a valid id. Please make sure the " + "instruction is inside a module before dumping it."; + proto.set_id(unique_id_); proto.set_name(name_); proto.set_opcode(HloOpcodeString(opcode_)); *proto.mutable_shape() = shape_; for (const HloInstruction* operand : operands_) { - *proto.add_operand_names() = operand->name(); + proto.add_operand_ids(operand->unique_id()); } for (const HloInstruction* control : control_predecessors_) { - *proto.add_control_predecessor_names() = control->name(); + proto.add_control_predecessor_ids(control->unique_id()); } *proto.mutable_metadata() = metadata_; @@ -2332,11 +2336,11 @@ HloInstructionProto HloInstruction::ToProto() const { proto.set_parameter_number(parameter_number_); if (opcode() == HloOpcode::kFusion) { proto.set_fusion_kind(xla::ToString(fusion_kind())); - *proto.add_called_computation_names() = - fused_instructions_computation()->name(); + proto.add_called_computation_ids( + fused_instructions_computation()->unique_id()); } else { for (const HloComputation* computation : called_computations_) { - *proto.add_called_computation_names() = computation->name(); + proto.add_called_computation_ids(computation->unique_id()); } } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index a111e1e4a6..a94ba145df 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -179,15 +179,15 @@ class HloInstruction { // module: the module which will contain the instruction. The newly created // instruction is *not* added to the module or any computation, however. // proto: the proto to convert from. - // instruction_map: a map from instruction name to HloInstruction*. This map + // instruction_map: a map from instruction id to HloInstruction*. This map // must contain all operands of the newly constructed instruction. - // computation_map: a map from computation name to HloComputation*. This map + // computation_map: a map from computation id to HloComputation*. This map // must contain all computations which the newly constructed instruction // calls. static StatusOr> CreateFromProto( HloModule* module, const HloInstructionProto& proto, - const tensorflow::gtl::FlatMap& instruction_map, - const tensorflow::gtl::FlatMap& computation_map); + const tensorflow::gtl::FlatMap& instruction_map, + const tensorflow::gtl::FlatMap& computation_map); // Creates a parameter-retrieving instruction. static std::unique_ptr CreateParameter(int64 parameter_number, diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 4091ebbfd3..2037764dae 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -83,6 +83,11 @@ HloComputation* HloModule::AddComputationInternal( for (auto* instruction : computation->instructions()) { instruction->SetUniqueId(NewUniqueInstructionId()); } + // Set unique id to this computation. + CHECK_NE(computation->root_instruction()->unique_id(), -1) + << "Root has no valid id: " << computation->ToString(); + computation->SetUniqueId(computation->root_instruction()->unique_id()); + computation->set_parent(this); computations_.push_back(std::move(computation)); return computations_.back().get(); @@ -204,8 +209,10 @@ string HloModule::ToString(const HloPrintOptions& options) const { HloModuleProto HloModule::ToProto() const { HloModuleProto proto; + proto.set_id(unique_id_); proto.set_name(name_); proto.set_entry_computation_name(entry_computation_->name()); + proto.set_entry_computation_id(entry_computation_->unique_id()); for (const HloComputation* computation : MakeComputationPostOrder()) { HloComputationProto computation_proto = computation->ToProto(); if (computation->name() == entry_computation_->name()) { @@ -249,19 +256,20 @@ StatusOr> HloModule::CreateFromProto( auto module = MakeUnique(proto.name(), entry_computation_handle, module_config); - tensorflow::gtl::FlatMap computation_map; + tensorflow::gtl::FlatMap computation_map; for (const HloComputationProto& computation_proto : proto.computations()) { TF_ASSIGN_OR_RETURN(std::unique_ptr computation, HloComputation::CreateFromProto( module.get(), computation_proto, computation_map)); CHECK_NE(computation.get(), nullptr); - TF_RET_CHECK(!ContainsKey(computation_map, computation->name())); - string computation_name = computation->name(); + int64 computation_id = computation_proto.id(); + TF_RET_CHECK(computation_id != -1); + TF_RET_CHECK(!ContainsKey(computation_map, computation_id)); // Don't uniquify names because we want names to be stable across // serialization and deserialization. - computation_map[computation_name] = module->AddComputationInternal( + computation_map[computation_id] = module->AddComputationInternal( std::move(computation), - /*is_entry=*/proto.entry_computation_name() == computation_name, + /*is_entry=*/proto.entry_computation_id() == computation_id, /*uniquify_names=*/false); } TF_RET_CHECK(module->entry_computation_ != nullptr); -- GitLab From 9d1c63a6516290a79b70c54aea1b8fd917be17f3 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 20 Mar 2018 16:32:22 -0700 Subject: [PATCH 272/960] Use softmax_crossentropy_with_logits_v2 in tf.keras since softmax_crossentropy_with_logits is deprecated. PiperOrigin-RevId: 189833677 --- tensorflow/python/keras/_impl/keras/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 04866fbe0f..7baf27642a 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -3373,7 +3373,7 @@ def categorical_crossentropy(target, output, from_logits=False): target * math_ops.log(output), axis=len(output.get_shape()) - 1) else: - return nn.softmax_cross_entropy_with_logits(labels=target, logits=output) + return nn.softmax_cross_entropy_with_logits_v2(labels=target, logits=output) @tf_export('keras.backend.sparse_categorical_crossentropy') -- GitLab From 4321469f1db7a6ff220c2415c63f433df6e7161d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 17:00:33 -0700 Subject: [PATCH 273/960] Fixing bug in MultitaskOptimizerWrapper where types of tensors were mismatching. PiperOrigin-RevId: 189837743 --- .../opt/python/training/multitask_optimizer_wrapper.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py index cb6c77a86f..9076cc9d12 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -22,6 +22,7 @@ import types import six from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -40,8 +41,10 @@ def _get_wrapper(fn, opt): def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, - lambda: fn(grad, *args, **kwargs)) + def call_fn(): + with ops.control_dependencies([fn(grad, *args, **kwargs)]): + return control_flow_ops.no_op() + return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, call_fn) wrapper = types.MethodType(wrapper, opt) return wrapper -- GitLab From 8dbcacd6a0ab68f2ebe90bda93bb915699313946 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 18:36:33 -0700 Subject: [PATCH 274/960] [XLA] Simpify XlaBuilder: extract common add instruction logic. PiperOrigin-RevId: 189848174 --- .../xla/client/xla_client/xla_builder.cc | 61 ++++++++----------- .../xla/client/xla_client/xla_builder.h | 6 +- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 8829fc6cca..82b61d4d51 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -51,21 +51,16 @@ bool CanBeRoot(HloOpcode opcode) { } } -void SetOpcode(HloInstructionProto* instr, HloOpcode opcode) { - instr->set_opcode(HloOpcodeString(opcode)); -} - } // namespace -StatusOr> XlaBuilder::GetShape(const XlaOp& op) const { +StatusOr XlaBuilder::GetShape(const XlaOp& op) const { TF_ASSIGN_OR_RETURN(auto instr, LookUpInstruction(op)); - return MakeUnique(instr->shape()); + return instr->shape(); } StatusOr XlaOp::GetShape() const { TF_RET_CHECK(builder_ != nullptr); - TF_ASSIGN_OR_RETURN(auto shape, builder_->GetShape(*this)); - return *shape; + return builder_->GetShape(*this); } XlaBuilder::XlaBuilder(const string& computation_name) @@ -158,49 +153,41 @@ XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { auto op = [&]() -> StatusOr { HloInstructionProto instr; - SetOpcode(&instr, HloOpcode::kAdd); - TF_ASSIGN_OR_RETURN(const auto* lhs_instr, LookUpInstruction(lhs)); - TF_ASSIGN_OR_RETURN(const auto* rhs_instr, LookUpInstruction(rhs)); - TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), - ShapeInference::InferBinaryOpShape( - HloOpcode::kAdd, lhs_instr->shape(), - rhs_instr->shape(), broadcast_dimensions)); - instr.add_operand_ids(lhs_instr->id()); - instr.add_operand_ids(rhs_instr->id()); - return AddInstruction(std::move(instr)); + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, lhs.GetShape()); + TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, rhs.GetShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferBinaryOpShape(HloOpcode::kAdd, lhs_shape, + rhs_shape, broadcast_dimensions)); + return AddInstruction(std::move(instr), HloOpcode::kAdd, {lhs, rhs}); }; return NoteErrorOrReturn(op()); } XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { HloInstructionProto instr; - SetOpcode(&instr, HloOpcode::kConstant); *instr.mutable_shape() = literal.shape(); *instr.mutable_literal() = literal.ToProto(); - return AddInstruction(std::move(instr)); + return AddInstruction(std::move(instr), HloOpcode::kConstant); } XlaOp XlaBuilder::Call(const XlaComputation& computation, tensorflow::gtl::ArraySlice operands) { auto op = [&]() -> StatusOr { HloInstructionProto instr; - SetOpcode(&instr, HloOpcode::kCall); - std::vector operand_shapes; + std::vector operand_shape_ptrs; + std::vector operand_shapes; for (const auto& operand : operands) { - TF_ASSIGN_OR_RETURN(const auto* input, LookUpInstruction(operand)); - operand_shapes.push_back(&input->shape()); + TF_ASSIGN_OR_RETURN(const Shape& shape, operand.GetShape()); + operand_shapes.push_back(shape); } + c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), + [](const Shape& shape) { return &shape; }); TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), ShapeInference::InferCallShape( - operand_shapes, + operand_shape_ptrs, /*to_apply=*/computation.GetProgramShape())); - // Add input operands. - for (const auto& operand : operands) { - TF_ASSIGN_OR_RETURN(auto operand_instr, LookUpInstruction(operand)); - instr.add_operand_ids(operand_instr->id()); - } - // Add called computation. instr.add_called_computation_ids( computation.proto().entry_computation_id()); @@ -208,7 +195,7 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, embedded_.insert({e.id(), e}); } - return AddInstruction(std::move(instr)); + return AddInstruction(std::move(instr), HloOpcode::kCall, operands); }; return NoteErrorOrReturn(op()); } @@ -217,7 +204,6 @@ XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, const string& name) { auto op = [&]() -> StatusOr { HloInstructionProto instr; - SetOpcode(&instr, HloOpcode::kParameter); if (parameter_numbers_.find(parameter_number) != parameter_numbers_.end()) { return InvalidArgument("parameter %lld already registered", parameter_number); @@ -226,20 +212,25 @@ XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, instr.set_parameter_number(parameter_number); instr.set_name(name); *instr.mutable_shape() = shape; - return AddInstruction(std::move(instr)); + return AddInstruction(std::move(instr), HloOpcode::kParameter); }; return NoteErrorOrReturn(op()); } -XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr) { +XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands) { const int64 handle = instructions_.size(); instr.set_id(handle); + instr.set_opcode(HloOpcodeString(opcode)); if (instr.name().empty()) { instr.set_name(StrCat(instr.opcode(), ".", handle)); } else { // Append the handle to make sure the name is unique. instr.set_name(StrCat(instr.name(), ".", handle)); } + for (const auto& operand : operands) { + instr.add_operand_ids(operand.handle()); + } instructions_.push_back(instr); XlaOp op(handle, this); diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 7632bd289d..f1d10ecdb9 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" @@ -157,14 +158,15 @@ class XlaBuilder { XlaOp ConstantR0(NativeT value); // Returns the shape of the given op. - StatusOr> GetShape(const XlaOp& op) const; + StatusOr GetShape(const XlaOp& op) const; // Builds the computation with the requested operations, or returns a non-ok // status. StatusOr Build(); private: - XlaOp AddInstruction(HloInstructionProto&& instr); + XlaOp AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands = {}); // Notes that the error occurred by: // * storing it internally and capturing a backtrace if it's the first error -- GitLab From 8e4ee96bcce043bdb221e8fe116581ae90bae00d Mon Sep 17 00:00:00 2001 From: "Ziyue(Louis) Lu" Date: Tue, 20 Mar 2018 18:43:20 -0700 Subject: [PATCH 275/960] Updated README.md Edited the word 'lets you' to 'enables you to', which I think is better for this sentence. The word 'enable' here gives the readers a feeling that the architecture makes some awesome things possible, while 'let' is more like to give some permissions to the users. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3cdb6e478d..0a309ebe2d 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while the graph edges represent the multidimensional data arrays (tensors) that flow -between them. This flexible architecture lets you deploy computation to one +between them. This flexible architecture enables you to deploy computation to one or more CPUs or GPUs in a desktop, server, or mobile device without rewriting code. TensorFlow also includes TensorBoard, a data visualization toolkit. -- GitLab From 03b742c7edd04d18ef0b1bdd5539fd543fe34c4a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 18:45:17 -0700 Subject: [PATCH 276/960] Minor documentation fix PiperOrigin-RevId: 189848838 --- tensorflow/python/training/saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 5f68eec6ce..5ef8bd9e9c 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1969,7 +1969,7 @@ def export_meta_graph(filename=None, saver_def: `SaverDef` protocol buffer. collection_list: List of string keys to collect. as_text: If `True`, writes the `MetaGraphDef` as an ASCII proto. - graph: The `Graph` to import into. If `None`, use the default graph. + graph: The `Graph` to export. If `None`, use the default graph. export_scope: Optional `string`. Name scope under which to extract the subgraph. The scope name will be striped from the node definitions for easy import later into new name scopes. If `None`, the whole graph -- GitLab From 239769f55cfd703270e14257cedb3abdf51a6423 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Tue, 20 Mar 2018 19:14:26 -0700 Subject: [PATCH 277/960] [XLA] Plumb hlo dump options via local client. PiperOrigin-RevId: 189851211 --- .../xla/client/executable_build_options.cc | 22 ++++++++ .../xla/client/executable_build_options.h | 15 ++++++ .../xla/python/local_computation_builder.i | 52 +++++++++++++++---- .../compiler/xla/python/numpy_bridge.cc | 3 +- tensorflow/compiler/xla/python/numpy_bridge.h | 3 ++ tensorflow/compiler/xla/python/xla_client.py | 2 + .../compiler/xla/service/local_service.cc | 10 ++++ 7 files changed, 94 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index d84f2018e1..4ff4da6215 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -76,6 +76,28 @@ ExecutableBuildOptions::generate_hlo_graph() const { return generate_hlo_graph_; } +ExecutableBuildOptions& ExecutableBuildOptions::set_dump_optimized_hlo_proto_to( + tensorflow::StringPiece dirpath) { + dump_optimized_hlo_proto_to_ = dirpath.ToString(); + return *this; +} + +const tensorflow::gtl::optional& +ExecutableBuildOptions::dump_optimized_hlo_proto_to() const { + return dump_optimized_hlo_proto_to_; +} + +ExecutableBuildOptions& ExecutableBuildOptions::set_dump_per_pass_hlo_proto_to( + tensorflow::StringPiece dirpath) { + dump_per_pass_hlo_proto_to_ = dirpath.ToString(); + return *this; +} + +const tensorflow::gtl::optional& +ExecutableBuildOptions::dump_per_pass_hlo_proto_to() const { + return dump_per_pass_hlo_proto_to_; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_hlo_profile(bool enabled) { hlo_profile_ = enabled; return *this; diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 3e18e5de64..85b2cd96cb 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/device_memory_allocator.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/optional.h" namespace xla { @@ -57,6 +58,18 @@ class ExecutableBuildOptions { ExecutableBuildOptions& set_generate_hlo_graph(string regex); const tensorflow::gtl::optional& generate_hlo_graph() const; + // If set, specifies a dirpath to dump the end-of-optimization-pipeline HLO + // protobuf to (as in DebugOptions). + ExecutableBuildOptions& set_dump_optimized_hlo_proto_to( + tensorflow::StringPiece dirpath); + const tensorflow::gtl::optional& dump_optimized_hlo_proto_to() const; + + // If set, specifies a dirpath to dump the per-pass-in-pipeline HLO protobufs + // to (as in DebugOptions). + ExecutableBuildOptions& set_dump_per_pass_hlo_proto_to( + tensorflow::StringPiece dirpath); + const tensorflow::gtl::optional& dump_per_pass_hlo_proto_to() const; + // If set, specifies that we should record an HLO profile during execution and // log it after execution (as in DebugOptions). ExecutableBuildOptions& set_hlo_profile(bool enabled); @@ -72,6 +85,8 @@ class ExecutableBuildOptions { Shape result_layout_; bool result_layout_set_ = false; tensorflow::gtl::optional generate_hlo_graph_; + tensorflow::gtl::optional dump_optimized_hlo_proto_to_; + tensorflow::gtl::optional dump_per_pass_hlo_proto_to_; DeviceMemoryAllocator* device_allocator_ = nullptr; }; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index ca91cf0d50..8f231d1a12 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -141,6 +141,33 @@ bool GetIntAttr(PyObject* o, const char* field, int64* result) { return true; } +// Returns "ok"; true if there is no error, false if there was an error. +bool HandleStringAttribute(PyObject* o, + const char* attr_name, + std::function f) { + if (!PyObject_HasAttrString(o, attr_name)) { + return true; // It's ok for the object to not have the attribute. + } + PyObject* attr = PyObject_GetAttrString(o, attr_name); + if (attr == nullptr) { + return false; // An error occurred getting the attribute. + } + if (attr == Py_None) { + Py_DECREF(attr); + return true; // The attribute is None, which we consider ok. + } + if (!PyString_Check(attr)) { + string message = tensorflow::strings::Printf("%s must be a string or none; got %s", + attr_name, numpy::PyObjectCppRepr(attr).c_str()); + PyErr_SetString(PyExc_TypeError, message.c_str()); + Py_DECREF(attr); + return false; // Type error, not ok. + } + f(PyString_AsString(attr)); + Py_DECREF(attr); + return true; // Handled string attribute, ok! +} + } } %} @@ -820,20 +847,23 @@ tensorflow::ImportNumpy(); if ($input == Py_None) { $1 = NULL; } else { - PyObject* o = PyObject_GetAttrString($input, "generate_hlo_graph"); - if (!o) { - return NULL; + if (!HandleStringAttribute($input, "generate_hlo_graph", [&](string s) { + build_options.set_generate_hlo_graph(std::move(s)); + })) { + return nullptr; } - if (o != Py_None) { - if (!PyString_Check(o)) { - PyErr_SetString(PyExc_TypeError, "ExecutableBuildOptions.generate_hlo_graph must be a string or None."); - return NULL; - } - build_options.set_generate_hlo_graph(PyString_AsString(o)); + if (!HandleStringAttribute($input, "dump_optimized_hlo_proto_to", [&](string s) { + build_options.set_dump_optimized_hlo_proto_to(std::move(s)); + })) { + return nullptr; + } + if (!HandleStringAttribute($input, "dump_per_pass_hlo_proto_to", [&](string s) { + build_options.set_dump_per_pass_hlo_proto_to(std::move(s)); + })) { + return nullptr; } - Py_DECREF(o); - o = PyObject_GetAttrString($input, "hlo_profile"); + PyObject* o = PyObject_GetAttrString($input, "hlo_profile"); if (o == NULL) { return NULL; } diff --git a/tensorflow/compiler/xla/python/numpy_bridge.cc b/tensorflow/compiler/xla/python/numpy_bridge.cc index 3d87480728..eec48479c9 100644 --- a/tensorflow/compiler/xla/python/numpy_bridge.cc +++ b/tensorflow/compiler/xla/python/numpy_bridge.cc @@ -170,8 +170,7 @@ static string PyObjectCppStr(PyObject* o) { return ExtractStringAndDecref(s); } -// Safely returns a repr of the given Python object o as a C++ string. -static string PyObjectCppRepr(PyObject* o) { +string PyObjectCppRepr(PyObject* o) { PyObject* r = PyObject_Repr(o); return ExtractStringAndDecref(r); } diff --git a/tensorflow/compiler/xla/python/numpy_bridge.h b/tensorflow/compiler/xla/python/numpy_bridge.h index adfcc3b858..9656cb1c31 100644 --- a/tensorflow/compiler/xla/python/numpy_bridge.h +++ b/tensorflow/compiler/xla/python/numpy_bridge.h @@ -107,6 +107,9 @@ void CopyLiteralToNumpyArray(const Literal& literal, PyArrayObject* py_array) { std::copy(source.begin(), source.end(), dest); } +// Safely returns a repr of the given Python object o as a C++ string. +string PyObjectCppRepr(PyObject* o); + // Workarounds for Python 2 and 3 interop PyObject* LongToPyIntOrPyLong(long x); // NOLINT diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index d747a0b65c..e548d420f4 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -320,6 +320,8 @@ class CompileOptions(object): def __init__(self): self.generate_hlo_graph = None + self.dump_optimized_hlo_proto_to = None + self.dump_per_pass_hlo_proto_to = None self.hlo_profile = False diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 74aa6eaa17..7fd1ccd1a8 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -125,6 +125,16 @@ StatusOr> LocalService::CompileExecutable( execution_options.mutable_debug_options()->set_xla_generate_hlo_graph( build_options.generate_hlo_graph().value()); } + if (build_options.dump_optimized_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_optimized_hlo_proto_to( + build_options.dump_optimized_hlo_proto_to().value()); + } + if (build_options.dump_per_pass_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_per_pass_hlo_proto_to( + build_options.dump_per_pass_hlo_proto_to().value()); + } if (build_options.result_layout() != nullptr) { *execution_options.mutable_shape_with_output_layout() = *build_options.result_layout(); -- GitLab From 5e8f1530b4f355d66f02929e948ed02c096bcaaa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 20 Mar 2018 19:48:18 -0700 Subject: [PATCH 278/960] Add reduce window tests for the cases when the input shape has 1 element. PiperOrigin-RevId: 189853631 --- .../compiler/xla/tests/reduce_window_test.cc | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 8b736f62f0..f66fb5cacc 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -1351,5 +1351,41 @@ ENTRY R2Window { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001})); } +TEST_F(ReduceWindowTextTest, R2EffectiveScalar) { + const string& hlo_string = R"( +HloModule R2Window +mul { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT mul = f32[] multiply(lhs, rhs) +} +ENTRY R2Window { + operand = f32[1,1]{1,0} parameter(0) + negate = f32[1,1]{1,0} negate(operand) + constant = f32[] constant(1) + ROOT reduce-window = f32[1,1]{1,0} reduce-window(negate, constant), window={size=1x1 pad=0_0x0_0}, to_apply=mul +} +)"; + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001})); +} + +TEST_F(ReduceWindowTextTest, R3EffectiveScalar) { + const string& hlo_string = R"( +HloModule R3Window +mul { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT mul = f32[] multiply(lhs, rhs) +} +ENTRY R3Window { + operand = f32[1,1,1]{2,1,0} parameter(0) + negate = f32[1,1,1]{2,1,0} negate(operand) + constant = f32[] constant(1) + ROOT reduce-window = f32[1,1,1]{2,1,0} reduce-window(negate, constant), window={size=1x1x1 pad=0_0x0_0x0_0}, to_apply=mul +} +)"; + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0.001})); +} + } // namespace } // namespace xla -- GitLab From aab6d07a0df3326ce416c331ff951a77fe802a8e Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Tue, 20 Mar 2018 20:25:03 -0700 Subject: [PATCH 279/960] Remove recently introduced LOG INFO statements from AvgPoolingOp PiperOrigin-RevId: 189856039 --- tensorflow/core/kernels/avgpooling_op.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index a763f1321f..c581d1451f 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -156,13 +156,11 @@ class AvgPoolingOp : public UnaryOp { TensorShape output_shape = params.forward_output_shape(); if (data_format_ == FORMAT_NCHW) { - LOG(INFO) << "DnnPoolingOp"; DnnPoolingOp::Compute( context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, data_format_, tensor_in, output_shape, /*propagate_nans=*/false); } else { - LOG(INFO) << "SpatialAvgPooling"; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); -- GitLab From 9ef37abc6f3da1d3d1699293f1afb52494125161 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 20 Mar 2018 20:28:36 -0700 Subject: [PATCH 280/960] Revert "Fix dataset resampling bug introduced by a bug in datasets itself. fixes #16606 " (#17874) --- .../data/python/kernel_tests/resample_test.py | 39 ------------------- .../contrib/data/python/ops/resampling.py | 11 +++--- 2 files changed, 5 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 38efcd3cba..913ab9b9f8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -21,11 +21,8 @@ import numpy as np from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.ops import random_ops from tensorflow.python.ops import string_ops -from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -71,42 +68,6 @@ class ResampleTest(test.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) - def testRandomClasses(self): - init_dist = [0.25, 0.25, 0.25, 0.25] - target_dist = [0.0, 0.0, 0.0, 1.0] - num_classes = len(init_dist) - num_samples = 100 # We don't need many samples to test a dirac-delta target distribution - data_np = np.random.choice(num_classes, num_samples, p=init_dist) - - dataset = dataset_ops.Dataset.from_tensor_slices(data_np) - - # Apply a random mapping that preserves the data distribution. - def _remap_fn(_): - return math_ops.cast(random_ops.random_uniform([1]) * num_classes, - dtypes.int32)[0] - dataset = dataset.map(_remap_fn) - - # Reshape distribution. - dataset = dataset.apply( - resampling.rejection_resample( - class_func=lambda x: x, - target_dist=target_dist, - initial_dist=init_dist)) - - get_next = dataset.make_one_shot_iterator().get_next() - - with self.test_session() as sess: - returned = [] - with self.assertRaises(errors.OutOfRangeError): - while True: - returned.append(sess.run(get_next)) - - classes, _ = zip(*returned) - bincount = np.bincount( - np.array(classes), - minlength=num_classes).astype(np.float32) / len(classes) - - self.assertAllClose(target_dist, bincount, atol=1e-2) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 0e127f72cd..f4015f19fb 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -101,13 +101,12 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): initial_dist_ds)) .map(maybe_warn_on_large_rejection)) - def _gather_and_copy(class_val, acceptance_prob, data): - return (class_val, array_ops.gather(acceptance_prob, class_val), data) - current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( - (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) + current_probabilities_ds = dataset_ops.Dataset.zip( + (acceptance_dist_ds, class_values_ds)).map(array_ops.gather) filtered_ds = ( - current_probabilities_and_class_and_data_ds - .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) + dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds, + dataset)) + .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) return _apply_fn -- GitLab From e79eb0b8de130bf905a101608681e9c18561356c Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 20 Mar 2018 20:28:38 -0700 Subject: [PATCH 281/960] Fix windows GPU build scripts. (#17870) PiperOrigin-RevId: 188629017 --- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index b87e4a9bec..4656afe025 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -37,7 +37,7 @@ SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" :: Run cmake to create Visual Studio Project files. -%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX -G"Visual Studio 14" :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 4e108ef30d7cd7ae5e1c550ec5ae27e79b8c6e39 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 20 Mar 2018 20:28:58 -0700 Subject: [PATCH 282/960] Revert "Windows: Enable tensorflow/contrib in Bazel build (#16659)" (#17774) * Revert "Windows: Enable tensorflow/contrib in Bazel build (#16659)" This reverts commit c6a12c77a50778e28de3590f4618bc2b62f3ecab. * Add kafka back to contrib_py rule. * Update __init__.py --- configure.py | 2 +- tensorflow/contrib/BUILD | 8 +- tensorflow/contrib/__init__.py | 6 +- .../boosted_trees/lib/utils/batch_features.h | 6 +- tensorflow/contrib/distributions/BUILD | 2 - tensorflow/contrib/eager/python/BUILD | 5 +- .../python/examples/linear_regression/BUILD | 1 - tensorflow/contrib/gan/BUILD | 1 - .../contrib/kfac/python/kernel_tests/BUILD | 1 - tensorflow/contrib/labeled_tensor/BUILD | 1 - tensorflow/contrib/layers/BUILD | 2 - tensorflow/contrib/learn/BUILD | 5 - tensorflow/contrib/lookup/BUILD | 1 - tensorflow/contrib/py2tf/converters/BUILD | 2 - tensorflow/contrib/py2tf/utils/BUILD | 1 - .../contrib/remote_fused_graph/pylib/BUILD | 1 + tensorflow/contrib/saved_model/BUILD | 1 - tensorflow/contrib/session_bundle/BUILD | 1 - .../contrib/slim/python/slim/data/BUILD | 1 - tensorflow/contrib/tensor_forest/BUILD | 1 + tensorflow/contrib/tensorboard/BUILD | 1 - tensorflow/contrib/timeseries/examples/BUILD | 5 +- .../timeseries/python/timeseries/BUILD | 5 +- .../timeseries/state_space_models/BUILD | 1 - tensorflow/contrib/tpu/BUILD | 1 - tensorflow/contrib/util/loader.py | 7 +- tensorflow/core/framework/dataset.h | 4 +- tensorflow/core/lib/core/stringpiece.cc | 2 + tensorflow/core/lib/core/stringpiece.h | 2 +- tensorflow/core/platform/tracing.h | 2 +- tensorflow/python/BUILD | 92 ++-------- tensorflow/python/debug/BUILD | 1 - tensorflow/python/keras/BUILD | 5 +- tensorflow/python/kernel_tests/BUILD | 4 + tensorflow/tensorflow.bzl | 20 +-- .../windows/cpu/pip/build_tf_windows.sh | 3 +- tensorflow/tools/def_file_filter/BUILD | 0 tensorflow/tools/def_file_filter/BUILD.tpl | 15 -- .../def_file_filter/def_file_filter.py.tpl | 168 ------------------ .../def_file_filter_configure.bzl | 56 ------ tensorflow/tools/pip_package/BUILD | 128 +++++++------ tensorflow/workspace.bzl | 8 +- 42 files changed, 125 insertions(+), 454 deletions(-) delete mode 100644 tensorflow/tools/def_file_filter/BUILD delete mode 100644 tensorflow/tools/def_file_filter/BUILD.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter.py.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter_configure.bzl diff --git a/configure.py b/configure.py index d14edef1be..7d61c2e5e3 100644 --- a/configure.py +++ b/configure.py @@ -1380,7 +1380,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.10.0') + check_bazel_version('0.5.4') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index c2663c5e83..d103da79e3 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,7 +8,6 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") -load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -40,6 +39,7 @@ py_library( "//tensorflow/contrib/estimator:estimator_py", "//tensorflow/contrib/factorization:factorization_py", "//tensorflow/contrib/feature_column:feature_column_py", + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/fused_conv:fused_conv_py", "//tensorflow/contrib/gan", @@ -51,6 +51,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -62,6 +63,7 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", + "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -108,10 +110,6 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code - "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code - "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 669d611b01..4f6f539027 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -85,8 +83,7 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -if os.name != 'nt': - from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs @@ -95,7 +92,6 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") -del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7815fa049a..da5e744851 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_int_features, (int64*) nullptr); + QCHECK_NE(num_dense_float_features, nullptr); + QCHECK_NE(num_sparse_float_features, nullptr); + QCHECK_NE(num_sparse_int_features, nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 80dd1ccd04..4ddec73ec8 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,7 +454,6 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1121,7 +1120,6 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 1c5ebbc6ca..4fba014d6f 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -270,10 +270,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index 2f6cfdf31e..f86331af6f 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,7 +22,6 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index ff6f3b7441..0eb0e3cbe2 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,7 +354,6 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index d1c449402a..146ae8b7e2 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,7 +114,6 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 544065dac6..894e6f6946 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,7 +70,6 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index cc7bbabf21..852d06e1e3 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,7 +188,6 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -354,7 +353,6 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index b05f5eeaee..9c59150580 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -117,7 +117,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -173,7 +172,6 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -192,7 +190,6 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -593,7 +590,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -823,7 +819,6 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 0a6edc33c5..8ca03f4193 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,7 +46,6 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, - tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 4bb6f76019..f624c42686 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -81,7 +81,6 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -92,7 +91,6 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 8bc338e801..d029289f5a 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -83,7 +83,6 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 54c66271cd..27f0a7f58f 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,6 +38,7 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index b10757df47..245fe07f2b 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,7 +53,6 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 6b5d8b323d..75a753ed89 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -163,7 +163,6 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 7aa1684839..5daabbd62e 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,7 +61,6 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 07b6b1f142..1e4cc3f095 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,6 +553,7 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index db2e000ef8..d833744d0c 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,7 +9,6 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index 70bf67c779..bb86ecb220 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,10 +25,7 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", # b/67513579 - ], + tags = ["notsan"], # b/67513579 deps = [ ":predict", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 64f5cd8357..ed3ed4c0e1 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -156,7 +156,9 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = ["no_pip_gpu"], # b/63391119 + tags = [ + "no_pip_gpu", # b/63391119 + ], deps = [ ":feature_keys", ":head", @@ -425,7 +427,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 - "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index 07df7bc9a5..c86d06e923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,7 +40,6 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 371d1b6672..eea19e9465 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -225,7 +225,6 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index dca01d26f4..f4283cd9ed 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,10 +42,9 @@ def load_op_library(path): plugin. """ if os.name == 'nt': - # To avoid making every user_ops aware of windows, re-write - # the file extension from .so to .dll if .so file doesn't exist. - if not os.path.exists(path): - path = re.sub(r'\.so$', '.dll', path) + # To avoid makeing every user_ops aware of windows, re-write + # the file extension from .so to .dll. + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index cfe23d1ffe..beaf0adbc5 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -474,11 +474,11 @@ class GraphDatasetBase : public DatasetBase { } // Key for storing the Dataset graph in the serialized format. - TF_EXPORT static const char kDatasetGraphKey[]; + static const char kDatasetGraphKey[]; // Key for storing the output node of the Dataset graph in the serialized // format. - TF_EXPORT static const char kDatasetGraphOutputNodeKey[]; + static const char kDatasetGraphOutputNodeKey[]; private: Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 0b006fa2b4..5bd79778a6 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -55,4 +55,6 @@ StringPiece StringPiece::substr(size_t pos, size_t n) const { return StringPiece(data_ + pos, n); } +const StringPiece::size_type StringPiece::npos = size_type(-1); + } // namespace tensorflow diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 835b938cbf..79409cce4b 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -65,7 +65,7 @@ class StringPiece { iterator begin() const { return data_; } iterator end() const { return data_ + size_; } - static const size_t npos = size_type(-1); + static const size_t npos; // Return the ith byte in the referenced data. // REQUIRES: n < size() diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index eebbeaeba6..8f7bff1bb0 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -103,7 +103,7 @@ class Tracing { friend class ScopedAnnotation; friend class TraceMe; - TF_EXPORT static std::atomic tracing_engine_; + static std::atomic tracing_engine_; static Tracing::Engine* engine() { return tracing_engine_.load(std::memory_order_acquire); } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 86548f4346..3a8ba2db04 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -28,7 +28,6 @@ load("//tensorflow:tensorflow.bzl", "py_tests") load("//tensorflow:tensorflow.bzl", "tf_py_build_info_genrule") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") -load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") @@ -87,7 +86,6 @@ py_library( ":ops", ":platform", ":pywrap_tensorflow", - ":saver_test_utils", ":script_ops", ":session_ops", ":sets", @@ -97,29 +95,31 @@ py_library( ":standard_ops", ":state_ops", ":string_ops", - ":subscribe", ":summary", ":tensor_array_ops", + ":training", + ":saver_test_utils", + ":subscribe", ":test_ops", # TODO: Break testing code out into separate rule. - ":tf_cluster", ":tf_item", + ":tf_cluster", ":tf_optimizer", - ":training", ":util", ":weights_broadcast_ops", - "//tensorflow/contrib:contrib_py", + "//third_party/py/numpy", "//tensorflow/core:protos_all_py", "//tensorflow/python/data", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", + "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", - "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - "//third_party/py/numpy", - ], + ] + if_not_windows([ + "//tensorflow/contrib:contrib_py", + ]), ) tf_py_build_info_genrule() @@ -947,6 +947,7 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1313,6 +1314,7 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1660,6 +1662,7 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -2728,6 +2731,7 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, + tags = ["no_windows"], ) cuda_py_test( @@ -3264,10 +3268,6 @@ tf_py_wrap_cc( "util/transform_graph.i", "util/util.i", ], - win_def_file = select({ - "//tensorflow:windows": ":pywrap_tensorflow_filtered_def_file", - "//conditions:default": None, - }), deps = [ ":bfloat16_lib", ":cost_analyzer_lib", @@ -3311,65 +3311,6 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) -# ** Targets for Windows build (start) ** -# We need the following targets to expose symbols from _pywrap_tensorflow.dll - -# Build a cc_binary from tf_custom_op_library_additional_deps_impl, -# it contains all object code from its dependencies. -cc_binary( - name = "tf_custom_op_library_additional_deps.so", - linkshared = 1, - linkstatic = 1, - deps = tf_custom_op_library_additional_deps_impl(), -) - -# Get a DEF file generated by parsing all object files -# of tf_custom_op_library_additional_deps.so -filegroup( - name = "pywrap_tensorflow_def_file", - srcs = [":tf_custom_op_library_additional_deps.so"], - output_group = "def_file", -) - -# Filter the DEF file to reduce the number of symbols to 64K or less. -# Note that we also write the name of the pyd file into DEF file so that -# the dynamic libraries of custom ops can find it at runtime. -genrule( - name = "pywrap_tensorflow_filtered_def_file", - srcs = [":pywrap_tensorflow_def_file"], - outs = ["pywrap_tensorflow_filtered_def_file.def"], - cmd = select({ - "//tensorflow:windows": """ - $(location @local_config_def_file_filter//:def_file_filter) \\ - --input $(location :pywrap_tensorflow_def_file) \\ - --output $@ \\ - --target _pywrap_tensorflow_internal.pyd - """, - "//conditions:default": "touch $@", # Just a placeholder for Unix platforms - }), - tools = ["@local_config_def_file_filter//:def_file_filter"], -) - -# Get the import library of _pywrap_tensorflow_internal.dll -filegroup( - name = "pywrap_tensorflow_import_lib_file", - srcs = [":_pywrap_tensorflow_internal.so"], - output_group = "interface_library", -) - -# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll -# so that custom ops' dynamic libraries can link against it. -cc_import( - name = "pywrap_tensorflow_import_lib", - interface_library = select({ - "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file", - "//conditions:default": "not_exsiting_on_unix.lib", # Just a placeholder for Unix platforms - }), - system_provided = 1, -) - -# ** Targets for Windows build (end) ** - py_library( name = "lib", srcs = [ @@ -3742,6 +3683,7 @@ py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":client_testlib", ":lib", @@ -4049,6 +3991,7 @@ py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":checkpoint_ops_gen", ":client", @@ -4089,7 +4032,10 @@ py_test( size = "medium", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], # b/67945581 + tags = [ + "no_windows", + "notsan", # b/67945581 + ], deps = [ ":array_ops", ":client_testlib", diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index c60f692390..512d292ee2 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -913,7 +913,6 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index c9b68594cf..3180b9f410 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -637,10 +637,7 @@ py_test( size = "small", srcs = ["_impl/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 3499086d0a..d9571fa2be 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -295,6 +295,7 @@ tf_py_test( "//tensorflow/python:nn_grad", ], data = ["//tensorflow/core:image_testdata"], + tags = ["no_windows"], ) tf_py_test( @@ -1140,6 +1141,7 @@ tf_py_test( "//tensorflow/python:variables", ], data = ["//tensorflow/core:lmdb_testdata"], + tags = ["no_windows"], ) cuda_py_test( @@ -2329,6 +2331,7 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 4, + tags = ["no_windows"], ) cuda_py_test( @@ -2459,6 +2462,7 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, + tags = ["no_windows"], ) cuda_py_test( diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index c8d175dcf2..9b0db8a112 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1176,22 +1176,6 @@ def tf_custom_op_library_additional_deps(): "@protobuf_archive//:protobuf_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), - ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"]) - -# A list of targets that contains the implemenation of -# tf_custom_op_library_additional_deps. It's used to generate a DEF file for -# exporting symbols from _pywrap_tensorflow.dll on Windows. -def tf_custom_op_library_additional_deps_impl(): - return [ - # for @protobuf_archive//:protobuf_headers - "@protobuf_archive//:protobuf", - # for @nsync//:nsync_headers - "@nsync//:nsync_cpp", - # for //third_party/eigen3 - clean_dep("//third_party/eigen3"), - # for //tensorflow/core:framework_headers_lib - clean_dep("//tensorflow/core:framework"), - clean_dep("//tensorflow/core:reader_base"), ] # Traverse the dependency graph along the "deps" attribute of the @@ -1278,7 +1262,6 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(is_external=True), - features = ["windows_export_all_symbols"], linkopts=linkopts + select({ "//conditions:default": [ "-lm", @@ -1425,8 +1408,7 @@ def tf_py_wrap_cc(name, ]) + tf_extension_copts()), linkopts=tf_extension_linkopts() + extra_linkopts, linkstatic=1, - deps=deps + extra_deps, - **kwargs) + deps=deps + extra_deps) native.genrule( name="gen_" + cc_library_pyd_name, srcs=[":" + cc_library_name], diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 40189a6d1b..8b8ba31a0d 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -65,5 +65,4 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ - //${PY_TEST_DIR}/tensorflow/python/... \ - //${PY_TEST_DIR}/tensorflow/contrib/... + //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl deleted file mode 100644 index 3cb72f4979..0000000000 --- a/tensorflow/tools/def_file_filter/BUILD.tpl +++ /dev/null @@ -1,15 +0,0 @@ -# Description: -# Tools for filtering DEF file for TensorFlow on Windows -# -# On Windows, we use a DEF file generated by Bazel to export -# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). -# The maximum number of symbols that can be exported per DLL is 64K, -# so we have to filter some useless symbols through this python script. - -package(default_visibility = ["//visibility:public"]) - -py_binary( - name = "def_file_filter", - srcs = ["def_file_filter.py"], - srcs_version = "PY2AND3", -) diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl deleted file mode 100644 index 8bdc03eb0f..0000000000 --- a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""def_file_filter.py - tool to filter a windows def file. - -The def file can be used to export symbols from the tensorflow dll to enable -tf.load_library(). - -Because the linker allows only 64K symbols to be exported per dll -we filter the symbols down to the essentials. The regular expressions -we use for this are specific to tensorflow. - -TODO: this works fine but there is an issue with exporting -'const char * const' and importing it from a user_ops. The problem is -on the importing end and using __declspec(dllimport) works around it. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import io -import os -import re -import subprocess -import sys -import tempfile - -# External tools we use that come with visual studio sdk -UNDNAME = "%{undname_bin_path}" - -# Exclude if matched -EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") - -# Include if matched before exclude -INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" - r"google::protobuf::internal::ArenaImpl::AllocateAligned|" # for contrib/data/_prefetching_ops - r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops - r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops - r"tensorflow::internal::LogMessage|" - r"tensorflow::internal::LogString|" - r"tensorflow::internal::CheckOpMessageBuilder|" - r"tensorflow::internal::MakeCheckOpValueString|" - r"tensorflow::internal::PickUnusedPortOrDie|" - r"tensorflow::internal::ValidateDevice|" - r"tensorflow::ops::internal::Enter|" - r"tensorflow::strings::internal::AppendPieces|" - r"tensorflow::strings::internal::CatPieces|" - r"tensorflow::io::internal::JoinPathImpl") - -# Include if matched after exclude -INCLUDE_RE = re.compile(r"^(TF_\w*)$|" - r"^(TFE_\w*)$|" - r"nsync::|" - r"tensorflow::|" - r"functor::|" - r"perftools::gputools") - -# We want to identify data members explicitly in the DEF file, so that no one -# can implicitly link against the DLL if they use one of the variables exported -# from the DLL and the header they use does not decorate the symbol with -# __declspec(dllimport). It is easier to detect what a data symbol does -# NOT look like, so doing it with the below regex. -DATA_EXCLUDE_RE = re.compile(r"[)(]|" - r"vftable|" - r"vbtable|" - r"vcall|" - r"RTTI|" - r"protobuf::internal::ExplicitlyConstructed") - -def get_args(): - """Parse command line.""" - filename_list = lambda x: x.split(";") - parser = argparse.ArgumentParser() - parser.add_argument("--input", type=filename_list, - help="paths to input def file", - required=True) - parser.add_argument("--output", help="output deffile", required=True) - parser.add_argument("--target", help="name of the target", required=True) - args = parser.parse_args() - return args - - -def main(): - """main.""" - args = get_args() - - # Pipe dumpbin to extract all linkable symbols from libs. - # Good symbols are collected in candidates and also written to - # a temp file. - candidates = [] - tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False) - for def_file_path in args.input: - def_file = open(def_file_path, 'r') - for line in def_file: - cols = line.split() - sym = cols[0] - tmpfile.file.write(sym + "\n") - candidates.append(sym) - tmpfile.file.close() - - # Run the symbols through undname to get their undecorated name - # so we can filter on something readable. - with open(args.output, "w") as def_fp: - # track dupes - taken = set() - - # Header for the def file. - def_fp.write("LIBRARY " + args.target + "\n") - def_fp.write("EXPORTS\n") - def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") - - # Each symbols returned by undname matches the same position in candidates. - # We compare on undname but use the decorated name from candidates. - dupes = 0 - proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) - for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): - decorated = candidates[idx] - if decorated in taken: - # Symbol is already in output, done. - dupes += 1 - continue - - if not INCLUDEPRE_RE.search(line): - if EXCLUDE_RE.search(line): - continue - if not INCLUDE_RE.search(line): - continue - - if "deleting destructor" in line: - # Some of the symbols convered by INCLUDEPRE_RE export deleting - # destructor symbols, which is a bad idea. - # So we filter out such symbols here. - continue - - if DATA_EXCLUDE_RE.search(line): - def_fp.write("\t" + decorated + "\n") - else: - def_fp.write("\t" + decorated + " DATA\n") - taken.add(decorated) - def_fp.close() - - exit_code = proc.wait() - if exit_code != 0: - print("{} failed, exit={}".format(UNDNAME, exit_code)) - return exit_code - - os.unlink(tmpfile.name) - - print("symbols={}, taken={}, dupes={}" - .format(len(candidates), len(taken), dupes)) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl deleted file mode 100644 index 47539b2423..0000000000 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ /dev/null @@ -1,56 +0,0 @@ -"""Repository rule for def file filter autoconfiguration. - -This repository reuses Bazel's VC detect mechanism to find undname.exe, -which is a tool used in def_file_filter.py. - -def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. -On Windows, we use a DEF file generated by Bazel to export symbols from the -tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of -symbols that can be exported per DLL is 64K, so we have to filter some useless -symbols through this python script. - -`def_file_filter_config` depends on the following environment variables: - * `BAZEL_VC` - * `BAZEL_VS` - * `VS90COMNTOOLS` - * `VS100COMNTOOLS` - * `VS110COMNTOOLS` - * `VS120COMNTOOLS` - * `VS140COMNTOOLS` -""" - -load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path") -load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool") -load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail") - -def _def_file_filter_configure_impl(repository_ctx): - if repository_ctx.os.name.lower().find("windows") == -1: - repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") - repository_ctx.file("def_file_filter.py", "") - return - vc_path = find_vc_path(repository_ctx) - if vc_path == "visual-studio-not-found": - auto_configure_fail("Visual C++ build tools not found on your machine") - undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") - - repository_ctx.template( - "def_file_filter.py", - Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"), - { - "%{undname_bin_path}": undname_bin_path, - }) - repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") - - -def_file_filter_configure = repository_rule( - implementation = _def_file_filter_configure_impl, - environ = [ - "BAZEL_VC", - "BAZEL_VS", - "VS90COMNTOOLS", - "VS100COMNTOOLS", - "VS110COMNTOOLS", - "VS120COMNTOOLS", - "VS140COMNTOOLS" - ], -) diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 2607b9d704..d55a883df5 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -48,65 +48,36 @@ py_binary( deps = ["//tensorflow:tensorflow_py"], ) -COMMON_PIP_DEPS = [ - ":licenses", - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/boosted_trees:boosted_trees_pip", - "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", - "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/eager/python:evaluator", - "//tensorflow/contrib/gan:gan", - "//tensorflow/contrib/graph_editor:graph_editor_pip", - "//tensorflow/contrib/keras:keras", - "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/py2tf:py2tf", - "//tensorflow/contrib/py2tf/converters:converters", - "//tensorflow/contrib/py2tf/converters:test_lib", - "//tensorflow/contrib/py2tf/impl:impl", - "//tensorflow/contrib/py2tf/pyct:pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", - "//tensorflow/contrib/receptive_field:receptive_field_pip", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/signal:test_util", - "//tensorflow/contrib/slim:slim", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - "//tensorflow/contrib/specs:specs", - "//tensorflow/contrib/summary:summary_test_util", - "//tensorflow/contrib/tensor_forest:init_py", - "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - "//tensorflow/contrib/timeseries:timeseries_pip", - "//tensorflow/contrib/tpu", - "//tensorflow/examples/tutorials/mnist:package", - "//tensorflow/python:distributed_framework_test_lib", - "//tensorflow/python:meta_graph_testdata", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/python/saved_model:saved_model", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python:test_ops", - "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", -] - # On Windows, python binary is a zip file of runfiles tree. # Add everything to its data dependency for generating a runfiles tree # for building the pip package on Windows. py_binary( name = "simple_console_for_windows", srcs = ["simple_console_for_windows.py"], - data = COMMON_PIP_DEPS, + data = [ + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/saved_model", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/contrib/summary:summary_test_util", + # These targets don't build on Windows yet. Exclude them for now. + # "//tensorflow/contrib/slim", + # "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + # "//tensorflow/contrib/specs", + # "//tensorflow/contrib/tensor_forest:init_py", + # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + # "//tensorflow/examples/tutorials/mnist:package", + ], srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) @@ -167,12 +138,61 @@ sh_binary( data = select({ "//tensorflow:windows": [":simple_console_for_windows"], "//tensorflow:windows_msvc": [":simple_console_for_windows"], - "//conditions:default": COMMON_PIP_DEPS + [ + "//conditions:default": [ + ":licenses", + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", ":simple_console", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/boosted_trees:boosted_trees_pip", + "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:checkpointable_utils", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/gan:gan", + "//tensorflow/contrib/graph_editor:graph_editor_pip", + "//tensorflow/contrib/keras:keras", + "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/lite/python:interpreter_test_data", "//tensorflow/contrib/lite/toco:toco", "//tensorflow/contrib/lite/toco/python:toco_wrapper", "//tensorflow/contrib/lite/toco/python:toco_from_protos", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/py2tf:py2tf", + "//tensorflow/contrib/py2tf/converters:converters", + "//tensorflow/contrib/py2tf/converters:test_lib", + "//tensorflow/contrib/py2tf/impl:impl", + "//tensorflow/contrib/py2tf/pyct:pyct", + "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", + "//tensorflow/contrib/slim:slim", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + "//tensorflow/contrib/specs:specs", + "//tensorflow/contrib/summary:summary_test_util", + "//tensorflow/contrib/tensor_forest:init_py", + "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + "//tensorflow/contrib/timeseries:timeseries_pip", + "//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/saved_model:saved_model", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0e910b774d..675acbe5f6 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -12,8 +12,6 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil load("//third_party:repo.bzl", "tf_http_archive") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", - "def_file_filter_configure") def _extract_version_number(bazel_version): """Extracts the semantic version number from a version string @@ -69,7 +67,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We must check the bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel # version we require here. - check_bazel_version_at_least("0.10.0") + check_bazel_version_at_least("0.5.4") clang6_configure(name="local_config_clang6") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") @@ -77,10 +75,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") - # For windows bazel build - # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. - def_file_filter_configure(name = "local_config_def_file_filter") - # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name="local_config_arm_compiler", -- GitLab From bf741007d1f6f440a2671b9fa8894af3df10ed44 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 20 Mar 2018 21:30:02 -0700 Subject: [PATCH 283/960] C API: fix device + colocation edge case in import_graph_def This change makes the C API consistent with the Python API, by making sure that all nodes in a colocation group have the device of the op named in the "_class" attr (all other ops' devices are ignored). This is currently done by preserving the current Python logic for colocation and devices, which only works if all ops start with no device set. Without this change, imported nodes would have the device specified in the GraphDef. This change unsets any device before running the Python logic. PiperOrigin-RevId: 189859688 --- tensorflow/python/framework/importer.py | 11 +++-- tensorflow/python/framework/importer_test.py | 43 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index a9e399f59b..4ea34d7bb2 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -301,14 +301,17 @@ def _ProcessNewOps(graph): colocation_pairs = {} for new_op in graph._add_new_tf_operations(compute_devices=False): # pylint: disable=protected-access + original_device = new_op.device + new_op._set_device('') # pylint: disable=protected-access colocation_names = _GetColocationNames(new_op) if colocation_names: colocation_pairs[new_op] = colocation_names - # Don't apply this op's device function, since colocation constraints - # override device functions. Note that this op's device may still be set - # by the loop below. + # Don't set a device for this op, since colocation constraints override + # device functions and the original device. Note that this op's device may + # still be set by the loop below. + # TODO(skyewm): why does it override the original device? else: - with _MaybeDevice(new_op.device): + with _MaybeDevice(original_device): graph._apply_device_functions(new_op) # pylint: disable=protected-access # The following loop populates the device field of ops that are colocated diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index bf5d9fe093..6593b17184 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -680,6 +680,49 @@ class ImportGraphDefTest(test.TestCase): "list { s: 'loc:@imported_graph/A' }", b.node_def.attr["_class"]) + def testColocationAndDevice(self): + # A and B are colocated, device set on A. + original_graph_def = self._MakeGraphDef(""" + node { name: 'A' op: 'None' device: '/device:CPU:0' attr { + key: '_class' + value { list { s: 'loc:@A' } } + } } + node { name: 'B' op: 'None' attr { + key: '_class' + value { list { s: 'loc:@A' } } + } }""") + + with ops.Graph().as_default(): + a, b = importer.import_graph_def(original_graph_def, + return_elements=["A", "B"], + name="") + self.assertEqual(a.device, "/device:CPU:0") + self.assertEqual(b.device, "/device:CPU:0") + self.assertEqual(a.colocation_groups(), [b"loc:@A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@A"]) + + # A and B are colocated, device set on B. + original_graph_def = self._MakeGraphDef(""" + node { name: 'A' op: 'None' attr { + key: '_class' + value { list { s: 'loc:@A' } } + } } + node { name: 'B' op: 'None' device: '/device:CPU:0' attr { + key: '_class' + value { list { s: 'loc:@A' } } + } }""") + + with ops.Graph().as_default(): + a, b = importer.import_graph_def(original_graph_def, + return_elements=["A", "B"], + name="") + # TODO(skyewm): this behavior seems inconsistent with the above. Why is + # B's device ignored? + self.assertEqual(a.device, "") + self.assertEqual(b.device, "") + self.assertEqual(a.colocation_groups(), [b"loc:@A"]) + self.assertEqual(b.colocation_groups(), [b"loc:@A"]) + def testColocationWithDeviceFn(self): original_graph_def = self._MakeGraphDef(""" node { name: 'A' op: 'None' attr { -- GitLab From 73d17507de23db1c843de587441a958342c2f1e7 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Tue, 20 Mar 2018 21:39:16 -0700 Subject: [PATCH 284/960] Make variable scope and scope counts local to current thread so that they work correctly in multi-threaded environments. PiperOrigin-RevId: 189860229 --- tensorflow/contrib/eager/python/network.py | 2 +- .../kernel_tests/variable_scope_test.py | 87 +++++++++++++ tensorflow/python/ops/template.py | 2 +- tensorflow/python/ops/variable_scope.py | 121 ++++++++++++------ 4 files changed, 171 insertions(+), 41 deletions(-) diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index 4c937716e8..e55a9276ab 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -149,7 +149,7 @@ class Network(base.Layer): # check we might have name collisions if the parent scope on init gets # closed before build is called. self._variable_scope_counts_on_init = ( - variable_scope._get_default_variable_store().variable_scopes_count) + variable_scope.get_variable_scope_store().variable_scopes_count) def _name_scope_name(self, current_variable_scope): """Overrides Layer op naming to match variable naming.""" diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 531d0cdf90..86ab9fbb70 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import gc +import threading import numpy @@ -1349,5 +1350,91 @@ class PartitionInfoTest(test.TestCase): self.assertEqual(0, partition_info.single_slice_dim([2, 3])) +class VariableScopeMultithreadedTest(test.TestCase): + + def testTwoThreadsDisjointScopeEntry(self): + + def thread_fn(i, graph): + with graph.as_default(): + with variable_scope.variable_scope("foo"): + if i == 0: + v = variable_scope.get_variable("v", []) + self.assertEquals("foo/v:0", v.name) + else: + # Any thread after the first one should fail to create variable + # with the same name. + with self.assertRaises(ValueError): + variable_scope.get_variable("v", []) + + graph = ops.get_default_graph() + threads = [ + threading.Thread(target=thread_fn, args=(i, graph,)) for i in range(2)] + + threads[0].start() + # Allow thread 0 to finish before starting thread 1. + threads[0].join() + threads[1].start() + threads[1].join() + + def testTwoThreadsNestedScopeEntry(self): + + def thread_fn(i, graph, run_event, pause_event): + with graph.as_default(): + with variable_scope.variable_scope("foo"): + if i == 0: + v = variable_scope.get_variable("v", []) + self.assertEquals("foo/v:0", v.name) + else: + # Any thread after the first one should fail to create variable + # with the same name. + with self.assertRaises(ValueError): + variable_scope.get_variable("v", []) + pause_event.set() + run_event.wait() + + graph = ops.get_default_graph() + run_events = [threading.Event() for _ in range(2)] + pause_events = [threading.Event() for _ in range(2)] + threads = [ + threading.Thread( + target=thread_fn, args=(i, graph, run_events[i], pause_events[i])) + for i in range(2) + ] + + # Start first thread. + threads[0].start() + pause_events[0].wait() + # Start next thread once the first thread has paused. + threads[1].start() + pause_events[1].wait() + # Resume both threads. + run_events[0].set() + run_events[1].set() + threads[0].join() + threads[1].join() + + def testReenterMainScope(self): + + def thread_fn(graph, main_thread_scope): + with graph.as_default(): + # Variable created with main scope will have prefix "main". + with variable_scope.variable_scope(main_thread_scope): + with variable_scope.variable_scope("foo"): + v = variable_scope.get_variable("v", []) + self.assertEquals("main/foo/v:0", v.name) + + # Variable created outside main scope will not have prefix "main". + with variable_scope.variable_scope("bar"): + v = variable_scope.get_variable("v", []) + self.assertEquals("bar/v:0", v.name) + + graph = ops.get_default_graph() + with variable_scope.variable_scope("main") as main_thread_scope: + thread = threading.Thread( + target=thread_fn, args=(graph, main_thread_scope)) + thread.start() + thread.join() + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 0a391d896a..0294ecee54 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -583,7 +583,7 @@ class _EagerTemplateVariableStore(object): if self._variable_scope_name is None: raise RuntimeError("A variable scope must be set before an " "_EagerTemplateVariableStore object exits.") - self._eager_variable_store._store.close_variable_subscopes( # pylint: disable=protected-access + variable_scope.get_variable_scope_store().close_variable_subscopes( self._variable_scope_name) def _variables_in_scope(self, variable_list): diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index c1af8ff8d3..c35735ca65 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -24,6 +24,7 @@ import copy import enum # pylint: disable=g-bad-import-order import functools import sys +import threading import traceback import six @@ -211,23 +212,8 @@ class _VariableStore(object): """Create a variable store.""" self._vars = {} # A dictionary of the stored TensorFlow variables. self._partitioned_vars = {} # A dict of the stored PartitionedVariables. - self.variable_scopes_count = {} # Count re-used variable scopes. self._store_eager_variables = False - def open_variable_scope(self, scope_name): - if scope_name in self.variable_scopes_count: - self.variable_scopes_count[scope_name] += 1 - else: - self.variable_scopes_count[scope_name] = 1 - - def close_variable_subscopes(self, scope_name): - for k in self.variable_scopes_count: - if not scope_name or k.startswith(scope_name + "/"): - self.variable_scopes_count[k] = 0 - - def variable_scope_count(self, scope_name): - return self.variable_scopes_count.get(scope_name, 0) - def get_variable(self, name, shape=None, dtype=dtypes.float32, initializer=None, regularizer=None, reuse=None, trainable=True, collections=None, caching_device=None, @@ -1160,18 +1146,49 @@ class VariableScope(object): _VARSTORE_KEY = ("__variable_store",) -_VARSCOPE_KEY = ("__varscope",) +_VARSCOPESTORE_KEY = ("__varscope",) + + +class _VariableScopeStore(threading.local): + """A thread local store for the current variable scope and scope counts.""" + + def __init__(self): + super(_VariableScopeStore, self).__init__() + self.current_scope = VariableScope(False) + self.variable_scopes_count = {} + + def open_variable_scope(self, scope_name): + if scope_name in self.variable_scopes_count: + self.variable_scopes_count[scope_name] += 1 + else: + self.variable_scopes_count[scope_name] = 1 + + def close_variable_subscopes(self, scope_name): + for k in self.variable_scopes_count: + if not scope_name or k.startswith(scope_name + "/"): + self.variable_scopes_count[k] = 0 + + def variable_scope_count(self, scope_name): + return self.variable_scopes_count.get(scope_name, 0) + + +def get_variable_scope_store(): + """Returns the variable scope store for current thread.""" + scope_store = ops.get_collection(_VARSCOPESTORE_KEY) + + if not scope_store: + scope_store = _VariableScopeStore() + ops.add_to_collection(_VARSCOPESTORE_KEY, scope_store) + else: + scope_store = scope_store[0] + + return scope_store @tf_export("get_variable_scope") def get_variable_scope(): """Returns the current variable scope.""" - scope = ops.get_collection(_VARSCOPE_KEY) - if scope: # This collection has at most 1 element, the default scope at [0]. - return scope[0] - scope = VariableScope(False) - ops.add_to_collection(_VARSCOPE_KEY, scope) - return scope + return get_variable_scope_store().current_scope def _get_default_variable_store(): @@ -1575,10 +1592,8 @@ class _pure_variable_scope(object): # pylint: disable=invalid-name self._dtype = dtype self._use_resource = use_resource self._constraint = constraint - get_variable_scope() # Ensure that a default exists, then get a pointer. - # Get the reference to the collection as we want to modify it in place. - self._default_varscope = ops.get_collection_ref(_VARSCOPE_KEY) self._var_store = _get_default_variable_store() + self._var_scope_store = get_variable_scope_store() if isinstance(self._name_or_scope, VariableScope): self._new_name = self._name_or_scope.name name_scope = self._name_or_scope._name_scope # pylint: disable=protected-access @@ -1626,10 +1641,11 @@ class _pure_variable_scope(object): # pylint: disable=invalid-name a reuse scope, or if reuse is not `None` or `True`. TypeError: when the types of some arguments are not appropriate. """ - self._old = self._default_varscope[0] + self._old = self._var_scope_store.current_scope if isinstance(self._name_or_scope, VariableScope): - self._var_store.open_variable_scope(self._new_name) - self._old_subscopes = copy.copy(self._var_store.variable_scopes_count) + self._var_scope_store.open_variable_scope(self._new_name) + self._old_subscopes = copy.copy( + self._var_scope_store.variable_scopes_count) variable_scope_object = self._cached_variable_scope_object else: # Handler for the case when we just prolong current variable scope. @@ -1672,17 +1688,17 @@ class _pure_variable_scope(object): # pylint: disable=invalid-name variable_scope_object.set_dtype(self._dtype) if self._use_resource is not None: variable_scope_object.set_use_resource(self._use_resource) - self._var_store.open_variable_scope(self._new_name) - self._default_varscope[0] = variable_scope_object + self._var_scope_store.open_variable_scope(self._new_name) + self._var_scope_store.current_scope = variable_scope_object return variable_scope_object def __exit__(self, type_arg, value_arg, traceback_arg): # If jumping out from a non-prolonged scope, restore counts. if isinstance(self._name_or_scope, VariableScope): - self._var_store.variable_scopes_count = self._old_subscopes + self._var_scope_store.variable_scopes_count = self._old_subscopes else: - self._var_store.close_variable_subscopes(self._new_name) - self._default_varscope[0] = self._old + self._var_scope_store.close_variable_subscopes(self._new_name) + self._var_scope_store.current_scope = self._old def _maybe_wrap_custom_getter(custom_getter, old_getter): @@ -1707,13 +1723,13 @@ def _maybe_wrap_custom_getter(custom_getter, old_getter): def _get_unique_variable_scope(prefix): """Get a name with the given prefix unique in the current variable scope.""" - var_store = _get_default_variable_store() + var_scope_store = get_variable_scope_store() current_scope = get_variable_scope() name = current_scope.name + "/" + prefix if current_scope.name else prefix - if var_store.variable_scope_count(name) == 0: + if var_scope_store.variable_scope_count(name) == 0: return prefix idx = 1 - while var_store.variable_scope_count(name + ("_%d" % idx)) > 0: + while var_scope_store.variable_scope_count(name + ("_%d" % idx)) > 0: idx += 1 return prefix + ("_%d" % idx) @@ -1729,9 +1745,10 @@ class variable_scope(object): graph, ensures that graph is the default graph, and pushes a name scope and a variable scope. - If `name_or_scope` is not None, it is used as is. If `scope` is None, then - `default_name` is used. In that case, if the same name has been previously - used in the same scope, it will be made unique by appending `_N` to it. + If `name_or_scope` is not None, it is used as is. If `name_or_scope` is None, + then `default_name` is used. In that case, if the same name has been + previously used in the same scope, it will be made unique by appending `_N` + to it. Variable scope allows you to create new variables and to share already created ones while providing checks to not create or share by accident. For details, @@ -1810,6 +1827,32 @@ class variable_scope(object): discouraged) to pass False to the reuse argument, yielding undocumented behaviour slightly different from None. Starting at 1.1.0 passing None and False as reuse has exactly the same effect. + + A note about using variable scopes in multi-threaded environment: Variable + scopes are thread local, so one thread will not see another thread's current + scope. Also, when using `default_name`, unique scopes names are also generated + only on a per thread basis. If the same name was used within a different + thread, that doesn't prevent a new thread from creating the same scope. + However, the underlying variable store is shared across threads (within the + same graph). As such, if another thread tries to create a new variable with + the same name as a variable created by a previous thread, it will fail unless + reuse is True. + + Further, each thread starts with an empty variable scope. So if you wish to + preserve name prefixes from a scope from the main thread, you should capture + the main thread's scope and re-enter it in each thread. For e.g. + + ``` + main_thread_scope = variable_scope.get_variable_scope() + + # Thread's target function: + def thread_target_fn(captured_scope): + with variable_scope.variable_scope(captured_scope): + # .... regular code for this thread + + + thread = threading.Thread(target=thread_target_fn, args=(main_thread_scope,)) + ``` """ def __init__(self, -- GitLab From d5d74b0aaaa221ad64aa1d86cb8428df2b885cf7 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Tue, 20 Mar 2018 23:07:37 -0700 Subject: [PATCH 285/960] Make graph's name scope thread local so that two threads opening the same scope don't get nested under each other. PiperOrigin-RevId: 189865854 --- tensorflow/python/framework/ops.py | 13 +++++++++-- tensorflow/python/framework/ops_test.py | 29 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 4be2e2c15d..50a1d3fe04 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2727,8 +2727,6 @@ class Graph(object): self._next_id_counter = 0 # GUARDED_BY(self._lock) self._nodes_by_name = dict() # GUARDED_BY(self._lock) self._version = 0 # GUARDED_BY(self._lock) - # Current name stack: uniquified names - self._name_stack = "" # Maps a name used in the graph to the next id to use for that name. self._names_in_use = {} self._stack_state_is_thread_local = False @@ -3907,6 +3905,17 @@ class Graph(object): finally: self._default_original_op = old_original_op + @property + def _name_stack(self): + # This may be called from a thread where name_stack doesn't yet exist. + if not hasattr(self._thread_local, "_name_stack"): + self._thread_local._name_stack = "" + return self._thread_local._name_stack + + @_name_stack.setter + def _name_stack(self, name_stack): + self._thread_local._name_stack = name_stack + # pylint: disable=g-doc-return-or-yield,line-too-long @tf_contextlib.contextmanager def name_scope(self, name): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index d96e0708f8..aa51391871 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1556,6 +1556,35 @@ class MultithreadedGraphStateTest(test_util.TensorFlowTestCase): input: "^ColocateWithMe_2" } """, gd) + def testNameStack(self): + + class NameSettingThread(self.TestThread): + + def run(self): + with g.name_scope("foo"): + op1 = g.create_op("FloatOutput", [], [dtypes.float32]) + self.has_mutated_graph.set() + self.should_continue.wait() + self.should_continue.clear() + op2 = g.create_op("FloatOutput", [], [dtypes.float32]) + self.result = (op1, op2) + + g = ops.Graph() + threads = [NameSettingThread(g, i) for i in range(3)] + for t in threads: + t.start() + t.has_mutated_graph.wait() + t.has_mutated_graph.clear() + + for t in threads: + t.should_continue.set() + t.join() + + suffixes = ["", "_1", "_2"] + for t, s in zip(threads, suffixes): + self.assertEquals("foo" + s + "/FloatOutput", t.result[0].name) + self.assertEquals("foo" + s + "/FloatOutput_1", t.result[1].name) + @test_util.with_c_api class ObjectWithName(object): -- GitLab From e69000c347ddf023a3b1926d812881fd8c5a055b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 04:09:32 -0700 Subject: [PATCH 286/960] We were ValueOrDie()ing in one place, and TF_CHECK_OK()ing in another. Both should gracefully return an error condition. Add some tests to check this. PiperOrigin-RevId: 189888700 --- tensorflow/compiler/jit/xla_device_context.cc | 12 ++-- tensorflow/compiler/jit/xla_launch_util.cc | 14 +++- tensorflow/compiler/tests/BUILD | 20 ++++++ tensorflow/compiler/tests/oom_test.py | 72 +++++++++++++++++++ 4 files changed, 111 insertions(+), 7 deletions(-) create mode 100644 tensorflow/compiler/tests/oom_test.py diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 88f7c15f0b..93e0dbb9b9 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -36,10 +36,14 @@ XlaDeviceAllocator::~XlaDeviceAllocator() = default; string XlaDeviceAllocator::Name() { return "xla"; } void* XlaDeviceAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { - se::DeviceMemoryBase dmem = - backend_->memory_allocator() - ->Allocate(device_ordinal_, num_bytes, /*retry_on_failure=*/false) - .ValueOrDie(); + auto status_or_dmem = backend_->memory_allocator()->Allocate( + device_ordinal_, num_bytes, /*retry_on_failure=*/false); + if (!status_or_dmem.status().ok()) { + LOG(ERROR) << "Failed to allocate memory: " + << status_or_dmem.status().ToString(); + return nullptr; + } + se::DeviceMemoryBase dmem = status_or_dmem.ValueOrDie(); VLOG(2) << "Allocated XLA device tensor " << dmem.opaque() << "(" << num_bytes << ")"; return dmem.opaque(); diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index bb7316c60c..21f58c8310 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -56,12 +56,20 @@ XlaAllocator::XlaAllocator(const gpu::Platform* platform, OpKernelContext* op_context) : xla::DeviceMemoryAllocator(platform), op_context_(op_context) {} -XlaAllocator::~XlaAllocator() { CHECK(allocated_.empty()); } +XlaAllocator::~XlaAllocator() { + for (void* ptr : allocated_) { + op_context_->device()->GetAllocator({})->DeallocateRaw(ptr); + } +} xla::StatusOr XlaAllocator::Allocate( int device_ordinal, uint64 size, bool retry_on_failure) { void* data = op_context_->device()->GetAllocator({})->AllocateRaw( Allocator::kAllocatorAlignment, size); + if (!data) { + return errors::ResourceExhausted( + "OOM when allocating temporary tensor with size ", size); + } allocated_.insert(data); return gpu::DeviceMemoryBase(data, size); } @@ -182,8 +190,8 @@ void XlaComputationLaunchContext::PopulateOutputs( // Copy host -> device. (Empty tensors don't have backing buffers.) VLOG(1) << "Constant output tensor on device"; - TF_CHECK_OK( - ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); + OP_REQUIRES_OK( + ctx, ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); const void* src_ptr = DMAHelper::base(&const_tensor); void* dst_ptr = DMAHelper::base(output_tensor); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index bbb6089ea8..26d4ca0c13 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -191,6 +191,26 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "oom_test", + size = "medium", + srcs = ["oom_test.py"], + disabled_backends = [ + "cpu", + "cpu_ondemand", + ], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:array_ops_gen", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradient_checker", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + tf_xla_py_test( name = "conv2d_test", size = "medium", diff --git a/tensorflow/compiler/tests/oom_test.py b/tensorflow/compiler/tests/oom_test.py new file mode 100644 index 0000000000..66be0d61d0 --- /dev/null +++ b/tensorflow/compiler/tests/oom_test.py @@ -0,0 +1,72 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for out-of-memory conditions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.compiler.tests import xla_test +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.platform import googletest + + +class OutOfMemoryTest(xla_test.XLATestCase): + + def testOutputOutOfMemory(self): + """Allocates tensors until out of memory. + + Generates a large rank-1 tensor. The tensor is an output of an XLA + computation, not constant. + + Check that a ResourceExhaustedError is raised and can be caught. + """ + size = 5e8 + with self.test_session(): + # Force the compiled code to not be constant by feeding in an addend. + p = array_ops.placeholder(dtypes.float32, shape=[]) + with self.test_scope(): + # Create a large R1 tensor. + c = array_ops.zeros([size]) + p + + self.assertRaises( + errors.ResourceExhaustedError, lambda: c.eval(feed_dict={p: 1.0})) + + def testConstantOutOfMemory(self): + """Allocates constant tensors until out of memory. + + Generates a large rank-1 tensor and a small rank-1 tensor. The tensors are + constant outputs of an XLA computation, not variable. + + Multiple constant outputs are created, one small, one large. The small + tensor will have already been allocated when the large tensor fails. + + Check that a ResourceExhaustedError is raised and can be caught. + """ + size = 5e8 + with self.test_session() as sess: + with self.test_scope(): + # Create two R1 tensors, size 5 and size n. + b = array_ops.zeros([5]) + c = array_ops.zeros([size]) + e = control_flow_ops.tuple([b, c]) + self.assertRaises(errors.ResourceExhaustedError, lambda: sess.run(e)) + + +if __name__ == "__main__": + googletest.main() -- GitLab From 73a5fb686c10f044f245b27f246ff56f690ada1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 05:55:32 -0700 Subject: [PATCH 287/960] Minor cosmetic improvement to error message. PiperOrigin-RevId: 189895415 --- tensorflow/contrib/py2tf/converters/lists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/converters/lists.py b/tensorflow/contrib/py2tf/converters/lists.py index 06e1dad8f4..12ebd00062 100644 --- a/tensorflow/contrib/py2tf/converters/lists.py +++ b/tensorflow/contrib/py2tf/converters/lists.py @@ -61,7 +61,7 @@ class ListTransformer(transformer.Base): return templates.replace_as_expression(template, dtype_name=dtype_name) def _pre_populated_list(self, node): - raise NotImplementedError() + raise NotImplementedError('pre-populated lists') def visit_Expr(self, node): node = self.generic_visit(node) -- GitLab From abd5b15ababbb5601f02691620d4d8e094cff64e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 06:57:06 -0700 Subject: [PATCH 288/960] Tensorflow/GCS: Check whether we are running under GCE before trying to obtain auth token from GCE. Before this change, if a process is not running under GCE, the token request to http://metadata server would time out after 3+ minutes of retry. After this change, the check is bypassed, and we return an empty token to the caller. At that point, the caller's request to read/write a file in GCS would either succeed or fail depending on whether the bucket is publicly accessible. PiperOrigin-RevId: 189900977 --- tensorflow/core/platform/cloud/BUILD | 41 +++++ tensorflow/core/platform/cloud/fake_env.cc | 62 +++++++ tensorflow/core/platform/cloud/fake_env.h | 60 +++++++ .../core/platform/cloud/gce_env_utils.cc | 159 ++++++++++++++++++ .../core/platform/cloud/gce_env_utils.h | 29 ++++ .../core/platform/cloud/gcp_env_utils_test.cc | 53 ++++++ .../platform/cloud/google_auth_provider.cc | 11 ++ .../platform/cloud/google_auth_provider.h | 7 +- .../cloud/google_auth_provider_test.cc | 39 +++-- tensorflow/core/platform/env.h | 4 +- 10 files changed, 449 insertions(+), 16 deletions(-) create mode 100644 tensorflow/core/platform/cloud/fake_env.cc create mode 100644 tensorflow/core/platform/cloud/fake_env.h create mode 100644 tensorflow/core/platform/cloud/gce_env_utils.cc create mode 100644 tensorflow/core/platform/cloud/gce_env_utils.h create mode 100644 tensorflow/core/platform/cloud/gcp_env_utils_test.cc diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 21636641e7..e43639e9c7 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -79,6 +79,18 @@ cc_library( ], ) +cc_library( + name = "gce_env_utils", + srcs = ["gce_env_utils.cc"], + hdrs = ["gce_env_utils.h"], + copts = tf_copts(), + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:lib_internal", + ], +) + cc_library( name = "gcs_file_system", srcs = ["gcs_file_system.cc"], @@ -158,6 +170,7 @@ cc_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":curl_http_request", + ":gce_env_utils", ":oauth_client", ":retrying_utils", "//tensorflow/core:lib", @@ -243,6 +256,21 @@ cc_library( ], ) +cc_library( + name = "fake_env", + srcs = [ + "fake_env.cc", + ], + hdrs = [ + "fake_env.h", + ], + copts = tf_copts(), + deps = [ + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:lib_internal", + ], +) + tf_cc_test( name = "expiring_lru_cache_test", size = "small", @@ -348,6 +376,7 @@ tf_cc_test( "testdata/service_account_credentials.json", ], deps = [ + ":fake_env", ":google_auth_provider", ":http_request_fake", ":oauth_client", @@ -394,3 +423,15 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +tf_cc_test( + name = "gce_env_utils_test", + size = "small", + srcs = ["gcp_env_utils_test.cc"], + deps = [ + ":fake_env", + ":gce_env_utils", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/core/platform/cloud/fake_env.cc b/tensorflow/core/platform/cloud/fake_env.cc new file mode 100644 index 0000000000..221166839e --- /dev/null +++ b/tensorflow/core/platform/cloud/fake_env.cc @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/cloud/fake_env.h" + +namespace tensorflow { +namespace test { + +Status FakeEnv::FakeRandomAccessFile::Read(uint64 offset, size_t n, + StringPiece* result, + char* scratch) const { + CHECK_EQ(offset, 0); + CHECK_EQ(n, 256); + Status s; + string platform; + switch (env_type_) { + case kGoogle: { + platform = "Google\n "; + s = errors::OutOfRange(""); + break; + } + case kGce: { + platform = " Google Compute Engine\n "; + s = errors::OutOfRange(""); + break; + } + case kLocal: { + platform = "HP Linux Workstation"; + s = Status::OK(); + break; + } + case kBad: { + platform = ""; + s = errors::Internal("Expected"); + break; + } + } + strncpy(scratch, platform.data(), strlen(platform.data())); + *result = StringPiece(scratch, platform.length()); + return s; +} + +Status FakeEnv::NewRandomAccessFile(const string& fname, + std::unique_ptr* result) { + result->reset(new FakeRandomAccessFile(env_type_)); + return Status::OK(); +} + +} // namespace test +} // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/fake_env.h b/tensorflow/core/platform/cloud/fake_env.h new file mode 100644 index 0000000000..7c162d9d66 --- /dev/null +++ b/tensorflow/core/platform/cloud/fake_env.h @@ -0,0 +1,60 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_FAKE_ENV_H_ +#define TENSORFLOW_CORE_PLATFORM_CLOUD_FAKE_ENV_H_ + +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { +namespace test { + +/// Env implementation that stubs out the calls to read a file and time. +class FakeEnv : public EnvWrapper { + public: + enum EnvType { + kGoogle, + kGce, + kLocal, + kBad, + }; + + FakeEnv(EnvType env_type) : EnvWrapper(Env::Default()), env_type_(env_type) {} + + class FakeRandomAccessFile : public RandomAccessFile { + public: + FakeRandomAccessFile(EnvType env_type) : env_type_(env_type) {} + + Status Read(uint64 offset, size_t n, StringPiece* result, + char* scratch) const override; + + private: + EnvType env_type_; + }; + + Status NewRandomAccessFile( + const string& fname, std::unique_ptr* result) override; + + uint64 NowSeconds() override { return now; } + uint64 now = 10000; + + private: + EnvType env_type_; +}; + +} // namespace test +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_CLOUD_FAKE_ENV_H_ diff --git a/tensorflow/core/platform/cloud/gce_env_utils.cc b/tensorflow/core/platform/cloud/gce_env_utils.cc new file mode 100644 index 0000000000..d78374c4b8 --- /dev/null +++ b/tensorflow/core/platform/cloud/gce_env_utils.cc @@ -0,0 +1,159 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/cloud/gce_env_utils.h" + +#if defined(PLATFORM_WINDOWS) +#include +#include +#include +#include + +// The order if these includes is important, windows.h has to come first. +// clang-format off +#include // NOLINT +#include // NOLINT +#include // NOLINT +// clang-format on +#else +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/lib/strings/str_util.h" +#endif + +namespace tensorflow { + +constexpr char kExpectedGoogleProductName[] = "Google"; +constexpr char kExpectedGceProductName[] = "Google Compute Engine"; + +constexpr char kWinCheckCommand[] = "powershell.exe"; +constexpr char kWinCheckCommandArgs[] = + "(Get-WmiObject -Class Win32_BIOS).Manufacturer"; + +constexpr char kLinuxProductNameFile[] = "/sys/class/dmi/id/product_name"; + +const size_t kBiosDataBufferSize = 256; + +namespace { + +#if defined(PLATFORM_WINDOWS) + +Status IsRunningOnWinGce(bool* is_running_under_gce) { + *is_running_under_gce = FALSE; + SECURITY_ATTRIBUTES sa; + sa.nLength = sizeof(sa); + sa.lpSecurityDescriptor = NULL; + sa.bInheritHandle = TRUE; + + // Handles to input and output of the pipe connecting us + // to the child process running powershell(). The output of this + // child process will be written to 'process_output_in' and read from + // 'process_output_in'. + HANDLE process_output_out = NULL; + HANDLE process_output_in = NULL; + + // Create the actually pipe connecting us to the child process. + if (!CreatePipe(&process_output_out, &process_output_in, &sa, 0)) { + return errors::Internal("CreatePipe() failed"); + } + if (!SetHandleInformation(process_output_out, HANDLE_FLAG_INHERIT, 0)) { + return errors::Internal("SetHandleInformation() failed"); + } + + PROCESS_INFORMATION pi; + STARTUPINFO si; + DWORD flags = CREATE_NO_WINDOW; + ZeroMemory(&pi, sizeof(pi)); + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags |= STARTF_USESTDHANDLES; + si.hStdInput = NULL; + + // Connect the process to pipe's input. + si.hStdError = process_output_in; + si.hStdOutput = process_output_in; + // Execute (and wait for) powershell command to read the product information + // out of the registry. + TCHAR cmd[kBiosDataBufferSize]; + snprintf(cmd, kBiosDataBufferSize, "%s %s", _T(kWinCheckCommand), + _T(kWinCheckCommandArgs)); + + if (!CreateProcess(NULL, cmd, NULL, NULL, TRUE, flags, NULL, NULL, &si, + &pi)) { + return errors::Internal("CreateProcess() failed"); + } + + WaitForSingleObject(pi.hProcess, INFINITE); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + // Read data from the pipe. Note that we are reading only kBiosDataBufferSize + // chars. There might be technically more data than that but we are looking + // for Google product identifiers that are much shorter than + // kBiosDataBufferSize. + DWORD dwread = 0; + CHAR buffer[kBiosDataBufferSize]; + if (!ReadFile(process_output_out, buffer, kBiosDataBufferSize, &dwread, + NULL)) { + return errors::Internal("Failed reading from the pipe."); + } + std::string output(buffer, 0, dwread); + // Trim whitespaces + output.erase(output.begin(), + std::find_if(output.begin(), output.end(), + [](int ch) { return !std::isspace(ch); })); + output.erase(std::find_if(output.rbegin(), output.rend(), + [](int ch) { return !std::isspace(ch); }) + .base(), + output.end()); + *is_running_under_gce = + output == kExpectedGceProductName || output == kExpectedGoogleProductName; + return Status::OK(); +} + +#else + +Status IsRunningOnLinuxGce(Env* env, bool* is_running_under_gce) { + std::unique_ptr file; + TF_RETURN_IF_ERROR(env->NewRandomAccessFile(kLinuxProductNameFile, &file)); + char buf[kBiosDataBufferSize + 1]; + std::fill(buf, buf + kBiosDataBufferSize + 1, '\0'); + StringPiece product_name; + const Status s = file->Read(0, kBiosDataBufferSize, &product_name, buf); + if (!s.ok() && !errors::IsOutOfRange(s)) { + // We expect OutOfRange error because bios file doesn't correspond to its + // state size, + return s; + } + str_util::RemoveLeadingWhitespace(&product_name); + str_util::RemoveTrailingWhitespace(&product_name); + *is_running_under_gce = (product_name == kExpectedGceProductName || + product_name == kExpectedGoogleProductName); + return Status::OK(); +} + +#endif + +} // namespace + +Status IsRunningOnGce(Env* env, bool* is_running_under_gce) { + *is_running_under_gce = false; +#if defined(PLATFORM_WINDOWS) + return IsRunningOnWinGce(is_running_under_gce); +#else + return IsRunningOnLinuxGce(env, is_running_under_gce); +#endif +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gce_env_utils.h b/tensorflow/core/platform/cloud/gce_env_utils.h new file mode 100644 index 0000000000..25aaeb7db3 --- /dev/null +++ b/tensorflow/core/platform/cloud/gce_env_utils.h @@ -0,0 +1,29 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_GCE_ENV_UTILS_H_ +#define TENSORFLOW_CORE_PLATFORM_CLOUD_GCE_ENV_UTILS_H_ + +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { + +// Check whether the current process is running under GCE. +Status IsRunningOnGce(Env* env, bool* is_running_under_gce); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_CLOUD_GCE_ENV_UTILS_H_ diff --git a/tensorflow/core/platform/cloud/gcp_env_utils_test.cc b/tensorflow/core/platform/cloud/gcp_env_utils_test.cc new file mode 100644 index 0000000000..910397b52b --- /dev/null +++ b/tensorflow/core/platform/cloud/gcp_env_utils_test.cc @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/cloud/gce_env_utils.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/cloud/fake_env.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +namespace { + +TEST(GcpEnvUtils, IsRunningOnGce) { + { + test::FakeEnv env(test::FakeEnv::kGoogle); + bool is_running_on_gcp = false; + TF_EXPECT_OK(IsRunningOnGce(&env, &is_running_on_gcp)); + EXPECT_TRUE(is_running_on_gcp); + } + { + test::FakeEnv env(test::FakeEnv::kGce); + bool is_running_on_gcp = false; + TF_EXPECT_OK(IsRunningOnGce(&env, &is_running_on_gcp)); + EXPECT_TRUE(is_running_on_gcp); + } + { + test::FakeEnv env(test::FakeEnv::kLocal); + bool is_running_on_gcp = false; + TF_EXPECT_OK(IsRunningOnGce(&env, &is_running_on_gcp)); + EXPECT_FALSE(is_running_on_gcp); + } + { + test::FakeEnv env(test::FakeEnv::kBad); + bool is_running_on_gcp = false; + EXPECT_TRUE(errors::IsInternal(IsRunningOnGce(&env, &is_running_on_gcp))); + } +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc index 7e39b63e3e..0e8a620464 100644 --- a/tensorflow/core/platform/cloud/google_auth_provider.cc +++ b/tensorflow/core/platform/cloud/google_auth_provider.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/base64.h" #include "tensorflow/core/platform/cloud/curl_http_request.h" +#include "tensorflow/core/platform/cloud/gce_env_utils.h" #include "tensorflow/core/platform/cloud/retrying_utils.h" #include "tensorflow/core/platform/env.h" @@ -207,6 +208,16 @@ Status GoogleAuthProvider::GetTokenFromFiles() { } Status GoogleAuthProvider::GetTokenFromGce() { + if (!is_running_on_gce_.has_value()) { + bool is_running_on_gce = false; + TF_RETURN_IF_ERROR(IsRunningOnGce(env_, &is_running_on_gce)); + is_running_on_gce_ = is_running_on_gce; + } + if (!is_running_on_gce_.value()) { + // Assume bucket is world-accessible. If not, the access will be rejected. + current_token_ = ""; + return Status::OK(); + } const auto get_token_from_gce = [this]() { std::unique_ptr request(http_request_factory_->Create()); std::vector response_buffer; diff --git a/tensorflow/core/platform/cloud/google_auth_provider.h b/tensorflow/core/platform/cloud/google_auth_provider.h index 00da25a959..79a57ff2a0 100644 --- a/tensorflow/core/platform/cloud/google_auth_provider.h +++ b/tensorflow/core/platform/cloud/google_auth_provider.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_PLATFORM_GOOGLE_AUTH_PROVIDER_H_ #include +#include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/platform/cloud/auth_provider.h" #include "tensorflow/core/platform/cloud/oauth_client.h" #include "tensorflow/core/platform/mutex.h" @@ -46,7 +47,10 @@ class GoogleAuthProvider : public AuthProvider { /// standard gcloud tool's location. Status GetTokenFromFiles() EXCLUSIVE_LOCKS_REQUIRED(mu_); - /// Gets the bearer token from Google Compute Engine environment. + /// Gets the bearer token from Google Compute Engine environment. May return + /// an empty token if the current process is not running under GCE. If that + /// happens the caller will try to use the empty token and either succeed + /// if the resource is publicly accessible or fail with a permissions error. Status GetTokenFromGce() EXCLUSIVE_LOCKS_REQUIRED(mu_); /// Gets the bearer token from the systen env variable, for testing purposes. @@ -57,6 +61,7 @@ class GoogleAuthProvider : public AuthProvider { Env* env_; mutex mu_; string current_token_ GUARDED_BY(mu_); + tensorflow::gtl::optional is_running_on_gce_ GUARDED_BY(mu_); uint64 expiration_timestamp_sec_ GUARDED_BY(mu_) = 0; // The initial delay for exponential backoffs when retrying failed calls. const int64 initial_retry_delay_usec_; diff --git a/tensorflow/core/platform/cloud/google_auth_provider_test.cc b/tensorflow/core/platform/cloud/google_auth_provider_test.cc index 4281c6c737..55829f84d9 100644 --- a/tensorflow/core/platform/cloud/google_auth_provider_test.cc +++ b/tensorflow/core/platform/cloud/google_auth_provider_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/cloud/fake_env.h" #include "tensorflow/core/platform/cloud/http_request_fake.h" #include "tensorflow/core/platform/test.h" @@ -26,14 +27,6 @@ namespace { constexpr char kTestData[] = "core/platform/cloud/testdata/"; -class FakeEnv : public EnvWrapper { - public: - FakeEnv() : EnvWrapper(Env::Default()) {} - - uint64 NowSeconds() override { return now; } - uint64 now = 10000; -}; - class FakeOAuthClient : public OAuthClient { public: Status GetTokenFromServiceAccountJson( @@ -89,7 +82,7 @@ TEST_F(GoogleAuthProviderTest, EnvironmentVariable_Caching) { auto oauth_client = new FakeOAuthClient; std::vector requests; - FakeEnv env; + test::FakeEnv env(test::FakeEnv::kGoogle); GoogleAuthProvider provider(std::unique_ptr(oauth_client), std::unique_ptr( new FakeHttpRequestFactory(&requests)), @@ -123,7 +116,7 @@ TEST_F(GoogleAuthProviderTest, GCloudRefreshToken) { auto oauth_client = new FakeOAuthClient; std::vector requests; - FakeEnv env; + test::FakeEnv env(test::FakeEnv::kGoogle); GoogleAuthProvider provider(std::unique_ptr(oauth_client), std::unique_ptr( new FakeHttpRequestFactory(&requests)), @@ -169,7 +162,7 @@ TEST_F(GoogleAuthProviderTest, RunningOnGCE) { "token_type":"Bearer" })")}); - FakeEnv env; + test::FakeEnv env(test::FakeEnv::kGoogle); GoogleAuthProvider provider(std::unique_ptr(oauth_client), std::unique_ptr( new FakeHttpRequestFactory(&requests)), @@ -195,7 +188,7 @@ TEST_F(GoogleAuthProviderTest, OverrideForTesting) { auto oauth_client = new FakeOAuthClient; std::vector empty_requests; - FakeEnv env; + test::FakeEnv env(test::FakeEnv::kGoogle); GoogleAuthProvider provider(std::unique_ptr(oauth_client), std::unique_ptr( new FakeHttpRequestFactory(&empty_requests)), @@ -215,7 +208,25 @@ TEST_F(GoogleAuthProviderTest, NothingAvailable) { "Header Metadata-Flavor: Google\n", "", errors::NotFound("404"), 404)}); - FakeEnv env; + test::FakeEnv env(test::FakeEnv::kGoogle); + GoogleAuthProvider provider(std::unique_ptr(oauth_client), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + &env, 0); + + string token; + TF_EXPECT_OK(provider.GetToken(&token)); + EXPECT_EQ("", token); +} + +TEST_F(GoogleAuthProviderTest, AccessingPublicBucket) { + setenv("CLOUDSDK_CONFIG", + io::JoinPath(testing::TensorFlowSrcRoot(), kTestData).c_str(), 1); + + auto oauth_client = new FakeOAuthClient; + std::vector requests; + + test::FakeEnv env(test::FakeEnv::kLocal); GoogleAuthProvider provider(std::unique_ptr(oauth_client), std::unique_ptr( new FakeHttpRequestFactory(&requests)), @@ -223,6 +234,8 @@ TEST_F(GoogleAuthProviderTest, NothingAvailable) { string token; TF_EXPECT_OK(provider.GetToken(&token)); + // We are assuming we are accessing a public bucket (and we are not running + // on GCE) so we an empty token is returned. EXPECT_EQ("", token); } diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h index 4ce4e0b4e0..2a114d47a8 100644 --- a/tensorflow/core/platform/env.h +++ b/tensorflow/core/platform/env.h @@ -88,8 +88,8 @@ class Env { /// The ownership of the returned RandomAccessFile is passed to the caller /// and the object should be deleted when is not used. The file object /// shouldn't live longer than the Env object. - Status NewRandomAccessFile(const string& fname, - std::unique_ptr* result); + virtual Status NewRandomAccessFile(const string& fname, + std::unique_ptr* result); /// \brief Creates an object that writes to a new file with the specified /// name. -- GitLab From 39dd4ee6a3727a0eb30a8d5b8f39390383a1e761 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 21 Mar 2018 07:33:03 -0700 Subject: [PATCH 289/960] [XLA] Initialize arrays using cudaMemset when possible. Previously we were using our own hand-rolled initializer thunk. This worked OK for reduces, because the amount of data we were initializing is usually small. But for e.g. select-and-scatter, it's quite slow. This patch lets us use cudaMemset instead. PiperOrigin-RevId: 189904720 --- tensorflow/compiler/xla/service/gpu/BUILD | 4 + .../xla/service/gpu/ir_emitter_unnested.cc | 129 ++++++++++++------ .../xla/service/gpu/ir_emitter_unnested.h | 10 +- .../compiler/xla/service/gpu/memset_thunk.cc | 39 ++++++ .../compiler/xla/service/gpu/memset_thunk.h | 65 +++++++++ tensorflow/compiler/xla/service/gpu/thunk.h | 2 + tensorflow/compiler/xla/tests/reduce_test.cc | 42 ++++++ 7 files changed, 247 insertions(+), 44 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/memset_thunk.cc create mode 100644 tensorflow/compiler/xla/service/gpu/memset_thunk.h diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index a3b7e10ae8..93b2f2a474 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -241,6 +241,7 @@ cc_library( "gpu_executable.cc", "infeed_thunk.cc", "kernel_thunk.cc", + "memset_thunk.cc", "sequential_thunk.cc", "thunk_schedule.cc", "tuple_thunk.cc", @@ -257,6 +258,7 @@ cc_library( "gpu_executable.h", "infeed_thunk.h", "kernel_thunk.h", + "memset_thunk.h", "sequential_thunk.h", "thunk.h", "thunk_schedule.h", @@ -273,6 +275,7 @@ cc_library( "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", @@ -293,6 +296,7 @@ cc_library( "//tensorflow/core/platform/default/build_config:cudnn_plugin", "//tensorflow/core/platform/default/build_config:cufft_plugin", "//tensorflow/core/platform/default/build_config:stream_executor_cuda", # build_cleaner: keep + "//tensorflow/stream_executor", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 2381d7a7d5..135a607ab9 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include +#include #include #include #include @@ -44,6 +46,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/ir_emitter_context.h" #include "tensorflow/compiler/xla/service/gpu/kernel_thunk.h" +#include "tensorflow/compiler/xla/service/gpu/memset_thunk.h" #include "tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h" @@ -498,12 +501,11 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { switch (root->opcode()) { case HloOpcode::kReduce: { VLOG(3) << "Emitting fused reduction to vector: " << fusion->ToString(); + TF_ASSIGN_OR_RETURN(std::unique_ptr initializer_thunk, + BuildInitializerThunk(fusion)); std::vector> thunks; - thunks.emplace_back(BuildKernelThunk(fusion)); - TF_RETURN_IF_ERROR(EmitInitializer( - fusion, static_cast(thunks.back().get()))); - bindings_.UnbindAllLocalIrValues(); - thunks.emplace_back(BuildKernelThunk(fusion)); + thunks.push_back(std::move(initializer_thunk)); + thunks.push_back(BuildKernelThunk(fusion)); thunk_sequence_->emplace_back( MakeUnique(std::move(thunks), fusion)); std::vector parameter_arrays; @@ -1635,14 +1637,14 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { if (IsReductionToVector(*reduce) && // NVPTX backend can't do atomic cmpxchg any narrower than 32 bits 32 <= primitive_util::BitWidth(reduce->shape().element_type())) { + TF_ASSIGN_OR_RETURN(std::unique_ptr initializer_thunk, + BuildInitializerThunk(reduce)); std::vector> thunks; - thunks.emplace_back(BuildKernelThunk(reduce)); - TF_RETURN_IF_ERROR(EmitInitializer( - reduce, static_cast(thunks.back().get()))); - bindings_.UnbindAllLocalIrValues(); - thunks.emplace_back(BuildKernelThunk(reduce)); + thunks.push_back(std::move(initializer_thunk)); + thunks.push_back(BuildKernelThunk(reduce)); thunk_sequence_->emplace_back( MakeUnique(std::move(thunks), reduce)); + return EmitReductionToVector( reduce, input->shape(), [&](const llvm_ir::IrArray::Index& index) { @@ -1706,16 +1708,13 @@ Status IrEmitterUnnested::HandleSelectAndScatter( CHECK_EQ(rank, ShapeUtil::Rank(source->shape())); CHECK_EQ(rank, window.dimensions_size()); - { - std::vector> thunks; - thunks.emplace_back(BuildKernelThunk(select_and_scatter)); - TF_RETURN_IF_ERROR(EmitInitializer( - select_and_scatter, static_cast(thunks.back().get()))); - bindings_.UnbindAllLocalIrValues(); - thunks.emplace_back(BuildKernelThunk(select_and_scatter)); - thunk_sequence_->emplace_back( - MakeUnique(std::move(thunks), select_and_scatter)); - } + TF_ASSIGN_OR_RETURN(std::unique_ptr initializer_thunk, + BuildInitializerThunk(select_and_scatter)); + std::vector> thunks; + thunks.push_back(std::move(initializer_thunk)); + thunks.push_back(BuildKernelThunk(select_and_scatter)); + thunk_sequence_->emplace_back( + MakeUnique(std::move(thunks), select_and_scatter)); // TODO(b/31410564): Implement dilation rate for select-and-scatter. if (window_util::HasDilation(window)) { @@ -2036,7 +2035,7 @@ Status IrEmitterUnnested::HandleGather(HloInstruction* gather) { return Unimplemented("Gather is not implemented on GPUs."); } -std::unique_ptr IrEmitterUnnested::BuildKernelThunk( +std::unique_ptr IrEmitterUnnested::BuildKernelThunk( const HloInstruction* inst) { const BufferAssignment& buffer_assn = ir_emitter_context_->buffer_assignment(); @@ -2260,37 +2259,87 @@ std::unique_ptr IrEmitterUnnested::BuildFftThunk( /*output_shape=*/inst->shape(), inst); } -Status IrEmitterUnnested::EmitInitializer(const HloInstruction* hlo, - KernelThunk* thunk) { +StatusOr> IrEmitterUnnested::BuildInitializerThunk( + const HloInstruction* hlo) { bool fused = HloOpcode::kFusion == hlo->opcode(); - const HloInstruction* inst = fused ? hlo->fused_expression_root() : hlo; - CHECK(inst->opcode() == HloOpcode::kSelectAndScatter || - inst->opcode() == HloOpcode::kReduce); - const HloInstruction* init_value = nullptr; - switch (inst->opcode()) { - case HloOpcode::kSelectAndScatter: - init_value = inst->operand(2); - break; - case HloOpcode::kReduce: - init_value = inst->operand(1); - break; - default: - LOG(FATAL) << "Opcode " << inst->opcode() - << " should not need an initializer."; - } + const HloInstruction* init_value = [&] { + switch (inst->opcode()) { + case HloOpcode::kSelectAndScatter: + return inst->operand(2); + case HloOpcode::kReduce: + return inst->operand(1); + default: + LOG(FATAL) << "Opcode " << inst->opcode() + << " should not need an initializer."; + } + }(); if (fused && init_value->opcode() == HloOpcode::kParameter) { init_value = hlo->operand(init_value->parameter_number()); } - return EmitTargetElementLoopInThunk( + // In the common case, the initializer is a constant. In this case, emit a + // device-memset call if we can. Currently StreamExecutor only supports + // zeroing and 32-bit memsets. + if (init_value->IsConstant()) { + CHECK(ShapeUtil::IsScalar(init_value->shape())); + int64 num_bytes = ShapeUtil::ByteSizeOfElements(init_value->shape()); + const auto& literal = init_value->literal(); + + // Are all the bytes of this scalar equal to 0? If so, we can create a + // MemzeroThunk. + ArraySlice literal_bytes( + reinterpret_cast(literal.untyped_data()), num_bytes); + if (c_all_of(literal_bytes, [](uint8 byte) { return byte == 0; })) { + return {MakeUnique(GetAllocationSlice(*hlo), hlo)}; + } + + // If the literal is 8 or 16 bits wide, we can emit a 32-bit memset by + // repeating the literal 4 or 2 times, so long as the destination buffer is + // an even multiple of 32 bits long. + if ((num_bytes == 1 || num_bytes == 2) && + ShapeUtil::ByteSizeOf(hlo->shape()) % 4 == 0) { + uint16 pattern16; + if (num_bytes == 1) { + uint8 b = literal_bytes.front(); + pattern16 = uint16{b} | (uint16{b} << 8); + } else { + pattern16 = literal_bytes.front(); + } + uint32 pattern32 = uint32{pattern16} | (uint32{pattern16} << 16); + return {MakeUnique(pattern32, + GetAllocationSlice(*hlo), hlo)}; + } + + // If the literal is an even multiple of 32 bits wide, we can emit a 32-bit + // memset so long as all 32-bit words of the scalar are equal to each other. + if (num_bytes >= 4 && num_bytes % 4 == 0 && + memcmp(literal_bytes.data(), literal_bytes.data() + 4, + literal_bytes.size() - 4) == 0) { + uint32 word; + memcpy(&word, literal_bytes.data(), sizeof(word)); + return {MakeUnique(word, GetAllocationSlice(*hlo), + hlo)}; + } + } + + // Otherwise fall back to our slow initializer code. + std::unique_ptr kernel_thunk = BuildKernelThunk(hlo); + TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk( *hlo, [=](const llvm_ir::IrArray::Index& index) { return GetIrArray(*init_value, *hlo) .EmitReadArrayElement(index, &ir_builder_); }, - thunk); + kernel_thunk.get())); + + // Clean up state left behind by emitting the loop above. (This is normally + // done in IrEmitterUnnested::Postprocess().) + bindings_.UnbindAllLocalIrValues(); + + // Convert unique_ptr to StatusOr>. + return {std::move(kernel_thunk)}; } namespace { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index b83a2337e2..66c62e2d2d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -148,13 +148,10 @@ class IrEmitterUnnested : public IrEmitter { tensorflow::gtl::ArraySlice dimensions_to_reduce, HloComputation* reducer); - // Emits code to initialize buffer of `inst` in given `thunk`. - Status EmitInitializer(const HloInstruction* inst, KernelThunk* thunk); - // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned // Thunk object. - std::unique_ptr BuildKernelThunk(const HloInstruction* inst); + std::unique_ptr BuildKernelThunk(const HloInstruction* inst); // Returns a FftThunk that calls cuFFT to implement `inst`. std::unique_ptr BuildFftThunk(const HloInstruction* inst); @@ -163,6 +160,11 @@ class IrEmitterUnnested : public IrEmitter { // to make sure `inst` outlives the lifetime of the returned Thunk object. std::unique_ptr BuildGemmThunk(const HloInstruction* inst); + // Returns a thunk that, given a reduce or select-and-scatter op, initializes + // its memory to the appropriate initial value. + StatusOr> BuildInitializerThunk( + const HloInstruction* hlo); + // Returns a thunk that calls host-to-device cuMemcpy to implement `inst`. std::unique_ptr BuildHostToDeviceCopyThunk(const HloInstruction* inst); diff --git a/tensorflow/compiler/xla/service/gpu/memset_thunk.cc b/tensorflow/compiler/xla/service/gpu/memset_thunk.cc new file mode 100644 index 0000000000..18e673542c --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/memset_thunk.cc @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/memset_thunk.h" +#include "tensorflow/stream_executor/stream_executor.h" + +namespace xla { +namespace gpu { + +namespace se = ::perftools::gputools; + +Status MemzeroThunk::ExecuteOnStream( + const BufferAllocations& buffer_allocations, se::Stream* stream) { + se::DeviceMemoryBase dest_data = buffer_allocations.GetDeviceAddress(dest_); + stream->ThenMemZero(&dest_data, dest_data.size()); + return Status::OK(); +} + +Status Memset32BitValueThunk::ExecuteOnStream( + const BufferAllocations& buffer_allocations, se::Stream* stream) { + se::DeviceMemoryBase dest_data = buffer_allocations.GetDeviceAddress(dest_); + stream->ThenMemset32(&dest_data, value_, dest_data.size()); + return Status::OK(); +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/memset_thunk.h b/tensorflow/compiler/xla/service/gpu/memset_thunk.h new file mode 100644 index 0000000000..b4bb74d1dd --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/memset_thunk.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MEMSET_THUNK_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MEMSET_THUNK_H_ + +#include "tensorflow/compiler/xla/service/buffer_assignment.h" +#include "tensorflow/compiler/xla/service/gpu/thunk.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/stream_executor/stream_executor.h" + +// This file contains thunks that set a buffer's elements to a particular value. +// This can be faster than emitting a kernel to set the elements. + +namespace xla { +namespace gpu { + +// Thunk that zeroes out a given chunk of memory. +class MemzeroThunk : public Thunk { + public: + explicit MemzeroThunk(const BufferAllocation::Slice& dest, + const HloInstruction* hlo) + : Thunk(Kind::kMemzero, hlo), dest_(dest) {} + + Status ExecuteOnStream(const BufferAllocations& buffer_allocations, + perftools::gputools::Stream* stream) override; + + private: + const BufferAllocation::Slice dest_; +}; + +// Thunk that sets a given chunk of memory to a particular 32-bit value. The +// destination chunk must have size divisible by 32 bits. +class Memset32BitValueThunk : public Thunk { + public: + explicit Memset32BitValueThunk(uint32 value, + const BufferAllocation::Slice& dest, + const HloInstruction* hlo) + : Thunk(Kind::kMemset32BitValue, hlo), value_(value), dest_(dest) {} + + Status ExecuteOnStream(const BufferAllocations& buffer_allocations, + perftools::gputools::Stream* stream) override; + + private: + uint32 value_; + const BufferAllocation::Slice dest_; +}; + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_MEMSET_THUNK_H_ diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h index 2c3032d79b..9eea958d12 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk.h +++ b/tensorflow/compiler/xla/service/gpu/thunk.h @@ -51,6 +51,8 @@ class Thunk { kGemm, kInfeed, kKernel, + kMemset32BitValue, + kMemzero, kSequential, kTuple, kWhile, diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc index 50d7b5074d..3a097a01ab 100644 --- a/tensorflow/compiler/xla/tests/reduce_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_test.cc @@ -884,5 +884,47 @@ XLA_TEST_F(ReduceTest, ReduceOrPredR2_64x32_To_R1) { RunR2ToR1PredTest(/*and_reduce=false*/ false, /*rows=64*/ 64); } +// Tests reductions with different initial values. There's no test macro that +// combines TYPED_TEST and TYPED_P, so we have to do it manually. +class ReduceInitializerTest : public ReduceTest { + protected: + template + void DoTest(T initializer, int num_elems) { + ComputationBuilder builder(client_, TestName()); + Computation max_fn = CreateScalarMaxComputation( + primitive_util::NativeToPrimitiveType(), &builder); + + auto init = builder.ConstantR0(initializer); + std::vector input_arr(num_elems, std::numeric_limits::lowest()); + auto input_literal = Literal::CreateR1(input_arr); + auto input_data = + client_->TransferToServer(*input_literal).ConsumeValueOrDie(); + builder.Reduce(builder.Parameter(0, input_literal->shape(), "input"), init, + max_fn, {0}); + + ComputeAndCompareR0(&builder, initializer, {input_data.get()}); + } +}; + +XLA_TEST_F(ReduceInitializerTest, U8Small) { DoTest(42, 2); } + +XLA_TEST_F(ReduceInitializerTest, U8BigPowerOf2) { DoTest(42, 4096); } + +XLA_TEST_F(ReduceInitializerTest, U8InitializerBigNonPowerOf2) { + DoTest(42, 4095); +} + +XLA_TEST_F(ReduceInitializerTest, U64InitializerZero) { + DoTest(0, 1024); +} + +XLA_TEST_F(ReduceInitializerTest, U64InitializerOne) { + DoTest(1, 1024); +} + +XLA_TEST_F(ReduceInitializerTest, U64InitializerBigValue) { + DoTest(1234556789123, 1024); +} + } // namespace } // namespace xla -- GitLab From 5a1fddfdf20bd978963050c24ac71d7937071ca5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 07:34:28 -0700 Subject: [PATCH 290/960] Install documentation: adds note for virtual env with fish shell PiperOrigin-RevId: 189904848 --- tensorflow/docs_src/install/install_linux.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 88ceca3cda..2741b61bb2 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -131,7 +131,8 @@ Take the following steps to install TensorFlow with Virtualenv: commands:
$ source ~/tensorflow/bin/activate # bash, sh, ksh, or zsh
-    $ source ~/tensorflow/bin/activate.csh  # csh or tcsh
+ $ source ~/tensorflow/bin/activate.csh # csh or tcsh + $ . ~/tensorflow/bin/activate.fish # fish The preceding source command should change your prompt to the following: -- GitLab From 2a9387d771f4ba99ba09b197ede82a6ea9671af0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 08:21:28 -0700 Subject: [PATCH 291/960] Automated g4 rollback of changelist 189888700 PiperOrigin-RevId: 189910239 --- tensorflow/compiler/jit/xla_device_context.cc | 12 ++-- tensorflow/compiler/jit/xla_launch_util.cc | 14 +--- tensorflow/compiler/tests/BUILD | 20 ------ tensorflow/compiler/tests/oom_test.py | 72 ------------------- 4 files changed, 7 insertions(+), 111 deletions(-) delete mode 100644 tensorflow/compiler/tests/oom_test.py diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc index 93e0dbb9b9..88f7c15f0b 100644 --- a/tensorflow/compiler/jit/xla_device_context.cc +++ b/tensorflow/compiler/jit/xla_device_context.cc @@ -36,14 +36,10 @@ XlaDeviceAllocator::~XlaDeviceAllocator() = default; string XlaDeviceAllocator::Name() { return "xla"; } void* XlaDeviceAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { - auto status_or_dmem = backend_->memory_allocator()->Allocate( - device_ordinal_, num_bytes, /*retry_on_failure=*/false); - if (!status_or_dmem.status().ok()) { - LOG(ERROR) << "Failed to allocate memory: " - << status_or_dmem.status().ToString(); - return nullptr; - } - se::DeviceMemoryBase dmem = status_or_dmem.ValueOrDie(); + se::DeviceMemoryBase dmem = + backend_->memory_allocator() + ->Allocate(device_ordinal_, num_bytes, /*retry_on_failure=*/false) + .ValueOrDie(); VLOG(2) << "Allocated XLA device tensor " << dmem.opaque() << "(" << num_bytes << ")"; return dmem.opaque(); diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 21f58c8310..bb7316c60c 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -56,20 +56,12 @@ XlaAllocator::XlaAllocator(const gpu::Platform* platform, OpKernelContext* op_context) : xla::DeviceMemoryAllocator(platform), op_context_(op_context) {} -XlaAllocator::~XlaAllocator() { - for (void* ptr : allocated_) { - op_context_->device()->GetAllocator({})->DeallocateRaw(ptr); - } -} +XlaAllocator::~XlaAllocator() { CHECK(allocated_.empty()); } xla::StatusOr XlaAllocator::Allocate( int device_ordinal, uint64 size, bool retry_on_failure) { void* data = op_context_->device()->GetAllocator({})->AllocateRaw( Allocator::kAllocatorAlignment, size); - if (!data) { - return errors::ResourceExhausted( - "OOM when allocating temporary tensor with size ", size); - } allocated_.insert(data); return gpu::DeviceMemoryBase(data, size); } @@ -190,8 +182,8 @@ void XlaComputationLaunchContext::PopulateOutputs( // Copy host -> device. (Empty tensors don't have backing buffers.) VLOG(1) << "Constant output tensor on device"; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); + TF_CHECK_OK( + ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); const void* src_ptr = DMAHelper::base(&const_tensor); void* dst_ptr = DMAHelper::base(output_tensor); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 26d4ca0c13..bbb6089ea8 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -191,26 +191,6 @@ tf_xla_py_test( ], ) -tf_xla_py_test( - name = "oom_test", - size = "medium", - srcs = ["oom_test.py"], - disabled_backends = [ - "cpu", - "cpu_ondemand", - ], - deps = [ - ":xla_test", - "//tensorflow/python:array_ops", - "//tensorflow/python:array_ops_gen", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:gradient_checker", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - tf_xla_py_test( name = "conv2d_test", size = "medium", diff --git a/tensorflow/compiler/tests/oom_test.py b/tensorflow/compiler/tests/oom_test.py deleted file mode 100644 index 66be0d61d0..0000000000 --- a/tensorflow/compiler/tests/oom_test.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional tests for out-of-memory conditions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.compiler.tests import xla_test -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.platform import googletest - - -class OutOfMemoryTest(xla_test.XLATestCase): - - def testOutputOutOfMemory(self): - """Allocates tensors until out of memory. - - Generates a large rank-1 tensor. The tensor is an output of an XLA - computation, not constant. - - Check that a ResourceExhaustedError is raised and can be caught. - """ - size = 5e8 - with self.test_session(): - # Force the compiled code to not be constant by feeding in an addend. - p = array_ops.placeholder(dtypes.float32, shape=[]) - with self.test_scope(): - # Create a large R1 tensor. - c = array_ops.zeros([size]) + p - - self.assertRaises( - errors.ResourceExhaustedError, lambda: c.eval(feed_dict={p: 1.0})) - - def testConstantOutOfMemory(self): - """Allocates constant tensors until out of memory. - - Generates a large rank-1 tensor and a small rank-1 tensor. The tensors are - constant outputs of an XLA computation, not variable. - - Multiple constant outputs are created, one small, one large. The small - tensor will have already been allocated when the large tensor fails. - - Check that a ResourceExhaustedError is raised and can be caught. - """ - size = 5e8 - with self.test_session() as sess: - with self.test_scope(): - # Create two R1 tensors, size 5 and size n. - b = array_ops.zeros([5]) - c = array_ops.zeros([size]) - e = control_flow_ops.tuple([b, c]) - self.assertRaises(errors.ResourceExhaustedError, lambda: sess.run(e)) - - -if __name__ == "__main__": - googletest.main() -- GitLab From 56054e42a474a527f12f4d8d0b1f37eb1efd189d Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 21 Mar 2018 08:25:34 -0700 Subject: [PATCH 292/960] [tf.contrib CriticalSection] Avoid deadlocks using additional control dependencies on the lock op. PiperOrigin-RevId: 189910726 --- .../python/ops/critical_section_ops.py | 203 ++++++++++++------ .../python/ops/critical_section_test.py | 143 +++++++++++- 2 files changed, 277 insertions(+), 69 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index cc19372acf..1893d7b466 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -24,10 +24,8 @@ import collections # from tensorflow.core.protobuf import critical_section_pb2 from tensorflow.python.eager import context -from tensorflow.python.eager import function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_resource_variable_ops @@ -48,6 +46,26 @@ class _ExecutionSignature( pass +def _identity(x): + """Identity op that recognizes `TensorArray`, `Operation`, and `Tensor`.""" + if isinstance(x, tensor_array_ops.TensorArray): + return x.identity() + elif isinstance(x, ops.Operation): + return control_flow_ops.group(x) + elif context.executing_eagerly() and x is None: + return None + else: + return array_ops.identity(x) + + +def _get_colocation(op): + """Get colocation symbol from op, if any.""" + try: + return op.get_attr("_class") + except ValueError: + return None + + class CriticalSection(object): """Critical section. @@ -180,8 +198,8 @@ class CriticalSection(object): The tensors returned from `fn(*args, **kwargs)`. Raises: - ValueError: If `fn` attempts to use this `CriticalSection` in any nested - way. + ValueError: If `fn` attempts to lock this `CriticalSection` in any nested + or lazy way that may cause a deadlock. ValueError: If `exclusive_resource_access` is not provided (is `True`) and another `CriticalSection` has an execution requesting the same resources as in `*args`, `**kwargs`, and any additionaly captured @@ -193,69 +211,52 @@ class CriticalSection(object): exclusive_resource_access = kwargs.pop("exclusive_resource_access", True) with ops.name_scope(name, "critical_section_execute", []): - lock = gen_resource_variable_ops.mutex_lock(self._handle) - - with ops.control_dependencies([lock]): - c_known_ops = set() - c_captured_tensors = set() - def add_op_internal(op): - c_known_ops.add(op) - for i in op.inputs: - if i.op not in c_known_ops: - c_captured_tensors.add(i) + # Ensure that mutex locking only happens *after* all args and + # kwargs have been executed. This avoids certain types of deadlocks. + lock = gen_resource_variable_ops.mutex_lock(self._handle) - c = function.HelperContext(add_op_internal) - with c: + if not context.executing_eagerly(): + # NOTE(ebrevdo): This is to ensure we don't pick up spurious + # Operations created by other threads. + with ops.get_default_graph()._lock: # pylint: disable=protected-access + existing_ops = ops.get_default_graph().get_operations() + with ops.control_dependencies([lock]): + r = fn(*args, **kwargs) + # TODO(ebrevdo): If creating critical sections in a python loop, this + # makes graph creation time quadratic. Revisit if this + # becomes a problem. + created_ops = (set(ops.get_default_graph().get_operations()) + .difference(existing_ops)) + else: + with ops.control_dependencies([lock]): r = fn(*args, **kwargs) - resource_inputs = set([ - x for x in - list(nest.flatten(args)) + nest.flatten(kwargs.values()) + - list(c_captured_tensors) - if tensor_util.is_tensor(x) and x.dtype == dtypes.resource]) - - if self._handle in resource_inputs: - raise ValueError("The function fn attempts to access the " - "CriticalSection in which it would be running. " - "This is illegal and would cause deadlocks. " - "CriticalSection: %s." % self._handle) - if not context.executing_eagerly(): - # Collections and op introspection does not work in eager - # mode. This is generally ok; since eager mode (as of - # writing) executes sequentially anyway. - for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): - sg_handle_name = ops.convert_to_tensor(sg.handle).name - self_handle_name = ops.convert_to_tensor(self._handle).name - if sg_handle_name == self_handle_name: - # Other executions in the same critical section are allowed. - continue - if not (exclusive_resource_access or sg.exclusive_resource_access): - # Neither execution requested exclusive access. - continue - resource_intersection = resource_inputs.intersection(sg.resources) - if resource_intersection: - raise ValueError( - "This execution would access resources: %s. Either this " - "lock (CriticalSection: %s) or lock '%s' " - "(CriticalSection: %s) requested exclusive resource access " - "of this resource. Did you mean to call execute with keyword " - "argument exclusive_resource_access=False?" % - (list(resource_intersection), self._handle.name, - sg.op.name, sg.handle.name)) - - def identity(x): # pylint: disable=invalid-name - if isinstance(x, tensor_array_ops.TensorArray): - return x.identity() - elif isinstance(x, ops.Operation): - return control_flow_ops.group(x) - elif context.executing_eagerly() and x is None: - return None - else: - return array_ops.identity(x) - - r_flat = [identity(x) for x in nest.flatten(r)] + self._add_control_dependencies_to_lock(created_ops, lock.op) + + # captured_resources is a list of resources that are directly + # accessed only by ops created during fn(), not by any + # ancestors of those ops in the graph. + captured_resources = set([ + input_ for op in created_ops + for input_ in op.inputs + if input_.dtype == dtypes.resource + ]) + + # NOTE(ebrevdo): The only time self._is_self_handle() is True + # in this call is if one of the recently created ops, within + # the execute(), themselves attempt to access the + # CriticalSection. This will cause a deadlock. + if any(self._is_self_handle(x) for x in captured_resources): + raise ValueError("The function fn attempts to directly access the " + "CriticalSection in which it would be running. " + "This is illegal and would cause deadlocks.") + + self._check_multiple_access_to_resources( + captured_resources, exclusive_resource_access) + + r_flat = [_identity(x) for x in nest.flatten(r)] with ops.control_dependencies(r_flat): # The identity must run on the same machine as self._handle @@ -268,23 +269,93 @@ class CriticalSection(object): # Make sure that if any element of r is accessed, all of # them are executed together. - r = nest.pack_sequence_as( - r, control_flow_ops.tuple(nest.flatten(r))) + r = nest.pack_sequence_as(r, control_flow_ops.tuple(nest.flatten(r))) with ops.control_dependencies([ensure_lock_exists]): - outputs = nest.map_structure(identity, r) + outputs = nest.map_structure(_identity, r) if not context.executing_eagerly(): signature = _ExecutionSignature( op=lock.op, handle=self._handle, - resources=list(resource_inputs), + resources=list(captured_resources), exclusive_resource_access=exclusive_resource_access) ops.add_to_collections( CRITICAL_SECTION_EXECUTIONS, signature) return outputs + def _add_control_dependencies_to_lock(self, created_ops, lock_op): + """To avoid deadlocks, all args must be executed before lock_op.""" + # Get all arguments (explicit and captured) of all ops created by fn(). + all_args = set([input_.op for op in created_ops for input_ in op.inputs]) + all_args.update( + input_op for op in created_ops for input_op in op.control_inputs) + # Unfortunately, we can't use sets throughout because TF seems to + # create new Operation objects for the same op sometimes; and we + # can't rely on id(op). + + # pylint: disable=protected-access + all_args_dict = dict((op._id, op) for op in all_args) + + # Remove ops created within fn, or that lock_op already has a + # control dependency on. Also remove a possible self-loop. + for op in created_ops: + all_args_dict.pop(op._id, None) + for op in lock_op.control_inputs: + all_args_dict.pop(op._id, None) + for input_ in lock_op.inputs: + all_args_dict.pop(input_.op._id, None) + all_args_dict.pop(lock_op._id, None) + + lock_op._add_control_inputs(all_args_dict.values()) + # pylint: enable=protected-access + + def _is_self_handle(self, x): + """Check if the tensor `x` is the same Mutex as `self._handle`.""" + return (x.op.type == "MutexV2" + # blank shared_name means the op will create a unique one. + and x.op.get_attr("shared_name") + and (x.op.get_attr("shared_name") == + self._handle.op.get_attr("shared_name")) + and (x.op.device == self._handle.op.device + or _get_colocation(x.op) == _get_colocation(self._handle.op))) + + def _check_multiple_access_to_resources( + self, captured_resources, exclusive_resource_access): + """Raise if captured_resources are accessed by another CriticalSection. + + Args: + captured_resources: Set of tensors of type resource. + exclusive_resource_access: Whether this execution requires exclusive + resource access. + + Raises: + ValueError: If any tensors in `captured_resources` are also accessed + by another `CriticalSection`, and at least one of them requires + exclusive resource access. + """ + # Collections and op introspection does not work in eager + # mode. This is generally ok; since eager mode (as of + # writing) executes sequentially anyway. + for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): + if self._is_self_handle(sg.handle): + # Other executions in the same critical section are allowed. + continue + if not (exclusive_resource_access or sg.exclusive_resource_access): + # Neither execution requested exclusive access. + continue + resource_intersection = captured_resources.intersection(sg.resources) + if resource_intersection: + raise ValueError( + "This execution would access resources: %s. Either this " + "lock (CriticalSection: %s) or lock '%s' " + "(CriticalSection: %s) requested exclusive resource access " + "of this resource. Did you mean to call execute with keyword " + "argument exclusive_resource_access=False?" % + (list(resource_intersection), self._handle.name, + sg.op.name, sg.handle.name)) + # TODO(ebrevdo): Re-enable once CriticalSection is in core. # def to_proto(self, export_scope=None): diff --git a/tensorflow/contrib/framework/python/ops/critical_section_test.py b/tensorflow/contrib/framework/python/ops/critical_section_test.py index c916592ce1..e24140bd72 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_test.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_test.py @@ -25,6 +25,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging as logging # TODO(ebrevdo): Re-enable once CriticalSection is in core. # from tensorflow.python.training import saver as saver_lib @@ -37,7 +38,7 @@ class CriticalSectionTest(test.TestCase): v = resource_variable_ops.ResourceVariable(0.0, name="v") def fn(a, b): - c = v.read_value() + c = v.value() with ops.control_dependencies([c]): nv = v.assign_add(a * b) with ops.control_dependencies([nv]): @@ -140,15 +141,151 @@ class CriticalSectionTest(test.TestCase): ops.get_collection(critical_section_ops.CRITICAL_SECTION_EXECUTIONS)]) def testRecursiveCriticalSectionAccessIsIllegal(self): + # This does not work properly in eager mode. Eager users will + # just hit a deadlock if they do this. But at least it'll be easier + # to debug. + cs = critical_section_ops.CriticalSection() + def fn(x): + return cs.execute(lambda y: y + 1, x) + with self.assertRaisesRegexp( + ValueError, + r"attempts to directly access the CriticalSection in which it " + r"would be running"): + cs.execute(fn, 1.0) + + def testRecursiveCriticalSectionAccessViaCapturedTensorIsProtected(self): + # This one is subtle; and we're being overly cautious here. The + # deadlock we are ensuring we catch is: + # + # to_capture = CS[lambda x: x + 1](1.0) + # deadlocked = CS[lambda x: x + to_capture](1.0) + # + # This would have caused a deadlock because executing `deadlocked` will + # lock the mutex on CS; but then due to dependencies, will attempt + # to compute `to_capture`. This computation requires locking CS, + # but that is not possible now because CS is already locked by + # `deadlocked`. + # + # We check that CriticalSection.execute properly inserts new + # control dependencies to its lock to ensure all captured + # operations are finished before anything runs within the critical section. + cs = critical_section_ops.CriticalSection(shared_name="cs") + fn = array_ops.identity + to_capture = cs.execute(fn, 1.0) + fn_captures = lambda x: x + to_capture + to_capture_too = array_ops.identity(to_capture) + + ex_0 = cs.execute(fn_captures, 1.0) + + with ops.control_dependencies([to_capture]): + # This is OK because to_capture will execute before this next call + ex_1 = cs.execute(fn_captures, 1.0) + + dependency = array_ops.identity(to_capture) + + fn_captures_dependency = lambda x: x + dependency + + ex_2 = cs.execute(fn_captures_dependency, 1.0) + + with ops.control_dependencies([to_capture_too]): + ex_3 = cs.execute(fn_captures_dependency, 1.0) + + # Ensure there's no actual deadlock on to_execute. + self.assertEquals(2.0, self.evaluate(ex_0)) + self.assertEquals(2.0, self.evaluate(ex_1)) + self.assertEquals(2.0, self.evaluate(ex_2)) + self.assertEquals(2.0, self.evaluate(ex_3)) + + def testRecursiveCriticalSectionAccessWithinLoopIsProtected(self): + cs = critical_section_ops.CriticalSection(shared_name="cs") + + def body_implicit_capture(i, j): + # This would have caused a deadlock if not for logic in execute + # that inserts additional control dependencies onto the lock op: + # * Loop body argument j is captured by fn() + # * i is running in parallel to move forward the execution + # * j is not being checked by the predicate function + # * output of cs.execute() is returned as next j. + fn = lambda: j + 1 + return (i + 1, cs.execute(fn)) + + (i_n, j_n) = control_flow_ops.while_loop( + lambda i, _: i < 1000, + body_implicit_capture, + [0, 0], + parallel_iterations=25) + logging.warn( + "\n==============\nRunning " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_implicit_capture'\n" + "==============\n") + self.assertEquals((1000, 1000), self.evaluate((i_n, j_n))) + logging.warn( + "\n==============\nSuccessfully finished running " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_implicit_capture'\n" + "==============\n") + + def body_implicit_capture_protected(i, j): + # This version is ok because we manually add a control + # dependency on j, which is an argument to the while_loop body + # and captured by fn. + fn = lambda: j + 1 + with ops.control_dependencies([j]): + return (i + 1, cs.execute(fn)) + + (i_n, j_n) = control_flow_ops.while_loop( + lambda i, _: i < 1000, + body_implicit_capture_protected, + [0, 0], + parallel_iterations=25) + logging.warn( + "\n==============\nRunning " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_implicit_capture_protected'\n" + "==============\n") + self.assertEquals((1000, 1000), self.evaluate((i_n, j_n))) + logging.warn( + "\n==============\nSuccessfully finished running " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_implicit_capture_protected'\n" + "==============\n") + + def body_args_capture(i, j): + # This version is ok because j is an argument to fn and we can + # ensure there's a control dependency on j. + fn = lambda x: x + 1 + return (i + 1, cs.execute(fn, j)) + + (i_n, j_n) = control_flow_ops.while_loop( + lambda i, _: i < 1000, + body_args_capture, + [0, 0], + parallel_iterations=25) + logging.warn( + "\n==============\nRunning " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_args_capture'\n" + "==============\n") + self.assertEquals((1000, 1000), self.evaluate((i_n, j_n))) + logging.warn( + "\n==============\nSuccessfully finished running " + "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock " + "body_args_capture'\n" + "==============\n") + + def testRecursiveCriticalSectionAccessIsIllegalSameSharedName(self): # This does not work properly in eager mode. Eager users will # just hit a deadlock if they do this. But at least it'll be easier # to debug. cs = critical_section_ops.CriticalSection(shared_name="cs") + cs_same = critical_section_ops.CriticalSection(shared_name="cs") def fn(x): - return cs.execute(lambda x: x+1, x) + return cs_same.execute(lambda x: x+1, x) with self.assertRaisesRegexp( ValueError, - r"attempts to access the CriticalSection in which it would be running"): + r"attempts to directly access the CriticalSection in which it " + r"would be running"): cs.execute(fn, 1.0) def testMultipleCSExecutionsRequestSameResource(self): -- GitLab From 73cea1b095c0211b532663ea5edf0dc50ff5a448 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Mar 2018 08:40:35 -0700 Subject: [PATCH 293/960] More accurate shape inference for TensorArrayGatherV3 and TensorArrayScatterV3 PiperOrigin-RevId: 189912762 --- tensorflow/core/ops/data_flow_ops.cc | 37 +++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc index 4f946fb3ca..3112f35da4 100644 --- a/tensorflow/core/ops/data_flow_ops.cc +++ b/tensorflow/core/ops/data_flow_ops.cc @@ -668,13 +668,31 @@ REGISTER_OP("TensorArrayGatherV3") .Attr("dtype: type") .Attr("element_shape: shape = { unknown_rank: true }") .SetShapeFn([](InferenceContext* c) { + ShapeHandle indices; ShapeHandle unused; DimensionHandle unused_dim; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &indices)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - return shape_inference::UnknownShape(c); + auto shapes = c->input_handle_shapes_and_types(0); + if (shapes != nullptr && !shapes->empty()) { + ShapeHandle tensor_shape = shapes->at(0).shape; + ShapeHandle output_shape; + TF_RETURN_IF_ERROR( + c->Concatenate(indices, tensor_shape, &output_shape)); + c->set_output(0, output_shape); + return Status::OK(); + } else { + PartialTensorShape p; + TF_RETURN_IF_ERROR(c->GetAttr("element_shape", &p)); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(p, &s)); + ShapeHandle output_shape; + TF_RETURN_IF_ERROR(c->Concatenate(indices, s, &output_shape)); + c->set_output(0, output_shape); + return Status::OK(); + } }); REGISTER_OP("TensorArrayScatterV3") @@ -685,12 +703,25 @@ REGISTER_OP("TensorArrayScatterV3") .Output("flow_out: float") .Attr("T: type") .SetShapeFn([](InferenceContext* c) { + ShapeHandle indices; ShapeHandle unused; DimensionHandle unused_dim; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &indices)); TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), 0), 2, &unused_dim)); TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + ShapeHandle value_shape; + // Assert that the length of the indices tensor is equal to the first + // dimension of the value tensor. + TF_RETURN_IF_ERROR( + c->MergePrefix(c->input(2), indices, &value_shape, &indices)); + auto shapes = c->input_handle_shapes_and_types(0); + if (shapes != nullptr && !shapes->empty()) { + ShapeHandle tensor_shape = shapes->at(0).shape; + ShapeHandle fed_shape; + TF_RETURN_IF_ERROR(c->Subshape(value_shape, 1, &fed_shape)); + TF_RETURN_IF_ERROR(c->Merge(tensor_shape, fed_shape, &fed_shape)); + } return shape_inference::ScalarShape(c); }); -- GitLab From 8337e1778a485102494f99d1924dda546daef4a9 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 21 Mar 2018 08:45:19 -0700 Subject: [PATCH 294/960] Refactor pruning code to support custom node rewrites for feeds and fetches. PiperOrigin-RevId: 189913309 --- .../common_runtime/graph_execution_state.cc | 49 ++- .../common_runtime/graph_execution_state.h | 5 + tensorflow/core/graph/subgraph.cc | 354 ++++++++++-------- tensorflow/core/graph/subgraph.h | 108 +++++- 4 files changed, 333 insertions(+), 183 deletions(-) diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index f5e3d78242..2f17af273f 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -237,6 +237,42 @@ void GraphExecutionState::RestoreStatefulNodes(Graph* graph) { } } +Status GraphExecutionState::PruneGraph( + const BuildGraphOptions& options, Graph* graph, + subgraph::RewriteGraphMetadata* out_rewrite_metadata) { + std::vector> feed_rewrites; + feed_rewrites.reserve(options.callable_options.feed_size()); + std::vector> fetch_rewrites; + fetch_rewrites.reserve(options.callable_options.fetch_size()); + const DeviceAttributes* device_info = + &device_set_->client_device()->attributes(); + if (options.use_function_convention) { + for (int i = 0; i < options.callable_options.feed_size(); ++i) { + feed_rewrites.emplace_back(new subgraph::ArgFeedRewrite( + &options.callable_options.feed(i), device_info, i)); + } + for (int i = 0; i < options.callable_options.fetch_size(); ++i) { + fetch_rewrites.emplace_back(new subgraph::RetvalFetchRewrite( + &options.callable_options.fetch(i), device_info, i)); + } + } else { + for (const string& feed : options.callable_options.feed()) { + feed_rewrites.emplace_back( + new subgraph::RecvFeedRewrite(&feed, device_info)); + } + for (const string& fetch : options.callable_options.fetch()) { + fetch_rewrites.emplace_back( + new subgraph::SendFetchRewrite(&fetch, device_info)); + } + } + std::vector target_node_names( + options.callable_options.target().begin(), + options.callable_options.target().end()); + return subgraph::RewriteGraphForExecution(graph, feed_rewrites, + fetch_rewrites, target_node_names, + out_rewrite_metadata); +} + Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) { const GraphDef* graph_def = &original_graph_def_; @@ -251,10 +287,8 @@ Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) { session_options_->config.graph_options().place_pruned_graph()) { // Rewrite the graph before placement. rewrite_metadata_.reset(new subgraph::RewriteGraphMetadata); - TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( - new_graph.get(), options.callable_options, - device_set_->client_device()->attributes(), - options.use_function_convention, rewrite_metadata_.get())); + TF_RETURN_IF_ERROR( + PruneGraph(options, new_graph.get(), rewrite_metadata_.get())); } // Save stateful placements before placing. @@ -404,12 +438,7 @@ Status GraphExecutionState::BuildGraph(const BuildGraphOptions& options, subgraph::RewriteGraphMetadata rewrite_metadata; if (session_options_ == nullptr || !session_options_->config.graph_options().place_pruned_graph()) { - // Extract the subset of the graph that needs to be run, adding feed/fetch - // ops as needed. - TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( - ng.get(), options.callable_options, - device_set_->client_device()->attributes(), - options.use_function_convention, &rewrite_metadata)); + TF_RETURN_IF_ERROR(PruneGraph(options, ng.get(), &rewrite_metadata)); } else { // This GraphExecutionState represents a graph that was // pruned when this was constructed, so we copy the metadata from diff --git a/tensorflow/core/common_runtime/graph_execution_state.h b/tensorflow/core/common_runtime/graph_execution_state.h index 2312e1a89f..2154ef5bd3 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.h +++ b/tensorflow/core/common_runtime/graph_execution_state.h @@ -177,6 +177,11 @@ class GraphExecutionState { void SaveStatefulNodes(Graph* graph); void RestoreStatefulNodes(Graph* graph); + // Extract the subset of the graph that needs to be run, adding feed/fetch + // ops as needed. + Status PruneGraph(const BuildGraphOptions& options, Graph* graph, + subgraph::RewriteGraphMetadata* out_rewrite_metadata); + Status OptimizeGraph(const BuildGraphOptions& options, std::unique_ptr* optimized_graph); diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc index ca93d049d0..193cf88aed 100644 --- a/tensorflow/core/graph/subgraph.cc +++ b/tensorflow/core/graph/subgraph.cc @@ -28,13 +28,13 @@ limitations under the License. #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { +namespace subgraph { // ---------------------------------------------------------------------------- // Subgraph construction-related routines @@ -44,6 +44,8 @@ namespace tensorflow { namespace { +typedef std::unordered_map NameIndex; + // Rewrite graph by replacing the output tensors specified in // "fed_outputs" with special feed nodes for each specified output // tensor, and removing any nodes that are now disconnected from the @@ -53,59 +55,33 @@ namespace { // Return true on success. On error, return false and sets *error to // an appropriate error message (and *g is left in an indeterminate // state). -static Status FeedInputs(Graph* g, const DeviceAttributes& device_info, - const gtl::ArraySlice& fed_outputs, - bool use_function_convention, - subgraph::NameIndex* name_index, - DataTypeVector* out_feed_types) { +Status FeedInputs( + Graph* g, const std::vector>& feed_rewrites, + NameIndex* name_index, DataTypeVector* out_feed_types) { out_feed_types->clear(); - out_feed_types->reserve(fed_outputs.size()); - for (size_t i = 0; i < fed_outputs.size(); ++i) { - const string& t = fed_outputs[i]; + out_feed_types->reserve(feed_rewrites.size()); + for (size_t i = 0; i < feed_rewrites.size(); ++i) { + const string& t = feed_rewrites[i]->endpoint_name(); TensorId id(ParseTensorName(t)); auto iter = name_index->find(id.first); if (iter == name_index->end()) { return errors::NotFound("FeedInputs: unable to find feed output ", t); } - const Node* n = iter->second; + Node* n = iter->second; DCHECK_EQ(n->name(), id.first); if (id.second >= n->num_outputs()) { return errors::InvalidArgument( "FeedInputs: ", t, " should have output index < ", n->num_outputs()); } - Node* recv_node; - - if (!use_function_convention) { - TF_RETURN_IF_ERROR( - NodeBuilder(strings::StrCat("_recv_", id.first, "_", id.second), - "_Recv") - .Attr("tensor_type", BaseType(n->output_type(id.second))) - .Attr("tensor_name", t) - .Attr("send_device", device_info.name()) - .Attr("recv_device", device_info.name()) - .Attr("send_device_incarnation", - static_cast(device_info.incarnation())) - .Attr("client_terminated", true) - .Finalize(g, &recv_node)); - } else { - // NOTE(mrry): We must include the index as part of the node - // name, because _Arg is a "stateful" kernel and therefore - // its name must uniquely identify a kernel instance across all - // graphs in the same session. - TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat("_arg_", id.first, "_", - id.second, "_", i), - "_Arg") - .Attr("T", BaseType(n->output_type(id.second))) - .Attr("index", static_cast(i)) - .Finalize(g, &recv_node)); - } - recv_node->set_assigned_device_name(device_info.name()); + Node* feed_node; + TF_RETURN_IF_ERROR( + feed_rewrites[i]->AddNode(g, {n, id.second}, &feed_node)); // Update name_index - (*name_index)[recv_node->name()] = recv_node; - g->AddControlEdge(g->source_node(), recv_node); + (*name_index)[feed_node->name()] = feed_node; + g->AddControlEdge(g->source_node(), feed_node); // Look through edges coming out of "n" for edges whose src_output() index // matches "output_index". If found, replace the edges with a connection @@ -119,7 +95,7 @@ static Status FeedInputs(Graph* g, const DeviceAttributes& device_info, n->type_string() == "PlaceholderV2")) { // When feeding a Placeholder node, any outgoing control edges // will be replaced with a control edge from the replacement - // recv_node. + // feed_node. // TODO(josh11b,mrry): Come up with a more elegant way of addressing // the general version of this problem. to_remove.emplace_back(e); @@ -128,10 +104,10 @@ static Status FeedInputs(Graph* g, const DeviceAttributes& device_info, for (const Edge* e : to_remove) { if (e->src_output() == id.second) { - g->AddEdge(recv_node, 0, e->dst(), e->dst_input()); + g->AddEdge(feed_node, 0, e->dst(), e->dst_input()); } else { CHECK_EQ(Graph::kControlSlot, e->src_output()); - g->AddControlEdge(recv_node, e->dst()); + g->AddControlEdge(feed_node, e->dst()); } g->RemoveEdge(e); } @@ -140,9 +116,61 @@ static Status FeedInputs(Graph* g, const DeviceAttributes& device_info, return Status::OK(); } -static bool AddNodeToTargets(const string& node_or_tensor_name, - const subgraph::NameIndex& name_index, - std::unordered_set* targets) { +Status FetchOutputs( + Graph* g, const std::vector>& fetch_rewrites, + NameIndex* name_index, std::vector* out_fetch_nodes, + DataTypeVector* out_fetch_types) { + out_fetch_nodes->clear(); + out_fetch_nodes->reserve(fetch_rewrites.size()); + for (size_t i = 0; i < fetch_rewrites.size(); ++i) { + const string& t = fetch_rewrites[i]->endpoint_name(); + + // Parse t into node_name and output_index. + TensorId id(ParseTensorName(t)); + + // Find node in graph with that name. + auto iter = name_index->find(id.first); + if (iter == name_index->end()) { + return errors::NotFound("FetchOutputs node ", t, ": not found"); + } + Node* n = iter->second; + DCHECK_EQ(n->name(), id.first); + VLOG(2) << "Found fetch node for " << t; + + // Validate output_index + if (n->num_outputs() == 0) { + return errors::InvalidArgument( + "Tried to fetch data for '", t, + "', which produces no output. To run to a node but not fetch any " + "data, pass '", + t, + "' as an argument to the 'target_node_names' argument of the " + "Session::Run API."); + } else if (id.second >= n->num_outputs()) { + return errors::InvalidArgument("FetchOutputs ", t, + ": output index too large, must be < ", + n->num_outputs()); + } + + // Create the fetch Node and connect it up + Node* fetch_node; + TF_RETURN_IF_ERROR( + fetch_rewrites[i]->AddNode(g, {n, id.second}, &fetch_node)); + + // Update the index. + (*name_index)[fetch_node->name()] = fetch_node; + + g->AddControlEdge(fetch_node, g->sink_node()); + out_fetch_nodes->push_back(fetch_node); + out_fetch_types->push_back(BaseType(n->output_type(id.second))); + } + + return Status::OK(); +} + +bool AddNodeToTargets(const string& node_or_tensor_name, + const NameIndex& name_index, + std::unordered_set* targets) { TensorId id = ParseTensorName(node_or_tensor_name); auto iter = name_index.find(id.first); if (iter == name_index.end()) { @@ -154,9 +182,9 @@ static bool AddNodeToTargets(const string& node_or_tensor_name, return true; } -static Status PruneForTargets(Graph* g, const subgraph::NameIndex& name_index, - const std::vector& fetch_nodes, - const gtl::ArraySlice& target_nodes) { +Status PruneForTargets(Graph* g, const NameIndex& name_index, + const std::vector& fetch_nodes, + const gtl::ArraySlice& target_nodes) { string not_found; std::unordered_set targets; for (Node* n : fetch_nodes) { @@ -183,108 +211,149 @@ static Status PruneForTargets(Graph* g, const subgraph::NameIndex& name_index, } // namespace -namespace subgraph { +Status ArgFeedRewrite::AddNode(Graph* g, NodeBuilder::NodeOut feed_tensor, + Node** out_node) { + // NOTE(mrry): We must include the index as part of the node + // name, because _Arg is a "stateful" kernel and therefore + // its name must uniquely identify a kernel instance across all + // graphs in the same session. + TF_RETURN_IF_ERROR( + NodeBuilder(strings::StrCat("_arg_", feed_tensor.node->name(), "_", + feed_tensor.index, "_", arg_index_), + "_Arg") + .Attr("T", BaseType(feed_tensor.node->output_type(feed_tensor.index))) + .Attr("index", arg_index_) + .Finalize(g, out_node)); + (*out_node)->set_assigned_device_name(device_info().name()); + return Status::OK(); +} -Status FetchOutputs(Graph* g, const DeviceAttributes& device_info, - const gtl::ArraySlice& fetch_outputs, - bool use_function_convention, NameIndex* name_index, - std::vector* out_fetch_nodes, - DataTypeVector* out_fetch_types) { - out_fetch_nodes->clear(); - out_fetch_nodes->reserve(fetch_outputs.size()); - for (size_t i = 0; i < fetch_outputs.size(); ++i) { - const string& t = fetch_outputs[i]; +Status RecvFeedRewrite::AddNode(Graph* g, NodeBuilder::NodeOut feed_tensor, + Node** out_node) { + TF_RETURN_IF_ERROR( + NodeBuilder(strings::StrCat("_recv_", feed_tensor.node->name(), "_", + feed_tensor.index), + "_Recv") + .Attr("tensor_type", + BaseType(feed_tensor.node->output_type(feed_tensor.index))) + .Attr("tensor_name", endpoint_name()) + .Attr("send_device", device_info().name()) + .Attr("recv_device", device_info().name()) + .Attr("send_device_incarnation", + static_cast(device_info().incarnation())) + .Attr("client_terminated", true) + .Finalize(g, out_node)); + + (*out_node)->set_assigned_device_name(device_info().name()); + return Status::OK(); +} - // Parse t into node_name and output_index. - TensorId id(ParseTensorName(t)); +Status RetvalFetchRewrite::AddNode(Graph* g, NodeBuilder::NodeOut fetch_tensor, + Node** out_node) { + // NOTE(mrry): We must include the index as part of the node + // name, because _Retval is a "stateful" kernel and therefore + // its name must uniquely identify a kernel instance across all + // graphs in the same session. + TF_RETURN_IF_ERROR( + NodeBuilder(strings::StrCat("_retval_", fetch_tensor.node->name(), "_", + fetch_tensor.index, "_", retval_index_), + "_Retval") + .Input(fetch_tensor.node, fetch_tensor.index) + .Attr("T", + BaseType(fetch_tensor.node->output_type(fetch_tensor.index))) + .Attr("index", retval_index_) + .Finalize(g, out_node)); + (*out_node)->set_assigned_device_name(device_info().name()); + return Status::OK(); +} - // Find node in graph with that name. - auto iter = name_index->find(id.first); - if (iter == name_index->end()) { - return errors::NotFound("FetchOutputs node ", t, ": not found"); - } - Node* n = iter->second; - DCHECK_EQ(n->name(), id.first); - VLOG(2) << "Found fetch node for " << t; +Status SendFetchRewrite::AddNode(Graph* g, NodeBuilder::NodeOut fetch_tensor, + Node** out_node) { + TF_RETURN_IF_ERROR( + NodeBuilder(strings::StrCat("_send_", fetch_tensor.node->name(), "_", + fetch_tensor.index), + "_Send") + .Input(fetch_tensor.node, fetch_tensor.index) + .Attr("tensor_name", endpoint_name()) + .Attr("send_device", device_info().name()) + .Attr("recv_device", device_info().name()) + .Attr("send_device_incarnation", + static_cast(device_info().incarnation())) + .Attr("client_terminated", true) + .Finalize(g, out_node)); + (*out_node)->set_assigned_device_name(device_info().name()); + return Status::OK(); +} - // Validate output_index - if (n->num_outputs() == 0) { - return errors::InvalidArgument( - "Tried to fetch data for '", t, - "', which produces no output. To run to a node but not fetch any " - "data, pass '", - t, - "' as an argument to the 'target_node_names' argument of the " - "Session::Run API."); - } else if (id.second >= n->num_outputs()) { - return errors::InvalidArgument("FetchOutputs ", t, - ": output index too large, must be < ", - n->num_outputs()); +Status RewriteGraphForExecution( + Graph* g, const gtl::ArraySlice& fed_outputs, + const gtl::ArraySlice& fetch_outputs, + const gtl::ArraySlice& target_node_names, + const DeviceAttributes& device_info, bool use_function_convention, + RewriteGraphMetadata* out_metadata) { + std::vector> feed_rewrites; + feed_rewrites.reserve(fed_outputs.size()); + if (use_function_convention) { + for (size_t i = 0; i < fed_outputs.size(); ++i) { + feed_rewrites.emplace_back(new ArgFeedRewrite( + &fed_outputs[i], &device_info, static_cast(i))); } - - // Create the fetch Node and connect it up - Node* send_node; - if (!use_function_convention) { - TF_RETURN_IF_ERROR( - NodeBuilder(strings::StrCat("_send_", id.first, "_", id.second), - "_Send") - .Input(n, id.second) - .Attr("tensor_name", t) - .Attr("send_device", device_info.name()) - .Attr("recv_device", device_info.name()) - .Attr("send_device_incarnation", - static_cast(device_info.incarnation())) - .Attr("client_terminated", true) - .Finalize(g, &send_node)); - } else { - // NOTE(mrry): We must include the index as part of the node - // name, because _Retval is a "stateful" kernel and therefore - // its name must uniquely identify a kernel instance across all - // graphs in the same session. - TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat("_retval_", id.first, "_", - id.second, "_", i), - "_Retval") - .Input(n, id.second) - .Attr("T", BaseType(n->output_type(id.second))) - .Attr("index", static_cast(i)) - .Finalize(g, &send_node)); + } else { + for (const string& fed_output : fed_outputs) { + feed_rewrites.emplace_back( + new RecvFeedRewrite(&fed_output, &device_info)); } - send_node->set_assigned_device_name(device_info.name()); - - // Update the index. - (*name_index)[send_node->name()] = send_node; + } - g->AddControlEdge(send_node, g->sink_node()); - out_fetch_nodes->push_back(send_node); - out_fetch_types->push_back(BaseType(n->output_type(id.second))); + std::vector> fetch_rewrites; + fetch_rewrites.reserve(fetch_outputs.size()); + if (use_function_convention) { + for (size_t i = 0; i < fetch_outputs.size(); ++i) { + fetch_rewrites.emplace_back(new RetvalFetchRewrite( + &fetch_outputs[i], &device_info, static_cast(i))); + } + } else { + for (const string& fetch_output : fetch_outputs) { + fetch_rewrites.emplace_back( + new SendFetchRewrite(&fetch_output, &device_info)); + } } - return Status::OK(); + return RewriteGraphForExecution(g, feed_rewrites, fetch_rewrites, + target_node_names, out_metadata); +} + +namespace { +template +std::vector ConvertToVector(StringContainer field) { + return std::vector(field.begin(), field.end()); } +} // namespace Status RewriteGraphForExecution( - Graph* g, const gtl::ArraySlice& fed_outputs, - const gtl::ArraySlice& fetch_outputs, + Graph* g, const std::vector>& feed_rewrites, + const std::vector>& fetch_rewrites, const gtl::ArraySlice& target_node_names, - const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata) { - if (fetch_outputs.empty() && target_node_names.empty()) { + if (fetch_rewrites.empty() && target_node_names.empty()) { return errors::InvalidArgument( "Must specify at least one target to fetch or execute."); } std::unordered_set endpoints; - for (const string& endpoint_name : fed_outputs) { - auto result = endpoints.insert(endpoint_name); + for (const auto& feed_rewrite : feed_rewrites) { + auto result = endpoints.insert(feed_rewrite->endpoint_name()); if (!result.second) { - return errors::InvalidArgument("Endpoint \"", endpoint_name, + return errors::InvalidArgument("Endpoint \"", + feed_rewrite->endpoint_name(), "\" fed more than once."); } } - for (const auto& fetch : fetch_outputs) { - if (endpoints.count(fetch) > 0) { - return errors::InvalidArgument(fetch, " is both fed and fetched."); + for (const auto& fetch_rewrite : fetch_rewrites) { + if (endpoints.count(fetch_rewrite->endpoint_name()) > 0) { + return errors::InvalidArgument(fetch_rewrite->endpoint_name(), + " is both fed and fetched."); } } @@ -297,19 +366,17 @@ Status RewriteGraphForExecution( } // Add the feeds. This may replace nodes in the graph, including the nodes - // currently listed in "fetch_nodes". We pass "name_index" so the index is + // currently listed in "fetch_rewrites". We pass "name_index" so the index is // kept up to date. - if (!fed_outputs.empty()) { - TF_RETURN_IF_ERROR(FeedInputs(g, device_info, fed_outputs, - use_function_convention, &name_index, - &out_metadata->feed_types)); + if (!feed_rewrites.empty()) { + TF_RETURN_IF_ERROR( + FeedInputs(g, feed_rewrites, &name_index, &out_metadata->feed_types)); } // Add the fetch nodes, also updating "name_index". std::vector fetch_nodes; - if (!fetch_outputs.empty()) { - TF_RETURN_IF_ERROR(FetchOutputs(g, device_info, fetch_outputs, - use_function_convention, &name_index, + if (!fetch_rewrites.empty()) { + TF_RETURN_IF_ERROR(FetchOutputs(g, fetch_rewrites, &name_index, &fetch_nodes, &out_metadata->fetch_types)); } @@ -323,25 +390,6 @@ Status RewriteGraphForExecution( return Status::OK(); } -namespace { -template -std::vector ConvertToVector(StringContainer field) { - return std::vector(field.begin(), field.end()); -} -} // namespace - -Status RewriteGraphForExecution(Graph* g, - const CallableOptions& callable_options, - const DeviceAttributes& device_info, - bool use_function_convention, - RewriteGraphMetadata* out_metadata) { - return RewriteGraphForExecution(g, ConvertToVector(callable_options.feed()), - ConvertToVector(callable_options.fetch()), - ConvertToVector(callable_options.target()), - device_info, use_function_convention, - out_metadata); -} - } // namespace subgraph } // namespace tensorflow diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 0dc59582f4..ba35846d93 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/protobuf/config.pb.h" @@ -39,6 +40,37 @@ struct RewriteGraphMetadata { DataTypeVector fetch_types; }; +// Describes the action to take on a particular tensor endpoint (described by +// a ":" pair) when pruning the graph. +// +// The `AddNode()` method must be overridden to describe this action. The method +// will be invoked once during `RewriteGraphForExecution()` with tensor endpoint +// named by `endpoint_name`, and it may either create a single new node, or fail +// with an error if the resulting graph would be invalid. +class PruneRewrite { + public: + // `endpoint_name` and `device_info` must outlive this object. + PruneRewrite(const string* endpoint_name, const DeviceAttributes* device_info) + : endpoint_name_(endpoint_name), device_info_(device_info) {} + virtual ~PruneRewrite() {} + + // Creates a new node whose output replaces the given `tensor` in graph `g`. + // The node will be assigned to the device named in `device_info`. + virtual Status AddNode(Graph* g, NodeBuilder::NodeOut tensor, + Node** out_node) = 0; + + // Returns the name of the tensor to which this rewrite applies. + const string& endpoint_name() { return *endpoint_name_; } + + protected: + // The device on which the new node will be created. + const DeviceAttributes& device_info() { return *device_info_; } + + private: + const string* const endpoint_name_; // Not owned. + const DeviceAttributes* const device_info_; // Not owned. +}; + // Rewrite the graph structure of "*g" to deal with feeding node // outputs, fetching node outputs, and only running a subset of the // graph. "fed_outputs" and "fetch_outputs" are both lists of @@ -49,7 +81,7 @@ struct RewriteGraphMetadata { // In the resulting graph "*g", output edges in "fed_outputs" have // been redirected to special "_recv" nodes introduced into the graph. // If these fed nodes are not needed in order to compute the effects -// of the nodes in "targets_nodes" and "fetch_outputs", then these may +// of the nodes in "target_node_names" and "fetch_outputs", then these may // be omitted from the graph. // // In the resulting graph "*g", additional "_send" nodes are connected @@ -71,25 +103,61 @@ Status RewriteGraphForExecution( const gtl::ArraySlice& target_node_names, const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); -Status RewriteGraphForExecution(Graph* g, - const CallableOptions& callable_options, - const DeviceAttributes& device_info, - bool use_function_convention, - RewriteGraphMetadata* out_metadata); - -typedef std::unordered_map NameIndex; - -// Augment "*g" by adding special "fetch" nodes that connect to the -// tensor outputs specified in "fetch_outputs" to retrieve the output -// of the tensors. The new nodes added are set up to execute on -// "client_device_name", and are returned in "*fetch_nodes". -// -// Return OK on success. On error, return false and sets *error to -// an appropriate error message (and *g is left in an indeterminate -// state). -Status FetchOutputs(Graph* g, const DeviceAttributes& device_info, - const gtl::ArraySlice& fetch_outputs, - NameIndex* name_index, std::vector* fetch_nodes); + +// A more general version of the above function that supports +// customizable rewriting actions for each fed and fetched tensor. +Status RewriteGraphForExecution( + Graph* g, const std::vector>& feed_rewrites, + const std::vector>& fetch_rewrites, + const gtl::ArraySlice& target_node_names, + RewriteGraphMetadata* out_metadata); + +///////////////////////////////////////////////////////// +// Custom rewrite actions for fed and fetched tensors. // +///////////////////////////////////////////////////////// + +// A rewrite action that adds an _Arg node for a fed tensor. +class ArgFeedRewrite : public PruneRewrite { + public: + ArgFeedRewrite(const string* endpoint_name, + const DeviceAttributes* device_info, int32 arg_index) + : PruneRewrite(endpoint_name, device_info), arg_index_(arg_index) {} + Status AddNode(Graph* g, NodeBuilder::NodeOut feed_tensor, + Node** out_node) override; + + private: + const int32 arg_index_; +}; + +// A rewrite action that adds a client-terminated _Recv node for a fed tensor. +class RecvFeedRewrite : public PruneRewrite { + public: + using PruneRewrite::PruneRewrite; + Status AddNode(Graph* g, NodeBuilder::NodeOut feed_tensor, + Node** out_node) override; +}; + +// A rewrite action that adds a _Retval node for a fetched tensor. +class RetvalFetchRewrite : public PruneRewrite { + public: + RetvalFetchRewrite(const string* endpoint_name, + const DeviceAttributes* device_info, int32 retval_index) + : PruneRewrite(endpoint_name, device_info), retval_index_(retval_index) {} + Status AddNode(Graph* g, NodeBuilder::NodeOut fetch_tensor, + Node** out_node) override; + + private: + const int32 retval_index_; +}; + +// A rewrite action that adds a client-terminated _Send node for a +// fetched tensor. +class SendFetchRewrite : public PruneRewrite { + public: + using PruneRewrite::PruneRewrite; + Status AddNode(Graph* g, NodeBuilder::NodeOut fetch_tensor, + Node** out_node) override; +}; } // namespace subgraph } // namespace tensorflow -- GitLab From 754c0615c94bbc7f8ede78b8b16cc616104994ef Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Mar 2018 09:19:11 -0700 Subject: [PATCH 295/960] Deleted dead code and fixed compilation warnings PiperOrigin-RevId: 189918110 --- .../costs/op_level_cost_estimator_test.cc | 17 ----------------- .../grappler/optimizers/dependency_optimizer.h | 6 ++---- .../grappler/optimizers/function_optimizer.h | 5 +---- .../grappler/optimizers/layout_optimizer.cc | 4 ---- 4 files changed, 3 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index d5360cba24..a92f230101 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -55,23 +55,6 @@ OpContext DescribeMatMul(int m, int n, int l, int k) { return op_context; } -// Returns an OpInfo for MatMul with unknown input shapes. -OpContext DescribeMatMulUnknownShape() { - OpContext op_context; - SetCpuDevice(&op_context.op_info); - op_context.op_info.set_op("MatMul"); - - auto input = op_context.op_info.add_inputs(); - auto shape = input->mutable_shape(); - shape->set_unknown_rank(true); - - input = op_context.op_info.add_inputs(); - shape = input->mutable_shape(); - shape->set_unknown_rank(true); - - return op_context; -} - // Wrangles the minimum number of proto fields to set up an input of // arbitrary rank and type. void DescribeArbitraryRankInput(const std::vector& dims, DataType dtype, diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.h b/tensorflow/core/grappler/optimizers/dependency_optimizer.h index 61ed154793..b4db98125a 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.h +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.h @@ -29,9 +29,8 @@ namespace grappler { // optimizations, such as removing nodes that are effectively noops. class DependencyOptimizer : public GraphOptimizer { public: - DependencyOptimizer() : opt_level_(RewriterConfig::ON) {} - explicit DependencyOptimizer(RewriterConfig::Toggle opt_level) - : opt_level_(opt_level) {} + DependencyOptimizer() {} + explicit DependencyOptimizer(RewriterConfig::Toggle opt_level) {} ~DependencyOptimizer() override {} string name() const override { return "dependency_optimizer"; }; @@ -63,7 +62,6 @@ class DependencyOptimizer : public GraphOptimizer { // Main driver of dependency optimizations. Status OptimizeDependencies(); - RewriterConfig::Toggle opt_level_; bool fetch_nodes_known_; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h index b124efe01d..41444e4673 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.h +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -26,7 +26,7 @@ namespace grappler { // operations to make the overall graph more efficient. class FunctionOptimizer : public GraphOptimizer { public: - FunctionOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} + FunctionOptimizer(RewriterConfig::Toggle opt_level) {} ~FunctionOptimizer() override {} string name() const override { return "function_optimizer"; }; @@ -36,9 +36,6 @@ class FunctionOptimizer : public GraphOptimizer { void Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& optimized_graph, double result) override; - - private: - RewriterConfig::Toggle opt_level_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index e4af71c40a..18e63f823b 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -301,10 +301,6 @@ bool IsComparisonOp(const NodeDef& node) { return is_compare; } -bool IsLogicalOp(const NodeDef& node) { - return IsLogicalAnd(node) || IsLogicalNot(node) || IsLogicalOr(node); -} - bool IsReduceOp(const NodeDef& node) { return IsSum(node) || IsMean(node) || IsProd(node) || IsMax(node) || IsMin(node) || IsAll(node) || IsAny(node); -- GitLab From 326bfa618a86c9fd604b8b98be6baff46337b6c6 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 21 Mar 2018 09:20:28 -0700 Subject: [PATCH 296/960] Don't run tensorflow/python:function_test under fastbuild. It gets flaky timeouts. PiperOrigin-RevId: 189918276 --- tensorflow/python/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 11195b3565..d11ee6f74c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1034,7 +1034,10 @@ cuda_py_tests( "//tensorflow/core:protos_all_py", ], shard_count = 10, - tags = ["noasan"], + tags = [ + "noasan", + "optonly", + ], ) py_test( -- GitLab From 335c782f5c504e36e496a33180d8243760a4001c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 09:26:16 -0700 Subject: [PATCH 297/960] Deletes sequential_feature_column(|_test).py. PiperOrigin-RevId: 189919029 --- .../sequential_feature_column.py | 325 ------------ .../sequential_feature_column_test.py | 471 ------------------ 2 files changed, 796 deletions(-) delete mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py delete mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py deleted file mode 100644 index 4ed7268e7a..0000000000 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ /dev/null @@ -1,325 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Experimental methods for tf.feature_column sequence input.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import abc -import collections - - -from tensorflow.python.feature_column import feature_column as fc -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import sparse_ops -from tensorflow.python.ops import variable_scope - -# TODO(b/73160931): Fix pydoc. -# pylint: disable=g-doc-args,missing-docstring,protected-access -# TODO(b/73827486): Support SequenceExample. - - -def sequence_input_layer( - features, - feature_columns, - weight_collections=None, - trainable=True, - scope=None): - """"Builds input layer for sequence input. - - All `feature_columns` must be sequence dense columns with the same - `sequence_length`. The output of this method can be fed into sequence - networks, such as RNN. - - The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. - `T` is the maximum sequence length for this batch, which could differ from - batch to batch. - - If multiple `feature_columns` are given with `Di` `num_elements` each, their - outputs are concatenated. So, the final `Tensor` has shape - `[batch_size, T, D0 + D1 + ... + Dn]`. - - Example: - - ```python - rating = sequence_numeric_column('rating') - watches = sequence_categorical_column_with_identity( - 'watches', num_buckets=1000) - watches_embedding = embedding_column(watches, dimension=10) - columns = [rating, watches] - - features = tf.parse_example(..., features=make_parse_example_spec(columns)) - input_layer, sequence_length = sequence_input_layer(features, columns) - - rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) - outputs, state = tf.nn.dynamic_rnn( - rnn_cell, inputs=input_layer, sequence_length=sequence_length) - ``` - - Returns: - An `(input_layer, sequence_length)` tuple where: - - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. - `T` is the maximum sequence length for this batch, which could differ - from batch to batch. `D` is the sum of `num_elements` for all - `feature_columns`. - - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence - length for each example. - Raises: - ValueError: If any of the `feature_columns` is the wrong type. - """ - feature_columns = fc._clean_feature_columns(feature_columns) - for c in feature_columns: - if not isinstance(c, _SequenceDenseColumn): - raise ValueError( - 'All feature_columns must be of type _SequenceDenseColumn. ' - 'Given (type {}): {}'.format(type(c), c)) - - with variable_scope.variable_scope( - scope, default_name='sequence_input_layer', values=features.values()): - builder = fc._LazyBuilder(features) - output_tensors = [] - sequence_lengths = [] - ordered_columns = [] - for column in sorted(feature_columns, key=lambda x: x.name): - ordered_columns.append(column) - with variable_scope.variable_scope( - None, default_name=column._var_scope_name): - dense_tensor, sequence_length = column._get_sequence_dense_tensor( - builder, - weight_collections=weight_collections, - trainable=trainable) - # Flattens the final dimension to produce a 3D Tensor. - num_elements = column._variable_shape.num_elements() - shape = array_ops.shape(dense_tensor) - output_tensors.append( - array_ops.reshape( - dense_tensor, - shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) - sequence_lengths.append(sequence_length) - fc._verify_static_batch_size_equality(output_tensors, ordered_columns) - # TODO(b/73160931): Verify sequence_length equality. - return array_ops.concat(output_tensors, -1), sequence_lengths[0] - - -# TODO(b/73160931): Add remaining categorical columns. -def sequence_categorical_column_with_identity( - key, num_buckets, default_value=None): - return _SequenceCategoricalColumn( - fc.categorical_column_with_identity( - key=key, - num_buckets=num_buckets, - default_value=default_value)) - - -# TODO(b/73160931): Merge with embedding_column -def _sequence_embedding_column( - categorical_column, dimension, initializer=None, ckpt_to_load_from=None, - tensor_name_in_ckpt=None, max_norm=None, trainable=True): - if not isinstance(categorical_column, _SequenceCategoricalColumn): - raise ValueError( - 'categorical_column must be of type _SequenceCategoricalColumn. ' - 'Given (type {}): {}'.format( - type(categorical_column), categorical_column)) - return _SequenceEmbeddingColumn( - fc.embedding_column( - categorical_column, - dimension=dimension, - initializer=initializer, - ckpt_to_load_from=ckpt_to_load_from, - tensor_name_in_ckpt=tensor_name_in_ckpt, - max_norm=max_norm, - trainable=trainable)) - - -def sequence_numeric_column( - key, - shape=(1,), - default_value=0., - dtype=dtypes.float32): - # TODO(b/73160931): Add validations. - return _SequenceNumericColumn( - key, - shape=shape, - default_value=default_value, - dtype=dtype) - - -class _SequenceDenseColumn(fc._FeatureColumn): - """Represents dense sequence data.""" - - __metaclass__ = abc.ABCMeta - - TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name - 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) - - @abc.abstractproperty - def _variable_shape(self): - """`TensorShape` without batch and sequence dimensions.""" - pass - - @abc.abstractmethod - def _get_sequence_dense_tensor( - self, inputs, weight_collections=None, trainable=None): - """Returns a `TensorSequenceLengthPair`.""" - pass - - -def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): - with ops.name_scope(None, 'sequence_length') as name_scope: - row_ids = sp_tensor.indices[:, 0] - column_ids = sp_tensor.indices[:, 1] - column_ids += array_ops.ones_like(column_ids) - seq_length = ( - math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) - # If the last n rows do not have ids, seq_length will have shape - # [batch_size - n]. Pad the remaining values with zeros. - n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] - padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) - return array_ops.concat([seq_length, padding], axis=0, name=name_scope) - - -class _SequenceCategoricalColumn( - fc._CategoricalColumn, - collections.namedtuple( - '_SequenceCategoricalColumn', ['categorical_column'])): - - @property - def name(self): - return self.categorical_column.name - - @property - def _parse_example_spec(self): - return self.categorical_column._parse_example_spec - - def _transform_feature(self, inputs): - return self.categorical_column._transform_feature(inputs) - - @property - def _num_buckets(self): - return self.categorical_column._num_buckets - - def _get_sparse_tensors(self, inputs, weight_collections=None, - trainable=None): - sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) - id_tensor = sparse_tensors.id_tensor - weight_tensor = sparse_tensors.weight_tensor - # Expands final dimension, so that embeddings are not combined during - # embedding lookup. - check_id_rank = check_ops.assert_equal( - array_ops.rank(id_tensor), 2, - data=[ - 'Column {} expected ID tensor of rank 2. '.format(self.name), - 'id_tensor shape: ', array_ops.shape(id_tensor)]) - with ops.control_dependencies([check_id_rank]): - id_tensor = sparse_ops.sparse_reshape( - id_tensor, - shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) - if weight_tensor is not None: - check_weight_rank = check_ops.assert_equal( - array_ops.rank(weight_tensor), 2, - data=[ - 'Column {} expected weight tensor of rank 2.'.format(self.name), - 'weight_tensor shape:', array_ops.shape(weight_tensor)]) - with ops.control_dependencies([check_weight_rank]): - weight_tensor = sparse_ops.sparse_reshape( - weight_tensor, - shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) - return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) - - def _sequence_length(self, inputs): - sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) - return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) - - -class _SequenceEmbeddingColumn( - _SequenceDenseColumn, - collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): - - @property - def name(self): - return self.embedding_column.name - - @property - def _parse_example_spec(self): - return self.embedding_column._parse_example_spec - - def _transform_feature(self, inputs): - return self.embedding_column._transform_feature(inputs) - - @property - def _variable_shape(self): - return self.embedding_column._variable_shape - - def _get_sequence_dense_tensor( - self, inputs, weight_collections=None, trainable=None): - dense_tensor = self.embedding_column._get_dense_tensor( - inputs=inputs, - weight_collections=weight_collections, - trainable=trainable) - sequence_length = self.embedding_column.categorical_column._sequence_length( - inputs) - return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) - - -class _SequenceNumericColumn( - _SequenceDenseColumn, - collections.namedtuple( - '_SequenceNumericColumn', - ['key', 'shape', 'default_value', 'dtype'])): - - @property - def name(self): - return self.key - - @property - def _parse_example_spec(self): - return {self.key: parsing_ops.VarLenFeature(self.dtype)} - - def _transform_feature(self, inputs): - return inputs.get(self.key) - - @property - def _variable_shape(self): - return tensor_shape.TensorShape(self.shape) - - def _get_sequence_dense_tensor( - self, inputs, weight_collections=None, trainable=None): - # Do nothing with weight_collections and trainable since no variables are - # created in this function. - del weight_collections - del trainable - sp_tensor = inputs.get(self) - dense_tensor = sparse_ops.sparse_tensor_to_dense( - sp_tensor, default_value=self.default_value) - # Reshape into [batch_size, T, variable_shape]. - dense_shape = array_ops.concat( - [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], - axis=0) - dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) - sequence_length = _sequence_length_from_sparse_tensor( - sp_tensor, num_elements=self._variable_shape.num_elements()) - return _SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) - -# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py deleted file mode 100644 index 59674869a2..0000000000 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py +++ /dev/null @@ -1,471 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for sequential_feature_column.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc -from tensorflow.python.feature_column.feature_column import _LazyBuilder -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.platform import test -from tensorflow.python.training import monitored_session - - -class SequenceInputLayerTest(test.TestCase): - - def test_embedding_column(self): - vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - - embedding_dimension_a = 2 - embedding_values_a = ( - (1., 2.), # id 0 - (3., 4.), # id 1 - (5., 6.) # id 2 - ) - embedding_dimension_b = 3 - embedding_values_b = ( - (11., 12., 13.), # id 0 - (14., 15., 16.), # id 1 - (17., 18., 19.) # id 2 - ) - def _get_initializer(embedding_dimension, embedding_values): - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - return _initializer - - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], - ] - expected_sequence_length = [1, 2] - - categorical_column_a = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column_a = sfc._sequence_embedding_column( - categorical_column_a, dimension=embedding_dimension_a, - initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) - categorical_column_b = sfc.sequence_categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_b = sfc._sequence_embedding_column( - categorical_column_b, dimension=embedding_dimension_b, - initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) - - input_layer, sequence_length = sfc.sequence_input_layer( - features={ - 'aaa': sparse_input_a, - 'bbb': sparse_input_b, - }, - # Test that columns are reordered alphabetically. - feature_columns=[embedding_column_b, embedding_column_a]) - - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ('sequence_input_layer/aaa_embedding/embedding_weights:0', - 'sequence_input_layer/bbb_embedding/embedding_weights:0'), - tuple([v.name for v in global_vars])) - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) - self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) - self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_numeric_column(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_input_layer = [ - [[0.], [1.]], - [[10.], [0.]], - ] - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa') - - input_layer, sequence_length = sfc.sequence_input_layer( - features={'aaa': sparse_input}, - feature_columns=[numeric_column]) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_numeric_column_multi_dim(self): - """Tests sequence_input_layer for multi-dimensional numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - # The output of numeric_column._get_dense_tensor should be flattened. - expected_input_layer = [ - [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]], - ] - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) - - input_layer, sequence_length = sfc.sequence_input_layer( - features={'aaa': sparse_input}, - feature_columns=[numeric_column]) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - -def _assert_sparse_tensor_value(test_case, expected, actual): - test_case.assertEqual(np.int64, np.array(actual.indices).dtype) - test_case.assertAllEqual(expected.indices, actual.indices) - - test_case.assertEqual( - np.array(expected.values).dtype, np.array(actual.values).dtype) - test_case.assertAllEqual(expected.values, actual.values) - - test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) - test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) - - -class SequenceCategoricalColumnWithIdentityTest(test.TestCase): - - def test_get_sparse_tensors(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((1, 2, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) - - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - - self.assertIsNone(id_weight_pair.weight_tensor) - with monitored_session.MonitoredSession() as sess: - _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - def test_get_sparse_tensors_inputs3d(self): - """Tests _get_sparse_tensors when the input is already 3D Tensor.""" - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 2, 0), - dense_shape=(2, 2, 1)) - - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'Column aaa expected ID tensor of rank 2\.\s*' - r'id_tensor shape:\s*\[2 2 1\]'): - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({'aaa': inputs})) - with monitored_session.MonitoredSession() as sess: - id_weight_pair.id_tensor.eval(session=sess) - - def test_sequence_length(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_sequence_length_with_zeros(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((1, 0), (3, 0), (3, 1)), - values=(1, 2, 0), - dense_shape=(5, 2)) - expected_sequence_length = [0, 1, 0, 2, 0] - - sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - -class SequenceEmbeddingColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - expected_lookups = [ - # example 0, ids [2] - [[7., 11.], [0., 0.]], - # example 1, ids [0, 1] - [[1., 2.], [3., 5.]], - # example 2, ids [] - [[0., 0.], [0., 0.]], - # example 3, ids [1] - [[3., 5.], [0., 0.]], - ] - - categorical_column = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( - categorical_column, dimension=embedding_dimension, - initializer=_initializer) - - embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual( - ('embedding_weights:0',), tuple([v.name for v in global_vars])) - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) - self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) - - def test_sequence_length(self): - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] - - categorical_column = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( - categorical_column, dimension=2) - - _, sequence_length = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_sequence_length_with_empty_rows(self): - """Tests _sequence_length when some examples do not have ids.""" - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [] - # example 1, ids [2] - # example 2, ids [0, 1] - # example 3, ids [] - # example 4, ids [1] - # example 5, ids [] - indices=((1, 0), (2, 0), (2, 1), (4, 0)), - values=(2, 0, 1, 1), - dense_shape=(6, 2)) - expected_sequence_length = [0, 1, 2, 0, 1, 0] - - categorical_column = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = sfc._sequence_embedding_column( - categorical_column, dimension=2) - - _, sequence_length = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - -class SequenceNumericColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_dense_tensor = [ - [[0.], [1.]], - [[10.], [0.]], - ] - numeric_column = sfc.sequence_numeric_column('aaa') - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - - def test_get_sequence_dense_tensor_with_shape(self): - """Tests get_sequence_dense_tensor with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_dense_tensor = [ - [[0., 1., 2.], [3., 4., 5.]], - [[10., 11., 12.], [0., 0., 0.]], - ] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - - def test_get_dense_tensor_multi_dim(self): - """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - expected_dense_tensor = [ - [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], - [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], - ] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) - - def test_sequence_length(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_sequence_length_with_shape(self): - """Tests _sequence_length with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa') - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - def test_sequence_length_with_empty_rows(self): - """Tests _sequence_length when some examples do not have ids.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [] - # example 1, values [[0.], [1.]] - # example 2, [[2.]] - # example 3, values [] - # example 4, [[3.]] - # example 5, values [] - indices=((1, 0), (1, 1), (2, 0), (4, 0)), - values=(0., 1., 2., 3.), - dense_shape=(6, 2)) - expected_sequence_length = [0, 2, 1, 0, 1, 0] - numeric_column = sfc.sequence_numeric_column('aaa') - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - - -if __name__ == '__main__': - test.main() -- GitLab From 911225a7eaf2872472484bce5f717d287a0e3224 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Mar 2018 09:59:18 -0700 Subject: [PATCH 298/960] Added an option to run shape analysis assuming the shapes of the feed nodes are valid. PiperOrigin-RevId: 189923541 --- tensorflow/python/grappler/model_analyzer.cc | 5 +++-- tensorflow/python/grappler/model_analyzer.h | 2 +- tensorflow/python/grappler/model_analyzer.i | 8 +++++--- tensorflow/python/grappler/model_analyzer.py | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/grappler/model_analyzer.cc b/tensorflow/python/grappler/model_analyzer.cc index d23eb811ac..5a76cdd8fb 100644 --- a/tensorflow/python/grappler/model_analyzer.cc +++ b/tensorflow/python/grappler/model_analyzer.cc @@ -26,9 +26,10 @@ namespace grappler { ModelAnalyzer::ModelAnalyzer(const GrapplerItem& item) : item_(item) {} -Status ModelAnalyzer::GenerateReport(bool debug, std::ostream& os) { +Status ModelAnalyzer::GenerateReport(bool debug, bool assume_valid_feeds, + std::ostream& os) { GraphProperties properties(item_); - TF_RETURN_IF_ERROR(properties.InferStatically(false)); + TF_RETURN_IF_ERROR(properties.InferStatically(assume_valid_feeds)); for (const auto& node : item_.MainOpsFanin()) { PrintNodeInfo(node, properties, debug, os); diff --git a/tensorflow/python/grappler/model_analyzer.h b/tensorflow/python/grappler/model_analyzer.h index 5bc551927d..97ffafabe1 100644 --- a/tensorflow/python/grappler/model_analyzer.h +++ b/tensorflow/python/grappler/model_analyzer.h @@ -31,7 +31,7 @@ class GraphProperties; class ModelAnalyzer { public: explicit ModelAnalyzer(const GrapplerItem& item); - Status GenerateReport(bool debug, std::ostream& os); + Status GenerateReport(bool debug, bool assume_valid_feeds, std::ostream& os); private: void PrintNodeInfo(const NodeDef* node, const GraphProperties& properties, diff --git a/tensorflow/python/grappler/model_analyzer.i b/tensorflow/python/grappler/model_analyzer.i index 7c3a692d0e..4955780764 100644 --- a/tensorflow/python/grappler/model_analyzer.i +++ b/tensorflow/python/grappler/model_analyzer.i @@ -40,7 +40,8 @@ limitations under the License. %} %{ -string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug) { +string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, + bool assume_valid_feeds, bool debug) { tensorflow::grappler::ItemConfig cfg; cfg.apply_optimizations = false; std::unique_ptr item = @@ -53,10 +54,11 @@ string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug tensorflow::grappler::ModelAnalyzer analyzer(*item); std::stringstream os; - analyzer.GenerateReport(debug, os); + analyzer.GenerateReport(debug, assume_valid_feeds, os); return os.str(); } %} -string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, bool debug); +string GenerateModelReport(const tensorflow::MetaGraphDef& metagraph, + bool assume_valid_feeds, bool debug); diff --git a/tensorflow/python/grappler/model_analyzer.py b/tensorflow/python/grappler/model_analyzer.py index 535889e1c4..98cdc57850 100644 --- a/tensorflow/python/grappler/model_analyzer.py +++ b/tensorflow/python/grappler/model_analyzer.py @@ -22,11 +22,12 @@ from tensorflow.python import pywrap_tensorflow as tf_wrap from tensorflow.python.framework import errors -def GenerateModelReport(metagraph, debug=False): +def GenerateModelReport(metagraph, assume_valid_feeds=True, debug=False): """Report what's known statically about each node in the provided metagraph. Args: metagraph: A TensorFlow MetaGraphDef. + assume_valid_feeds: If True, assume that the shape of the fed nodes is valid debug: Add some information useful for debugging. Returns: @@ -34,6 +35,6 @@ def GenerateModelReport(metagraph, debug=False): """ with errors.raise_exception_on_not_ok_status(): ret_from_swig = tf_wrap.GenerateModelReport(metagraph.SerializeToString(), - debug) + assume_valid_feeds, debug) return ret_from_swig -- GitLab From d854706bb3ccbcd3808ed5d89cb4b094634614ef Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Wed, 21 Mar 2018 10:20:25 -0700 Subject: [PATCH 299/960] Collapse adjacent dimensions that have no paddings. For example, tf.pad(<4D tensor>, [[0, 0], [0, 0], [0, 0], [0, 1]]) is equivalent to a 2D pad, which is faster. PiperOrigin-RevId: 189926996 --- tensorflow/core/kernels/pad_op.cc | 127 ++++++++++++++++-- tensorflow/python/kernel_tests/pad_op_test.py | 27 ++++ 2 files changed, 143 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index a7238ef67b..41494f56c5 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -104,42 +104,147 @@ class PadOp : public OpKernel { return; } - Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + TensorShape collapsed_input_shape; + TensorShape collapsed_output_shape; + Tensor collapsed_paddings; + if (fixed_dims > 1 && + CollapseAdjacentNonPaddedDimensions( + in0.shape(), in1, output_shape, &collapsed_input_shape, + &collapsed_paddings, &collapsed_output_shape)) { + Tensor collapsed_input; + CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape)); + Tensor collapsed_output; + AllocatorAttributes alloc_attrs; + alloc_attrs.set_on_host(context->input_memory_type(0) == HOST_MEMORY); + OP_REQUIRES_OK(context, + context->allocate_temp(collapsed_input.dtype(), + collapsed_output_shape, + &collapsed_output, alloc_attrs)); + const Tensor& collapsed_paddings_ref = collapsed_paddings; + typename TTypes::ConstMatrix collapsed_paddings_matrix = + collapsed_paddings_ref.matrix(); + OperateWithVariableRank(context, collapsed_input_shape.dims(), + collapsed_input, collapsed_paddings_matrix, + pad_value, &collapsed_output); + + Tensor output; + CHECK(output.CopyFrom(collapsed_output, output_shape)); + context->set_output(0, output); + } else { + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, output_shape, &output)); + OperateWithVariableRank(context, fixed_dims, in0, paddings, pad_value, + output); + } + } + + private: + // Collapses adjacent dimensions that are not padded to one dimension for + // speed. Returns true if any two dimensions are collapsed. For example, + // + // Pad(input_shape=[8, 28, 28, 3], + // paddings=[[0, 0], [0, 0], [0, 0], [0, 1]] + // is equivalent to + // Pad(input_shape=[6272, 3], + // paddings=[[0, 0], [0, 1]]) + // + // input_shape: the original input shape. + // paddings_as_tensor: the original paddings. + // output_shape: the original output shape. + // collapsed_input_shape: the input shape after collapsing. + // collapsed_paddings_as_tensor: the paddings after collapsing. + // collapsed_output_shape: the output shape after collapsing. + static bool CollapseAdjacentNonPaddedDimensions( + const TensorShape& input_shape, const Tensor& paddings_as_tensor, + const TensorShape& output_shape, TensorShape* collapsed_input_shape, + Tensor* collapsed_paddings_as_tensor, + TensorShape* collapsed_output_shape) { + bool collapsed = false; + typename TTypes::ConstMatrix paddings = + paddings_as_tensor.matrix(); + std::vector> collapsed_paddings; + int i = 0; + while (i < paddings.dimension(0)) { + if (paddings(i, 0) != 0 || paddings(i, 1) != 0) { + // If padded, copy the original dimension over. + collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), + input_shape.dim_size(i)); + collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), + output_shape.dim_size(i)); + collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)}); + ++i; + } else { + // If not padded, find the next dimension that is padded and collapse + // all dimensions in between to one dimension. + int64 collapsed_input_dim_size = input_shape.dim_size(i); + int64 collapsed_output_dim_size = output_shape.dim_size(i); + ++i; + while (i < paddings.dimension(0) && paddings(i, 0) == 0 && + paddings(i, 1) == 0) { + collapsed = true; + collapsed_input_dim_size *= input_shape.dim_size(i); + collapsed_output_dim_size *= output_shape.dim_size(i); + ++i; + } + collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), + collapsed_input_dim_size); + collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), + collapsed_output_dim_size); + collapsed_paddings.push_back({0, 0}); + } + } + + // Copy collapsed_paddings to collapsed_paddings_as_tensor. + *collapsed_paddings_as_tensor = + Tensor(paddings_as_tensor.dtype(), + TensorShape({static_cast(collapsed_paddings.size()), 2})); + auto collapsed_paddings_as_matrix = + collapsed_paddings_as_tensor->matrix(); + for (size_t i = 0; i < collapsed_paddings.size(); ++i) { + collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first; + collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second; + } + return collapsed; + } + + void OperateWithVariableRank(OpKernelContext* context, int fixed_dims, + const Tensor& input, + typename TTypes::ConstMatrix paddings, + T pad_value, Tensor* output) { // Invoke the dims-specific implementation. switch (fixed_dims) { case 0: - Operate<0>(context, in0.tensor(), paddings, pad_value, output); + Operate<0>(context, input.tensor(), paddings, pad_value, output); break; case 1: // TODO(irving): Once Pad doesn't need a scalar special case, // change flat to tensor. That is, once !allow_legacy_scalars(). - Operate<1>(context, in0.flat(), paddings, pad_value, output); + Operate<1>(context, input.flat(), paddings, pad_value, output); break; case 2: - Operate<2>(context, in0.tensor(), paddings, pad_value, output); + Operate<2>(context, input.tensor(), paddings, pad_value, output); break; case 3: - Operate<3>(context, in0.tensor(), paddings, pad_value, output); + Operate<3>(context, input.tensor(), paddings, pad_value, output); break; case 4: - Operate<4>(context, in0.tensor(), paddings, pad_value, output); + Operate<4>(context, input.tensor(), paddings, pad_value, output); break; case 5: - Operate<5>(context, in0.tensor(), paddings, pad_value, output); + Operate<5>(context, input.tensor(), paddings, pad_value, output); break; case 6: - Operate<6>(context, in0.tensor(), paddings, pad_value, output); + Operate<6>(context, input.tensor(), paddings, pad_value, output); break; default: OP_REQUIRES(context, false, errors::InvalidArgument("Only ranks up to 6 supported: ", - in0.shape().DebugString())); + input.shape().DebugString())); } } - private: template void Operate(OpKernelContext* context, typename TTypes::ConstTensor input, diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index 9ed5947aae..361853448c 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -336,5 +336,32 @@ class PadOpTest(test.TestCase): self.assertAllEqual(inp, out) self.assertShapeEqual(inp, tf_val) + def testCollapseAdjacentNonPaddedDimensions(self): + # pyformat: disable + paddings_values = [[[0, 0], [0, 0], [0, 0], [0, 1]], + [[0, 0], [2, 3], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0], [0, 0]]] + # pyformat: enable + for paddings_value in paddings_values: + for dtype in [dtypes.float32, dtypes.int32]: + inp = constant_op.constant(1, shape=[8, 28, 28, 3], dtype=dtype) + paddings = constant_op.constant(paddings_value, dtype=dtypes.int32) + padded = array_ops.pad(inp, paddings) + middle = array_ops.slice(padded, [row[0] for row in paddings_value], + [dim.value for dim in inp.shape.dims]) + left = array_ops.slice(padded, [0, 0, 0, 0], + [row[0] for row in paddings_value]) + right = array_ops.slice( + padded, + [paddings_value[i][0] + inp.shape.dims[i].value for i in range(4)], + [-1, -1, -1, -1]) + with self.test_session(use_gpu=True): + self.assertAllEqual(inp.eval(), middle.eval()) + self.assertAllEqual( + np.zeros([row[0] for row in paddings_value]), left.eval()) + self.assertAllEqual( + np.zeros([row[1] for row in paddings_value]), right.eval()) + + if __name__ == "__main__": test.main() -- GitLab From e50fb3f561f1bfcd0a5fb457c69d50da64c789f8 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 21 Mar 2018 10:26:49 -0700 Subject: [PATCH 300/960] Fix zipfile path for MacOS builds. For some reason, the zipfile module on Macs appears to work differently and complains about the whl file we are trying to extract not being found. PiperOrigin-RevId: 189928007 --- tensorflow/tools/ci_build/copy_binary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index b5a282b64a..420d390d2b 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -60,7 +60,7 @@ def copy_binary(directory, origin_tag, new_tag, version, gpu=False): package = "tf_nightly" origin_binary = BINARY_STRING_TEMPLATE % (package, version, origin_tag) new_binary = BINARY_STRING_TEMPLATE % (package, version, new_tag) - zip_ref = zipfile.ZipFile(directory + origin_binary, "r") + zip_ref = zipfile.ZipFile(os.path.join(directory, origin_binary), "r") try: tmpdir = tempfile.mkdtemp() @@ -115,6 +115,7 @@ def main(): args = parser.parse_args() # Argument checking + args.filename = os.path.abspath(args.filename) check_existence(args.filename) regex_groups = re.search(TF_NIGHTLY_REGEX, args.filename) directory = regex_groups.group(1) -- GitLab From d7cb36a6876e02540c13f31f468a84f54c8591d4 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 21 Mar 2018 10:47:18 -0700 Subject: [PATCH 301/960] [XLA:GPU] Don't crash if a GTE feeds into a bitcast. GTE and bitcast are sort of "implicitly fused", so we have to handle them in this way. PiperOrigin-RevId: 189931422 --- .../xla/service/gpu/ir_emitter_unnested.cc | 19 +++++++++--- tensorflow/compiler/xla/tests/BUILD | 1 + .../compiler/xla/tests/hlo_test_base.cc | 7 +++++ tensorflow/compiler/xla/tests/hlo_test_base.h | 6 ++++ tensorflow/compiler/xla/tests/tuple_test.cc | 29 +++++++++++++++++++ 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 135a607ab9..199e6b7874 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1979,11 +1979,22 @@ GetHloBufferSlices(const HloInstruction* hlo, } } - // If *that* didn't work, check whether instr is a GTE instruction. If it - // is, see if we can get a buffer for its parent, and continue walking up - // parents until we find a defined buffer or we hit something that's not a - // GTE. + // If *that* didn't work, walk up any bitcasts that we might see. These + // must appear before any GTE instructions, because it's illegal to bitcast + // to a tuple type. const HloInstruction* parent = instr; + while (parent->opcode() == HloOpcode::kBitcast) { + parent = parent->operand(0); + + auto slice = GetKnownAtRuntimeSlice(parent, {}, buffer_assn); + if (slice.has_value()) { + return {{*slice, gte_indices}}; + } + } + + // Finally, check whether instr is a GTE instruction. If it is, see if we + // can get a buffer for its parent, and continue walking up parents until we + // find a defined buffer or we hit something that's not a GTE. while (parent->opcode() == HloOpcode::kGetTupleElement) { gte_indices.push_front(parent->tuple_index()); parent = parent->operand(0); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 04a9c1ef79..7fb7919674 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1009,6 +1009,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/tests:client_library_test_base", + "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 5f62c44f25..e574644dea 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -115,6 +115,13 @@ StatusOr> HloTestBase::Execute( return test_runner_.Execute(std::move(module), arguments); } +StatusOr> HloTestBase::ExecuteNoHloPasses( + std::unique_ptr module, + tensorflow::gtl::ArraySlice arguments) { + return test_runner_.Execute(std::move(module), arguments, + /*run_hlo_passes=*/false); +} + std::unique_ptr HloTestBase::ExecuteAndTransfer( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments) { diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index e375f13a44..3e8e2360bb 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -98,6 +98,12 @@ class HloTestBase : public ::testing::Test { std::unique_ptr module, tensorflow::gtl::ArraySlice arguments); + // Same as above, except the module will be executed without running any HLO + // passes on it. + StatusOr> ExecuteNoHloPasses( + std::unique_ptr module, + tensorflow::gtl::ArraySlice arguments); + std::unique_ptr ExecuteAndTransfer( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments); diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index 2029312f94..fa60af4b6a 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -514,5 +515,33 @@ XLA_TEST_F(TupleTest, ComplexTuples) { error_spec_); } +class TupleHloTest : public HloTestBase {}; + +// Disabled on CPU parallel because that's broken and will be removed soon. +// Disabled on the interpreter because bitcast doesn't exist on the interpreter. +TEST_F(TupleHloTest, + DISABLED_ON_INTERPRETER(DISABLED_ON_CPU_PARALLEL(BitcastAfterGTE))) { + const char* testcase = R"( + HloModule m + + ENTRY test { + name.1 = (f32[3]{0}) parameter(0) + get-tuple-element.1 = f32[3]{0} get-tuple-element(name.1), index=0 + bitcast = f32[1,3]{1,0} bitcast(get-tuple-element.1) + copy = f32[1,3]{1,0} copy(bitcast) + ROOT tuple.4 = (f32[1,3]{1,0}) tuple(copy) + } + )"; + auto module = + HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) + .ValueOrDie(); + auto param = Literal::MakeTupleOwned(Literal::CreateR1({1, 2, 3})); + TF_ASSERT_OK_AND_ASSIGN(auto result, + ExecuteNoHloPasses(std::move(module), {param.get()})); + EXPECT_TRUE(LiteralTestUtil::Equal( + *result, + *Literal::MakeTupleOwned(Literal::CreateR2({{1, 2, 3}})))); +} + } // namespace } // namespace xla -- GitLab From 53f823c1273c7670fb5c337ae7ac2e9647a1fa4f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 11:01:54 -0700 Subject: [PATCH 302/960] Update the doc to reflect the change of replacing std::clock with random::New64() as random number generator seed. PiperOrigin-RevId: 189934377 --- tensorflow/contrib/tensor_forest/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensor_forest/README.md b/tensorflow/contrib/tensor_forest/README.md index 8b24430c71..9e1491ea66 100644 --- a/tensorflow/contrib/tensor_forest/README.md +++ b/tensorflow/contrib/tensor_forest/README.md @@ -116,7 +116,7 @@ a different `feature_bagging_fraction * num_features` sized subset of the input features. Defaults to 1.0 (no feature bagging). * `base_random_seed`. By default (`base_random_seed = 0`), the random number -generator for each tree is seeded by the current time (in microseconds) when +generator for each tree is seeded by a 64-bit random value when each tree is first created. Using a non-zero value causes tree training to be deterministic, in that the i-th tree's random number generator is seeded with the value `base_random_seed + i`. -- GitLab From a0d3ce1de30735dd8d0ed8f95a6eb4d0c3e7773b Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 21 Mar 2018 11:10:13 -0700 Subject: [PATCH 303/960] Adding `drop_remainder` option for the `map_and_batch` transformation, which allows the user to express whether they wish to drop the last batch if its size is smaller than desired; the default is not to drop the smaller batch. PiperOrigin-RevId: 189936029 --- .../kernel_tests/batch_dataset_op_test.py | 42 ++++++++++++------- .../contrib/data/python/ops/batching.py | 30 +++++++++---- .../kernels/data/map_and_batch_dataset_op.cc | 17 +++++++- .../core/ops/compat/ops_history.v1.pbtxt | 4 ++ tensorflow/core/ops/dataset_ops.cc | 1 + 5 files changed, 69 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index a2da953c7b..5abb38c2d2 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -311,10 +311,10 @@ class BatchDatasetTest(test.TestCase): self.assertEqual([None], dataset.output_shapes[1][0].as_list()) self.assertEqual([None, 30], dataset.output_shapes[1][1].as_list()) - def _testBatchAndMapDatasetHelper(self, num_parallel_batches=1): + def _testMapAndBatchDatasetHelper(self, num_parallel_batches=1): """Test a dataset that maps a TF function across its input elements.""" # The pipeline is TensorSliceDataset -> - # RepeatDataset(count) -> BatchAndMapDataset(square_3, batch_size). + # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size). components = (np.arange(7), np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], np.array(37.0) * np.arange(7)) @@ -381,26 +381,38 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): sess.run(init_op, feed_dict={count: 14, batch_size: 0}) - def testBatchAndMapDataset(self): - return self._testBatchAndMapDatasetHelper() + def testMapAndBatchDataset(self): + return self._testMapAndBatchDatasetHelper() - def testBatchAndMapDatasetWithParallelBatching(self): - return self._testBatchAndMapDatasetHelper(num_parallel_batches=10) + def testMapAndBatchDatasetWithParallelBatching(self): + return self._testMapAndBatchDatasetHelper(num_parallel_batches=10) - def testMapAndBatchYieldsPartialBatch(self): - iterator = (dataset_ops.Dataset.range(10) - .apply(batching.map_and_batch( - lambda x: array_ops.reshape(x * x, [1]), 4)) - .make_one_shot_iterator()) - self.assertEqual([None, 1], iterator.output_shapes.as_list()) + def _testMapAndBatchPartialBatchHelper(self, drop_remainder=False): + iterator = ( + dataset_ops.Dataset.range(10).apply( + batching.map_and_batch( + lambda x: array_ops.reshape(x * x, [1]), + batch_size=4, + drop_remainder=drop_remainder)).make_one_shot_iterator()) + if drop_remainder: + self.assertEqual([4, 1], iterator.output_shapes.as_list()) + else: + self.assertEqual([None, 1], iterator.output_shapes.as_list()) next_element = iterator.get_next() with self.test_session() as sess: self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) - self.assertAllEqual([[64], [81]], sess.run(next_element)) + if not drop_remainder: + self.assertAllEqual([[64], [81]], sess.run(next_element)) with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testMapAndBatchPartialBatch(self): + return self._testMapAndBatchPartialBatchHelper() + + def testMapAndBatchPartialBatchDropRemainder(self): + return self._testMapAndBatchPartialBatchHelper(drop_remainder=True) + def testMapAndBatchSparse(self): def _sparse(i): @@ -425,7 +437,7 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) - def testBatchAndMapDatasetFails(self): + def testMapAndBatchDatasetFails(self): """Test a dataset that maps a TF function across its input elements.""" dataset = dataset_ops.Dataset.from_tensors( array_ops.check_numerics( @@ -439,7 +451,7 @@ class BatchDatasetTest(test.TestCase): with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): sess.run(init_op, feed_dict={batch_size: 14}) - def testBatchAndMapDatasetShapeMismatch(self): + def testMapAndBatchDatasetShapeMismatch(self): """Test a dataset that maps a TF function across its input elements.""" def generator(): diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 6463d75750..a212adf6cf 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -348,13 +348,19 @@ class _RestructuredDataset(dataset_ops.Dataset): class _MapAndBatchDataset(dataset_ops.MapDataset): """A `Dataset` that maps a function over a batch of elements.""" - def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches): + def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches, + drop_remainder): """See `Dataset.map()` for details.""" super(_MapAndBatchDataset, self).__init__(input_dataset, map_func) - self._batch_size = ops.convert_to_tensor( + self._batch_size_t = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") - self._num_parallel_batches = ops.convert_to_tensor( + self._num_parallel_batches_t = ops.convert_to_tensor( num_parallel_batches, dtype=dtypes.int64, name="num_parallel_batches") + self._drop_remainder_t = ops.convert_to_tensor( + drop_remainder, dtype=dtypes.bool, name="drop_remainder") + + self._batch_size = batch_size + self._drop_remainder = drop_remainder def _as_variant_tensor(self): # pylint: disable=protected-access @@ -363,8 +369,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): input_resource, self._map_func.captured_inputs, f=self._map_func, - batch_size=self._batch_size, - num_parallel_batches=self._num_parallel_batches, + batch_size=self._batch_size_t, + num_parallel_batches=self._num_parallel_batches_t, + drop_remainder=self._drop_remainder_t, output_types=nest.flatten( sparse.as_dense_types(self.output_types, self.output_classes)), output_shapes=nest.flatten( @@ -373,8 +380,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): @property def output_shapes(self): + dim = self._batch_size if self._drop_remainder else None return nest.pack_sequence_as(self._output_shapes, [ - tensor_shape.vector(None).concatenate(s) + tensor_shape.vector(dim).concatenate(s) for s in nest.flatten(self._output_shapes) ]) @@ -383,7 +391,10 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): return self._output_types -def map_and_batch(map_func, batch_size, num_parallel_batches=1): +def map_and_batch(map_func, + batch_size, + num_parallel_batches=1, + drop_remainder=False): """Fused implementation of `map` and `batch`. Maps `map_func` across `batch_size` consecutive elements of this dataset @@ -403,6 +414,9 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1): number of batches to create in parallel. On one hand, higher values can help mitigate the effect of stragglers. On the other hand, higher values can increase contention if CPU is scarce. + drop_remainder: A `tf.bool` scalar `tf.Tensor`, representing whether the + last batch should be dropped in case its size is smaller than desired; + the default behavior is not to drop the smaller batch. Returns: A `Dataset` transformation function, which can be passed to @@ -411,6 +425,6 @@ def map_and_batch(map_func, batch_size, num_parallel_batches=1): def _apply_fn(dataset): return _MapAndBatchDataset(dataset, map_func, batch_size, - num_parallel_batches) + num_parallel_batches, drop_remainder) return _apply_fn diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index e22200f758..aaf4dc7341 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -66,12 +66,16 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { errors::InvalidArgument( "num_parallel_batches must be greater than zero.")); + bool drop_remainder; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "drop_remainder", &drop_remainder)); + std::unique_ptr captured_func; OP_REQUIRES_OK(ctx, CapturedFunction::Create( func_, std::move(other_arguments), &captured_func)); *output = new Dataset(input, batch_size, num_parallel_batches, - output_types_, output_shapes_, + drop_remainder, output_types_, output_shapes_, std::move(captured_func), &ctx->eigen_cpu_device()); } @@ -79,13 +83,15 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { class Dataset : public DatasetBase { public: Dataset(const DatasetBase* input, int64 batch_size, - int64 num_parallel_batches, const DataTypeVector& output_types, + int64 num_parallel_batches, bool drop_remainder, + const DataTypeVector& output_types, const std::vector& output_shapes, std::unique_ptr captured_func, const Eigen::ThreadPoolDevice* device) : input_(input), batch_size_(batch_size), num_parallel_batches_(num_parallel_batches), + drop_remainder_(drop_remainder), output_types_(output_types), output_shapes_(output_shapes), captured_func_(std::move(captured_func)), @@ -177,6 +183,12 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { batch_results_[current_batch_index_].output.clear(); } else { if (num_elements < dataset()->batch_size_) { + if (dataset()->drop_remainder_) { + // Deallocate tensors allocated for the output. + batch_results_[current_batch_index_].output.clear(); + *end_of_sequence = true; + return Status::OK(); + } const std::vector& output = batch_results_[current_batch_index_].output; for (size_t i = 0; i < output.size(); ++i) { @@ -392,6 +404,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const NameAttrList func_; const int64 batch_size_; const int64 num_parallel_batches_; + const bool drop_remainder_; const DataTypeVector output_types_; const std::vector output_shapes_; const std::unique_ptr captured_func_; diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 992e943966..ddf7627463 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -24383,6 +24383,10 @@ op { name: "num_parallel_batches" type: DT_INT64 } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } output_arg { name: "handle" type: DT_VARIANT diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index bdbbf6d7c3..f32baee45e 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -166,6 +166,7 @@ REGISTER_OP("MapAndBatchDataset") .Input("other_arguments: Targuments") .Input("batch_size: int64") .Input("num_parallel_batches: int64") + .Input("drop_remainder: bool") .Output("handle: variant") .Attr("f: func") .Attr("Targuments: list(type) >= 0") -- GitLab From 0eaec2864b737b7a278b028a1719d062470f3397 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 21 Mar 2018 11:12:28 -0700 Subject: [PATCH 304/960] Moves KernelAndDevice to common_runtime PiperOrigin-RevId: 189936396 --- tensorflow/BUILD | 1 + tensorflow/c/eager/BUILD | 5 + tensorflow/c/eager/c_api_internal.h | 1 + tensorflow/c/eager/runtime.cc | 133 +++-------------- tensorflow/c/eager/runtime.h | 52 +------ tensorflow/c/eager/runtime_test.cc | 129 ---------------- tensorflow/core/BUILD | 5 +- tensorflow/core/common_runtime/eager/BUILD | 86 +++++++++++ .../common_runtime/eager/kernel_and_device.cc | 132 +++++++++++++++++ .../common_runtime/eager/kernel_and_device.h | 85 +++++++++++ .../eager/kernel_and_device_test.cc | 140 ++++++++++++++++++ 11 files changed, 474 insertions(+), 295 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/BUILD create mode 100644 tensorflow/core/common_runtime/eager/kernel_and_device.cc create mode 100644 tensorflow/core/common_runtime/eager/kernel_and_device.h create mode 100644 tensorflow/core/common_runtime/eager/kernel_and_device_test.cc diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 057ac79961..e0d86997ff 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -591,6 +591,7 @@ filegroup( "//tensorflow/contrib/verbs:all_files", "//tensorflow/core:all_files", "//tensorflow/core/api_def:all_files", + "//tensorflow/core/common_runtime/eager:all_files", "//tensorflow/core/debug:all_files", "//tensorflow/core/distributed_runtime:all_files", "//tensorflow/core/distributed_runtime/rpc:all_files", diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 73a3450e0e..841ff48a38 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -28,6 +28,8 @@ tf_cuda_library( "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", "//tensorflow/core:core_cpu", + "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -62,6 +64,8 @@ tf_cuda_library( "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:kernel_and_device", ], ) @@ -96,6 +100,7 @@ tf_cuda_library( "//conditions:default": [ "//tensorflow/c:c_api", "//tensorflow/core:core_cpu", + "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index cc5ed48b48..a79f8ddd33 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 9b46cf8245..abe2793ce8 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -95,22 +96,6 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { return Status::OK(); } -Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, - TF_AttrType* out, unsigned char* is_list) { - auto* t = gtl::FindOrNull(m, attr_name); - if (t == nullptr) { - return errors::InvalidArgument("Attribute '", attr_name, - "' does not exist for this operation"); - } - *out = static_cast(*t & ~kIsList); - if (*t & kIsList) { - *is_list = 1; - } else { - *is_list = 0; - } - return Status::OK(); -} - #define DEFINE_SET_ATTR(value_type, value_field) \ template <> \ AttrBuilder& AttrBuilder::Set(StringPiece attr_name, value_type&& value) { \ @@ -168,6 +153,22 @@ const NodeDef& AttrBuilder::BuildNodeDef() { return *node_def_; } +Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, + TF_AttrType* out, unsigned char* is_list) { + auto* t = gtl::FindOrNull(m, attr_name); + if (t == nullptr) { + return errors::InvalidArgument("Attribute '", attr_name, + "' does not exist for this operation"); + } + *out = static_cast(*t & ~kIsList); + if (*t & kIsList) { + *is_list = 1; + } else { + *is_list = 0; + } + return Status::OK(); +} + namespace { inline tensorflow::Fprint128 FingerprintCat128(const tensorflow::Fprint128& a, const tensorflow::Fprint128& b) { @@ -245,104 +246,4 @@ void AttrBuilder::MayBeInitializeNodeDef() { } } -// static -Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, - KernelAndDevice* out) { - OpKernel* k = nullptr; - Status s = CreateOpKernel(device->device_type().c_str(), device, - device->GetAllocator(AllocatorAttributes()), - nullptr, ndef, TF_GRAPH_DEF_VERSION, &k); - out->device_ = device; - out->kernel_.reset(k); - out->flib_ = nullptr; - return s; -} - -// static -Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - KernelAndDevice* out) { - OpKernel* k = nullptr; - Status s = flib->CreateKernel(ndef, &k); - out->device_ = flib->device(); - out->kernel_.reset(k); - out->flib_ = flib; - return s; -} - -Status KernelAndDevice::Run(std::vector* input_tensors, - std::vector* output_tensors, - NodeExecStats* stats) { - gtl::InlinedVector inputs; - for (Tensor& t : *input_tensors) { - inputs.push_back(TensorValue(&t)); - } - - std::vector out_attrs(kernel_->num_outputs()); - for (size_t i = 0; i < out_attrs.size(); ++i) { - out_attrs[i].set_on_host(kernel_->output_memory_types()[i] == - tensorflow::HOST_MEMORY); - } - - OpKernelContext::Params params; - params.device = device_; - params.frame_iter = FrameAndIter(0, 0); - params.inputs = &inputs; - params.op_kernel = kernel_.get(); - params.resource_manager = device_->resource_manager(); - params.output_attr_array = gtl::vector_as_array(&out_attrs); - params.function_library = flib_; - params.slice_reader_cache = &slice_reader_cache_; - params.rendezvous = rendez_; - if (stats != nullptr) { - params.track_allocations = true; - } - // TODO(apassos): use a thread pool. - std::function)> runner = - [](std::function f) { f(); }; - params.runner = &runner; - - OpKernelContext context(¶ms); - - if (kernel_->def().op() == "_Recv") { - // TODO(apassos) do not special-case _Recv. Currently the GPU device fails - // if trying to run _Recv->Compute(), specifically checking for _Recv. To go - // around this we call _Recv->ComputeAsync, to mimic graph mode behavior. - AsyncOpKernel* async = kernel_->AsAsync(); - Notification done; - device_->ComputeAsync(async, &context, [&done]() { done.Notify(); }); - done.WaitForNotification(); - } else { - device_->Compute(kernel_.get(), &context); - } - if (!context.status().ok()) return context.status(); - - output_tensors->clear(); - for (int i = 0; i < context.num_outputs(); ++i) { - output_tensors->push_back(Tensor(*context.mutable_output(i))); - } - if (stats != nullptr) { - for (const auto& allocator_pair : context.wrapped_allocators()) { - AllocatorMemoryUsed* memory = stats->add_memory(); - memory->set_allocator_name(allocator_pair.first->Name()); - auto sizes = allocator_pair.second->GetSizes(); - memory->set_total_bytes(std::get<0>(sizes)); - memory->set_peak_bytes(std::get<1>(sizes)); - memory->set_live_bytes(std::get<2>(sizes)); - - AllocatorStats allocator_stats; - allocator_pair.first->GetStats(&allocator_stats); - memory->set_allocator_bytes_in_use(allocator_stats.bytes_in_use); - allocator_pair.second->GetRecordsAndUnRef(); - } - auto* ms = stats->mutable_memory_stats(); - ms->set_temp_memory_size(context.temp_memory_allocated()); - for (const auto& alloc_id : context.persistent_alloc_ids()) { - ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id); - } - - ms->set_persistent_memory_size(context.persistent_memory_allocated()); - } - return Status::OK(); -} - } // namespace tensorflow diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index ad16f65495..929b1b8296 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/c/c_api.h" #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" @@ -45,6 +46,10 @@ Status OpDefForOp(const char* op_name, const OpDef** op_def); // Returns the AttrTypeMap for the TensorFlow operation named op_name. Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out); +// Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'. +Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, + TF_AttrType* out, unsigned char* is_list); + // Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'. Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, TF_AttrType* out, unsigned char* is_list); @@ -149,53 +154,6 @@ template <> AttrBuilder& AttrBuilder::Set(StringPiece attr_name, tensorflow::DataType&& value); -// KernelAndDevice encapsulates an instantiated kernel and the device it is on. -// -// Also see: -// https://www.tensorflow.org/code/tensorflow/core/common_runtime/kernel_benchmark_testlib.h -// and -// https://www.tensorflow.org/code/tensorflow/core/kernels/ops_testutil.h -class KernelAndDevice { - public: - // Populates 'out' with a kernel appropriate for 'ndef'. - // - // The provided FunctionLibraryRuntime MUST outlive all calls to - // Run() on the returned KernelAndDevice. - // - // TODO(ashankar): Figure out thread-safety concerns around - // FunctionLibraryRuntime (in particular, how the underlying - // FunctionLibraryDefinition might be mutated by another thread as new - // functions are registered with it). Conservatively, thread-safe usage of - // the FunctionLibraryRuntime is pushed on to the caller (see locking in - // c_api.cc). - static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - KernelAndDevice* out); - // TODO(ashankar): Remove this - static Status InitOp(Device* device, const NodeDef& ndef, - KernelAndDevice* out); - - KernelAndDevice(tensorflow::Rendezvous* rendez) - : device_(nullptr), flib_(nullptr), rendez_(rendez) {} - - // TODO(ashankar): Handle list-valued inputs. - Status Run(std::vector* inputs, std::vector* outputs, - NodeExecStats* stats); - - const OpKernel* kernel() const { return kernel_.get(); } - - Device* device() const { return device_; } - - DataTypeVector* mutable_output_dtypes() { return &output_dtypes_; } - const DataTypeVector& output_dtypes() { return output_dtypes_; } - - private: - std::unique_ptr kernel_; - Device* device_; - FunctionLibraryRuntime* flib_; - checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; - Rendezvous* rendez_; - DataTypeVector output_dtypes_; -}; } // namespace tensorflow diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 4f75d27887..27ebeb0508 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -33,27 +33,6 @@ limitations under the License. namespace tensorflow { namespace { -class TestEnv { - public: - TestEnv() : flib_def_(OpRegistry::Global(), {}) { - Device* device = - DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0"); - device_mgr_.reset(new DeviceMgr({device})); - flib_runtime_ = NewFunctionLibraryRuntime(device_mgr_.get(), Env::Default(), - device, TF_GRAPH_DEF_VERSION, - &flib_def_, nullptr, {}, nullptr); - } - - FunctionLibraryRuntime* function_library_runtime() const { - return flib_runtime_.get(); - } - - private: - FunctionLibraryDefinition flib_def_; - std::unique_ptr device_mgr_; - std::unique_ptr flib_runtime_; -}; - TEST(AttrTypeMap, Lookup) { const AttrTypeMap* m = nullptr; Status s = AttrTypeMapForOp("ThisOpCannotPossiblyExist", &m); @@ -79,113 +58,5 @@ TEST(AttrTypeMap, Lookup) { EXPECT_NE(is_list, 0); } -TEST(KernelAndDevice, Run) { - Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor()); - std::vector inputs; - inputs.push_back(t); - inputs.push_back(t); - NodeDef ndef(AttrBuilder("MatMul") - .Set("T", DT_FLOAT) - .Set("transpose_a", false) - .Set("transpose_b", false) - .NumInputs(inputs.size()) - .BuildNodeDef()); - TestEnv env; - KernelAndDevice kernel(nullptr); - Status s = - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); - ASSERT_TRUE(s.ok()) << s; - std::vector outputs; - s = kernel.Run(&inputs, &outputs, nullptr); - ASSERT_TRUE(s.ok()) << s; - ASSERT_EQ(1, outputs.size()); - const Tensor& out = outputs[0]; - EXPECT_EQ(7, out.matrix()(0, 0)); - EXPECT_EQ(10, out.matrix()(0, 1)); - EXPECT_EQ(15, out.matrix()(1, 0)); - EXPECT_EQ(22, out.matrix()(1, 1)); -} - -void BM_CreateGraph(int iters) { - for (int i = 0; i < iters; ++i) { - Scope root = Scope::NewRootScope(); - auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); - auto M = ops::MatMul(root, C, C); - TF_CHECK_OK(root.status()); - } -} -BENCHMARK(BM_CreateGraph); - -void BM_RunGraph(int iters) { - tensorflow::testing::StopTiming(); - Scope root = Scope::NewRootScope(); - auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); - auto M = ops::MatMul(root, C, C); - SessionOptions opts; - opts.config.set_inter_op_parallelism_threads(1); - opts.config.set_intra_op_parallelism_threads(1); - ClientSession sess(root, opts); - std::vector outputs; - tensorflow::testing::StartTiming(); - for (int i = 0; i < iters; ++i) { - outputs.clear(); - TF_CHECK_OK(sess.Run({M}, &outputs)); - } -} -BENCHMARK(BM_RunGraph); - -void BM_CreateAndDestroySession(int iters) { - tensorflow::testing::StopTiming(); - Scope root = Scope::NewRootScope(); - auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); - auto M = ops::MatMul(root, C, C); - tensorflow::testing::StartTiming(); - for (int i = 0; i < iters; ++i) { - ClientSession sess(root); - } -} -BENCHMARK(BM_CreateAndDestroySession); - -void BM_KernelAndDeviceInit(int iters) { - tensorflow::testing::StopTiming(); - NodeDef ndef(AttrBuilder("MatMul") - .Set("T", DT_FLOAT) - .Set("transpose_a", false) - .Set("transpose_b", false) - .NumInputs(2) - .BuildNodeDef()); - TestEnv env; - KernelAndDevice k(nullptr); - tensorflow::testing::StartTiming(); - for (int i = 0; i < iters; ++i) { - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); - } -} -BENCHMARK(BM_KernelAndDeviceInit); - -void BM_KernelAndDeviceRun(int iters) { - tensorflow::testing::StopTiming(); - Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor()); - std::vector inputs; - inputs.push_back(t); - inputs.push_back(t); - std::vector outputs; - NodeDef ndef(AttrBuilder("MatMul") - .Set("T", DT_FLOAT) - .Set("transpose_a", false) - .Set("transpose_b", false) - .NumInputs(inputs.size()) - .BuildNodeDef()); - TestEnv env; - KernelAndDevice kernel(nullptr); - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); - tensorflow::testing::StartTiming(); - for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); - } -} -BENCHMARK(BM_KernelAndDeviceRun); } // namespace } // namespace tensorflow diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 8124280914..42d222ff6b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -794,7 +794,6 @@ tf_cuda_library( hdrs = [ "common_runtime/device.h", "common_runtime/device_factory.h", - "common_runtime/eager/eager_executor.h", "common_runtime/optimization_registry.h", "common_runtime/shape_refiner.h", "graph/algorithm.h", @@ -1038,6 +1037,7 @@ filegroup( "util/tensor_bundle/*.h", "util/tensor_bundle/*.cc", "common_runtime/gpu/**/*", + "common_runtime/eager/*", "common_runtime/gpu_device_factory.*", ], ), @@ -1063,6 +1063,7 @@ filegroup( "**/*testlib*", "**/*main.cc", "common_runtime/gpu/**/*", + "common_runtime/eager/*", "common_runtime/gpu_device_factory.*", "graph/dot.*", ], @@ -2150,7 +2151,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/stats_publisher_interface.h", "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", - "common_runtime/eager/eager_executor.h", "graph/gradients.h", "graph/quantize_training.h", ] + if_mkl(["graph/mkl_graph_util.h"]) @@ -2170,7 +2170,6 @@ tf_cuda_library( "common_runtime/device_factory.cc", "common_runtime/device_mgr.cc", "common_runtime/device_set.cc", - "common_runtime/eager/eager_executor.cc", "common_runtime/executor.cc", "common_runtime/function.cc", "common_runtime/graph_optimizer.cc", diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD new file mode 100644 index 0000000000..8ba560bef8 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -0,0 +1,86 @@ +package( + default_visibility = [ + "//tensorflow:internal", + "//tensorflow_models:__subpackages__", + ], +) + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", + "tf_cuda_library", +) + +tf_cuda_library( + name = "eager_executor", + srcs = [ + "eager_executor.cc", + ], + hdrs = [ + "eager_executor.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cuda_library( + name = "kernel_and_device", + srcs = [ + "kernel_and_device.cc", + ], + hdrs = [ + "kernel_and_device.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "kernel_and_device_test", + srcs = ["kernel_and_device_test.cc"], + deps = [ + ":kernel_and_device", + "//tensorflow/c/eager:runtime", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:client_session", + "//tensorflow/cc:ops", + "//tensorflow/cc:scope", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +# ----------------------------------------------------------------------------- +# Google-internal targets. + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc new file mode 100644 index 0000000000..0a4895a938 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -0,0 +1,132 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" + +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/tensor_slice_reader_cache.h" + +namespace tensorflow { + +// static +Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, + KernelAndDevice* out) { + OpKernel* k = nullptr; + Status s = CreateOpKernel(device->device_type().c_str(), device, + device->GetAllocator(AllocatorAttributes()), + nullptr, ndef, TF_GRAPH_DEF_VERSION, &k); + out->device_ = device; + out->kernel_.reset(k); + out->flib_ = nullptr; + return s; +} + +// static +Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + KernelAndDevice* out) { + OpKernel* k = nullptr; + Status s = flib->CreateKernel(ndef, &k); + out->device_ = flib->device(); + out->kernel_.reset(k); + out->flib_ = flib; + return s; +} + +Status KernelAndDevice::Run(std::vector* input_tensors, + std::vector* output_tensors, + NodeExecStats* stats) { + gtl::InlinedVector inputs; + for (Tensor& t : *input_tensors) { + inputs.push_back(TensorValue(&t)); + } + + std::vector out_attrs(kernel_->num_outputs()); + for (size_t i = 0; i < out_attrs.size(); ++i) { + out_attrs[i].set_on_host(kernel_->output_memory_types()[i] == + tensorflow::HOST_MEMORY); + } + + OpKernelContext::Params params; + params.device = device_; + params.frame_iter = FrameAndIter(0, 0); + params.inputs = &inputs; + params.op_kernel = kernel_.get(); + params.resource_manager = device_->resource_manager(); + params.output_attr_array = gtl::vector_as_array(&out_attrs); + params.function_library = flib_; + params.slice_reader_cache = &slice_reader_cache_; + params.rendezvous = rendez_; + if (stats != nullptr) { + params.track_allocations = true; + } + // TODO(apassos): use a thread pool. + std::function)> runner = + [](std::function f) { f(); }; + params.runner = &runner; + + OpKernelContext context(¶ms); + + if (kernel_->def().op() == "_Recv") { + // TODO(apassos) do not special-case _Recv. Currently the GPU device fails + // if trying to run _Recv->Compute(), specifically checking for _Recv. To go + // around this we call _Recv->ComputeAsync, to mimic graph mode behavior. + AsyncOpKernel* async = kernel_->AsAsync(); + Notification done; + device_->ComputeAsync(async, &context, [&done]() { done.Notify(); }); + done.WaitForNotification(); + } else { + device_->Compute(kernel_.get(), &context); + } + if (!context.status().ok()) return context.status(); + + output_tensors->clear(); + for (int i = 0; i < context.num_outputs(); ++i) { + output_tensors->push_back(Tensor(*context.mutable_output(i))); + } + if (stats != nullptr) { + for (const auto& allocator_pair : context.wrapped_allocators()) { + AllocatorMemoryUsed* memory = stats->add_memory(); + memory->set_allocator_name(allocator_pair.first->Name()); + auto sizes = allocator_pair.second->GetSizes(); + memory->set_total_bytes(std::get<0>(sizes)); + memory->set_peak_bytes(std::get<1>(sizes)); + memory->set_live_bytes(std::get<2>(sizes)); + + AllocatorStats allocator_stats; + allocator_pair.first->GetStats(&allocator_stats); + memory->set_allocator_bytes_in_use(allocator_stats.bytes_in_use); + allocator_pair.second->GetRecordsAndUnRef(); + } + auto* ms = stats->mutable_memory_stats(); + ms->set_temp_memory_size(context.temp_memory_allocated()); + for (const auto& alloc_id : context.persistent_alloc_ids()) { + ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id); + } + + ms->set_persistent_memory_size(context.persistent_memory_allocated()); + } + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h new file mode 100644 index 0000000000..46ec550c78 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -0,0 +1,85 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_KERNEL_AND_DEVICE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_KERNEL_AND_DEVICE_H_ + +// Support for eager execution of TensorFlow kernels. + +#include +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/util/tensor_slice_reader_cache.h" + +namespace tensorflow { + +// KernelAndDevice encapsulates an instantiated kernel and the device it is on. +// +// Also see: +// https://www.tensorflow.org/code/tensorflow/core/common_runtime/kernel_benchmark_testlib.h +// and +// https://www.tensorflow.org/code/tensorflow/core/kernels/ops_testutil.h +class KernelAndDevice { + public: + // Populates 'out' with a kernel appropriate for 'ndef'. + // + // The provided FunctionLibraryRuntime MUST outlive all calls to + // Run() on the returned KernelAndDevice. + // + // TODO(ashankar): Figure out thread-safety concerns around + // FunctionLibraryRuntime (in particular, how the underlying + // FunctionLibraryDefinition might be mutated by another thread as new + // functions are registered with it). Conservatively, thread-safe usage of + // the FunctionLibraryRuntime is pushed on to the caller (see locking in + // c_api.cc). + static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + KernelAndDevice* out); + // TODO(ashankar): Remove this + static Status InitOp(Device* device, const NodeDef& ndef, + KernelAndDevice* out); + + KernelAndDevice(tensorflow::Rendezvous* rendez) + : device_(nullptr), flib_(nullptr), rendez_(rendez) {} + + // TODO(ashankar): Handle list-valued inputs. + Status Run(std::vector* inputs, std::vector* outputs, + NodeExecStats* stats); + + const OpKernel* kernel() const { return kernel_.get(); } + + Device* device() const { return device_; } + + DataTypeVector* mutable_output_dtypes() { return &output_dtypes_; } + const DataTypeVector& output_dtypes() { return output_dtypes_; } + + private: + std::unique_ptr kernel_; + Device* device_; + FunctionLibraryRuntime* flib_; + checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; + Rendezvous* rendez_; + DataTypeVector output_dtypes_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_KERNEL_AND_DEVICE_H_ diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc new file mode 100644 index 0000000000..dd055c3c3e --- /dev/null +++ b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc @@ -0,0 +1,140 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" + +#include +#include + +#include "tensorflow/c/eager/runtime.h" +#include "tensorflow/cc/client/client_session.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { +namespace { + +class TestEnv { + public: + TestEnv() : flib_def_(OpRegistry::Global(), {}) { + Device* device = + DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0"); + device_mgr_.reset(new DeviceMgr({device})); + flib_runtime_ = NewFunctionLibraryRuntime(device_mgr_.get(), Env::Default(), + device, TF_GRAPH_DEF_VERSION, + &flib_def_, nullptr, {}, nullptr); + } + + FunctionLibraryRuntime* function_library_runtime() const { + return flib_runtime_.get(); + } + + private: + FunctionLibraryDefinition flib_def_; + std::unique_ptr device_mgr_; + std::unique_ptr flib_runtime_; +}; + +void BM_CreateGraph(int iters) { + for (int i = 0; i < iters; ++i) { + Scope root = Scope::NewRootScope(); + auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); + auto M = ops::MatMul(root, C, C); + TF_CHECK_OK(root.status()); + } +} +BENCHMARK(BM_CreateGraph); + +void BM_RunGraph(int iters) { + tensorflow::testing::StopTiming(); + Scope root = Scope::NewRootScope(); + auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); + auto M = ops::MatMul(root, C, C); + SessionOptions opts; + opts.config.set_inter_op_parallelism_threads(1); + opts.config.set_intra_op_parallelism_threads(1); + ClientSession sess(root, opts); + std::vector outputs; + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + outputs.clear(); + TF_CHECK_OK(sess.Run({M}, &outputs)); + } +} +BENCHMARK(BM_RunGraph); + +void BM_CreateAndDestroySession(int iters) { + tensorflow::testing::StopTiming(); + Scope root = Scope::NewRootScope(); + auto C = ops::Const(root, {{1.0, 2.0}, {3.0, 4.0}}); + auto M = ops::MatMul(root, C, C); + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + ClientSession sess(root); + } +} +BENCHMARK(BM_CreateAndDestroySession); + +void BM_KernelAndDeviceInit(int iters) { + tensorflow::testing::StopTiming(); + NodeDef ndef(AttrBuilder("MatMul") + .Set("T", DT_FLOAT) + .Set("transpose_a", false) + .Set("transpose_b", false) + .NumInputs(2) + .BuildNodeDef()); + TestEnv env; + KernelAndDevice k(nullptr); + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); + } +} +BENCHMARK(BM_KernelAndDeviceInit); + +void BM_KernelAndDeviceRun(int iters) { + tensorflow::testing::StopTiming(); + Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor()); + std::vector inputs; + inputs.push_back(t); + inputs.push_back(t); + std::vector outputs; + NodeDef ndef(AttrBuilder("MatMul") + .Set("T", DT_FLOAT) + .Set("transpose_a", false) + .Set("transpose_b", false) + .NumInputs(inputs.size()) + .BuildNodeDef()); + TestEnv env; + KernelAndDevice kernel(nullptr); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); + tensorflow::testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); + } +} +BENCHMARK(BM_KernelAndDeviceRun); +} // namespace +} // namespace tensorflow -- GitLab From 0a7f511aff6ef8900a9a56cd3207508e3cd8ec8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 11:16:13 -0700 Subject: [PATCH 305/960] Allowing dnn tree combined estimator to work with core versions of feature columns and losses PiperOrigin-RevId: 189937063 --- .../boosted_trees/estimator_batch/BUILD | 2 +- .../dnn_tree_combined_estimator.py | 213 ++++++++++++++---- .../dnn_tree_combined_estimator_test.py | 33 ++- 3 files changed, 200 insertions(+), 48 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index 289f5bb314..dae402204f 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -149,7 +149,7 @@ py_library( py_test( name = "dnn_tree_combined_estimator_test", - size = "small", + size = "medium", srcs = ["dnn_tree_combined_estimator_test.py"], srcs_version = "PY2AND3", tags = [ diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py index cec3892b57..2e7b8cba05 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py @@ -25,15 +25,20 @@ from __future__ import division from __future__ import print_function import six - from tensorflow.contrib import layers from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.layers.python.layers import optimizers +from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.contrib.learn.python.learn.estimators import model_fn +from tensorflow.contrib.learn.python.learn.estimators import model_fn as contrib_model_fn_lib +from tensorflow.contrib.learn.python.learn.estimators import prediction_key +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator.export import export_output +from tensorflow.python.feature_column import feature_column as feature_column_lib from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import nn @@ -46,6 +51,52 @@ from tensorflow.python.training import training_util _DNN_LEARNING_RATE = 0.001 +_CORE_MODE_TO_CONTRIB_MODE_ = { + model_fn_lib.ModeKeys.TRAIN: contrib_model_fn_lib.ModeKeys.TRAIN, + model_fn_lib.ModeKeys.EVAL: contrib_model_fn_lib.ModeKeys.EVAL, + model_fn_lib.ModeKeys.PREDICT: contrib_model_fn_lib.ModeKeys.INFER +} + + +def _core_mode_to_contrib_mode(mode): + return _CORE_MODE_TO_CONTRIB_MODE_[mode] + + +def _export_outputs_to_output_alternatives(export_outputs): + """Converts EstimatorSpec.export_outputs to output_alternatives. + + Args: + export_outputs: export_outputs created by create_estimator_spec. + Returns: + converted output_alternatives. + """ + output = dict() + if export_outputs is not None: + for key, value in export_outputs.items(): + if isinstance(value, export_output.ClassificationOutput): + exported_predictions = { + prediction_key.PredictionKey.SCORES: value.scores, + prediction_key.PredictionKey.CLASSES: value.classes + } + output[key] = (constants.ProblemType.CLASSIFICATION, + exported_predictions) + return output + return None + + +def _estimator_spec_to_model_fn_ops(estimator_spec, is_regression): + alternatives = [] + if not is_regression: + _export_outputs_to_output_alternatives(estimator_spec.export_outputs) + + return model_fn.ModelFnOps( + mode=_core_mode_to_contrib_mode(estimator_spec.mode), + predictions=estimator_spec.predictions, + loss=estimator_spec.loss, + train_op=estimator_spec.train_op, + eval_metric_ops=estimator_spec.eval_metric_ops, + output_alternatives=alternatives) + def _get_optimizer(optimizer): if callable(optimizer): @@ -59,16 +110,26 @@ def _add_hidden_layer_summary(value, tag): summary.histogram("%s_activation" % tag, value) -def _dnn_tree_combined_model_fn( - features, labels, mode, head, dnn_hidden_units, - dnn_feature_columns, tree_learner_config, num_trees, - tree_examples_per_layer, - config=None, dnn_optimizer="Adagrad", - dnn_activation_fn=nn.relu, dnn_dropout=None, - dnn_input_layer_partitioner=None, - dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, - tree_feature_columns=None, - tree_center_bias=True): +def _dnn_tree_combined_model_fn(features, + labels, + mode, + head, + dnn_hidden_units, + dnn_feature_columns, + tree_learner_config, + num_trees, + tree_examples_per_layer, + config=None, + dnn_optimizer="Adagrad", + dnn_activation_fn=nn.relu, + dnn_dropout=None, + dnn_input_layer_partitioner=None, + dnn_input_layer_to_tree=True, + dnn_steps_to_train=10000, + tree_feature_columns=None, + tree_center_bias=False, + use_core_versions=False, + is_regression=False): """DNN and GBDT combined model_fn. Args: @@ -106,6 +167,9 @@ def _dnn_tree_combined_model_fn( set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_versions: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. + is_regression: Whether the problem is regression or not. Returns: A `ModelFnOps` object. @@ -135,11 +199,17 @@ def _dnn_tree_combined_model_fn( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=dnn_partitioner) as input_layer_scope: - input_layer = layers.input_from_feature_columns( - columns_to_tensors=features, - feature_columns=dnn_feature_columns, - weight_collections=[dnn_parent_scope], - scope=input_layer_scope) + if use_core_versions: + input_layer = feature_column_lib.input_layer( + features=features, + feature_columns=dnn_feature_columns, + weight_collections=[dnn_parent_scope]) + else: + input_layer = layers.input_from_feature_columns( + columns_to_tensors=features, + feature_columns=dnn_feature_columns, + weight_collections=[dnn_parent_scope], + scope=input_layer_scope) previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( @@ -222,24 +292,51 @@ def _dnn_tree_combined_model_fn( del loss return control_flow_ops.no_op() - model_fn_ops = head.create_model_fn_ops( - features=features, - mode=mode, - labels=labels, - train_op_fn=_no_train_op_fn, - logits=tree_train_logits) - dnn_train_op = head.create_model_fn_ops( - features=features, - mode=mode, - labels=labels, - train_op_fn=_dnn_train_op_fn, - logits=dnn_logits).train_op - tree_train_op = head.create_model_fn_ops( - features=tree_features, - mode=mode, - labels=labels, - train_op_fn=_tree_train_op_fn, - logits=tree_train_logits).train_op + if use_core_versions: + model_fn_ops = head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_no_train_op_fn, + logits=tree_train_logits) + dnn_train_op = head.create_estimator_spec( + features=features, + mode=mode, + labels=labels, + train_op_fn=_dnn_train_op_fn, + logits=dnn_logits) + dnn_train_op = _estimator_spec_to_model_fn_ops(dnn_train_op, + is_regression).train_op + + tree_train_op = head.create_estimator_spec( + features=tree_features, + mode=mode, + labels=labels, + train_op_fn=_tree_train_op_fn, + logits=tree_train_logits) + tree_train_op = _estimator_spec_to_model_fn_ops(tree_train_op, + is_regression).train_op + + model_fn_ops = _estimator_spec_to_model_fn_ops(model_fn_ops, is_regression) + else: + model_fn_ops = head.create_model_fn_ops( + features=features, + mode=mode, + labels=labels, + train_op_fn=_no_train_op_fn, + logits=tree_train_logits) + dnn_train_op = head.create_model_fn_ops( + features=features, + mode=mode, + labels=labels, + train_op_fn=_dnn_train_op_fn, + logits=dnn_logits).train_op + tree_train_op = head.create_model_fn_ops( + features=tree_features, + mode=mode, + labels=labels, + train_op_fn=_tree_train_op_fn, + logits=tree_train_logits).train_op if tree_center_bias: num_trees += 1 @@ -277,7 +374,8 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, - tree_center_bias=True): + tree_center_bias=False, + use_core_versions=False): """Initializes a DNNBoostedTreeCombinedClassifier instance. Args: @@ -322,6 +420,8 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_versions: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. """ head = head_lib.multi_class_head( n_classes=n_classes, @@ -336,8 +436,8 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator): tree_learner_config, num_trees, tree_examples_per_layer, config, dnn_optimizer, dnn_activation_fn, dnn_dropout, dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, - tree_feature_columns, tree_center_bias) + dnn_steps_to_train, tree_feature_columns, tree_center_bias, + use_core_versions) super(DNNBoostedTreeCombinedClassifier, self).__init__( model_fn=_model_fn, model_dir=model_dir, @@ -366,7 +466,8 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, - tree_center_bias=True): + tree_center_bias=False, + use_core_versions=False): """Initializes a DNNBoostedTreeCombinedRegressor instance. Args: @@ -411,6 +512,8 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_versions: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. """ head = head_lib.regression_head( label_name=label_name, @@ -426,11 +529,26 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator): def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( - features, labels, mode, head, dnn_hidden_units, dnn_feature_columns, - tree_learner_config, num_trees, tree_examples_per_layer, config, - dnn_optimizer, dnn_activation_fn, dnn_dropout, - dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, tree_feature_columns, tree_center_bias) + features, + labels, + mode, + head, + dnn_hidden_units, + dnn_feature_columns, + tree_learner_config, + num_trees, + tree_examples_per_layer, + config, + dnn_optimizer, + dnn_activation_fn, + dnn_dropout, + dnn_input_layer_partitioner, + dnn_input_layer_to_tree, + dnn_steps_to_train, + tree_feature_columns, + tree_center_bias, + use_core_versions, + is_regression=True) super(DNNBoostedTreeCombinedRegressor, self).__init__( model_fn=_model_fn, model_dir=model_dir, @@ -460,7 +578,8 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): dnn_input_layer_to_tree=True, dnn_steps_to_train=10000, tree_feature_columns=None, - tree_center_bias=True): + tree_center_bias=False, + use_core_versions=False): """Initializes a DNNBoostedTreeCombinedEstimator instance. Args: @@ -500,6 +619,8 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): set to True, these features are in addition to dnn_feature_columns. tree_center_bias: Whether a separate tree should be created for first fitting the bias. + use_core_versions: Whether feature columns and loss are from the core (as + opposed to contrib) version of tensorflow. """ def _model_fn(features, labels, mode, config): return _dnn_tree_combined_model_fn( @@ -507,8 +628,8 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator): tree_learner_config, num_trees, tree_examples_per_layer, config, dnn_optimizer, dnn_activation_fn, dnn_dropout, dnn_input_layer_partitioner, dnn_input_layer_to_tree, - dnn_steps_to_train, - tree_feature_columns, tree_center_bias) + dnn_steps_to_train, tree_feature_columns, tree_center_bias, + use_core_versions) super(DNNBoostedTreeCombinedEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py index 83d58c5610..f495edc62f 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py @@ -19,15 +19,17 @@ from __future__ import division from __future__ import print_function import tempfile - from tensorflow.contrib.boosted_trees.estimator_batch import dnn_tree_combined_estimator as estimator from tensorflow.contrib.boosted_trees.proto import learner_pb2 from tensorflow.contrib.layers.python.layers import feature_column from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils from tensorflow.contrib.learn.python.learn.estimators import run_config +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util +from tensorflow.python.ops.losses import losses from tensorflow.python.platform import googletest @@ -100,6 +102,35 @@ class DNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase): classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) + def testFitAndEvaluateDontThrowExceptionWithCore(self): + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 1 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + # Use core head + head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE) + + classifier = estimator.DNNBoostedTreeCombinedEstimator( + head=head_fn, + dnn_hidden_units=[1], + # Use core feature columns + dnn_feature_columns=[core_feature_column.numeric_column("x")], + tree_learner_config=learner_config, + num_trees=1, + tree_examples_per_layer=3, + model_dir=model_dir, + config=config, + dnn_steps_to_train=10, + dnn_input_layer_to_tree=True, + tree_feature_columns=[], + use_core_versions=True) + + classifier.fit(input_fn=_train_input_fn, steps=15) + classifier.evaluate(input_fn=_eval_input_fn, steps=1) + if __name__ == "__main__": googletest.main() -- GitLab From 6832756b26af035535c9349fec9cad77091584ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 11:18:02 -0700 Subject: [PATCH 306/960] Update ops-related pbtxt files. PiperOrigin-RevId: 189937564 --- tensorflow/core/ops/ops.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 3beebdc6d4..72326e1137 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -11959,6 +11959,10 @@ op { name: "num_parallel_batches" type: DT_INT64 } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } output_arg { name: "handle" type: DT_VARIANT -- GitLab From 7742071501609cf1d5bcf552193ced245e9a290e Mon Sep 17 00:00:00 2001 From: Frank Perbet Date: Wed, 21 Mar 2018 11:40:25 -0700 Subject: [PATCH 307/960] Make the graph_editor C-API friendly: always construct ops with their inputs. PiperOrigin-RevId: 189941495 --- .../graph_editor/tests/transform_test.py | 52 ++++++- tensorflow/contrib/graph_editor/transform.py | 146 ++++++++++++++---- tensorflow/contrib/graph_editor/util.py | 29 ++-- tensorflow/python/framework/ops.py | 7 +- 4 files changed, 186 insertions(+), 48 deletions(-) diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index ca00394388..2603de6407 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -23,6 +23,7 @@ from tensorflow.contrib import graph_editor as ge from tensorflow.contrib.graph_editor.tests import match from tensorflow.python.client import session from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -84,9 +85,9 @@ class TransformTest(test.TestCase): def test_transform(self): transformer = ge.Transformer() - def my_transform_op_handler(info, op): + def my_transform_op_handler(info, op, new_inputs): add_noise = op.name.startswith("Add") - op_, op_outputs_ = ge.transform.copy_op_handler(info, op) + op_, op_outputs_ = ge.transform.copy_op_handler(info, op, new_inputs) if not add_noise: return op_, op_outputs_ # add some noise to op @@ -201,15 +202,56 @@ class TransformTest(test.TestCase): get_operation_by_name("res/grad/mul1_grad/Mul_1")) # Make sure _original_ops are as expected. - self.assertEquals(original_mul1_grad._original_op.name, u"mul1") - self.assertEquals(result_mul1_grad._original_op.name, u"res/mul1") - self.assertNotEquals(res.name, g.name) + self.assertEqual(original_mul1_grad._original_op.name, u"mul1") + self.assertEqual(result_mul1_grad._original_op.name, u"res/mul1") + self.assertNotEqual(res.name, g.name) with session.Session() as sess: sess.run(variables.global_variables_initializer()) g_val, res_val = sess.run([g, res]) self.assertNear(g_val, 0.0, ERROR_TOLERANCE) self.assertNear(res_val, 0.0, ERROR_TOLERANCE) + def test_graph_while_loop(self): + graph = ops.Graph() + with graph.as_default(): + max_index = array_ops.placeholder(dtype=dtypes.int32, shape=tuple()) + index_start = constant_op.constant(1) + sum_start = constant_op.constant(0) + _, result = control_flow_ops.while_loop( + cond=lambda i, unused_s: i <= max_index, + body=lambda i, s: (i + 1, s + i), + loop_vars=[index_start, sum_start]) + copied_graph = ops.Graph() + _, copy_info = ge.copy( + graph, dst_graph=copied_graph, dst_scope="imported") + copied_result = copy_info.transformed(result) + copied_max_index = copy_info.transformed(max_index) + with copied_graph.as_default(): + with session.Session() as sess: + n = 10 + sum_val = sess.run(copied_result, feed_dict={copied_max_index: n}) + self.assertEqual(sum_val, 55) + + def test_graph_cond(self): + graph = ops.Graph() + with graph.as_default(): + choice = array_ops.placeholder(shape=(), dtype=dtypes.bool) + result = control_flow_ops.cond( + choice, + lambda: constant_op.constant(1), + lambda: constant_op.constant(2)) + copied_graph = ops.Graph() + _, copy_info = ge.copy( + graph, dst_graph=copied_graph, dst_scope="imported") + copied_result = copy_info.transformed(result) + copied_choice = copy_info.transformed(choice) + with copied_graph.as_default(): + with session.Session() as sess: + res = sess.run(copied_result, feed_dict={copied_choice: True}) + self.assertEqual(res, 1) + res = sess.run(copied_result, feed_dict={copied_choice: False}) + self.assertEqual(res, 2) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 14ac529665..d8a48387a7 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -129,20 +129,26 @@ def transform_op_if_inside_handler(info, op, keep_if_possible=True): return None -def copy_op_handler(info, op, copy_shape=True): +def copy_op_handler(info, op, new_inputs, copy_shape=True): """Copy a `tf.Operation`. Args: info: Transform._TmpInfo instance. op: the `tf.Operation` to be copied. + new_inputs: The new inputs for this op. copy_shape: also copy the shape of the tensor Returns: A `(op, op_outputs)` tuple containing the transformed op and its outputs. """ + # The `new_inputs` was added to this function. For compatibility reason, + # let's raise an error if `new_inputs` is a boolean. + if isinstance(new_inputs, bool): + raise TypeError("the `new_inputs` argument must be an iterable.") + # pylint: disable=protected-access # Clone the node def: - node_def_ = deepcopy(op._node_def) + node_def_ = deepcopy(op.node_def) # Transform name: name_ = info.new_name(op.name) @@ -155,10 +161,10 @@ def copy_op_handler(info, op, copy_shape=True): # Make a copy of the op_def too. # Its unique to every _type_ of Operation. - op_def_ = deepcopy(op._op_def) + op_def_ = deepcopy(op.op_def) # Initialize a new Operation instance - op_ = tf_ops.Operation(node_def_, info.graph_, [], output_types_, + op_ = tf_ops.Operation(node_def_, info.graph_, new_inputs, output_types_, [], input_types_, None, op_def_) # copy the shape over @@ -170,6 +176,7 @@ def copy_op_handler(info, op, copy_shape=True): # attribute to exist, we will create a dummy original_op first and then # later finalise it with the actual original_op when all the ops have # been copied. + # TODO(fkp): Stop worrying about _original_op and remove this code? if op._original_op: op_._original_op = op._original_op @@ -328,6 +335,14 @@ class _TmpInfo(object): for key in self.graph.get_all_collection_keys()) self.cyclic_ops = [] self.transform_original_op_handler = transform_op_if_inside_handler + # The graph is transformed op by op, in the same order the original ops + # were created. However, this is sometimes not possible due to cycles + # (i.e. while loops). So when the transformer creates a new op whose + # inputs do not exist yet, temporary placeholders are created and stored + # in this `tmp_cyclic_ts` container. During a second pass, + # those temporary tensors are replaced by the proper transformed tensors + # (see the function `_finalize_cycles`). + self.tmp_cyclic_ts = [] def new_name(self, name): """Compute a destination name from a source name. @@ -428,10 +443,10 @@ class Transformer(object): # Create temporary info used during this transform call info = _TmpInfo(sgv, dst_graph, dst_scope, src_scope) - info.transform_original_op_handler = self.transform_original_op_handler self._copy_ops(info) - self._connect_ops(info) + self._finalize_cycles(info) + self._connect_control_inputs(info) # Compute information about the transformation res_info = TransformerInfo(info) @@ -440,10 +455,10 @@ class Transformer(object): def _copy_ops(self, info): """Copy ops without connecting them.""" - for op in info.sgv.ops: - logging.debug("Copying op: %s", op.name) - # TODO(fkp): return a subgraph? - op_, op_outputs_ = self.transform_op_handler(info, op) + sorted_ops = sorted(info.sgv.ops, key=lambda op: op._id) # pylint: disable=protected-access + for op in sorted_ops: + new_inputs = [self._transformed_t(info, t, op) for t in op.inputs] + op_, op_outputs_ = self.transform_op_handler(info, op, new_inputs) if op is op_: raise ValueError("In-place transformation not allowed.") @@ -456,27 +471,36 @@ class Transformer(object): info.transformed_ts[op_output] = op_output_ self.assign_collections_handler(info, op_output, op_output_) - def _connect_ops(self, info): + def _finalize_cycles(self, info): + """Reconnects the cyclic tensors.""" + for t, tmp_t_, consumer_op in info.tmp_cyclic_ts: + if t not in info.transformed_ts: + raise ValueError("The tensor {} should be transformed by now.".format( + t.name)) + if consumer_op not in info.transformed_ops: + raise ValueError("The op {} should be transformed by now.".format( + consumer_op.name)) + t_ = info.transformed_ts[t] + consumer_op_ = info.transformed_ops[consumer_op] + t_index_ = list(consumer_op_.inputs).index(tmp_t_) + consumer_op_._update_input(t_index_, t_, update_dtype=False) # pylint: disable=protected-access + + def _connect_control_inputs(self, info): """Connect the previously copied ops.""" for op in info.sgv.ops: - logging.debug("Finalizing op: %s", op.name) + logging.debug("Connecting control inputs of op: %s", op.name) op_ = info.transformed_ops[op] - # pylint: disable=protected-access - if op_.inputs: - raise ValueError("The newly transformed op should not have " - "any inputs yet: {}".format(op_.name)) - inputs_ = [self._transformed_t(info, t) for t in op.inputs] - for t in inputs_: - op_._add_input(t) - # Finalize original op. + # TODO(fkp): Stop worrying about _original_op and remove this code? + # pylint: disable=protected-access if op._original_op: - original_op = info.transform_original_op_handler(info, op._original_op) + original_op = self.transform_original_op_handler(info, op._original_op) if original_op is None: logging.debug("Could not find original op for: %s", op_.name) else: op_._original_op = original_op + # pylint: enable=protected-access # Finalize control inputs: control_inputs_ = [self.transform_control_input_handler(info, ci) @@ -525,19 +549,38 @@ class Transformer(object): return sgv_.remap(input_map_, output_map_) - def _transformed_t(self, info, t): + def _transformed_t(self, info, t, consumer_op): """Return tre transformed tensor of `t`.""" - if t not in info.transformed_ts: - # If op is not in the subgraph. - if t in info.sgv_inputs_set: - # t is an input of the subgraph. - return self.transform_external_input_handler(info, t) + if t in info.transformed_ts: + # If op is in the subgraph, just return its transformed counterpart. + return info.transformed_ts[t] + + if t in info.sgv_inputs_set: + # `t` is an input of the subgraph. + return self.transform_external_input_handler(info, t) + elif t.op in info.ops: + # `t` is an internal tensor but is not transformed yet because it + # belongs to a graph cycle. + logging.debug("Cyclic tensor: t.name = %s", t.name) + # Try to find an existing tensor we can use for now, + # otherwise create one. We'll rewire this later. + if consumer_op.type == "Merge": + first_input = consumer_op.inputs[0] + tmp_t_ = self._transformed_t(info, first_input, consumer_op) + elif t.op.type == "Enter": + enter_input = t.op.inputs[0] + tmp_t_ = self._transformed_t(info, enter_input, consumer_op) else: - # t is a hidden input of the subgraph. - return self.transform_external_hidden_input_handler(info, t) + with info.graph_.as_default(): + tmp_t_ = util.make_placeholder_from_tensor(t, scope=info.scope_, + prefix="geph_tmp") + logging.debug("Created temporary placeholder: %s.", tmp_t_.name) + # Register as temporary and return. + info.tmp_cyclic_ts.append((t, tmp_t_, consumer_op)) + return tmp_t_ else: - # If op is in the subgraph, just return its transformed. - return info.transformed_ts[t] + # `t` is a hidden input of the subgraph. + return self.transform_external_hidden_input_handler(info, t) def copy(sgv, dst_graph=None, dst_scope="", src_scope="", @@ -624,6 +667,40 @@ def copy_with_input_replacements(sgv, replacement_ts, sgv, dst_graph, dst_scope, src_scope, reuse_dst_scope=reuse_dst_scope) +def _add_control_flow_ops(ops, control_ios): + """Complete `ops` so that the tranformed graph is valid. + + Partially copying a graph can lead to a malformed graph. For instance, + copying half of a while construct is likely to result in an invalid graph. + This function attempts to add missing ops so that the transformation result + in a valid graph. + + Args: + ops: list of ops (modifed in-place). + control_ios: object created by a call to `util.ControlOutputs`. + """ + # Find while contexts. + control_flow_contexts = set() + for op in ops: + cfc = op._control_flow_context # pylint: disable=protected-access + if cfc: + control_flow_contexts.add(cfc) + # Find new ops. + new_ops = [] + for cfc in control_flow_contexts: + if cfc.IsWhileContext(): + new_ops += select.get_walks_intersection_ops( + [enter_t.op for enter_t in cfc.loop_enters], + [exit_t.op for exit_t in cfc.loop_exits], + control_ios=control_ios) + # Add new ops. + new_ops_set = set(new_ops) + ops_set = frozenset(ops) + for op in new_ops_set: + if op not in ops_set: + ops.append(op) + + def graph_replace(target_ts, replacement_ts, dst_scope="", src_scope="", reuse_dst_scope=False): """Create a new graph which compute the targets from the replaced Tensors. @@ -657,8 +734,13 @@ def graph_replace(target_ts, replacement_ts, dst_scope="", control_ios=control_ios) if not ops: raise ValueError("Targets and replacements are not connected!") + + # Complete ops to avoid malformed control flow. + # TODO(fkp): Consider moving this function deeper (in the transformer?). + _add_control_flow_ops(ops, control_ios) + # Create a copy of the relevant subgraph - _, info = copy_with_input_replacements( + unused_sgv_, info = copy_with_input_replacements( ops, replacement_ts, None, dst_scope, src_scope, reuse_dst_scope) # Return the transformed targets but keep the original if the transformed # counterpart cannot be found diff --git a/tensorflow/contrib/graph_editor/util.py b/tensorflow/contrib/graph_editor/util.py index 30bc33b9ee..584f4509cc 100644 --- a/tensorflow/contrib/graph_editor/util.py +++ b/tensorflow/contrib/graph_editor/util.py @@ -38,6 +38,11 @@ __all__ = [ ] +# The graph editor sometimes need to create placeholders, they are named +# "geph_*". "geph" stands for Graph-Editor PlaceHolder. +_DEFAULT_PLACEHOLDER_PREFIX = "geph" + + def concatenate_unique(la, lb): """Add all the elements of `lb` to `la` if they are not there already. @@ -405,7 +410,7 @@ def scope_basename(scope): return scope[slash + 1:] -def placeholder_name(t=None, scope=None): +def placeholder_name(t=None, scope=None, prefix=_DEFAULT_PLACEHOLDER_PREFIX): """Create placeholder name for the graph editor. Args: @@ -413,6 +418,7 @@ def placeholder_name(t=None, scope=None): on scope: absolute scope with which to prefix the placeholder's name. None means that the scope of t is preserved. "" means the root scope. + prefix: placeholder name prefix. Returns: A new placeholder name prefixed by "geph". Note that "geph" stands for Graph Editor PlaceHolder. This convention allows to quickly identify the @@ -430,19 +436,20 @@ def placeholder_name(t=None, scope=None): if scope is None: scope = op_dirname - if op_basename.startswith("geph__"): + if op_basename.startswith("{}__".format(prefix)): ph_name = op_basename else: - ph_name = "geph__{}_{}".format(op_basename, t.value_index) + ph_name = "{}__{}_{}".format(prefix, op_basename, t.value_index) return scope + ph_name else: if scope is None: scope = "" - return scope + "geph" + return "{}{}".format(scope, prefix) -def make_placeholder_from_tensor(t, scope=None): +def make_placeholder_from_tensor(t, scope=None, + prefix=_DEFAULT_PLACEHOLDER_PREFIX): """Create a `tf.placeholder` for the Graph Editor. Note that the correct graph scope must be set by the calling function. @@ -452,17 +459,19 @@ def make_placeholder_from_tensor(t, scope=None): (see function placeholder_name). scope: absolute scope within which to create the placeholder. None means that the scope of `t` is preserved. `""` means the root scope. + prefix: placeholder name prefix. Returns: A newly created `tf.placeholder`. Raises: TypeError: if `t` is not `None` or a `tf.Tensor`. """ return tf_array_ops.placeholder( - dtype=t.dtype, shape=t.get_shape(), name=placeholder_name( - t, scope=scope)) + dtype=t.dtype, shape=t.get_shape(), + name=placeholder_name(t, scope=scope, prefix=prefix)) -def make_placeholder_from_dtype_and_shape(dtype, shape=None, scope=None): +def make_placeholder_from_dtype_and_shape(dtype, shape=None, scope=None, + prefix=_DEFAULT_PLACEHOLDER_PREFIX): """Create a tf.placeholder for the Graph Editor. Note that the correct graph scope must be set by the calling function. @@ -474,11 +483,13 @@ def make_placeholder_from_dtype_and_shape(dtype, shape=None, scope=None): shape: the tensor shape (optional). scope: absolute scope within which to create the placeholder. None means that the scope of t is preserved. "" means the root scope. + prefix: placeholder name prefix. Returns: A newly created tf.placeholder. """ return tf_array_ops.placeholder( - dtype=dtype, shape=shape, name=placeholder_name(scope=scope)) + dtype=dtype, shape=shape, + name=placeholder_name(scope=scope, prefix=prefix)) _INTERNAL_VARIABLE_RE = re.compile(r"^__\w+__$") diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 50a1d3fe04..b3fa39fdab 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1916,7 +1916,8 @@ class Operation(object): tensor._add_consumer(self) # pylint: disable=protected-access self._recompute_node_def() - def _update_input(self, index, tensor): + # TODO(skyewm): Remove `update_dtype` when we enable the C API. + def _update_input(self, index, tensor, update_dtype=True): """Update the input to this operation at the given index. NOTE: This is for TF internal use only. Please don't use it. @@ -1924,6 +1925,7 @@ class Operation(object): Args: index: the index of the input to update. tensor: the Tensor to be used as the input at the given index. + update_dtype: If `False`, the type for this input is not updated. Raises: TypeError: if tensor is not a Tensor, @@ -1943,7 +1945,8 @@ class Operation(object): else: self._inputs_val[index].consumers().remove(self) self._inputs_val[index] = tensor - self._input_types_val[index] = tensor.dtype + if update_dtype: + self._input_types_val[index] = tensor.dtype tensor._add_consumer(self) # pylint: disable=protected-access self._recompute_node_def() -- GitLab From 7a60167ba7718c23b0ed70d079bbb446f63a4fd9 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 21 Mar 2018 11:41:12 -0700 Subject: [PATCH 308/960] Don't run data_utils_test without optimizations. PiperOrigin-RevId: 189941645 --- tensorflow/python/keras/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 3180b9f410..711106d2db 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -613,6 +613,7 @@ py_test( "no_windows", "noasan", # times out "notsan", + "optonly", # times out ], deps = [ ":keras", -- GitLab From cbede3ea7574b36f429710bc08617d08455bcc21 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Wed, 21 Mar 2018 12:00:04 -0700 Subject: [PATCH 309/960] Fix compilation error with clang. Link to breaking CI build: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu-clang/232 PiperOrigin-RevId: 189944547 --- tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index f86aff47e1..e6811d4ad2 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -147,7 +147,7 @@ int main(int argc, char** argv) { tensorflow::string session_id = tensorflow::tpu::GetCurrentTimeStampAsString(); constexpr char kProfilePluginDirectory[] = "plugins/profile/"; - string repository_root = + tensorflow::string repository_root = ::tensorflow::io::JoinPath(FLAGS_logdir, kProfilePluginDirectory); while (true) { std::cout << "Starting to profile TPU traces for " << duration_ms << " ms. " -- GitLab From 2d0531d72c7dcbb0e149cafdd3a16ee8c3ff357a Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 21 Mar 2018 12:07:51 -0700 Subject: [PATCH 310/960] Merge changes from github. PiperOrigin-RevId: 189945839 --- README.md | 4 + SECURITY.md | 16 +- configure.py | 5 +- .../xla/service/generic_transfer_manager.cc | 9 +- .../compiler/xla/tests/convolution_test.cc | 2 +- tensorflow/contrib/BUILD | 3 +- tensorflow/contrib/cmake/README.md | 12 +- tensorflow/contrib/cmake/external/grpc.cmake | 1 + .../contrib/cmake/external/protobuf.cmake | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 4 + tensorflow/contrib/data/__init__.py | 4 + .../contrib/data/python/kernel_tests/BUILD | 17 + .../data/python/kernel_tests/resample_test.py | 4 +- .../kernel_tests/slide_dataset_op_test.py | 242 +++ tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/resampling.py | 4 +- tensorflow/contrib/data/python/ops/sliding.py | 102 ++ tensorflow/contrib/factorization/BUILD | 5 +- .../contrib/ffmpeg/default/ffmpeg_lib.cc | 9 +- .../contrib/framework/python/ops/arg_scope.py | 2 +- .../eval/python/classifier_metrics_impl.py | 8 +- .../eval/python/sliced_wasserstein_impl.py | 4 +- .../python/conditioning_utils_impl.py | 2 +- .../python/random_tensor_pool_impl.py | 4 +- .../features/python/virtual_batchnorm_test.py | 2 +- .../grid_rnn/python/ops/grid_rnn_cell.py | 2 +- tensorflow/contrib/image/BUILD | 1 + tensorflow/contrib/kafka/BUILD | 108 +- .../kafka/kernels/kafka_dataset_ops.cc | 4 +- tensorflow/contrib/kafka/ops/dataset_ops.cc | 44 + .../kafka/python/ops/kafka_dataset_ops.py | 9 +- .../kafka/python/ops/kafka_op_loader.py | 24 + .../contrib/kfac/python/ops/optimizer.py | 2 +- .../layers/python/layers/embedding_ops.py | 2 +- .../contrib/layers/python/layers/encoders.py | 2 +- tensorflow/contrib/learn/BUILD | 7 +- .../python/learn/estimators/estimator.py | 4 +- .../contrib/learn/python/learn/experiment.py | 2 +- .../learn/python/learn/ops/embeddings_ops.py | 2 +- tensorflow/contrib/lite/Makefile | 9 +- tensorflow/contrib/lite/README.md | 4 +- tensorflow/contrib/lite/arena_planner.h | 2 +- tensorflow/contrib/lite/build_rpi_lib.sh | 22 + tensorflow/contrib/lite/error_reporter.h | 2 +- tensorflow/contrib/lite/g3doc/ios.md | 9 + tensorflow/contrib/lite/g3doc/rpi.md | 50 + tensorflow/contrib/lite/interpreter.h | 2 +- tensorflow/contrib/lite/interpreter_test.cc | 2 +- tensorflow/contrib/lite/kernels/conv.cc | 2 +- .../contrib/lite/kernels/depthwise_conv.cc | 2 +- .../contrib/lite/kernels/fully_connected.cc | 2 +- .../lite/kernels/internal/spectrogram.cc | 1 + tensorflow/contrib/lite/kernels/kernel_util.h | 2 +- .../contrib/lite/kernels/lsh_projection.cc | 2 +- tensorflow/contrib/lite/kernels/lstm.cc | 6 +- tensorflow/contrib/lite/kernels/reshape.cc | 12 +- .../contrib/lite/kernels/reshape_test.cc | 2 +- tensorflow/contrib/lite/kernels/test_util.cc | 4 +- .../kernels/unidirectional_sequence_lstm.cc | 2 +- tensorflow/contrib/lite/memory_planner.h | 4 +- tensorflow/contrib/lite/model.h | 2 +- .../contrib/lite/nnapi/NeuralNetworksShim.h | 2 +- tensorflow/contrib/lite/rpi_makefile.inc | 33 + .../contrib/lite/schema/upgrade_schema.py | 8 +- .../contrib/lite/simple_memory_arena.cc | 6 +- tensorflow/contrib/lite/simple_memory_arena.h | 6 +- tensorflow/contrib/makefile/README.md | 2 + tensorflow/contrib/makefile/build_all_ios.sh | 5 +- .../contrib/metrics/python/ops/metric_ops.py | 2 +- .../model_pruning/python/layers/layers.py | 2 +- .../contrib/model_pruning/python/pruning.py | 2 +- tensorflow/contrib/mpi/mpi_utils.h | 2 + .../contrib/predictor/predictor_factories.py | 4 +- .../contrib/py2tf/converters/single_return.py | 2 +- .../quantize/python/fold_batch_norms.py | 4 +- .../contrib/quantize/python/quant_ops.py | 4 +- .../contrib/quantize/python/quantize.py | 2 +- .../contrib/quantize/python/quantize_graph.py | 2 +- .../python/quantize_parameterized_test.py | 8 +- .../contrib/quantize/python/quantize_test.py | 2 +- tensorflow/contrib/rnn/ops/gru_ops.cc | 2 +- .../rnn/python/kernel_tests/lstm_ops_test.py | 2 +- tensorflow/contrib/rnn/python/ops/lstm_ops.py | 3 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 10 +- .../kernel_tests/attention_wrapper_test.py | 3 + .../kernel_tests/beam_search_decoder_test.py | 104 +- .../seq2seq/python/ops/attention_wrapper.py | 19 +- .../seq2seq/python/ops/beam_search_decoder.py | 176 +- tensorflow/contrib/slim/README.md | 2 +- .../solvers/python/ops/least_squares.py | 2 +- .../solvers/python/ops/linear_equations.py | 2 +- tensorflow/contrib/tensorrt/BUILD | 2 + tensorflow/contrib/tensorrt/README.md | 23 +- tensorflow/contrib/tensorrt/__init__.py | 18 +- .../contrib/tensorrt/convert/convert_graph.cc | 256 ++- .../contrib/tensorrt/convert/convert_graph.h | 10 +- .../contrib/tensorrt/convert/convert_nodes.cc | 1481 ++++++++++++++--- .../contrib/tensorrt/convert/convert_nodes.h | 53 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 11 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 39 +- tensorflow/contrib/tensorrt/log/trt_logger.cc | 8 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 4 +- .../contrib/tensorrt/python/__init__.py | 1 + .../contrib/tensorrt/python/trt_convert.py | 70 +- .../tensorrt/resources/trt_int8_calibrator.cc | 56 +- .../tensorrt/resources/trt_int8_calibrator.h | 15 +- .../contrib/tensorrt/test/test_tftrt.py | 57 +- tensorflow/contrib/tensorrt/trt_conversion.i | 63 +- .../contrib/tpu/ops/tpu_embedding_ops.cc | 6 +- .../tpu/python/tpu/device_assignment.py | 4 +- .../contrib/tpu/python/tpu/tpu_config.py | 2 +- .../contrib/tpu/python/tpu/tpu_context.py | 4 +- .../contrib/tpu/python/tpu/tpu_estimator.py | 12 +- .../contrib/tpu/python/tpu/training_loop.py | 2 +- tensorflow/core/BUILD | 4 + .../base_api/api_def_SelfAdjointEig.pbtxt | 3 +- .../base_api/api_def_SelfAdjointEigV2.pbtxt | 3 +- .../base_api/api_def_SlideDataset.pbtxt | 18 + .../core/distributed_runtime/tensor_coding.cc | 4 +- tensorflow/core/graph/mkl_layout_pass.cc | 31 +- .../grappler/optimizers/loop_optimizer.cc | 8 +- tensorflow/core/kernels/BUILD | 12 +- tensorflow/core/kernels/concat_op.cc | 98 +- tensorflow/core/kernels/conv_ops_test.cc | 2 +- tensorflow/core/kernels/data/BUILD | 14 + .../core/kernels/data/slide_dataset_op.cc | 252 +++ tensorflow/core/kernels/depthtospace_op.cc | 3 + .../core/kernels/depthtospace_op_gpu.cu.cc | 6 + tensorflow/core/kernels/hexagon/BUILD | 1 + .../core/kernels/mkl_conv_grad_filter_ops.cc | 81 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 18 +- tensorflow/core/kernels/mkl_conv_ops.cc | 146 +- tensorflow/core/kernels/mkl_conv_ops.h | 117 +- .../core/kernels/mkl_input_conversion_op.cc | 7 +- tensorflow/core/kernels/mkl_relu_op.cc | 23 +- .../core/kernels/segment_reduction_ops.h | 8 + tensorflow/core/kernels/spacetodepth_op.cc | 3 + .../core/kernels/spacetodepth_op_gpu.cu.cc | 6 + tensorflow/core/lib/io/record_reader.cc | 2 + tensorflow/core/lib/io/record_reader.h | 4 +- tensorflow/core/ops/dataset_ops.cc | 12 +- tensorflow/core/ops/nn_ops.cc | 8 + .../platform/windows/windows_file_system.cc | 3 +- tensorflow/core/public/version.h | 2 +- tensorflow/core/util/stat_summarizer.h | 2 +- tensorflow/docs_src/community/welcome.md | 4 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 66 +- tensorflow/docs_src/install/install_mac.md | 23 +- .../docs_src/install/install_sources.md | 4 +- .../docs_src/install/install_windows.md | 5 +- .../docs_src/performance/performance_guide.md | 2 +- tensorflow/docs_src/performance/xla/jit.md | 2 +- .../docs_src/programmers_guide/debugger.md | 3 +- tensorflow/docs_src/programmers_guide/faq.md | 4 +- .../docs_src/programmers_guide/saved_model.md | 4 +- .../summaries_and_tensorboard.md | 2 +- .../docs_src/programmers_guide/using_tpu.md | 7 +- tensorflow/docs_src/tutorials/deep_cnn.md | 2 +- .../docs_src/tutorials/image_retraining.md | 2 +- .../docs_src/tutorials/kernel_methods.md | 6 +- tensorflow/docs_src/tutorials/layers.md | 12 +- .../docs_src/tutorials/recurrent_quickdraw.md | 3 +- tensorflow/docs_src/tutorials/wide.md | 16 +- .../examples/android/AndroidManifest.xml | 4 + .../org/tensorflow/demo/CameraActivity.java | 7 +- .../org/tensorflow/demo/StylizeActivity.java | 60 + tensorflow/examples/ios/README.md | 6 +- tensorflow/examples/learn/mnist.py | 6 +- tensorflow/examples/learn/resnet.py | 12 +- tensorflow/python/BUILD | 12 +- tensorflow/python/client/timeline_test.py | 7 +- tensorflow/python/estimator/estimator.py | 34 +- tensorflow/python/estimator/run_config.py | 2 +- tensorflow/python/estimator/training.py | 26 +- .../keras/_impl/keras/engine/training.py | 2 +- .../keras/_impl/keras/layers/recurrent.py | 4 +- .../keras/_impl/keras/utils/generic_utils.py | 4 +- .../keras/_impl/keras/utils/vis_utils.py | 2 +- .../python/kernel_tests/concat_op_test.py | 11 + .../python/kernel_tests/conv_ops_test.py | 20 +- .../kernel_tests/depthtospace_op_test.py | 10 +- .../kernel_tests/spacetodepth_op_test.py | 10 +- tensorflow/python/layers/base.py | 2 +- tensorflow/python/layers/normalization.py | 9 +- tensorflow/python/lib/io/file_io_test.py | 5 + tensorflow/python/lib/io/tf_record.py | 18 +- tensorflow/python/ops/linalg_ops.py | 2 +- tensorflow/python/ops/nn_ops.py | 16 +- tensorflow/python/ops/random_ops.py | 2 +- tensorflow/python/ops/rnn.py | 17 +- tensorflow/python/ops/special_math_ops.py | 4 +- .../python/ops/special_math_ops_test.py | 5 + tensorflow/python/tools/freeze_graph.py | 36 +- tensorflow/python/tools/inspect_checkpoint.py | 4 +- tensorflow/python/tools/saved_model_cli.py | 60 + .../python/tools/saved_model_cli_test.py | 22 + tensorflow/python/training/saver.py | 5 +- tensorflow/stream_executor/cuda/cuda_dnn.cc | 33 +- .../tools/api/tests/api_compatibility_test.py | 2 + tensorflow/tools/ci_build/Dockerfile.cmake | 5 +- tensorflow/tools/compatibility/tf_upgrade.py | 6 +- tensorflow/tools/dist_test/README.md | 8 + tensorflow/tools/dist_test/local_test.sh | 22 +- .../tools/dist_test/python/mnist_replica.py | 2 +- tensorflow/tools/docker/Dockerfile.gpu | 9 +- tensorflow/tools/git/gen_git_source.py | 7 + tensorflow/tools/graph_transforms/BUILD | 1 + .../graph_transforms/fold_old_batch_norms.cc | 67 + .../fold_old_batch_norms_test.cc | 97 +- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/tools/pip_package/setup.py | 4 +- .../tools/test/upload_test_benchmarks.py | 9 +- third_party/jpeg/jpeg.BUILD | 4 +- third_party/kafka/BUILD | 13 +- third_party/py/BUILD.tpl | 22 +- third_party/tensorrt/tensorrt_configure.bzl | 4 + 219 files changed, 4312 insertions(+), 990 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py create mode 100644 tensorflow/contrib/data/python/ops/sliding.py create mode 100644 tensorflow/contrib/kafka/ops/dataset_ops.cc create mode 100644 tensorflow/contrib/kafka/python/ops/kafka_op_loader.py create mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh create mode 100644 tensorflow/contrib/lite/g3doc/rpi.md create mode 100644 tensorflow/contrib/lite/rpi_makefile.inc create mode 100644 tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt create mode 100644 tensorflow/core/kernels/data/slide_dataset_op.cc diff --git a/README.md b/README.md index ef5bdc66ef..3cdb6e478d 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. +Keep up to date with release announcements and security updates by +subscribing to +[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). + ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* diff --git a/SECURITY.md b/SECURITY.md index fea24b2739..378e776967 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ report vulnerabilities in TensorFlow. ## TensorFlow models are programs -TensorFlow's runtime system interprets and executes programs. What machine +TensorFlow's runtime system interprets and executes programs. What machine learning practitioners term [**models**](https://developers.google.com/machine-learning/glossary/#model) are expressed as programs that TensorFlow executes. TensorFlow programs are encoded @@ -28,12 +28,12 @@ data you supply to TensorFlow to train a model, or to use a model to run inference on the data. **TensorFlow models are programs, and need to be treated as such from a security -perspective.** +perspective.** ## Running untrusted models As a general rule: **Always** execute untrusted models inside a sandbox (e.g., -[nsjail](https://github.com/google/nsjail)). +[nsjail](https://github.com/google/nsjail)). There are several ways in which a model could become untrusted. Obviously, if an untrusted party supplies TensorFlow kernels, arbitrary code may be executed. @@ -109,11 +109,11 @@ graphs known to the `ModelServer`. This means that an attacker may run graphs using untrusted inputs as described above, but they would not be able to execute arbitrary graphs. It is possible to safely expose a `ModelServer` directly to an untrusted network, **but only if the graphs it is configured to -use have been carefully audited to be safe**. +use have been carefully audited to be safe**. Similar to best practices for other servers, we recommend running any `ModelServer` with appropriate privileges (i.e., using a separate user with -reduced permisisons). In the spirit of defense in depth, we recommend +reduced permissions). In the spirit of defense in depth, we recommend authenticating requests to any TensorFlow server connected to an untrusted network, as well as sandboxing the server to minimize the adverse effects of any breach. @@ -129,11 +129,11 @@ with specially crafted inputs. ### What is a vulnerability? Given TensorFlow's flexibility, it is possible to specify computation graphs -which exhibit unexpected or unwanted behaviors. The fact that TensorFlow models +which exhibit unexpected or unwanted behavior. The fact that TensorFlow models can perform arbitrary computations means that they may read and write files, communicate via the network, produce deadlocks and infinite loops, or run out of memory. It is only when these behaviors are outside the specifications of the -operations involved that such behavior is a vulnerability. +operations involved that such behavior is a vulnerability. A `FileWriter` writing a file is not unexpected behavior and therefore is not a vulnerability in TensorFlow. A `MatMul` allowing arbitrary binary code execution @@ -168,7 +168,7 @@ below). Please use a descriptive subject line for your report email. After the initial reply to your report, the security team will endeavor to keep you informed of -the progress being made towards a fix and announcement. +the progress being made towards a fix and announcement. If you believe that an existing (public) issue is security-related, please send an email to `security@tensorflow.org`. The email should include the issue ID and diff --git a/configure.py b/configure.py index 97f46757ee..7d61c2e5e3 100644 --- a/configure.py +++ b/configure.py @@ -1048,7 +1048,10 @@ def set_tf_tensorrt_install_path(environ_cp): for lib_file in possible_files: if is_compatible(lib_file, cuda_ver, cudnn_ver): - ver_str = nvinfer_pattern.search(lib_file).group(1) + matches = nvinfer_pattern.search(lib_file) + if len(matches.groups()) == 0: + continue + ver_str = matches.group(1) ver = convert_version_to_int(ver_str) if len(ver_str) else 0 if ver > highest_ver[0]: highest_ver = [ver, ver_str, lib_file] diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index 78dc0ad4fc..a99e2b7794 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -38,14 +38,7 @@ namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size) - : platform_id_(platform_id), pointer_size_(pointer_size) { - // We currently only support kHostPlatformId for CPU, kCudaPlatformId for - // GPU and kInterpreterPlatformId for Interpreter. Before supporting other - // platforms, we need to test this transfer manager on them. - CHECK(platform_id_ == se::host::kHostPlatformId || - platform_id_ == se::interpreter::kInterpreterPlatformId || - platform_id_ == se::cuda::kCudaPlatformId); -} + : platform_id_(platform_id), pointer_size_(pointer_size) {} se::Platform::Id GenericTransferManager::PlatformId() const { return platform_id_; diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 99640f5bb5..72715398de 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -723,7 +723,7 @@ INSTANTIATE_TEST_CASE_P( ); #endif -TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { +XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index bab37e8906..d103da79e3 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -121,6 +121,7 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -147,7 +148,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:kafka_ops_op_lib", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 8f85a75ee4..fe83bb3204 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Pre-requisites +### Prerequisites * CMake version 3.5 or later. @@ -34,14 +34,16 @@ bindings. * [SWIG](http://www.swig.org/download.html) -* Additional pre-requisites for Microsoft Windows: +* Additional prerequisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 - - NumPy 1.11.0 or later -* Additional pre-requisites for Linux: +* Additional prerequisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) + +* Python dependencies: + - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -102,7 +104,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the pre-requisites detailed above, and set up your environment. +1. Install the prerequisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). You will need to set up your environment to use the diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 95106dba1f..cc218e8ab8 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,6 +35,7 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index aba8a5244e..ab464bc99a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) +set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index cdf48b3584..237f4fe33a 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -478,6 +478,10 @@ if (tensorflow_BUILD_CC_TESTS) "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc" ) + list(REMOVE_ITEM tf_test_src_simple + ${tf_core_profiler_test_srcs} + ) + set(tf_test_lib tf_test_lib) add_library(${tf_test_lib} STATIC ${tf_src_testlib}) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index f09d156832..9212b69700 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -40,6 +40,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@rejection_resample @@scan @@shuffle_and_repeat +@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -72,6 +73,9 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat +from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch +from tensorflow.python.data.ops.iterator_ops import Iterator +from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 22418b38e3..2c4d4adfda 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -498,6 +498,23 @@ py_test( ], ) +tf_py_test( + name = "slide_dataset_op_test", + size = "small", + srcs = ["slide_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 3c7b46629e..913ab9b9f8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -45,12 +45,10 @@ class ResampleTest(test.TestCase): target_dist=target_dist, initial_dist=initial_dist, class_func=lambda c, _: c, - seed=27)).make_initializable_iterator()) - init_op = iterator.initializer + seed=27)).make_one_shot_iterator()) get_next = iterator.get_next() with self.test_session() as sess: - sess.run(init_op) returned = [] with self.assertRaises(errors.OutOfRangeError): while True: diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py new file mode 100644 index 0000000000..33c48e20be --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py @@ -0,0 +1,242 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.ops import sliding +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class SlideDatasetTest(test.TestCase): + + def testSlideDataset(self): + """Test an dataset that maps a TF function across its input elements.""" + components = (np.arange(7), + np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], + np.array(37.0) * np.arange(7)) + + count = array_ops.placeholder(dtypes.int64, shape=[]) + window_size = array_ops.placeholder(dtypes.int64, shape=[]) + stride = array_ops.placeholder(dtypes.int64, shape=[]) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), math_ops.square(z) + + # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> + # RepeatDataset(count) -> _SlideDataset(window_size, stride). + iterator = (dataset_ops.Dataset.from_tensor_slices(components) + .map(_map_fn) + .repeat(count) + .apply(sliding.sliding_window_batch(window_size, stride)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([[None] + list(c.shape[1:]) for c in components], + [t.shape.as_list() for t in get_next]) + + with self.test_session() as sess: + # Slide over a finite input, where the window_size divides the + # total number of elements. + sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) + # Same formula with convolution layer. + num_batches = (20 * 7 - 14) // 7 + 1 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(14): + self.assertAllEqual(component[(i*7 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over a finite input, where the window_size does not + # divide the total number of elements. + sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) + + num_batches = (20 * 7 - 17) // 9 + 1 + for i in range(num_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(17): + self.assertAllEqual(component[(i*9 + j) % 7]**2, + result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over a finite input, which is less than window_size, + # should fail straight away. + sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Slide over an empty input should fail straight away. + sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4}) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + # Empty window_size should be an initialization time error. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0}) + + # Invalid stride should be an initialization time error. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) + + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testSlideSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( + sliding.sliding_window_batch(5, 3)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], + dense_shape=[5, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSlideSparseWithDifferentDenseShapes(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=array_ops.expand_dims( + math_ops.range(i, dtype=dtypes.int64), 1), + values=array_ops.fill([math_ops.to_int32(i)], i), + dense_shape=[i]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( + sliding.sliding_window_batch(5, 3)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected_indices = [] + expected_values = [] + for j in range(5): + for k in range(i * 3 + j): + expected_indices.append([j, k]) + expected_values.append(i * 3 + j) + expected = sparse_tensor.SparseTensorValue( + indices=expected_indices, + values=expected_values, + dense_shape=[5, i * 3 + 5 - 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testNestedSlideSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = (dataset_ops.Dataset.range(10) + .map(_sparse) + .apply(sliding.sliding_window_batch(4, 2)) + .apply(sliding.sliding_window_batch(3, 1)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + # Slide: 1st batch. + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], + [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], + values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + # Slide: 2nd batch. + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], + [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], + [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], + values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testSlideShapeError(self): + + def generator(): + yield [1.0, 2.0, 3.0] + yield [4.0, 5.0, 6.0] + yield [7.0, 8.0, 9.0, 10.0] + + iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32, + output_shapes=[None]) + .apply(sliding.sliding_window_batch(3, 1)) + .make_initializable_iterator()) + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r"Cannot batch tensors with different shapes in component 0. " + r"First element had shape \[3\] and element 2 had shape \[4\]."): + sess.run(next_element) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index f03430c5c5..c3331e9636 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -106,6 +106,7 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", + "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index 56f526a330..f4015f19fb 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -54,7 +54,7 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" dist_estimation_batch_size = 32 - target_dist_t = ops.convert_to_tensor(target_dist, name="initial_dist") + target_dist_t = ops.convert_to_tensor(target_dist, name="target_dist") class_values_ds = dataset.map(class_func) if initial_dist is not None: initial_dist_t = ops.convert_to_tensor(initial_dist, name="initial_dist") @@ -151,7 +151,7 @@ def _calculate_acceptance_probs(initial_probs, target_probs): ``` - A solution for a_i in terms of the other variabes is the following: + A solution for a_i in terms of the other variables is the following: ```a_i = (t_i / p_i) / max_i[t_i / p_i]``` """ # Add tiny to initial_probs to avoid divide by zero. diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py new file mode 100644 index 0000000000..19cc3cb89f --- /dev/null +++ b/tensorflow/contrib/data/python/ops/sliding.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Sliding dataset transformations.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops + + +class _SlideDataset(dataset_ops.Dataset): + """A `Dataset` that passes a sliding window over its input.""" + + def __init__(self, input_dataset, window_size, stride=1): + """See `sliding_window_batch` for details.""" + super(_SlideDataset, self).__init__() + self._input_dataset = input_dataset + self._window_size = ops.convert_to_tensor( + window_size, dtype=dtypes.int64, name="window_size") + self._stride = ops.convert_to_tensor( + stride, dtype=dtypes.int64, name="stride") + + def _as_variant_tensor(self): + return gen_dataset_ops.slide_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + window_size=self._window_size, + stride=self._stride, + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + input_shapes = self._input_dataset.output_shapes + return nest.pack_sequence_as(input_shapes, [ + tensor_shape.vector(None).concatenate(s) + for s in nest.flatten(self._input_dataset.output_shapes) + ]) + + @property + def output_types(self): + return self._input_dataset.output_types + + +def sliding_window_batch(window_size, stride=1): + """A sliding window with size of `window_size` and step of `stride`. + + This transformation passes a sliding window over this dataset. The + window size is `window_size` and step size is `stride`. If the left + elements cannot fill up the sliding window, this transformation will + drop the final smaller element. For example: + + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. + a = { [1], [2], [3], [4], [5], [6] } + + a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == + { + [[1], [2], [3]], + [[3], [4], [5]], + } + ``` + + Args: + window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + elements in the sliding window. + stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + steps moving the sliding window forward for one iteration. The default + is `1`. It must be in `[1, window_size)`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + def _apply_fn(dataset): + return _SlideDataset(dataset, window_size, stride) + + return _apply_fn diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 90f10f1fa8..ad8568ad44 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -224,7 +224,10 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = ["notsan"], # b/67512932 + tags = [ + "nomac", # b/73741358 + "notsan", # b/67512932 + ], deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index e61221a6b0..35341406a0 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -256,6 +256,9 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, if (p != std::string::npos) { string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); rgb24 = rgb24.substr(0, rgb24.find(",")); + // Strip anything after " ", in case the format is + // `640x360 [SAR 1:1 DAR 16:9]` + rgb24 = rgb24.substr(0, rgb24.find(" ")); string rgb24_width = rgb24.substr(0, rgb24.find("x")); string rgb24_height = rgb24.substr(rgb24_width.length() + 1); if (strings::safe_strtou32(rgb24_width, &width_value) && @@ -270,8 +273,10 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, // We only look for the first stream mapping to have the number of the // frames. // Once processed we will not further process stream mapping section. - if (line.find("frame= ") == 0) { - string number = line.substr(8, line.find(" ", 8)); + if (line.find("frame=") == 0) { + // The format might be `frame= 166 ` or `frame=12488 ` + string number = line.substr(6); + number = number.substr(number.find_first_not_of(" ")); number = number.substr(0, number.find(" ")); if (strings::safe_strtou32(number, &frames_value)) { in_mapping = false; diff --git a/tensorflow/contrib/framework/python/ops/arg_scope.py b/tensorflow/contrib/framework/python/ops/arg_scope.py index 409657fe1d..3cad1fee19 100644 --- a/tensorflow/contrib/framework/python/ops/arg_scope.py +++ b/tensorflow/contrib/framework/python/ops/arg_scope.py @@ -142,7 +142,7 @@ def arg_scope(list_ops_or_scope, **kwargs): else: # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs. if not isinstance(list_ops_or_scope, (list, tuple)): - raise TypeError('list_ops_or_scope must either be a list/tuple or reused' + raise TypeError('list_ops_or_scope must either be a list/tuple or reused ' 'scope (i.e. dict)') try: current_scope = current_arg_scope().copy() diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index 7e86d10b64..47e51415fd 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -321,7 +321,7 @@ def classifier_score(images, classifier_fn, num_batches=1): NOTE: This function consumes images, computes their logits, and then computes the classifier score. If you would like to precompute many logits for - large batches, use clasifier_score_from_logits(), which this method also + large batches, use classifier_score_from_logits(), which this method also uses. Args: @@ -454,7 +454,7 @@ def frechet_classifier_distance(real_images, This technique is described in detail in https://arxiv.org/abs/1706.08500. Given two Gaussian distribution with means m and m_w and covariance matrices - C and C_w, this function calcuates + C and C_w, this function calculates |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) @@ -467,7 +467,7 @@ def frechet_classifier_distance(real_images, Frechet distance is biased. It is more biased for small sample sizes. (e.g. even if the two distributions are the same, for a small sample size, the expected Frechet distance is large). It is important to use the same - sample size to compute frechet classifier distance when comparing two + sample size to compute Frechet classifier distance when comparing two generative models. NOTE: This function consumes images, computes their activations, and then @@ -659,7 +659,7 @@ def frechet_classifier_distance_from_activations(real_activations, This technique is described in detail in https://arxiv.org/abs/1706.08500. Given two Gaussian distribution with means m and m_w and covariance matrices - C and C_w, this function calcuates + C and C_w, this function calculates |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2)) diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py index 9bebcacbe4..4b10bc0f8e 100644 --- a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein_impl.py @@ -212,7 +212,7 @@ def sliced_wasserstein_distance(real_images, Args: real_images: (tensor) Real images (batch, height, width, channels). fake_images: (tensor) Fake images (batch, height, width, channels). - resolution_min: (int) Minimum resolution for the Laplacion pyramid. + resolution_min: (int) Minimum resolution for the Laplacian pyramid. patches_per_image: (int) Number of patches to extract per image per Laplacian level. patch_size: (int) Width of a square patch. @@ -221,7 +221,7 @@ def sliced_wasserstein_distance(real_images, use_svd: experimental method to compute a more accurate distance. Returns: List of tuples (distance_real, distance_fake) for each level of the - Laplacian pyramid from the highest resoluion to the lowest. + Laplacian pyramid from the highest resolution to the lowest. distance_real is the Wasserstein distance between real images distance_fake is the Wasserstein distance between real and fake images. Raises: diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py index cd31c62667..e2594faf85 100644 --- a/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py +++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Miscellanous utilities for TFGAN code and examples. +"""Miscellaneous utilities for TFGAN code and examples. Includes: 1) Conditioning the value of a Tensor, based on techniques from diff --git a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py index 4cfae0de44..9e4ec59e70 100644 --- a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py +++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_impl.py @@ -17,7 +17,7 @@ We use this to keep a history of values created by a generator, such that a discriminator can randomly be trained on some older samples, not just the current one. This can help to not let the discriminator get too far ahead of the -generator and also to keep the system from oscilating, if the discriminator +generator and also to keep the system from oscillating, if the discriminator forgets too fast what past samples from the generator looked like. See the following papers for more details. @@ -97,7 +97,7 @@ def tensor_pool(input_values, dtypes=[v.dtype for v in input_values], shapes=None) - # In pseudeo code this code does the following: + # In pseudo code this code does the following: # if not pool_full: # enqueue(input_values) # return input_values diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py index 845f89827b..2fe06a2872 100644 --- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py +++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py @@ -148,7 +148,7 @@ class VirtualBatchnormTest(test.TestCase): self.assertAllClose(bn_np[i, ...], vb_np) def test_minibatch_independent(self): - """Test that virtual batch normalized exampels are independent. + """Test that virtual batch normalized examples are independent. Unlike batch normalization, virtual batch normalization has the property that the virtual batch normalized value of an example is independent of the diff --git a/tensorflow/contrib/grid_rnn/python/ops/grid_rnn_cell.py b/tensorflow/contrib/grid_rnn/python/ops/grid_rnn_cell.py index 252788140f..bcd2a34c4e 100644 --- a/tensorflow/contrib/grid_rnn/python/ops/grid_rnn_cell.py +++ b/tensorflow/contrib/grid_rnn/python/ops/grid_rnn_cell.py @@ -110,7 +110,7 @@ class GridRNNCell(rnn.RNNCell): logging.warning('%s: Using a concatenated state is slower and will ' 'soon be deprecated. Use state_is_tuple=True.', self) if not output_is_tuple: - logging.warning('%s: Using a concatenated output is slower and will' + logging.warning('%s: Using a concatenated output is slower and will ' 'soon be deprecated. Use output_is_tuple=True.', self) if num_dims < 1: diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 2924aef815..79eb3762ed 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -259,6 +259,7 @@ cuda_py_test( "//tensorflow/core:protos_all_py", ], data = [":sparse_image_warp_test_data"], + tags = ["no_pip"], ) filegroup( diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index efb403462a..1c3974871c 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,66 +1,93 @@ -package( - default_visibility = ["//visibility:private"], -) +package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") -load("//tensorflow:tensorflow.bzl", "tf_py_test") +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_py_test", +) -tf_kernel_library( - name = "kafka_kernels", +py_library( + name = "kafka", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], - visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/kernels:bounds_check_lib", - "//tensorflow/core/kernels:dataset", + "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", "@kafka", + "@protobuf_archive//:protobuf_headers", ], + alwayslink = 1, ) -tf_gen_op_libs( - op_lib_names = ["kafka_ops"], +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/kafka_dataset_ops.py", + ], + srcs_version = "PY2AND3", deps = [ - "//tensorflow/core:lib", + ":kafka_op_loader", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", ], ) tf_gen_op_wrapper_py( - name = "gen_kafka_ops", - out = "python/ops/gen_kafka_ops.py", - require_shape_functions = True, - deps = [":kafka_ops_op_lib"], + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], ) -py_library( - name = "kafka", - srcs = [ - "__init__.py", - "python/ops/kafka_dataset_ops.py", +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "kafka_op_loader", + srcs = ["python/ops/kafka_op_loader.py"], + dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", ], srcs_version = "PY2AND3", - visibility = ["//visibility:public"], deps = [ - ":gen_kafka_ops", + ":gen_dataset_ops", "//tensorflow/contrib/util:util_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:iterator_ops", - "//tensorflow/python/data/ops:readers", ], ) @@ -88,6 +115,7 @@ tf_py_test( ], tags = [ "manual", + "no_windows", "notap", ], ) @@ -95,7 +123,9 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - ["**/*"], + include = [ + "**/*", + ], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index 88ef5f3571..a4cd4a2cc4 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,9 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/dataset.h" - -#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/dataset.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc new file mode 100644 index 0000000000..8cdf16103b --- /dev/null +++ b/tensorflow/contrib/kafka/ops/dataset_ops.cc @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("KafkaDataset") + .Input("topics: string") + .Input("servers: string") + .Input("group: string") + .Input("eof: bool") + .Input("timeout: int64") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that emits the messages of one or more Kafka topics. + +topics: A `tf.string` tensor containing one or more subscriptions, + in the format of [topic:partition:offset:length], + by default length is -1 for unlimited. +servers: A list of bootstrap servers. +group: The consumer group id. +eof: If True, the kafka reader will stop on EOF. +timeout: The timeout value for the Kafka Consumer to wait + (in millisecond). +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index 8e51d27a34..a1624614d1 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,8 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import gen_kafka_ops -from tensorflow.python.data.ops.readers import Dataset +from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import +from tensorflow.contrib.kafka.python.ops import gen_dataset_ops +from tensorflow.python.data.ops.dataset_ops import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -58,8 +59,8 @@ class KafkaDataset(Dataset): timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, - self._eof, self._timeout) + return gen_dataset_ops.kafka_dataset(self._topics, self._servers, + self._group, self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py new file mode 100644 index 0000000000..ec2fdea962 --- /dev/null +++ b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py @@ -0,0 +1,24 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python helper for loading kafka ops and kernels.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.util import loader +from tensorflow.python.platform import resource_loader + +_dataset_ops = loader.load_op_library( + resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index dee55cfa39..083da768ec 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -153,7 +153,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): raise ValueError("Unsupported momentum type {}. Must be one of {}." .format(momentum_type, legal_momentum_types)) if momentum_type != "regular" and norm_constraint is not None: - raise ValueError("Update clipping is only supported with momentum" + raise ValueError("Update clipping is only supported with momentum " "type 'regular'.") if momentum_type not in ["regular", "adam"] and momentum != 0: raise ValueError("Momentum must be unspecified if using a momentum_type " diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index b62e3050cd..ffa208540d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None): ids = ops.convert_to_tensor(ids) shape = array_ops.shape(ids) ids_flat = array_ops.reshape( - ids, math_ops.reduce_prod(shape, keep_dims=True)) + ids, math_ops.reduce_prod(shape, keepdims=True)) unique_ids, idx = array_ops.unique(ids_flat) unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids) embeds_flat = array_ops.gather(unique_embeddings, idx) diff --git a/tensorflow/contrib/layers/python/layers/encoders.py b/tensorflow/contrib/layers/python/layers/encoders.py index 89c9d37bd0..f42112206d 100644 --- a/tensorflow/contrib/layers/python/layers/encoders.py +++ b/tensorflow/contrib/layers/python/layers/encoders.py @@ -125,7 +125,7 @@ def embed_sequence(ids, `reuse` is `None` or `False`. """ if not (reuse or (vocab_size and embed_dim)): - raise ValueError('Must specify vocab size and embedding dimension when not' + raise ValueError('Must specify vocab size and embedding dimension when not ' 'reusing. Got vocab_size=%s and embed_dim=%s' % ( vocab_size, embed_dim)) with variable_scope.variable_scope( diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index f837ca3265..9c59150580 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,6 +5,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +load("//tensorflow:tensorflow.bzl", "py_test") + package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -426,7 +428,10 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = ["noasan"], + tags = [ + "noasan", # b/73741358 + "nomac", + ], deps = [ ":learn", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index d8ccb1e7dc..7a026a15e4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -917,8 +917,8 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, if feed_fn: hooks.append(basic_session_run_hooks.FeedFnHook(feed_fn)) if steps == 0: - logging.warning('evaluation steps are 0. If `input_fn` does not raise' - 'OutOfRangeError`, the evaluation will never stop.' + logging.warning('evaluation steps are 0. If `input_fn` does not raise ' + '`OutOfRangeError`, the evaluation will never stop. ' 'Use steps=None if intended.') if steps: hooks.append( diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 9a7c4cd685..3744abd860 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -358,7 +358,7 @@ class Experiment(object): self._start_server() elif config.cluster_spec and config.master: raise ValueError( - "For distributed runtime, Experiment class only works with" + "For distributed runtime, Experiment class only works with " "tf.contrib.learn.RunConfig for now, but provided {}".format( type(config))) diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index b3b067b8e1..8f9811cf25 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'): ids = ops.convert_to_tensor(ids) shape = array_ops_.shape(ids) ids_flat = array_ops_.reshape( - ids, math_ops.reduce_prod(shape, keep_dims=True)) + ids, math_ops.reduce_prod(shape, keepdims=True)) embeds_flat = nn.embedding_lookup(params, ids_flat, name) embed_shape = array_ops_.concat([shape, [-1]], 0) embeds = array_ops_.reshape(embeds_flat, embed_shape) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 7f31629272..b4504f246a 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX) gcc +CXX := $(CC_PREFIX)gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX) gcc -CFLAGS := +CC := $(CC_PREFIX)gcc +CFLAGS := -O3 -DNDEBUG LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,10 +57,11 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl -lpthread + LIBS += -ldl endif include $(MAKEFILE_DIR)/ios_makefile.inc +include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index df8c1c623c..2680d515eb 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -99,7 +99,7 @@ Similar to the Android demo app, there's an iOS camera app that uses exactly the This demo app requires a camera so it doesn't work with simulators. It need to be executed on a real iOS device. Follow the instructions to build and run the demo app: -1. Run `third_party/tensorflow/contrib/lite/examples/ios/download_models.sh` to download the model files used by the demo app. +1. Run `tensorflow/contrib/lite/examples/ios/download_models.sh` to download the model files used by the demo app. 1. Install [CocoaPods](https://cocoapods.org/) if it wasn't installed yet: `sudo gem install cocoapods`. 1. Run `pod install` in `tensorflow/contrib/lite/examples/ios/camera` to generate the workspace file. 1. Open the project by running `open tflite_camera_example.xcworkspace`, and build the app in XCode. @@ -165,7 +165,7 @@ bazel-bin/tensorflow/python/tools/freeze_graph\ --input_graph=/tmp/mobilenet_v1_224.pb \ --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ --input_binary=true --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ - --output_node_names=MobileNet/Predictions/Reshape_1 + --output_node_names=MobilenetV1/Predictions/Reshape_1 ``` The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index 58bc164619..f84b3dad95 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -33,7 +33,7 @@ class AllocationInfo; // each tensor needs to be allocated and deallocated, and preallocates all the // necessary memory (the PlanAllocations phase). It then assigns portions of // this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may -// share some of the bufer if a tensor B is to be allocated after another tensor +// share some of the buffer if a tensor B is to be allocated after another tensor // A has been deallocated. // // If dynamic tensors are used the planning steps can be repeated during model diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh new file mode 100755 index 0000000000..3824b16412 --- /dev/null +++ b/tensorflow/contrib/lite/build_rpi_lib.sh @@ -0,0 +1,22 @@ +#!/bin/bash -x +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR/../../.." + +CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h index da193d2586..3c5f805f12 100644 --- a/tensorflow/contrib/lite/error_reporter.h +++ b/tensorflow/contrib/lite/error_reporter.h @@ -30,7 +30,7 @@ namespace tflite { // va_list args; // foo.Report("test %d", args); // where args is va_list // -// Sublclass ErrorReporter to provide another reporting destination. +// Subclass ErrorReporter to provide another reporting destination. // For example, if you have a GUI program, you might redirect to a buffer // that drives a GUI error log box. class ErrorReporter { diff --git a/tensorflow/contrib/lite/g3doc/ios.md b/tensorflow/contrib/lite/g3doc/ios.md index a359b8d4b4..e0358a444d 100644 --- a/tensorflow/contrib/lite/g3doc/ios.md +++ b/tensorflow/contrib/lite/g3doc/ios.md @@ -22,6 +22,15 @@ Then install brew install automake brew install libtool ``` +If you get an error where either automake or libtool install but do not link correctly, you'll first need to: +```bash +sudo chown -R $(whoami) /usr/local/* +``` +Then follow the instructions to perform the linking: +```bash +brew link automake +brew link libtool +``` Then you need to run a shell script to download the dependencies you need: diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md new file mode 100644 index 0000000000..7a3a231626 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -0,0 +1,50 @@ +# TensorFlow Lite for Raspberry Pi + +## Cross compiling +### Installing toolchian +This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). + +To cross compiling TensorFlow Lite. First you should install the toolchain and libs. +```bash +sudo apt-get update +sudo apt-get install crossbuild-essential-armhf +``` +> If you are using docker, you may not use `sudo` + +### Building +Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies: +> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note than you only need to to this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. + +## Native compiling +This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). + +Log in to you RPI, install the toolchain. +```bash +sudo apt-get instal build-essential +``` + +First, clone this TensorFlow repository. Run this at the root of the repository: +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note than you only need to to this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 788546fd60..77db178783 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -493,7 +493,7 @@ class Interpreter { // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. - // Intepreter will stop allocating tensors, set the value of next allocate + // Interpreter will stop allocating tensors, set the value of next allocate // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index efb29d5c9d..131e088079 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -42,7 +42,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) { ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); } -// Test size accesser functions. +// Test size accessor functions. TEST(BasicInterpreter, TestSizeFunctions) { Interpreter interpreter; int base_index; diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index b91ba1a03d..e0cd12f1b4 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -64,7 +64,7 @@ struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index 15dbfe08c8..cad9ce114c 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -52,7 +52,7 @@ enum KernelType { struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index a77fe94e49..888e67966c 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -48,7 +48,7 @@ enum KernelType { struct OpData { // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multipler plus a left shift. + // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc index 0e481a9d40..4eddf7bf0a 100644 --- a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" +#include #include #include "third_party/fft2d/fft.h" diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 28f53b9fbb..21da1daff7 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) { } // Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specificially checks for a dynamic tensor. +// not dynamic. This function specifically checks for a dynamic tensor. inline bool IsDynamicTensor(TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc index 5f73b56ed9..0ee35775d5 100644 --- a/tensorflow/contrib/lite/kernels/lsh_projection.cc +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// LSH Projection projects an input to a bit vector via locality senstive +// LSH Projection projects an input to a bit vector via locality sensitive // hashing. // // Options: diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index b9255b23a5..8cf1165135 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // present. // 2) If projection weight is present, then projection bias is optional. // TODO(ghodrat): make sure this is correct. - const bool projecton_tensors_consistent = + const bool projection_tensors_consistent = ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); + TF_LITE_ENSURE(context, projection_tensors_consistent == true); return kTfLiteOk; } @@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existense of only one to the get the condition. + // check the existence of only one to get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc index f3e6ddc9f4..438f70d311 100644 --- a/tensorflow/contrib/lite/kernels/reshape.cc +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); int num_output_elements = 1; - int strech_dim = -1; + int stretch_dim = -1; for (int i = 0; i < params->num_dimensions; ++i) { int value = params->shape[i]; if (value == -1) { - TF_LITE_ENSURE_EQ(context, strech_dim, -1); - strech_dim = i; + TF_LITE_ENSURE_EQ(context, stretch_dim, -1); + stretch_dim = i; } else { num_output_elements *= value; output_size->data[i] = value; } } - if (strech_dim != -1) { - output_size->data[strech_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_size->data[strech_dim]; + if (stretch_dim != -1) { + output_size->data[stretch_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[stretch_dim]; } TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc index 0fbcf6e6aa..aecbd0399f 100644 --- a/tensorflow/contrib/lite/kernels/reshape_test.cc +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) { TEST(ReshapeOpTest, TooManySpecialDimensions) { EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), - "strech_dim != -1"); + "stretch_dim != -1"); } TEST(ReshapeOpTest, SimpleTest) { diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 373310bd87..0bb28b50b2 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type, void SingleOpModel::SetCustomOp( const string& name, const std::vector& custom_option, - const std::function& registeration) { - custom_registrations_[name] = registeration; + const std::function& registration) { + custom_registrations_[name] = registration; opcodes_.push_back( CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); operators_.push_back(CreateOperator( diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 508a570e2e..42941a97db 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existense of only one to the get the condition. + // check the existence of only one to get the condition. const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h index 5cd6c20850..0294ec815c 100644 --- a/tensorflow/contrib/lite/memory_planner.h +++ b/tensorflow/contrib/lite/memory_planner.h @@ -34,8 +34,8 @@ class MemoryPlanner { // [first_node, last_node]. virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; - // Invalidates allocations made earliers. This is called when tensors sizes - // have change. All planned allocations remain, but can't be used until + // Invalidates allocations made earlier. This is called when tensors sizes + // have changed. All planned allocations remain, but can't be used until // ExecuteAllocations() is called. virtual TfLiteStatus ResetAllocations() = 0; }; diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 0c777760cb..036dc46e03 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -81,7 +81,7 @@ class FlatBufferModel { const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); - // Releases memory or unmaps mmaped meory. + // Releases memory or unmaps mmaped memory. ~FlatBufferModel(); // Copying or assignment is disallowed to simplify ownership semantics. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index 76032771af..bd49d327c9 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -569,7 +569,7 @@ enum { ANEURALNETWORKS_LOGISTIC = 14, /** - * Projects an input to a bit vector via locality senstive hashing. + * Projects an input to a bit vector via locality sensitive hashing. * * Inputs: * * 0: Hash functions. Dim.size == 2, DataType: Float. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc new file mode 100644 index 0000000000..832ef5824b --- /dev/null +++ b/tensorflow/contrib/lite/rpi_makefile.inc @@ -0,0 +1,33 @@ +# Settings for Raspberry Pi. +ifeq ($(TARGET), RPI) + ifeq ($(TARGET_ARCH), armv7) + CXXFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + CCFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + LDFLAGS := \ + -Wl,--no-export-dynamic \ + -Wl,--exclude-libs,ALL \ + -Wl,--gc-sections \ + -Wl,--as-needed + endif + + LIBS := \ + -lstdc++ \ + -lpthread \ + -lm \ + -ldl + + OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ + LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ + BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ + DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ +endif diff --git a/tensorflow/contrib/lite/schema/upgrade_schema.py b/tensorflow/contrib/lite/schema/upgrade_schema.py index 94f5730be5..e0b36d3d3e 100644 --- a/tensorflow/contrib/lite/schema/upgrade_schema.py +++ b/tensorflow/contrib/lite/schema/upgrade_schema.py @@ -39,8 +39,8 @@ import tensorflow as tf from tensorflow.python.platform import resource_loader parser = argparse.ArgumentParser( - description="Script to move TFLite models from pre-release schema to" - " new schema.") + description="Script to move TFLite models from pre-release schema to " + "new schema.") parser.add_argument( "input", type=str, @@ -48,7 +48,7 @@ parser.add_argument( parser.add_argument( "output", type=str, - help="Output json or bin TensorFlow lite model compliant with" + help="Output json or bin TensorFlow lite model compliant with " "the new schema. Extension must be `.json`, `.bin` or `.tflite`.") @@ -258,7 +258,7 @@ class Converter(object): # Check if builtin_code is the appropriate string type # use type("") instead of str or unicode. for py2and3 if not isinstance(operator_code["builtin_code"], type(u"")): - raise ValueError("builtin_code %r is non-string. this usually means" + raise ValueError("builtin_code %r is non-string. this usually means " "your model has consistency problems." % (operator_code["builtin_code"])) operator_code["builtin_code"] = (RemapOperator( diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc index 4aab244989..2f2004f56b 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.cc +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { underlying_buffer_size_ = required_size; underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; } - commited_ = true; + committed_ = true; return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; } TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, char** output_ptr) { - TF_LITE_ENSURE(context, commited_); + TF_LITE_ENSURE(context, committed_); TF_LITE_ENSURE(context, output_ptr != nullptr); *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; return kTfLiteOk; } TfLiteStatus SimpleMemoryArena::Clear() { - commited_ = false; + committed_ = false; high_water_mark_ = 0; allocs_.clear(); return kTfLiteOk; diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 0535522374..5faf78b59e 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -22,7 +22,7 @@ limitations under the License. namespace tflite { // This little structure holds the offset and the size for a dynamic memory -// allocation in the memory arena. When the arena is commited and the +// allocation in the memory arena. When the arena is committed and the // underlying buffer is set, the alloc can be resolved into an actual memory // pointer. struct ArenaAlloc { @@ -43,7 +43,7 @@ struct ArenaAlloc { class SimpleMemoryArena { public: explicit SimpleMemoryArena(size_t arena_alignment) - : commited_(false), + : committed_(false), arena_alignment_(arena_alignment), high_water_mark_(0), underlying_buffer_size_(0), @@ -73,7 +73,7 @@ class SimpleMemoryArena { } private: - bool commited_; + bool committed_; size_t arena_alignment_; size_t high_water_mark_; std::unique_ptr underlying_buffer_; diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 995230dfa8..6c3b02e12b 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -194,6 +194,8 @@ with: srcs = glob(["libs/arm64-v8a/*.so"]), ``` +If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml + Then run: ```bash # Create dir for native libs diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 2d99791839..0a458a27b3 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -80,10 +80,9 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then fi else echo "${PRNT_SLCTV_BIN} found. Using it" - ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h - fi + ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h fi if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then @@ -111,7 +110,7 @@ if [[ -z "${BUILD_ARCH}" ]]; then TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` else # arch specified so build just that - TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a ${BUILD_ARCH}` + TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios -a "${BUILD_ARCH}"` fi export HOST_NSYNC_LIB TARGET_NSYNC_LIB diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 0fee584f8e..81f05e7ce5 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -3647,7 +3647,7 @@ def cohen_kappa(labels, RuntimeError: If eager execution is enabled. """ if context.executing_eagerly(): - raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported' + raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported ' 'when eager execution is enabled.') if num_classes < 2: raise ValueError('`num_classes` must be >= 2.' diff --git a/tensorflow/contrib/model_pruning/python/layers/layers.py b/tensorflow/contrib/model_pruning/python/layers/layers.py index 988748ad75..466daf204a 100644 --- a/tensorflow/contrib/model_pruning/python/layers/layers.py +++ b/tensorflow/contrib/model_pruning/python/layers/layers.py @@ -214,7 +214,7 @@ def masked_convolution(inputs, elif data_format == 'NCHW': df = 'channels_first' else: - raise ValueError('Unsupported data fromat', data_format) + raise ValueError('Unsupported data format', data_format) layer = layer_class( filters=num_outputs, diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index 86963be4b8..5146a4a2de 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -216,7 +216,7 @@ def _partitioned_variable_assign(partitioned_var, new_value): """Assign op for partitioned variables. Args: - partitioned_var: A partitioned tensotflow variable + partitioned_var: A partitioned tensorflow variable new_value: Value to be assigned to the variable var Returns: diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index fa297c28cb..df055ff567 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,6 +24,8 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" +// Skip MPI C++ bindings support, this matches the usage in other places +#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 04b5d5bdf1..6e77e934fe 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -53,7 +53,7 @@ def from_contrib_estimator(estimator, `Estimator`. """ if isinstance(estimator, core_estimator.Estimator): - raise TypeError('Espected estimator to be of type ' + raise TypeError('Expected estimator to be of type ' 'tf.contrib.learn.Estimator, but got type ' 'tf.python.estimator.Estimator. You likely want to call ' 'from_estimator.') @@ -88,7 +88,7 @@ def from_estimator(estimator, `Estimator`. """ if isinstance(estimator, contrib_estimator.Estimator): - raise TypeError('Espected estimator to be of type ' + raise TypeError('Expected estimator to be of type ' 'tf.python.estimator.Estimator, but got type ' 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 90bc22008f..1194b98f5e 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(gast.NodeVisitor, self).__init__() + super(DetectReturnInUnsupportedControlFlow, self).__init__() def visit_While(self, node): self.cant_return = True diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index e8a0d41425..5750be6f4c 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has bessel's + # correction). The variance tensor read from FuseBatchNorm has Bessel's # correction applied, so we undo it here. scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containg required batch norm tensors for correction + match: Object containing required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index 0a8e35080c..a4f7b1b221 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range lupper end(s). - per_channel: a boolean specifying whether to use per-channel quantizatioh. + max_var: a variable containing quantization range upper end(s). + per_channel: a boolean specifying whether to use per-channel quantization. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 2b5b877e8e..33f14e8d0e 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -341,7 +341,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context w,here producer and consumer operations are nested. + context: Context where producer and consumer operations are nested. name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index d0fb55da74..0b74b438ac 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -155,7 +155,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed,if None then defaults to the + input_graph: The tf.Graph to be transformed, if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 0624cc878b..db745aa562 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initialized that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index 216310abe4..bef58bad8d 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -197,7 +197,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initialized that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/rnn/ops/gru_ops.cc b/tensorflow/contrib/rnn/ops/gru_ops.cc index e91d1e8a80..9c8e40851a 100644 --- a/tensorflow/contrib/rnn/ops/gru_ops.cc +++ b/tensorflow/contrib/rnn/ops/gru_ops.cc @@ -69,7 +69,7 @@ Element-wise dot product of a and b is represented by ab Element-wise dot product is represented by \circ Matrix multiplication is represented by * -Baises are initialized with : +Biases are initialized with : `b_ru` - constant_initializer(1.0) `b_c` - constant_initializer(0.0) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 7957edf68c..ffd2421894 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -54,7 +54,7 @@ def blocks_match(sess, use_peephole): initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212) with variable_scope.variable_scope("test", initializer=initializer): - # magic naming so that the cells pick up these variables and resuse them + # magic naming so that the cells pick up these variables and reuse them if use_peephole: wci = variable_scope.get_variable( "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtypes.float32) diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 4eb4fbcd92..9e61fc54d1 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -480,8 +480,7 @@ class LSTMBlockWrapper(base_layer.Layer): """Run this LSTM on inputs, starting from the given state. Args: - inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]` - or a list of `time_len` tensors of shape `[batch_size, input_size]`. + inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`. initial_state: a tuple `(initial_cell_state, initial_output)` with tensors of shape `[batch_size, self._num_units]`. If this is not provided, the cell is expected to create a zero initial state of type `dtype`. diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 73f2607d84..2f6ae9f367 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -534,7 +534,7 @@ class GridLSTMCell(rnn_cell_impl.RNNCell): initializer: (optional) The initializer to use for the weight and projection matrices, default None. num_unit_shards: (optional) int, default 1, How to split the weight - matrix. If > 1,the weight matrix is stored across num_unit_shards. + matrix. If > 1, the weight matrix is stored across num_unit_shards. forget_bias: (optional) float, default 1.0, The initial bias of the forget gates, used to reduce the scale of forgetting at the beginning of the training. @@ -993,7 +993,7 @@ class BidirectionalGridLSTMCell(GridLSTMCell): initializer: (optional) The initializer to use for the weight and projection matrices, default None. num_unit_shards: (optional) int, default 1, How to split the weight - matrix. If > 1,the weight matrix is stored across num_unit_shards. + matrix. If > 1, the weight matrix is stored across num_unit_shards. forget_bias: (optional) float, default 1.0, The initial bias of the forget gates, used to reduce the scale of forgetting at the beginning of the training. @@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index b427dff88b..c4139dde49 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -222,6 +222,9 @@ class AttentionWrapperTest(test.TestCase): self.assertEqual( (None, batch_size, None), tuple(state_alignment_history.get_shape().as_list())) + nest.assert_same_structure( + cell.state_size, + cell.zero_state(batch_size, dtypes.float32)) # Remove the history from final_state for purposes of the # remainder of the tests. final_state = final_state._replace(alignment_history=()) # pylint: disable=protected-access diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py index 9265540317..178328619f 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py @@ -27,6 +27,7 @@ from tensorflow.contrib.seq2seq.python.ops import beam_search_ops from tensorflow.contrib.seq2seq.python.ops import decoder from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.layers import core as layers_core from tensorflow.python.ops import array_ops @@ -70,6 +71,98 @@ class TestGatherTree(test.TestCase): self.assertAllEqual(expected_result, res_) + def _test_gather_tree_from_array(self, + depth_ndims=0, + merged_batch_beam=False): + array = np.array( + [[[1, 2, 3], [4, 5, 6], [7, 8, 9], [0, 0, 0]], + [[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 0]]]).transpose([1, 0, 2]) + parent_ids = np.array( + [[[0, 0, 0], [0, 1, 1], [2, 1, 2], [-1, -1, -1]], + [[0, 0, 0], [1, 1, 0], [2, 0, 1], [0, 1, 0]]]).transpose([1, 0, 2]) + expected_array = np.array( + [[[2, 2, 2], [6, 5, 6], [7, 8, 9], [0, 0, 0]], + [[2, 3, 2], [7, 5, 7], [8, 9, 8], [11, 12, 0]]]).transpose([1, 0, 2]) + sequence_length = [[3, 3, 3], [4, 4, 3]] + + array = ops.convert_to_tensor( + array, dtype=dtypes.float32) + parent_ids = ops.convert_to_tensor( + parent_ids, dtype=dtypes.int32) + expected_array = ops.convert_to_tensor( + expected_array, dtype=dtypes.float32) + + max_time = array_ops.shape(array)[0] + batch_size = array_ops.shape(array)[1] + beam_width = array_ops.shape(array)[2] + + def _tile_in_depth(tensor): + # Generate higher rank tensors by concatenating tensor and tensor + 1. + for _ in range(depth_ndims): + tensor = array_ops.stack([tensor, tensor + 1], -1) + return tensor + + if merged_batch_beam: + array = array_ops.reshape( + array, [max_time, batch_size * beam_width]) + expected_array = array_ops.reshape( + expected_array, [max_time, batch_size * beam_width]) + + if depth_ndims > 0: + array = _tile_in_depth(array) + expected_array = _tile_in_depth(expected_array) + + sorted_array = beam_search_decoder.gather_tree_from_array( + array, parent_ids, sequence_length) + + with self.test_session() as sess: + sorted_array = sess.run(sorted_array) + expected_array = sess.run(expected_array) + self.assertAllEqual(expected_array, sorted_array) + + def test_gather_tree_from_array_scalar(self): + self._test_gather_tree_from_array() + + def test_gather_tree_from_array_1d(self): + self._test_gather_tree_from_array(depth_ndims=1) + + def test_gather_tree_from_array_1d_with_merged_batch_beam(self): + self._test_gather_tree_from_array(depth_ndims=1, merged_batch_beam=True) + + def test_gather_tree_from_array_2d(self): + self._test_gather_tree_from_array(depth_ndims=2) + + +class TestArrayShapeChecks(test.TestCase): + + def _test_array_shape_dynamic_checks(self, static_shape, dynamic_shape, + batch_size, beam_width, is_valid=True): + t = array_ops.placeholder_with_default( + np.random.randn(*static_shape).astype(np.float32), + shape=dynamic_shape) + + batch_size = array_ops.constant(batch_size) + check_op = beam_search_decoder._check_batch_beam(t, batch_size, beam_width) # pylint: disable=protected-access + + with self.test_session() as sess: + if is_valid: + sess.run(check_op) + else: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(check_op) + + def test_array_shape_dynamic_checks(self): + self._test_array_shape_dynamic_checks( + (8, 4, 5, 10), (None, None, 5, 10), 4, 5, is_valid=True) + self._test_array_shape_dynamic_checks( + (8, 20, 10), (None, None, 10), 4, 5, is_valid=True) + self._test_array_shape_dynamic_checks( + (8, 21, 10), (None, None, 10), 4, 5, is_valid=False) + self._test_array_shape_dynamic_checks( + (8, 4, 6, 10), (None, None, None, 10), 4, 5, is_valid=False) + self._test_array_shape_dynamic_checks( + (8, 4), (None, None), 4, 5, is_valid=False) + class TestEosMasking(test.TestCase): """Tests EOS masking used in beam search.""" @@ -319,7 +412,8 @@ class TestLargeBeamStep(test.TestCase): class BeamSearchDecoderTest(test.TestCase): - def _testDynamicDecodeRNN(self, time_major, has_attention): + def _testDynamicDecodeRNN(self, time_major, has_attention, + with_alignment_history=False): encoder_sequence_length = np.array([3, 2, 3, 1, 1]) decoder_sequence_length = np.array([2, 0, 1, 2, 3]) batch_size = 5 @@ -359,7 +453,7 @@ class BeamSearchDecoderTest(test.TestCase): cell=cell, attention_mechanism=attention_mechanism, attention_layer_size=attention_depth, - alignment_history=False) + alignment_history=with_alignment_history) cell_state = cell.zero_state( dtype=dtypes.float32, batch_size=batch_size_tensor * beam_width) if has_attention: @@ -420,6 +514,12 @@ class BeamSearchDecoderTest(test.TestCase): def testDynamicDecodeRNNBatchMajorYesAttention(self): self._testDynamicDecodeRNN(time_major=False, has_attention=True) + def testDynamicDecodeRNNBatchMajorYesAttentionWithAlignmentHistory(self): + self._testDynamicDecodeRNN( + time_major=False, + has_attention=True, + with_alignment_history=True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index f8da5a3e17..9ff8a343f1 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -1278,7 +1278,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): attention_state=self._item_or_tuple( a.state_size for a in self._attention_mechanisms), alignment_history=self._item_or_tuple( - () for _ in self._attention_mechanisms)) # sometimes a TensorArray + a.alignments_size if self._alignment_history else () + for a in self._attention_mechanisms)) # sometimes a TensorArray def zero_state(self, batch_size, dtype): """Return an initial (zero) state tuple for this `AttentionWrapper`. @@ -1318,22 +1319,26 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): cell_state = nest.map_structure( lambda s: array_ops.identity(s, name="checked_cell_state"), cell_state) + initial_alignments = [ + attention_mechanism.initial_alignments(batch_size, dtype) + for attention_mechanism in self._attention_mechanisms] return AttentionWrapperState( cell_state=cell_state, time=array_ops.zeros([], dtype=dtypes.int32), attention=_zero_state_tensors(self._attention_layer_size, batch_size, dtype), - alignments=self._item_or_tuple( - attention_mechanism.initial_alignments(batch_size, dtype) - for attention_mechanism in self._attention_mechanisms), + alignments=self._item_or_tuple(initial_alignments), attention_state=self._item_or_tuple( attention_mechanism.initial_state(batch_size, dtype) for attention_mechanism in self._attention_mechanisms), alignment_history=self._item_or_tuple( - tensor_array_ops.TensorArray(dtype=dtype, size=0, - dynamic_size=True) + tensor_array_ops.TensorArray( + dtype, + size=0, + dynamic_size=True, + element_shape=alignment.shape) if self._alignment_history else () - for _ in self._attention_mechanisms)) + for alignment in initial_alignments)) def call(self, inputs, state): """Perform a step of attention-wrapped RNN. diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 03fe31abf7..a26107b0d7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.platform import tf_logging from tensorflow.python.util import nest __all__ = [ @@ -121,14 +122,114 @@ def tile_batch(t, multiplier, name=None): return nest.map_structure(lambda t_: _tile_batch(t_, multiplier), t) +def gather_tree_from_array(t, parent_ids, sequence_length): + """Calculates the full beams for `TensorArray`s. + + Args: + t: A stacked `TensorArray` of size `max_time` that contains `Tensor`s of + shape `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` + where `s` is the depth shape. + parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. + sequence_length: The sequence length of shape `[batch_size, beam_width]`. + + Returns: + A `Tensor` which is a stacked `TensorArray` of the same size and type as + `t` and where beams are sorted in each `Tensor` according to `parent_ids`. + """ + max_time = parent_ids.shape[0].value or array_ops.shape(parent_ids)[0] + batch_size = parent_ids.shape[1].value or array_ops.shape(parent_ids)[1] + beam_width = parent_ids.shape[2].value or array_ops.shape(parent_ids)[2] + + # Generate beam ids that will be reordered by gather_tree. + beam_ids = array_ops.expand_dims( + array_ops.expand_dims(math_ops.range(beam_width), 0), 0) + beam_ids = array_ops.tile(beam_ids, [max_time, batch_size, 1]) + + mask = array_ops.sequence_mask( + sequence_length, maxlen=max_time, dtype=dtypes.int32) + mask = array_ops.transpose(mask, perm=[2, 0, 1]) + + # Use beam_width + 1 to mark the end of beam. + masked_beam_ids = (beam_ids * mask) + (1 - mask) * (beam_width + 1) + + max_sequence_lengths = math_ops.to_int32( + math_ops.reduce_max(sequence_length, axis=1)) + sorted_beam_ids = beam_search_ops.gather_tree( + step_ids=masked_beam_ids, + parent_ids=parent_ids, + max_sequence_lengths=max_sequence_lengths, + end_token=beam_width + 1) + + # For out of range steps, simply copy the same beam. + sorted_beam_ids = array_ops.where( + math_ops.cast(mask, dtypes.bool), x=sorted_beam_ids, y=beam_ids) + + # Generate indices for gather_nd. + time_ind = array_ops.tile(array_ops.reshape( + math_ops.range(max_time), [-1, 1, 1]), [1, batch_size, beam_width]) + batch_ind = array_ops.tile(array_ops.reshape( + math_ops.range(batch_size), [-1, 1, 1]), [1, max_time, beam_width]) + batch_ind = array_ops.transpose(batch_ind, perm=[1, 0, 2]) + indices = array_ops.stack([time_ind, batch_ind, sorted_beam_ids], -1) + + # Gather from a tensor with collapsed additional dimensions. + gather_from = t + final_shape = array_ops.shape(gather_from) + gather_from = array_ops.reshape( + gather_from, [max_time, batch_size, beam_width, -1]) + ordered = array_ops.gather_nd(gather_from, indices) + ordered = array_ops.reshape(ordered, final_shape) + + return ordered + + def _check_maybe(t): - if isinstance(t, tensor_array_ops.TensorArray): - raise TypeError( - "TensorArray state is not supported by BeamSearchDecoder: %s" % t.name) if t.shape.ndims is None: raise ValueError( "Expected tensor (%s) to have known rank, but ndims == None." % t) +def _check_static_batch_beam_maybe(shape, batch_size, beam_width): + """Raises an exception if dimensions are known statically and can not be + reshaped to [batch_size, beam_size, -1]. + """ + reshaped_shape = tensor_shape.TensorShape([batch_size, beam_width, None]) + if (batch_size is not None and shape[0].value is not None + and (shape[0] != batch_size * beam_width + or (shape.ndims >= 2 and shape[1].value is not None + and (shape[0] != batch_size or shape[1] != beam_width)))): + tf_logging.warn("TensorArray reordering expects elements to be " + "reshapable to %s which is incompatible with the " + "current shape %s. Consider setting " + "reorder_tensor_arrays to False to disable TensorArray " + "reordering during the beam search." + % (reshaped_shape, shape)) + return False + return True + +def _check_batch_beam(t, batch_size, beam_width): + """Returns an Assert operation checking that the elements of the stacked + TensorArray can be reshaped to [batch_size, beam_size, -1]. At this point, + the TensorArray elements have a known rank of at least 1. + """ + error_message = ("TensorArray reordering expects elements to be " + "reshapable to [batch_size, beam_size, -1] which is " + "incompatible with the dynamic shape of %s elements. " + "Consider setting reorder_tensor_arrays to False to disable " + "TensorArray reordering during the beam search." + % (t.name)) + rank = t.shape.ndims + shape = array_ops.shape(t) + if rank == 2: + condition = math_ops.equal(shape[1], batch_size * beam_width) + else: + condition = math_ops.logical_or( + math_ops.equal(shape[1], batch_size * beam_width), + math_ops.logical_and( + math_ops.equal(shape[1], batch_size), + math_ops.equal(shape[2], beam_width))) + return control_flow_ops.Assert(condition, [error_message]) + + class BeamSearchDecoder(decoder.Decoder): """BeamSearch sampling decoder. @@ -173,7 +274,8 @@ class BeamSearchDecoder(decoder.Decoder): initial_state, beam_width, output_layer=None, - length_penalty_weight=0.0): + length_penalty_weight=0.0, + reorder_tensor_arrays=True): """Initialize the BeamSearchDecoder. Args: @@ -188,6 +290,12 @@ class BeamSearchDecoder(decoder.Decoder): `tf.layers.Dense`. Optional layer to apply to the RNN output prior to storing the result or sampling. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. + reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell + state will be reordered according to the beam search path. If the + `TensorArray` can be reordered, the stacked form will be returned. + Otherwise, the `TensorArray` will be returned as is. Set this flag to + `False` if the cell state contains `TensorArray`s that are not amenable + to reordering. Raises: TypeError: if `cell` is not an instance of `RNNCell`, @@ -202,6 +310,7 @@ class BeamSearchDecoder(decoder.Decoder): "output_layer must be a Layer, received: %s" % type(output_layer)) self._cell = cell self._output_layer = output_layer + self._reorder_tensor_arrays = reorder_tensor_arrays if callable(embedding): self._embedding_fn = embedding @@ -299,12 +408,13 @@ class BeamSearchDecoder(decoder.Decoder): """ finished, start_inputs = self._finished, self._start_inputs + dtype = nest.flatten(self._initial_cell_state)[0].dtype log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, - on_value=0.0, - off_value=-np.Inf, - dtype=nest.flatten(self._initial_cell_state)[0].dtype) + on_value=ops.convert_to_tensor(0.0, dtype=dtype), + off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), + dtype=dtype) initial_state = BeamSearchDecoderState( cell_state=self._initial_cell_state, @@ -341,6 +451,11 @@ class BeamSearchDecoder(decoder.Decoder): outputs.parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=self._end_token) + if self._reorder_tensor_arrays: + final_state = final_state._replace(cell_state=nest.map_structure( + lambda t: self._maybe_sort_array_beams( + t, outputs.parent_ids, final_state.lengths), + final_state.cell_state)) outputs = FinalBeamSearchDecoderOutput( beam_search_decoder_output=outputs, predicted_ids=predicted_ids) return outputs, final_state @@ -431,9 +546,10 @@ class BeamSearchDecoder(decoder.Decoder): returned unchanged. Raises: - TypeError: If `t` is an instance of `TensorArray`. ValueError: If the rank of `t` is not statically known. """ + if isinstance(t, tensor_array_ops.TensorArray): + return t _check_maybe(t) if t.shape.ndims >= 1: return self._split_batch_beams(t, s) @@ -454,15 +570,55 @@ class BeamSearchDecoder(decoder.Decoder): A reshaped version of t with shape `[batch_size, beam_width] + s`. Raises: - TypeError: If `t` is an instance of `TensorArray`. ValueError: If the rank of `t` is not statically known. """ + if isinstance(t, tensor_array_ops.TensorArray): + return t _check_maybe(t) if t.shape.ndims >= 2: return self._merge_batch_beams(t, s) else: return t + def _maybe_sort_array_beams(self, t, parent_ids, sequence_length): + """Maybe sorts beams within a `TensorArray`. + + Args: + t: A `TensorArray` of size `max_time` that contains `Tensor`s of shape + `[batch_size, beam_width, s]` or `[batch_size * beam_width, s]` where + `s` is the depth shape. + parent_ids: The parent ids of shape `[max_time, batch_size, beam_width]`. + sequence_length: The sequence length of shape `[batch_size, beam_width]`. + + Returns: + A `TensorArray` where beams are sorted in each `Tensor` or `t` itself if + it is not a `TensorArray` or does not meet shape requirements. + """ + if not isinstance(t, tensor_array_ops.TensorArray): + return t + # pylint: disable=protected-access + if (not t._infer_shape or not t._element_shape + or t._element_shape[0].ndims is None + or t._element_shape[0].ndims < 1): + shape = ( + t._element_shape[0] if t._infer_shape and t._element_shape + else tensor_shape.TensorShape(None)) + tf_logging.warn("The TensorArray %s in the cell state is not amenable to " + "sorting based on the beam search result. For a " + "TensorArray to be sorted, its elements shape must be " + "defined and have at least a rank of 1, but saw shape: %s" + % (t.handle.name, shape)) + return t + shape = t._element_shape[0] + # pylint: enable=protected-access + if not _check_static_batch_beam_maybe( + shape, tensor_util.constant_value(self._batch_size), self._beam_width): + return t + t = t.stack() + with ops.control_dependencies( + [_check_batch_beam(t, self._batch_size, self._beam_width)]): + return gather_tree_from_array(t, parent_ids, sequence_length) + def step(self, time, inputs, state, name=None): """Perform a decoding step. @@ -757,6 +913,8 @@ def _maybe_tensor_gather_helper(gather_indices, gather_from, batch_size, output: Gathered tensor of shape tf.shape(gather_from)[:1+len(gather_shape)] or the original tensor if its dimensions are too small. """ + if isinstance(gather_from, tensor_array_ops.TensorArray): + return gather_from _check_maybe(gather_from) if gather_from.shape.ndims >= len(gather_shape): return _tensor_gather_helper( diff --git a/tensorflow/contrib/slim/README.md b/tensorflow/contrib/slim/README.md index 2d9df8f27e..40f484fd78 100644 --- a/tensorflow/contrib/slim/README.md +++ b/tensorflow/contrib/slim/README.md @@ -94,7 +94,7 @@ of thin wrapper functions in [variables.py](https://www.tensorflow.org/code/tensorflow/contrib/framework/python/ops/variables.py) which allow callers to easily define variables. -For example, to create a `weight` variable, initialize it using a truncated +For example, to create a `weights` variable, initialize it using a truncated normal distribution, regularize it with an `l2_loss` and place it on the `CPU`, one need only declare the following: diff --git a/tensorflow/contrib/solvers/python/ops/least_squares.py b/tensorflow/contrib/solvers/python/ops/least_squares.py index fb7c0eb649..6e164f5342 100644 --- a/tensorflow/contrib/solvers/python/ops/least_squares.py +++ b/tensorflow/contrib/solvers/python/ops/least_squares.py @@ -33,7 +33,7 @@ def cgls(operator, rhs, tol=1e-6, max_iter=20, name="cgls"): r"""Conjugate gradient least squares solver. Solves a linear least squares problem \\(||A x - rhs||_2\\) for a single - righ-hand side, using an iterative, matrix-free algorithm where the action of + right-hand side, using an iterative, matrix-free algorithm where the action of the matrix A is represented by `operator`. The CGLS algorithm implicitly applies the symmetric conjugate gradient algorithm to the normal equations \\(A^* A x = A^* rhs\\). The iteration terminates when either diff --git a/tensorflow/contrib/solvers/python/ops/linear_equations.py b/tensorflow/contrib/solvers/python/ops/linear_equations.py index d791d46763..9305c6a11c 100644 --- a/tensorflow/contrib/solvers/python/ops/linear_equations.py +++ b/tensorflow/contrib/solvers/python/ops/linear_equations.py @@ -41,7 +41,7 @@ def conjugate_gradient(operator, r"""Conjugate gradient solver. Solves a linear system of equations `A*x = rhs` for selfadjoint, positive - definite matrix `A` and righ-hand side vector `rhs`, using an iterative, + definite matrix `A` and right-hand side vector `rhs`, using an iterative, matrix-free algorithm where the action of the matrix A is represented by `operator`. The iteration terminates when either the number of iterations exceeds `max_iter` or when the residual norm has been reduced to `tol` diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index c832c6f2e0..906cc3f034 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -83,6 +83,7 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), + visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -154,6 +155,7 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", + "//tensorflow/python:errors", ], ) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index dfcce0fd00..461e627e99 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,7 +2,8 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. +operator that wraps a subgraph in TensorRT. This is still a work in progress +but should be useable with most common graphs. Compilation ----------- @@ -15,26 +16,10 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. - -``` +```shell bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use is shown below. - -```python -import tensorflow as tf -import tensorflow.contrib.tensorrt as trt -#... create and train or load model -gdef = sess.graph.as_graph_def() -trt_gdef = trt.create_inference_graph( - gdef, #original graph_def - ["output"], #name of output node(s) - max_batch_size, #maximum batch size to run the inference - max_workspace_size_bytes) # max memory for TensorRT to use -tf.reset_default_graph() -tf.import_graph_def(graph_def=trt_gdef) -#...... run inference -``` +will be available. An example use can be found in test/test_tftrt.py directory diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index fd551d70b4..140ad48282 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,6 +18,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import -from tensorflow.contrib.tensorrt.python import * -# pylint: enable=unused-import,wildcard-import +from tensorflow.python.framework import errors + +# pylint: disable=unused-import,wildcard-import,g-import-not-at-top +try: + from tensorflow.contrib.tensorrt.python import * +except errors.NotFoundError as e: + no_trt_message = ( + '**** Failed to initialize TensorRT. This is either because the TensorRT' + ' installation path is not in LD_LIBRARY_PATH, or because you do not have' + ' it installed. If not installed, please go to' + ' https://developer.nvidia.com/tensorrt to download and install' + ' TensorRT ****') + print(no_trt_message) + raise e +# pylint: enable=unused-import,wildcard-import,g-import-not-at-top diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 970f810473..eea8c8efa2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" +#include #include #include #include @@ -48,13 +49,29 @@ namespace tensorrt { namespace convert { namespace { -static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { - "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" + "Identity", + "Const", + "Conv2D", + "MaxPool", + "BiasAdd", + "Relu", + "Add", + "Mul", + "Sub", + "Rsqrt", + "Pad", + "Mean", + "AvgPool", + "ConcatV2", + "DepthwiseConv2dNative", + "FusedBatchNorm", + "FusedBatchNormV2", + // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -69,6 +86,8 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); + } else { + VLOG(2) << edge->src()->name() << " N, "; } } } @@ -82,7 +101,10 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { + VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); + } else { + VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -109,74 +131,150 @@ std::unordered_map> BuildTensorNameMap( } return result; } - -tensorflow::Status ConvertSubGraphToTensorRT( - const std::vector& output_names, - const std::set& subgraph_node_ids, - size_t max_batch_size, // Max batch size that engine will be created for - // Max amount of memory that engine will be allowed to consume, in bytes - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::Graph* graph) { - tensorflow::EdgeSet subgraph_incoming_edges; - GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); - +// TODO(sami): convert references to pointers +struct ConvertGraphParams { + ConvertGraphParams( + tensorflow::Graph& inp_graph, + const std::vector& output_node_names, + const std::set& subgraph_node_id_numbers, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + int engine_precision_mode) + : graph(inp_graph), + output_names(output_node_names), + subgraph_node_ids(subgraph_node_id_numbers), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + precision_mode(engine_precision_mode) {} + tensorflow::Graph& graph; + const std::vector& output_names; + const std::set& subgraph_node_ids; + size_t max_batch_size; + size_t max_workspace_size_bytes; + const tensorflow::grappler::GraphProperties& graph_properties; + std::unordered_map>* output_edge_map; + int precision_mode; std::vector> subgraph_inputs; + std::vector> subgraph_outputs; + tensorflow::EdgeSet subgraph_incoming_edges; + tensorflow::EdgeSet subgraph_outgoing_edges; +}; - // Collect inputs by looking for incoming edges - for (const tensorflow::Edge* edge : subgraph_incoming_edges) { - subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { + GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_incoming_edges); + for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { + p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set> subgraph_outputs_set; // Collect outputs referenced from output_names - auto output_name_to_index_map = BuildTensorNameMap(output_names); - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : p->subgraph_node_ids) { + tensorflow::Node* node = p->graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - // Collect outputs referenced from outgoing edges - tensorflow::EdgeSet subgraph_outgoing_edges; - GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - // Impose an ordering on the outputs - std::vector> subgraph_outputs( - subgraph_outputs_set.begin(), subgraph_outputs_set.end()); - // Build TensorRT node and add it to the graph + p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.insert(p->subgraph_outputs.begin(), + subgraph_outputs_set.begin(), + subgraph_outputs_set.end()); + return tensorflow::Status::OK(); +}; + +tensorflow::Status GetCalibNode(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); tensorflow::NodeDef trt_node_def; - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( - *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size_bytes, graph_properties, - &trt_node_def)); + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; - tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + TF_RETURN_IF_ERROR(status); + + for (auto in_edge : + params->subgraph_incoming_edges) { // loop over incoming edges and + // attach them to calib node + // tensorflow::Node* src_node = in_edge->src(); + auto src_output = in_edge->src_output(); + auto dst_node = in_edge->dst(); + auto dst_input = in_edge->dst_input(); + VLOG(1) << " update edge " << trt_node->name() << ":" << src_output + << " -> " << dst_node->name() << ":" << dst_input; + TF_RETURN_IF_ERROR( + params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); + } + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + tensorflow::NodeDef trt_node_def; + + SubGraphParams s(params->graph, params->subgraph_node_ids, + params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size_bytes, + params->graph_properties, params->output_edge_map, + &trt_node_def, params->precision_mode); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); + tensorflow::Status status; + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + // AddNode does not wire edges. + // Re-map incoming edges to use the new TRT node instead of the orig subgraph + std::map, int> subgraph_edge_to_input_map; + for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { + subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); + } + for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { + std::pair old_src = {edge->src()->id(), edge->src_output()}; + int new_src_output = subgraph_edge_to_input_map.at(old_src); + params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, + new_src_output); + params->graph.RemoveEdge(edge); + } + + VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); + for (const tensorflow::Edge* edge : trt_node->in_edges()) { + VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); + } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); + for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), - edge->dst_input())); + TF_RETURN_IF_ERROR(params->graph.UpdateEdge( + trt_node, new_src_output, edge->dst(), edge->dst_input())); } // Remove the original subgraph - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph->FindNodeId(node_id); + for (int node_id : params->subgraph_node_ids) { + tensorflow::Node* node = params->graph.FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - graph->RemoveNode(node); + params->graph.RemoveNode(node); } return tensorflow::Status::OK(); } @@ -194,12 +292,39 @@ tensorflow::Status BuildNodeMap( } } // namespace +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { + VLOG(0) << "Starting Calib Conversion"; + tensorflow::Graph graph(tensorflow::OpRegistry::Global()); + TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( + tensorflow::GraphConstructorOptions(), graph_def, &graph)); + // get calib nodes + std::vector calib_nodes; + for (auto node : graph.op_nodes()) { + if (node->type_string() == "TRTCalibOp") { + VLOG(1) << "Found Calib Node"; + calib_nodes.push_back(node); + } + } + VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); + if (calib_nodes.size() == 0) + return tensorflow::errors::FailedPrecondition( + "Graph doesn't contain any calibration nodes!." + " Please generate calibration graph and run calibration first"); + for (auto n : calib_nodes) { + TF_RETURN_IF_ERROR( + tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); + } + graph.ToGraphDef(infer_graph); + return tensorflow::Status::OK(); +} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { - // Optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, + int precision_mode = FP32MODE, int minimum_segment_size = 3) { + // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -209,16 +334,23 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // Virtual cluster + // virtual cluster tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + // single machine + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); + VLOG(2) << "cpu_cores: " << num_cpu_cores; + VLOG(2) << "gpus: " << num_gpus; + TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // Constant folding + // constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -226,7 +358,6 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); - // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -243,7 +374,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -252,14 +383,37 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); + std::unordered_map> output_edge_map; + int count = 0; + float total_num_nodes_in_segments = 0.; + for (auto s : segments) { + total_num_nodes_in_segments += s.size(); + } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; + size_t max_mem_per_engine = + max_workspace_size_bytes * + ((float)subgraph_node_names.size() / total_num_nodes_in_segments); + std::stringstream oss; for (const string& node_name : subgraph_node_names) { + oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - output_names, subgraph_node_ids, max_batch_size, - max_workspace_size_bytes, static_graph_properties, &graph)); + VLOG(2) << "Subgraph nodes" << oss.str(); + ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, + max_mem_per_engine, static_graph_properties, + &output_edge_map, precision_mode); + if (precision_mode == INT8MODE) { + TF_RETURN_IF_ERROR(GetCalibNode(&p)); + } else { + tensorflow::Status status = ConvertSubGraphToTensorRT(&p); + if (status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \n" + << status.ToString() << " SKIPPING......"; + } + count++; + } } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 154ad3f2e8..e01e4a5328 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -28,14 +28,20 @@ namespace tensorflow { namespace tensorrt { namespace convert { +// This method converts an already generated calibration graph which was used in +// calibration runs to an inference graph +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); + // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. -// max_workspace_size_bytes: The upper bound of memory allowence for +// max_workspace_size_bytes: The upper bound of memory allowance for // engine building. tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def); + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, + int precision_mode, int minimum_segment_size); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 9ee717dd7f..92a692baa7 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -24,6 +24,10 @@ limitations under the License. #include #include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -32,6 +36,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tensor_coding.h" @@ -39,7 +44,6 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorrt/include/NvInfer.h" // Check if the types are equal. Cast to int first so that failure log message @@ -49,6 +53,7 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrCat; namespace { @@ -65,7 +70,8 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, *trt_dtype = nvinfer1::DataType::kHALF; break; default: - return tensorflow::errors::InvalidArgument("Unsupported data type"); + return tensorflow::errors::InvalidArgument( + "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); } return tensorflow::Status::OK(); } @@ -112,6 +118,18 @@ static std::vector> CreateSamePadding( return padding; } +string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { + size_t last_scope_separator = 0; + for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { + if (op_name_a[i] != op_name_b[i]) { + break; + } else if (op_name_a[i] == '/') { + last_scope_separator = i + 1; + } + } + return op_name_a.substr(0, last_scope_separator); +} + class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -244,6 +262,11 @@ std::vector TFAttrs::get>(string key) const { return std::vector(attr.begin(), attr.end()); } +template <> +std::vector TFAttrs::get>(string key) const { + auto attr = this->at(key)->list().s(); + return std::vector(attr.begin(), attr.end()); +} template <> nvinfer1::Dims TFAttrs::get(string key) const { auto values = this->get>(key); @@ -266,6 +289,17 @@ tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } +template <> +float TFAttrs::get(string key) const { + return this->at(key)->f(); +} + +template <> +bool TFAttrs::get(string key) const { + return this->at(key)->b(); +} + +// TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -283,29 +317,87 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } } +template +void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, + T* odata, nvinfer1::DimsHW ostrides) { + for (int h = 0; h < shape.h(); ++h) { + for (int w = 0; w < shape.w(); ++w) { + odata[h * ostrides.h() + w * ostrides.w()] = + idata[h * ostrides.h() + w * ostrides.w()]; + } + } +} + +// TODO(jie): fallback to tensorflow!! +void ReorderCKtoKC(const TRT_ShapedWeights& iweights, + TRT_ShapedWeights* oweights) { + int c = iweights.shape_.d[0]; + int k = iweights.shape_.d[1]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; + nvinfer1::DimsHW istrides = {1, k}; + nvinfer1::DimsHW ostrides = {c, 1}; + switch (iweights.type_) { + case tensorflow::DataType::DT_FLOAT: { + Reorder2({k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + case tensorflow::DataType::DT_HALF: { + Reorder2( + {k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + default: + LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); + } +} + void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights) { + TRT_ShapedWeights* oweights, int num_groups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - int c = iweights.shape_.d[2]; - int k = iweights.shape_.d[3]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; + // TRT requires GKcRS, while TF depthwise has RSCK + // where c=1, C=G + VLOG(2) << "num_groups: " << num_groups; + int c = iweights.shape_.d[2] / num_groups; + VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; + int k = iweights.shape_.d[3] * num_groups; + VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; + oweights->shape_.d[0] = k / num_groups; + oweights->shape_.d[1] = c * num_groups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: + case tensorflow::DataType::DT_FLOAT: { Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; + } + case tensorflow::DataType::DT_HALF: { + Reorder4( + {k, c, r, s}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + default: - LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); } } @@ -323,12 +415,11 @@ inline std::shared_ptr infer_object(T* obj) { return std::shared_ptr(obj, InferDeleter()); } -// Logger for GIE info/warning/errors class Converter; using OpConverter = std::function const&, + const std::vector&, std::vector*)>; class Converter { @@ -336,34 +427,57 @@ class Converter { std::unordered_map op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - + tensorflow::tensorrt::TRTWeightStore* weight_store_; + bool fp16_; void register_op_converters(); - std::vector get_inputs( const tensorflow::NodeDef& node_def) { std::vector inputs; - for (const auto& input_name : node_def.input()) { - VLOG(2) << "Retrieve input: " << input_name; - inputs.push_back(trt_tensors_.at(input_name)); + for (auto const& input_name : node_def.input()) { + /************************************************************************* + * TODO(jie) handle case 1) here + * Normalizes the inputs and extracts associated metadata: + * 1) Inputs can contain a colon followed by a suffix of characters. + * That suffix may be a single number (e.g. inputName:1) or several + * word characters separated from a number by a colon + * (e.g. inputName:foo:1). The + * latter case is used to denote inputs and outputs of functions. + * 2) Control dependency inputs contain caret at the beginning and we + * remove this and annotate the edge as a control dependency. + ************************************************************************/ + string name = input_name[0] == '^' ? input_name.substr(1) : input_name; + auto first = name.find_first_of(':'); + if (first != string::npos && first + 2 == name.size() && + name[first + 1] == '0') + name.erase(first); + + VLOG(2) << "retrieve input: " << name; + if (trt_tensors_.count(name)) { + inputs.push_back(trt_tensors_.at(name)); + } else { + LOG(FATAL) << "input: " << name << " not available for node at, " + << node_def.name(); + } } return inputs; } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network) - : trt_network_(trt_network) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network, + tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) + : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { this->register_op_converters(); } - + tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error - temp_bufs_.push_back(std::vector(weights.size_bytes())); - weights.SetValues(temp_bufs_.back().data()); + weight_store_->store_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(weight_store_->store_.back().data()); return weights; } - + bool isFP16() { return fp16_; }; TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -382,7 +496,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = output_name + ":" + std::to_string(i); + if (i != 0) output_name = StrCat(output_name, ":", i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -448,7 +562,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / std::sqrt(t); }; + return [](T t) -> T { return 1.0 / sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -534,6 +648,22 @@ struct LambdaFactory { } }; +template <> +std::function LambdaFactory::unary() { + switch (op) { + case OP_CATEGORY::RSQRT: { + VLOG(2) << "RSQRT GETS DONE"; + return [](Eigen::half t) -> Eigen::half { + return Eigen::half(1.0 / sqrt(float(t))); + }; + } + case OP_CATEGORY::NEG: + return [](Eigen::half t) -> Eigen::half { return -t; }; + default: + VLOG(2) << "Not supported op for unary: " << static_cast(op); + return nullptr; + } +} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -545,6 +675,14 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } + case tensorflow::DataType::DT_HALF: { + auto inp = static_cast(iweights.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + std::transform(inp, inp + iweights.count(), oup, + unary_op.unary()); + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -588,6 +726,32 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } + case tensorflow::DataType::DT_HALF: { + auto inp_l = static_cast(iweights_l.GetValues()); + auto inp_r = static_cast(iweights_r.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + + if (iweights_l.count() != iweights_r.count()) { + // We only supports broadcast of RankZero + if (iweights_l.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_l); + std::transform(inp_r, inp_r + iweights_r.count(), oup, + binary_op.broadcast_l(*inp_l)); + } else if (iweights_r.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_r); + std::transform(inp_l, inp_l + iweights_l.count(), oup, + binary_op.broadcast_r(*inp_r)); + } else { + return tensorflow::errors::Unimplemented( + "Binary op with non-rankZero broadcast not supported"); + } + } else { + std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, + binary_op.binary()); + } + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -599,7 +763,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -613,13 +777,12 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get("T")); - // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // PAss the output + // Pass the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -631,11 +794,11 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall bakc to TF +// Let's get the simple stuff working first. Maybe we should fall back to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -648,12 +811,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. - int nb_dims = weights_input_l.shape_.nbDims; + int num_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = nb_dims; - VLOG(2) << "nb_dims: " << nb_dims + output_shape.nbDims = num_dims; + VLOG(2) << "nb_dims: " << num_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < nb_dims; i++) { + for (int i = 0; i < num_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -678,7 +841,6 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); - // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -712,48 +874,90 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency - auto dtype = TFAttrs(node_def).get("T"); - CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); - CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // Default to channel-wise + // default to element-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + // TODO(jie): maybe use a permutation instead to support more cases; + bool permutation_flag = false; + if (weights.count() == 1) { VLOG(2) << "UNIFORM"; scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { - // No broadcasting on Batch dimension; - assert(dims_w.d[0] == 1); - - // Broadcasting on Channel dimension only allowed in kUNIFORM - assert(dims_w.d[1] == dims_t.d[0]); - assert(dims_w.nbDims == dims_t.nbDims); - - // Default is element; - for (int i = 2; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != dims_t.d[i - 1]) { - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - break; + // no broadcasting on Batch dimension; + VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims + << " tensor DIM: " << dims_t.nbDims; + if (dims_w.nbDims == dims_t.nbDims + 1) { + if (dims_w.d[0] == 1) { + for (int i = 1; i < dims_w.nbDims; i++) { + dims_w.d[i - 1] = dims_w.d[i]; + } + dims_w.nbDims--; + } else { + return tensorflow::errors::InvalidArgument( + "Binary op cannot operate on batch, " + node_def.name()); } } - if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) { + + if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - for (int i = 2; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != 1) - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); + // default is element; + for (int i = 1; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != dims_t.d[i]) { + // if dimension does not match, switch back to channel; + VLOG(2) << "channel"; + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; + } + } + // if channel as candidate, validate it + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { + for (int i = 1; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != 1) + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); + } + } else { + VLOG(2) << "elementwise"; + } + } else if (dims_w.nbDims == 1 && + dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { + // channel wise and broadcast required; + permutation_flag = true; + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + } else { + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); + } + } + + // transpose last dimension + std::vector permutation(dims_t.nbDims + 1); + if (permutation_flag) { + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + // we swap the last dimension into channel for trt. + // because of tensorflow default broadcasting rules. + for (int i = 0; i < static_cast(permutation.size()); i++) { + permutation[i] = i; } + permutation[1] = dims_t.nbDims; + permutation[dims_t.nbDims] = 1; + tensor = ctx.TransposeTensor(const_cast(tensor), + permutation); + } else { + return tensorflow::errors::InvalidArgument( + "Transpose cannot be applied, " + node_def.name()); } } - // Prepare weights + // prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); TRT_ShapedWeights power_weights(weights.type_); @@ -779,88 +983,26 @@ tensorflow::Status BinaryTensorOpWeight( scale_weights, power_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); + // transpose back dimension + if (permutation_flag) { + output_tensor = ctx.TransposeTensor(output_tensor, permutation); + } // Pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, const tensorflow::NodeDef& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", nvinfer1::ElementWiseOperation::kMIN}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume type matches input weights - // Get trt type & shape - TFAttrs attrs(node_def); - // Maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // Check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // Pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} +enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; -tensorflow::Status ConvertPlaceholder( +tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, - std::vector* outputs) { - VLOG(2) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented(", cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} + const std::vector& inputs, + std::vector* outputs, + int group // group ==0 specifies depthwise conv +) { + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); -tensorflow::Status ConvertConv2D(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0]; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; TFAttrs attrs(node_def); int h_index = 2; @@ -874,11 +1016,31 @@ tensorflow::Status ConvertConv2D(Converter& ctx, // TODO(jie): transpose it } + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + int num_groups = group; + if (num_groups == 0) // depthwise convolution + num_groups = tensor_dim.d[0]; + VLOG(2) << "groups count: " << num_groups; + + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0] * num_groups; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; + VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + // TODO(jie): stride. (NHWC/NCHW) auto tf_stride = attrs.get>("strides"); + VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; + VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] + << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); - auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. if (attrs.get("padding") == "SAME") { @@ -919,10 +1081,11 @@ tensorflow::Status ConvertConv2D(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); + layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -935,11 +1098,101 @@ tensorflow::Status ConvertConv2D(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs, ConvolutionType type) { + switch (type) { + case ConvolutionType::DEFAULT: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); + case ConvolutionType::DEPTHWISE_CONV: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); + } + return tensorflow::errors::Unimplemented("unsupported convolution type at, " + + node_def.name()); +} + +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, const tensorflow::NodeDef& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // get trt type & shape + TFAttrs attrs(node_def); + // maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertPlaceholder( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + VLOG(2) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertConv2D(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEFAULT); +} + +tensorflow::Status ConvertConv2DDepthwise( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEPTHWISE_CONV); +} + tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -957,6 +1210,8 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; + else if (node_def.op() == "AvgPool") + type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("Only supports Max pool"); @@ -1019,9 +1274,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1031,14 +1286,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1055,16 +1310,33 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { VLOG(2) << "NCHW !!!!"; } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - weights, empty_weights, empty_weights); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - if (data_format == "NHWC") { - // TODO(jie): transpose it back! - output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); - } else { - VLOG(2) << "NCHW !!!!"; + auto dims = tensor->getDimensions(); + VLOG(2) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + VLOG(2) << "i: " << dims.d[i]; + } + dims = weights.shape_; + VLOG(2) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + VLOG(2) << "i: " << dims.d[i]; + } + + nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; + if (weights.shape_.d[0] == 1) { + mode = nvinfer1::ScaleMode::kUNIFORM; + } + + nvinfer1::IScaleLayer* layer = + ctx.network()->addScale(*const_cast(tensor), mode, + weights, empty_weights, empty_weights); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + if (data_format == "NHWC") { + // TODO(jie): transpose it back! + output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); + } else { + VLOG(2) << "NCHW !!!!"; } outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -1072,7 +1344,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1091,20 +1363,144 @@ tensorflow::Status ConvertConst(Converter& ctx, VLOG(2) << "SCALAR!!!" << node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(2) << "Dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - GetTensorShape(tensor)); + VLOG(2) << "dimensions: " << tensor.dims(); + VLOG(2) << "size: " << weights_tensor.float_val_size(); + scalar_shape = GetTensorShape(tensor); + for (int i = 0; i < scalar_shape.nbDims; i++) + VLOG(2) << scalar_shape.d[i]; + if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) { + if (weights_tensor.float_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.float_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + } + } } else { VLOG(2) << "Dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - scalar_shape); + } + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + auto half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + half_tensor.device(defd) = + tensor.flat().template cast(); + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } + } else if (!weights_tensor.int_val().empty()) { + VLOG(2) << "int!!!" << node_def.name(); + nvinfer1::Dims scalar_shape; + if (tensor.dims() > 0) { + VLOG(2) << "dimensions: " << tensor.dims(); + scalar_shape = GetTensorShape(tensor); + if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) { + if (weights_tensor.int_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.int_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + } + } + } else { + VLOG(2) << "dimensions: " << tensor.dims(); + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { + scalar_shape.d[i] = 0; + scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; + } + } + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + TTypes::Flat half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + switch (dtype) { + case (tensorflow::DT_INT32): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT16): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_UINT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + default: + return tensorflow::errors::InvalidArgument( + "Datatype " + tensorflow::DataTypeString(dtype) + + " for FP16 conversion"); + break; + }; + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); + len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val() + .end()); // make a local copy first to flatten + // doesn't have to be contiguous + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" << node_def.name(); @@ -1130,7 +1526,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1138,7 +1534,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1165,7 +1561,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1183,7 +1579,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1191,7 +1587,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1229,6 +1625,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); if (index_list_data[i] == 1) permuted_index = 1; + idx_set.emplace(index_list_data[i]); } @@ -1236,7 +1633,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i)) { + if (idx_set.count(i) == 0) { permuted_index = i; break; } @@ -1271,12 +1668,13 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.TransposeTensor( const_cast(output_tensor), permutation_order); } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1284,7 +1682,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1371,19 +1769,287 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConcat(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + // not including the last input (axis) here + int input_size = static_cast(inputs.size()) - 1; + + if (!inputs.at(0).is_tensor()) + return tensorflow::errors::InvalidArgument( + "Concat in TRT support only Tensor input, at " + node_def.name()); + + // We are retrieving the axis + TRT_ShapedWeights axis = inputs.at(input_size).weights(); + + TFAttrs attrs(node_def); + // auto attr_size = attrs.at("N")->i(); + // auto data_type = attrs.get("T"); + auto index_type = attrs.get("Tidx"); + + // TODO(jie): handle data type + // Only expect to handle INT32 as index attributes for now + if (index_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "Tidx supports only DT_INT32, at " + node_def.name()); + + int index = *(static_cast(const_cast(axis.GetValues()))); + + // TODO(jie): early termination with no-op (attr_size==1) + + auto dim = inputs.at(0).tensor()->getDimensions(); + // dimension check + if (index > dim.nbDims + 1) + return tensorflow::errors::InvalidArgument( + "Concatenate on axis out of dimension range, at " + node_def.name()); + + if (index == 0) + return tensorflow::errors::InvalidArgument( + "Concatenate on batch dimension not supported, at " + node_def.name()); + + // incase we need permutation; + std::vector permutation_order(dim.nbDims + 1); + + for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; + + if (index != 1) { + permutation_order[1] = index - 1; + permutation_order[index - 1] = 1; + } + + std::vector inputs_vec; + // Shap chack (all input tensor should have same shape) + // starting from 0 since we are probably also doing transpose here; + for (int i = 0; i < input_size; i++) { + auto tensor_i = inputs.at(i).tensor(); + auto dim_i = tensor_i->getDimensions(); + if (dim_i.nbDims != dim.nbDims) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent dimensions, at " + + node_def.name()); + + for (int j = 0; j < dim.nbDims; j++) { + // check dimension consistency on non-concatenate axis + if (j != index - 1 && dim_i.d[j] != dim.d[j]) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent shape, at" + + node_def.name()); + } + + // TRT does concatenation only on channel! + if (index != 1) + tensor_i = ctx.TransposeTensor(const_cast(tensor_i), + permutation_order); + + inputs_vec.push_back(tensor_i); + } + + // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( + const_cast(inputs_vec.data()), + inputs_vec.size()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + if (index != 1) { + output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); + } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertFusedBatchNorm( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + TFAttrs attrs(node_def); + float epsilon = attrs.get("epsilon"); + auto data_format = attrs.get("data_format"); + if (data_format != "NCHW") { + return tensorflow::errors::Unimplemented( + "only data_format=NCHW is supported, at " + node_def.name()); + } + bool is_training = attrs.get("is_training"); + if (is_training) { + return tensorflow::errors::Unimplemented( + "only is_training=false is supported, at " + node_def.name()); + } + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + TRT_ShapedWeights variance_weights = inputs.at(4).weights(); + TRT_ShapedWeights dummy_power_weights(scale_weights.type_); + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(scale_weights); + TRT_ShapedWeights combined_offset_weights = + ctx.get_temp_weights_like(offset_weights); + size_t nweight = scale_weights.count(); + if ((scale_weights.type_ == offset_weights.type_) && + (mean_weights.type_ == variance_weights.type_) && + (scale_weights.type_ == variance_weights.type_)) { + if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && + (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + + tensorflow::DataTypeString(scale_weights.type_)); + } + if (scale_weights.type_ == tensorflow::DT_FLOAT) { + for (size_t i = 0; i < nweight; ++i) { + float scale = (static_cast(scale_weights.GetValues()))[i]; + float offset = + (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = + (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + } else { + const Eigen::half* scale_vals = + (static_cast(scale_weights.GetValues())); + const Eigen::half* off_vals = + (static_cast(offset_weights.GetValues())); + const Eigen::half* mean_vals = + (static_cast(mean_weights.GetValues())); + const Eigen::half* variance_vals = + (static_cast(variance_weights.GetValues())); + Eigen::half* comb_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* comb_off_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + for (size_t i = 0; i < nweight; ++i) { + float scale(scale_vals[i]); + float offset(off_vals[i]); + float mean(mean_vals[i]); + float variance(variance_vals[i]); + float combined_scale_ref = scale / sqrtf(variance + epsilon); + comb_scale_vals[i] = Eigen::half(combined_scale_ref); + float combined_offset_ref = offset - mean * combined_scale_ref; + comb_off_vals[i] = Eigen::half(combined_offset_ref); + } + } + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertMatMul(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + + // TODO(jie): transpose! + TFAttrs attrs(node_def); + + TRT_ShapedWeights weights_ck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); + ReorderCKtoKC(weights_ck, &weights); + TRT_ShapedWeights biases(weights.type_); + + int noutput = weights.shape_.d[0]; + + nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( + *const_cast(tensor), noutput, weights, biases); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertReshape( + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + if (inputs.size() != 2 || !inputs.at(0).is_tensor() || + !inputs.at(1).is_weights()) + return tensorflow::errors::InvalidArgument( + "Input expects tensor and weights, at" + node_def.name()); + + // implement tensor binaryOp weight [channel wise] for now; + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + auto dims = tensor->getDimensions(); + // restore implicit batch dimension + + TRT_ShapedWeights shape = inputs.at(1).weights(); + + TFAttrs attrs(node_def); + + auto padding_type = attrs.get("Tshape"); + + if (shape.shape_.nbDims != 1) + return tensorflow::errors::InvalidArgument( + "reshape new shape is not 1 dimensional, at " + node_def.name()); + + // Only expect to handle INT32 as attributes for now + if (padding_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "reshape new shape supports only DT_INT32, at " + node_def.name()); + + auto shape_data = static_cast(const_cast(shape.GetValues())); + + if (shape_data[0] != -1) + return tensorflow::errors::InvalidArgument( + "reshape new shape first dimension is not -1, at " + node_def.name()); + + auto shape_num_dims = shape.shape_.d[0]; + VLOG(2) << "shape dimensions: " << shape_num_dims; + int volume_w = 1; + for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i]; + + int volume_t = 1; + for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i]; + + VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w; + if (volume_w != volume_t) + return tensorflow::errors::InvalidArgument( + "volume does not agree between tensor and new shape, at " + + node_def.name()); + + nvinfer1::IShuffleLayer* layer = + ctx.network()->addShuffle(*const_cast(tensor)); + + nvinfer1::Dims reshape_dims; + VLOG(2) << "new dimension: " << shape_num_dims - 1; + reshape_dims.nbDims = shape_num_dims - 1; + for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { + reshape_dims.d[i] = shape_data[i + 1]; + } + layer->setReshapeDimensions(reshape_dims); + VLOG(2) << "new dimension: " << shape_num_dims - 1; + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + auto dims_output = output_tensor->getDimensions(); + VLOG(2) << "output tensor dimension:" << dims_output.nbDims; + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation op_registry_["Placeholder"] = ConvertPlaceholder; op_registry_["Conv2D"] = ConvertConv2D; + op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; op_registry_["Relu"] = ConvertActivation; op_registry_["MaxPool"] = ConvertPool; + op_registry_["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; - // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed - // op_registry_["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -1393,26 +2059,364 @@ void Converter::register_op_converters() { op_registry_["Mean"] = ConvertReduce; op_registry_["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops + + op_registry_["ConcatV2"] = ConvertConcat; + op_registry_["MatMul"] = ConvertMatMul; + op_registry_["Reshape"] = ConvertReshape; + op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; + op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace +tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { + return tensorflow::errors::Unimplemented("Not implemented yet"); +} +tensorflow::Status ConvertCalibrationNodeToEngineNode( + tensorflow::Graph& graph, tensorflow::Node* c_node) { + const auto ndef = c_node->def(); + + TFAttrs attrs(ndef); + std::vector segment_nodes( + attrs.get>("segment_nodes")); + std::vector output_nodes( + attrs.get>("segment_output_names")); + std::vector input_names( + attrs.get>("input_names")); + string res_name = attrs.get("resource_name"); + VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; + string engine_name = "my_trt_op"; + { + const auto node_id = tensorflow::str_util::Split(res_name, "_"); + engine_name += node_id.back(); + } + std::map node_maps; + + for (auto n : graph.op_nodes()) { + node_maps.insert({n->name(), n}); + } + VLOG(1) << "Output Nodes:"; + std::vector out_types; + std::vector out_edges; + for (auto& i : output_nodes) { + auto node_port = tensorflow::str_util::Split(i, ":"); + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + auto out_node_name = node_port.at(0); + if (node_port.size() > 1) { + VLOG(1) << "Multi port output" << node_port.at(0) << " " + << node_port.at(1) << " size=" << node_port.size(); + } + auto node_it = node_maps.find(out_node_name); + if (node_it != node_maps.end()) { + tensorflow::Node* out_node = node_it->second; + int port = 0; + if (node_port.size() == 2) { + port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); + out_types.push_back(out_node->output_type(port)); + } else { + out_types.push_back(out_node->output_type(0)); + } + for (auto out_edge : out_node->out_edges()) { + if (out_edge->src_output() == port) { + out_edges.push_back(out_edge); + break; + } + } + } else { + LOG(WARNING) << " couldn't find output node " << out_node_name; + } + } + VLOG(1) << "Input Nodes:"; + for (auto& i : input_names) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + } + auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); + auto resmgr = trt_rm->getManager("TRTCalibOps"); + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = resmgr->Lookup(res_name, res_name, &calib_res); + if (!status.ok() || !calib_res->calibrator_) { + return tensorflow::errors::FailedPrecondition( + "You must run calibration" + " and inference conversion in the same proces"); + } + + calib_res->calibrator_->setDone(); + calib_res->thr_->join(); + delete calib_res->thr_; + if (!calib_res->engine_) { + LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " + "calibration graph?"; + } + auto weight_rmgr = trt_rm->getManager("WeightStore"); + TF_CHECK_OK(weight_rmgr->Delete( + res_name, res_name)); + auto engine_plan = calib_res->engine_->serialize(); + calib_res->engine_->destroy(); + calib_res->network_->destroy(); + calib_res->builder_->destroy(); + calib_res->thr_ = nullptr; + calib_res->engine_ = nullptr; + calib_res->builder_ = nullptr; + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + std::vector income_edges; + for (const auto in_edge : c_node->in_edges()) { + auto src = in_edge->src(); + int dest_port = in_edge->dst_input(); + income_edges.emplace_back(src->name(), in_edge->src_output(), + c_node->input_type(dest_port)); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + tensorflow::NodeDef engine_node; + const char* engine_plan_data = static_cast(engine_plan->data()); + string engine_plan_string(engine_plan_data, + engine_plan_data + engine_plan->size()); + status = op_builder.Attr("serialized_engine", engine_plan_string) + .Attr("input_nodes", input_names) + .Attr("output_nodes", output_nodes) + .Attr("OutT", out_types) + .Finalize(&engine_node); + if (!status.ok()) { + LOG(ERROR) << "Engine Node creation failed"; + return status; + } + auto trt_engine_node = graph.AddNode(engine_node, &status); + TF_CHECK_OK(status); + for (size_t i = 0; i < out_edges.size(); i++) { + VLOG(1) << "Connecting trt_engine_node output " << i << " with " + << out_edges.at(i)->dst()->name() << " port " + << out_edges.at(i)->dst_input(); + TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, + out_edges.at(i)->dst(), + out_edges.at(i)->dst_input())); + } + VLOG(1) << "Segment nodes:"; + for (auto& i : segment_nodes) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + auto it = node_maps.find(i); + if (it != node_maps.end()) { + graph.RemoveNode(it->second); + } + } + graph.RemoveNode(c_node); + return tensorflow::Status::OK(); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { + // Visit nodes in reverse topological order and construct the TRT network. + + // Toposort + std::vector order_vec; + tensorflow::GetPostOrder(s.graph, &order_vec); + // Select just the subgraph + std::list order; + for (tensorflow::Node* node : order_vec) { + if (s.subgraph_node_ids.count(node->id())) { + order.push_front(node); // we want topological order to construct the + // network layer by layer + } + } + // topological order is needed to build TRT network + static int static_id = 0; + string subgraph_name_scope; + if (!order.empty()) { + subgraph_name_scope = order.front()->name(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); + } + // TODO(sami,ben,jie): proper naming! + string calib_op_name = + StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); + static_id++; + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); + auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); + op_res->logger_ = new tensorflow::tensorrt::Logger(); + op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); + + if (!op_res->builder_) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + op_res->network_ = op_res->builder_->createNetwork(); + if (!op_res->network_) { + return tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + // Build the network + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); + Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); + std::vector input_names; + std::vector input_dtypes; + for (const std::pair& input : s.input_inds) { + VLOG(2) << "parsing input. Node id= " << input.first; + int node_id = input.first; + int output_idx = input.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + auto node_name = node->name(); + input_names.push_back(node_name); // insert original node name without port + // TODO(jie): alternative :) + if (!s.graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + node_name); + + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) + return tensorflow::errors::Internal( + "accessing output index of: ", output_idx, ", at node: ", node_name, + "with output entry from shape_map: ", op_info_vec.size()); + + auto op_info = op_info_vec.at(output_idx); + + tensorflow::DataType tf_dtype = op_info.dtype(); + input_dtypes.push_back(tf_dtype); + + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); + + VLOG(2) << "accessing output index of: " << output_idx + << ", at node: " << node_name + << "with output entry from shape_map: " << op_info_vec.size(); + + // TODO(ben,jie): update TRT input format/dimension + nvinfer1::DimsCHW input_dim_psuedo_chw; + for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + + for (int i = 1; i < op_info.shape().dim_size(); i++) { + VLOG(2) << "dimension: " << i + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + } + + // TODO(ben,jie): proper way to restore input tensor name? + auto input_tensor_name = node_name; + if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); + + nvinfer1::ITensor* input_tensor = converter.network()->addInput( + input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + + if (!input_tensor) + return tensorflow::errors::InvalidArgument( + "Failed to create Input layer"); + VLOG(2) << "input tensor name :" << input_tensor_name; + + if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) + return tensorflow::errors::AlreadyExists( + "output tensor already exists for op: " + input_tensor_name); + } + + VLOG(2) << "finished sorting"; + + for (const tensorflow::Node* node : order) { + const tensorflow::NodeDef& node_def = node->def(); + VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); + TF_RETURN_IF_ERROR(converter.convert_node(node_def)); + } + + VLOG(2) << "finished conversion"; + + // Gather output metadata + std::vector output_names; + std::vector output_dtypes; + int trt_engine_op_output_idx = 0; + for (const std::pair& output : s.output_inds) { + int node_id = output.first; + int output_idx = output.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + string op_name = node->name(); + string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : StrCat(engine_name, ":", trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } + VLOG(1) << "output tensor name: " << tensor_name; + output_names.push_back(tensor_name); + auto tensor_or_weights = converter.get_tensor(tensor_name); + if (!tensor_or_weights.is_tensor()) { + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); + } + nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); + if (!tensor) { + return tensorflow::errors::NotFound("Output tensor not found: " + + tensor_name); + } + converter.network()->markOutput(*tensor); + tensorflow::DataType tf_dtype = node->output_type(output_idx); + output_dtypes.push_back(tf_dtype); + nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; + TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); + tensor->setType(trt_dtype); + } + + VLOG(2) << "finished output"; + + // Build the engine + op_res->builder_->setMaxBatchSize(s.max_batch_size); + op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); + + // Build the TRT op + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp"); + std::vector income_edges; + for (size_t i = 0; i < input_names.size(); ++i) { + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( + input_names.at(i), output_idx, input_dtypes.at(i)); + VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) + << ":" << output_idx + << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + std::vector segment_names; + segment_names.reserve(s.subgraph_node_ids.size()); + for (int i : s.subgraph_node_ids) { + auto node = s.graph.FindNodeId(i); + segment_names.push_back(node->name()); + } + LOG(INFO) << "finished op preparation"; + + auto status = op_builder.Attr("segment_nodes", segment_names) + .Attr("input_names", input_names) + .Attr("segment_output_names", output_names) + .Attr("resource_name", calib_op_name) + .Finalize(s.trt_node); + + LOG(INFO) << status.ToString(); + LOG(INFO) << "finished op building"; + + return tensorflow::Status::OK(); +} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& input_inds, - const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::NodeDef* trt_node) { + tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. // Toposort std::vector order_vec; - tensorflow::GetPostOrder(graph, &order_vec); + tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph std::list order; for (tensorflow::Node* node : order_vec) { - if (subgraph_node_ids.count(node->id())) { + if (s.subgraph_node_ids.count(node->id())) { // We want topological order to contstruct the // network layer by layer order.push_front(node); @@ -1434,46 +2438,86 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "Failed to create TensorRT network object"); } + string subgraph_name_scope; + if (!order.empty()) { + subgraph_name_scope = order.front()->name(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); + } + static int static_id = 0; + // TODO(sami,ben,jie): proper naming! + string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); + engine_name = StrCat(engine_name, static_id++); + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); + // Build the network - Converter converter(trt_network.get()); + Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); std::vector input_names; std::vector input_dtypes; - for (std::pair const& input : input_inds) { + for (const std::pair& input : s.input_inds) { + VLOG(2) << "parsing input!!!!!"; int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); auto node_name = node->name(); - input_names.push_back(node_name); // Insert original node name without port - // TODO(jie): alternative :) - if (!graph_properties.HasOutputProperties(node_name)) - return tensorflow::errors::Internal("Failed to find input node: " + - node_name); + // input_names should use the node name in the graph + // here it should be the input tensor name -> matching the binding + // insert original node name without port + auto tensor_name = node_name; + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } - auto op_info_vec = graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) - return tensorflow::errors::Internal( - "Accessing output index of: " + std::to_string(output_idx) + - ", at node: " + node_name + " with output entry from shape_map: " + - std::to_string(op_info_vec.size())); + VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name + << " idx: " << output_idx; - auto op_info = op_info_vec.at(output_idx); + auto shape_inference_node_name = node_name; + auto shape_inference_output_idx = output_idx; + // rewire the shape inference to original node in the graph + if (s.output_edge_map->count(tensor_name)) { + shape_inference_node_name = s.output_edge_map->at(tensor_name).second; + shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; + } + if (shape_inference_output_idx < 0) continue; + VLOG(2) << "shapeinference name: " << shape_inference_node_name + << " idx: " << shape_inference_output_idx; + + if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + shape_inference_node_name); + + auto op_info_vec = + s.graph_properties.GetOutputProperties(shape_inference_node_name); + if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) + return tensorflow::errors::Internal( + "accessing output index of: ", shape_inference_output_idx, + ", at node: ", shape_inference_node_name, + " with output entry from shape_map: ", op_info_vec.size()); + auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "Accessing output index of: " << std::to_string(output_idx) + VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name - << " with output entry from shape_map: " - << std::to_string(op_info_vec.size()); - + << " with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + // TODO(jie): TRT 3.x only support 4 dimensional input tensor. + // update the code once TRT 4.0 comes out. + if (op_info.shape().dim_size() != 4) + return tensorflow::errors::Unimplemented("require 4 dimensional input"); + for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); @@ -1482,9 +2526,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) - input_tensor_name = node_name + ":" + std::to_string(output_idx); + if (output_idx != 0) { + input_tensor_name = StrCat(node_name, ":", output_idx); + } + input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -1511,14 +2557,22 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector output_names; std::vector output_dtypes; - for (std::pair const& output : output_inds) { + int trt_engine_op_output_idx = 0; + for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); string op_name = node->name(); string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : StrCat(engine_name, ":", trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; if (output_idx != 0) - tensor_name = tensor_name + ":" + std::to_string(output_idx); + tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -1540,19 +2594,25 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << "Finished output"; - // TODO(jie): static_id is not thread safe. - static int static_id = 0; // Build the engine - trt_builder->setMaxBatchSize(max_batch_size); - trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes); - VLOG(0) << "Starting build engine " << static_id; - // TODO(ben,jie): half2 and int8 mode support + trt_builder->setMaxBatchSize(s.max_batch_size); + trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); + VLOG(0) << "Max batch size= " << s.max_batch_size + << " max workspace size= " << s.max_workspace_size_bytes; + if (s.precision_mode == FP16MODE) { + trt_builder->setHalf2Mode(true); + VLOG(0) << "Using FP16 precision mode"; + } + LOG(INFO) << "starting build engine"; string engine_plan_string; { auto trt_engine = infer_object(trt_builder->buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; + if (trt_engine.get() == nullptr) { + return tensorflow::errors::Internal("Engine building failure"); + } auto engine_plan = infer_object(trt_engine->serialize()); VLOG(0) << "Serialized engine"; const char* engine_plan_data = @@ -1560,18 +2620,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - - VLOG(0) << "Finished engine"; + TF_RETURN_IF_ERROR(weight_rmgr->Delete( + engine_name, engine_name)); + LOG(INFO) << "finished engine " << engine_name; // Build the TRT op - // TODO(sami,ben,jie): proper naming! - tensorflow::NodeDefBuilder op_builder( - tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp"); + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; + VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = input_inds.at(i).second; - // We wired up the input here already, it is redundant to do it again in - // ConvertSubGraphToTensorRT(convert_graph.cc) + VLOG(2) << "input edges: " << i << " " << input_names.at(i); + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); income_edges.push_back(incoming_edge); @@ -1586,7 +2647,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(trt_node); + .Finalize(s.trt_node); VLOG(0) << status.ToString() << " finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2e7fd19566..954a1e72f8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -17,6 +17,8 @@ limitations under the License. #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_ #include +#include +#include #include #include @@ -32,16 +34,49 @@ namespace tensorflow { namespace tensorrt { namespace convert { -tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& - input_inds, // {node_id, output_idx} - const std::vector>& - output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_prop, - tensorflow::NodeDef* trt_node); +const int FP32MODE = 0; +const int FP16MODE = 1; +const int INT8MODE = 2; +struct SubGraphParams { + SubGraphParams( + tensorflow::Graph& inp_graph, + const std::set& subgraph_node_id_numbers, + const std::vector>& input_indices, + const std::vector>& output_indices, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + tensorflow::NodeDef* constructed_trt_node, + int engine_precision_mode = FP32MODE) + : graph(inp_graph), + subgraph_node_ids(subgraph_node_id_numbers), + input_inds(input_indices), + output_inds(output_indices), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + trt_node(constructed_trt_node), + precision_mode(engine_precision_mode) {} + + tensorflow::Graph& graph; + const std::set& subgraph_node_ids; + const std::vector>& input_inds; // {node_id, output_idx} + const std::vector>& output_inds; // {node_id, output_idx} + size_t max_batch_size; + size_t max_workspace_size_bytes; + const tensorflow::grappler::GraphProperties& graph_properties; + std::unordered_map>* output_edge_map; + tensorflow::NodeDef* trt_node; + const int precision_mode; +}; + +// TODO(sami): Replace references with const reference or pointers +tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); +tensorflow::Status InjectCalibrationNode(SubGraphParams& params); +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, + tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 1dcb87e768..aea44fd8a2 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -21,10 +21,11 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -113,7 +114,13 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - calib_res->calibrator_->setBatch(input_data); + // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(ctx->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + calib_res->calibrator_->setBatch(input_data, *stream); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 8efdf63ebe..b32371b642 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,8 +24,12 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -namespace tensorrt { static ::tensorflow::tensorrt::Logger logger; +namespace gpu = ::perftools::gputools; +using IRuntime = nvinfer1::IRuntime; +using Dims = nvinfer1::Dims; + +namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine @@ -40,10 +44,21 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger); + // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same + // gpu where the input/output is also located. + int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; + cudaSetDevice(gpu_id); + int device; + cudaGetDevice(&device); + if (gpu_id != device) LOG(FATAL) << "set device failed!"; + + // TODO(samikama) runtime should be taken from a resourcemanager as well. + // Only engine should be in the op and context and runtime should be taken + // from resourcemanager + + IRuntime* infer = nvinfer1::createInferRuntime(logger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); - trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); // Runtime is safe to delete after engine creation infer->destroy(); @@ -55,7 +70,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { size_t binding_index; int num_batch = 0; - bool valid = true; for (int i = 0; i < context->num_inputs(); i++) { // Grab the input tensor binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); @@ -64,8 +78,12 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { num_batch = input_shape.dim_size(0); + if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { + LOG(FATAL) << "input tensor batch larger than max_batch_size: " + << trt_engine_ptr_->getMaxBatchSize(); + } } else if (num_batch != input_shape.dim_size(0)) { - valid = false; + LOG(FATAL) << "input data inconsistent batch size"; break; } switch (trt_engine_ptr_->getBindingDataType(binding_index)) { @@ -81,9 +99,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } - // Might want a different way to inform the user of batch size inconsistency - if (!valid) LOG(WARNING) << "input data inconsistent batch size"; - for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. // Create an output tensor @@ -126,9 +141,11 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - // execution handled by TF since we are getting stream from TF. - // it is safe for CPU pointer array (buffers) to go out of scope after enqueue - trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr); + // TODO(jie): trt enqueue does not return error + auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], + *stream, nullptr); + VLOG(2) << "enqueue returns: " << ret; + // sync should be done by TF. } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index 7add8cb8b3..dda0dc9e71 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << msg; + VLOG(2) << name_ << " " << msg; break; } case Severity::kWARNING: { - LOG(WARNING) << msg; + LOG(WARNING) << name_ << " " << msg; break; } case Severity::kERROR: { - LOG(ERROR) << msg; + LOG(ERROR) << name_ << " " << msg; break; } case Severity::kINTERNAL_ERROR: { - LOG(FATAL) << msg; + LOG(FATAL) << name_ << " " << msg; break; } // This is useless for now. But would catch it in future if enum changes. It diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index d71f66b933..7f3544f8cf 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -27,9 +27,11 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { - private: + public: + Logger(string name = "DefaultLogger") : name_(name){}; void log(nvinfer1::ILogger::Severity severity, const char* msg) override; + private: string name_; }; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 7e050a768c..0b2321b5fc 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -20,5 +20,6 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.tensorrt.python.ops import trt_engine_op +from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 9454862f85..338475d90e 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -20,11 +20,17 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six +from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl +from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.util import compat +# pylint: enable=unused-import,line-too-long # TODO(skama): get outputs from session when implemented as c++ @@ -32,22 +38,33 @@ from tensorflow.python.framework import ops def create_inference_graph(input_graph_def, outputs, max_batch_size=1, - max_workspace_size_bytes=2 << 20): - """Python wrapper for the TRT transormation. - + max_workspace_size_bytes=2 << 20, + precision_mode="FP32", + minimum_segment_size=3): + """Python wrapper for the TRT transformation. Args: input_graph_def: GraphDef object containing a model to be transformed. - outputs: List of tensors or node names for the model outputs. + outputs: list of tensors or node names for the model outputs. max_batch_size: max size for the input batch max_workspace_size_bytes: parameter to control memory allocation (in Bytes) + precision_mode: one of 'FP32', 'FP16' and 'INT8' + minimum_segment_size: the minimum number of nodes required for a subgraph to + be replaced by TRTEngineOp. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. Raises: + ValueError: if the provided precision mode is invalid. RuntimeError: if the returned status message is malformed. """ + supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2} + if precision_mode.upper() not in supported_precision_modes: + raise ValueError(("precision mode '{}' is not supported." + "It should be one of {}").format( + precision_mode, "{'FP32', 'FP16', 'INT8'}")) + mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -83,7 +100,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes) + max_workspace_size_bytes, mode, minimum_segment_size) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -101,3 +118,46 @@ def create_inference_graph(input_graph_def, output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string # Save some memory return output_graph_def + + +def calib_graph_to_infer_graph(calibration_graph_def): + """Convert an existing calibration graph to inference graph. + + Args: + calibration_graph_def: the calibration GraphDef object with calibration data + Returns: + New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. + Raises: + RuntimeError: if the returned status message is malformed. + """ + + def py2string(inp): + return inp + + def py3string(inp): + return inp.decode("utf-8") + + if _six.PY2: + to_string = py2string + else: + to_string = py3string + + graph_str = calibration_graph_def.SerializeToString() + out = calib_convert(graph_str) + status = to_string(out[0]) + output_graph_def_string = out[1] + del graph_str # Save some memory + if len(status) < 2: + raise _impl.UnknownError(None, None, status) + if status[:2] != "OK": + msg = status.split(";") + if len(msg) == 1: + raise RuntimeError("Status message is malformed {}".format(status)) + # pylint: disable=protected-access + raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), + int(msg[0])) + # pylint: enable=protected-access + output_graph_def = graph_pb2.GraphDef() + output_graph_def.ParseFromString(output_graph_def_string) + del output_graph_def_string # Save some memory + return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 3d5cc76c42..dc7c93f869 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -38,22 +38,18 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), + batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch( - const std::unordered_map& data) { - // TODO(aaroey): make sure that in future PR: - // 1. the mutex_lock is outside of the loop - // 2. wait() is used instead of wait_for() - // 3. done_ is to be protected by the mutex - // 4. the first batch is not missed - if (done_) return false; - while (calib_running_.load( - std::memory_order_acquire)) { // wait while calibration is running - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; +bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, + const cudaStream_t stream) { + tensorflow::mutex_lock lock(cond_mtx_); + while ((calib_running_ || batch_is_set_) && + !done_) { // wait while calibration is running + cond_.wait(lock); } + if (done_) return false; + CHECK(!calib_running_ && !batch_is_set_); VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { auto devptr = dev_buffers_.find(it.first); @@ -65,27 +61,32 @@ bool TRTInt8Calibrator::setBatch( // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - auto status = - cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + // TODO(sami,aaroey): Need to figure out a way to ensure synchronization + // between stream, perhaps using a tensor? + auto status = cudaMemcpyAsync(d.first, it.second, d.second, + cudaMemcpyDeviceToDevice, stream); if (status != cudaSuccess) { LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first << "' failed with " << status; } } - calib_running_.store(true, std::memory_order_release); // release builder + + // TODO(Sami, aaorey): Find an alternative way! + cudaStreamSynchronize( + stream); // we have to wait for the stream before returning! + batch_is_set_ = true; cond_.notify_all(); return true; } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - calib_running_.store(false, std::memory_order_release); // wait for new batch + tensorflow::mutex_lock lock(cond_mtx_); + calib_running_ = false; cond_.notify_all(); - while (!calib_running_.load( - std::memory_order_acquire)) { // wait until new batch arrives - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; + while ((!batch_is_set_ && !done_)) { // wait until new batch arrives + cond_.wait(lock); + } if (done_) { return false; @@ -100,6 +101,8 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, bindings[i] = it->second.first; } + batch_is_set_ = false; + calib_running_ = true; return true; } @@ -107,6 +110,12 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } +void TRTInt8Calibrator::setDone() { + tensorflow::mutex_lock lock(cond_mtx_); + done_ = true; + cond_.notify_all(); +} + void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, std::size_t length) {} TRTInt8Calibrator::~TRTInt8Calibrator() { @@ -115,5 +124,6 @@ TRTInt8Calibrator::~TRTInt8Calibrator() { } // namespace tensorrt } // namespace tensorflow + #endif #endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index 8830f7efe7..d77aa2c5ab 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,7 +24,10 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT + +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" + namespace tensorflow { namespace tensorrt { // This class provides a 1 element queue to match TFs push model to @@ -39,8 +42,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map& data); - void setDone() { done_ = true; } + bool setBatch(const std::unordered_map& data, + const cudaStream_t stream); + void setDone(); const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); @@ -55,11 +59,14 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { const std::unordered_map> dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with // buffer names - std::atomic_bool calib_running_; + bool calib_running_; + bool batch_is_set_; string engine_name_; }; + } // namespace tensorrt } // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + #endif #endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index c78f6f2224..ad01bedd8f 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -60,6 +60,7 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): + """Run given graphdef once.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -74,15 +75,65 @@ def run_graph(gdef, dumm_inp): return val +# Use real data that is representative of the inference dataset +# for calibration. For this test script it is random data. +def run_calibration(gdef, dumm_inp): + """Run given calibration graph multiple times.""" + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with csess.Session( + config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: + # run over real calibration data here, we are mimicking a calibration set of + # 30 different batches. Use as much calibration data as you want + for _ in range(30): + val = sess.run(out, {inp: dumm_inp}) + return val + + if "__main__" in __name__: inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) - gdef = get_simple_graph_def() + orig_graph = get_simple_graph_def() # use a frozen graph for inference # Get optimized graph - trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0]) - o1 = run_graph(gdef, dummy_input) + trt_graph = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o1 = run_graph(orig_graph, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) assert np.array_equal(o1, o2) assert np.array_equal(o3, o2) # sanity check + fp16_graph = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP16", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + int8_calib_gdef = trt.create_inference_graph( + input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o4 = run_graph(fp16_graph, dummy_input) + _ = run_calibration(int8_calib_gdef, dummy_input) + int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) + o5 = run_graph(int8_graph, dummy_input) + assert np.allclose(o1, o4) + assert np.allclose(o1, o5) print("Pass") diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index d679945d56..46480e99a1 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -64,13 +64,17 @@ PyObject* pair_helper(std::pair* in) { %ignoreall %unignore tensorflow; %unignore trt_convert; +%unignore calib_convert; %{ + std::pair trt_convert( string graph_def_string, // The serialized GraphDef string. std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes + size_t max_workspace_size_bytes, + int precision_mode, + int minimum_segment_size // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. These ops are included @@ -90,16 +94,64 @@ std::pair trt_convert( return std::pair{out_status, ""}; } + if(precision_mode < 0 || precision_mode > 2){ + out_status = "InvalidArgument;Invalid precision_mode"; + return std::pair{out_status, ""}; + } if (!output_names.size()) { out_status = "InvalidArgument;Size of the output_names vector is 0"; return std::pair{out_status, ""}; - // return ""; } tensorflow::GraphDef outGraph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph); + &outGraph, precision_mode, minimum_segment_size); + if (!conversion_status.ok()) { + auto retCode = (int)conversion_status.code(); + char buff[2000]; + snprintf(buff, 2000, "%d;%s", retCode, + conversion_status.error_message().c_str()); + out_status = buff; + return std::pair{out_status, ""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status, ""}; + } + out_status = "OK;All good!"; + return std::pair{out_status, result}; +#else + // Returns FAILED_PRECONDITION. + return std::pair{"9;TensorRT is not enabled!", ""}; +#endif // GOOGLE_CUDA && GOOGLE_TENSORRT +} + +std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { +#if GOOGLE_CUDA && GOOGLE_TENSORRT + string out_status; + + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status, ""}; + } + + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = + tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, + &outGraph); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -122,10 +174,13 @@ std::pair trt_convert( } %} +std::pair calib_convert(string graph_def_string); + std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, - size_t max_workspace_size_bytes); + size_t max_workspace_size_bytes, + int precision_mode, int minimum_segment_size); %unignoreall diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc index cc32a26528..72d37f774c 100644 --- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc @@ -50,7 +50,7 @@ namespace tensorflow { // TPU Embeddings use dedicated ops to enforce Host/TPU consistency in the // state of embedding table variables. Before beginning training or inference, // the model must Load the optimizer parameters into the TPU memories. Before -// saving a checkpoint, the model must Retreieve the parameters back into the +// saving a checkpoint, the model must Retrieve the parameters back into the // host CPU memory. REGISTER_OP("TPUEmbeddingLoadGradientDescentParameters") @@ -263,7 +263,7 @@ REGISTER_OP("TPUEmbeddingReceiveActivations") .SetIsStateful() .SetShapeFn(tpu_embedding_config_util::ActivationShapes) .Doc(R"doc( -An op that receives embeddng activations on the TPU. +An op that receives embedding activations on the TPU. The TPU system performs the embedding lookups and aggregations specified by the arguments to TPUEmbeddingEnqueueSparseBatch. The results of these @@ -293,7 +293,7 @@ REGISTER_OP("TPUEmbeddingActivations") An op enabling differentiation of TPU Embeddings. This op simply returns its first input, which is assumed to have been sliced -from the Tensors returnd by TPUEmbeddingDequeueActivations. The presence of this +from the Tensors returned by TPUEmbeddingDequeueActivations. The presence of this op, and its first argument being a trainable Variable, enables automatic differentiation of graphs containing embeddings via the TPU Embedding Python libraries. diff --git a/tensorflow/contrib/tpu/python/tpu/device_assignment.py b/tensorflow/contrib/tpu/python/tpu/device_assignment.py index bdd9b88af5..726b2d248e 100644 --- a/tensorflow/contrib/tpu/python/tpu/device_assignment.py +++ b/tensorflow/contrib/tpu/python/tpu/device_assignment.py @@ -191,9 +191,9 @@ class DeviceAssignment(object): logical_core: A tuple of three integers which represents a logical core. Returns: A sorted list of the replicas that are attached to that task and - loical_core. + logical_core. Raises: - ValueError: If no replica exisis in the task which contains the logical + ValueError: If no replica exists in the task which contains the logical core. """ try: diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 009326e3d0..38b5ea2310 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -161,7 +161,7 @@ class RunConfig(run_config_lib.RunConfig): self._tpu_config = tpu_config or TPUConfig() self._cluster = cluster - # If user sets master and/or evaluation_master explicilty, including empty + # If user sets master and/or evaluation_master explicitly, including empty # string '', take it. Otherwise, take the values set by parent class. if master is not None: if cluster is not None: diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index c5c46ea741..3bac2db77e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -39,7 +39,7 @@ class _TPUContext(object): This immutable object holds TPUEstimator config, train/eval batch size, and `TPUEstimator.use_tpu`, which is expected to be passed around. It also - provides utility functions, basded on the current state, to determine other + provides utility functions, based on the current state, to determine other information commonly required by TPU computation, such as TPU device names, TPU hosts, shard batch size, etc. @@ -218,7 +218,7 @@ class _TPUContext(object): model, when mode == PREDICT. Only with this bool, we could tell whether user is calling the Estimator.predict or Estimator.export_savedmodel, which are running on TPU and CPU - respectively. Parent class Estimator does not distingush these two. + respectively. Parent class Estimator does not distinguish these two. Returns: bool, whether current input_fn or model_fn should be running on CPU. diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index f61f6bb52e..4354735744 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -137,7 +137,7 @@ def _increase_eval_step_op(iterations_per_loop): """Returns an op to increase the eval step for TPU evaluation. Args: - iterations_per_loop: Tensor. The number of eval steps runnining in TPU + iterations_per_loop: Tensor. The number of eval steps running in TPU system before returning to CPU host for each `Session.run`. Returns: @@ -609,17 +609,17 @@ class _StoppingPredictHook(session_run_hook.SessionRunHook): # batch. And we append one more batch to signal the system it should stop. # The data flow might look like # - # batch 0: images, labels, stop = 0 (user provideded) - # batch 1: images, labels, stop = 0 (user provideded) + # batch 0: images, labels, stop = 0 (user provided) + # batch 1: images, labels, stop = 0 (user provided) # ... - # batch 99: images, labels, stop = 0 (user provideded) + # batch 99: images, labels, stop = 0 (user provided) # batch 100: images, labels, stop = 1 (TPUEstimator appended) # # where the final batch (id = 100) is appended by TPUEstimator, so we # should drop it before returning the predictions to user. # To achieve that, we throw the OutOfRangeError in after_run. Once # Monitored Session sees this error in SessionRunHook.after_run, the - # "current" prediciton, i.e., batch with id=100, will be discarded + # "current" prediction, i.e., batch with id=100, will be discarded # immediately raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.') @@ -758,7 +758,7 @@ class _InputPipeline(object): 2. (features, labels) Internally, form 1 is reformed to `(features, None)` as features and labels - are passed separatedly to underlying methods. For TPU training, TPUEstimator + are passed separately to underlying methods. For TPU training, TPUEstimator may expect multiple `features` and `labels` tuples one for each core. TPUEstimator allows various different structures for inputs (namely `features` diff --git a/tensorflow/contrib/tpu/python/tpu/training_loop.py b/tensorflow/contrib/tpu/python/tpu/training_loop.py index 3d7896127a..82a75d0255 100644 --- a/tensorflow/contrib/tpu/python/tpu/training_loop.py +++ b/tensorflow/contrib/tpu/python/tpu/training_loop.py @@ -170,7 +170,7 @@ def while_loop(condition, body, inputs=None, infeed_queue=None, name=None): def repeat(n, body, inputs=None, infeed_queue=None, name=None): - """Builds a training loop that executes a fixed number of interations. + """Builds a training loop that executes a fixed number of iterations. The set of loop-carried tensors correspond to `inputs`. `body` must be a function that takes and returns the values of the diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 42d222ff6b..a14eeed1a5 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3338,6 +3338,10 @@ tf_cc_test( size = "small", srcs = ["common_runtime/function_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), + tags = [ + "manual", + "no_oss", + ], deps = [ ":core", ":core_cpu", diff --git a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt index 51d63eeb56..7be9a958ab 100644 --- a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEig.pbtxt @@ -19,6 +19,7 @@ form square matrices, with the same constraints as the single matrix SelfAdjointEig. The result is a [..., M+1, M] matrix with [..., 0,:] containing the -eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. +eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues +are sorted in non-decreasing order. END } diff --git a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt index 4a5e125258..fae9e84fc8 100644 --- a/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SelfAdjointEigV2.pbtxt @@ -31,7 +31,8 @@ END summary: "Computes the eigen decomposition of one or more square self-adjoint matrices." description: <contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 02038c5d77..1507b6eae2 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2492,10 +2492,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { mkl_op_registry::GetMklOpName(csinfo_.identity), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), - CopyAttrsLRN, AlwaysRewrite}); + CopyAttrsLRN, LrnRewrite}); rinfo_.push_back({csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool), CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); @@ -2865,6 +2865,28 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } + // If the depth_radius of LRN is not 2, then MKL DNN takes unoptimized + // path. The unoptimized path is slow. Thus we dont rewrite the node + // and use default Eigen. But for depth_radius=2, MKL DNN optimized + // path is taken, i.e., eigen node is rewritten by MKl DNN node. + static bool LrnRewrite(const Node* n) { + CHECK_NOTNULL(n); + + int depth_radius; + CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true); + + // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN + // and use eigen node instead + if (depth_radius == 2) { + return true; + } + VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2 which" + << "case is not optimized by Intel MKL, thus using Eigen op" + << "for LRN " ; + + return false; + } + static bool AddNRewrite(const Node* n) { CHECK_NOTNULL(n); @@ -3528,11 +3550,13 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, string data_format; string padding; std::vector strides; + std::vector dilations; bool use_cudnn_on_gpu; // Get all attributes from old node. TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3541,6 +3565,7 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, // Add attributes to new node. nb->Attr("T", T); nb->Attr("strides", strides); + nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); @@ -3778,12 +3803,14 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, DataType T_pred, T_succ; string padding; std::vector strides; + std::vector dilations; string data_format_pred, data_format_succ; bool use_cudnn_on_gnu; TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(pred->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index f78036d78c..bd0d94b83f 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -413,7 +414,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { frame_children_[frame_ids[0]].insert(frame_ids[1]); frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; } - if (!frame_ids.empty()) { + if (frame_ids.size() >= 1) { frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); if (node->op() == "LoopCond") { if (loop_cond_.count(frame_ids.back())) { @@ -432,7 +433,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { } for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { - if (it->second.empty()) { + if (it->second.size() == 0) { worklist.push_back(it->first); } } @@ -445,7 +446,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { if (parent_it != frame_parent_.end()) { int parent_id = parent_it->second; frame_children_[parent_id].erase(frame_id); - if (frame_children_[parent_id].empty()) { + if (frame_children_[parent_id].size() == 0) { worklist.push_back(parent_id); } } @@ -468,6 +469,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { + TF_RETURN_IF_ERROR(RemoveStackOps(item, optimized_graph)); if (opt_level_ == RewriterConfig::AGGRESSIVE) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 48d5955ad1..2e39f25fc1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5157,7 +5157,6 @@ tf_kernel_library( ], hdrs = [ "meta_support.h", - "quantization_utils.h", "reference_gemm.h", ], deps = [ @@ -5232,6 +5231,7 @@ tf_cc_test( name = "quantization_utils_test", srcs = ["quantization_utils_test.cc"], deps = [ + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5294,6 +5294,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5355,6 +5356,7 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5377,6 +5379,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5441,6 +5444,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5461,6 +5465,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5500,6 +5505,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5556,6 +5562,7 @@ tf_cc_test( ":math", ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", @@ -5578,6 +5585,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", @@ -5614,6 +5622,7 @@ tf_cc_test( deps = [ ":ops_testutil", ":ops_util", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", @@ -5635,6 +5644,7 @@ tf_cc_test( deps = [ ":batch_norm_op", ":ops_testutil", + ":quantization_utils", ":quantized_ops", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu_internal", diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index 7011550f7e..f16766315f 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -28,6 +27,7 @@ limitations under the License. #include "tensorflow/core/kernels/concat_lib.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -53,17 +53,38 @@ class ConcatBaseOp : public OpKernel { void Compute(OpKernelContext* c) override { const Tensor* concat_dim_tensor; const char* axis_attribute_name = - AxisArgName == NAME_IS_AXIS - ? "axis" - : AxisArgName == NAME_IS_CONCAT_DIM ? "concat_dim" : ""; + AxisArgName == NAME_IS_AXIS ? "axis" : AxisArgName == NAME_IS_CONCAT_DIM + ? "concat_dim" + : ""; OP_REQUIRES_OK(c, c->input(axis_attribute_name, &concat_dim_tensor)); OP_REQUIRES(c, IsLegacyScalar(concat_dim_tensor->shape()), errors::InvalidArgument( axis_attribute_name, " tensor should be a scalar integer, but got shape ", concat_dim_tensor->shape().DebugString())); - const int32 concat_dim = - internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + int64 concat_dim; + // In case of ConcatV2, "axis" could be int32 or int64 + if (AxisArgName == NAME_IS_AXIS) { + OP_REQUIRES( + c, (concat_dim_tensor->dtype() == DT_INT32 || + concat_dim_tensor->dtype() == DT_INT64), + errors::InvalidArgument(axis_attribute_name, + " tensor should be int32 or int64, but got ", + concat_dim_tensor->dtype())); + } else { + OP_REQUIRES(c, (concat_dim_tensor->dtype() == DT_INT32), + errors::InvalidArgument(axis_attribute_name, + " tensor should be int32, but got ", + concat_dim_tensor->dtype())); + } + if (concat_dim_tensor->dtype() == DT_INT32) { + concat_dim = + internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + } else { + concat_dim = + internal::SubtleMustCopy(concat_dim_tensor->scalar()()); + } + OpInputList values; OP_REQUIRES_OK(c, c->input_list("values", &values)); const int N = values.size(); @@ -154,17 +175,16 @@ using ConcatOp = ConcatBaseOp; template using ConcatV2Op = ConcatBaseOp; -#define REGISTER_CONCAT(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("axis"), \ +#define REGISTER_CONCAT(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_POD_STRING_TYPES(REGISTER_CONCAT); @@ -178,17 +198,16 @@ REGISTER_CONCAT(qint32); #if GOOGLE_CUDA -#define REGISTER_GPU(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("axis"), \ +#define REGISTER_GPU(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); @@ -212,7 +231,6 @@ REGISTER_KERNEL_BUILDER(Name("Concat") REGISTER_KERNEL_BUILDER(Name("ConcatV2") .Device(DEVICE_GPU) .TypeConstraint("T") - .TypeConstraint("Tidx") .HostMemory("values") .HostMemory("axis") .HostMemory("output"), @@ -221,17 +239,16 @@ REGISTER_KERNEL_BUILDER(Name("ConcatV2") #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL(type) \ - REGISTER_KERNEL_BUILDER(Name("Concat") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .HostMemory("concat_dim"), \ - ConcatOp) \ - REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ - .Device(DEVICE_SYCL) \ - .TypeConstraint("T") \ - .TypeConstraint("Tidx") \ - .HostMemory("axis"), \ +#define REGISTER_SYCL(type) \ + REGISTER_KERNEL_BUILDER(Name("Concat") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .HostMemory("concat_dim"), \ + ConcatOp) \ + REGISTER_KERNEL_BUILDER(Name("ConcatV2") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ ConcatV2Op) TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL); @@ -246,7 +263,6 @@ REGISTER_KERNEL_BUILDER(Name("Concat") REGISTER_KERNEL_BUILDER(Name("ConcatV2") .Device(DEVICE_SYCL) .TypeConstraint("T") - .TypeConstraint("Tidx") .HostMemory("values") .HostMemory("axis") .HostMemory("output"), diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index 666bca265c..e2e166c02f 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -401,7 +401,7 @@ class ConvOpTest : public OpsTestBase { // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187 // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234 // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261 - // (1*7)+(4*11)+(7*0)+(2*8)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121 + // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121 // This means we should end up with this matrix: // | 105 | 150 | 183 | 95 | // | 235 | 312 | 357 | 178 | diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 484d4f88d6..01754ec21a 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -113,6 +113,19 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "slide_dataset_op", + srcs = ["slide_dataset_op.cc"], + deps = [ + ":dataset", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:batch_util", + ], +) + tf_kernel_library( name = "padded_batch_dataset_op", srcs = ["padded_batch_dataset_op.cc"], @@ -538,6 +551,7 @@ tf_kernel_library( ":scan_dataset_op", ":shuffle_dataset_op", ":skip_dataset_op", + ":slide_dataset_op", ":sparse_tensor_slice_dataset_op", ":sql_dataset_ops", ":stats_aggregator_ops", diff --git a/tensorflow/core/kernels/data/slide_dataset_op.cc b/tensorflow/core/kernels/data/slide_dataset_op.cc new file mode 100644 index 0000000000..4f3537b691 --- /dev/null +++ b/tensorflow/core/kernels/data/slide_dataset_op.cc @@ -0,0 +1,252 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/batch_util.h" +#include "tensorflow/core/kernels/data/dataset.h" + +namespace tensorflow { + +namespace { + +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following op. + +class SlideDatasetOp : public UnaryDatasetOpKernel { + public: + explicit SlideDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + int64 window_size = 0; + int64 stride = 1; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "window_size", &window_size)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "stride", &stride)); + OP_REQUIRES( + ctx, window_size > 0, + errors::InvalidArgument("Window size must be greater than zero.")); + OP_REQUIRES( + ctx, stride > 0 && stride < window_size, + errors::InvalidArgument("Stride must be in [1, window_size).")); + + *output = new Dataset(ctx, window_size, stride, input); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input) + : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) { + input_->Ref(); + + const auto& input_shapes = input_->output_shapes(); + output_shapes_.reserve(input_shapes.size()); + for (const auto& input_shape : input_shapes) { + output_shapes_.emplace_back( + PartialTensorShape({-1}).Concatenate(input_shape)); + } + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + Iterator::Params{this, strings::StrCat(prefix, "::Slide")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { + return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset"); + } + + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* window_size = nullptr; + Node* stride = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); + TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, window_size, stride}, output)); + return Status::OK(); + } + + private: + + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + const int64 window_size = dataset()->window_size_; + const int64 stride = dataset()->stride_; + std::vector> batch_elements; + { + mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + batch_elements.reserve(window_size); + const bool first_call = cache_.empty(); + if (first_call) { + cache_.reserve(window_size); + } else { + // Reuse cache in the previous iteration. + cache_.swap(batch_elements); + } + // Fill up with new elements. + *end_of_sequence = false; + for (size_t i = batch_elements.size(); i < window_size && !*end_of_sequence; + ++i) { + std::vector batch_element_tuple; + TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, + end_of_sequence)); + if (!*end_of_sequence) { + batch_elements.push_back(std::move(batch_element_tuple)); + } else { + input_impl_.reset(); + } + } + // Drop the final smaller blocks. + if (batch_elements.size() < window_size) { + DCHECK(*end_of_sequence); + return Status::OK(); + } + // Cache the data used for the next iteration. + for (size_t i = stride; i < window_size; ++i) { + cache_.emplace_back(batch_elements[i]); + } + } + + // Construct output tensors. + // Those codes below are copied from batch_dataset_op.cc. + const size_t num_tuple_components = batch_elements[0].size(); + const int64 num_batch_elements = batch_elements.size(); + for (size_t component_index = 0; component_index < num_tuple_components; + ++component_index) { + const Tensor& first_element = batch_elements[0][component_index]; + TensorShape batch_component_shape({num_batch_elements}); + batch_component_shape.AppendShape(first_element.shape()); + Tensor batch_component(cpu_allocator(), first_element.dtype(), + batch_component_shape); + // Build the output tuple component by copying one slice + // from each input element in the batch. + for (size_t i = 0; i < num_batch_elements; ++i) { + if (batch_elements[i][component_index].shape() != + first_element.shape()) { + return errors::InvalidArgument( + "Cannot batch tensors with different shapes in component ", + component_index, ". First element had shape ", + first_element.shape().DebugString(), " and element ", i, + " had shape ", + batch_elements[i][component_index].shape().DebugString(), + "."); + } + TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice( + std::move(batch_elements[i][component_index]), &batch_component, + i)); + } + out_tensors->emplace_back(std::move(batch_component)); + } + *end_of_sequence = false; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } + // Save cache. + TF_RETURN_IF_ERROR( + writer->WriteScalar(strings::StrCat("cache_size"), cache_.size())); + for (int64 i = 0; i < cache_.size(); i++) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + strings::StrCat("cache[", i, "]_size"), cache_[i].size())); + for (int64 j = 0; j < cache_[i].size(); j++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + strings::StrCat("cache[", i, "][", j, "]"), cache_[i][j])); + } + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } + // Restore cache. + int64 cache_size; + TF_RETURN_IF_ERROR( + reader->ReadScalar(strings::StrCat("cache_size"), &cache_size)); + cache_.resize(cache_size); + for (int64 i = 0; i < cache_size; i++) { + int64 vector_size; + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat("cache[", i, "]_size"), &vector_size)); + cache_[i].resize(vector_size); + for (int64 j = 0; j < vector_size; j++) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + strings::StrCat("cache[", i, "][", j, "]"), &cache_[i][j])); + } + } + return Status::OK(); + } + + private: + mutex mu_; + std::vector> cache_ GUARDED_BY(mu_); + std::unique_ptr input_impl_ GUARDED_BY(mu_); + }; + + const int64 window_size_; + const int64 stride_; + const DatasetBase* const input_; + std::vector output_shapes_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU), + SlideDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index 39aa3e9eb0..b74a09e2cb 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); +REGISTER_KERNEL_BUILDER( + Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), + DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 184c703599..0656081177 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -238,6 +238,12 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 108d59db2c..7688305019 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ b/tensorflow/core/kernels/hexagon/BUILD @@ -45,6 +45,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:reduction_ops", "//tensorflow/core/kernels:remote_fused_graph_execute_utils", diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 1401bc65a4..e0706568b1 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -444,6 +444,7 @@ class MklConv2DCustomBackpropFilterOp ~MklConv2DCustomBackpropFilterOp() {} private: + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -492,7 +493,9 @@ class MklConv2DCustomBackpropFilterOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -518,31 +521,32 @@ class MklConv2DCustomBackpropFilterOp bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x); } - // Create convolution backward weights primitive. - auto bwd_desc = - (biasEnabled && (bias_grad != nullptr)) - ? convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, - padding) - : convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), outbackprop->GetOpMemDesc(), strides, - padding_l, padding_r, padding); - - auto bwd_pd = convolution_backward_weights::primitive_desc( - bwd_desc, cpu_engine, conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format, - output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - if (biasEnabled && (bias_grad != nullptr)) { + // Create convolution backward weights with bias primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. + output->SetUsrMemDataHandle(*output_tensor); + // Allocate bias_grad tensor TensorShape bias_grad_shape({depth}); Tensor* bias_grad_tensor = nullptr; @@ -553,11 +557,32 @@ class MklConv2DCustomBackpropFilterOp memory::desc({bias_grad_dims}, MklDnnType(), memory::format::x); bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor); bias_grad->SetUsrMemDataHandle(bias_grad_tensor); - } - if (biasEnabled && (bias_grad != nullptr)) { - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad); + PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, + bias_grad); } else { + // Create convolution backward weights primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. + output->SetUsrMemDataHandle(*output_tensor); PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output); } } diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index eeed009531..d203c04934 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -369,6 +369,7 @@ class MklConv2DCustomBackpropInputOp private: const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0, kInputIndex_OutBackProp = 2; + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -419,7 +420,9 @@ class MklConv2DCustomBackpropInputOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -432,9 +435,16 @@ class MklConv2DCustomBackpropInputOp CHECK_NOTNULL(output_tensor); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc( - convolution_direct, output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, padding); + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding): + convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); auto bwd_pd = convolution_backward_data::primitive_desc( bwd_desc, cpu_engine, conv_fwd_pd); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 1440da8f82..f0818eb96d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -493,6 +493,7 @@ class MklConv2DOp : public OpKernel { ~MklConv2DOp() {} explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); @@ -509,6 +510,20 @@ class MklConv2DOp : public OpKernel { errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); } void Compute(OpKernelContext* context) override { @@ -530,17 +545,19 @@ class MklConv2DOp : public OpKernel { MklDnnData filter(&cpu_engine); MklDnnData output(&cpu_engine); - memory::dims src_dims, filter_dims, padding_l, padding_r, strides; + memory::dims src_dims, filter_dims, padding_l, padding_r, + dilations, strides; memory::dims output_dims_tf_order, output_dims_mkl_order; // Get shapes of input tensors in MKL-DNN order - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); auto src_tf_shape = GetTfShape(context, kInputIndex_Src); auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter); conv_utl.GetConvFwdSizesInMklOrder( src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, - &output_dims_tf_order, &output_dims_mkl_order, &padding_l, - &padding_r); + &dilations, &output_dims_tf_order, &output_dims_mkl_order, + &padding_l, &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -553,6 +570,7 @@ class MklConv2DOp : public OpKernel { // Need semantics for Null MKL tensor MklDnnShape output_mkl_shape; output_mkl_shape.SetMklTensor(false); + AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor, src_tf_shape, output_mkl_shape); @@ -596,55 +614,79 @@ class MklConv2DOp : public OpKernel { filter.SetOpMemDesc(filter_dims, memory::format::any); output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - // If bias is enabled, then do the same steps as above for bias. + // MKLDNN dilation starts from 0. + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + if (biasEnabled) { - MklDnnData bias(&cpu_engine); - memory::dims bias_size; - conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); - const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); - bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); - bias.SetOpMemDesc(bias_size, memory::format::any); - - // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, - filter_out_tensor); + // Create convolution primitive with Bias. + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); + bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, dilations, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, + output_dims_mkl_order, tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, + filter_out_tensor); } else { - // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output, - filter_out_tensor); + // Create convolution primitive without Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, + nullptr, &output, filter_out_tensor); } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -658,10 +700,12 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; + const int kDilationH = 0, kDilationW = 1; // Allocate output tensor. void AllocateOutputTensor( diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 9dd88221a8..7ca10db895 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -58,13 +58,16 @@ class MklDnnConvUtil { protected: OpKernelContext* context_; // We don't own this. std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm) - : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + Padding pad, TensorFormat fm, + const std::vector& dilations) : + context_(context), strides_(strides), padding_(pad), + data_format_(fm), dilations_(dilations) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -78,6 +81,16 @@ class MklDnnConvUtil { *strides = {stride_rows, stride_cols}; } + // Calculate Convolution dilations + virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { + // For now we take the dilation from the second and third dimensions only + // (we do not support dilation on the batch or depth dimension). + CHECK_NOTNULL(dilations); + int dilations_rows = GetTensorDim(dilations_, data_format_, 'H'); + int dilations_cols = GetTensorDim(dilations_, data_format_, 'W'); + *dilations = {dilations_rows, dilations_cols}; + } + // Calculate Convolution input size in MKL-DNN order. MKL-DNN // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's @@ -213,7 +226,8 @@ class MklDnnConvUtil { // TODO(nhasabni): Add similar function for input and filter in MklShape. virtual inline void GetOutputAndPadSizeInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, - const memory::dims& strides, memory::dims* output_dims_tf_order, + const memory::dims& strides, const memory::dims& dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -232,6 +246,8 @@ class MklDnnConvUtil { // Stride is vector of 2 elements: {s_r, s_c} int stride_rows = strides[0]; int stride_cols = strides[1]; + int dilation_rows = dilations[0]; + int dilation_cols = dilations[1]; // Output batch is same as input batch. int out_batch = GetTensorDim(input_shape, data_format_, 'N'); @@ -241,11 +257,13 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_rows, filter_rows, + dilation_rows, stride_rows, padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, + dilation_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) @@ -271,7 +289,8 @@ class MklDnnConvUtil { // // Function does not return anything, but sets error in context status. inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, const memory::dims& strides, + size_t src_index, size_t filter_index, + const memory::dims& strides, const memory::dims& dilations, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -286,9 +305,9 @@ class MklDnnConvUtil { errors::InvalidArgument("input must be 4-dimensional", input_tf_shape.DebugString())); - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, + strides, dilations, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -300,12 +319,14 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims* output_dims_tf_order, + memory::dims* strides, memory::dims *dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); + CHECK_NOTNULL(dilations); CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -316,7 +337,9 @@ class MklDnnConvUtil { GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); if (!context_->status().ok()) return; GetStridesInMklOrder(strides); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, + GetDilationsInMklOrder(dilations); + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, + *strides, *dilations, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; @@ -344,7 +367,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -406,15 +443,16 @@ class MklConv2DBackpropCommonOp : public OpKernel { // By default, all dims are in MKL order. Only dims in TF order // are those with prefix tf_order. memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, strides, fwd_output_dims; + memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; memory::dims fwd_output_dims_tf_order; // Get forward convolution parameters. - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); conv_utl.GetConvFwdSizesInMklOrder( input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward @@ -437,10 +475,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { memory::format::hwio); // Tensorflow Output of Conv2D is in data_format order. auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_input_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + + const int kDilationH = 0, kDilationW = 1; + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)) : + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Create memory for user data. Describe how the inputs and outputs of @@ -485,8 +534,9 @@ class MklConv2DBackpropCommonOp : public OpKernel { // Operator-specific call to create and execute primitive. CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_), + &outbackprop, &output, &output_tensor, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_), bwd_output_dims, bwd_output_format); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -535,20 +585,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { virtual memory::format GetOutputFormat(const memory::format data_format) = 0; /// Create and execute the primitive storing output in the output_tensor. - virtual void CreatePrimitive( - OpKernelContext* context, const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, - const memory::dims& strides, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; + virtual void CreatePrimitive(OpKernelContext* context, + const engine& cpu_engine, + const convolution_forward::primitive_desc& conv_fwd_pd, + MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, + MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, + const memory::dims& padding_r, padding_kind padding, + const memory::dims& bwd_output_dims, + memory::format bwd_output_format) = 0; // Get the data_format {NCHW, NHWC} TensorFormat GetTFDataFormat() { return data_format_; } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index e9a2376b54..d91f7107c5 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -442,12 +442,11 @@ class MklInputConversionOp : public OpKernel { auto input_tf_md = mkl_output_mkl_shape.GetTfLayout(); tf_input.SetUsrMem(input_tf_md, tf_tensor); - // Create reorder between tensorflow layout and Mkl layout. + // Create reorder between tensorflow layout and Mkl layout if necessary std::vector net; - CHECK_EQ(tf_input.CheckReorderToOpMem( + tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), - tensor_out, &net), - true); + tensor_out, &net); stream(stream::kind::eager).submit(net).wait(); // -- The tensor in MKL format passes through -- diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 267f4f8d12..0a0f69522f 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -392,7 +392,7 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -437,11 +437,15 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst, - dnn_shape_dst); + + // Allocate output and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {src_index}, dst_index, tf_shape_dst, &dst_tensor)); + AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); // Destination memory descriptor is same as source memory descriptor. - auto dst_md = src_md; + auto &dst_md = src_md; dst.SetUsrMem(dst_md, dst_tensor); // execute net @@ -492,7 +496,7 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -603,8 +607,13 @@ class MklReluGradOpBase : public OpKernel { // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, - tf_shape_diff_src, dnn_shape_diff_src); + + // Allocate diff_src and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {diff_dst_index}, diff_src_index, tf_shape_diff_src, + &diff_src_tensor)); + AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); // diff_src memory descriptor is same as memory descriptor for both // inputs. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 4abfbfb1a6..d65692a552 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,6 +16,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ + +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. + // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index 23df1c35e5..e59adfc6ac 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); +REGISTER_KERNEL_BUILDER( + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index db05ca1ed2..f38459724a 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -234,6 +234,12 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::SpaceToDepthOpFunctor; diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 254fdf115d..6de850bb20 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -205,7 +205,9 @@ Status RecordReader::SkipNBytes(uint64 offset) { if (options_.buffer_size > 0) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); } +#if !defined(IS_SLIM_BUILD) } +#endif return Status::OK(); } // namespace io diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 62dd2efb79..26278e0328 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_RECORD_READER_H_ #define TENSORFLOW_LIB_IO_RECORD_READER_H_ -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" -#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/inputstream_interface.h" +#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_inputstream.h" #endif // IS_SLIM_BUILD diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f32baee45e..e2453b9712 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -266,6 +266,16 @@ REGISTER_OP("BatchDataset") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape); +// TODO(mrry): move SlideDataset to contrib in the future. +REGISTER_OP("SlideDataset") + .Input("input_dataset: variant") + .Input("window_size: int64") + .Input("stride: int64") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") .Input("batch_size: int64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 910fbaca9e..d6a0f38033 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1498,6 +1498,7 @@ REGISTER_OP("_MklConv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Conv2D operator. Uses MKL DNN APIs to perform 2D convolution. @@ -1516,6 +1517,7 @@ REGISTER_OP("__MklDummyConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node does not perform anything. It is just created as an intermediate output of @@ -1541,6 +1543,7 @@ REGISTER_OP("_MklConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2D and BiasAdd operator. Uses MKL DNN APIs to perform 2D convolution and add Bias to the output of convolution. @@ -1563,6 +1566,7 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1589,6 +1593,7 @@ REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1633,6 +1638,7 @@ REGISTER_OP("_MklConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1668,6 +1674,7 @@ REGISTER_OP("_MklConv2DWithBiasBackpropBias") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2DBackpropBias. Uses MKL DNN APIs to compute the gradients of convolution with respect to the bias. @@ -1690,6 +1697,7 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index b6b3722caa..682e46e0fc 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -382,7 +382,8 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( Status WindowsFileSystem::FileExists(const string& fname) { constexpr int kOk = 0; - if (_access(TranslateName(fname).c_str(), kOk) == 0) { + std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); + if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { return Status::OK(); } return errors::NotFound(fname, " not found"); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 7405e01e14..22f2c02b78 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/core/util/stat_summarizer.h b/tensorflow/core/util/stat_summarizer.h index f7b63e8686..79fa63723e 100644 --- a/tensorflow/core/util/stat_summarizer.h +++ b/tensorflow/core/util/stat_summarizer.h @@ -186,7 +186,7 @@ class StatSummarizer { void Reset(); // Returns number of runs. - int num_runs() const { return run_total_us_.count(); } + int num_runs() const { return static_cast(run_total_us_.count()); } // Returns stats of total microseconds spent by all nodes in each run. const Stat& run_total_us() const { return run_total_us_; } diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 9f6fe91b14..6d0458e678 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -51,6 +51,8 @@ Europe: TensorFlow provides multiple communication paths. To pick the right path, please read the following list carefully: + * For new release announcements and security updates, subscribe to + [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). * To ask or answer technical questions about TensorFlow, use [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, ask or search Stack Overflow about a particular error message @@ -65,5 +67,5 @@ please read the following list carefully: on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. * To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md). + [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 818798555a..0481c97885 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 4c6dfa8daf..8f89898c92 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 527884863e..0ee9c849e1 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0-rc1 + 1.6.0 org.tensorflow libtensorflow_jni_gpu - 1.6.0-rc1 + 1.6.0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
d +
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 2741b61bb2..8612762271 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -39,16 +39,9 @@ must be installed on your system: * [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see [NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/). Ensure that you create the `CUDA_HOME` environment variable as - described in NVIDIA's documentation. - * [CUPTI](http://docs.nvidia.com/cuda/cupti/) ships with the CUDA Toolkit, but - you also need to append its path to the `LD_LIBRARY_PATH` environment - variable: - -
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
- -In order to run TensorFlow computations on the GPU, you also need: - - * A GPU card with CUDA Compute Capability 3.0 or higher. See + described in the NVIDIA documentation. + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. * [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA @@ -172,7 +165,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -277,7 +270,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -340,24 +333,23 @@ where: to 6006. * TensorFlowCPUImage is required. It identifies the Docker container. Specify one of the following values: - * gcr.io/tensorflow/tensorflow, which is the TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel, which is the latest + * tensorflow/tensorflow, which is the TensorFlow CPU binary image. + * tensorflow/tensorflow:latest-devel, which is the latest TensorFlow CPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version, which is the + * tensorflow/tensorflow:version, which is the specified version (for example, 1.1.0rc1) of TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel, which is + * tensorflow/tensorflow:version-devel, which is the specified version (for example, 1.1.0rc1) of the TensorFlow GPU binary image plus source code. - gcr.io is the Google Container Registry. Note that some - TensorFlow images are also available at + TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches the latest TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
The following command also launches the latest TensorFlow CPU binary image in a @@ -365,7 +357,7 @@ Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+$ docker run -it -p 8888:8888 tensorflow/tensorflow
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -389,14 +381,14 @@ where: hostPort and containerPort to `8888`. * TensorFlowGPUImage specifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow:latest-gpu, which is the latest + * tensorflow/tensorflow:latest-gpu, which is the latest TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel-gpu, which is + * tensorflow/tensorflow:latest-devel-gpu, which is the latest TensorFlow GPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version-gpu, which is the + * tensorflow/tensorflow:version-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel-gpu, which is + * tensorflow/tensorflow:version-devel-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image plus source code. @@ -405,7 +397,7 @@ following command launches the latest TensorFlow GPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ nvidia-docker run -it gcr.io/tensorflow/tensorflow:latest-gpu bash
+$ nvidia-docker run -it tensorflow/tensorflow:latest-gpu bash
 
The following command also launches the latest TensorFlow GPU binary image @@ -413,13 +405,13 @@ in a Docker container. In this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu
 
The following command installs an older TensorFlow version (0.12.1):
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:0.12.1-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:0.12.1-gpu
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -464,7 +456,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -489,7 +481,7 @@ If you installed through Docker, start a Docker container from which you can run bash. For example:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
@@ -631,14 +623,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -650,14 +642,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -669,14 +661,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
 
@@ -688,14 +680,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 99745fcc6d..7207cb4f2b 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -292,24 +292,23 @@ where: to 6006. * TensorFlowImage is required. It identifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow: TensorFlow binary image. - * gcr.io/tensorflow/tensorflow:latest-devel: TensorFlow + * tensorflow/tensorflow: TensorFlow binary image. + * tensorflow/tensorflow:latest-devel: TensorFlow Binary image plus source code. -gcr.io is the Google Container Registry. Note that some -TensorFlow images are also available at +The TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches a TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
The following command also launches a TensorFlow CPU binary image in a Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook: -
$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+
$ docker run -it -p 8888:8888 tensorflow/tensorflow
Docker will download the TensorFlow binary image the first time you launch it. @@ -351,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl @@ -376,7 +375,7 @@ do the following: If you installed through Docker, start a Docker container that runs bash. For example: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
@@ -519,7 +518,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
 
@@ -527,5 +526,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 4e7b07d78b..a7f33819b4 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -350,10 +350,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0rc1 on Linux: +for TensorFlow 1.6.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index f0a30ee394..2413bc9cfb 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install: NVIDIA® GPU, you must install this version. Note that this version of TensorFlow is typically much easier to install (typically, in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. + installing this version first. Prebuilt binaries will use AVX instructions. * **TensorFlow with GPU support**. TensorFlow programs typically run significantly faster on a GPU than on a CPU. Therefore, if your system has a NVIDIA® GPU meeting the prerequisites shown below @@ -41,7 +41,8 @@ installed on your system: Note that cuDNN is typically installed in a different location from the other CUDA DLLs. Ensure that you add the directory where you installed the cuDNN DLL to your `%PATH%` environment variable. - * GPU card with CUDA Compute Capability 3.0 or higher. See + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md index cd47fc2803..580a899ac4 100644 --- a/tensorflow/docs_src/performance/performance_guide.md +++ b/tensorflow/docs_src/performance/performance_guide.md @@ -78,7 +78,7 @@ training CIFAR-10 illustrates the use of the `tf.data` API along with The `tf.data` API utilizes C++ multi-threading and has a much lower overhead than the Python-based `queue_runner` that is limited by Python's multi-threading performance. A detailed performance guide for the `tf.data` API can be found -[here](#datasets_performance). +[here](@{$datasets_performance}). While feeding data using a `feed_dict` offers a high level of flexibility, in general `feed_dict` does not provide a scalable solution. If only a single GPU diff --git a/tensorflow/docs_src/performance/xla/jit.md b/tensorflow/docs_src/performance/xla/jit.md index d4dc3e57c8..d9a979ccbd 100644 --- a/tensorflow/docs_src/performance/xla/jit.md +++ b/tensorflow/docs_src/performance/xla/jit.md @@ -157,7 +157,7 @@ to fuse Ops is visible by starting at `hlo_graph_0.dot` and viewing each diagram in succession. To Render the .dot file into a png, install -[GraphViz](http://www.graphviz.org/Download..php) and run: +[GraphViz](https://www.graphviz.org/download/) and run: ```shell dot -Tpng hlo_graph_80.dot -o hlo_graph_80.png diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 5fb1c2da88..d1399814ee 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -459,7 +459,7 @@ accuracy_score = classifier.evaluate(x=test_set.data, [debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py), -based on {$tflearn$tf-learn's iris tutorial}, contains a full example of how to +based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.2/get_started/tflearn), contains a full example of how to use the tfdbg with `Estimator`s. To run this example, do: ```none @@ -753,6 +753,7 @@ There are three possible workarounds or solutions: # For LocalCLIDebugHook hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")] ``` + Make sure that the directory pointed to by dump_root is empty or nonexistent. tfdbg cleans up the dump directories before exiting. * Reduce the batch size used during the runs. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 1548d43877..392ac6f7f1 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -159,7 +159,7 @@ available. These operations allow you to build sophisticated @{$reading_data$input pipelines}, at the cost of making the TensorFlow computation somewhat more complicated. See the how-to documentation for -@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers} +@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers} for more information on how to use them. ## Variables @@ -272,7 +272,7 @@ Prefer predefined TensorFlow operations such as @{tf.decode_raw}, If your data is not easily parsable with the built-in TensorFlow operations, consider converting it, offline, to a format that is easily parsable, such -as ${tf.python_io.TFRecordWriter$`TFRecord`} format. +as @{tf.python_io.TFRecordWriter$`TFRecord`} format. The more efficient method to customize the parsing behavior is to @{$adding_an_op$add a new op written in C++} that parses your diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index d01d187e86..55ee42dd64 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -3,7 +3,7 @@ The @{tf.train.Saver} class provides methods to save and restore models. The @{tf.saved_model.simple_save} function is an easy way to build a @{tf.saved_model$saved model} suitable for serving. -[Estimators](/programmers_guide/estimators) automatically save and restore +[Estimators](@{$programmers_guide/estimators}) automatically save and restore variables in the `model_dir`. ## Save and restore variables @@ -400,7 +400,7 @@ defined in: After training an `Estimator` model, you may want to create a service from that model that takes requests and returns a result. You can run such a -service locally on your machine or deploy it scalably in the cloud. +service locally on your machine or deploy it in the cloud. To prepare a trained Estimator for serving, you must export it in the standard SavedModel format. This section explains how to: diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md index 79280d246a..fadfa03e78 100644 --- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md +++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md @@ -83,7 +83,7 @@ data than you need, though. Instead, consider running the merged summary op every `n` steps. The code example below is a modification of the -@{$layers$simple MNIST tutorial}, +[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py), in which we have added some summary ops, and run them every ten steps. If you run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able to visualize statistics, such as how the weights or accuracy varied during diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md index d74d7f3181..a9c2cb3e33 100644 --- a/tensorflow/docs_src/programmers_guide/using_tpu.md +++ b/tensorflow/docs_src/programmers_guide/using_tpu.md @@ -129,10 +129,9 @@ my_tpu_estimator = tf.contrib.tpu.TPUEstimator( Typically the `FLAGS` would be set by command line arguments. To switch from training locally to training on a cloud TPU you would need to: - 1) Set `FLAGS.use_tpu` to `True` - 1) Set `FLAGS.tpu_name` so the - `tf.contrib.cluster_resolver.TPUClusterResolver` can find it - 1) Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). +* Set `FLAGS.use_tpu` to `True` +* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it +* Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`). ## Optimizer diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md index 3692a02f2e..6361079671 100644 --- a/tensorflow/docs_src/tutorials/deep_cnn.md +++ b/tensorflow/docs_src/tutorials/deep_cnn.md @@ -268,7 +268,7 @@ in `cifar10_input.py`. `cifar10_train.py` periodically @{tf.train.Saver$saves} all model parameters in -@{$variables#saving-and-restoring$checkpoint files} +@{$programmers_guide/saved_model$checkpoint files} but it does *not* evaluate the model. The checkpoint file will be used by `cifar10_eval.py` to measure the predictive performance (see [Evaluating a Model](#evaluating-a-model) below). diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md index 246a420400..93d7c86e42 100644 --- a/tensorflow/docs_src/tutorials/image_retraining.md +++ b/tensorflow/docs_src/tutorials/image_retraining.md @@ -115,7 +115,7 @@ process is progressing. The training's objective is to make the loss as small as possible, so you can tell if the learning is working by keeping an eye on whether the loss keeps trending downwards, ignoring the short-term noise. -By default this script will run 4,000 training steps. Each step chooses ten +By default this script will run 4,000 training steps. Each step chooses 100 images at random from the training set, finds their bottlenecks from the cache, and feeds them into the final layer to get predictions. Those predictions are then compared against the actual labels to update the final layer's weights diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index 63f408c2ca..b1f06ce0a3 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -1,9 +1,9 @@ # Improving Linear Models Using Explicit Kernel Methods -Note: This document uses a deprecated version of ${tf.estimator}, -which has a ${tf.contrib.learn.estimator$different interface}. +Note: This document uses a deprecated version of @{tf.estimator}, +which has a different interface (see `tf.contrib.learn Estimator`). It also uses other `contrib` methods whose -${$version_compat#not_covered$API may not be stable}. +@{$version_compat#not_covered$API may not be stable}. In this tutorial, we demonstrate how combining (explicit) kernel methods with linear models can drastically increase the latters' quality of predictions diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ee03f440c9..9b17d0d4d5 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -193,7 +193,7 @@ to calculate loss, configure the training op, and generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training-and-evaluating-the-cnn-mnist-classifier). +Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). ### Input Layer @@ -446,7 +446,7 @@ tf.nn.softmax(logits, name="softmax_tensor") > Note: We use the `name` argument to explicitly name this operation > `softmax_tensor`, so we can reference it later. (We'll set up logging for the -> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook). +> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook)). We compile our predictions in a dict, and return an `EstimatorSpec` object: @@ -534,9 +534,8 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining -> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in -> tf.estimator"} tutorial. +> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial. ### Add evaluation metrics @@ -625,7 +624,8 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > Note: If you don't explicitly assign a name to an operation via the `name` > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on -> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}. +> @{$graph_viz$TensorBoard}) or to enable the +> @{$programmers_guide/debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index e22536adb6..7584a76ba5 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -109,7 +109,8 @@ This download will take a while and download a bit more than 23GB of data. To convert the `ndjson` files to @{$python/python_io#tfrecords_format_details$TFRecord} files containing -${tf.train.Example} protos run the following command. +[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +protos run the following command. ```shell python create_dataset.py --ndjson_path rnn_tutorial_data \ diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index 005dc020f9..27ce75a30d 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -74,8 +74,8 @@ Here's a list of columns available in the Census Income dataset: | relationship | Categorical | Wife, Own-child, Husband, | : : : Not-in-family, Other-relative, : : : : Unmarried. : -| race | Categorical | White, Asian-Pac-Islander, | -: : : Amer-Indian-Eskimo, Other, Black. : +| race | Categorical | Amer-Indian-Eskimo, Asian-Pac- | +: : : Islander, Black, White, Other. : | gender | Categorical | Female, Male. | | capital_gain | Continuous | Capital gains recorded. | | capital_loss | Continuous | Capital Losses recorded. | @@ -247,7 +247,7 @@ hours_per_week = tf.feature_column.numeric_column('hours_per_week') ### Making Continuous Features Categorical through Bucketization Sometimes the relationship between a continuous feature and the label is not -linear. As an hypothetical example, a person's income may grow with age in the +linear. As a hypothetical example, a person's income may grow with age in the early stage of one's career, then the growth may slow at some point, and finally the income decreases after retirement. In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can @@ -361,6 +361,16 @@ The first line of the final output should be something like `accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try more features and transformations and see if you can do even better! +After the model is evaluated, we can use the model to predict whether an individual has an annual income of over +50,000 dollars given an individual's information input. +```python + pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1)) + for pred in pred_iter: + print(pred['classes']) +``` + +The model prediction output would be like `[b'1']` or `[b'0']` which means whether corresponding individual has an annual income of over 50,000 dollars or not. + If you'd like to see a working end-to-end example, you can download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) and set the `model_type` flag to `wide`. diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index bb75431a1f..5c47ce6b67 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -40,6 +40,7 @@ + @@ -49,6 +50,7 @@ + @@ -58,6 +60,7 @@ + @@ -67,6 +70,7 @@ + diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java index 8bd4abb154..429138abe5 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java @@ -351,6 +351,10 @@ public abstract class CameraActivity extends Activity protected void setFragment() { String cameraId = chooseCamera(); + if (cameraId == null) { + Toast.makeText(this, "No Camera Detected", Toast.LENGTH_SHORT).show(); + finish(); + } Fragment fragment; if (useCamera2API) { @@ -416,7 +420,8 @@ public abstract class CameraActivity extends Activity @Override public boolean onKeyDown(final int keyCode, final KeyEvent event) { - if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP) { + if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP + || keyCode == KeyEvent.KEYCODE_BUTTON_L1 || keyCode == KeyEvent.KEYCODE_DPAD_CENTER) { debug = !debug; requestRender(); onSetDebug(debug); diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java index 6a66ec3927..33ec65e9f7 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java @@ -16,8 +16,10 @@ package org.tensorflow.demo; +import android.app.UiModeManager; import android.content.Context; import android.content.res.AssetManager; +import android.content.res.Configuration; import android.graphics.Bitmap; import android.graphics.Bitmap.Config; import android.graphics.BitmapFactory; @@ -31,9 +33,11 @@ import android.graphics.Typeface; import android.media.ImageReader.OnImageAvailableListener; import android.os.Bundle; import android.os.SystemClock; +import android.util.DisplayMetrics; import android.util.Size; import android.util.TypedValue; import android.view.Display; +import android.view.KeyEvent; import android.view.MotionEvent; import android.view.View; import android.view.View.OnClickListener; @@ -43,6 +47,7 @@ import android.widget.BaseAdapter; import android.widget.Button; import android.widget.GridView; import android.widget.ImageView; +import android.widget.RelativeLayout; import android.widget.Toast; import java.io.IOException; import java.io.InputStream; @@ -381,6 +386,27 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL grid = (GridView) findViewById(R.id.grid_layout); grid.setAdapter(adapter); grid.setOnTouchListener(gridTouchAdapter); + + // Change UI on Android TV + UiModeManager uiModeManager = (UiModeManager) getSystemService(UI_MODE_SERVICE); + if (uiModeManager.getCurrentModeType() == Configuration.UI_MODE_TYPE_TELEVISION) { + DisplayMetrics displayMetrics = new DisplayMetrics(); + getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); + int styleSelectorHeight = displayMetrics.heightPixels; + int styleSelectorWidth = displayMetrics.widthPixels - styleSelectorHeight; + RelativeLayout.LayoutParams layoutParams = new RelativeLayout.LayoutParams(styleSelectorWidth, ViewGroup.LayoutParams.MATCH_PARENT); + + // Calculate number of style in a row, so all the style can show up without scrolling + int numOfStylePerRow = 3; + while (styleSelectorWidth / numOfStylePerRow * Math.ceil((float) (adapter.getCount() - 2) / numOfStylePerRow) > styleSelectorHeight) { + numOfStylePerRow++; + } + grid.setNumColumns(numOfStylePerRow); + layoutParams.addRule(RelativeLayout.ALIGN_PARENT_RIGHT); + grid.setLayoutParams(layoutParams); + adapter.buttons.clear(); + } + setStyle(adapter.items[0], 1.0f); } @@ -602,4 +628,38 @@ public class StylizeActivity extends CameraActivity implements OnImageAvailableL borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines); } + + @Override + public boolean onKeyDown(int keyCode, KeyEvent event) { + int moveOffset = 0; + switch (keyCode) { + case KeyEvent.KEYCODE_DPAD_LEFT: + moveOffset = -1; + break; + case KeyEvent.KEYCODE_DPAD_RIGHT: + moveOffset = 1; + break; + case KeyEvent.KEYCODE_DPAD_UP: + moveOffset = -1 * grid.getNumColumns(); + break; + case KeyEvent.KEYCODE_DPAD_DOWN: + moveOffset = grid.getNumColumns(); + break; + default: + return super.onKeyDown(keyCode, event); + } + + // get the highest selected style + int currentSelect = 0; + float highestValue = 0; + for (int i = 0; i < adapter.getCount(); i++) { + if (adapter.items[i].value > highestValue) { + currentSelect = i; + highestValue = adapter.items[i].value; + } + } + setStyle(adapter.items[(currentSelect + moveOffset + adapter.getCount()) % adapter.getCount()], 1); + + return true; + } } diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md index 5bdaeb43ce..5d7bd36837 100644 --- a/tensorflow/examples/ios/README.md +++ b/tensorflow/examples/ios/README.md @@ -119,11 +119,13 @@ rundown: `tensorflow/contrib/makefile/gen/lib` to the Library Search Paths setting. - You'll also need to add `libprotobuf.a` and `libprotobuf-lite.a` from - `tensorflow/contrib/makefile/gen/protobuf_ios/lib` to your _Build Stages_ and - _Library Search Paths_. + `tensorflow/contrib/makefile/gen/protobuf_ios/lib` + and `nsync.a` from `tensorflow/contrib/makefile/downloads/nsync/builds/lipo.ios.c++11` + to your _Build Stages_ and _Library Search Paths_. - The _Header Search_ paths needs to contain: - the root folder of tensorflow, + - `tensorflow/contrib/makefile/downloads/nsync/public` - `tensorflow/contrib/makefile/downloads/protobuf/src` - `tensorflow/contrib/makefile/downloads`, - `tensorflow/contrib/makefile/downloads/eigen`, and diff --git a/tensorflow/examples/learn/mnist.py b/tensorflow/examples/learn/mnist.py index 98819b20bf..3ead8614b6 100644 --- a/tensorflow/examples/learn/mnist.py +++ b/tensorflow/examples/learn/mnist.py @@ -61,8 +61,10 @@ def conv_model(features, labels, mode): # Densely connected layer with 1024 neurons. h_fc1 = tf.layers.dense(h_pool2_flat, 1024, activation=tf.nn.relu) - if mode == tf.estimator.ModeKeys.TRAIN: - h_fc1 = tf.layers.dropout(h_fc1, rate=0.5) + h_fc1 = tf.layers.dropout( + h_fc1, + rate=0.5, + training=(mode == tf.estimator.ModeKeys.TRAIN)) # Compute logits (1 per class) and compute loss. logits = tf.layers.dense(h_fc1, N_DIGITS, activation=None) diff --git a/tensorflow/examples/learn/resnet.py b/tensorflow/examples/learn/resnet.py index 9542e55250..c00de932a8 100755 --- a/tensorflow/examples/learn/resnet.py +++ b/tensorflow/examples/learn/resnet.py @@ -53,6 +53,8 @@ def res_net_model(features, labels, mode): ndim = int(sqrt(input_shape[1])) x = tf.reshape(x, [-1, ndim, ndim, 1]) + training = (mode == tf.estimator.ModeKeys.TRAIN) + # First convolution expands to 64 channels with tf.variable_scope('conv_layer1'): net = tf.layers.conv2d( @@ -60,7 +62,7 @@ def res_net_model(features, labels, mode): filters=64, kernel_size=7, activation=tf.nn.relu) - net = tf.layers.batch_normalization(net) + net = tf.layers.batch_normalization(net, training=training) # Max pool net = tf.layers.max_pooling2d( @@ -88,7 +90,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) with tf.variable_scope(name + '/conv_bottleneck'): conv = tf.layers.conv2d( @@ -97,7 +99,7 @@ def res_net_model(features, labels, mode): kernel_size=3, padding='same', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # 1x1 convolution responsible for restoring dimension with tf.variable_scope(name + '/conv_out'): @@ -108,7 +110,7 @@ def res_net_model(features, labels, mode): kernel_size=1, padding='valid', activation=tf.nn.relu) - conv = tf.layers.batch_normalization(conv) + conv = tf.layers.batch_normalization(conv, training=training) # shortcut connections that turn the network into its counterpart # residual function (identity shortcut) @@ -154,7 +156,7 @@ def res_net_model(features, labels, mode): loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) # Create training op. - if mode == tf.estimator.ModeKeys.TRAIN: + if training: optimizer = tf.train.AdagradOptimizer(learning_rate=0.01) train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index d11ee6f74c..54e944c264 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -86,7 +86,6 @@ py_library( ":ops", ":platform", ":pywrap_tensorflow", - ":saver_test_utils", ":script_ops", ":session_ops", ":sets", @@ -96,14 +95,15 @@ py_library( ":standard_ops", ":state_ops", ":string_ops", - ":subscribe", ":summary", ":tensor_array_ops", + ":training", + ":saver_test_utils", + ":subscribe", ":test_ops", # TODO: Break testing code out into separate rule. - ":tf_cluster", ":tf_item", + ":tf_cluster", ":tf_optimizer", - ":training", ":util", ":weights_broadcast_ops", "//third_party/py/numpy", @@ -3971,7 +3971,11 @@ py_test( srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", tags = [ + "manual", + "no_cuda_on_cpu_tap", + "no_oss", "no_windows", + "notap", ], deps = [ ":client", diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index 9641b8b7f2..5e6b5acdb0 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -155,9 +155,12 @@ class TimelineTest(test.TestCase): ctf = step_analysis.chrome_trace.format_to_string() self._validateTrace(ctf) maximums = step_analysis.allocator_maximums - self.assertTrue('cpu' in maximums) + cpuname = 'cpu' + if 'mklcpu' in maximums: + cpuname = 'mkl' + cpuname + self.assertTrue(cpuname in maximums) cpu_max = maximums[ - 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums['cpu'] + 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname] # At least num1 + num2, both float32s (4 bytes each) self.assertGreater(cpu_max.num_bytes, 8) self.assertGreater(cpu_max.timestamp, 0) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 9fcbd4ff77..6a4132bca2 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -139,8 +139,8 @@ class Estimator(object): to configure Estimators from hyper parameter tuning. * `config`: Optional configuration object. Will receive what is passed to Estimator in `config` parameter, or the default `config`. - Allows updating things in your model_fn based on configuration - such as `num_ps_replicas`, or `model_dir`. + Allows updating things in your `model_fn` based on + configuration such as `num_ps_replicas`, or `model_dir`. * Returns: `EstimatorSpec` @@ -301,11 +301,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. hooks: List of `SessionRunHook` subclass instances. Used for callbacks inside the training loop. @@ -381,11 +381,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. steps: Number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. @@ -457,17 +457,17 @@ class Estimator(object): checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. yield_single_examples: If False, yield the whole batch as returned by the - model_fn instead of decomposing the batch into individual elements. This - is useful if model_fn return some tensor with first dimension not - equal to the batch size + `model_fn` instead of decomposing the batch into individual elements. + This is useful if `model_fn` returns some tensors whose first dimension + is not equal to the batch size. Yields: Evaluated values of `predictions` tensors. Raises: - ValueError: Could not find a trained model in model_dir. - ValueError: if batch length of predictions are not same and - yield_single_examples is True. + ValueError: Could not find a trained model in `model_dir`. + ValueError: If batch length of predictions is not the same and + `yield_single_examples` is True. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. @@ -849,7 +849,7 @@ class Estimator(object): 'loss': estimator_spec.loss, 'step': global_step_tensor }, - every_n_iter=100) + every_n_iter=self._config.log_step_count_steps) ]) worker_hooks.extend(estimator_spec.training_hooks) diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 62f035bce5..820fda7765 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -423,7 +423,7 @@ class RunConfig(object): to be saved. The default value of 10,000 hours effectively disables the feature. log_step_count_steps: The frequency, in number of global steps, that the - global step/sec will be logged during training. + global step/sec and the loss will be logged during training. Raises: diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 2cc3331a15..e38b765da5 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -128,9 +128,16 @@ class TrainSpec( """Creates a validated `TrainSpec` instance. Args: - input_fn: Training input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that provides input data for training as minibatches. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the @@ -185,9 +192,16 @@ class EvalSpec( """Creates a validated `EvalSpec` instance. Args: - input_fn: Evaluation input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that constructs the input data for evaluation. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + steps: Int. Positive number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. See `Estimator.evaluate` for details. diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 57506f9aff..4acb41553e 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -266,7 +266,7 @@ class Model(Network): # initialization for Eager mode execution if context.executing_eagerly(): if target_tensors is not None: - raise ValueError('target_tensors are not currently supported in Eager' + raise ValueError('target_tensors are not currently supported in Eager ' 'mode.') self.total_loss = None self.metrics_tensors = [] diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 2910719807..791f9b3113 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -546,8 +546,8 @@ class RNN(Layer): raise ValueError('The initial state or constants of an RNN' ' layer cannot be specified with a mix of' ' Keras tensors and non-Keras tensors' - '(a "Keras tensor" is a tensor that was' - 'returned by a Keras layer, or by `Input`)') + ' (a "Keras tensor" is a tensor that was' + ' returned by a Keras layer, or by `Input`)') if is_keras_tensor: # Compute the full input spec, including state and constants diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index 5196bf1740..3bbe87f92d 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -490,8 +490,8 @@ def slice_arrays(arrays, start=None, stop=None): if arrays is None: return [None] if isinstance(start, list) and stop is not None: - raise ValueError('The stop argument has to be None if the value of start is' - 'a list.') + raise ValueError('The stop argument has to be None if the value of start ' + 'is a list.') elif isinstance(arrays, list): if hasattr(start, '__len__'): # hdf5 datasets only support list objects as indices diff --git a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py index 45c1b92075..4761cece82 100644 --- a/tensorflow/python/keras/_impl/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/vis_utils.py @@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): layer_id = str(id(layer)) for i, node in enumerate(layer._inbound_nodes): node_key = layer.name + '_ib-' + str(i) - if node_key in model._container_nodes: + if node_key in model._network_nodes: # pylint: disable=protected-access for inbound_layer in node.inbound_layers: inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 81c6a4aa6e..c22934ce47 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -606,6 +606,17 @@ class ConcatOpTest(test.TestCase): inp_tensors_placeholders, -2, output_shape=[2, 3], gather_indexes=[2, 0], feed_dict=feed_dict) + def testConcatAxisType(self): + for dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(use_gpu=True): + t1 = [[1, 2, 3], [4, 5, 6]] + t2 = [[7, 8, 9], [10, 11, 12]] + + c = gen_array_ops.concat_v2([t1, t2], + constant_op.constant(1, dtype=dtype)) + self.assertEqual([2, 6], c.get_shape().as_list()) + output = c.eval() + self.assertAllEqual([[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]], output) class ConcatOffsetTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index f4fe01f868..25525cc128 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -970,7 +970,7 @@ class Conv2DTest(test.TestCase): self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 6, 1], @@ -984,7 +984,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -998,7 +998,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -1012,7 +1012,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 4, 3], @@ -1026,7 +1026,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 3, 1], @@ -1040,7 +1040,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 6, 1], @@ -1054,7 +1054,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 2, 3, 1], @@ -1068,7 +1068,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[0, 2, 3, 1], @@ -1082,7 +1082,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): # The GPU version of this test is not very stable. So adjusting the # error threshold to 1e-4. @@ -1098,7 +1098,7 @@ class Conv2DTest(test.TestCase): err=1e-4) def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 3, 1], diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index 96c9718b83..f0beabb4e2 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -35,8 +35,8 @@ from tensorflow.python.platform import tf_logging class DepthToSpaceTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.depth_to_space(input_nhwc, block_size) @@ -59,6 +59,12 @@ class DepthToSpaceTest(test.TestCase): x_out = [[[[1], [2]], [[3], [4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1, 2, 3, 4]]]] + block_size = 2 + x_out = [[[[1], [2]], [[3], [4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testBlockSize2(self): diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index b76135764f..cd90d16aac 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -34,8 +34,8 @@ from tensorflow.python.platform import tf_logging class SpaceToDepthTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.space_to_depth(input_nhwc, block_size) @@ -58,6 +58,12 @@ class SpaceToDepthTest(test.TestCase): x_out = [[[[1, 2, 3, 4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1], [2]], [[3], [4]]]] + block_size = 2 + x_out = [[[[1, 2, 3, 4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. def testLargerInput2x2(self): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index e9066d3fda..e4395bea92 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -578,7 +578,7 @@ class Layer(checkpointable.CheckpointableBase): if isinstance(variable, tf_variables.PartitionedVariable): raise RuntimeError( 'Partitioned variable regularization is not yet ' - 'supported when executing eagerly. File a feature request' + 'supported when executing eagerly. File a feature request ' 'if this is important to you.') # Save a zero-argument lambda which runs the regularizer on the # variable, to be executed when `Layer.losses` is requested. diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 11daf01670..29fb92ccb5 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -664,9 +664,16 @@ def batch_normalization(inputs, Note: when training, the moving_mean and moving_variance need to be updated. By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they - need to be added as a dependency to the `train_op`. For example: + need to be added as a dependency to the `train_op`. Also, be sure to add + any batch_normalization ops before getting the update_ops collection. + Otherwise, update_ops will be empty, and training/inference will not work + properly. For example: ```python + x_norm = tf.layers.batch_normalization(x, training=training) + + # ... + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss) diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index a751607aaa..223858edfa 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -485,6 +485,11 @@ class FileIoTest(test.TestCase): f.flush() self.assertEqual(content, f.read(len(content) + 1)) + def testUTF8StringPathExists(self): + file_path = os.path.join(self._base_dir, "UTF8测试_file_exist") + file_io.write_string_to_file(file_path, "testing") + v = file_io.file_exists(file_path) + self.assertEqual(v, True) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/lib/io/tf_record.py b/tensorflow/python/lib/io/tf_record.py index 48ea107a14..6fcf9c91d8 100644 --- a/tensorflow/python/lib/io/tf_record.py +++ b/tensorflow/python/lib/io/tf_record.py @@ -75,14 +75,16 @@ def tf_record_iterator(path, options=None): if reader is None: raise IOError("Could not open %s." % path) - while True: - try: - with errors.raise_exception_on_not_ok_status() as status: - reader.GetNext(status) - except errors.OutOfRangeError: - break - yield reader.record() - reader.Close() + try: + while True: + try: + with errors.raise_exception_on_not_ok_status() as status: + reader.GetNext(status) + except errors.OutOfRangeError: + break + yield reader.record() + finally: + reader.Close() @tf_export("python_io.TFRecordWriter") diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 37470e00d7..5b4fb4f7c8 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -341,7 +341,7 @@ def self_adjoint_eig(tensor, name=None): name: string, optional name of the operation. Returns: - e: Eigenvalues. Shape is `[..., N]`. + e: Eigenvalues. Shape is `[..., N]`. Sorted in non-decreasing order. v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most matrices contain eigenvectors of the corresponding matrices in `tensor` """ diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index fb3fe77b4d..a74de39eab 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -150,14 +150,12 @@ class _NonAtrousConvolution(object): conv_dims)) if conv_dims == 1: # conv1d uses the 2-d data format names - if data_format is None or data_format == "NWC": - data_format_2d = "NHWC" - elif data_format == "NCW": - data_format_2d = "NCHW" - else: + if data_format is None: + data_format = "NWC" + elif data_format not in {"NCW", "NWC", "NCHW", "NHWC"}: raise ValueError("data_format must be \"NWC\" or \"NCW\".") self.strides = strides[0] - self.data_format = data_format_2d + self.data_format = data_format self.conv_op = self._conv1d elif conv_dims == 2: if data_format is None or data_format == "NHWC": @@ -699,7 +697,7 @@ def convolution( `padded_input` is obtained by zero padding the input using an effective spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and output striding `strides` as described in the - @{tf.nn.convolution$comment here}. + @{$python/nn#Convolution$comment here}. In the case that `data_format` does start with `"NC"`, the `input` and output (but not the `filter`) are simply transposed as follows: @@ -1043,9 +1041,7 @@ def pool( @tf_export("nn.atrous_conv2d") def atrous_conv2d(value, filters, rate, padding, name=None): - """Atrous convolution (a.k.a. - - convolution with holes or dilated convolution). + """Atrous convolution (a.k.a. convolution with holes or dilated convolution). This function is a simpler wrapper around the more general @{tf.nn.convolution}, and exists only for backwards compatibility. You can diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index db8159579a..6a2dd3f1cd 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -209,7 +209,7 @@ def random_uniform(shape, maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on the range of random values to generate. Defaults to 1 if `dtype` is floating point. - dtype: The type of the output: 'float16`, `float32`, `float64`, `int32`, + dtype: The type of the output: `float16`, `float32`, `float64`, `int32`, or `int64`. seed: A Python integer. Used to create a random seed for the distribution. See @{tf.set_random_seed} diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index c59eccc174..42af7f8b27 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -867,7 +867,7 @@ def raw_rnn(cell, loop_fn, ```python time = tf.constant(0, dtype=tf.int32) - (finished, next_input, initial_state, _, loop_state) = loop_fn( + (finished, next_input, initial_state, emit_structure, loop_state) = loop_fn( time=time, cell_output=None, cell_state=None, loop_state=None) emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) state = initial_state @@ -878,7 +878,7 @@ def raw_rnn(cell, loop_fn, loop_state=loop_state) # Emit zeros and copy forward state for minibatch entries that are finished. state = tf.where(finished, state, next_state) - emit = tf.where(finished, tf.zeros_like(emit), emit) + emit = tf.where(finished, tf.zeros_like(emit_structure), emit) emit_ta = emit_ta.write(time, emit) # If any new minibatch entries are marked as finished, mark these. finished = tf.logical_or(finished, next_finished) @@ -938,10 +938,15 @@ def raw_rnn(cell, loop_fn, and `emit_output`: the output to store for this iteration. Note that `emit_output` should be a `Tensor` or (possibly nested) - tuple of tensors with shapes and structure matching `cell.output_size` - and `cell_output` above. The parameter `cell_state` and output - `next_cell_state` may be either a single or (possibly nested) tuple - of tensors. The parameter `loop_state` and + tuple of tensors which is aggregated in the `emit_ta` inside the + `while_loop`. For the first call to `loop_fn`, the `emit_output` + corresponds to the `emit_structure` which is then used to determine the + size of the `zero_tensor` for the `emit_ta` (defaults to + `cell.output_size`). For the subsequent calls to the `loop_fn`, the + `emit_output` corresponds to the actual output tensor + that is to be aggregated in the `emit_ta`. The parameter `cell_state` + and output `next_cell_state` may be either a single or (possibly nested) + tuple of tensors. The parameter `loop_state` and output `next_loop_state` may be either a single or (possibly nested) tuple of `Tensor` and `TensorArray` objects. This last parameter may be ignored by `loop_fn` and the return value may be `None`. If it diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py index 6d7eaababc..5e2146b79f 100644 --- a/tensorflow/python/ops/special_math_ops.py +++ b/tensorflow/python/ops/special_math_ops.py @@ -163,7 +163,7 @@ def einsum(equation, *inputs, **kwargs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) @@ -402,7 +402,7 @@ def _exponential_space_einsum(equation, *inputs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py index 2c212f4548..d7c3a7e8dc 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -192,6 +192,9 @@ class EinsumTest(test.TestCase): 'abc,cba', 'dba,ead,cad->bce', 'aef,fbc,dca->bde', + 'iJ,Jk->ik', + 'iJ,Ki->JK', + 'iJk,Jklm->Jk' ] long_cases = [ @@ -208,6 +211,8 @@ class EinsumTest(test.TestCase): 'ijk ijk', 'ij.jk->ik', 'ij...,jk...->ik...', + 'ij,k ->kji', + 'ij,k-> kji', # axis in output that does not exist 'ij,jk->im', diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index a52f325ddb..e9f1def48c 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -56,8 +56,6 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as saver_lib -FLAGS = None - def freeze_graph_with_def_protos(input_graph_def, input_saver_def, @@ -256,25 +254,24 @@ def freeze_graph(input_graph, checkpoint_version=checkpoint_version) -def main(unused_args): - if FLAGS.checkpoint_version == 1: +def main(unused_args, flags): + if flags.checkpoint_version == 1: checkpoint_version = saver_pb2.SaverDef.V1 - elif FLAGS.checkpoint_version == 2: + elif flags.checkpoint_version == 2: checkpoint_version = saver_pb2.SaverDef.V2 else: print("Invalid checkpoint version (must be '1' or '2'): %d" % - FLAGS.checkpoint_version) + flags.checkpoint_version) return -1 - freeze_graph(FLAGS.input_graph, FLAGS.input_saver, FLAGS.input_binary, - FLAGS.input_checkpoint, FLAGS.output_node_names, - FLAGS.restore_op_name, FLAGS.filename_tensor_name, - FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes, - FLAGS.variable_names_whitelist, FLAGS.variable_names_blacklist, - FLAGS.input_meta_graph, FLAGS.input_saved_model_dir, - FLAGS.saved_model_tags, checkpoint_version) - + freeze_graph(flags.input_graph, flags.input_saver, flags.input_binary, + flags.input_checkpoint, flags.output_node_names, + flags.restore_op_name, flags.filename_tensor_name, + flags.output_graph, flags.clear_devices, flags.initializer_nodes, + flags.variable_names_whitelist, flags.variable_names_blacklist, + flags.input_meta_graph, flags.input_saved_model_dir, + flags.saved_model_tags, checkpoint_version) -if __name__ == "__main__": +def run_main(): parser = argparse.ArgumentParser() parser.register("type", "bool", lambda v: v.lower() == "true") parser.add_argument( @@ -376,5 +373,10 @@ if __name__ == "__main__": separated by \',\'. For tag-set contains multiple tags, all tags \ must be passed in.\ """) - FLAGS, unparsed = parser.parse_known_args() - app.run(main=main, argv=[sys.argv[0]] + unparsed) + flags, unparsed = parser.parse_known_args() + + my_main = lambda unused_args: main(unused_args, flags) + app.run(main=my_main, argv=[sys.argv[0]] + unparsed) + +if __name__ == '__main__': + run_main() diff --git a/tensorflow/python/tools/inspect_checkpoint.py b/tensorflow/python/tools/inspect_checkpoint.py index dd876cbe7f..6504fbc107 100644 --- a/tensorflow/python/tools/inspect_checkpoint.py +++ b/tensorflow/python/tools/inspect_checkpoint.py @@ -30,7 +30,7 @@ FLAGS = None def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors, - all_tensor_names): + all_tensor_names=False): """Prints tensors in a checkpoint file. If no `tensor_name` is provided, prints the tensor names and shapes @@ -139,7 +139,7 @@ if __name__ == "__main__": const=True, type="bool", default=False, - help="If True, print the values of all the tensors.") + help="If True, print the names and values of all the tensors.") parser.add_argument( "--all_tensor_names", nargs="?", diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b0e9e3e5ed..b88be4ae04 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -38,11 +38,15 @@ from tensorflow.core.example import example_pb2 from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.debug.wrappers import local_cli_wrapper +from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils +# Set of ops to blacklist. +_OP_BLACKLIST = set(['WriteFile', 'ReadFile']) + def _show_tag_sets(saved_model_dir): """Prints the tag-sets stored in SavedModel directory. @@ -242,6 +246,27 @@ def get_signature_def_map(saved_model_dir, tag_set): return meta_graph.signature_def +def scan_meta_graph_def(meta_graph_def): + """Scans meta_graph_def and reports if there are ops on blacklist. + + Print ops if they are on black list, or print success if no blacklisted ops + found. + + Args: + meta_graph_def: MetaGraphDef protocol buffer. + """ + all_ops_set = set( + meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) + blacklisted_ops = _OP_BLACKLIST & all_ops_set + if blacklisted_ops: + # TODO(yifeif): print more warnings + print('MetaGraph with tag set %s contains the following blacklisted ops:' % + meta_graph_def.meta_info_def.tags, blacklisted_ops) + else: + print('MetaGraph with tag set %s does not contain blacklisted ops.' % + meta_graph_def.meta_info_def.tags) + + def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key, input_tensor_key_feed_dict, outdir, overwrite_flag, tf_debug=False): @@ -609,6 +634,21 @@ def run(args): args.overwrite, tf_debug=args.tf_debug) +def scan(args): + """Function triggered by scan command. + + Args: + args: A namespace parsed from command line. + """ + if args.tag_set: + scan_meta_graph_def( + saved_model_utils.get_meta_graph_def(args.dir, args.tag_set)) + else: + saved_model = reader.read_saved_model(args.dir) + for meta_graph_def in saved_model.meta_graphs: + scan_meta_graph_def(meta_graph_def) + + def create_parser(): """Creates a parser that parse the command line arguments. @@ -730,6 +770,26 @@ def create_parser(): 'SavedModel.') parser_run.set_defaults(func=run) + # scan command + scan_msg = ('Usage example:\n' + 'To scan for blacklisted ops in SavedModel:\n' + '$saved_model_cli scan --dir /tmp/saved_model\n' + 'To scan a specific MetaGraph, pass in --tag_set\n') + parser_scan = subparsers.add_parser( + 'scan', + description=scan_msg, + formatter_class=argparse.RawTextHelpFormatter) + parser_scan.add_argument( + '--dir', + type=str, + required=True, + help='directory containing the SavedModel to execute') + parser_scan.add_argument( + '--tag_set', + type=str, + help='tag-set of graph in SavedModel to scan, separated by \',\'') + parser_scan.set_defaults(func=scan) + return parser diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index f99c844845..eedc893a38 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -525,6 +525,28 @@ signature_def['serving_default']: y_expected = np.array([[2.5], [3.0]]) self.assertAllClose(y_expected, y_actual) + def testScanCommand(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = self.parser.parse_args(['scan', '--dir', base_path]) + with captured_output() as (out, _): + saved_model_cli.scan(args) + output = out.getvalue().strip() + self.assertTrue('does not contain blacklisted ops' in output) + + def testScanCommandFoundBlacklistedOp(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = self.parser.parse_args( + ['scan', '--dir', base_path, '--tag_set', 'serve']) + op_blacklist = saved_model_cli._OP_BLACKLIST + saved_model_cli._OP_BLACKLIST = set(['VariableV2']) + with captured_output() as (out, _): + saved_model_cli.scan(args) + saved_model_cli._OP_BLACKLIST = op_blacklist + output = out.getvalue().strip() + self.assertTrue('\'VariableV2\'' in output) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 5ef8bd9e9c..ba0d038475 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1135,8 +1135,9 @@ class Saver(object): the proliferation of checkpoint files on disk: * `max_to_keep` indicates the maximum number of recent checkpoint files to - keep. As new files are created, older files are deleted. If None or 0, - all checkpoint files are kept. Defaults to 5 (that is, the 5 most recent + keep. As new files are created, older files are deleted. If None or 0, + no checkpoints are deleted from the filesystem but only the last one is + kept in the `checkpoint` file. Defaults to 5 (that is, the 5 most recent checkpoint files are kept.) * `keep_checkpoint_every_n_hours`: In addition to keeping the most recent diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 0b3b060fe7..03e3e0857f 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -274,7 +274,8 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 7000 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \ - __macro(cudnnSetConvolutionMathType) + __macro(cudnnSetConvolutionMathType) \ + __macro(cudnnSetRNNMatrixMathType) // clang-format on CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -586,6 +587,19 @@ static bool TensorOpMathEnabled() { return is_enabled; } +// A helper function to decide whether to enable the TENSOR_OP_MATH math type +// for RNNs. +static bool RnnTensorOpMathEnabled() { + static bool is_enabled = [] { + bool is_disabled = false; + TF_CHECK_OK( + tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH", + /*default_val=*/false, &is_disabled)); + return !is_disabled; + }(); + return is_enabled; +} + // A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT // in batchnorm. This mode can be faster in some tasks because an optimized path // may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute @@ -1124,6 +1138,9 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { SetFailure(cudnn_params_desc_->Status()); return; } + if (data_type == CUDNN_DATA_HALF) { + set_use_tensor_op_math(true); + } } ~CudnnRnnDescriptor() override { if (rnn_desc_) { @@ -1132,6 +1149,20 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { CUDNN_RETURN_IF_FAIL(status, "Unable to destroy RNN descriptor"); } } + void set_use_tensor_op_math(bool use_tensor_op_math) { +#if CUDNN_VERSION >= 7000 + cudnnMathType_t math_type = + (use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH); + if (RnnTensorOpMathEnabled()) { + cudnnStatus_t status = + wrap::cudnnSetRNNMatrixMathType(parent_, rnn_desc_, math_type); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "could not set cudnn RNN math type: " + << ToString(status); + } + } +#endif + } cudnnRNNDescriptor_t handle() const { if (!ok()) return nullptr; return rnn_desc_; diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 99c47fd601..96f501e163 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -247,6 +247,8 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor = public_api.PublicAPIVisitor(visitor) public_api_visitor.do_not_descend_map['tf'].append('contrib') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] + # TODO(annarev): Make slide_dataset available in API. + public_api_visitor.private_map['tf'] = ['slide_dataset'] traverse.traverse(api, public_api_visitor) proto_dict = visitor.GetProtos() diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake index ec90c83aac..d5dea4f3e4 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cmake +++ b/tensorflow/tools/ci_build/Dockerfile.cmake @@ -23,11 +23,12 @@ RUN /install/install_deb_packages.sh RUN apt-get update RUN apt-get install -y --no-install-recommends python-pip +RUN pip install --upgrade wheel RUN pip install --upgrade astor RUN pip install --upgrade gast RUN pip install --upgrade numpy RUN pip install --upgrade termcolor # Install golang -RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable -RUN apt-get install -y golang +RUN apt-get install -t xenial-backports -y golang-1.9 +ENV PATH=${PATH}:/usr/lib/go-1.9/bin diff --git a/tensorflow/tools/compatibility/tf_upgrade.py b/tensorflow/tools/compatibility/tf_upgrade.py index 6e90b286c9..1f8833582a 100644 --- a/tensorflow/tools/compatibility/tf_upgrade.py +++ b/tensorflow/tools/compatibility/tf_upgrade.py @@ -662,9 +662,9 @@ class TFAPIChangeSpec(APIChangeSpec): def _reverse_handler(file_edit_recorder, node): # TODO(aselle): Could check for a literal list of bools and try to convert # them to indices. - comment = ("ERROR: tf.reverse has had its argument semantics changed\n" - "significantly the converter cannot detect this reliably, so you" - "need to inspect this usage manually.\n") + comment = ("ERROR: tf.reverse has had its argument semantics changed " + "significantly the converter cannot detect this reliably, so " + "you need to inspect this usage manually.\n") file_edit_recorder.add( comment, node.lineno, diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index c1b1f79bbd..228d5ee35d 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,6 +17,14 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP +You can test specify version of TensorFlow: + +```shell +./local_test.sh ${whl_file_url} +``` + +For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu. + **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 435f9d0dc9..caae7fd530 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,12 +16,11 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script peforms the following steps: -# 1) Build the docker-in-docker (dind) image capable of running docker and -# Kubernetes (k8s) cluster inside. +# This script performs the following steps: +# 1) Build the docker image capable of running distributed TensorFlow in docker. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh @@ -64,15 +63,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -LOCAL_K8S_CACHE=${HOME}/kubernetes -# Helper function -get_container_id_by_image_name() { - # Get the id of a container by image name - # Usage: get_docker_container_id_by_image_name - - docker ps | grep $1 | awk '{print $1}' -} +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -84,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - die "whl file location is not specified" + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -121,7 +115,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# Build docker-in-docker image for local k8s cluster. +# Build docker image for local distributed TensorFlow cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then diff --git a/tensorflow/tools/dist_test/python/mnist_replica.py b/tensorflow/tools/dist_test/python/mnist_replica.py index a2d12442c4..d6e7f317dd 100644 --- a/tensorflow/tools/dist_test/python/mnist_replica.py +++ b/tensorflow/tools/dist_test/python/mnist_replica.py @@ -56,7 +56,7 @@ flags.DEFINE_integer("task_index", None, flags.DEFINE_integer("num_gpus", 1, "Total number of gpus for each machine." "If you don't use GPU, please set it to '0'") flags.DEFINE_integer("replicas_to_aggregate", None, - "Number of replicas to aggregate before parameter update" + "Number of replicas to aggregate before parameter update " "is applied (For sync_replicas mode only; default: " "num_workers)") flags.DEFINE_integer("hidden_units", 100, diff --git a/tensorflow/tools/docker/Dockerfile.gpu b/tensorflow/tools/docker/Dockerfile.gpu index b6682cd681..625321e123 100644 --- a/tensorflow/tools/docker/Dockerfile.gpu +++ b/tensorflow/tools/docker/Dockerfile.gpu @@ -1,11 +1,18 @@ -FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 +FROM nvidia/cuda:9.0-base-ubuntu16.04 LABEL maintainer="Craig Citro " # Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ + cuda-command-line-tools-9-0 \ + cuda-cublas-9-0 \ + cuda-cufft-9-0 \ + cuda-curand-9-0 \ + cuda-cusolver-9-0 \ + cuda-cusparse-9-0 \ curl \ + libcudnn7=7.0.5.15-1+cuda9.0 \ libfreetype6-dev \ libpng12-dev \ libzmq3-dev \ diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 3630dbd740..cbcdbf5b80 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -114,6 +114,13 @@ def configure(src_base_path, gen_path, debug=False): for target, src in link_map.items(): if src is None: open(os.path.join(gen_path, target), "w").write("") + elif not os.path.exists(src): + # Git repo is configured in a way we don't support such as having + # packed refs. Even though in a git repo, tf.__git_version__ will not + # be accurate. + # TODO(mikecase): Support grabbing git info when using packed refs. + open(os.path.join(gen_path, target), "w").write("") + spec["git"] = False else: try: # In python 3.5, symlink function exists even on Windows. But requires diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index b7d7fac315..6e21aa2846 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -178,6 +178,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/kernels:quantization_utils", "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/util/tensor_bundle", ], diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d89afe85c7..d86f65325b 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -182,6 +182,36 @@ Status FuseBatchNormWithConv(const NodeMatch& match, return Status::OK(); } +Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, + std::vector* new_nodes) { + // Calculate the scale and offset values to apply. + std::vector scale_values; + std::vector offset_values; + TF_RETURN_IF_ERROR( + GetScaleAndOffsetValues(match, &scale_values, &offset_values)); + + // Fuse conv weights, and set the final output node name as batch_norm_node. + const NodeDef& batch_norm_node = match.node; + const NodeMatch& batch_to_space_node_match = match.inputs[0]; + const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0]; + const NodeDef& batch_to_space_node = batch_to_space_node_match.node; + const NodeDef& conv_node = conv_node_match.node; + + string biasadd_name = conv_node.name() + "/biasadd"; + TF_RETURN_IF_ERROR( + FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, + biasadd_name , new_nodes)); + + NodeDef new_batch_to_space_node = batch_to_space_node; + // reuse batch_norm node name + new_batch_to_space_node.set_name(batch_norm_node.name()); + new_batch_to_space_node.set_input(0, biasadd_name); + new_nodes->push_back(batch_to_space_node_match.inputs[1].node); + new_nodes->push_back(batch_to_space_node_match.inputs[2].node); + new_nodes->push_back(new_batch_to_space_node); + return Status::OK(); +} + Status FuseBatchNormWithConvConcat(const NodeMatch& match, std::vector* new_nodes) { // Calculate the scale and offset values to apply. @@ -284,6 +314,43 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def, current_graph_def = replaced_graph_def; } while (did_graph_change); + do { + did_graph_change = false; + GraphDef replaced_graph_def; + TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( + current_graph_def, // clang-format off + {"BatchNormWithGlobalNormalization|FusedBatchNorm", // batch_norm_node + { + {"BatchToSpaceND", // batch_to_space_node + { + {"Conv2D", // conv_node + { + {"*"}, // input_node + {"Const"}, // weights_node + } + }, + {"Const"}, // block_shape + {"Const"}, // crops + } + }, + {"Const"}, // mean_node + {"Const"}, // variance_node + {"Const"}, // beta_node + {"Const"}, // gamma_node + } + }, // clang-format on + [&did_graph_change](const NodeMatch& match, + const std::set& input_nodes, + const std::set& output_nodes, + std::vector* new_nodes) { + TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes)); + did_graph_change = true; + return Status::OK(); + }, + {}, &replaced_graph_def)); + current_graph_def = replaced_graph_def; + } while (did_graph_change); + do { did_graph_change = false; GraphDef replaced_graph_def; diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index b30ba9ac8b..7651a03fe5 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -298,6 +299,96 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; +void TestFoldFusedBatchNormsWithBatchToSpace() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, + {1, 1, 1, 1}, "VALID"); + + Tensor block_shape_data(DT_INT32, TensorShape({2})); + test::FillValues(&block_shape_data, {1, 2}); + Output block_shape_op = + Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); + + Tensor crops_data(DT_INT32, TensorShape({2, 2})); + test::FillValues(&crops_data, {0, 0, 0, 1}); + Output crops_op = + Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); + + Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), + conv_op, block_shape_op, crops_data); + + Tensor mean_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&mean_data, {10.0f, 20.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + Tensor variance_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&variance_data, {0.25f, 0.5f}); + Output variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&beta_data, {0.1f, 0.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&gamma_data, {1.0f, 2.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + NodeDef batch_norm_node; + batch_norm_node.set_op("FusedBatchNorm"); + batch_norm_node.set_name("output"); + AddNodeInput("batch_to_space_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("is_training", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + + std::unique_ptr original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op()); + } +} + TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) { TestFoldOldBatchNorms(); } @@ -307,7 +398,7 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNorms) { } TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { - // Test axis is not 3, so all weigths and offsets are fused to each of inputs + // Test axis is not 3, so all weights and offsets are fused to each of inputs // of conv2d. TestFoldFusedBatchNormsWithConcat(/*split=*/true); // Test axis = 3, BatchNorm weights and offsets will be split before fused @@ -315,5 +406,9 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { TestFoldFusedBatchNormsWithConcat(/*split=*/false); } +TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { + TestFoldFusedBatchNormsWithBatchToSpace(); +} + } // namespace graph_transforms } // namespace tensorflow diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 1833d67d82..d55a883df5 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -108,6 +108,7 @@ filegroup( "@highwayhash//:LICENSE", "@jemalloc//:COPYING", "@jpeg//:LICENSE.md", + "@kafka//:LICENSE", "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e1a5f091ba..e0152da4df 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc1' +_VERSION = '1.6.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -72,7 +72,7 @@ if sys.version_info < (3, 4): # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'freeze_graph = tensorflow.python.tools.freeze_graph:main', + 'freeze_graph = tensorflow.python.tools.freeze_graph:run_main', 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 77cc9f75f7..c030575109 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -87,6 +87,7 @@ import json import os import shutil +from six import text_type from google.cloud import datastore @@ -150,7 +151,7 @@ def upload_benchmark_data(client, data): """ test_result = json.loads(data) - test_name = unicode(test_result["name"]) + test_name = text_type(test_result["name"]) start_time = datetime.datetime.utcfromtimestamp( float(test_result["startTime"])) batch = [] @@ -162,7 +163,7 @@ def upload_benchmark_data(client, data): t_val.update({ "test": test_name, "start": start_time, - "info": unicode(data) + "info": text_type(data) }) batch.append(t_val) @@ -170,7 +171,7 @@ def upload_benchmark_data(client, data): # the attribute to be fetched and displayed. The full entry information is # also stored as a non-indexed JSON blob. for ent in test_result["entries"].get("entry", []): - ent_name = unicode(ent["name"]) + ent_name = text_type(ent["name"]) e_key = client.key("Entry") e_val = datastore.Entity(e_key, exclude_from_indexes=["info"]) e_val.update({ @@ -178,7 +179,7 @@ def upload_benchmark_data(client, data): "start": start_time, "entry": ent_name, "timing": ent["wallTime"], - "info": unicode(json.dumps(ent)) + "info": text_type(json.dumps(ent)) }) batch.append(e_val) diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD index 87a23925c4..4418ac32fc 100644 --- a/third_party/jpeg/jpeg.BUILD +++ b/third_party/jpeg/jpeg.BUILD @@ -526,12 +526,12 @@ config_setting( config_setting( name = "armeabi-v7a", - values = {"android_cpu": "armeabi-v7a"}, + values = {"cpu": "armeabi-v7a"}, ) config_setting( name = "arm64-v8a", - values = {"android_cpu": "arm64-v8a"}, + values = {"cpu": "arm64-v8a"}, ) config_setting( diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index a61a9e1f6c..a839ca717e 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -130,12 +130,16 @@ cc_library( ], hdrs = [ "config.h", + "src-cpp/rdkafkacpp.h", + "src-cpp/rdkafkacpp_int.h", + "src/lz4.c", + "src/snappy_compat.h", ], - defines = [ + copts = [ + "-Iexternal/kafka/src", + "-Iexternal/kafka/src-cpp", ], - includes = [ - "src", - "src-cpp", + defines = [ ], linkopts = [ "-lpthread", @@ -143,5 +147,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@boringssl//:ssl", + "@zlib_archive//:zlib", ], ) diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl index de06ad5f27..1dd8ab433a 100644 --- a/third_party/py/BUILD.tpl +++ b/third_party/py/BUILD.tpl @@ -2,20 +2,26 @@ licenses(["restricted"]) package(default_visibility = ["//visibility:public"]) +# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib +# See https://docs.python.org/3/extending/windows.html +cc_import( + name = "python_lib", + interface_library = select({ + ":windows": ":python_import_lib", + # A placeholder for Unix platforms which makes --no_build happy. + "//conditions:default": "not-existing.lib", + }), + system_provided = 1, +) + cc_library( name = "python_headers", hdrs = [":python_include"], - data = select({ - ":windows": [":python_import_lib"], + deps = select({ + ":windows": [":python_lib"], "//conditions:default": [], }), includes = ["python_include"], - linkopts = select({ - # TODO(pcloudy): Ideally, this should just go into deps after resolving - # https://github.com/bazelbuild/bazel/issues/3237, - ":windows": ["$(locations :python_import_lib)"], - "//conditions:default": [], - }), ) cc_library( diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index 8e76e5d02a..9b946505a6 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -57,6 +57,10 @@ def _find_trt_header_dir(repository_ctx, trt_install_path): path = "/usr/include/x86_64-linux-gnu" if _headers_exist(repository_ctx, path): return path + if trt_install_path == "/usr/lib/aarch64-linux-gnu": + path = "/usr/include/aarch64-linux-gnu" + if _headers_exist(repository_ctx, path): + return path path = str(repository_ctx.path("%s/../include" % trt_install_path).realpath) if _headers_exist(repository_ctx, path): return path -- GitLab From e79924eda7285a5aa0cfe908a223aef690b05fdd Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Mar 2018 12:14:30 -0700 Subject: [PATCH 311/960] Made sure to compare all the attributes when comparing nodes PiperOrigin-RevId: 189946858 --- tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 942724a6ce..bc004df608 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1037,6 +1037,9 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const { } // Compare attributes. + if (node1.attr().size() != node2.attr().size()) { + return false; + } for (const auto& attr1 : node1.attr()) { auto it = node2.attr().find(attr1.first); if (it == node2.attr().end()) { -- GitLab From bdd6f2253a76c707ff2ce2af9b560478891342eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 12:40:08 -0700 Subject: [PATCH 312/960] [XLA] Adding more tests for ReduceWindow. PiperOrigin-RevId: 189950361 --- tensorflow/compiler/xla/tests/reduce_window_test.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index f66fb5cacc..9c317fe579 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -1021,6 +1021,15 @@ struct R2ReduceWindowTestData { /*strides=*/{1, 1}, /*pad_low=*/{0, 130}, /*pad_high=*/{0, 0}, /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, +// TODO(b/76025683): These tests fail on TPU. +#if defined(XLA_TEST_BACKEND_CPU) || defined(XLA_TEST_BACKEND_GPU) + {/*base_bounds=*/{4096, 4096}, /*window_bounds=*/{1, 4}, + /*strides=*/{1, 1024}, /*pad_low=*/{0, 0}, /*pad-high=*/{0, 0}, + /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, + {/*base_bounds=*/{8, 256}, /*window_bounds=*/{1, 4}, + /*strides=*/{1, 64}, /*pad_low=*/{0, 0}, /*pad_high=*/{0, 0}, + /*layout=*/{1, 0}, /*reducer=*/Reducer::kAdd}, +#endif }; string R2ReduceWindowTestDataToString( -- GitLab From ee108441201ecb5fa9536573637623d712f9aa33 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Wed, 21 Mar 2018 12:53:04 -0700 Subject: [PATCH 313/960] Further improve accuracy of op_level_cost_estimator (Gather, GatherV2, Slice). PiperOrigin-RevId: 189952132 --- .../grappler/costs/op_level_cost_estimator.cc | 32 +++++++++++++++---- .../grappler/costs/op_level_cost_estimator.h | 2 +- .../costs/op_level_cost_estimator_test.cc | 22 +++++++++++-- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 84ad8a3e84..d3ffa03fe2 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -48,6 +48,8 @@ constexpr char kSize[] = "Size"; constexpr char kStopGradient[] = "StopGradient"; constexpr char kPreventGradient[] = "PreventGradient"; constexpr char kGather[] = "Gather"; +constexpr char kGatherV2[] = "GatherV2"; +constexpr char kSlice[] = "Slice"; static const Costs::Duration kMinComputeTime(1); @@ -169,7 +171,9 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kNoOp, wrap(&OpLevelCostEstimator::PredictNoOp)}, - {kGather, wrap(&OpLevelCostEstimator::PredictGather)}, + {kGather, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, + {kGatherV2, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, + {kSlice, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, {kPlaceholder, wrap(&OpLevelCostEstimator::PredictIdentity)}, {kIdentity, wrap(&OpLevelCostEstimator::PredictIdentity)}, @@ -1049,17 +1053,33 @@ Costs OpLevelCostEstimator::PredictMetadata(const OpContext& op_context) const { return costs; } -Costs OpLevelCostEstimator::PredictGather(const OpContext& op_context) const { - // Gather op can have a very large input, but only the size of the output - // matters, because indices may select only a very small subset of input. - +Costs OpLevelCostEstimator::PredictGatherOrSlice( + const OpContext& op_context) const { + // Gather & Slice ops can have a very large input, but only access a small + // part of it. For these op the size of the output determines the memory cost. const auto& op_info = op_context.op_info; bool unknown_shapes = false; + + // Each output element is a copy of some element from input. + // For roofline estimate we assume each copy has a unit cost. const int64 op_count = CalculateTensorElementCount(op_info.outputs(0), &unknown_shapes); + const double output_size = CalculateOutputSize(op_info, &unknown_shapes); - const double total_io = 2 * output_size; + double input_size = output_size; + if (op_info.op() == "Slice") { + // Add 'begin' & 'size' tensors sizes. + input_size += + CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes) + + CalculateTensorElementCount(op_info.inputs(2), &unknown_shapes); + } else { + // Assuming this is "Gather" or "GatherV2" op, add 'indices' size. + input_size += + CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes); + } + + const double total_io = input_size + output_size; Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info); costs.inaccurate = unknown_shapes; costs.max_memory = output_size; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index e5dd31a7a2..1b3babb206 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -144,7 +144,7 @@ class OpLevelCostEstimator { Costs PredictVariable(const OpContext& op_context) const; Costs PredictBatchMatMul(const OpContext& op_context) const; Costs PredictMetadata(const OpContext& op_context) const; - Costs PredictGather(const OpContext& op_context) const; + Costs PredictGatherOrSlice(const OpContext& op_context) const; // Utility function for safe division. Returns 0 // if rhs is 0 or negative. diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index a92f230101..f2a9615dfb 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -206,9 +206,27 @@ TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) { DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); auto cost = estimator_.PredictCosts(op_context); - EXPECT_EQ(Costs::Duration(128), cost.memory_time); + EXPECT_EQ(Costs::Duration(130), cost.memory_time); EXPECT_EQ(Costs::Duration(16), cost.compute_time); - EXPECT_EQ(Costs::Duration(144), cost.execution_time); + EXPECT_EQ(Costs::Duration(146), cost.execution_time); + EXPECT_FALSE(cost.inaccurate); +} + +TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op("Slice"); + + // Huge first input shouldn't affect Slice execution and memory costs. + DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); + DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankOutput({10, 10}, DT_FLOAT, &op_context.op_info); + + auto cost = estimator_.PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(81), cost.memory_time); + EXPECT_EQ(Costs::Duration(10), cost.compute_time); + EXPECT_EQ(Costs::Duration(91), cost.execution_time); EXPECT_FALSE(cost.inaccurate); } -- GitLab From a6a7de3d36a154e4382283a804531f148fca6c9b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 12:53:53 -0700 Subject: [PATCH 314/960] Temporarily disable constant folding past Enter, since a few breakages have been observed. PiperOrigin-RevId: 189952252 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 5 ++++- tensorflow/core/grappler/optimizers/constant_folding.h | 2 +- tensorflow/core/grappler/optimizers/constant_folding_test.cc | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 263983584c..bdec73e69e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1707,7 +1707,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, } // Move constants past Enter. - if (IsEnter(*node) && node->input_size() > 0) { + // TODO(rmlarsen): Reenable when we fix the root cause of b/76008022 + if (opt_level_ == RewriterConfig::AGGRESSIVE && IsEnter(*node) && + node->input_size() > 0) { const string& node_name = node->name(); const NodeDef* input = node_map_->GetNode(node->input(0)); if (input != nullptr && IsReallyConstant(*input) && @@ -1729,6 +1731,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, NodeDef* new_node = optimized_graph->add_node(); *new_node = *input; new_node->set_name(OptimizedNodeName(*input, "_enter")); + new_node->set_device(node->device()); new_node->clear_input(); new_node->add_input(AsControlDependency(node_name)); node_map_->AddNode(new_node->name(), new_node); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 13ecfcd281..b6645d335e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -38,7 +38,7 @@ class ConstantFolding : public GraphOptimizer { static string AddControlDependency(const string& input_name, GraphDef* graph, NodeMap* node_map); - ConstantFolding(DeviceBase* cpu_device); + explicit ConstantFolding(DeviceBase* cpu_device); ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device); ~ConstantFolding() override {} diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index aeb430b384..914a9257ee 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -2103,7 +2103,8 @@ TEST_F(ConstantFoldingTest, Enter) { item.fetch.push_back("id2"); item.fetch.push_back("id3"); - ConstantFolding optimizer(nullptr /* cpu_device */); + ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, + nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); -- GitLab From 6741f81b8216862a83703122191a8632fda333a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 13:08:02 -0700 Subject: [PATCH 315/960] Make strcat.{h,cc} independent of Eigen. PiperOrigin-RevId: 189954596 --- tensorflow/compiler/xla/literal_util.cc | 5 +++-- tensorflow/compiler/xla/literal_util_test.cc | 2 +- tensorflow/core/framework/tensor.cc | 18 ++++++++++++++++-- tensorflow/core/lib/strings/strcat.cc | 4 ---- tensorflow/core/lib/strings/strcat.h | 5 ----- tensorflow/core/lib/strings/strcat_test.cc | 6 ------ tensorflow/stream_executor/stream.cc | 5 ++++- 7 files changed, 24 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 0a24db046a..20508edaa7 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -929,7 +929,7 @@ string Literal::GetAsString(tensorflow::gtl::ArraySlice multi_index, case U64: return StrCat(Get(multi_index, shape_index)); case F16: - return StrCat(Get(multi_index, shape_index)); + return StrCat(static_cast(Get(multi_index, shape_index))); case F32: return StrCat(Get(multi_index, shape_index)); case BF16: @@ -979,7 +979,8 @@ string Literal::GetSparseElementAsString(int64 sparse_element_number, return StrCat( GetSparseElement(sparse_element_number, shape_index)); case F16: - return StrCat(GetSparseElement(sparse_element_number, shape_index)); + return StrCat(static_cast( + GetSparseElement(sparse_element_number, shape_index))); case F32: return StrCat( GetSparseElement(sparse_element_number, shape_index)); diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 04e45f0049..7627762074 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -1702,7 +1702,7 @@ TEST_F(LiteralUtilTest, GetSparseElementAsString) { ASSERT_EQ(Literal::CreateSparse(dimensions, indices, {half{1.0}, half{2.0}, half{3.0}}) ->GetSparseElementAsString(1), - tensorflow::strings::StrCat(half{2.0})); + tensorflow::strings::StrCat(static_cast(half{2.0}))); ASSERT_EQ( Literal::CreateSparse( dimensions, indices, diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 5d32b71628..e2111d6038 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -884,6 +884,20 @@ bool Tensor::CanUseDMA() const { #undef CASE namespace { + +// StrCat and StrAppend don't support Eigen::half directly at the moment, and +// we would like to keep them compatible with their absl counterparts, for ease +// of migration. We could rely on errors::internal::PrepareForStrCat() but the +// logic is so simple we can just replicate it here, where it is close to its +// usage and easy to change later. And there's the extra benefit of not +// accessing an 'internal' namespace. +inline const strings::AlphaNum& PrintOneElement(const strings::AlphaNum& a) { + return a; +} +inline float PrintOneElement(const Eigen::half& h) { + return static_cast(h); +} + // Print from left dim to right dim recursively. template void PrintOneDim(int dim_index, const gtl::InlinedVector& shape, @@ -896,7 +910,7 @@ void PrintOneDim(int dim_index, const gtl::InlinedVector& shape, for (int64 i = 0; i < element_count; i++) { if (*data_index >= limit) return; if (i > 0) strings::StrAppend(result, " "); - strings::StrAppend(result, data[(*data_index)++]); + strings::StrAppend(result, PrintOneElement(data[(*data_index)++])); } return; } @@ -927,7 +941,7 @@ string SummarizeArray(int64 limit, int64 num_elts, if (shape.empty()) { for (int64 i = 0; i < limit; ++i) { if (i > 0) strings::StrAppend(&ret, " "); - strings::StrAppend(&ret, array[i]); + strings::StrAppend(&ret, PrintOneElement(array[i])); } if (num_elts > limit) strings::StrAppend(&ret, "..."); return ret; diff --git a/tensorflow/core/lib/strings/strcat.cc b/tensorflow/core/lib/strings/strcat.cc index 5b1cff486d..f140ec3d26 100644 --- a/tensorflow/core/lib/strings/strcat.cc +++ b/tensorflow/core/lib/strings/strcat.cc @@ -20,16 +20,12 @@ limitations under the License. #include #include -#include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { namespace strings { -AlphaNum::AlphaNum(const Eigen::half &f) - : piece_(digits_, strlen(FloatToBuffer(static_cast(f), digits_))) {} - AlphaNum::AlphaNum(Hex hex) { char *const end = &digits_[kFastToBufferSize]; char *writer = end; diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h index b681f7398d..b3ec14e448 100644 --- a/tensorflow/core/lib/strings/strcat.h +++ b/tensorflow/core/lib/strings/strcat.h @@ -27,10 +27,6 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" -namespace Eigen { -struct half; -} - // The AlphaNum type was designed to be used as the parameter type for StrCat(). // Any routine accepting either a string or a number may accept it. // The basic idea is that by accepting a "const AlphaNum &" as an argument @@ -122,7 +118,6 @@ class AlphaNum { AlphaNum(double f) // NOLINT(runtime/explicit) : piece_(digits_, strlen(DoubleToBuffer(f, digits_))) {} - AlphaNum(const Eigen::half &f); // NOLINT(runtime/explicit) AlphaNum(Hex hex); // NOLINT(runtime/explicit) AlphaNum(const char *c_str) : piece_(c_str) {} // NOLINT(runtime/explicit) diff --git a/tensorflow/core/lib/strings/strcat_test.cc b/tensorflow/core/lib/strings/strcat_test.cc index 7cb186e637..8cc64a6f0a 100644 --- a/tensorflow/core/lib/strings/strcat_test.cc +++ b/tensorflow/core/lib/strings/strcat_test.cc @@ -17,7 +17,6 @@ limitations under the License. #include -#include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -131,11 +130,6 @@ TEST(StrCat, Basics) { result = tensorflow::strings::StrCat("A hundred K and a half squared is ", d); EXPECT_EQ(result, "A hundred K and a half squared is 10000100000.25"); - Eigen::half h(10007.0f); - result = - tensorflow::strings::StrCat("Ten thousand seven is approximately ", h); - EXPECT_EQ(result, "Ten thousand seven is approximately 10008"); - result = tensorflow::strings::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888, 999999999); EXPECT_EQ(result, "12333444455555666666777777788888888999999999"); diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 6bbb5f0b2e..1e3afde268 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" +#include "third_party/eigen3/Eigen/Core" #include "tensorflow/stream_executor/blas.h" #include "tensorflow/stream_executor/host_buffer.h" #include "tensorflow/stream_executor/lib/stacktrace.h" @@ -117,7 +118,9 @@ string ToVlogString(const DeviceMemoryBase *memory) { return ToVlogString(*memory); } -string ToVlogString(const Eigen::half &h) { return port::StrCat(h); } +string ToVlogString(const Eigen::half &h) { + return port::StrCat(static_cast(h)); +} string ToVlogString(int i) { return port::StrCat(i); } -- GitLab From 56e5181f340f855e0eef9a4ce25baea5be1aaebc Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 21 Mar 2018 13:28:11 -0700 Subject: [PATCH 316/960] [TF CriticalSection] Bugfix when Execute() inside a while_loop has a dep on a Variable outside of it. PiperOrigin-RevId: 189957569 --- .../framework/python/ops/critical_section_ops.py | 14 +++++++++++++- .../framework/python/ops/critical_section_test.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index 1893d7b466..bd764ed57a 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -308,7 +308,19 @@ class CriticalSection(object): all_args_dict.pop(input_.op._id, None) all_args_dict.pop(lock_op._id, None) - lock_op._add_control_inputs(all_args_dict.values()) + all_args = all_args_dict.values() + + if not all_args: + # No control dependencies to add; return early. + return + + # This group is important: it ensures that any ops in all_args + # outside the control context of the lock_op (and this fn, which + # runs in the same context) are added to this context before + # being added to the control dependencies of lock_op. + all_args = control_flow_ops.group(*all_args) + + lock_op._add_control_input(all_args) # pylint: enable=protected-access def _is_self_handle(self, x): diff --git a/tensorflow/contrib/framework/python/ops/critical_section_test.py b/tensorflow/contrib/framework/python/ops/critical_section_test.py index e24140bd72..ba660295cb 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_test.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_test.py @@ -316,6 +316,20 @@ class CriticalSectionTest(test.TestCase): ValueError, "requested exclusive resource access"): cs1.execute(lambda: v2 + 1) + def testControlDependencyFromOutsideWhileLoopMixedWithInsideLoop(self): + cs = critical_section_ops.CriticalSection() + v = resource_variable_ops.ResourceVariable(0, name="v") + # Make sure that the control dependencies on v do not cause issues + # in the lock_op's automatic control dependency adder. + # + # Note, here v must be a resource variable (or something similar), + # otherwise it gets hoisted into the while_loop by the time we add + # control dependencies to the lock_op. + out = control_flow_ops.while_loop( + lambda i: i < 10, lambda i: cs.execute(lambda j: v + j + 1, i), [0]) + self.evaluate(v.initializer) + self.assertEqual(10, self.evaluate(out)) + # TODO(ebrevdo): Re-enable once CriticalSection is in core. # # def testCriticalSectionAndExecuteOpSaverRoundTrip(self): -- GitLab From 9bdc24077e4a454a663b2958817bcb5f7d961833 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 13:30:26 -0700 Subject: [PATCH 317/960] Update ops-related pbtxt files. PiperOrigin-RevId: 189957912 --- .../core/ops/compat/ops_history.v1.pbtxt | 31 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 31 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index ddf7627463..99bdc8ccdc 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -51602,6 +51602,37 @@ op { } } } +op { + name: "SlideDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "window_size" + type: DT_INT64 + } + input_arg { + name: "stride" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Snapshot" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 72326e1137..02e52b07c4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -24399,6 +24399,37 @@ op { } } } +op { + name: "SlideDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "window_size" + type: DT_INT64 + } + input_arg { + name: "stride" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Snapshot" input_arg { -- GitLab From 7dd78367a19e101b45f0cafb5c4fbe6a3c840828 Mon Sep 17 00:00:00 2001 From: Sang Han Date: Wed, 21 Mar 2018 13:37:52 -0700 Subject: [PATCH 318/960] SetUsrMemDataHandle should return void --- tensorflow/core/util/mkl_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 34db96075d..9f58e40d94 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1579,10 +1579,10 @@ class MklDnnData { } /// Set function for data buffer of user memory primitive. - inline void* SetUsrMemDataHandle(void* data_buffer) { + inline void SetUsrMemDataHandle(void* data_buffer) { CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(data_buffer); - return user_memory_->set_data_handle(data_buffer); + user_memory_->set_data_handle(data_buffer); } /// Set function for data buffer of user memory primitive. -- GitLab From 2533de345fb13ff430eea207d59935b6d4b8fe19 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 13:46:27 -0700 Subject: [PATCH 319/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 189960595 --- tensorflow/go/op/wrappers.go | 376 +++++++++++++++++------------------ 1 file changed, 188 insertions(+), 188 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index e5256af1e8..16472464db 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -4116,6 +4116,194 @@ func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// DepthToSpaceAttr is an optional argument to DepthToSpace. +type DepthToSpaceAttr func(optionalAttr) + +// DepthToSpaceDataFormat sets the optional data_format attribute to value. +// If not specified, defaults to "NHWC" +func DepthToSpaceDataFormat(value string) DepthToSpaceAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// DepthToSpace for tensors of type T. +// +// Rearranges data from depth into blocks of spatial data. +// This is the reverse transformation of SpaceToDepth. More specifically, +// this op outputs a copy of the input tensor where values from the `depth` +// dimension are moved in spatial blocks to the `height` and `width` dimensions. +// The attr `block_size` indicates the input block size and how the data is moved. +// +// * Chunks of data of size `block_size * block_size` from depth are rearranged +// into non-overlapping blocks of size `block_size x block_size` +// * The width the output tensor is `input_depth * block_size`, whereas the +// height is `input_height * block_size`. +// * The Y, X coordinates within each block of the output image are determined +// by the high order component of the input channel index. +// * The depth of the input tensor must be divisible by +// `block_size * block_size`. +// +// The `data_format` attr specifies the layout of the input and output tensors +// with the following options: +// "NHWC": `[ batch, height, width, channels ]` +// "NCHW": `[ batch, channels, height, width ]` +// "NCHW_VECT_C": +// `qint8 [ batch, channels / 4, height, width, 4 ]` +// +// It is useful to consider the operation as transforming a 6-D Tensor. +// e.g. for data_format = NHWC, +// Each element in the input tensor can be specified via 6 coordinates, +// ordered by decreasing memory layout significance as: +// n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates +// within the input image, bX, bY means coordinates +// within the output block, oC means output channels). +// The output would be the input transposed to the following layout: +// n,iY,bY,iX,bX,oC +// +// This operation is useful for resizing the activations between convolutions +// (but keeping all data), e.g. instead of pooling. It is also useful for training +// purely convolutional models. +// +// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and +// block_size = 2: +// +// ``` +// x = [[[[1, 2, 3, 4]]]] +// +// ``` +// +// This operation will output a tensor of shape `[1, 2, 2, 1]`: +// +// ``` +// [[[[1], [2]], +// [[3], [4]]]] +// ``` +// +// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, +// the corresponding output will have 2x2 elements and will have a depth of +// 1 channel (1 = `4 / (block_size * block_size)`). +// The output element shape is `[2, 2, 1]`. +// +// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. +// +// ``` +// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] +// ``` +// +// This operation, for block size of 2, will return the following tensor of shape +// `[1, 2, 2, 3]` +// +// ``` +// [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// +// ``` +// +// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: +// +// ``` +// x = [[[[1, 2, 3, 4], +// [5, 6, 7, 8]], +// [[9, 10, 11, 12], +// [13, 14, 15, 16]]]] +// ``` +// +// the operator will return the following tensor of shape `[1 4 4 1]`: +// +// ``` +// x = [[[ [1], [2], [5], [6]], +// [ [3], [4], [7], [8]], +// [ [9], [10], [13], [14]], +// [ [11], [12], [15], [16]]]] +// +// ``` +// +// Arguments: +// +// block_size: The size of the spatial block, same as in Space2Depth. +func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"block_size": block_size} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DepthToSpace", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. +type Conv3DBackpropInputV2Attr func(optionalAttr) + +// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of 3-D convolution with respect to the input. +// +// Arguments: +// input_sizes: An integer vector representing the tensor shape of `input`, +// where `input` is a 5-D +// `[batch, depth, rows, cols, in_channels]` tensor. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Conv3DBackpropInputV2", + Input: []tf.Input{ + input_sizes, filter, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes square root of x element-wise. // // I.e., \\(y = \sqrt{x} = x^{1/2}\\). @@ -24282,194 +24470,6 @@ func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ... return values } -// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. -type Conv3DBackpropInputV2Attr func(optionalAttr) - -// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// Arguments: -// input_sizes: An integer vector representing the tensor shape of `input`, -// where `input` is a 5-D -// `[batch, depth, rows, cols, in_channels]` tensor. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3DBackpropInputV2", - Input: []tf.Input{ - input_sizes, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DepthToSpaceAttr is an optional argument to DepthToSpace. -type DepthToSpaceAttr func(optionalAttr) - -// DepthToSpaceDataFormat sets the optional data_format attribute to value. -// If not specified, defaults to "NHWC" -func DepthToSpaceDataFormat(value string) DepthToSpaceAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthToSpace for tensors of type T. -// -// Rearranges data from depth into blocks of spatial data. -// This is the reverse transformation of SpaceToDepth. More specifically, -// this op outputs a copy of the input tensor where values from the `depth` -// dimension are moved in spatial blocks to the `height` and `width` dimensions. -// The attr `block_size` indicates the input block size and how the data is moved. -// -// * Chunks of data of size `block_size * block_size` from depth are rearranged -// into non-overlapping blocks of size `block_size x block_size` -// * The width the output tensor is `input_depth * block_size`, whereas the -// height is `input_height * block_size`. -// * The Y, X coordinates within each block of the output image are determined -// by the high order component of the input channel index. -// * The depth of the input tensor must be divisible by -// `block_size * block_size`. -// -// The `data_format` attr specifies the layout of the input and output tensors -// with the following options: -// "NHWC": `[ batch, height, width, channels ]` -// "NCHW": `[ batch, channels, height, width ]` -// "NCHW_VECT_C": -// `qint8 [ batch, channels / 4, height, width, 4 ]` -// -// It is useful to consider the operation as transforming a 6-D Tensor. -// e.g. for data_format = NHWC, -// Each element in the input tensor can be specified via 6 coordinates, -// ordered by decreasing memory layout significance as: -// n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates -// within the input image, bX, bY means coordinates -// within the output block, oC means output channels). -// The output would be the input transposed to the following layout: -// n,iY,bY,iX,bX,oC -// -// This operation is useful for resizing the activations between convolutions -// (but keeping all data), e.g. instead of pooling. It is also useful for training -// purely convolutional models. -// -// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and -// block_size = 2: -// -// ``` -// x = [[[[1, 2, 3, 4]]]] -// -// ``` -// -// This operation will output a tensor of shape `[1, 2, 2, 1]`: -// -// ``` -// [[[[1], [2]], -// [[3], [4]]]] -// ``` -// -// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, -// the corresponding output will have 2x2 elements and will have a depth of -// 1 channel (1 = `4 / (block_size * block_size)`). -// The output element shape is `[2, 2, 1]`. -// -// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. -// -// ``` -// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] -// ``` -// -// This operation, for block size of 2, will return the following tensor of shape -// `[1, 2, 2, 3]` -// -// ``` -// [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// -// ``` -// -// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: -// -// ``` -// x = [[[[1, 2, 3, 4], -// [5, 6, 7, 8]], -// [[9, 10, 11, 12], -// [13, 14, 15, 16]]]] -// ``` -// -// the operator will return the following tensor of shape `[1 4 4 1]`: -// -// ``` -// x = [[[ [1], [2], [5], [6]], -// [ [3], [4], [7], [8]], -// [ [9], [10], [13], [14]], -// [ [11], [12], [15], [16]]]] -// -// ``` -// -// Arguments: -// -// block_size: The size of the spatial block, same as in Space2Depth. -func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"block_size": block_size} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthToSpace", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // MapStageAttr is an optional argument to MapStage. type MapStageAttr func(optionalAttr) -- GitLab From c8f5b35dd70d103fe8ae6a41ca907ccccaa3ac39 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 21 Mar 2018 13:57:29 -0700 Subject: [PATCH 320/960] [docs] Update explicit latest_checkpoint with `path` returned while saving for object-based checkpointing. PiperOrigin-RevId: 189962437 --- tensorflow/contrib/eager/python/g3doc/guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md index b73dc17e5f..df084e9053 100644 --- a/tensorflow/contrib/eager/python/g3doc/guide.md +++ b/tensorflow/contrib/eager/python/g3doc/guide.md @@ -590,14 +590,14 @@ checkpoint = tfe.Checkpoint(x=x, y=y) # Assign new values to the variables and save. x.assign(2.) -checkpoint.save('/tmp/ckpt') +save_path = checkpoint.save('/tmp/ckpt') # Change the variable after saving. x.assign(11.) assert 16. == (x + y).numpy() # 11 + 5 # Restore the values in the checkpoint. -checkpoint.restore('/tmp/ckpt-1') +checkpoint.restore(save_path) # save_path='/tmp/ckpt-1' assert 7. == (x + y).numpy() # 2 + 5 ``` -- GitLab From 4c56dcaade7cee9f75740e2f03e30bdf3a6f93be Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 21 Mar 2018 14:12:07 -0700 Subject: [PATCH 321/960] [XLA][BF16] Add bf16 rounding function. We now use truncation to convert a F32 to BF16 by default. This CL adds a rounding method (basically a roll forward of the rounding part in cl/175252067). PiperOrigin-RevId: 189965138 --- tensorflow/core/framework/bfloat16_test.cc | 39 +++-- tensorflow/core/lib/bfloat16/bfloat16.h | 188 ++++++++++++++++++++- 2 files changed, 216 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index 17e6209f8e..206396a25a 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -37,19 +37,27 @@ float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, struct Bfloat16TestParam { float input; - float expected; + float expected_truncation; + float expected_rounding; }; class Bfloat16Test : public ::testing::Test, public ::testing::WithParamInterface {}; TEST_P(Bfloat16Test, TruncateTest) { - bfloat16 a(GetParam().input); + bfloat16 truncated(GetParam().input); if (std::isnan(GetParam().input)) { - EXPECT_TRUE(std::isnan(float(a)) || std::isinf(float(a))); + EXPECT_TRUE(std::isnan(float(truncated)) || std::isinf(float(truncated))); return; } - EXPECT_EQ(GetParam().expected, float(a)); + EXPECT_EQ(GetParam().expected_truncation, float(truncated)); + + bfloat16 rounded = bfloat16::round_to_bfloat16((GetParam().input)); + if (std::isnan(GetParam().input)) { + EXPECT_TRUE(std::isnan(float(rounded)) || std::isinf(float(rounded))); + return; + } + EXPECT_EQ(GetParam().expected_rounding, float(rounded)); } INSTANTIATE_TEST_CASE_P( @@ -57,37 +65,48 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( Bfloat16TestParam{ BinaryToFloat(0, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001001, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(1, 0b10000000, 0b1001000, 0b1111010111000011), - BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000)}, + BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000001), - BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000000)}, + BinaryToFloat(0, 0b11111111, 0b0000000, 0b0000000000000000), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b11111111, 0b1111111, 0b1111111111111111), - BinaryToFloat(0, 0b11111111, 0b1111111, 0b0000000000000000)}, + BinaryToFloat(0, 0b11111111, 0b1111111, 0b0000000000000000), + BinaryToFloat(0, 0b11111111, 0b1000000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(1, 0b10000000, 0b1001000, 0b1100000000000000), - BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000)}, + BinaryToFloat(1, 0b10000000, 0b1001000, 0b0000000000000000), + BinaryToFloat(1, 0b10000000, 0b1001001, 0b0000000000000000)}, Bfloat16TestParam{ + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b10000000, 0b1001000, 0b0100000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b10000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b10000000, 0b1001000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b00000000, 0b1001000, 0b1000000000000000), + BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000), BinaryToFloat(0, 0b00000000, 0b1001000, 0b0000000000000000)}, Bfloat16TestParam{ BinaryToFloat(0, 0b00000000, 0b1111111, 0b1100000000000000), - BinaryToFloat(0, 0b00000000, 0b1111111, 0b0000000000000000)})); + BinaryToFloat(0, 0b00000000, 0b1111111, 0b0000000000000000), + BinaryToFloat(0, 0b00000001, 0b0000000, 0b0000000000000000)})); TEST(Bfloat16Test, Conversion) { float a[100]; diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index 075a8d1430..126e5a17af 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -165,6 +165,192 @@ struct bfloat16 { return complex128(double(*this), double(0.0)); } + union FP32 { + unsigned int u; + float f; + }; + + // Converts a float point to bfloat16, with round-nearest-to-even as rounding + // method. + // TODO(b/69266521): Add a truncate_to_bfloat16 function and make this + // function as default behavior. + // TODO: There is a slightly faster implementation (8% faster on CPU) + // than this (documented in cl/175987786), that is exponentially harder to + // understand and document. Switch to the faster version when converting to + // BF16 becomes compute-bound. + B16_DEVICE_FUNC static bfloat16 round_to_bfloat16(float v) { + uint32_t input; + FP32 f; + f.f = v; + input = f.u; + bfloat16 output; + + if (float_isnan(v)) { + // If the value is a NaN, squash it to a qNaN with msb of fraction set, + // this makes sure after truncation we don't end up with an inf. + // + // qNaN magic: All exponent bits set + most significant bit of fraction + // set. + output.value = 0x7fc0; + } else { + // Fast rounding algorithm that rounds a half value to nearest even. This + // reduces expected error when we convert a large number of floats. Here + // is how it works: + // + // Definitions: + // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits + // with the following tags: + // + // Sign | Exp (8 bits) | Frac (23 bits) + // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT + // + // S: Sign bit. + // E: Exponent bits. + // F: First 6 bits of fraction. + // L: Least significant bit of resulting bfloat16 if we truncate away the + // rest of the float32. This is also the 7th bit of fraction + // R: Rounding bit, 8th bit of fraction. + // T: Sticky bits, rest of fraction, 15 bits. + // + // To round half to nearest even, there are 3 cases where we want to round + // down (simply truncate the result of the bits away, which consists of + // rounding bit and sticky bits) and two cases where we want to round up + // (truncate then add one to the result). + // + // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of + // 1s) as the rounding bias, adds the rounding bias to the input, then + // truncates the last 16 bits away. + // + // To understand how it works, we can analyze this algorithm case by case: + // + // 1. L = 0, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input may create any carry, depending on + // whether there is any value set to 1 in T bits. + // - R may be set to 1 if there is a carry. + // - L remains 0. + // - Note that this case also handles Inf and -Inf, where all fraction + // bits, including L, R and Ts are all 0. The output remains Inf after + // this algorithm. + // + // 2. L = 1, R = 0: + // Expect: round down, this is less than half value. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits but + // adds 1 to rounding bit. + // - L remains 1. + // + // 3. L = 0, R = 1, all of T are 0: + // Expect: round down, this is exactly at half, the result is already + // even (L=0). + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input sets all sticky bits to 1, but + // doesn't create a carry. + // - R remains 1. + // - L remains 0. + // + // 4. L = 1, R = 1: + // Expect: round up, this is exactly at half, the result needs to be + // round to the next even number. + // + // Algorithm: + // - Rounding bias: 0x7fff + 1 = 0x8000 + // - Adding rounding bias to input doesn't change sticky bits, but + // creates a carry from rounding bit. + // - The carry sets L to 0, creates another carry bit and propagate + // forward to F bits. + // - If all the F bits are 1, a carry then propagates to the exponent + // bits, which then creates the minimum value with the next exponent + // value. Note that we won't have the case where exponents are all 1, + // since that's either a NaN (handled in the other if condition) or inf + // (handled in case 1). + // + // 5. L = 0, R = 1, any of T is 1: + // Expect: round up, this is greater than half. + // + // Algorithm: + // - Rounding bias: 0x7fff + 0 = 0x7fff + // - Adding rounding bias to input creates a carry from sticky bits, + // sets rounding bit to 0, then create another carry. + // - The second carry sets L to 1. + // + // Examples: + // + // Exact half value that is already even: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000 + // + // This falls into case 3. We truncate the rest of 16 bits and no + // carry is created into F and L: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // Exact half value, round to next even number: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000 + // + // This falls into case 4. We create a carry from R and T, + // which then propagates into L and F: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 + // + // + // Max denormal value round to min normal value: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Output: + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 + // + // Max normal value round to Inf: + // Input: + // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit) + // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT + // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111 + // + // This falls into case 4. We create a carry from R and T, + // propagate into L and F, which then propagates into exponent + // bits: + // + // Sign | Exp (8 bit) | Frac (first 7 bit) + // S E E E E E E E E F F F F F F L + // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 + // + // + // Least significant bit of resulting bfloat. + uint32_t lsb = (input >> 16) & 1; + uint32_t rounding_bias = 0x7fff + lsb; + input += rounding_bias; + output.value = static_cast(input >> 16); + } + return output; + } + static bfloat16 epsilon() { bfloat16 x; x.value = 0x3c00; // 0x1.0p-7 @@ -177,7 +363,7 @@ struct bfloat16 { static const uint16_t NAN_VALUE = 0x7FC0; private: - B16_DEVICE_FUNC bool float_isnan(const float& x) { + B16_DEVICE_FUNC static bool float_isnan(const float& x) { #ifdef __CUDA_ARCH__ return ::isnan(x); #else -- GitLab From c2b346538f8a651bd8adb5fa557bdfac0394c2c3 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 21 Mar 2018 14:26:14 -0700 Subject: [PATCH 322/960] Avoid taking a reference on a temporary value PiperOrigin-RevId: 189967517 --- tensorflow/core/grappler/optimizers/loop_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index bd0d94b83f..a063dc3381 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -368,7 +368,7 @@ Status LoopOptimizer::FindInvariantNodes(NodeDef* node) { bool is_invariant = true; for (const auto& input : consumer->input()) { if (!IsControlInput(input)) { - const auto& name = NodeName(input); + const string name = NodeName(input); auto* producer = node_map_->GetNode(name); if (!invariant_nodes_.count(producer)) { if (IsConstant(*producer)) { -- GitLab From a07bd80e27dd41a1b6a3f4c2e1954ae573453cda Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 14:59:06 -0700 Subject: [PATCH 323/960] Add an alternative "no_contrib" BUILD target to tensorflow/python to avoid including contrib packages PiperOrigin-RevId: 189973359 --- tensorflow/python/BUILD | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 54e944c264..079905781d 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -58,6 +58,18 @@ py_library( "//tensorflow/tools/api/generator:__pkg__", "//tensorflow/tools/quantization:__pkg__", # TODO(b/34059704): remove when fixed ], + deps = [":no_contrib"] + if_not_windows([ + "//tensorflow/contrib:contrib_py", + ]), +) + +py_library( + name = "no_contrib", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + visibility = [ + "//tensorflow:__pkg__", + ], deps = [ ":array_ops", ":bitwise_ops", @@ -86,6 +98,7 @@ py_library( ":ops", ":platform", ":pywrap_tensorflow", + ":saver_test_utils", ":script_ops", ":session_ops", ":sets", @@ -95,31 +108,28 @@ py_library( ":standard_ops", ":state_ops", ":string_ops", + ":subscribe", ":summary", ":tensor_array_ops", - ":training", - ":saver_test_utils", - ":subscribe", ":test_ops", # TODO: Break testing code out into separate rule. - ":tf_item", ":tf_cluster", + ":tf_item", ":tf_optimizer", + ":training", ":util", ":weights_broadcast_ops", - "//third_party/py/numpy", "//tensorflow/core:protos_all_py", "//tensorflow/python/data", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", - "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", + "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - ] + if_not_windows([ - "//tensorflow/contrib:contrib_py", - ]), + "//third_party/py/numpy", + ], ) tf_py_build_info_genrule() -- GitLab From 259525189f41eaa1c08e0baa0dbac8d2cf3a15ef Mon Sep 17 00:00:00 2001 From: Joel Shor Date: Thu, 22 Mar 2018 00:11:33 +0200 Subject: [PATCH 324/960] Fix dataset resampling bug introduced by a bug in datasets itself. fixes #16606 (#17896) * Fixes github issue #16606. The core issue is that in the case of certain random Tensors, the following two lines aren't the same: ``` rand_0s_and_1s_ds = ... gather_ds = rand_0s_and_1s_ds.map(lambda i: tf.gather([0, 1], i)) tup_ds = tf.data.Dataset.zip(gather_ds, rand_0s_and_1s_ds) ``` ``` rand_0s_and_1s_ds = ... tup_ds = rand_0s_and_1s_ds.map(lambda i: (tf.gather([0, 1], i), i)) Note that this does NOT fix the underlying issue of drawing multiple sampes from the underlying distribution. ``` Tested: With the new test, bazel test :resample_test fails before and succeeds after. * Fixes github issue #16606. The core issue is that in the case of certain random Tensors, the following two lines aren't the same: ``` rand_0s_and_1s_ds = ... gather_ds = rand_0s_and_1s_ds.map(lambda i: tf.gather([0, 1], i)) tup_ds = tf.data.Dataset.zip(gather_ds, rand_0s_and_1s_ds) ``` ``` rand_0s_and_1s_ds = ... tup_ds = rand_0s_and_1s_ds.map(lambda i: (tf.gather([0, 1], i), i)) Note that this does NOT fix the underlying issue of drawing multiple sampes from the underlying distribution. ``` Tested: With the new test, bazel test :resample_test fails before and succeeds after. * Undo a spurious git-induced change. * Fix indent issue. * Fix indent issue. --- .../data/python/kernel_tests/resample_test.py | 40 +++++++++++++++++++ .../contrib/data/python/ops/resampling.py | 10 +++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 913ab9b9f8..527618b231 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -21,8 +21,11 @@ import numpy as np from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.ops import random_ops from tensorflow.python.ops import string_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -68,6 +71,43 @@ class ResampleTest(test.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + def testRandomClasses(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + num_samples = 100 # We don't need many samples to test a dirac-delta target distribution + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Apply a random mapping that preserves the data distribution. + def _remap_fn(_): + return math_ops.cast(random_ops.random_uniform([1]) * num_classes, + dtypes.int32)[0] + dataset = dataset.map(_remap_fn) + + # Reshape distribution. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + + classes, _ = zip(*returned) + bincount = np.bincount( + np.array(classes), + minlength=num_classes).astype(np.float32) / len(classes) + + self.assertAllClose(target_dist, bincount, atol=1e-2) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index f4015f19fb..b465397437 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -101,14 +101,16 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): initial_dist_ds)) .map(maybe_warn_on_large_rejection)) - current_probabilities_ds = dataset_ops.Dataset.zip( - (acceptance_dist_ds, class_values_ds)).map(array_ops.gather) + def _gather_and_copy(class_val, acceptance_prob, data): + return (class_val, array_ops.gather(acceptance_prob, class_val), data) + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) filtered_ds = ( - dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds, - dataset)) + current_probabilities_and_class_and_data_ds .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) + return _apply_fn -- GitLab From 9cd65e9a9081640934b2b78cf84b6e51ddd69796 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 21 Mar 2018 15:23:07 -0700 Subject: [PATCH 325/960] [TF:XLA] do not emit bfloat16 sum reductions from tf2xla bfloat16 is a storage format, not a computation format. Doing reductions in this reduced precision is prone to quickly overflow. Instead, emit a float32 computation, and wrap the reduce params and result in conversions to and from float32. PiperOrigin-RevId: 189977590 --- .../compiler/tf2xla/kernels/batch_norm_op.cc | 57 +++++------ .../compiler/tf2xla/kernels/bias_ops.cc | 13 ++- .../compiler/tf2xla/kernels/conv_ops.cc | 6 +- .../tf2xla/kernels/fake_quantize_ops.cc | 18 +++- .../compiler/tf2xla/kernels/image_ops.cc | 8 +- .../compiler/tf2xla/kernels/l2loss_op.cc | 23 ++--- tensorflow/compiler/tf2xla/kernels/lrn_ops.cc | 36 ++++--- .../compiler/tf2xla/kernels/pooling_ops.cc | 95 ++++++++++--------- .../compiler/tf2xla/kernels/reduction_ops.cc | 42 +++++--- .../compiler/tf2xla/kernels/reduction_ops.h | 9 +- .../tf2xla/kernels/reduction_ops_common.cc | 46 ++++----- .../compiler/tf2xla/kernels/scan_ops.cc | 8 +- .../compiler/tf2xla/kernels/softmax_op.cc | 51 +++++----- tensorflow/compiler/tf2xla/xla_helpers.cc | 17 ++++ tensorflow/compiler/tf2xla/xla_helpers.h | 12 +++ tensorflow/compiler/xla/literal_util.cc | 3 + tensorflow/compiler/xla/tests/convert_test.cc | 40 ++++++++ 17 files changed, 305 insertions(+), 179 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc index a249b1869f..931175be11 100644 --- a/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/batch_norm_op.cc @@ -118,30 +118,24 @@ class FusedBatchNormGradOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationBuilder* b = ctx->builder(); - - auto grad_backprop = ctx->Input(0); - auto activations = ctx->Input(1); - auto scale = ctx->Input(2); - auto mean = ctx->Input(3); - auto var = ctx->Input(4); - - TensorShape input_shape = ctx->InputShape(0); - int feature_index = - GetTensorFeatureDimIndex(input_shape.dims(), data_format_); - + xla::ComputationBuilder* const b = ctx->builder(); DataType input_dtype = ctx->input_type(0); DataType scale_dtype = ctx->input_type(2); - xla::PrimitiveType input_type; - OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(input_dtype, &input_type)); - xla::PrimitiveType scale_type; - OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(scale_dtype, &scale_type)); // TODO(b/69928690): support mixed precision in the XLA batch normalization // operators. For now, cast everything to the statistics type (which // may be more precise than the input type). - grad_backprop = b->ConvertElementType(grad_backprop, scale_type); - activations = b->ConvertElementType(activations, scale_type); + auto grad_backprop = + XlaHelpers::ConvertElementType(b, ctx->Input(0), scale_dtype); + auto activations = + XlaHelpers::ConvertElementType(b, ctx->Input(1), scale_dtype); + auto scale = ctx->Input(2); + auto mean = ctx->Input(3); + auto var = ctx->Input(4); + + const int input_dims = ctx->InputShape(0).dims(); + const int feature_index = + GetTensorFeatureDimIndex(input_dims, data_format_); xla::ComputationDataHandle x_backprop; xla::ComputationDataHandle scale_backprop; @@ -156,7 +150,7 @@ class FusedBatchNormGradOp : public XlaOpKernel { offset_backprop = b->GetTupleElement(output, 2); } else { // Reduce over all dimensions except the feature dim. - std::vector reduction_dims(input_shape.dims() - 1); + std::vector reduction_dims(input_dims - 1); std::iota(reduction_dims.begin(), reduction_dims.begin() + feature_index, 0); std::iota(reduction_dims.begin() + feature_index, reduction_dims.end(), @@ -165,9 +159,14 @@ class FusedBatchNormGradOp : public XlaOpKernel { // scale_backprop = y_backprop * ((x - pop_mean) * rsqrt(pop_var + // epsilon)) // x_backprop = y_backprop * (scale * rsqrt(pop_var + epsilon)) - offset_backprop = - b->Reduce(grad_backprop, XlaHelpers::Zero(b, scale_dtype), - *ctx->GetOrCreateAdd(scale_dtype), reduction_dims); + const DataType accumulation_type = + XlaHelpers::SumAccumulationType(scale_dtype); + auto converted = + XlaHelpers::ConvertElementType(b, grad_backprop, accumulation_type); + auto reduce = + b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), reduction_dims); + offset_backprop = XlaHelpers::ConvertElementType(b, reduce, scale_dtype); // scratch1 = rsqrt(pop_var + epsilon) auto neg_half = XlaHelpers::FloatLiteral(b, scale_dtype, -0.5); @@ -175,17 +174,21 @@ class FusedBatchNormGradOp : public XlaOpKernel { b->Pow(b->Add(var, b->ConstantR0(epsilon_)), neg_half); // scratch2 = sum(y_backprop * (x - mean)) - auto scratch2 = b->Reduce( - b->Mul(grad_backprop, b->Sub(activations, mean, {feature_index})), - XlaHelpers::Zero(b, scale_dtype), *ctx->GetOrCreateAdd(scale_dtype), - reduction_dims); + auto mul = + b->Mul(grad_backprop, b->Sub(activations, mean, {feature_index})); + converted = XlaHelpers::ConvertElementType(b, mul, accumulation_type); + reduce = + b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), reduction_dims); + auto scratch2 = XlaHelpers::ConvertElementType(b, reduce, scale_dtype); x_backprop = b->Mul(grad_backprop, b->Mul(scratch1, scale), {feature_index}); scale_backprop = b->Mul(scratch1, scratch2); } - ctx->SetOutput(0, b->ConvertElementType(x_backprop, input_type)); + ctx->SetOutput(0, + XlaHelpers::ConvertElementType(b, x_backprop, input_dtype)); ctx->SetOutput(1, scale_backprop); ctx->SetOutput(2, offset_backprop); ctx->SetConstantOutput(3, Tensor(scale_dtype, {})); diff --git a/tensorflow/compiler/tf2xla/kernels/bias_ops.cc b/tensorflow/compiler/tf2xla/kernels/bias_ops.cc index c667b4e3e3..ed33b8ed2e 100644 --- a/tensorflow/compiler/tf2xla/kernels/bias_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/bias_ops.cc @@ -103,10 +103,15 @@ class BiasAddGradOp : public XlaOpKernel { std::iota(reduce_dims.begin(), reduce_dims.begin() + feature_dim, 0); std::iota(reduce_dims.begin() + feature_dim, reduce_dims.end(), feature_dim + 1); - xla::ComputationDataHandle result = ctx->builder()->Reduce( - ctx->Input(0), XlaHelpers::Zero(ctx->builder(), input_type(0)), - *ctx->GetOrCreateAdd(input_type(0)), reduce_dims); - ctx->SetOutput(0, result); + xla::ComputationBuilder* const b = ctx->builder(); + const DataType accumulation_type = + XlaHelpers::SumAccumulationType(input_type(0)); + auto converted = + XlaHelpers::ConvertElementType(b, ctx->Input(0), accumulation_type); + auto reduce = + b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), reduce_dims); + ctx->SetOutput(0, XlaHelpers::ConvertElementType(b, reduce, input_type(0))); } private: diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index 81cea6d376..c0ee0c9c2e 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -58,7 +58,7 @@ xla::ComputationDataHandle CreateExpandedZero( // Create a mask for depthwise convolution that will make a normal convolution // produce the same results as a depthwise convolution. For a [2, 2, 3, 2] -// depthwise filter this returns a [2, 2, 3, 6] tesnsor +// depthwise filter this returns a [2, 2, 3, 6] tensor // 1 1 0 0 0 0 1 1 0 0 0 0 // 0 0 1 1 0 0 0 0 1 1 0 0 // 0 0 0 0 1 1 0 0 0 0 1 1 @@ -166,6 +166,10 @@ xla::ComputationDataHandle ContractFilterForDepthwiseBackprop( CreateExpandedFilterMask(filter_shape, builder), filter_backprop, CreateExpandedZero(filter_shape, dtype, builder)); return builder->Reshape( + // This reduce does not need inputs to be converted with + // XlaHelpers::SumAccumulationType() since the ExpandedFilterMask with + // ExpandedZero guarantees that only one element is non zero, so there + // cannot be accumulated precision error. builder->Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype), *ctx->GetOrCreateAdd(dtype), {expanded_filter_shape.dims() - 2}), diff --git a/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc index 453a32c494..99470d70e7 100644 --- a/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc @@ -247,6 +247,8 @@ class FakeQuantWithMinMaxVarsGradOp : public XlaOpKernel { const TensorShape gradient_shape = ctx->InputShape(0); xla::ComputationDataHandle input = ctx->Input(1); const DataType data_type = ctx->input_type(1); + const DataType accumulation_type = + XlaHelpers::SumAccumulationType(data_type); xla::ComputationDataHandle input_min = ctx->Input(2); xla::ComputationDataHandle input_max = ctx->Input(3); @@ -265,15 +267,23 @@ class FakeQuantWithMinMaxVarsGradOp : public XlaOpKernel { ctx->SetOutput(0, output0); xla::ComputationDataHandle below_min = b->Lt(input, nudged_input_min); + xla::ComputationDataHandle select1 = b->Select(below_min, gradient, zeroes); + xla::ComputationDataHandle reduce1 = b->ReduceAll( + XlaHelpers::ConvertElementType(b, select1, accumulation_type), + XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type)); xla::ComputationDataHandle output1 = - b->ReduceAll(b->Select(below_min, gradient, zeroes), zero, - *ctx->GetOrCreateAdd(data_type)); + XlaHelpers::ConvertElementType(b, reduce1, data_type); ctx->SetOutput(1, output1); xla::ComputationDataHandle above_max = b->Gt(input, nudged_input_max); + xla::ComputationDataHandle select2 = b->Select(above_max, gradient, zeroes); + xla::ComputationDataHandle reduce2 = b->ReduceAll( + XlaHelpers::ConvertElementType(b, select2, accumulation_type), + XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type)); xla::ComputationDataHandle output2 = - b->ReduceAll(b->Select(above_max, gradient, zeroes), zero, - *ctx->GetOrCreateAdd(data_type)); + XlaHelpers::ConvertElementType(b, reduce2, data_type); ctx->SetOutput(2, output2); } diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc index f22f384256..5eeda79a93 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc @@ -180,9 +180,13 @@ class AdjustContrastOpV2 : public XlaOpKernel { DataType type = context->input_type(0); - auto output = b->Reduce(input, /*init_value=*/XlaHelpers::Zero(b, type), - /*computation=*/*context->GetOrCreateAdd(type), + const DataType accumulation_type = XlaHelpers::SumAccumulationType(type); + auto converted = + XlaHelpers::ConvertElementType(b, input, accumulation_type); + auto reduce = b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *context->GetOrCreateAdd(accumulation_type), {height_dim, width_dim}); + auto output = XlaHelpers::ConvertElementType(b, reduce, type); output = b->Div(output, XlaHelpers::FloatLiteral(b, type, height * width)); std::vector broadcast_dims(input_shape.dims() - 2); diff --git a/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc b/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc index d096415087..c177f08d9c 100644 --- a/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/l2loss_op.cc @@ -29,21 +29,22 @@ class L2LossOp : public XlaOpKernel { explicit L2LossOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { - const TensorShape input_shape = ctx->InputShape(0); + std::vector dims(ctx->InputShape(0).dims()); + std::iota(dims.begin(), dims.end(), 0); DataType dtype = ctx->input_type(0); - xla::ComputationBuilder* b = ctx->builder(); - - auto zero = XlaHelpers::Zero(b, dtype); - auto two = XlaHelpers::IntegerLiteral(b, dtype, 2); - const xla::Computation& add = *ctx->GetOrCreateAdd(dtype); - - std::vector dims(input_shape.dims()); - std::iota(dims.begin(), dims.end(), 0); + xla::ComputationBuilder* const b = ctx->builder(); // output = sum(t ** 2) / 2 - auto x = ctx->Input(0); - ctx->SetOutput(0, b->Div(b->Reduce(b->Mul(x, x), zero, add, dims), two)); + const DataType accumulation_type = XlaHelpers::SumAccumulationType(dtype); + auto t = + XlaHelpers::ConvertElementType(b, ctx->Input(0), accumulation_type); + auto square = b->Mul(t, t); + auto reduce = b->Reduce(square, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), dims); + auto deconverted = XlaHelpers::ConvertElementType(b, reduce, dtype); + auto two = XlaHelpers::IntegerLiteral(b, dtype, 2); + ctx->SetOutput(0, b->Div(deconverted, two)); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc index 759d1a1a2d..1cfee3070f 100644 --- a/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/lrn_ops.cc @@ -47,12 +47,17 @@ class LRNOp : public XlaOpKernel { // We use a window of depth_radius_ * 2 + 1, to account for the current // element and a depth_radius_ on either side. - auto squared = builder->Mul(input, input); - auto sqr_sum = builder->ReduceWindow( - squared, XlaHelpers::Zero(builder, input_type(0)), - *ctx->GetOrCreateAdd(input_type(0)), + auto accumulation_type = XlaHelpers::SumAccumulationType(input_type(0)); + auto converted = + XlaHelpers::ConvertElementType(builder, input, accumulation_type); + auto squared = builder->Mul(converted, converted); + auto reduce = builder->ReduceWindow( + squared, XlaHelpers::Zero(builder, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), /* window_dimensions = */ {1, 1, 1, depth_radius_ * 2 + 1}, /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame); + auto sqr_sum = + XlaHelpers::ConvertElementType(builder, reduce, input_type(0)); auto scale = builder->Pow( builder->Add(builder->ConstantR0(bias_), @@ -130,12 +135,17 @@ class LRNGradOp : public XlaOpKernel { // dyi *= out_grads[j] // grads[k] += dyi - auto squared = builder->Mul(in_image, in_image); - auto sqr_sum = builder->ReduceWindow( - squared, XlaHelpers::Zero(builder, input_type(0)), - *ctx->GetOrCreateAdd(input_type(0)), + auto accumulation_type = XlaHelpers::SumAccumulationType(input_type(0)); + auto converted = + XlaHelpers::ConvertElementType(builder, in_image, accumulation_type); + auto squared = builder->Mul(converted, converted); + auto reduce = builder->ReduceWindow( + squared, XlaHelpers::Zero(builder, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), /* window_dimensions = */ {1, 1, 1, depth_radius_ * 2 + 1}, /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame); + auto sqr_sum = + XlaHelpers::ConvertElementType(builder, reduce, input_type(0)); auto norm = builder->Add(builder->ConstantR0(bias_), @@ -146,11 +156,15 @@ class LRNGradOp : public XlaOpKernel { builder->Div(out_image, norm)), in_grads); - auto dy_reduced = builder->ReduceWindow( - dy, XlaHelpers::Zero(builder, input_type(0)), - *ctx->GetOrCreateAdd(input_type(0)), + auto converted_dy = + XlaHelpers::ConvertElementType(builder, dy, accumulation_type); + auto dy_reduce = builder->ReduceWindow( + converted_dy, XlaHelpers::Zero(builder, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), /* window_dimensions = */ {1, 1, 1, depth_radius_ * 2 + 1}, /* window_strides = */ {1, 1, 1, 1}, xla::Padding::kSame); + auto dy_reduced = + XlaHelpers::ConvertElementType(builder, dy_reduce, input_type(0)); xla::ComputationDataHandle gradients = builder->Add( builder->Mul(in_image, dy_reduced), diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc index 086a9491aa..5f635dd1bc 100644 --- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc @@ -35,8 +35,11 @@ namespace { // Superclass of pooling ops. class PoolingOp : public XlaOpKernel { public: - PoolingOp(OpKernelConstruction* ctx, int num_spatial_dims) - : XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims) { + PoolingOp(OpKernelConstruction* ctx, int num_spatial_dims, + const DataType reduction_type) + : XlaOpKernel(ctx), + num_spatial_dims_(num_spatial_dims), + reduction_type_(reduction_type) { if (ctx->num_inputs() == 1) { std::vector ksize_int; std::vector stride_int; @@ -63,12 +66,10 @@ class PoolingOp : public XlaOpKernel { int num_dims() const { return num_spatial_dims_ + 2; } // Method that builds an initial value to use in reductions. - virtual xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b, - DataType data_type) = 0; + virtual xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) = 0; // The reduction operation to apply to each window. - virtual const xla::Computation* Reduction(XlaOpKernelContext* ctx, - DataType dtype) = 0; + virtual const xla::Computation* Reduction(XlaOpKernelContext* ctx) = 0; // A post-processing operation to apply on the outputs of the ReduceWindow. virtual xla::ComputationDataHandle PostProcessOutput( @@ -76,9 +77,6 @@ class PoolingOp : public XlaOpKernel { DataType dtype, const TensorShape& input_shape) = 0; void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); - const TensorShape input_shape = ctx->InputShape(0); - std::vector ksize = ksize_; std::vector stride = stride_; if (ctx->num_inputs() != 1) { @@ -106,16 +104,20 @@ class PoolingOp : public XlaOpKernel { stride.clear(); OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(2, &stride)); } + const TensorShape input_shape = ctx->InputShape(0); OP_REQUIRES(ctx, input_shape.dims() == num_dims(), errors::InvalidArgument("Input to ", type_string(), " operator must have ", num_dims(), " dimensions")); - const DataType type = input_type(0); - xla::ComputationDataHandle pooled = ctx->builder()->ReduceWindow( - input, InitValue(ctx->builder(), type), *Reduction(ctx, type), ksize, - stride, padding_); - ctx->SetOutput(0, PostProcessOutput(ctx, pooled, type, input_shape)); + xla::ComputationBuilder* const b = ctx->builder(); + auto input = + XlaHelpers::ConvertElementType(b, ctx->Input(0), reduction_type_); + auto reduce = ctx->builder()->ReduceWindow( + input, InitValue(b), *Reduction(ctx), ksize, stride, padding_); + auto pooled = XlaHelpers::ConvertElementType(b, reduce, input_type(0)); + ctx->SetOutput(0, + PostProcessOutput(ctx, pooled, input_type(0), input_shape)); } protected: @@ -124,21 +126,21 @@ class PoolingOp : public XlaOpKernel { std::vector stride_; xla::Padding padding_; TensorFormat data_format_ = FORMAT_NHWC; + DataType reduction_type_; }; class MaxPoolOp : public PoolingOp { public: MaxPoolOp(OpKernelConstruction* ctx, int num_spatial_dims) - : PoolingOp(ctx, /*num_spatial_dims=*/num_spatial_dims) {} + : PoolingOp(ctx, /*num_spatial_dims=*/num_spatial_dims, + /*reduction_type=*/ctx->input_type(0)) {} - xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b, - DataType data_type) override { - return XlaHelpers::MinValue(b, data_type); + xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) override { + return XlaHelpers::MinValue(b, reduction_type_); } - const xla::Computation* Reduction(XlaOpKernelContext* ctx, - DataType dtype) override { - return ctx->GetOrCreateMax(dtype); + const xla::Computation* Reduction(XlaOpKernelContext* ctx) override { + return ctx->GetOrCreateMax(reduction_type_); } xla::ComputationDataHandle PostProcessOutput( @@ -209,15 +211,17 @@ static xla::ComputationDataHandle AvgPoolDivideByCount( } // Build a matrix of all 1s, with the same width/height as the input. + const DataType accumulation_type = XlaHelpers::SumAccumulationType(dtype); auto ones = ctx->builder()->Broadcast( - XlaHelpers::One(ctx->builder(), dtype), input_dim_sizes); + XlaHelpers::One(ctx->builder(), accumulation_type), input_dim_sizes); // Perform a ReduceWindow with the same window size, strides, and padding // to count the number of contributions to each result element. - auto counts = ctx->builder()->ReduceWindow( - ones, XlaHelpers::Zero(ctx->builder(), dtype), - *ctx->GetOrCreateAdd(dtype), window_ksize, window_stride, + auto reduce = ctx->builder()->ReduceWindow( + ones, XlaHelpers::Zero(ctx->builder(), accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), window_ksize, window_stride, xla::Padding::kSame); + auto counts = XlaHelpers::ConvertElementType(ctx->builder(), reduce, dtype); return ctx->builder()->Div(output, counts, window_dims); } @@ -226,16 +230,16 @@ static xla::ComputationDataHandle AvgPoolDivideByCount( class AvgPoolOp : public PoolingOp { public: AvgPoolOp(OpKernelConstruction* ctx, int num_spatial_dims) - : PoolingOp(ctx, num_spatial_dims) {} + : PoolingOp(ctx, /*num_spatial_dims=*/num_spatial_dims, + /*reduction_type=*/ + XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} - xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b, - DataType data_type) override { - return XlaHelpers::Zero(b, data_type); + xla::ComputationDataHandle InitValue(xla::ComputationBuilder* b) override { + return XlaHelpers::Zero(b, reduction_type_); } - const xla::Computation* Reduction(XlaOpKernelContext* ctx, - DataType dtype) override { - return ctx->GetOrCreateAdd(dtype); + const xla::Computation* Reduction(XlaOpKernelContext* ctx) override { + return ctx->GetOrCreateAdd(reduction_type_); } xla::ComputationDataHandle PostProcessOutput( @@ -455,14 +459,12 @@ class AvgPoolGradOp : public XlaOpKernel { gradients_shape, filter_shape, out_backprop_shape, stride_, padding_, data_format_, &dims)); + // The input gradients are computed by a convolution of the output gradients + // and the filter, with some appropriate padding. See the comment at the top + // of conv_grad_ops.h for details. + xla::ComputationBuilder* const b = ctx->builder(); auto out_backprop = ctx->Input(1); - - // The input gradients are computed by a convolution of the output - // gradients - // and the filter, with some appropriate padding. See the comment at - // the top of conv_grad_ops.h for details. - DataType dtype = input_type(1); - + auto dtype = input_type(1); xla::Padding xla_padding = (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame; @@ -483,17 +485,18 @@ class AvgPoolGradOp : public XlaOpKernel { padding->set_interior_padding(dims.spatial_dims[i].stride - 1); } - auto zero = XlaHelpers::Zero(ctx->builder(), dtype); - auto padded_gradients = - ctx->builder()->Pad(out_backprop_div, zero, padding_config); + auto zero = XlaHelpers::Zero(b, dtype); + auto padded_gradients = b->Pad(out_backprop_div, zero, padding_config); // in_backprop = padded_gradients ones std::vector ones(num_dims(), 1LL); - xla::ComputationDataHandle in_backprop = ctx->builder()->ReduceWindow( - padded_gradients, zero, *ctx->GetOrCreateAdd(dtype), ksize_, + auto accumulation_type = XlaHelpers::SumAccumulationType(dtype); + auto in_backprop = b->ReduceWindow( + XlaHelpers::ConvertElementType(b, padded_gradients, accumulation_type), + XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), ksize_, /* window_strides=*/ones, xla::Padding::kValid); - - ctx->SetOutput(0, in_backprop); + ctx->SetOutput(0, XlaHelpers::ConvertElementType(b, in_backprop, dtype)); } protected: diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc index 03b13b2924..812d258cd1 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc @@ -27,7 +27,13 @@ namespace { class SumOp : public XlaReductionOp { public: - explicit SumOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit SumOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, + XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} + xla::ComputationDataHandle InitialValue( + xla::ComputationBuilder* builder) override { + return XlaHelpers::Zero(builder, reduction_type_); + } void BuildReducer(xla::ComputationBuilder* builder, const xla::ComputationDataHandle& scalar_lhs, const xla::ComputationDataHandle& scalar_rhs) override { @@ -39,11 +45,13 @@ REGISTER_XLA_OP(Name("Sum").CompileTimeConstInput("reduction_indices"), SumOp); class ProdOp : public XlaReductionOp { public: - explicit ProdOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit ProdOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, + XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} xla::ComputationDataHandle InitialValue( xla::ComputationBuilder* builder) override { - return XlaHelpers::One(builder, input_type(0)); + return XlaHelpers::One(builder, reduction_type_); } void BuildReducer(xla::ComputationBuilder* builder, @@ -58,13 +66,12 @@ REGISTER_XLA_OP(Name("Prod").CompileTimeConstInput("reduction_indices"), class MinOp : public XlaReductionOp { public: - explicit MinOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit MinOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, ctx->input_type(0)) {} xla::ComputationDataHandle InitialValue( xla::ComputationBuilder* builder) override { - xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(input_type(0), &type)); - return builder->ConstantLiteral(xla::Literal::MaxValue(type)); + return XlaHelpers::MaxValue(builder, reduction_type_); } void BuildReducer(xla::ComputationBuilder* builder, @@ -78,13 +85,12 @@ REGISTER_XLA_OP(Name("Min").CompileTimeConstInput("reduction_indices"), MinOp); class MaxOp : public XlaReductionOp { public: - explicit MaxOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit MaxOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, ctx->input_type(0)) {} xla::ComputationDataHandle InitialValue( xla::ComputationBuilder* builder) override { - xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(input_type(0), &type)); - return builder->ConstantLiteral(xla::Literal::MinValue(type)); + return XlaHelpers::MinValue(builder, reduction_type_); } void BuildReducer(xla::ComputationBuilder* builder, @@ -98,8 +104,14 @@ REGISTER_XLA_OP(Name("Max").CompileTimeConstInput("reduction_indices"), MaxOp); class MeanOp : public XlaReductionOp { public: - explicit MeanOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit MeanOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, + XlaHelpers::SumAccumulationType(ctx->input_type(0))) {} + xla::ComputationDataHandle InitialValue( + xla::ComputationBuilder* builder) override { + return XlaHelpers::Zero(builder, reduction_type_); + } void BuildReducer(xla::ComputationBuilder* builder, const xla::ComputationDataHandle& scalar_lhs, const xla::ComputationDataHandle& scalar_rhs) override { @@ -121,7 +133,8 @@ REGISTER_XLA_OP(Name("Mean").CompileTimeConstInput("reduction_indices"), class AllOp : public XlaReductionOp { public: - explicit AllOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit AllOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, ctx->input_type(0)) {} xla::ComputationDataHandle InitialValue( xla::ComputationBuilder* builder) override { @@ -139,7 +152,8 @@ REGISTER_XLA_OP(Name("All").CompileTimeConstInput("reduction_indices"), AllOp); class AnyOp : public XlaReductionOp { public: - explicit AnyOp(OpKernelConstruction* ctx) : XlaReductionOp(ctx) {} + explicit AnyOp(OpKernelConstruction* ctx) + : XlaReductionOp(ctx, ctx->input_type(0)) {} xla::ComputationDataHandle InitialValue( xla::ComputationBuilder* builder) override { diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.h b/tensorflow/compiler/tf2xla/kernels/reduction_ops.h index 9aca6d8fed..f3181f0dad 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.h +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.h @@ -33,12 +33,12 @@ namespace tensorflow { // xla::ComputationBuilder. class XlaReductionOp : public XlaOpKernel { public: - explicit XlaReductionOp(OpKernelConstruction* ctx); + XlaReductionOp(OpKernelConstruction* ctx, DataType reduction_type); ~XlaReductionOp() override {} - // Return the base case for the reduction. Defaults to zero. + // Return the base case for the reduction. virtual xla::ComputationDataHandle InitialValue( - xla::ComputationBuilder* builder); + xla::ComputationBuilder* builder) = 0; // Implement the (scalar,scalar)->scalar lambda that should be // applied to each pair of elements to be reduced. The desired @@ -63,6 +63,9 @@ class XlaReductionOp : public XlaOpKernel { private: // True if the number of dimensions should be maintained. bool keep_dims_; + + protected: + DataType reduction_type_; }; } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc index 4b5d09eb9f..64fe765ae9 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops_common.cc @@ -24,19 +24,15 @@ limitations under the License. namespace tensorflow { -XlaReductionOp::XlaReductionOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { +XlaReductionOp::XlaReductionOp(OpKernelConstruction* ctx, + DataType reduction_type) + : XlaOpKernel(ctx), reduction_type_(reduction_type) { const DataType dt = BaseType(input_type(0)); OP_REQUIRES_OK(ctx, ctx->MatchSignature({dt, DT_INT32}, {dt})); OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_)); } -// Return the base case for the reduction. Defaults to zero. -xla::ComputationDataHandle XlaReductionOp::InitialValue( - xla::ComputationBuilder* builder) { - return XlaHelpers::Zero(builder, input_type(0)); -} - // Unless BuildFinalizer is overridden the reduction has no // finalizer. xla::ComputationDataHandle XlaReductionOp::BuildFinalizer( @@ -100,36 +96,26 @@ void XlaReductionOp::Compile(XlaOpKernelContext* ctx) { string desc = ctx->op_kernel().name(); - // Call virtual method to get the initial value. - const xla::ComputationDataHandle initial = InitialValue(ctx->builder()); + xla::ComputationBuilder* const b = ctx->builder(); // Construct the builder for the reduction lambda. - xla::ComputationBuilder r(ctx->builder()->client(), - strings::StrCat(desc, "-reduction")); + xla::ComputationBuilder r(b->client(), strings::StrCat(desc, "-reduction")); xla::PrimitiveType type; - TF_CHECK_OK(DataTypeToPrimitiveType(input_type(0), &type)); - // Make two scalar parameters of the desired type for the lambda. - xla::ComputationDataHandle rx = - r.Parameter(0, xla::ShapeUtil::MakeShape(type, {}), "x"); - xla::ComputationDataHandle ry = - r.Parameter(1, xla::ShapeUtil::MakeShape(type, {}), "y"); - - auto data = ctx->Input(0); + TF_CHECK_OK(DataTypeToPrimitiveType(reduction_type_, &type)); + auto data = b->ConvertElementType(ctx->Input(0), type); + // Call virtual method to get the initial value. + auto initial = b->ConvertElementType(InitialValue(b), type); + // Make two scalar parameters of the desired type for the lambda. + auto rx = r.Parameter(0, xla::ShapeUtil::MakeShape(type, {}), "x"); + auto ry = r.Parameter(1, xla::ShapeUtil::MakeShape(type, {}), "y"); // Call virtual method to build the reduction lambda. BuildReducer(&r, rx, ry); xla::Computation reduction_computation = r.Build().ConsumeValueOrDie(); - xla::ComputationDataHandle reduce = - ctx->builder()->Reduce(data, initial, reduction_computation, xla_axes); - xla::ComputationDataHandle finalized = - BuildFinalizer(ctx->builder(), reduce, num_elements_reduced); - - xla::ComputationDataHandle result; - if (keep_dims_) { - result = ctx->builder()->Reshape(finalized, final_shape); - } else { - result = finalized; - } + auto reduce = b->Reduce(data, initial, reduction_computation, xla_axes); + auto deconverted = XlaHelpers::ConvertElementType(b, reduce, input_type(0)); + auto finalized = BuildFinalizer(b, deconverted, num_elements_reduced); + auto result = keep_dims_ ? b->Reshape(finalized, final_shape) : finalized; ctx->SetOutput(0, result); } diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index ee4a94164c..4cfa28a0ce 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -66,7 +66,7 @@ class ScanOp : public XlaOpKernel { -input_shape.dims(), ", ", input_shape.dims(), "), but got ", axis)); - DataType dtype = ctx->input_type(0); + DataType dtype = XlaHelpers::SumAccumulationType(ctx->input_type(0)); if (input_shape.num_elements() == 0) { // Exit early if there is nothing to compute. @@ -91,7 +91,6 @@ class ScanOp : public XlaOpKernel { std::swap(padding[axis].first, padding[axis].second); } - xla::ComputationDataHandle input = ctx->Input(0); xla::ComputationDataHandle init; const xla::Computation* reducer; if (sum_) { @@ -102,7 +101,10 @@ class ScanOp : public XlaOpKernel { reducer = ctx->GetOrCreateMul(dtype); } auto output = builder->ReduceWindowWithGeneralPadding( - ctx->Input(0), init, *reducer, window_dims, window_strides, padding); + XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init, + *reducer, window_dims, window_strides, padding); + output = + XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0)); // In exclusive mode, we have computed an extra element containing the sum // of all the input elements. Slice off this extra "last" element. diff --git a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc index 750a4c2dec..aa47cb799f 100644 --- a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc @@ -42,9 +42,8 @@ class SoftmaxOp : public XlaOpKernel { const DataType type = input_type(0); auto logits = ctx->Input(0); - xla::ComputationBuilder* b = ctx->builder(); + xla::ComputationBuilder* const b = ctx->builder(); const xla::Computation& max_func = *ctx->GetOrCreateMax(type); - const xla::Computation& add_func = *ctx->GetOrCreateAdd(type); // Find the max in each batch, resulting in a tensor of shape [batch] auto logits_max = @@ -52,21 +51,20 @@ class SoftmaxOp : public XlaOpKernel { // Subtract the max in batch b from every element in batch b. Broadcasts // along the batch dimension. auto shifted_logits = b->Sub(logits, logits_max, {kBatchDim}); - xla::ComputationDataHandle softmax; - if (log_) { - // softmax = shifted_logits - log(sum(exp(shifted_logits))) - auto log_sum_exp = - b->Log(b->Reduce(b->Exp(shifted_logits), XlaHelpers::Zero(b, type), - add_func, {kClassDim})); - softmax = b->Sub(shifted_logits, log_sum_exp, {kBatchDim}); - } else { - // softmax = exp(shifted_logits) / sum(exp(shifted_logits)) - auto exp_shifted = b->Exp(shifted_logits); - auto sum_exp = b->Reduce(exp_shifted, XlaHelpers::Zero(b, type), add_func, - {kClassDim}); - softmax = b->Div(exp_shifted, sum_exp, {kBatchDim}); - } - + auto exp_shifted = b->Exp(shifted_logits); + const DataType accumulation_type = XlaHelpers::SumAccumulationType(type); + auto converted = + XlaHelpers::ConvertElementType(b, exp_shifted, accumulation_type); + auto reduce = + b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), {kClassDim}); + auto sum = XlaHelpers::ConvertElementType(b, reduce, type); + auto softmax = + log_ + // softmax = shifted_logits - log(sum(exp(shifted_logits))) + ? b->Sub(shifted_logits, b->Log(sum), {kBatchDim}) + // softmax = exp(shifted_logits) / sum(exp(shifted_logits)) + : b->Div(exp_shifted, sum, {kBatchDim}); ctx->SetOutput(0, softmax); } @@ -82,7 +80,6 @@ CrossEntropyWithLogits(XlaOpKernelContext* ctx, DataType type, const xla::ComputationDataHandle& logits, const xla::ComputationDataHandle& labels) { const xla::Computation& max_func = *ctx->GetOrCreateMax(type); - const xla::Computation& add_func = *ctx->GetOrCreateAdd(type); const int kBatchDim = 0; const int kClassDim = 1; @@ -100,8 +97,12 @@ CrossEntropyWithLogits(XlaOpKernelContext* ctx, DataType type, auto exp_shifted_logits = b->Exp(shifted_logits); // sum_{class} (exp(logits - max_logits)) - auto sum_exp = b->Reduce(exp_shifted_logits, XlaHelpers::Zero(b, type), - add_func, {kClassDim}); + const DataType accumulation_type = XlaHelpers::SumAccumulationType(type); + auto converted = + XlaHelpers::ConvertElementType(b, exp_shifted_logits, accumulation_type); + auto reduce = b->Reduce(converted, XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), {kClassDim}); + auto sum_exp = XlaHelpers::ConvertElementType(b, reduce, type); // log(sum(exp(logits - max_logits))) auto log_sum_exp = b->Log(sum_exp); @@ -110,9 +111,13 @@ CrossEntropyWithLogits(XlaOpKernelContext* ctx, DataType type, // ((logits - max_logits) - log(sum(exp(logits - max_logits))))) // along classes // (The subtraction broadcasts along the batch dimension.) - xla::ComputationDataHandle loss = b->Reduce( - b->Mul(b->Neg(labels), b->Sub(shifted_logits, log_sum_exp, {kBatchDim})), - XlaHelpers::Zero(b, type), add_func, {kClassDim}); + auto sub = b->Sub(shifted_logits, log_sum_exp, {kBatchDim}); + auto mul = b->Mul(b->Neg(labels), sub); + auto sum = + b->Reduce(XlaHelpers::ConvertElementType(b, mul, accumulation_type), + XlaHelpers::Zero(b, accumulation_type), + *ctx->GetOrCreateAdd(accumulation_type), {kClassDim}); + auto loss = XlaHelpers::ConvertElementType(b, sum, type); // backprop: prob - labels, where // prob = exp(logits - max_logits) / sum(exp(logits - max_logits)) diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index f048662953..3b0b2f06eb 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/array_slice.h" namespace tensorflow { @@ -273,4 +274,20 @@ Status XlaHelpers::OneHot(xla::ComputationBuilder* builder, int64 depth, return Status::OK(); } +DataType XlaHelpers::SumAccumulationType(const DataType& dtype) { + if (dtype == DT_BFLOAT16) { + return DT_FLOAT; + } + return dtype; +} + +xla::ComputationDataHandle XlaHelpers::ConvertElementType( + xla::ComputationBuilder* const builder, + const xla::ComputationDataHandle& operand, + const DataType new_element_type) { + xla::PrimitiveType convert_to; + TF_CHECK_OK(DataTypeToPrimitiveType(new_element_type, &convert_to)); + return builder->ConvertElementType(operand, convert_to); +} + } // end namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_helpers.h b/tensorflow/compiler/tf2xla/xla_helpers.h index 2a027db4c8..68ab93b64a 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.h +++ b/tensorflow/compiler/tf2xla/xla_helpers.h @@ -107,6 +107,18 @@ class XlaHelpers { const xla::ComputationDataHandle& on_value, const xla::ComputationDataHandle& off_value, xla::ComputationDataHandle* one_hot); + + // Certain DataTypes should use increased precision DataTypes when performing + // reductions. This function remaps a given DataType to a higher precision + // DataType if needed. + static DataType SumAccumulationType(const DataType& dtype); + + // A helper for creating a ConvertElementType xla op given a DataType rather + // than the xla::PrimitiveType. + static xla::ComputationDataHandle ConvertElementType( + xla::ComputationBuilder* const builder, + const xla::ComputationDataHandle& operand, + const DataType new_element_type); }; } // end namespace tensorflow diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 20508edaa7..214c2030cd 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -1463,6 +1463,9 @@ StatusOr> ConvertIfDestTypeMatches( StatusOr> Literal::Convert( PrimitiveType primitive_dest_type) const { TF_RET_CHECK(ShapeUtil::IsArray(shape())); + if (shape().element_type() == primitive_dest_type) { + return CloneToUnique(); + } switch (shape().element_type()) { #define CONVERT_IF_DEST_TYPE_MATCHES(type) \ case (type): \ diff --git a/tensorflow/compiler/xla/tests/convert_test.cc b/tensorflow/compiler/xla/tests/convert_test.cc index 7926767a4f..9a899b7914 100644 --- a/tensorflow/compiler/xla/tests/convert_test.cc +++ b/tensorflow/compiler/xla/tests/convert_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/casts.h" +#include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -384,5 +385,44 @@ XLA_TEST_F(ConvertTest, ConvertR1F32ToR1F16) { ComputeAndCompareR1(&builder, expected_output, {dot_lhs_handle.get()}); } + +XLA_TEST_F(ConvertTest, ConvertC64ToC64) { + ComputationBuilder builder(client_, TestName()); + std::vector x = {{42.0f, 64.0f}}; + builder.ConvertElementType(builder.ConstantR1(x), C64); + ComputeAndCompareR1(&builder, x, {}, ErrorSpec(0.0001)); +} + +XLA_TEST_F(ConvertTest, ConvertS64S64) { + ComputationBuilder builder(client_, TestName()); + std::vector x = {{-42, 64}}; + builder.ConvertElementType(builder.ConstantR1(x), S64); + ComputeAndCompareR1(&builder, x, {}); +} + +XLA_TEST_F(ConvertTest, ConvertU64U64) { + ComputationBuilder builder(client_, TestName()); + std::vector x = {{42, 64}}; + builder.ConvertElementType(builder.ConstantR1(x), U64); + ComputeAndCompareR1(&builder, x, {}); +} + +XLA_TEST_F(ConvertTest, ConvertU64S64) { + ComputationBuilder builder(client_, TestName()); + std::vector unsigned_x = {{42, UINT64_MAX}}; + builder.ConvertElementType(builder.ConstantR1(unsigned_x), S64); + std::vector signed_x = {{42, -1}}; + ComputeAndCompareR1(&builder, signed_x, {}); +} + +XLA_TEST_F(ConvertTest, ConvertS64U64) { + ComputationBuilder builder(client_, TestName()); + std::vector signed_x = {{42, -1, INT64_MIN}}; + builder.ConvertElementType(builder.ConstantR1(signed_x), U64); + std::vector unsigned_x = { + {42, UINT64_MAX, tensorflow::MathUtil::IPow(2, 63)}}; + ComputeAndCompareR1(&builder, unsigned_x, {}); +} + } // namespace } // namespace xla -- GitLab From 942a32bc71291994c14625b6311268319dd27808 Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 21 Mar 2018 15:55:30 -0700 Subject: [PATCH 326/960] Change Softmax on CUDA to use fp32 for denominator when input/output are fp16. This avoids potential overflow in the denominator, also makes sure accumulation is done in high precision. PiperOrigin-RevId: 189982655 --- tensorflow/core/kernels/softmax_op_gpu.cu.cc | 90 ++++++++++++++----- tensorflow/python/framework/test_util.py | 75 +++++++++------- tensorflow/python/kernel_tests/BUILD | 2 +- .../python/kernel_tests/softmax_op_test.py | 38 ++++++-- 4 files changed, 145 insertions(+), 60 deletions(-) diff --git a/tensorflow/core/kernels/softmax_op_gpu.cu.cc b/tensorflow/core/kernels/softmax_op_gpu.cu.cc index 1f4a82a733..130d693dbd 100644 --- a/tensorflow/core/kernels/softmax_op_gpu.cu.cc +++ b/tensorflow/core/kernels/softmax_op_gpu.cu.cc @@ -33,8 +33,42 @@ namespace tensorflow { namespace { +template +__device__ __host__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t); + +template +__device__ __host__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t) { + return t; +} + +template <> +__device__ __host__ EIGEN_STRONG_INLINE float strict_cast( + Eigen::half t) { + return functor::HalfToFloat()(t); +} + +template <> +__device__ __host__ EIGEN_STRONG_INLINE Eigen::half +strict_cast(float t) { + return functor::FloatToHalf()(t); +} + template -__global__ void GenerateNormalizedProb(const T* logits, const T* sum_probs, +struct softmax_traits { + using accumulator_type = T; +}; + +template <> +struct softmax_traits { + using accumulator_type = float; +}; + +template +__global__ void GenerateNormalizedProb(const T* logits, const U* sum_probs, const T* max_logits, T* output, const int num_rows, const int num_cols, const bool in_log_space) { @@ -43,25 +77,33 @@ __global__ void GenerateNormalizedProb(const T* logits, const T* sum_probs, const int row = tid / num_cols; const int col = tid % num_cols; + // TODO(jamesqin): change to half2 load when inputs are Eigen::half. + U input = strict_cast(logits[tid]); + U max_val = strict_cast(ldg(max_logits + row)); + U result; + if (row < num_rows && col < num_cols) { - if (in_log_space) - output[tid] = - logits[tid] - ldg(max_logits + row) - log(ldg(sum_probs + row)); - else - output[tid] = - exp(logits[tid] - ldg(max_logits + row)) / ldg(sum_probs + row); + if (in_log_space) { + result = input - max_val - log(ldg(sum_probs + row)); + } else { + result = exp(input - max_val) / ldg(sum_probs + row); + } + output[tid] = strict_cast(result); } } -template +template struct SubtractAndExpFunctor { __host__ __device__ SubtractAndExpFunctor(const T* logits, const T* max_logits, const int num_cols) : logits_(logits), max_logits_(max_logits), num_cols_(num_cols) {} - __host__ __device__ T operator()(const int gid) const { - return exp(logits_[gid] - ldg(max_logits_ + gid / num_cols_)); + __host__ __device__ U operator()(const int gid) const { + // TODO(jamesqin): change to half2 load when inputs are Eigen::half. + const U diff = + strict_cast(logits_[gid] - ldg(max_logits_ + gid / num_cols_)); + return exp(diff); } const T* logits_; @@ -80,7 +122,6 @@ void DoRowReduction(OpKernelContext* context, T* output, InputIter input, functor::ReduceImpl( context, output, input, 2, rows, cols, 1, 1, constants.kOne, op); } - } // namespace template @@ -108,8 +149,10 @@ class SoftmaxOpGPU : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::value, softmax_out->shape(), &max_logits)); + + typedef typename softmax_traits::accumulator_type acc_type; OP_REQUIRES_OK(context, - context->allocate_temp(DataTypeToEnum::value, + context->allocate_temp(DataTypeToEnum::value, softmax_out->shape(), &sum_probs)); DoRowReduction( @@ -120,25 +163,28 @@ class SoftmaxOpGPU : public OpKernel { const int numBlocks = Eigen::divup(rows * cols, numThreads); cub::CountingInputIterator counting_iterator(0); - typedef cub::TransformInputIterator, + typedef cub::TransformInputIterator, cub::CountingInputIterator> InputIterType; InputIterType input_itr( counting_iterator, - SubtractAndExpFunctor( + SubtractAndExpFunctor( reinterpret_cast(logits_in_.flat().data()), reinterpret_cast(max_logits.flat().data()), cols)); - DoRowReduction( - context, const_cast(sum_probs.flat().data()), input_itr, rows, - cols); + DoRowReduction( + context, const_cast(sum_probs.flat().data()), + input_itr, rows, cols); - GenerateNormalizedProb<<>>( - reinterpret_cast(logits_in_.flat().data()), - reinterpret_cast(sum_probs.flat().data()), - reinterpret_cast(max_logits.flat().data()), - const_cast(softmax_out->flat().data()), rows, cols, log_); + GenerateNormalizedProb + <<>>( + reinterpret_cast(logits_in_.flat().data()), + reinterpret_cast( + sum_probs.flat().data()), + reinterpret_cast(max_logits.flat().data()), + const_cast(softmax_out->flat().data()), rows, cols, log_); } } diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index d8f8569939..43106b6e59 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -53,6 +53,7 @@ from tensorflow.python.eager import tape # pylint: disable=unused-import from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed @@ -201,6 +202,7 @@ def _strip_checkpoint_v2_randomized(graph_def): def IsGoogleCudaEnabled(): return pywrap_tensorflow.IsGoogleCudaEnabled() + def CudaSupportsHalfMatMulAndConv(): return pywrap_tensorflow.CudaSupportsHalfMatMulAndConv() @@ -335,6 +337,8 @@ def _use_c_api_wrapper(fn, use_c_api, *args, **kwargs): # Make sure default graph reflects prev_value in case next test doesn't call # reset_default_graph(). ops.reset_default_graph() + + # pylint: disable=protected-access @@ -451,7 +455,8 @@ def with_c_api(cls): # If the C API is already enabled, don't do anything. Some tests break if the # same test is run twice, so this allows us to turn on the C API by default # without breaking these tests. - if ops._USE_C_API: return cls + if ops._USE_C_API: + return cls for name, value in cls.__dict__.copy().items(): if callable(value) and name.startswith("test"): @@ -469,6 +474,7 @@ def assert_no_new_pyobjects_executing_eagerly(f): Useful for checking that there are no missing Py_DECREFs in the C exercised by a bit of Python. """ + def decorator(self, **kwargs): """Warms up, gets an object count, runs the test, checks for new objects.""" with context.eager_mode(): @@ -483,8 +489,10 @@ def assert_no_new_pyobjects_executing_eagerly(f): new_count = len(gc.get_objects()) self.assertEqual(previous_count, new_count) gc.enable() + return decorator + def assert_no_new_tensors(f): """Decorator for asserting that no new Tensors persist after a test. @@ -508,17 +516,15 @@ def assert_no_new_tensors(f): def _is_tensorflow_object(obj): try: - return isinstance(obj, ( - ops.Tensor, - variables.Variable, - tensor_shape.Dimension, - tensor_shape.TensorShape)) + return isinstance(obj, + (ops.Tensor, variables.Variable, + tensor_shape.Dimension, tensor_shape.TensorShape)) except ReferenceError: # If the object no longer exists, we don't care about it. return False - tensors_before = set(id(obj) for obj in gc.get_objects() - if _is_tensorflow_object(obj)) + tensors_before = set( + id(obj) for obj in gc.get_objects() if _is_tensorflow_object(obj)) outside_graph_key = ops.get_default_graph()._graph_key with ops.Graph().as_default(): # Run the test in a new graph so that collections get cleared when it's @@ -572,18 +578,18 @@ def assert_no_garbage_created(f): "likely due to a reference cycle. New objects in cycle(s):") for i, obj in enumerate(gc.garbage[previous_garbage:]): try: - logging.error( - "Object %d of %d" % (i, len(gc.garbage) - previous_garbage)) + logging.error("Object %d of %d", i, + len(gc.garbage) - previous_garbage) + def _safe_object_str(obj): return "<%s %d>" % (obj.__class__.__name__, id(obj)) - logging.error(" Object type: %s" % (_safe_object_str(obj),)) - logging.error(" Referrer types: %s" % ( - ', '.join([_safe_object_str(ref) - for ref in gc.get_referrers(obj)]),)) - logging.error(" Referent types: %s" % ( - ', '.join([_safe_object_str(ref) - for ref in gc.get_referents(obj)]),)) - logging.error(" Object attribute names: %s" % (dir(obj),)) + + logging.error(" Object type: %s", _safe_object_str(obj)) + logging.error(" Referrer types: %s", ", ".join( + [_safe_object_str(ref) for ref in gc.get_referrers(obj)])) + logging.error(" Referent types: %s", ", ".join( + [_safe_object_str(ref) for ref in gc.get_referents(obj)])) + logging.error(" Object attribute names: %s", dir(obj)) logging.error(" Object __str__:") logging.error(obj) logging.error(" Object __repr__:") @@ -705,15 +711,23 @@ def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None): return 0, 0 return int(match.group(1)), int(match.group(2)) - for local_device in device_lib.list_local_devices(): - if local_device.device_type == "GPU": - if (min_cuda_compute_capability is None or - compute_capability_from_device_desc(local_device.physical_device_desc) - >= min_cuda_compute_capability): + try: + for local_device in device_lib.list_local_devices(): + if local_device.device_type == "GPU": + if (min_cuda_compute_capability is None or + compute_capability_from_device_desc( + local_device.physical_device_desc) >= + min_cuda_compute_capability): + return True + if local_device.device_type == "SYCL" and not cuda_only: return True - if local_device.device_type == "SYCL" and not cuda_only: - return True - return False + return False + except errors_impl.NotFoundError as e: + if not all([x in str(e) for x in ["CUDA", "not find"]]): + raise e + else: + logging.error(str(e)) + return False @contextlib.contextmanager @@ -1256,9 +1270,9 @@ class TensorFlowTestCase(googletest.TestCase): msg="Mismatched value: a%s is different from b%s." % (path_str, path_str)) except TypeError as e: - msg = "Error: a%s has %s, but b%s has %s" % ( - path_str, type(a), path_str, type(b)) - e.args = ((e.args[0] + ' : ' + msg,) + e.args[1:]) + msg = "Error: a%s has %s, but b%s has %s" % (path_str, type(a), + path_str, type(b)) + e.args = ((e.args[0] + " : " + msg,) + e.args[1:]) raise def assertAllClose(self, a, b, rtol=1e-6, atol=1e-6, msg=None): @@ -1438,8 +1452,7 @@ class TensorFlowTestCase(googletest.TestCase): """ device1 = pydev.canonical_name(device1) device2 = pydev.canonical_name(device2) - self.assertEqual(device1, device2, - "Devices %s and %s are not equal. %s" % + self.assertEqual(device1, device2, "Devices %s and %s are not equal. %s" % (device1, device2, msg)) # Fix Python 3 compatibility issues diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d9571fa2be..ece1da0332 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1910,7 +1910,7 @@ cuda_py_test( cuda_py_test( name = "softmax_op_test", - size = "small", + size = "medium", srcs = ["softmax_op_test.py"], additional_deps = [ "//third_party/py/numpy", diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 2b8e99e18e..981f96b74d 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -18,14 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import unittest import numpy as np + from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import nn_ops from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging as logging @test_util.with_c_api @@ -41,9 +44,10 @@ class SoftmaxTest(test.TestCase): features, axis=dim), one_only_on_dim)) softmax = e / np.reshape(np.sum(e, axis=dim), one_only_on_dim) if log: - return np.log(softmax) + res = np.log(softmax) else: - return softmax + res = softmax + return res def _testSoftmax(self, np_features, dim=-1, log=False, use_gpu=False): # A previous version of the code checked the op name rather than the op type @@ -53,9 +57,9 @@ class SoftmaxTest(test.TestCase): np_softmax = self._npSoftmax(np_features, dim=dim, log=log) with self.test_session(use_gpu=use_gpu): if log: - tf_softmax = nn_ops.log_softmax(np_features, dim=dim, name=name) + tf_softmax = nn_ops.log_softmax(np_features, axis=dim, name=name) else: - tf_softmax = nn_ops.softmax(np_features, dim=dim, name=name) + tf_softmax = nn_ops.softmax(np_features, axis=dim, name=name) out = tf_softmax.eval() self.assertAllCloseAccordingToType(np_softmax, out) self.assertShapeEqual(np_softmax, tf_softmax) @@ -117,10 +121,32 @@ class SoftmaxTest(test.TestCase): self._testAll( np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float32)) + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testFloatGPU(self): + if test.is_gpu_available(cuda_only=True): + rows = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] + cols = [2**x + np.random.randint(0, 1024) for x in range(1, 10)] + for row, col in zip(rows, cols): + logging.info("Testing softmax float dtype in shape [%d, %d]", row, col) + data = np.random.rand(row, col) + self._testAll(data.astype(np.float32)) + def testHalf(self): self._testAll( np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16)) + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testHalfGPU(self): + if test.is_gpu_available(cuda_only=True): + rows = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] + cols = [2**x + np.random.randint(0, 1024) for x in range(1, 8)] + for row, col in zip(rows, cols): + logging.info("Testing softmax half dtype in shape [%d, %d]", row, col) + data = np.random.rand(row, col) + self._testAll(data.astype(np.float16)) + def testDouble(self): self._testSoftmax( np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64)) @@ -169,7 +195,7 @@ class SoftmaxTest(test.TestCase): self.assertEqual(0, array_ops.size(x).eval()) # reshape would raise if logits is empty with self.assertRaises(errors_impl.InvalidArgumentError): - nn_ops.softmax(x, dim=0).eval() + nn_ops.softmax(x, axis=0).eval() def testDimTooLarge(self): with self.test_session(): @@ -177,7 +203,7 @@ class SoftmaxTest(test.TestCase): # inference error. dim = array_ops.placeholder_with_default(100, shape=[]) with self.assertRaises(errors_impl.InvalidArgumentError): - nn_ops.softmax([1., 2., 3., 4.], dim=dim).eval() + nn_ops.softmax([1., 2., 3., 4.], axis=dim).eval() def testLargeDims(self): # Make sure that we properly handle large inputs. See -- GitLab From 1f58c96b593c49a97bbfac0665c2628ef9c910cd Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Wed, 21 Mar 2018 16:00:44 -0700 Subject: [PATCH 327/960] Shorter definitions for elementwise_ops in op_level_cost_estimator. PiperOrigin-RevId: 189983460 --- .../grappler/costs/op_level_cost_estimator.cc | 180 +++++++----------- 1 file changed, 67 insertions(+), 113 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index d3ffa03fe2..fdbc61f3f1 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -192,121 +192,75 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kShape, wrap(&OpLevelCostEstimator::PredictMetadata)}, {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}}; +#define EIGEN_COST(X) Eigen::internal::functor_traits::Cost + // Quantize = apply min and max bounds, multiply by scale factor and round. const int quantize_v2_cost = - Eigen::internal::functor_traits< - Eigen::internal::scalar_product_op>::Cost + - Eigen::internal::functor_traits< - Eigen::internal::scalar_max_op>::Cost + - Eigen::internal::functor_traits< - Eigen::internal::scalar_min_op>::Cost + - Eigen::internal::functor_traits< - Eigen::internal::scalar_round_op>::Cost; - - elementwise_ops_ = { - // Unary ops alphabetically sorted - {"Acos", Eigen::internal::functor_traits< - Eigen::internal::scalar_acos_op>::Cost}, - {"Asin", Eigen::internal::functor_traits< - Eigen::internal::scalar_asin_op>::Cost}, - {"Atan", Eigen::internal::functor_traits< - Eigen::internal::scalar_atan_op>::Cost}, - {"Atan2", Eigen::internal::functor_traits< - Eigen::internal::scalar_quotient_op>::Cost + - Eigen::internal::functor_traits< - Eigen::internal::scalar_atan_op>::Cost}, - {"Ceil", Eigen::internal::functor_traits< - Eigen::internal::scalar_ceil_op>::Cost}, - {"Cos", Eigen::internal::functor_traits< - Eigen::internal::scalar_cos_op>::Cost}, - {"Dequantize", Eigen::internal::functor_traits< - Eigen::internal::scalar_product_op>::Cost}, - {"Erf", 1}, - {"Erfc", 1}, - {"Exp", Eigen::internal::functor_traits< - Eigen::internal::scalar_exp_op>::Cost}, - {"Expm1", Eigen::internal::functor_traits< - Eigen::internal::scalar_expm1_op>::Cost}, - {"Floor", Eigen::internal::functor_traits< - Eigen::internal::scalar_floor_op>::Cost}, - {"Inv", Eigen::internal::functor_traits< - Eigen::internal::scalar_inverse_op>::Cost}, - {"InvGrad", 1}, - {"Lgamma", 1}, - {"Log", Eigen::internal::functor_traits< - Eigen::internal::scalar_log_op>::Cost}, - {"Log1p", Eigen::internal::functor_traits< - Eigen::internal::scalar_log1p_op>::Cost}, - {"Neg", Eigen::internal::functor_traits< - Eigen::internal::scalar_opposite_op>::Cost}, - {"QuantizeV2", quantize_v2_cost}, - {"Reciprocal", Eigen::internal::functor_traits< - Eigen::internal::scalar_inverse_op>::Cost}, - {"Rint", 1}, - {"Round", Eigen::internal::functor_traits< - Eigen::internal::scalar_round_op>::Cost}, - {"Rsqrt", Eigen::internal::functor_traits< - Eigen::internal::scalar_rsqrt_op>::Cost}, - {"Sqrt", Eigen::internal::functor_traits< - Eigen::internal::scalar_sqrt_op>::Cost}, - {"Square", Eigen::internal::functor_traits< - Eigen::internal::scalar_square_op>::Cost}, - {"Tanh", Eigen::internal::functor_traits< - Eigen::internal::scalar_tanh_op>::Cost}, - {"Relu", Eigen::internal::functor_traits< - Eigen::internal::scalar_max_op>::Cost}, - {"Sigmoid", Eigen::internal::functor_traits< - Eigen::internal::scalar_sigmoid_op>::Cost}, - {"Sign", Eigen::internal::functor_traits< - Eigen::internal::scalar_sign_op>::Cost}, - {"Sin", Eigen::internal::functor_traits< - Eigen::internal::scalar_sin_op>::Cost}, - {"Tan", Eigen::internal::functor_traits< - Eigen::internal::scalar_tan_op>::Cost}, - // Binary ops alphabetically sorted - {"Add", Eigen::internal::functor_traits< - Eigen::internal::scalar_sum_op>::Cost}, - {"ApproximateEqual", 1}, - {"BiasAdd", Eigen::internal::functor_traits< - Eigen::internal::scalar_sum_op>::Cost}, - {"Div", Eigen::internal::functor_traits< - Eigen::internal::scalar_quotient_op>::Cost}, - {"Equal", 1}, - {"FloorDiv", Eigen::internal::functor_traits< - Eigen::internal::scalar_quotient_op>::Cost}, - {"FloorMod", Eigen::internal::functor_traits< - Eigen::internal::scalar_mod_op>::Cost}, - {"Greater", 1}, - {"GreaterEqual", 1}, - {"Less", 1}, - {"LessEqual", 1}, - {"LogicalAnd", Eigen::internal::functor_traits< - Eigen::internal::scalar_boolean_and_op>::Cost}, - {"LogicalNot", 1}, - {"LogicalOr", Eigen::internal::functor_traits< - Eigen::internal::scalar_boolean_or_op>::Cost}, - {"Maximum", Eigen::internal::functor_traits< - Eigen::internal::scalar_max_op>::Cost}, - {"Minimum", Eigen::internal::functor_traits< - Eigen::internal::scalar_min_op>::Cost}, - {"Mod", Eigen::internal::functor_traits< - Eigen::internal::scalar_mod_op>::Cost}, - {"Mul", Eigen::internal::functor_traits< - Eigen::internal::scalar_product_op>::Cost}, - {"NotEqual", 1}, - {"QuantizedAdd", Eigen::internal::functor_traits< - Eigen::internal::scalar_sum_op>::Cost}, - {"QuantizedMul", Eigen::internal::functor_traits< - Eigen::internal::scalar_product_op>::Cost}, - {"RealDiv", Eigen::internal::functor_traits< - Eigen::internal::scalar_quotient_op>::Cost}, - {"SquareDifference", 1}, - {"Sub", Eigen::internal::functor_traits< - Eigen::internal::scalar_difference_op>::Cost}, - {"TruncateDiv", Eigen::internal::functor_traits< - Eigen::internal::scalar_quotient_op>::Cost}, - {"TruncateMod", Eigen::internal::functor_traits< - Eigen::internal::scalar_mod_op>::Cost}}; + EIGEN_COST(scalar_product_op) + EIGEN_COST(scalar_max_op) + + EIGEN_COST(scalar_min_op) + EIGEN_COST(scalar_round_op); + + elementwise_ops_ = {// Unary ops alphabetically sorted + {"Acos", EIGEN_COST(scalar_acos_op)}, + {"Asin", EIGEN_COST(scalar_asin_op)}, + {"Atan", EIGEN_COST(scalar_atan_op)}, + {"Atan2", EIGEN_COST(scalar_quotient_op) + + EIGEN_COST(scalar_atan_op)}, + {"Ceil", EIGEN_COST(scalar_ceil_op)}, + {"Cos", EIGEN_COST(scalar_cos_op)}, + {"Dequantize", EIGEN_COST(scalar_product_op)}, + {"Erf", 1}, + {"Erfc", 1}, + {"Exp", EIGEN_COST(scalar_exp_op)}, + {"Expm1", EIGEN_COST(scalar_expm1_op)}, + {"Floor", EIGEN_COST(scalar_floor_op)}, + {"Inv", EIGEN_COST(scalar_inverse_op)}, + {"InvGrad", 1}, + {"Lgamma", 1}, + {"Log", EIGEN_COST(scalar_log_op)}, + {"Log1p", EIGEN_COST(scalar_log1p_op)}, + {"Neg", EIGEN_COST(scalar_opposite_op)}, + {"QuantizeV2", quantize_v2_cost}, + {"Reciprocal", EIGEN_COST(scalar_inverse_op)}, + {"Rint", 1}, + {"Round", EIGEN_COST(scalar_round_op)}, + {"Rsqrt", EIGEN_COST(scalar_rsqrt_op)}, + {"Sqrt", EIGEN_COST(scalar_sqrt_op)}, + {"Square", EIGEN_COST(scalar_square_op)}, + {"Tanh", EIGEN_COST(scalar_tanh_op)}, + {"Relu", EIGEN_COST(scalar_max_op)}, + {"Sigmoid", EIGEN_COST(scalar_sigmoid_op)}, + {"Sign", EIGEN_COST(scalar_sign_op)}, + {"Sin", EIGEN_COST(scalar_sin_op)}, + {"Tan", EIGEN_COST(scalar_tan_op)}, + // Binary ops alphabetically sorted + {"Add", EIGEN_COST(scalar_sum_op)}, + {"ApproximateEqual", 1}, + {"BiasAdd", EIGEN_COST(scalar_sum_op)}, + {"Div", EIGEN_COST(scalar_quotient_op)}, + {"Equal", 1}, + {"FloorDiv", EIGEN_COST(scalar_quotient_op)}, + {"FloorMod", EIGEN_COST(scalar_mod_op)}, + {"Greater", 1}, + {"GreaterEqual", 1}, + {"Less", 1}, + {"LessEqual", 1}, + {"LogicalAnd", EIGEN_COST(scalar_boolean_and_op)}, + {"LogicalNot", 1}, + {"LogicalOr", EIGEN_COST(scalar_boolean_or_op)}, + {"Maximum", EIGEN_COST(scalar_max_op)}, + {"Minimum", EIGEN_COST(scalar_min_op)}, + {"Mod", EIGEN_COST(scalar_mod_op)}, + {"Mul", EIGEN_COST(scalar_product_op)}, + {"NotEqual", 1}, + {"QuantizedAdd", EIGEN_COST(scalar_sum_op)}, + {"QuantizedMul", EIGEN_COST(scalar_product_op)}, + {"RealDiv", EIGEN_COST(scalar_quotient_op)}, + {"SquareDifference", 1}, + {"Sub", EIGEN_COST(scalar_difference_op)}, + {"TruncateDiv", EIGEN_COST(scalar_quotient_op)}, + {"TruncateMod", EIGEN_COST(scalar_mod_op)}}; + +#undef EIGEN_COST // By default, use sum of memory_time and compute_time for execution_time. compute_memory_overlap_ = false; -- GitLab From 637b090ea0a5029805ba5e1dcf41c3b57d944ae4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 17:34:52 -0700 Subject: [PATCH 328/960] Small convenience changes. PiperOrigin-RevId: 189996801 --- tensorflow/contrib/py2tf/pyct/compiler.py | 2 +- tensorflow/contrib/py2tf/pyct/transformer.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/compiler.py b/tensorflow/contrib/py2tf/pyct/compiler.py index 507dbc7ed3..24c4517afa 100644 --- a/tensorflow/contrib/py2tf/pyct/compiler.py +++ b/tensorflow/contrib/py2tf/pyct/compiler.py @@ -31,7 +31,7 @@ import astor import gast -def ast_to_source(node, indentation): +def ast_to_source(node, indentation=' '): """Return the source code of given AST.""" if isinstance(node, gast.AST): node = gast.gast_to_ast(node) diff --git a/tensorflow/contrib/py2tf/pyct/transformer.py b/tensorflow/contrib/py2tf/pyct/transformer.py index 57016bb4ce..31ef7e1c05 100644 --- a/tensorflow/contrib/py2tf/pyct/transformer.py +++ b/tensorflow/contrib/py2tf/pyct/transformer.py @@ -24,6 +24,7 @@ import gast import six from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import pretty_printer @@ -31,6 +32,13 @@ class PyFlowParseError(SyntaxError): pass +def try_ast_to_source(node): + try: + return compiler.ast_to_source(node) + except AssertionError: + return '' + + class Base(gast.NodeTransformer): """Base class for specialized transformers.""" @@ -62,8 +70,9 @@ class Base(gast.NodeTransformer): return super(Base, self).visit(node) except (ValueError, AttributeError, KeyError, NotImplementedError, AssertionError) as e: - msg = '%s: %s\nOccurred at node:\n%s' % ( - e.__class__.__name__, str(e), pretty_printer.fmt(node, color=False)) + msg = '%s: %s\nOffending source:\n%s\n\nOccurred at node:\n%s' % ( + e.__class__.__name__, str(e), try_ast_to_source(node), + pretty_printer.fmt(node, color=False)) if source_code: line = source_code.splitlines()[self._lineno - 1] else: -- GitLab From 8e4e9f7ceaa78f76b7f0aaa7a607e80e67f0d912 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Wed, 21 Mar 2018 17:38:04 -0700 Subject: [PATCH 329/960] Added an experimental C API to dump TF_Graph in a human-readable format, for debugging purposes. PiperOrigin-RevId: 189997099 --- tensorflow/c/c_api_experimental.cc | 10 ++++++++++ tensorflow/c/c_api_experimental.h | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index eb17e16d3e..34b9dec3ee 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -483,3 +483,13 @@ void TF_ShutdownTPUExecution(TF_Session* session, TF_Output shutdown_node, /*targets*/ &shutdown_node.oper, /*ntargets*/ 1, /*run_metadata*/ nullptr, status); } + +TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, + size_t* len) { + tensorflow::mutex_lock c(graph->mu); + const auto& debug_str = graph->graph.ToGraphDefDebug().DebugString(); + *len = debug_str.size(); + char* ret = static_cast(malloc(*len + 1)); + memcpy(ret, debug_str.c_str(), *len + 1); + return ret; +} diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 2bad278d63..b95cdfe6aa 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -94,6 +94,12 @@ TF_CAPI_EXPORT extern void TF_ShutdownTPUExecution(TF_Session* session, TF_Output shutdown_node, TF_Status* status); +// Returns the graph content in a human-readable format, with length set in +// `len`. The format is subject to change in the future. +// The returned string is heap-allocated, and caller should call free() on it. +TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, + size_t* len); + #ifdef __cplusplus } /* end extern "C" */ #endif -- GitLab From c7334fef9d1173525f6111b8ab50360b6531d76b Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 21 Mar 2018 18:02:01 -0700 Subject: [PATCH 330/960] [tf.data] Do not crash when combining .cache().take().repeat() Currently, if the .cache() iterator is not fully consumed before being repeated, it will cause an exception to be raised to Python. Instead, cache should act as an identity transformation and log an error, as this will not affect the correctness of the user's program (at the cost of an unexpected performance cost: i.e. not actually caching). PiperOrigin-RevId: 189999552 --- .../core/kernels/data/cache_dataset_ops.cc | 17 ++++++++++++++++- .../data/kernel_tests/cache_dataset_op_test.py | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index f0a2192826..4b4728dab6 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -308,6 +308,21 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { input_impl_(params.dataset->input_->MakeIterator(params.prefix)), cache_(new std::vector>) {} + ~MemoryWriterIterator() override { + mutex_lock l(mu_); + if (cache_) { + LOG(ERROR) + << "The calling iterator did not fully read the dataset we were " + "attempting to cache. In order to avoid unexpected truncation " + "of the sequence, the current [partially cached] sequence " + "will be dropped. This can occur if you have a sequence " + "similar to `dataset.cache().take(k).repeat()`. Instead, swap " + "the order (i.e. `dataset.take(k).cache().repeat()`)"; + mutex_lock l2(dataset()->mu_); + dataset()->writer_iterator_created_ = false; + } + } + Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { @@ -318,7 +333,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel { // Guard on cache_ to not crash if GetNext is called a second time // after *end_of_sequence == true if (cache_) { - mutex_lock l2(dataset()->mu_); + mutex_lock l(dataset()->mu_); DCHECK(dataset()->writer_iterator_created_); DCHECK(!dataset()->cache_); cache_.swap(dataset()->cache_); diff --git a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py index 02720a2e98..25269dc810 100644 --- a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py @@ -297,6 +297,21 @@ class MemoryCacheDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(i2.get_next()) + def testCacheTakeRepeat(self): + dataset = dataset_ops.Dataset.range(10).cache().take(5).repeat(2) + itr = dataset.make_one_shot_iterator() + n = itr.get_next() + + expected_values = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4] + + with self.test_session() as sess: + for i, expected in enumerate(expected_values): + self.assertEqual(expected, sess.run(n), + "Unexpected value at index %s" % i) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(itr.get_next()) + if __name__ == "__main__": test.main() -- GitLab From 7407a35bd5237069b267bb82d08888c97675ab37 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 22 Mar 2018 09:22:42 +0800 Subject: [PATCH 331/960] Fix the variable typo in the python api example (#17900) --- tensorflow/contrib/lite/toco/g3doc/python_api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md index 440f9c367c..36e2d9c372 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -28,7 +28,7 @@ val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) out = tf.identity(val, name="out") with tf.Session() as sess: tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) - open("test.tflite", "wb").write(tflite_modeL) + open("test.tflite", "wb").write(tflite_model) ``` **NOTE** Currently, the TOCO command will cause a fatal error to the Python -- GitLab From 61aa925ebaa69b9526cc67384fcde3fa42c9e6f1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 21 Mar 2018 18:22:36 -0700 Subject: [PATCH 332/960] Moves TFE_Executor to common_runtime PiperOrigin-RevId: 190001737 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/c_api.cc | 179 ++++++---------- tensorflow/c/eager/c_api_internal.h | 84 +------- tensorflow/core/common_runtime/eager/BUILD | 22 ++ .../core/common_runtime/eager/context.cc | 142 +++++++++++++ .../core/common_runtime/eager/context.h | 193 ++++++++++++++++++ 6 files changed, 428 insertions(+), 194 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/context.cc create mode 100644 tensorflow/core/common_runtime/eager/context.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 841ff48a38..bea5a121b3 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -28,6 +28,7 @@ tf_cuda_library( "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", "//tensorflow/core:core_cpu", + "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core:core_cpu_internal", @@ -64,6 +65,7 @@ tf_cuda_library( "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index a23015c99e..5d668848ab 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -71,18 +71,6 @@ std::atomic_int_fast64_t func_id_generator(0); } // namespace -TFE_ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy) { - if (!soft_placement) { - return original_policy; - } - if (original_policy == TFE_DEVICE_PLACEMENT_EXPLICIT || - original_policy == TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) { - return TFE_DEVICE_PLACEMENT_SILENT; - } - return original_policy; -} - extern "C" { TFE_ContextOptions* TFE_NewContextOptions() { return new TFE_ContextOptions; } @@ -104,19 +92,7 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, unsigned char async, TF_Status* status) { - { - tensorflow::mutex_lock l(ctx->async_map_mu); - ctx->thread_local_async[std::this_thread::get_id()] = async; - } - if (async) { - ctx->executor.EnableAsync(); - } else { - // TODO(agarwal): Currently we add a wait here to handle cases where a sync - // op has a control dependency on an async op, and the latter has not - // executed yet. This wait can be removed by storing all the control inputs - // and waiting for them when executing ops. - status->status = ctx->executor.WaitForAllPendingNodes(); - } + status->status = ctx->context.SetAsyncForThread(async); } void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } @@ -133,34 +109,26 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { new tensorflow::DeviceMgr(devices)); tensorflow::Rendezvous* r = new tensorflow::IntraProcessRendezvous(device_mgr.get()); - return new TFE_Context(*opts, std::move(device_mgr), r); + return new TFE_Context(opts->session_options.options, opts->policy, + opts->async, std::move(device_mgr), r); } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.WaitForAllPendingNodes(); - { - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); - } - ctx->rendezvous->Unref(); delete ctx; } TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { TF_DeviceList* list = new TF_DeviceList; - ctx->device_manager->ListDeviceAttributes(&list->response); + ctx->context.device_mgr()->ListDeviceAttributes(&list->response); return list; } -void TFE_ContextClearCaches(TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); -} +void TFE_ContextClearCaches(TFE_Context* ctx) { ctx->context.ClearCaches(); } void TFE_ContextSetThreadLocalDevicePlacementPolicy( TFE_Context* ctx, TFE_ContextDevicePlacementPolicy policy) { - tensorflow::mutex_lock ml(ctx->policy_map_mu); - ctx->thread_local_policies[std::this_thread::get_id()] = policy; + ctx->context.SetThreadLocalDevicePlacementPolicy( + static_cast(policy)); } // Note: this function looks up a thread local policy. So it should be called in @@ -168,25 +136,20 @@ void TFE_ContextSetThreadLocalDevicePlacementPolicy( // safe to call this function from the async EagerExecutor threads. extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->policy_map_mu); - auto policy_map_it = - ctx->thread_local_policies.find(std::this_thread::get_id()); - if (policy_map_it != ctx->thread_local_policies.end()) { - return policy_map_it->second; - } - return ctx->policy; + return static_cast( + ctx->context.GetDevicePlacementPolicy()); } void TFE_ContextAsyncWait(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.WaitForAllPendingNodes(); + status->status = ctx->context.AsyncWait(); } void TFE_ContextGetStatus(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); } void TFE_ContextAsyncClearError(TFE_Context* ctx) { - ctx->executor.ClearError(); + ctx->context.ClearAsyncError(); } TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { @@ -259,7 +222,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, // nullptr. tensorflow::Device* src_opd = nullptr; TF_RETURN_IF_ERROR(h->TensorAndDevice(&src, &srcd, &src_opd)); - if (srcd == nullptr) srcd = ctx->devices[0]; + if (srcd == nullptr) srcd = ctx->context.HostCPU(); bool is_same_device = (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); const bool dst_cpu = IsCPU(dstd); @@ -332,8 +295,7 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, status->status = tensorflow::AttrTypeMapForOp(name, &types); if (status->status.ok()) return new TFE_Op(ctx, name, types); if (TF_GetCode(status) == TF_NOT_FOUND) { - tensorflow::mutex_lock l(ctx->functions_mu); - if (ctx->func_lib_def.Find(name) != nullptr) { + if (ctx->context.FindFunctionByName(name)) { status->status = tensorflow::Status::OK(); return new TFE_Op(ctx, name, nullptr); } @@ -346,20 +308,14 @@ void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { - auto it = op->ctx->devices_map.find(device_name); - if (it == op->ctx->devices_map.end()) { - status->status = - tensorflow::errors::InvalidArgument(device_name, " unknown device."); - return; - } - d = it->second; + status->status = op->ctx->context.FindDeviceByName(device_name, &d); } op->device = d; } const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->devices[0] : op->device; + (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device; return device->name().c_str(); } @@ -634,7 +590,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, TFE_Context* ctx, TF_Status* status) { tensorflow::DeviceSet ds; - for (tensorflow::Device* d : ctx->devices) { + for (tensorflow::Device* d : *ctx->context.devices()) { ds.AddDevice(d); } tensorflow::DeviceTypeVector final_devices; @@ -648,7 +604,7 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, "Could not find valid device for node ", ndef.DebugString()); return nullptr; } - for (tensorflow::Device* d : ctx->devices) { + for (tensorflow::Device* d : *ctx->context.devices()) { if (d->device_type() == final_devices[0].type_string()) { return d; } @@ -663,9 +619,8 @@ tensorflow::Status Execute( const tensorflow::gtl::InlinedVector& op_inputs, tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, TFE_TensorHandle** retvals, int num_retvals) { - if (!ctx->soft_placement && device == nullptr) { - // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU - device = ctx->devices[0]; + if (!ctx->context.SoftPlacement() && device == nullptr) { + device = ctx->context.HostCPU(); } if (device == nullptr) { @@ -697,18 +652,18 @@ tensorflow::Status Execute( if (maybe_stats != nullptr) { maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - maybe_stats->all_start_micros()); - tensorflow::mutex_lock ml(ctx->metadata_mu); - if (ctx->should_store_metadata.load()) { - auto* step_stats = ctx->run_metadata.mutable_step_stats(); + tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); + if (ctx->context.ShouldStoreMetadata()) { + auto* step_stats = ctx->context.RunMetadataProto()->mutable_step_stats(); // Lazily initialize the RunMetadata with information about all devices if // this is the first call. - while (step_stats->dev_stats_size() < ctx->devices.size()) { + while (step_stats->dev_stats_size() < ctx->context.devices()->size()) { step_stats->add_dev_stats(); } // Find the current device's index. int device_idx = 0; - for (int i = 0; i < ctx->devices.size(); ++i) { - if (ctx->devices[i] == device) { + for (int i = 0; i < ctx->context.devices()->size(); ++i) { + if (ctx->context.devices()->at(i) == device) { device_idx = i; break; } @@ -744,7 +699,7 @@ class ExecuteNode : public tensorflow::EagerNode { tensorflow::NodeExecStats* maybe_stats, const tensorflow::DataTypeVector& output_dtypes, TFE_TensorHandle** retvals, int num_retvals) - : tensorflow::EagerNode(op->ctx->executor.NextId()), + : tensorflow::EagerNode(op->ctx->context.NextId()), ctx_(op->ctx), op_device_(op->device), inputs_(op->inputs), @@ -800,7 +755,7 @@ class CopyToDeviceNode : public tensorflow::EagerNode { public: CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, TFE_Context* ctx) - : tensorflow::EagerNode(ctx->executor.NextId()), + : tensorflow::EagerNode(ctx->context.NextId()), src_(src), dstd_(dstd), ctx_(ctx), @@ -1063,7 +1018,7 @@ extern "C" { void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { TFE_Context* ctx = op->ctx; - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); if (!status->status.ok()) { return; } @@ -1087,7 +1042,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && input_op_device != op->device) { tensorflow::Device* d = - input_op_device == nullptr ? ctx->devices[0] : input_op_device; + input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device; VLOG(1) << "Changing device of operation " << op->name << " to " << d->name() << " because input #" << i << " is a resource in this device."; @@ -1095,40 +1050,35 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } tensorflow::Device* device = op->device; - if (!ctx->soft_placement && device == nullptr) { - // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU - device = ctx->devices[0]; + if (!ctx->context.SoftPlacement() && device == nullptr) { + device = ctx->context.HostCPU(); } tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel; - { - tensorflow::tf_shared_lock l(ctx->cache_mu); - kernel = tensorflow::gtl::FindPtrOrNull(ctx->kernel_cache, cache_key); - } + tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - if (ctx->soft_placement && device == nullptr) { + if (ctx->context.SoftPlacement() && device == nullptr) { device = SelectDevice(ndef, ctx, status); if (!status->status.ok()) { return; } } CHECK(device != nullptr); - if (ctx->log_device_placement) { + if (ctx->context.LogDevicePlacement()) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); } - kernel = new tensorflow::KernelAndDevice(ctx->rendezvous); + kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous()); // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def // will be accessed, so grab on to the lock. // See WARNING comment in Execute (before kernel->Run) - would be nice to // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu()); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->context.func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; @@ -1136,7 +1086,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Update output_dtypes inside `kernel`. const tensorflow::OpDef* op_def = nullptr; const tensorflow::FunctionDef* function_def = - ctx->func_lib_def.Find(ndef.op()); + ctx->context.FuncLibDef()->Find(ndef.op()); if (function_def != nullptr) { op_def = &(function_def->signature()); } @@ -1152,8 +1102,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (!status->status.ok()) { return; } - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); + ctx->context.AddKernelToCache(cache_key, kernel); } const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); const int output_dtypes_size = output_dtypes.size(); @@ -1171,11 +1120,11 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // device from the one requested above. device = kernel->device(); } - status->status = ValidateInputTypeAndPlacement(ctx, ctx->devices[0], device, - op, kernel->kernel()); + status->status = ValidateInputTypeAndPlacement(ctx, ctx->context.HostCPU(), + device, op, kernel->kernel()); if (!status->status.ok()) return; std::unique_ptr maybe_stats; - if (ctx->should_store_metadata.load()) { + if (ctx->context.ShouldStoreMetadata()) { maybe_stats.reset(new tensorflow::NodeExecStats); maybe_stats->set_node_name(op->name); maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); @@ -1183,14 +1132,14 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); // TODO(apassos) track referenced tensors } - if (ctx->Async()) { + if (ctx->context.Async()) { // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. tensorflow::EagerNode* node = new ExecuteNode(op, kernel, maybe_stats.release(), output_dtypes, retvals, *num_retvals); - ctx->executor.Add(node); + ctx->context.ExecutorAdd(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to // allocate it. @@ -1206,23 +1155,24 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); if (!status->status.ok()) { return nullptr; } - tensorflow::Device* dstd = ctx->devices[0]; + tensorflow::Device* dstd = ctx->context.HostCPU(); if (device_name != nullptr && strlen(device_name) > 0) { - status->status = ctx->device_manager->LookupDevice(device_name, &dstd); + status->status = + ctx->context.device_mgr()->LookupDevice(device_name, &dstd); if (!status->status.ok()) return nullptr; } - if (ctx->Async()) { + if (ctx->context.Async()) { // Note that `h` may not be currently ready. However execution order will // make sure that `h` is ready before the copy is actually done. CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); TFE_TensorHandle* output = node->dst(); // Note that calling Add makes `node` accessible by the EagerExecutor // thread. So further accesses need to be thread-safe. - ctx->executor.Add(node); + ctx->context.ExecutorAdd(node); return output; } else { TFE_TensorHandle* output = nullptr; @@ -1240,24 +1190,20 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, tensorflow::errors::InvalidArgument("Invalid FunctionDef proto"); return; } - tensorflow::mutex_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.AddFunctionDef(function_def); + status->status = ctx->context.AddFunctionDef(function_def); } void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, TF_Status* status) { - tensorflow::mutex_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.AddFunctionDef(function->fdef); + status->status = ctx->context.AddFunctionDef(function->fdef); } void TFE_ContextEnableRunMetadata(TFE_Context* ctx) { - ctx->should_store_metadata.store(true); + ctx->context.SetShouldStoreMetadata(true); } void TFE_ContextDisableRunMetadata(TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->metadata_mu); - ctx->should_store_metadata.store(false); - ctx->run_metadata.Clear(); + ctx->context.SetShouldStoreMetadata(false); } } // extern "C" @@ -1286,9 +1232,9 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status) { TFE_ContextAsyncWait(ctx, status); if (!status->status.ok()) return; - tensorflow::mutex_lock ml(ctx->metadata_mu); - status->status = MessageToBuffer(ctx->run_metadata, buf); - ctx->run_metadata.Clear(); + tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); + status->status = MessageToBuffer(*ctx->context.RunMetadataProto(), buf); + ctx->context.RunMetadataProto()->Clear(); } namespace { @@ -1363,11 +1309,6 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } // namespace tensorflow -bool TFE_Context::Async() const { - tensorflow::mutex_lock l(async_map_mu); - return tensorflow::gtl::FindWithDefault( - thread_local_async, std::this_thread::get_id(), async_default); -} bool TFE_TensorHandle::IsReady() { if (node_id == 0) return true; @@ -1381,7 +1322,7 @@ tensorflow::Status TFE_TensorHandle::WaitReady() { { tensorflow::mutex_lock l(ctx_mutex_); if (ctx_ == nullptr) return tensorflow::Status::OK(); - executor = &ctx_->executor; + executor = ctx_->context.Executor(); } return executor->WaitFor(node_id); } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index a79f8ddd33..5b29120b40 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" @@ -52,85 +53,18 @@ struct TFE_ContextOptions { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; -TFE_ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); - struct TFE_Context { - explicit TFE_Context(const TFE_ContextOptions& opts, + explicit TFE_Context(const tensorflow::SessionOptions& opts, + TFE_ContextDevicePlacementPolicy default_policy, + bool async, std::unique_ptr device_mgr, tensorflow::Rendezvous* rendezvous) - : soft_placement( - opts.session_options.options.config.allow_soft_placement()), - policy(PlacementPolicy(soft_placement, opts.policy)), - device_manager(std::move(device_mgr)), - devices(device_manager->ListDevices()), - rendezvous(rendezvous), - pflr(new tensorflow::ProcessFunctionLibraryRuntime( - device_manager.get(), opts.session_options.options.env, - TF_GRAPH_DEF_VERSION, &func_lib_def, {})), - log_device_placement( - opts.session_options.options.config.log_device_placement()), - async_default(opts.async) { - if (async_default) executor.EnableAsync(); - - for (auto* device : devices) { - devices_map[tensorflow::StringPiece(device->name())] = device; - } - } - - const bool soft_placement; - const TFE_ContextDevicePlacementPolicy policy; - - // Note: we cannot use C++11 thread_local here as there is no concept of a - // thread-local-object-local variable in C++11. - tensorflow::mutex policy_map_mu; - std::unordered_map - thread_local_policies GUARDED_BY(policy_map_mu); - - std::unique_ptr device_manager; - // Devices owned by device_manager - std::vector devices; - // All devices are not owned. - tensorflow::gtl::FlatMap - devices_map; - tensorflow::Rendezvous* const rendezvous; - - tensorflow::mutex functions_mu; - tensorflow::FunctionLibraryDefinition func_lib_def GUARDED_BY(functions_mu){ - tensorflow::OpRegistry::Global(), {}}; - - // One FunctionLibraryRuntime per device. - // func_libs[i] is the FunctionLibraryRuntime corresponding to - // session->devices[i]. - const std::unique_ptr pflr; - - tensorflow::mutex cache_mu; - std::unordered_map - kernel_cache GUARDED_BY(cache_mu); - - tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) const { - return pflr->GetFLR(d->name()); - } + : context(opts, + static_cast( + default_policy), + async, std::move(device_mgr), rendezvous) {} - // Whether we should compute RunMetadata. - std::atomic should_store_metadata{false}; - tensorflow::mutex metadata_mu; - tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); - const bool log_device_placement; - // EagerExecutor for async execution. - tensorflow::EagerExecutor executor; - - // True if running in asynchronous mode. - bool Async() const; - - // True if the default value for execution mode is async. Note that this value - // can be overridden per thread based on `thread_local_async` overrides. - const bool async_default; - mutable tensorflow::mutex async_map_mu; - std::unordered_map thread_local_async - GUARDED_BY(async_map_mu); + tensorflow::EagerContext context; }; struct TFE_TensorHandle : public tensorflow::core::RefCounted { diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 8ba560bef8..de10b10b7e 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -32,6 +32,28 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "context", + srcs = [ + "context.cc", + ], + hdrs = [ + "context.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":eager_executor", + ":kernel_and_device", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + ], +) + tf_cuda_library( name = "kernel_and_device", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc new file mode 100644 index 0000000000..5e8d083cd2 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -0,0 +1,142 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/context.h" + +namespace tensorflow { + +ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, ContextDevicePlacementPolicy original_policy) { + if (!soft_placement) { + return original_policy; + } + if (original_policy == DEVICE_PLACEMENT_EXPLICIT || + original_policy == DEVICE_PLACEMENT_SILENT_FOR_INT32) { + return DEVICE_PLACEMENT_SILENT; + } + return original_policy; +} + +EagerContext::EagerContext(const SessionOptions& opts, + ContextDevicePlacementPolicy default_policy, + bool async, std::unique_ptr device_mgr, + Rendezvous* rendezvous) + : soft_placement_(opts.config.allow_soft_placement()), + policy_(PlacementPolicy(soft_placement_, default_policy)), + device_manager_(std::move(device_mgr)), + devices_(device_manager_->ListDevices()), + rendezvous_(rendezvous), + pflr_(new ProcessFunctionLibraryRuntime(device_manager_.get(), opts.env, + TF_GRAPH_DEF_VERSION, + &func_lib_def_, {})), + log_device_placement_(opts.config.log_device_placement()), + async_default_(async) { + if (async_default_) { + executor_.EnableAsync(); + } + + for (auto* device : devices_) { + devices_map_[device->name()] = device; + } +} + +bool EagerContext::Async() const { + mutex_lock l(async_map_mu_); + return gtl::FindWithDefault(thread_local_async_, std::this_thread::get_id(), + async_default_); +} + +Status EagerContext::SetAsyncForThread(bool async) { + { + tensorflow::mutex_lock l(async_map_mu_); + thread_local_async_[std::this_thread::get_id()] = async; + } + if (async) { + executor_.EnableAsync(); + } else { + // TODO(agarwal): Currently we add a wait here to handle cases where a + // sync op has a control dependency on an async op, and the latter has not + // executed yet. This wait can be removed by storing all the control + // inputs and waiting for them when executing ops. + return executor_.WaitForAllPendingNodes(); + } + return Status::OK(); +} + +void EagerContext::ClearCaches() { + mutex_lock ml(cache_mu_); + gtl::STLDeleteValues(&kernel_cache_); +} + +void EagerContext::SetThreadLocalDevicePlacementPolicy( + ContextDevicePlacementPolicy policy) { + mutex_lock ml(policy_map_mu_); + thread_local_policies_[std::this_thread::get_id()] = policy; +} + +ContextDevicePlacementPolicy EagerContext::GetDevicePlacementPolicy() { + mutex_lock ml(policy_map_mu_); + auto policy_map_it = thread_local_policies_.find(std::this_thread::get_id()); + if (policy_map_it != thread_local_policies_.end()) { + return policy_map_it->second; + } + return policy_; +} + +EagerContext::~EagerContext() { + executor_.WaitForAllPendingNodes().IgnoreError(); + ClearCaches(); + rendezvous_->Unref(); +} + +bool EagerContext::FindFunctionByName(const string& name) { + mutex_lock l(functions_mu_); + return func_lib_def_.Find(name) != nullptr; +} + +Status EagerContext::FindDeviceByName(const string& name, Device** result) { + auto it = devices_map_.find(name); + if (it == devices_map_.end()) { + return errors::InvalidArgument(name, " unknown device."); + } + *result = it->second; + return Status::OK(); +} + +Status EagerContext::AddFunctionDef(const FunctionDef& fdef) { + mutex_lock l(functions_mu_); + return func_lib_def_.AddFunctionDef(fdef); +} + +KernelAndDevice* EagerContext::GetCachedKernel(Fprint128 cache_key) { + tf_shared_lock l(cache_mu_); + return gtl::FindPtrOrNull(kernel_cache_, cache_key); +} + +void EagerContext::AddKernelToCache(Fprint128 cache_key, + KernelAndDevice* kernel) { + mutex_lock ml(cache_mu_); + gtl::InsertOrUpdate(&kernel_cache_, cache_key, kernel); +} + +void EagerContext::SetShouldStoreMetadata(bool value) { + should_store_metadata_.store(value); + if (!value) { + mutex_lock ml(metadata_mu_); + run_metadata_.Clear(); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h new file mode 100644 index 0000000000..d525d44fe4 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/context.h @@ -0,0 +1,193 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +// Note: there's a copy enum in eager/c_api.h. It should be kept in sync. +enum ContextDevicePlacementPolicy { + // Running operations with input tensors on the wrong device will fail. When + // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. + DEVICE_PLACEMENT_EXPLICIT = 0, + // Copy the tensor to the right device but log a warning. + DEVICE_PLACEMENT_WARN = 1, + // Silently copy the tensor, which has a performance cost since the + // operation will be blocked till the copy completes. + DEVICE_PLACEMENT_SILENT = 2, + // Default placement policy which silently copies int32 tensors but not other + // dtypes. When soft placement is enabled acts like + // TFE_DEVICE_PLACEMENT_SILENT. + DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, +}; + +ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, ContextDevicePlacementPolicy original_policy); + +class EagerContext { + public: + explicit EagerContext(const SessionOptions& opts, + ContextDevicePlacementPolicy default_policy, bool async, + std::unique_ptr device_mgr, + Rendezvous* rendezvous); + + ~EagerContext(); + + // Returns the function library runtime for the given device. + FunctionLibraryRuntime* func_lib(Device* d) const { + return pflr_->GetFLR(d->name()); + } + + // True if running in asynchronous mode. + bool Async() const; + + EagerExecutor* Executor() { return &executor_; } + + // Sets whether this thread should run in synchronous or asynchronous mode. + Status SetAsyncForThread(bool async); + + // TODO(apassos) make this return a constant reference + gtl::FlatMap* device_map() { + return &devices_map_; + } + + // TODO(apassos) make this return a constant reference + std::vector* devices() { return &devices_; } + + // Clears the kernel caches. + void ClearCaches(); + + // Sets the device placement policy for the current thread. + void SetThreadLocalDevicePlacementPolicy(ContextDevicePlacementPolicy policy); + + // Returns the device placement policy for the current thread. + ContextDevicePlacementPolicy GetDevicePlacementPolicy(); + + Status AsyncWait() { return executor_.WaitForAllPendingNodes(); } + + Status GetStatus() { return executor_.status(); } + + void ClearAsyncError() { executor_.ClearError(); } + + bool FindFunctionByName(const string& name); + + Status FindDeviceByName(const string& name, Device** result); + + Device* HostCPU() { return devices_[0]; } + + bool SoftPlacement() { return soft_placement_; } + + uint64 NextId() { return executor_.NextId(); } + + void ExecutorAdd(EagerNode* node) { executor_.Add(node); } + + Status AddFunctionDef(const FunctionDef& fdef); + + KernelAndDevice* GetCachedKernel(Fprint128 cache_key); + + void AddKernelToCache(Fprint128 cache_key, KernelAndDevice* kernel); + + bool LogDevicePlacement() { return log_device_placement_; } + + Rendezvous* GetRendezvous() { return rendezvous_; } + + mutex* FunctionsMu() { return &functions_mu_; } + + tensorflow::DeviceMgr* device_mgr() { return device_manager_.get(); } + + // TODO(apassos) remove the need for this + void ReleaseDeviceMgr() { device_manager_.release(); } + + // TODO(apassos) clean up RunMetadata storage. + mutex* MetadataMu() { return &metadata_mu_; } + bool ShouldStoreMetadata() { return should_store_metadata_.load(); } + void SetShouldStoreMetadata(bool value); + RunMetadata* RunMetadataProto() { return &run_metadata_; } + + FunctionLibraryDefinition* FuncLibDef() { return &func_lib_def_; } + + private: + const bool soft_placement_; + const ContextDevicePlacementPolicy policy_; + + // Note: we cannot use C++11 thread_local here as there is no concept of a + // thread-local-object-local variable in C++11. + mutex policy_map_mu_; + std::unordered_map + thread_local_policies_ GUARDED_BY(policy_map_mu_); + + std::unique_ptr device_manager_; + // Devices owned by device_manager + std::vector devices_; + // All devices are not owned. + gtl::FlatMap devices_map_; + Rendezvous* const rendezvous_; + + mutex functions_mu_; + FunctionLibraryDefinition func_lib_def_ GUARDED_BY(functions_mu_){ + OpRegistry::Global(), {}}; + + // One FunctionLibraryRuntime per device. + // func_libs[i] is the FunctionLibraryRuntime corresponding to + // session->devices[i]. + const std::unique_ptr pflr_; + + mutex cache_mu_; + std::unordered_map kernel_cache_ + GUARDED_BY(cache_mu_); + + // Whether we should compute RunMetadata. + std::atomic should_store_metadata_{false}; + mutex metadata_mu_; + RunMetadata run_metadata_ GUARDED_BY(metadata_mu_); + const bool log_device_placement_; + // EagerExecutor for async execution. + EagerExecutor executor_; + + // True if the default value for execution mode is async. Note that this value + // can be overridden per thread based on `thread_local_async` overrides. + const bool async_default_; + mutable mutex async_map_mu_; + std::unordered_map thread_local_async_ + GUARDED_BY(async_map_mu_); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ -- GitLab From 73bd57d80111dc957d117b6ae98bc2354f766604 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 19:12:18 -0700 Subject: [PATCH 333/960] Add tensor quantization info to python wrapper PiperOrigin-RevId: 190005998 --- tensorflow/contrib/lite/python/interpreter.py | 2 ++ .../contrib/lite/python/interpreter_test.py | 4 ++++ .../interpreter_wrapper/interpreter_wrapper.cc | 17 +++++++++++++++++ .../interpreter_wrapper/interpreter_wrapper.h | 1 + 4 files changed, 24 insertions(+) diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index accdd04671..b8638007f7 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -71,6 +71,7 @@ class Interpreter(object): tensor_name = self._interpreter.TensorName(tensor_index) tensor_size = self._interpreter.TensorSize(tensor_index) tensor_type = self._interpreter.TensorType(tensor_index) + tensor_quantization = self._interpreter.TensorQuantization(tensor_index) if not tensor_name or not tensor_type: raise ValueError('Could not get tensor details') @@ -80,6 +81,7 @@ class Interpreter(object): 'index': tensor_index, 'shape': tensor_size, 'dtype': tensor_type, + 'quantization': tensor_quantization, } return details diff --git a/tensorflow/contrib/lite/python/interpreter_test.py b/tensorflow/contrib/lite/python/interpreter_test.py index e85390c56c..bf124410f3 100644 --- a/tensorflow/contrib/lite/python/interpreter_test.py +++ b/tensorflow/contrib/lite/python/interpreter_test.py @@ -39,12 +39,14 @@ class InterpreterTest(test_util.TensorFlowTestCase): self.assertEqual('input', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) self.assertTrue(([1, 4] == input_details[0]['shape']).all()) + self.assertEqual((0.0, 0), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertEqual(1, len(output_details)) self.assertEqual('output', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) self.assertTrue(([1, 4] == output_details[0]['shape']).all()) + self.assertEqual((0.0, 0), output_details[0]['quantization']) test_input = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32) expected_output = np.array([[4.0, 3.0, 2.0, 1.0]], dtype=np.float32) @@ -67,12 +69,14 @@ class InterpreterTest(test_util.TensorFlowTestCase): self.assertEqual('input', input_details[0]['name']) self.assertEqual(np.uint8, input_details[0]['dtype']) self.assertTrue(([1, 4] == input_details[0]['shape']).all()) + self.assertEqual((1.0, 0), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertEqual(1, len(output_details)) self.assertEqual('output', output_details[0]['name']) self.assertEqual(np.uint8, output_details[0]['dtype']) self.assertTrue(([1, 4] == output_details[0]['shape']).all()) + self.assertEqual((1.0, 0), output_details[0]['quantization']) test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index 14e1190c80..35ad226b78 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -109,6 +109,13 @@ PyObject* PyArrayFromIntVector(const int* data, npy_intp size) { return PyArray_SimpleNewFromData(1, &size, NPY_INT32, pydata); } +PyObject* PyTupleFromQuantizationParam(const TfLiteQuantizationParams& param) { + PyObject* result = PyTuple_New(2); + PyTuple_SET_ITEM(result, 0, PyFloat_FromDouble(param.scale)); + PyTuple_SET_ITEM(result, 1, PyInt_FromLong(param.zero_point)); + return result; +} + } // namespace InterpreterWrapper::InterpreterWrapper( @@ -214,6 +221,16 @@ PyObject* InterpreterWrapper::TensorSize(int i) const { return PyArray_Return(reinterpret_cast(np_array)); } +PyObject* InterpreterWrapper::TensorQuantization(int i) const { + if (!interpreter_ || i >= interpreter_->tensors_size() || i < 0) { + Py_INCREF(Py_None); + return Py_None; + } + + const TfLiteTensor* tensor = interpreter_->tensor(i); + return PyTupleFromQuantizationParam(tensor->params); +} + bool InterpreterWrapper::SetTensor(int i, PyObject* value) { if (!interpreter_) { LOG(ERROR) << "Invalid interpreter."; diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index 63bdb30f79..0972c57259 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -54,6 +54,7 @@ class InterpreterWrapper { std::string TensorName(int i) const; PyObject* TensorType(int i) const; PyObject* TensorSize(int i) const; + PyObject* TensorQuantization(int i) const; bool SetTensor(int i, PyObject* value); PyObject* GetTensor(int i) const; -- GitLab From 212a42a01d7b30fec1d6f8ca34dbf9c095938d4a Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Wed, 21 Mar 2018 22:11:10 -0700 Subject: [PATCH 334/960] Simplified the experimental APIs related to TPU execution, by moving the graph rewrite functionality out of it. PiperOrigin-RevId: 190016936 --- tensorflow/c/BUILD | 1 + tensorflow/c/c_api_experimental.cc | 432 ++--------------------------- tensorflow/c/c_api_experimental.h | 47 ++-- 3 files changed, 39 insertions(+), 441 deletions(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index c178d7f81f..4332f44e5d 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -116,6 +116,7 @@ tf_cuda_library( ":c_api", ":c_api_internal", "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags", + "//tensorflow/contrib/tpu:all_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 34b9dec3ee..29caf508e7 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -22,389 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/protobuf/config.pb.h" -using tensorflow::Node; -using tensorflow::NodeBuilder; -using tensorflow::NodeDef; using tensorflow::Status; -using tensorflow::string; - -namespace { - -const char* const DEVICE_TPU_REPLICATED_CORE = "TPU_REPLICATED_CORE"; -const char* const DEVICE_TPU_SYSTEM = "TPU_SYSTEM"; - -TF_Operation* ToTF_Operation(Node* node) { - return static_cast(static_cast(node)); -} - -// Graph rewrite algorithm (modeled after the python TPU graph rewrite path): -// -// 1. For each input node I, with C being the consumer node of I's output: -// -// a) When infeed is not specified, feed I to a new TPUReplicatedInput node -// (both running on CPU), which in turn feeds a new Identity node N, and N feeds -// C (both running on TPU). -// -// b) Otherwise, feed I to a new InfeedEnqueueTuple node IE, both running on -// CPU. Also set an InfeedDequeueTuple node ID to feed C, both running on -// TPU. -// -// In case b), if we have multiple input nodes, they all feed into the same -// InfeedEnqueueTuple node, so that the graph has a single pair of infeed -// enqueue and dequeue nodes. The list of output tensors from the dequeue node -// can go to different consumer nodes. For example, say the original graph has -// input nodes I1 and I2 respectively feeding nodes C1 and C2. After the rewrite -// with infeed ops, we will have: I1 and I2 feed a single infeed enqueue node -// IE, and a corresponding infeed dequeue node ID produces a list of two -// tensors, respectively feeding C1 and C2. -// -// 2. Rewrite all existing graph nodes by adding an attribute on TPU -// cluster. For each node C reading some input node I, rewire it to read from a -// new input node generated in step #1 above. -// -// 3. For each output node O, feed it to a new Identity node, which in turn -// feeds a new TPUReplicatedOutput node, which in turn feeds a new Identity node -// M. Return the set of new output nodes (the "M" nodes) for caller to fetch -// from. -// -// Limitations compared to the python TPU rewrite path: -// - # replicas is always 1. -// - Less error checking. -// -// TODO(hongm): Simplify the graph rewrite to generating fewer TPUReplicate -// related nodes. -class GraphRewriter { - public: - GraphRewriter(TF_Graph* graph, int num_input_nodes, - const TF_Output* input_nodes, int num_output_nodes, - const TF_Output* output_nodes) - EXCLUSIVE_LOCKS_REQUIRED(graph->mu) - : graph_(graph), input_nodes_(input_nodes) { - for (int i = 0; i < num_input_nodes; ++i) { - // Will fill in the value part later when we create the associated new - // input node. - input_node_map_[input_nodes[i].oper->node.name()] = - NodeBuilder::NodeOut(nullptr, -1); - } - - // Grab all existing nodes for the upcoming rewrite, before mutating the - // graph. - for (Node* n : graph->graph.nodes()) { - nodes_to_rewrite_.push_back(n); - } - - for (int i = 0; i < num_output_nodes; ++i) { - output_node_map_.emplace(output_nodes[i].oper->node.name(), - PortIndexPair{output_nodes[i].index, i}); - } - } - - // On success, sets `config_op` and `shutdown_op` to the corresponding - // "ConfigureDistributedTPU" and "ShutdownDistributedTPU" nodes added to the - // graph. - tensorflow::Status Rewrite(TF_Output* new_output_nodes, - TF_Operation** infeed_enqueue_node, - TF_Output* config_op, TF_Output* shutdown_op) - EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { - TF_RETURN_IF_ERROR(ProcessInputNodes(infeed_enqueue_node)); - - return RewriteGraphAndAddOutputNodes(new_output_nodes, config_op, - shutdown_op); - } - - private: - // Synthesizes new graph nodes (infeed enqueue or TPU replicated input - // nodes) for the input nodes, and creates a replicated metadata node. - // - // When `infeed_enqueue_node` is non-NULL and there are some input nodes, - // also adds the infeed dequeue node. - tensorflow::Status ProcessInputNodes(TF_Operation** infeed_enqueue_node) - EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { - Node* metadata_node; - TF_RETURN_IF_ERROR( - NodeBuilder(metadata_node_name_.c_str(), "TPUReplicateMetadata") - .Attr("num_replicas", 1) - .Attr("_tpu_replicate", cluster_name_.c_str()) - .Finalize(&graph_->graph, &metadata_node)); - - Node* dequeue_node = nullptr; - // Be deterministic in the corner case where `use_infeed` below is false. - if (infeed_enqueue_node) *infeed_enqueue_node = nullptr; - const bool use_infeed = - infeed_enqueue_node != nullptr && !input_node_map_.empty(); - if (use_infeed) { - std::vector new_input_list; - new_input_list.reserve(input_node_map_.size()); - std::vector input_dtypes; - input_dtypes.reserve(input_node_map_.size()); - std::vector input_shapes; - input_shapes.reserve(input_node_map_.size()); - for (int i = 0; i < input_node_map_.size(); ++i) { - Node& input_node = input_nodes_[i].oper->node; - new_input_list.push_back( - NodeBuilder::NodeOut(&input_node, input_nodes_[i].index)); - input_dtypes.push_back(input_node.output_type(input_nodes_[i].index)); - tensorflow::TensorShapeProto shape; - TF_RETURN_IF_ERROR( - tensorflow::GetNodeAttr(input_node.attrs(), "shape", &shape)); - VLOG(1) << "Input node " << i << " has shape " << shape.DebugString(); - input_shapes.push_back(shape); - } - // Enqueue always runs on CPU. - Node* enqueue_node; - TF_RETURN_IF_ERROR(NodeBuilder("InfeedEnqueueTuple", "InfeedEnqueueTuple") - .Input(new_input_list) - .Device("/device:CPU:0") - .Attr("device_ordinal", 0) - .Attr("dtypes", input_dtypes) - .Attr("shapes", input_shapes) - .Finalize(&graph_->graph, &enqueue_node)); - *infeed_enqueue_node = ToTF_Operation(enqueue_node); - // The dequeue node should be put onto the "_tpu_replicate" cluster. - TF_RETURN_IF_ERROR( - NodeBuilder("TPUReplicate/InfeedDequeueTuple", "InfeedDequeueTuple") - .ControlInput(metadata_node) - .Attr("_tpu_replicate", cluster_name_.c_str()) - .Attr("dtypes", input_dtypes) - .Attr("shapes", input_shapes) - .Finalize(&graph_->graph, &dequeue_node)); - } - - for (int i = 0; i < input_node_map_.size(); ++i) { - VLOG(1) << "Handling input node " << input_nodes_[i].oper->node.name(); - if (use_infeed) { - DCHECK(dequeue_node); - input_node_map_[input_nodes_[i].oper->node.name()] = - NodeBuilder::NodeOut(dequeue_node, i); - } else { - Node* replicated_input_node; - { - std::string replicated_input_name("TPUReplicate/input" + - std::to_string(i)); - NodeBuilder::NodeOut input(&input_nodes_[i].oper->node, - input_nodes_[i].index); - std::vector input_list; - input_list.push_back(input); - TF_RETURN_IF_ERROR( - NodeBuilder(replicated_input_name.c_str(), "TPUReplicatedInput") - // This op requires an input list. - .Input(input_list) - .Finalize(&graph_->graph, &replicated_input_node)); - } - - { - Node* new_input_node; - const std::string new_input_name("TPUReplicate/replicated_input_" + - std::to_string(i)); - TF_RETURN_IF_ERROR(NodeBuilder(new_input_name.c_str(), "Identity") - .Input(replicated_input_node, 0) - .ControlInput(metadata_node) - .Attr("_tpu_replicate", cluster_name_.c_str()) - .Finalize(&graph_->graph, &new_input_node)); - DCHECK_GT(input_node_map_.count(input_nodes_[i].oper->node.name()), - 0); - input_node_map_[input_nodes_[i].oper->node.name()] = - NodeBuilder::NodeOut(new_input_node, 0); - } - } - } - return Status::OK(); - } - - // On success, sets `config_op` and `shutdown_op` to the corresponding - // "ConfigureDistributedTPU" and "ShutdownDistributedTPU" nodes added to the - // graph. - tensorflow::Status RewriteGraphAndAddOutputNodes(TF_Output* new_output_nodes, - TF_Output* config_op, - TF_Output* shutdown_op) - EXCLUSIVE_LOCKS_REQUIRED(graph_->mu) { - tensorflow::Status s; - // For each non-input node in the input graph, place the node in a "TPU - // replicate cluster" via an attribute, and with the above metadata node - // as a control dependency. - // - // Although we have handled the input nodes in ProcessInputNodes(), some - // of those nodes may also serve as output nodes, which we will handle - // below. - for (Node* n : nodes_to_rewrite_) { - if (n->IsSource()) continue; - VLOG(1) << "Rewriting node " << n->name(); - - if (n->IsSink()) { - // TODO(hongm): Rewire SINK to be control dependent on the new input - // nodes created above? - continue; - } - - const NodeDef& old_def = n->def(); - // Let node C be the consumer of `n`'s output in the original graph. - // This new node will feed into C in the rewritten graph. - NodeBuilder::NodeOut new_node; - if (input_node_map_.count(n->name())) { - new_node = input_node_map_[n->name()]; - } else { - // This node is to replace `n` in the graph. - NodeDef new_def = n->def(); - const std::string new_node_name = "TPUReplicate/" + n->name(); - new_def.set_name(new_node_name); - new_def.clear_input(); - for (int i = 0; i < old_def.input_size(); ++i) { - const string old_input_name = old_def.input(i); - // When there are multiple input nodes that get mapped to the same - // infeed dequeue node, use different output ports of the dequeue - // node. e.g. Say in the original graph, input I1 feeds C1, and I2 - // feeds C2. After the rewrite, I1 and I2 both feed a new infeed - // enqueue node, and the corresponding dequeue node has its output - // port 0 feeding C1, and output port 1 feeding C2. Note C1 and C2 - // could be the same node (e.g. an Add that takes 2 inputs). - const string new_input_name = - input_node_map_.count(old_input_name) > 0 - ? tensorflow::strings::StrCat( - input_node_map_[old_input_name].node->name(), ":", - input_node_map_[old_input_name].index) - : "TPUReplicate/" + old_input_name; - new_def.add_input(new_input_name); - } - if (old_def.input_size() == 0) { - // It is sufficient to only set control dependency of nodes without - // input. Other nodes with input(s) with inherit such control - // dependency. - // e.g. say the graph computes add(x, y). Once we make nodes x and y - // control-dependent on the metadata node, node add will inherit - // such control dependency indirectly. - new_def.add_input( - tensorflow::strings::StrCat("^", metadata_node_name_.c_str())); - } - tensorflow::AddNodeAttr("_tpu_replicate", cluster_name_.c_str(), - &new_def); - new_node = NodeBuilder::NodeOut(graph_->graph.AddNode(new_def, &s), 0); - if (!s.ok()) { - return s; - } - VLOG(1) << "The rewritten node node is " - << new_node.node->DebugString(); - } - - if (output_node_map_.count(n->name()) > 0) { - VLOG(1) << "Handling output node " << n->name(); - auto range_it = output_node_map_.equal_range(n->name()); - for (auto it = range_it.first; it != range_it.second; ++it) { - const PortIndexPair& pair = it->second; - Node* out_identity_node; - { - // If this output node is also an input, use the input_node_map_'s - // stored port, which would also work for an infeed dequeue op. - // Otherwise use pair.port. - // An example of the former: Say the graph has input nodes I1 and - // I2, and the output nodes are also I1 and I2. In the rewritten - // graph with infeed, the 2 output nodes will both come from a - // single infeed dequeue node ID, with output ports respectively - // set to 0 and 1. - const int output_port = - input_node_map_.count(n->name()) ? new_node.index : pair.port; - VLOG(1) << "Handling its output port " << output_port - << " at output index " << pair.index; - std::string output_node_name = "TPUReplicate/Identity"; - if (pair.index > 0) { - output_node_name += "_" + std::to_string(pair.index); - } - TF_RETURN_IF_ERROR( - NodeBuilder(output_node_name.c_str(), "Identity") - .Input(new_node.node, output_port) - .Device(!old_def.device().empty() - ? old_def.device() - : tensorflow::strings::StrCat( - "/device:", DEVICE_TPU_REPLICATED_CORE)) - .Attr("_tpu_replicate", cluster_name_.c_str()) - .Finalize(&graph_->graph, &out_identity_node)); - VLOG(1) << "out_identity_node: " - << out_identity_node->DebugString(); - } - - Node* replicated_output_node; - { - const std::string replicated_output_node_name = - "TPUReplicate/output" + std::to_string(pair.index); - TF_RETURN_IF_ERROR( - NodeBuilder(replicated_output_node_name.c_str(), - "TPUReplicatedOutput") - .Input(out_identity_node, 0) - .Attr("num_replicas", 1) - .Finalize(&graph_->graph, &replicated_output_node)); - VLOG(1) << "replicated_output_node: " - << replicated_output_node->DebugString(); - } - - Node* final_output_node; - const std::string final_output_node_name = - "TPUReplicate/output_" + std::to_string(pair.index) + "_shard_" + - std::to_string(0); - TF_RETURN_IF_ERROR( - NodeBuilder(final_output_node_name.c_str(), "Identity") - .Input(replicated_output_node, 0) - .Finalize(&graph_->graph, &final_output_node)); - VLOG(1) << "new_output_node: " << final_output_node->DebugString(); - auto oper = ToTF_Operation(final_output_node); - new_output_nodes[pair.index] = {oper, 0}; - } - } - - if (input_node_map_.count(n->name()) == 0) { - graph_->graph.RemoveNode(n); - } - } - - { - Node* config_node; - TF_RETURN_IF_ERROR( - NodeBuilder("ConfigureDistributedTPU", "ConfigureDistributedTPU") - .Device(DEVICE_TPU_SYSTEM) - .Finalize(&graph_->graph, &config_node)); - *config_op = {ToTF_Operation(config_node), 0}; - } - - { - Node* shutdown_node; - TF_RETURN_IF_ERROR( - NodeBuilder("ShutdownDistributedTPU", "ShutdownDistributedTPU") - .Device(DEVICE_TPU_SYSTEM) - .Finalize(&graph_->graph, &shutdown_node)); - *shutdown_op = {ToTF_Operation(shutdown_node), 0}; - } - - return Status::OK(); - } - - TF_Graph* const graph_; - - const TF_Output* const input_nodes_; - - const std::string cluster_name_ = "TPUReplicate/cluster"; - const std::string metadata_node_name_ = "TPUReplicate/TPUReplicateMetadata"; - - // Keep mappings from the current input nodes to newly created input nodes, - // which we will use to rewrite existing nodes that read these - // inputs. e.g. A node that reads input node PlaceHolder could be rewired to - // read the created TPUReplicate/replicated_input_0 node or some output port - // of the created TPUReplicate/InfeedDequeueTuple node. Because of the latter - // case, we the map entries store NodeBuilder::NodeOut, and not just Node*. - std::unordered_map input_node_map_; - - std::vector nodes_to_rewrite_; - - // Map from name to set{(output port, output tensor idx)}. - // e.g. Say there are 3 output tensors, respectively produced by (node 0, - // port 0), (node 0, port 1), (node 1, port 0). Then the mapping entries - // are: node 0 -> {(port 0, idx 0), (port 1, idx 1)} node 1 -> {(port 0, idx - // 2)} Based on these mappings, we will generate 3 new output nodes. - struct PortIndexPair { - int port; - int index; - }; - std::multimap output_node_map_; -}; - -} // namespace void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) { tensorflow::ConfigProto& config = options->options.config; @@ -425,62 +43,54 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) { } } -TF_Output TF_SetupTPUExecution(TF_Session* session, int num_input_nodes, - const TF_Output* input_nodes, - int num_output_nodes, - const TF_Output* output_nodes, - TF_Output* new_output_nodes, - TF_Operation** infeed_enqueue_node, - TF_Status* status) { - TF_Output config_op, shutdown_op; - { - auto graph = session->graph; - tensorflow::mutex_lock c(graph->mu); - - VLOG(1) << "Graph before TPU rewrite: " - << graph->graph.ToGraphDefDebug().DebugString(); - GraphRewriter rewriter(graph, num_input_nodes, input_nodes, - num_output_nodes, output_nodes); - status->status = rewriter.Rewrite(new_output_nodes, infeed_enqueue_node, - &config_op, &shutdown_op); - if (!status->status.ok()) { - return shutdown_op; - } - VLOG(1) << "Graph after TPU rewrite: " - << graph->graph.ToGraphDefDebug().DebugString(); +void TF_InitializeTPU(TF_Session* session, TF_Status* status) { + VLOG(1) << "Initializing TPU"; + TF_Operation* config_op = + TF_GraphOperationByName(session->graph, "ConfigureDistributedTPU"); + if (config_op == nullptr) { + status->status = tensorflow::errors::Internal( + "Unable to find node ConfigureDistributedTPU in the TF graph."); + return; } - VLOG(1) << "Initializing TPU"; + TF_Output config_node{config_op, 0}; + TF_Tensor* dummy_output; TF_SessionRun(session, /*run_options*/ nullptr, // input related parameters /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, // output related parameters - /*outputs*/ &config_op, /*output_values*/ &dummy_output, + /*outputs*/ &config_node, /*output_values*/ &dummy_output, /*noutputs*/ 1, /*targets*/ nullptr, /*ntargets*/ 0, /*run_metadata*/ nullptr, status); if (status->status.ok()) { TF_DeleteTensor(dummy_output); } - return shutdown_op; } -void TF_ShutdownTPUExecution(TF_Session* session, TF_Output shutdown_node, - TF_Status* status) { +void TF_ShutdownTPU(TF_Session* session, TF_Status* status) { { tensorflow::mutex_lock c(session->graph->mu); VLOG(1) << "Shutting down TPU, with input graph: " << session->graph->graph.ToGraphDefDebug().DebugString(); } + TF_Operation* shutdown_op = + TF_GraphOperationByName(session->graph, "ShutdownDistributedTPU"); + if (shutdown_op == nullptr) { + status->status = tensorflow::errors::Internal( + "Unable to find node ShutdownDistributedTPU in the TF graph."); + return; + } + TF_SessionRun(session, /*run_options*/ nullptr, // input related parameters /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0, // output related parameters /*outputs*/ nullptr, /*output_values*/ nullptr, /*noutputs*/ 0, - /*targets*/ &shutdown_node.oper, /*ntargets*/ 1, + /*targets*/ &shutdown_op, /*ntargets*/ 1, /*run_metadata*/ nullptr, status); } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index b95cdfe6aa..f069398bbb 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -60,39 +60,26 @@ extern "C" { TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable); -// Sets up TPU execution, by rewriting the graph accordingly, and initializing -// TPU system. +// Initializes TPU system. Must be called exactly once before TF_SessionRun() is +// called on a TPU graph. // -// When `infeed_enqueue_node` is non-NULL and there are input tensors, rewrites -// the graph by adding the relevant infeed enqueue/dequeue ops, and returns the -// enqueue op in `infeed_enqueue_node` on success, so that user can run that -// node and feed input tensors. When there are no input tensors, -// `infeed_enqueue_node` is ignored, and user should not run that node later. -// TODO(hongm): In this case, we currently only support input tensors of dim 0 -// shape. Lift that constraint. -// -// On success, also returns a shutdown node to be used in a subsequent -// TF_ShutdownTPUExecution(), and sets the new output nodes in -// `new_output_nodes` for caller to fetch from. Must be called exactly once -// before TF_SessionRun(). -// -// The API and logic is modeled after the python counterparts -// tpu.{initialize_system(), rewrite(), shutdown_system()}. -// -// TODO(b/74774824): Create separate APIs for initializing TPU system and graph -// rewrite. -TF_CAPI_EXPORT extern TF_Output TF_SetupTPUExecution( - TF_Session* session, int num_input_nodes, const TF_Output* input_nodes, - int num_output_nodes, const TF_Output* output_nodes, - TF_Output* new_output_nodes, TF_Operation** infeed_enqueue_node, - TF_Status* status); - -// Shuts down TPU system. For any `session` where TF_SetupTPUExecution() has +// The session graph must contain a node named ConfigureDistributedTPU. +// TODO(b/74774824): Improve the API on initializing TPU system. +TF_CAPI_EXPORT extern void TF_InitializeTPU(TF_Session* session, + TF_Status* status); + +// Shuts down TPU system. For any `session` where TF_InitializeTPU() has // been successfully called, this call must be made exactly once before the // session is closed. -TF_CAPI_EXPORT extern void TF_ShutdownTPUExecution(TF_Session* session, - TF_Output shutdown_node, - TF_Status* status); +// The session graph must contain a node named ShutdownDistributedTPU. +TF_CAPI_EXPORT extern void TF_ShutdownTPU(TF_Session* session, + TF_Status* status); + +// Returns the graph content in a human-readable format, with length set in +// `len`. The format is subject to change in the future. +// The returned string is heap-allocated, and caller should call free() on it. +TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, + size_t* len); // Returns the graph content in a human-readable format, with length set in // `len`. The format is subject to change in the future. -- GitLab From 0e1775355f9d7fe5301bc0d17906453caf970e27 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Wed, 21 Mar 2018 23:04:59 -0700 Subject: [PATCH 335/960] Merge changes from github. PiperOrigin-RevId: 190020572 --- .../data/python/kernel_tests/resample_test.py | 40 +++++++++++++++++++ .../contrib/data/python/ops/resampling.py | 9 +++-- .../core/kernels/segment_reduction_ops.h | 8 ---- .../docs_src/tutorials/kernel_methods.md | 4 +- .../docs_src/tutorials/recurrent_quickdraw.md | 8 ++-- 5 files changed, 51 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index 913ab9b9f8..5f47dcb339 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -21,7 +21,10 @@ import numpy as np from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import string_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -68,6 +71,43 @@ class ResampleTest(test.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) + def testRandomClasses(self): + init_dist = [0.25, 0.25, 0.25, 0.25] + target_dist = [0.0, 0.0, 0.0, 1.0] + num_classes = len(init_dist) + # We don't need many samples to test a dirac-delta target distribution + num_samples = 100 + data_np = np.random.choice(num_classes, num_samples, p=init_dist) + + dataset = dataset_ops.Dataset.from_tensor_slices(data_np) + + # Apply a random mapping that preserves the data distribution. + def _remap_fn(_): + return math_ops.cast(random_ops.random_uniform([1]) * num_classes, + dtypes.int32)[0] + dataset = dataset.map(_remap_fn) + + # Reshape distribution. + dataset = dataset.apply( + resampling.rejection_resample( + class_func=lambda x: x, + target_dist=target_dist, + initial_dist=init_dist)) + + get_next = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + returned = [] + with self.assertRaises(errors.OutOfRangeError): + while True: + returned.append(sess.run(get_next)) + + classes, _ = zip(*returned) + bincount = np.bincount( + np.array(classes), + minlength=num_classes).astype(np.float32) / len(classes) + + self.assertAllClose(target_dist, bincount, atol=1e-2) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index f4015f19fb..a182dddd38 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -101,11 +101,12 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): initial_dist_ds)) .map(maybe_warn_on_large_rejection)) - current_probabilities_ds = dataset_ops.Dataset.zip( - (acceptance_dist_ds, class_values_ds)).map(array_ops.gather) + def _gather_and_copy(class_val, acceptance_prob, data): + return (class_val, array_ops.gather(acceptance_prob, class_val), data) + current_probabilities_and_class_and_data_ds = dataset_ops.Dataset.zip( + (class_values_ds, acceptance_dist_ds, dataset)).map(_gather_and_copy) filtered_ds = ( - dataset_ops.Dataset.zip((class_values_ds, current_probabilities_ds, - dataset)) + current_probabilities_and_class_and_data_ds .filter(lambda _1, p, _2: random_ops.random_uniform([], seed=seed) < p)) return filtered_ds.map(lambda class_value, _, data: (class_value, data)) diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index d65692a552..4abfbfb1a6 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -16,14 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ - -// This file requires the following include because it uses CudaAtomicMax: -// #include "tensorflow/core/util/cuda_kernel_helper.h" - -// Unfortunately we can't add the #include, since it breaks compilation for -// non-GPU targets. This only breaks in clang, because it's more strict for -// template code and CudaAtomicMax is used in template context. - // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" diff --git a/tensorflow/docs_src/tutorials/kernel_methods.md b/tensorflow/docs_src/tutorials/kernel_methods.md index b1f06ce0a3..73e5c51057 100644 --- a/tensorflow/docs_src/tutorials/kernel_methods.md +++ b/tensorflow/docs_src/tutorials/kernel_methods.md @@ -1,7 +1,7 @@ # Improving Linear Models Using Explicit Kernel Methods Note: This document uses a deprecated version of @{tf.estimator}, -which has a different interface (see `tf.contrib.learn Estimator`). +which has a @{tf.contrib.learn.Estimator$different interface}. It also uses other `contrib` methods whose @{$version_compat#not_covered$API may not be stable}. @@ -53,7 +53,7 @@ In order to feed data to a `tf.contrib.learn Estimator`, it is helpful to conver it to Tensors. For this, we will use an `input function` which adds Ops to the TensorFlow graph that, when executed, create mini-batches of Tensors to be used downstream. For more background on input functions, check -@{$get_started/premade_estimators#input_fn$this section on input functions}. +@{$get_started/premade_estimators#create_input_functions$this section on input functions}. In this example, we will use the `tf.train.shuffle_batch` Op which, besides converting numpy arrays to Tensors, allows us to specify the batch_size and whether to randomize the input every time the input_fn Ops are executed diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md index 7584a76ba5..5d83fbe2a3 100644 --- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md +++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md @@ -38,8 +38,8 @@ To try the code for this tutorial: 1. [Download the data](#download-the-data) in `TFRecord` format from [here](http://download.tensorflow.org/data/quickdraw_tutorial_dataset_v1.tar.gz) and unzip it. More details about [how to obtain the original Quick, Draw! - data](#optional-download-the-full-quick-draw-data) and [how to convert that - to `TFRecord` files](#optional-converting-the-data) is available below. + data](#optional_download_the_full_quick_draw_data) and [how to convert that + to `TFRecord` files](#optional_converting_the_data) is available below. 1. Execute the tutorial code with the following command to train the RNN-based model described in this tutorial. Make sure to adjust the paths to point to @@ -108,7 +108,7 @@ This download will take a while and download a bit more than 23GB of data. ### Optional: Converting the data To convert the `ndjson` files to -@{$python/python_io#tfrecords_format_details$TFRecord} files containing +@{$python/python_io#TFRecords_Format_Details$TFRecord} files containing [`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) protos run the following command. @@ -118,7 +118,7 @@ protos run the following command. ``` This will store the data in 10 shards of -@{$python/python_io#tfrecords_format_details$TFRecord} files with 10000 items +@{$python/python_io#TFRecords_Format_Details$TFRecord} files with 10000 items per class for the training data and 1000 items per class as eval data. This conversion process is described in more detail in the following. -- GitLab From f83711104b64a108ac43213c92f13827343d09ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 21 Mar 2018 23:11:40 -0700 Subject: [PATCH 336/960] Automated g4 rollback of changelist 190001737 PiperOrigin-RevId: 190021164 --- tensorflow/c/eager/BUILD | 2 - tensorflow/c/eager/c_api.cc | 179 ++++++++++------ tensorflow/c/eager/c_api_internal.h | 84 +++++++- tensorflow/core/common_runtime/eager/BUILD | 22 -- .../core/common_runtime/eager/context.cc | 142 ------------- .../core/common_runtime/eager/context.h | 193 ------------------ 6 files changed, 194 insertions(+), 428 deletions(-) delete mode 100644 tensorflow/core/common_runtime/eager/context.cc delete mode 100644 tensorflow/core/common_runtime/eager/context.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index bea5a121b3..841ff48a38 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -28,7 +28,6 @@ tf_cuda_library( "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", "//tensorflow/core:core_cpu", - "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core:core_cpu_internal", @@ -65,7 +64,6 @@ tf_cuda_library( "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 5d668848ab..a23015c99e 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -71,6 +71,18 @@ std::atomic_int_fast64_t func_id_generator(0); } // namespace +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy) { + if (!soft_placement) { + return original_policy; + } + if (original_policy == TFE_DEVICE_PLACEMENT_EXPLICIT || + original_policy == TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) { + return TFE_DEVICE_PLACEMENT_SILENT; + } + return original_policy; +} + extern "C" { TFE_ContextOptions* TFE_NewContextOptions() { return new TFE_ContextOptions; } @@ -92,7 +104,19 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, unsigned char async, TF_Status* status) { - status->status = ctx->context.SetAsyncForThread(async); + { + tensorflow::mutex_lock l(ctx->async_map_mu); + ctx->thread_local_async[std::this_thread::get_id()] = async; + } + if (async) { + ctx->executor.EnableAsync(); + } else { + // TODO(agarwal): Currently we add a wait here to handle cases where a sync + // op has a control dependency on an async op, and the latter has not + // executed yet. This wait can be removed by storing all the control inputs + // and waiting for them when executing ops. + status->status = ctx->executor.WaitForAllPendingNodes(); + } } void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } @@ -109,26 +133,34 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { new tensorflow::DeviceMgr(devices)); tensorflow::Rendezvous* r = new tensorflow::IntraProcessRendezvous(device_mgr.get()); - return new TFE_Context(opts->session_options.options, opts->policy, - opts->async, std::move(device_mgr), r); + return new TFE_Context(*opts, std::move(device_mgr), r); } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { + status->status = ctx->executor.WaitForAllPendingNodes(); + { + tensorflow::mutex_lock ml(ctx->cache_mu); + tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); + } + ctx->rendezvous->Unref(); delete ctx; } TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { TF_DeviceList* list = new TF_DeviceList; - ctx->context.device_mgr()->ListDeviceAttributes(&list->response); + ctx->device_manager->ListDeviceAttributes(&list->response); return list; } -void TFE_ContextClearCaches(TFE_Context* ctx) { ctx->context.ClearCaches(); } +void TFE_ContextClearCaches(TFE_Context* ctx) { + tensorflow::mutex_lock ml(ctx->cache_mu); + tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); +} void TFE_ContextSetThreadLocalDevicePlacementPolicy( TFE_Context* ctx, TFE_ContextDevicePlacementPolicy policy) { - ctx->context.SetThreadLocalDevicePlacementPolicy( - static_cast(policy)); + tensorflow::mutex_lock ml(ctx->policy_map_mu); + ctx->thread_local_policies[std::this_thread::get_id()] = policy; } // Note: this function looks up a thread local policy. So it should be called in @@ -136,20 +168,25 @@ void TFE_ContextSetThreadLocalDevicePlacementPolicy( // safe to call this function from the async EagerExecutor threads. extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( TFE_Context* ctx) { - return static_cast( - ctx->context.GetDevicePlacementPolicy()); + tensorflow::mutex_lock ml(ctx->policy_map_mu); + auto policy_map_it = + ctx->thread_local_policies.find(std::this_thread::get_id()); + if (policy_map_it != ctx->thread_local_policies.end()) { + return policy_map_it->second; + } + return ctx->policy; } void TFE_ContextAsyncWait(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->context.AsyncWait(); + status->status = ctx->executor.WaitForAllPendingNodes(); } void TFE_ContextGetStatus(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->context.GetStatus(); + status->status = ctx->executor.status(); } void TFE_ContextAsyncClearError(TFE_Context* ctx) { - ctx->context.ClearAsyncError(); + ctx->executor.ClearError(); } TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { @@ -222,7 +259,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, // nullptr. tensorflow::Device* src_opd = nullptr; TF_RETURN_IF_ERROR(h->TensorAndDevice(&src, &srcd, &src_opd)); - if (srcd == nullptr) srcd = ctx->context.HostCPU(); + if (srcd == nullptr) srcd = ctx->devices[0]; bool is_same_device = (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); const bool dst_cpu = IsCPU(dstd); @@ -295,7 +332,8 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, status->status = tensorflow::AttrTypeMapForOp(name, &types); if (status->status.ok()) return new TFE_Op(ctx, name, types); if (TF_GetCode(status) == TF_NOT_FOUND) { - if (ctx->context.FindFunctionByName(name)) { + tensorflow::mutex_lock l(ctx->functions_mu); + if (ctx->func_lib_def.Find(name) != nullptr) { status->status = tensorflow::Status::OK(); return new TFE_Op(ctx, name, nullptr); } @@ -308,14 +346,20 @@ void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { - status->status = op->ctx->context.FindDeviceByName(device_name, &d); + auto it = op->ctx->devices_map.find(device_name); + if (it == op->ctx->devices_map.end()) { + status->status = + tensorflow::errors::InvalidArgument(device_name, " unknown device."); + return; + } + d = it->second; } op->device = d; } const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device; + (op->device == nullptr) ? op->ctx->devices[0] : op->device; return device->name().c_str(); } @@ -590,7 +634,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, TFE_Context* ctx, TF_Status* status) { tensorflow::DeviceSet ds; - for (tensorflow::Device* d : *ctx->context.devices()) { + for (tensorflow::Device* d : ctx->devices) { ds.AddDevice(d); } tensorflow::DeviceTypeVector final_devices; @@ -604,7 +648,7 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, "Could not find valid device for node ", ndef.DebugString()); return nullptr; } - for (tensorflow::Device* d : *ctx->context.devices()) { + for (tensorflow::Device* d : ctx->devices) { if (d->device_type() == final_devices[0].type_string()) { return d; } @@ -619,8 +663,9 @@ tensorflow::Status Execute( const tensorflow::gtl::InlinedVector& op_inputs, tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, TFE_TensorHandle** retvals, int num_retvals) { - if (!ctx->context.SoftPlacement() && device == nullptr) { - device = ctx->context.HostCPU(); + if (!ctx->soft_placement && device == nullptr) { + // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU + device = ctx->devices[0]; } if (device == nullptr) { @@ -652,18 +697,18 @@ tensorflow::Status Execute( if (maybe_stats != nullptr) { maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - maybe_stats->all_start_micros()); - tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); - if (ctx->context.ShouldStoreMetadata()) { - auto* step_stats = ctx->context.RunMetadataProto()->mutable_step_stats(); + tensorflow::mutex_lock ml(ctx->metadata_mu); + if (ctx->should_store_metadata.load()) { + auto* step_stats = ctx->run_metadata.mutable_step_stats(); // Lazily initialize the RunMetadata with information about all devices if // this is the first call. - while (step_stats->dev_stats_size() < ctx->context.devices()->size()) { + while (step_stats->dev_stats_size() < ctx->devices.size()) { step_stats->add_dev_stats(); } // Find the current device's index. int device_idx = 0; - for (int i = 0; i < ctx->context.devices()->size(); ++i) { - if (ctx->context.devices()->at(i) == device) { + for (int i = 0; i < ctx->devices.size(); ++i) { + if (ctx->devices[i] == device) { device_idx = i; break; } @@ -699,7 +744,7 @@ class ExecuteNode : public tensorflow::EagerNode { tensorflow::NodeExecStats* maybe_stats, const tensorflow::DataTypeVector& output_dtypes, TFE_TensorHandle** retvals, int num_retvals) - : tensorflow::EagerNode(op->ctx->context.NextId()), + : tensorflow::EagerNode(op->ctx->executor.NextId()), ctx_(op->ctx), op_device_(op->device), inputs_(op->inputs), @@ -755,7 +800,7 @@ class CopyToDeviceNode : public tensorflow::EagerNode { public: CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, TFE_Context* ctx) - : tensorflow::EagerNode(ctx->context.NextId()), + : tensorflow::EagerNode(ctx->executor.NextId()), src_(src), dstd_(dstd), ctx_(ctx), @@ -1018,7 +1063,7 @@ extern "C" { void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { TFE_Context* ctx = op->ctx; - status->status = ctx->context.GetStatus(); + status->status = ctx->executor.status(); if (!status->status.ok()) { return; } @@ -1042,7 +1087,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && input_op_device != op->device) { tensorflow::Device* d = - input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device; + input_op_device == nullptr ? ctx->devices[0] : input_op_device; VLOG(1) << "Changing device of operation " << op->name << " to " << d->name() << " because input #" << i << " is a resource in this device."; @@ -1050,35 +1095,40 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } tensorflow::Device* device = op->device; - if (!ctx->context.SoftPlacement() && device == nullptr) { - device = ctx->context.HostCPU(); + if (!ctx->soft_placement && device == nullptr) { + // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU + device = ctx->devices[0]; } tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); + tensorflow::KernelAndDevice* kernel; + { + tensorflow::tf_shared_lock l(ctx->cache_mu); + kernel = tensorflow::gtl::FindPtrOrNull(ctx->kernel_cache, cache_key); + } if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - if (ctx->context.SoftPlacement() && device == nullptr) { + if (ctx->soft_placement && device == nullptr) { device = SelectDevice(ndef, ctx, status); if (!status->status.ok()) { return; } } CHECK(device != nullptr); - if (ctx->context.LogDevicePlacement()) { + if (ctx->log_device_placement) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); } - kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous()); + kernel = new tensorflow::KernelAndDevice(ctx->rendezvous); // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def // will be accessed, so grab on to the lock. // See WARNING comment in Execute (before kernel->Run) - would be nice to // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu()); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->context.func_lib(device), kernel); + tensorflow::tf_shared_lock l(ctx->functions_mu); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; @@ -1086,7 +1136,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Update output_dtypes inside `kernel`. const tensorflow::OpDef* op_def = nullptr; const tensorflow::FunctionDef* function_def = - ctx->context.FuncLibDef()->Find(ndef.op()); + ctx->func_lib_def.Find(ndef.op()); if (function_def != nullptr) { op_def = &(function_def->signature()); } @@ -1102,7 +1152,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (!status->status.ok()) { return; } - ctx->context.AddKernelToCache(cache_key, kernel); + tensorflow::mutex_lock ml(ctx->cache_mu); + tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); const int output_dtypes_size = output_dtypes.size(); @@ -1120,11 +1171,11 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // device from the one requested above. device = kernel->device(); } - status->status = ValidateInputTypeAndPlacement(ctx, ctx->context.HostCPU(), - device, op, kernel->kernel()); + status->status = ValidateInputTypeAndPlacement(ctx, ctx->devices[0], device, + op, kernel->kernel()); if (!status->status.ok()) return; std::unique_ptr maybe_stats; - if (ctx->context.ShouldStoreMetadata()) { + if (ctx->should_store_metadata.load()) { maybe_stats.reset(new tensorflow::NodeExecStats); maybe_stats->set_node_name(op->name); maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); @@ -1132,14 +1183,14 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); // TODO(apassos) track referenced tensors } - if (ctx->context.Async()) { + if (ctx->Async()) { // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. tensorflow::EagerNode* node = new ExecuteNode(op, kernel, maybe_stats.release(), output_dtypes, retvals, *num_retvals); - ctx->context.ExecutorAdd(node); + ctx->executor.Add(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to // allocate it. @@ -1155,24 +1206,23 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - status->status = ctx->context.GetStatus(); + status->status = ctx->executor.status(); if (!status->status.ok()) { return nullptr; } - tensorflow::Device* dstd = ctx->context.HostCPU(); + tensorflow::Device* dstd = ctx->devices[0]; if (device_name != nullptr && strlen(device_name) > 0) { - status->status = - ctx->context.device_mgr()->LookupDevice(device_name, &dstd); + status->status = ctx->device_manager->LookupDevice(device_name, &dstd); if (!status->status.ok()) return nullptr; } - if (ctx->context.Async()) { + if (ctx->Async()) { // Note that `h` may not be currently ready. However execution order will // make sure that `h` is ready before the copy is actually done. CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); TFE_TensorHandle* output = node->dst(); // Note that calling Add makes `node` accessible by the EagerExecutor // thread. So further accesses need to be thread-safe. - ctx->context.ExecutorAdd(node); + ctx->executor.Add(node); return output; } else { TFE_TensorHandle* output = nullptr; @@ -1190,20 +1240,24 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, tensorflow::errors::InvalidArgument("Invalid FunctionDef proto"); return; } - status->status = ctx->context.AddFunctionDef(function_def); + tensorflow::mutex_lock l(ctx->functions_mu); + status->status = ctx->func_lib_def.AddFunctionDef(function_def); } void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, TF_Status* status) { - status->status = ctx->context.AddFunctionDef(function->fdef); + tensorflow::mutex_lock l(ctx->functions_mu); + status->status = ctx->func_lib_def.AddFunctionDef(function->fdef); } void TFE_ContextEnableRunMetadata(TFE_Context* ctx) { - ctx->context.SetShouldStoreMetadata(true); + ctx->should_store_metadata.store(true); } void TFE_ContextDisableRunMetadata(TFE_Context* ctx) { - ctx->context.SetShouldStoreMetadata(false); + tensorflow::mutex_lock ml(ctx->metadata_mu); + ctx->should_store_metadata.store(false); + ctx->run_metadata.Clear(); } } // extern "C" @@ -1232,9 +1286,9 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status) { TFE_ContextAsyncWait(ctx, status); if (!status->status.ok()) return; - tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); - status->status = MessageToBuffer(*ctx->context.RunMetadataProto(), buf); - ctx->context.RunMetadataProto()->Clear(); + tensorflow::mutex_lock ml(ctx->metadata_mu); + status->status = MessageToBuffer(ctx->run_metadata, buf); + ctx->run_metadata.Clear(); } namespace { @@ -1309,6 +1363,11 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } // namespace tensorflow +bool TFE_Context::Async() const { + tensorflow::mutex_lock l(async_map_mu); + return tensorflow::gtl::FindWithDefault( + thread_local_async, std::this_thread::get_id(), async_default); +} bool TFE_TensorHandle::IsReady() { if (node_id == 0) return true; @@ -1322,7 +1381,7 @@ tensorflow::Status TFE_TensorHandle::WaitReady() { { tensorflow::mutex_lock l(ctx_mutex_); if (ctx_ == nullptr) return tensorflow::Status::OK(); - executor = ctx_->context.Executor(); + executor = &ctx_->executor; } return executor->WaitFor(node_id); } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 5b29120b40..a79f8ddd33 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -30,7 +30,6 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" @@ -53,18 +52,85 @@ struct TFE_ContextOptions { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); + struct TFE_Context { - explicit TFE_Context(const tensorflow::SessionOptions& opts, - TFE_ContextDevicePlacementPolicy default_policy, - bool async, + explicit TFE_Context(const TFE_ContextOptions& opts, std::unique_ptr device_mgr, tensorflow::Rendezvous* rendezvous) - : context(opts, - static_cast( - default_policy), - async, std::move(device_mgr), rendezvous) {} + : soft_placement( + opts.session_options.options.config.allow_soft_placement()), + policy(PlacementPolicy(soft_placement, opts.policy)), + device_manager(std::move(device_mgr)), + devices(device_manager->ListDevices()), + rendezvous(rendezvous), + pflr(new tensorflow::ProcessFunctionLibraryRuntime( + device_manager.get(), opts.session_options.options.env, + TF_GRAPH_DEF_VERSION, &func_lib_def, {})), + log_device_placement( + opts.session_options.options.config.log_device_placement()), + async_default(opts.async) { + if (async_default) executor.EnableAsync(); + + for (auto* device : devices) { + devices_map[tensorflow::StringPiece(device->name())] = device; + } + } + + const bool soft_placement; + const TFE_ContextDevicePlacementPolicy policy; + + // Note: we cannot use C++11 thread_local here as there is no concept of a + // thread-local-object-local variable in C++11. + tensorflow::mutex policy_map_mu; + std::unordered_map + thread_local_policies GUARDED_BY(policy_map_mu); + + std::unique_ptr device_manager; + // Devices owned by device_manager + std::vector devices; + // All devices are not owned. + tensorflow::gtl::FlatMap + devices_map; + tensorflow::Rendezvous* const rendezvous; + + tensorflow::mutex functions_mu; + tensorflow::FunctionLibraryDefinition func_lib_def GUARDED_BY(functions_mu){ + tensorflow::OpRegistry::Global(), {}}; + + // One FunctionLibraryRuntime per device. + // func_libs[i] is the FunctionLibraryRuntime corresponding to + // session->devices[i]. + const std::unique_ptr pflr; + + tensorflow::mutex cache_mu; + std::unordered_map + kernel_cache GUARDED_BY(cache_mu); + + tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) const { + return pflr->GetFLR(d->name()); + } - tensorflow::EagerContext context; + // Whether we should compute RunMetadata. + std::atomic should_store_metadata{false}; + tensorflow::mutex metadata_mu; + tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); + const bool log_device_placement; + // EagerExecutor for async execution. + tensorflow::EagerExecutor executor; + + // True if running in asynchronous mode. + bool Async() const; + + // True if the default value for execution mode is async. Note that this value + // can be overridden per thread based on `thread_local_async` overrides. + const bool async_default; + mutable tensorflow::mutex async_map_mu; + std::unordered_map thread_local_async + GUARDED_BY(async_map_mu); }; struct TFE_TensorHandle : public tensorflow::core::RefCounted { diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index de10b10b7e..8ba560bef8 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -32,28 +32,6 @@ tf_cuda_library( ], ) -tf_cuda_library( - name = "context", - srcs = [ - "context.cc", - ], - hdrs = [ - "context.h", - ], - visibility = ["//tensorflow:internal"], - deps = [ - ":eager_executor", - ":kernel_and_device", - "//tensorflow/core:core_cpu_lib", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:session_options", - ], -) - tf_cuda_library( name = "kernel_and_device", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc deleted file mode 100644 index 5e8d083cd2..0000000000 --- a/tensorflow/core/common_runtime/eager/context.cc +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/eager/context.h" - -namespace tensorflow { - -ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, ContextDevicePlacementPolicy original_policy) { - if (!soft_placement) { - return original_policy; - } - if (original_policy == DEVICE_PLACEMENT_EXPLICIT || - original_policy == DEVICE_PLACEMENT_SILENT_FOR_INT32) { - return DEVICE_PLACEMENT_SILENT; - } - return original_policy; -} - -EagerContext::EagerContext(const SessionOptions& opts, - ContextDevicePlacementPolicy default_policy, - bool async, std::unique_ptr device_mgr, - Rendezvous* rendezvous) - : soft_placement_(opts.config.allow_soft_placement()), - policy_(PlacementPolicy(soft_placement_, default_policy)), - device_manager_(std::move(device_mgr)), - devices_(device_manager_->ListDevices()), - rendezvous_(rendezvous), - pflr_(new ProcessFunctionLibraryRuntime(device_manager_.get(), opts.env, - TF_GRAPH_DEF_VERSION, - &func_lib_def_, {})), - log_device_placement_(opts.config.log_device_placement()), - async_default_(async) { - if (async_default_) { - executor_.EnableAsync(); - } - - for (auto* device : devices_) { - devices_map_[device->name()] = device; - } -} - -bool EagerContext::Async() const { - mutex_lock l(async_map_mu_); - return gtl::FindWithDefault(thread_local_async_, std::this_thread::get_id(), - async_default_); -} - -Status EagerContext::SetAsyncForThread(bool async) { - { - tensorflow::mutex_lock l(async_map_mu_); - thread_local_async_[std::this_thread::get_id()] = async; - } - if (async) { - executor_.EnableAsync(); - } else { - // TODO(agarwal): Currently we add a wait here to handle cases where a - // sync op has a control dependency on an async op, and the latter has not - // executed yet. This wait can be removed by storing all the control - // inputs and waiting for them when executing ops. - return executor_.WaitForAllPendingNodes(); - } - return Status::OK(); -} - -void EagerContext::ClearCaches() { - mutex_lock ml(cache_mu_); - gtl::STLDeleteValues(&kernel_cache_); -} - -void EagerContext::SetThreadLocalDevicePlacementPolicy( - ContextDevicePlacementPolicy policy) { - mutex_lock ml(policy_map_mu_); - thread_local_policies_[std::this_thread::get_id()] = policy; -} - -ContextDevicePlacementPolicy EagerContext::GetDevicePlacementPolicy() { - mutex_lock ml(policy_map_mu_); - auto policy_map_it = thread_local_policies_.find(std::this_thread::get_id()); - if (policy_map_it != thread_local_policies_.end()) { - return policy_map_it->second; - } - return policy_; -} - -EagerContext::~EagerContext() { - executor_.WaitForAllPendingNodes().IgnoreError(); - ClearCaches(); - rendezvous_->Unref(); -} - -bool EagerContext::FindFunctionByName(const string& name) { - mutex_lock l(functions_mu_); - return func_lib_def_.Find(name) != nullptr; -} - -Status EagerContext::FindDeviceByName(const string& name, Device** result) { - auto it = devices_map_.find(name); - if (it == devices_map_.end()) { - return errors::InvalidArgument(name, " unknown device."); - } - *result = it->second; - return Status::OK(); -} - -Status EagerContext::AddFunctionDef(const FunctionDef& fdef) { - mutex_lock l(functions_mu_); - return func_lib_def_.AddFunctionDef(fdef); -} - -KernelAndDevice* EagerContext::GetCachedKernel(Fprint128 cache_key) { - tf_shared_lock l(cache_mu_); - return gtl::FindPtrOrNull(kernel_cache_, cache_key); -} - -void EagerContext::AddKernelToCache(Fprint128 cache_key, - KernelAndDevice* kernel) { - mutex_lock ml(cache_mu_); - gtl::InsertOrUpdate(&kernel_cache_, cache_key, kernel); -} - -void EagerContext::SetShouldStoreMetadata(bool value) { - should_store_metadata_.store(value); - if (!value) { - mutex_lock ml(metadata_mu_); - run_metadata_.Clear(); - } -} - -} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h deleted file mode 100644 index d525d44fe4..0000000000 --- a/tensorflow/core/common_runtime/eager/context.h +++ /dev/null @@ -1,193 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ -#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/common_runtime/eager/eager_executor.h" -#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/rendezvous_mgr.h" -#include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/lib/gtl/map_util.h" -#include "tensorflow/core/lib/gtl/stl_util.h" -#include "tensorflow/core/platform/fingerprint.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/public/version.h" - -namespace tensorflow { - -// Note: there's a copy enum in eager/c_api.h. It should be kept in sync. -enum ContextDevicePlacementPolicy { - // Running operations with input tensors on the wrong device will fail. When - // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. - DEVICE_PLACEMENT_EXPLICIT = 0, - // Copy the tensor to the right device but log a warning. - DEVICE_PLACEMENT_WARN = 1, - // Silently copy the tensor, which has a performance cost since the - // operation will be blocked till the copy completes. - DEVICE_PLACEMENT_SILENT = 2, - // Default placement policy which silently copies int32 tensors but not other - // dtypes. When soft placement is enabled acts like - // TFE_DEVICE_PLACEMENT_SILENT. - DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, -}; - -ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, ContextDevicePlacementPolicy original_policy); - -class EagerContext { - public: - explicit EagerContext(const SessionOptions& opts, - ContextDevicePlacementPolicy default_policy, bool async, - std::unique_ptr device_mgr, - Rendezvous* rendezvous); - - ~EagerContext(); - - // Returns the function library runtime for the given device. - FunctionLibraryRuntime* func_lib(Device* d) const { - return pflr_->GetFLR(d->name()); - } - - // True if running in asynchronous mode. - bool Async() const; - - EagerExecutor* Executor() { return &executor_; } - - // Sets whether this thread should run in synchronous or asynchronous mode. - Status SetAsyncForThread(bool async); - - // TODO(apassos) make this return a constant reference - gtl::FlatMap* device_map() { - return &devices_map_; - } - - // TODO(apassos) make this return a constant reference - std::vector* devices() { return &devices_; } - - // Clears the kernel caches. - void ClearCaches(); - - // Sets the device placement policy for the current thread. - void SetThreadLocalDevicePlacementPolicy(ContextDevicePlacementPolicy policy); - - // Returns the device placement policy for the current thread. - ContextDevicePlacementPolicy GetDevicePlacementPolicy(); - - Status AsyncWait() { return executor_.WaitForAllPendingNodes(); } - - Status GetStatus() { return executor_.status(); } - - void ClearAsyncError() { executor_.ClearError(); } - - bool FindFunctionByName(const string& name); - - Status FindDeviceByName(const string& name, Device** result); - - Device* HostCPU() { return devices_[0]; } - - bool SoftPlacement() { return soft_placement_; } - - uint64 NextId() { return executor_.NextId(); } - - void ExecutorAdd(EagerNode* node) { executor_.Add(node); } - - Status AddFunctionDef(const FunctionDef& fdef); - - KernelAndDevice* GetCachedKernel(Fprint128 cache_key); - - void AddKernelToCache(Fprint128 cache_key, KernelAndDevice* kernel); - - bool LogDevicePlacement() { return log_device_placement_; } - - Rendezvous* GetRendezvous() { return rendezvous_; } - - mutex* FunctionsMu() { return &functions_mu_; } - - tensorflow::DeviceMgr* device_mgr() { return device_manager_.get(); } - - // TODO(apassos) remove the need for this - void ReleaseDeviceMgr() { device_manager_.release(); } - - // TODO(apassos) clean up RunMetadata storage. - mutex* MetadataMu() { return &metadata_mu_; } - bool ShouldStoreMetadata() { return should_store_metadata_.load(); } - void SetShouldStoreMetadata(bool value); - RunMetadata* RunMetadataProto() { return &run_metadata_; } - - FunctionLibraryDefinition* FuncLibDef() { return &func_lib_def_; } - - private: - const bool soft_placement_; - const ContextDevicePlacementPolicy policy_; - - // Note: we cannot use C++11 thread_local here as there is no concept of a - // thread-local-object-local variable in C++11. - mutex policy_map_mu_; - std::unordered_map - thread_local_policies_ GUARDED_BY(policy_map_mu_); - - std::unique_ptr device_manager_; - // Devices owned by device_manager - std::vector devices_; - // All devices are not owned. - gtl::FlatMap devices_map_; - Rendezvous* const rendezvous_; - - mutex functions_mu_; - FunctionLibraryDefinition func_lib_def_ GUARDED_BY(functions_mu_){ - OpRegistry::Global(), {}}; - - // One FunctionLibraryRuntime per device. - // func_libs[i] is the FunctionLibraryRuntime corresponding to - // session->devices[i]. - const std::unique_ptr pflr_; - - mutex cache_mu_; - std::unordered_map kernel_cache_ - GUARDED_BY(cache_mu_); - - // Whether we should compute RunMetadata. - std::atomic should_store_metadata_{false}; - mutex metadata_mu_; - RunMetadata run_metadata_ GUARDED_BY(metadata_mu_); - const bool log_device_placement_; - // EagerExecutor for async execution. - EagerExecutor executor_; - - // True if the default value for execution mode is async. Note that this value - // can be overridden per thread based on `thread_local_async` overrides. - const bool async_default_; - mutable mutex async_map_mu_; - std::unordered_map thread_local_async_ - GUARDED_BY(async_map_mu_); -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ -- GitLab From 585fb74541ed914845eccd3da4b1a2c94a99779e Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 22 Mar 2018 00:26:31 -0700 Subject: [PATCH 337/960] Minor style improvement in TFLite interpreter_test.py PiperOrigin-RevId: 190027161 --- .../contrib/lite/python/interpreter_test.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/lite/python/interpreter_test.py b/tensorflow/contrib/lite/python/interpreter_test.py index bf124410f3..cd2386f526 100644 --- a/tensorflow/contrib/lite/python/interpreter_test.py +++ b/tensorflow/contrib/lite/python/interpreter_test.py @@ -61,30 +61,31 @@ class InterpreterTest(test_util.TensorFlowTestCase): 'testdata/permute_uint8.tflite') with io.open(model_path, 'rb') as model_file: data = model_file.read() - interpreter = interpreter_wrapper.Interpreter(model_content=data) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual('input', input_details[0]['name']) - self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 4] == input_details[0]['shape']).all()) - self.assertEqual((1.0, 0), input_details[0]['quantization']) - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual('output', output_details[0]['name']) - self.assertEqual(np.uint8, output_details[0]['dtype']) - self.assertTrue(([1, 4] == output_details[0]['shape']).all()) - self.assertEqual((1.0, 0), output_details[0]['quantization']) - - test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) - expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) - interpreter.set_tensor(input_details[0]['index'], test_input) - interpreter.invoke() - - output_data = interpreter.get_tensor(output_details[0]['index']) - self.assertTrue((expected_output == output_data).all()) + + interpreter = interpreter_wrapper.Interpreter(model_content=data) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('input', input_details[0]['name']) + self.assertEqual(np.uint8, input_details[0]['dtype']) + self.assertTrue(([1, 4] == input_details[0]['shape']).all()) + self.assertEqual((1.0, 0), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('output', output_details[0]['name']) + self.assertEqual(np.uint8, output_details[0]['dtype']) + self.assertTrue(([1, 4] == output_details[0]['shape']).all()) + self.assertEqual((1.0, 0), output_details[0]['quantization']) + + test_input = np.array([[1, 2, 3, 4]], dtype=np.uint8) + expected_output = np.array([[4, 3, 2, 1]], dtype=np.uint8) + interpreter.set_tensor(input_details[0]['index'], test_input) + interpreter.invoke() + + output_data = interpreter.get_tensor(output_details[0]['index']) + self.assertTrue((expected_output == output_data).all()) if __name__ == '__main__': -- GitLab From 31adaf4361b9b65e382be9633c4d0517d77c29e5 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 22 Mar 2018 00:26:33 -0700 Subject: [PATCH 338/960] TFLite: Ensure only 1 scale/zero_point is in QuantizationParam. PiperOrigin-RevId: 190027163 --- tensorflow/contrib/lite/model.cc | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index f7daa6fc9d..9c619f88e0 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -679,9 +679,27 @@ TfLiteStatus InterpreterBuilder::ParseTensors( // but we really only support one value for the whole tensor. // TODO(aselle): This breaks as well if these are nullptr's. // TODO(aselle): This assumes non per-channel quantization. - if (q_params->scale()) quantization.scale = q_params->scale()->Get(0); - if (q_params->zero_point()) + + if (q_params->scale()) { + if (q_params->scale()->size() != 1) { + error_reporter_->Report( + "QuantizationParam has %d scale values (only 1 is supported).", + q_params->scale()->size()); + return kTfLiteError; + } + quantization.scale = q_params->scale()->Get(0); + } + + if (q_params->zero_point()) { + if (q_params->zero_point()->size() != 1) { + error_reporter_->Report( + "QuantizationParam has %d zero_point values" + " (only 1 is supported).", + q_params->zero_point()->size()); + return kTfLiteError; + } quantization.zero_point = q_params->zero_point()->Get(0); + } } TfLiteType type; -- GitLab From ba97ee847d9baca0ac3b7eab5c6bad93e70a2882 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 22 Mar 2018 00:48:30 -0700 Subject: [PATCH 339/960] Java: Release 1.7.0-rc1 PiperOrigin-RevId: 190028714 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 7f3a83b195..0b69a8cbe5 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index cc436ff840..541876f7f5 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index 47f678382a..d8933e5238 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 42d32810a2..6286fd73df 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 463893ce62..4e881f5a63 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 60e7f3c199..d512a7eda9 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.7.0-rc0 + 1.7.0-rc1 ../ tensorflow -- GitLab From f9ccb89134d89469ae962bba832e78d1f116b96b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 04:50:42 -0700 Subject: [PATCH 340/960] Add a utility that converts call keyword arguments into dicts, in AST space. PiperOrigin-RevId: 190047495 --- tensorflow/contrib/py2tf/pyct/ast_util.py | 9 +++++++++ tensorflow/contrib/py2tf/pyct/ast_util_test.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/ast_util.py b/tensorflow/contrib/py2tf/pyct/ast_util.py index f916775b9c..6f7e656c26 100644 --- a/tensorflow/contrib/py2tf/pyct/ast_util.py +++ b/tensorflow/contrib/py2tf/pyct/ast_util.py @@ -94,3 +94,12 @@ def rename_symbols(node, name_map): elif isinstance(node, tuple): return tuple(renamer.visit(n) for n in node) return renamer.visit(node) + + +def keywords_to_dict(keywords): + keys = [] + values = [] + for kw in keywords: + keys.append(gast.Str(kw.arg)) + values.append(kw.value) + return gast.Dict(keys=keys, values=values) diff --git a/tensorflow/contrib/py2tf/pyct/ast_util_test.py b/tensorflow/contrib/py2tf/pyct/ast_util_test.py index a871ccad6f..8d123679e3 100644 --- a/tensorflow/contrib/py2tf/pyct/ast_util_test.py +++ b/tensorflow/contrib/py2tf/pyct/ast_util_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import ast from tensorflow.contrib.py2tf.pyct import ast_util +from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.python.platform import test @@ -74,6 +76,17 @@ class AstUtilTest(test.TestCase): self.assertFalse(ret is new_node.body[0]) self.assertFalse(hasattr(new_node.body[0], '__foo')) + def test_keywords_to_dict(self): + keywords = parser.parse_expression('f(a=b, c=1, d=\'e\')').keywords + d = ast_util.keywords_to_dict(keywords) + # Make sure we generate a usable dict node by attaching it to a variable and + # compiling everything. + output = parser.parse_str('b = 3') + output.body += (ast.Assign([ast.Name(id='d', ctx=ast.Store())], d),) + result, _ = compiler.ast_to_object(output) + self.assertDictEqual(result.d, {'a': 3, 'c': 1, 'd': 'e'}) + print(d) + if __name__ == '__main__': test.main() -- GitLab From 8cd562c55cb5fa172345f0de0376d9666b2326b4 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 22 Mar 2018 19:55:34 +0800 Subject: [PATCH 341/960] Fix the inconsistency in the accepted shape/data_format of Input Tensor to Conv2D in documentation (#17893) --- tensorflow/docs_src/tutorials/layers.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 9b17d0d4d5..aeb746f29c 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -198,9 +198,9 @@ Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). ### Input Layer The methods in the `layers` module for creating convolutional and pooling layers -for two-dimensional image data expect input tensors to have a shape of -[batch_size, image_width, image_height, -channels], defined as follows: +for two-dimensional image data expect input tensors to have a `channels_last` shape of +[batch_size, image_height, image_width, channels] +or a `channels_first` shape of [batch_size, channels, image_height, image_width], defined as follows: * _`batch_size`_. Size of the subset of examples to use when performing gradient descent during training. -- GitLab From 9e651e4571f7b7c2d32bdafe43cc4ced9bb0c750 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Thu, 22 Mar 2018 05:33:42 -0700 Subject: [PATCH 342/960] Allow to download clang and use clang for CPU builds. Previously we only allowed to download clang when doing GPU builds. The added skylark files use bazel's autoconf scripts, which were only added in 0.10.0. To provide nice error message for older versions of bazel (i.e. 'version is less than 0.10' vs 'can't load @bazel_tools/cpp/...'), we move the bazel version check into WORKSPACE file from workspace.bzl. PiperOrigin-RevId: 190050798 --- WORKSPACE | 6 +++ configure.py | 26 ++++----- tensorflow/version_check.bzl | 48 +++++++++++++++++ tensorflow/workspace.bzl | 53 ++----------------- third_party/clang_toolchain/BUILD | 0 .../clang_toolchain/cc_configure_clang.bzl | 27 ++++++++++ .../download_clang.bzl | 0 third_party/gpus/cuda_configure.bzl | 2 +- third_party/mkl_dnn/mkldnn.BUILD | 2 +- tools/bazel.rc | 5 +- 10 files changed, 104 insertions(+), 65 deletions(-) create mode 100644 tensorflow/version_check.bzl create mode 100644 third_party/clang_toolchain/BUILD create mode 100644 third_party/clang_toolchain/cc_configure_clang.bzl rename third_party/{gpus => clang_toolchain}/download_clang.bzl (100%) diff --git a/WORKSPACE b/WORKSPACE index 1e38a9a8cd..11c5cdb207 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -14,6 +14,12 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories") closure_repositories() +# We must check the bazel version before trying to parse any other BUILD +# files, in case the parsing of those build files depends on the bazel +# version we require here. +load("//tensorflow:version_check.bzl", "check_bazel_version_at_least") +check_bazel_version_at_least("0.10.0") + load("//tensorflow:workspace.bzl", "tf_workspace") # Uncomment and update the paths in these entries to build the Android demo. diff --git a/configure.py b/configure.py index 7d61c2e5e3..ea732c64e2 100644 --- a/configure.py +++ b/configure.py @@ -524,7 +524,7 @@ def set_tf_cuda_clang(environ_cp): def set_tf_download_clang(environ_cp): """Set TF_DOWNLOAD_CLANG action_env.""" - question = 'Do you want to download a fresh release of clang? (Experimental)' + question = 'Do you wish to download a fresh release of clang? (Experimental)' yes_reply = 'Clang will be downloaded and used to compile tensorflow.' no_reply = 'Clang will not be downloaded.' set_action_env_var( @@ -1380,7 +1380,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.5.4') + check_bazel_version('0.10.0') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() @@ -1397,6 +1397,9 @@ def main(): environ_cp['TF_NEED_OPENCL'] = '0' environ_cp['TF_CUDA_CLANG'] = '0' environ_cp['TF_NEED_TENSORRT'] = '0' + # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on + # Windows. + environ_cp['TF_DOWNLOAD_CLANG'] = '0' if is_macos(): environ_cp['TF_NEED_JEMALLOC'] = '0' @@ -1444,16 +1447,8 @@ def main(): set_tf_cuda_clang(environ_cp) if environ_cp.get('TF_CUDA_CLANG') == '1': - if not is_windows(): - # Ask if we want to download clang release while building. - set_tf_download_clang(environ_cp) - else: - # We use bazel's generated crosstool on Windows and there is no - # way to provide downloaded toolchain for that yet. - # TODO(ibiryukov): Investigate using clang as a cuda compiler on - # Windows. - environ_cp['TF_DOWNLOAD_CLANG'] = '0' - + # Ask whether we should download the clang toolchain. + set_tf_download_clang(environ_cp) if environ_cp.get('TF_DOWNLOAD_CLANG') != '1': # Set up which clang we should use as the cuda / host compiler. set_clang_cuda_compiler_path(environ_cp) @@ -1463,6 +1458,13 @@ def main(): if not is_windows(): set_gcc_host_compiler_path(environ_cp) set_other_cuda_vars(environ_cp) + else: + # CUDA not required. Ask whether we should download the clang toolchain and + # use it for the CPU build. + set_tf_download_clang(environ_cp) + if environ_cp.get('TF_DOWNLOAD_CLANG') == '1': + write_to_bazelrc('build --config=download_clang') + write_to_bazelrc('test --config=download_clang') set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) if environ_cp.get('TF_NEED_MPI') == '1': diff --git a/tensorflow/version_check.bzl b/tensorflow/version_check.bzl new file mode 100644 index 0000000000..79e721dab4 --- /dev/null +++ b/tensorflow/version_check.bzl @@ -0,0 +1,48 @@ +""" Helpers to check minimum version of bazel.""" + +def _extract_version_number(bazel_version): + """Extracts the semantic version number from a version string + + Args: + bazel_version: the version string that begins with the semantic version + e.g. "1.2.3rc1 abc1234" where "abc1234" is a commit hash. + + Returns: + The semantic version string, like "1.2.3". + """ + for i in range(len(bazel_version)): + c = bazel_version[i] + if not (c.isdigit() or c == "."): + return bazel_version[:i] + return bazel_version + +# Parse the bazel version string from `native.bazel_version`. +# e.g. +# "0.10.0rc1 abc123d" => (0, 10, 0) +# "0.3.0" => (0, 3, 0) +def _parse_bazel_version(bazel_version): + """Parses a version string into a 3-tuple of ints + + int tuples can be compared directly using binary operators (<, >). + + Args: + bazel_version: the Bazel version string + + Returns: + An int 3-tuple of a (major, minor, patch) version. + """ + + version = _extract_version_number(bazel_version) + return tuple([int(n) for n in version.split(".")]) + +def check_bazel_version_at_least(minimum_bazel_version): + if "bazel_version" not in dir(native): + fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" % minimum_bazel_version) + elif not native.bazel_version: + print("\nCurrent Bazel is not a release version, cannot check for compatibility.") + print("Make sure that you are running at least Bazel %s.\n" % minimum_bazel_version) + return + + if _parse_bazel_version(native.bazel_version) < _parse_bazel_version(minimum_bazel_version): + fail("\nCurrent Bazel version is {}, expected at least {}\n".format( + native.bazel_version, minimum_bazel_version)) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 675acbe5f6..ebb9e9412f 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -10,65 +10,18 @@ load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure") load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure") load("//third_party:repo.bzl", "tf_http_archive") +load("//third_party/clang_toolchain:cc_configure_clang.bzl", "cc_download_clang_toolchain") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -def _extract_version_number(bazel_version): - """Extracts the semantic version number from a version string - - Args: - bazel_version: the version string that begins with the semantic version - e.g. "1.2.3rc1 abc1234" where "abc1234" is a commit hash. - - Returns: - The semantic version string, like "1.2.3". - """ - for i in range(len(bazel_version)): - c = bazel_version[i] - if not (c.isdigit() or c == "."): - return bazel_version[:i] - return bazel_version - -# Parse the bazel version string from `native.bazel_version`. -# e.g. -# "0.10.0rc1 abc123d" => (0, 10, 0) -# "0.3.0" => (0, 3, 0) -def _parse_bazel_version(bazel_version): - """Parses a version string into a 3-tuple of ints - - int tuples can be compared directly using binary operators (<, >). - - Args: - bazel_version: the Bazel version string - - Returns: - An int 3-tuple of a (major, minor, patch) version. - """ - - version = _extract_version_number(bazel_version) - return tuple([int(n) for n in version.split(".")]) - -def check_bazel_version_at_least(minimum_bazel_version): - if "bazel_version" not in dir(native): - fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" % minimum_bazel_version) - elif not native.bazel_version: - print("\nCurrent Bazel is not a release version, cannot check for compatibility.") - print("Make sure that you are running at least Bazel %s.\n" % minimum_bazel_version) - return - - if _parse_bazel_version(native.bazel_version) < _parse_bazel_version(minimum_bazel_version): - fail("\nCurrent Bazel version is {}, expected at least {}\n".format( - native.bazel_version, minimum_bazel_version)) # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. def tf_workspace(path_prefix="", tf_repo_name=""): - # We must check the bazel version before trying to parse any other BUILD - # files, in case the parsing of those build files depends on the bazel - # version we require here. - check_bazel_version_at_least("0.5.4") + # Note that we check the minimum bazel version in WORKSPACE. clang6_configure(name="local_config_clang6") + cc_download_clang_toolchain(name="local_config_download_clang") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") git_configure(name="local_config_git") diff --git a/third_party/clang_toolchain/BUILD b/third_party/clang_toolchain/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/clang_toolchain/cc_configure_clang.bzl b/third_party/clang_toolchain/cc_configure_clang.bzl new file mode 100644 index 0000000000..1181110ea9 --- /dev/null +++ b/third_party/clang_toolchain/cc_configure_clang.bzl @@ -0,0 +1,27 @@ +""" Downloads clang and configures the crosstool using bazel's autoconf.""" + +load("@bazel_tools//tools/cpp:cc_configure.bzl", "cc_autoconf_impl") +load(":download_clang.bzl", "download_clang") + +_TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG" +_TF_NEED_CUDA = "TF_NEED_CUDA" + +def _cc_clang_autoconf(repo_ctx): + if repo_ctx.os.environ.get(_TF_DOWNLOAD_CLANG) != "1": + return + if repo_ctx.os.environ.get(_TF_NEED_CUDA) == "1": + # Clang is handled separately for CUDA configs. + # See cuda_configure.bzl for more details. + return + + download_clang(repo_ctx, out_folder='extra_tools') + overriden_tools = {'gcc': 'extra_tools/bin/clang'} + cc_autoconf_impl(repo_ctx, overriden_tools) + +cc_download_clang_toolchain = repository_rule( + environ = [ + _TF_DOWNLOAD_CLANG, + _TF_NEED_CUDA, + ], + implementation = _cc_clang_autoconf, +) diff --git a/third_party/gpus/download_clang.bzl b/third_party/clang_toolchain/download_clang.bzl similarity index 100% rename from third_party/gpus/download_clang.bzl rename to third_party/clang_toolchain/download_clang.bzl diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 6c9c128db6..ede7e31897 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -96,7 +96,7 @@ NVVM_LIBDEVICE_PATHS = [ "share/cuda/", ] -load(":download_clang.bzl", "download_clang") +load("//third_party/clang_toolchain:download_clang.bzl", "download_clang") # TODO(dzc): Once these functions have been factored out of Bazel's # cc_configure.bzl, load them from @bazel_tools instead. diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD index 752a0d8498..68f24aabae 100644 --- a/third_party/mkl_dnn/mkldnn.BUILD +++ b/third_party/mkl_dnn/mkldnn.BUILD @@ -4,7 +4,7 @@ config_setting( name = "clang_linux_x86_64", values = { "cpu": "k8", - "define": "using_cuda_clang=true", + "define": "using_clang=true", }, ) diff --git a/tools/bazel.rc b/tools/bazel.rc index 8b8c717561..1c1e6afb65 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -27,11 +27,14 @@ build --define framework_shared_object=true build:mkl --define=using_mkl=true build:mkl -c opt +build:download_clang --crosstool_top=@local_config_download_clang//:toolchain +build:download_clang --define=using_clang=true + build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain -build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true +build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true -- GitLab From b559a319411e2d3f2a42f466c18737edd527bb10 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 05:46:07 -0700 Subject: [PATCH 343/960] Update file due to changes in Bazel (PACKAGE_NAME is deprecated) PiperOrigin-RevId: 190051589 --- tensorflow/compiler/aot/tfcompile.bzl | 2 +- tensorflow/tensorflow.bzl | 4 ++-- tensorflow/tools/test/performance.bzl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index 9dff1be09f..3a877c5337 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -132,7 +132,7 @@ def tf_library(name, graph, config, header_file = name + ".h" metadata_object_file = name + "_tfcompile_metadata.o" function_object_file = name + "_tfcompile_function.o" - ep = ("__" + PACKAGE_NAME + "__" + name).replace("/", "_") + ep = ("__" + native.package_name() + "__" + name).replace("/", "_") if type(tfcompile_flags) == type(""): flags = tfcompile_flags else: diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9b0db8a112..2d3cb415fe 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -34,7 +34,7 @@ def src_to_test_name(src): return src.replace("/", "_").split(".")[0] def full_path(relative_paths): - return [PACKAGE_NAME + "/" + relative for relative in relative_paths] + return [native.package_name() + "/" + relative for relative in relative_paths] # List of proto files for android builds def tf_android_core_proto_sources(core_proto_sources_relative): @@ -265,7 +265,7 @@ def _rpath_linkopts(name): # deployed. Other shared object dependencies (e.g. shared between contrib/ # ops) are picked up as long as they are in either the same or a parent # directory in the tensorflow/ tree. - levels_to_root = PACKAGE_NAME.count("/") + name.count("/") + levels_to_root = native.package_name().count("/") + name.count("/") return select({ clean_dep("//tensorflow:darwin"): [ "-Wl,%s" % (_make_search_paths("@loader_path", levels_to_root),), diff --git a/tensorflow/tools/test/performance.bzl b/tensorflow/tools/test/performance.bzl index cee53dd5b6..3486871080 100644 --- a/tensorflow/tools/test/performance.bzl +++ b/tensorflow/tools/test/performance.bzl @@ -31,7 +31,7 @@ def tf_cc_logged_benchmark( size = "large", srcs = ["//tensorflow/tools/test:run_and_gather_logs"], args = [ - "--name=//%s:%s" % (PACKAGE_NAME, name), + "--name=//%s:%s" % (native.package_name(), name), "--test_name=" + target, "--test_args=--benchmarks=%s" % benchmarks, "--benchmark_type=%s" % benchmark_type, -- GitLab From cae614a3300b3befae52d0e076c708450a93d820 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 22 Mar 2018 05:50:56 -0700 Subject: [PATCH 344/960] Automatically insert api-links. If the contents of a pair of back-ticks match a public api symbol name insert a link. PiperOrigin-RevId: 190051941 --- tensorflow/tools/docs/parser.py | 48 +++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index e758229535..d2a63ecc49 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -34,7 +34,11 @@ from tensorflow.python.util import tf_inspect # A regular expression capturing a python identifier. -IDENTIFIER_RE = '[a-zA-Z_][a-zA-Z0-9_]*' +IDENTIFIER_RE = r'[a-zA-Z_]\w*' + + +class TFDocsError(Exception): + pass class _Errors(object): @@ -118,6 +122,8 @@ SYMBOL_REFERENCE_RE = re.compile( """, flags=re.VERBOSE) +AUTO_REFERENCE_RE = re.compile(r'`([a-zA-Z0-9_.]+?)`') + class ReferenceResolver(object): """Class for replacing @{...} references with Markdown links. @@ -240,10 +246,25 @@ class ReferenceResolver(object): Returns: `string`, with "@{symbol}" references replaced by Markdown links. """ - def one_ref(match): - return self._one_ref(match, relative_path_to_root) - return re.sub(SYMBOL_REFERENCE_RE, one_ref, string) + def strict_one_ref(match): + try: + return self._one_ref(match, relative_path_to_root) + except TFDocsError as e: + self.add_error(e.message) + return 'BAD_LINK' + + string = re.sub(SYMBOL_REFERENCE_RE, strict_one_ref, string) + + def sloppy_one_ref(match): + try: + return self._one_ref(match, relative_path_to_root) + except TFDocsError: + return match.group(0) + + string = re.sub(AUTO_REFERENCE_RE, sloppy_one_ref, string) + + return string def python_link(self, link_text, ref_full_name, relative_path_to_root, code_ref=True): @@ -307,14 +328,14 @@ class ReferenceResolver(object): Raises: RuntimeError: If `ref_full_name` is not documented. + TFDocsError: If the @{} syntax cannot be decoded. """ master_name = self._duplicate_of.get(ref_full_name, ref_full_name) # Check whether this link exists if master_name not in self._all_names: - message = 'Cannot make link to "%s": Not in index.' % master_name - self.add_error(message) - return 'BROKEN_LINK' + raise TFDocsError( + 'Cannot make link to "%s": Not in index.' % master_name) # If this is a member of a class, link to the class page with an anchor. ref_path = None @@ -369,8 +390,8 @@ class ReferenceResolver(object): code_ref=not manual_link_text) # Error! - self.add_error('Did not understand "%s"' % match.group(0)) - return 'BROKEN_LINK' + raise TFDocsError('Did not understand "%s"' % match.group(0), + 'BROKEN_LINK') def _doc_link(self, string, link_text, manual_link_text, relative_path_to_root): @@ -395,11 +416,10 @@ class ReferenceResolver(object): return self._doc_missing(string, hash_tag, link_text, manual_link_text, relative_path_to_root) - def _doc_missing(self, string, unused_hash_tag, link_text, + def _doc_missing(self, string, unused_hash_tag, unused_link_text, unused_manual_link_text, unused_relative_path_to_root): """Generate an error for unrecognized @{$...} references.""" - self.add_error('Unknown Document "%s"' % string) - return link_text + raise TFDocsError('Unknown Document "%s"' % string) def _cc_link(self, string, link_text, unused_manual_link_text, relative_path_to_root): @@ -416,8 +436,8 @@ class ReferenceResolver(object): elif string == 'tensorflow::ops::Const': ret = 'namespace/tensorflow/ops.md#const' else: - self.add_error('C++ reference not understood: "%s"' % string) - return 'TODO_C++:%s' % string + raise TFDocsError('C++ reference not understood: "%s"' % string) + # relative_path_to_root gets you to api_docs/python, we go from there # to api_docs/cc, and then add ret. cc_relative_path = os.path.normpath(os.path.join( -- GitLab From 69dc403c97f273b750d5927ec1ed26613d90f3ad Mon Sep 17 00:00:00 2001 From: cbockman Date: Thu, 22 Mar 2018 06:42:39 -0700 Subject: [PATCH 345/960] spelling fix (#17911) --- tensorflow/contrib/data/python/ops/grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index a19be22254..ae10d2eb22 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -42,7 +42,7 @@ def group_by_window(key_func, This transformation maps each consecutive element in a dataset to a key using `key_func` and groups the elements by key. It then applies `reduce_func` to at most `window_size_func(key)` elements matching the same - key. All execpt the final window for each key will contain + key. All except the final window for each key will contain `window_size_func(key)` elements; the final window may be smaller. You may provide either a constant `window_size` or a window size determined by -- GitLab From 4d5c139fbb831684e58b3875cd253a15c742362d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 08:19:21 -0700 Subject: [PATCH 346/960] Fix QN for Calls. PiperOrigin-RevId: 190067548 --- tensorflow/contrib/py2tf/pyct/qual_names.py | 20 +++++++------------ .../contrib/py2tf/pyct/qual_names_test.py | 18 +++++++++++++++++ .../py2tf/pyct/static_analysis/activity.py | 4 ++++ 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/qual_names.py b/tensorflow/contrib/py2tf/pyct/qual_names.py index 6bcbaeb2ae..7dec13db92 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names.py @@ -169,14 +169,6 @@ class QnResolver(gast.NodeTransformer): Note: Not using NodeAnnos to avoid circular dependencies. """ - def visit_Call(self, node): - node = self.generic_visit(node) - # This helps treat the following cases uniformly: - # a = b[i] - # a = b()[i] - anno.copyanno(node.func, node, anno.Basic.QN) - return node - def visit_Name(self, node): node = self.generic_visit(node) anno.setanno(node, anno.Basic.QN, QN(node.id)) @@ -184,8 +176,9 @@ class QnResolver(gast.NodeTransformer): def visit_Attribute(self, node): node = self.generic_visit(node) - anno.setanno(node, anno.Basic.QN, - QN(anno.getanno(node.value, anno.Basic.QN), attr=node.attr)) + if anno.hasanno(node.value, anno.Basic.QN): + anno.setanno(node, anno.Basic.QN, + QN(anno.getanno(node.value, anno.Basic.QN), attr=node.attr)) return node def visit_Subscript(self, node): @@ -201,9 +194,10 @@ class QnResolver(gast.NodeTransformer): subscript = QN(StringLiteral(s.value.s)) else: subscript = anno.getanno(node.slice.value, anno.Basic.QN) - anno.setanno(node, anno.Basic.QN, - QN(anno.getanno(node.value, anno.Basic.QN), - subscript=subscript)) + if anno.hasanno(node.value, anno.Basic.QN): + anno.setanno(node, anno.Basic.QN, + QN(anno.getanno(node.value, anno.Basic.QN), + subscript=subscript)) return node diff --git a/tensorflow/contrib/py2tf/pyct/qual_names_test.py b/tensorflow/contrib/py2tf/pyct/qual_names_test.py index f2cd8e98f0..6583fa243b 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names_test.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names_test.py @@ -208,6 +208,24 @@ class QNResolverTest(test.TestCase): self.assertQNStringIs(nodes[8], 'a.b[c[d]].e.f') self.assertQNStringIs(nodes[9], 'a.b[c[d.e.f].g].h') + def test_function_calls(self): + samples = """ + a.b + a.b() + a().b + z[i] + z[i]() + z()[i] + """ + nodes = resolve(parser.parse_str(textwrap.dedent(samples))) + nodes = tuple(n.value for n in nodes.body) + self.assertQNStringIs(nodes[0], 'a.b') + self.assertQNStringIs(nodes[1].func, 'a.b') + self.assertQNStringIs(nodes[2].value.func, 'a') + self.assertQNStringIs(nodes[3], 'z[i]') + self.assertQNStringIs(nodes[4].func, 'z[i]') + self.assertQNStringIs(nodes[5].value.func, 'z') + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py index 87fc8c979c..716672a53b 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py @@ -171,6 +171,10 @@ class ActivityAnalizer(transformer.Base): self._in_return_statement = False def _track_symbol(self, node): + # This can happen when we have an attribute (or subscript) on a function + # call. Example: a().b + if not anno.hasanno(node, anno.Basic.QN): + return qn = anno.getanno(node, anno.Basic.QN) if isinstance(node.ctx, gast.Store): -- GitLab From 4deaf50fd8bb10aa2c96662a106f201b281f57ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 08:51:43 -0700 Subject: [PATCH 347/960] Methods to work with symbolic tensor shapes. PiperOrigin-RevId: 190071400 --- tensorflow/core/grappler/optimizers/BUILD | 30 +++ .../grappler/optimizers/symbolic_shapes.cc | 177 ++++++++++++++++++ .../grappler/optimizers/symbolic_shapes.h | 60 ++++++ .../optimizers/symbolic_shapes_test.cc | 95 ++++++++++ 4 files changed, 362 insertions(+) create mode 100644 tensorflow/core/grappler/optimizers/symbolic_shapes.cc create mode 100644 tensorflow/core/grappler/optimizers/symbolic_shapes.h create mode 100644 tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 96ea8f7a83..ac29edd213 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -5,6 +5,12 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +# Platform specific build config +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_protos_grappler", +) + filegroup( name = "all_files", srcs = glob( @@ -586,3 +592,27 @@ tf_cc_test( "//tensorflow/core/grappler/utils:grappler_test", ], ) + +cc_library( + name = "symbolic_shapes", + srcs = ["symbolic_shapes.cc"], + hdrs = ["symbolic_shapes.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + ] + tf_protos_grappler(), +) + +tf_cc_test( + name = "symbolic_shapes_test", + srcs = ["symbolic_shapes_test.cc"], + deps = [ + ":symbolic_shapes", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes.cc b/tensorflow/core/grappler/optimizers/symbolic_shapes.cc new file mode 100644 index 0000000000..cfca2dc0d3 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/symbolic_shapes.cc @@ -0,0 +1,177 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h" +#include "tensorflow/core/util/bcast.h" + +namespace tensorflow { +namespace grappler { +namespace { + +BCast::Vec ShapeDims(const TensorShapeProto& shape) { + BCast::Vec dims; + dims.reserve(shape.dim_size()); + for (int i = 0; i < shape.dim_size(); ++i) + dims.push_back(shape.dim(i).size()); + return dims; +} + +} // namespace + +bool IsKnown(const TensorShapeProto::Dim& dim) { return dim.size() >= 0; } + +bool IsKnownSymbolically(const TensorShapeProto::Dim& dim) { + return dim.size() <= -2; +} + +bool IsUnknown(const TensorShapeProto::Dim& dim) { return dim.size() == -1; } + +bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape) { + return !shape.unknown_rank() && + std::all_of( + shape.dim().begin(), shape.dim().end(), + [](const TensorShapeProto::Dim& dim) { return !IsUnknown(dim); }); +} + +bool ShapeIsSymbolicallyDefined(const OpInfo::TensorProperties& properties) { + return ShapeIsSymbolicallyDefined(properties.shape()); +} + +bool ShapesSymbolicallyEqual(const TensorShapeProto& left, + const TensorShapeProto& right) { + if (left.unknown_rank() || right.unknown_rank() || + left.dim_size() != right.dim_size()) { + return false; + } + for (int i = 0; i < left.dim_size(); ++i) { + const auto& ldim = left.dim(i); + const auto& rdim = right.dim(i); + if (IsUnknown(ldim) || IsUnknown(rdim) || ldim.size() != rdim.size()) { + return false; + } + } + return true; +} + +bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right) { + return ShapesSymbolicallyEqual(left.shape(), right.shape()); +} + +bool ShapesBroadcastable(const TensorShapeProto& left, + const TensorShapeProto& right) { + if (!ShapeIsSymbolicallyDefined(left) || !ShapeIsSymbolicallyDefined(right)) { + return false; + } + BCast bcast(ShapeDims(left), ShapeDims(right), + /*fewer_dims_optimization*/ false); + return bcast.IsValid(); +} + +bool ShapesBroadcastable(const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right) { + return ShapesBroadcastable(left.shape(), right.shape()); +} + +bool CompareSymbolicallyShapedTensorSizes(const TensorShapeProto& left, + const TensorShapeProto& right) { + // if one of the ranks is unknown, it's impossible to compare tensor sizes + if (left.unknown_rank() || right.unknown_rank()) { + return false; + } + + // Tensor size, computed as a product of defined dimensions + int64 left_defined_size = 1; + int64 right_defined_size = 1; + + // Keep how many times each unknown dimension appeared on the left and right + std::unordered_map left_unknown_dims; + std::unordered_map right_unknown_dims; + + // Assign unique id to every unknown dimension (-1). We are going to + // assign positive ids, because negative values are already used by + // symbolic dimensions. + int64 unknown_dim_id = 1; + + // For each shape dimension update "defined tensor size", if shape is defined, + // or increment a counter for unknown dim. + auto process_dimensions = + [&unknown_dim_id](const TensorShapeProto& shape, int64* defined_size, + std::unordered_map* unknown_dims) { + for (int i = 0; i < shape.dim_size(); ++i) { + const auto& dim = shape.dim(i); + int64 dim_size = dim.size(); + if (dim_size > 0) { + *defined_size *= dim_size; + } else if (IsUnknown(dim)) { + ++(*unknown_dims)[unknown_dim_id++]; + } else if (IsKnownSymbolically(dim)) { + ++(*unknown_dims)[dim_size]; + } + } + }; + + process_dimensions(left, &left_defined_size, &left_unknown_dims); + process_dimensions(right, &right_defined_size, &right_unknown_dims); + + // Compute a union of unknown dimension ids appeared in both shapes + std::set unknown_dims; + for (const auto& el : left_unknown_dims) unknown_dims.insert(el.first); + for (const auto& el : right_unknown_dims) unknown_dims.insert(el.first); + + // Cancel unknown dimensions that appeared in both shapes + for (int64 unknown_dim : unknown_dims) { + int64 co_occurrence = std::min(left_unknown_dims[unknown_dim], + right_unknown_dims[unknown_dim]); + left_unknown_dims[unknown_dim] -= co_occurrence; + right_unknown_dims[unknown_dim] -= co_occurrence; + } + + // Count unbalanced unknown dimensions + int64 left_unbalanced_unknown_dims = 0; + int64 right_unbalanced_unknown_dims = 0; + for (const auto& el : left_unknown_dims) + left_unbalanced_unknown_dims += el.second; + for (const auto& el : right_unknown_dims) + right_unbalanced_unknown_dims += el.second; + + if (left_unbalanced_unknown_dims == 0 && right_unbalanced_unknown_dims == 0) { + // If unknown dimensions cancelled each other, compare tensor sizes + // represented by defined dimensions + return left_defined_size < right_defined_size; + } + + if (left_defined_size <= right_defined_size && + left_unbalanced_unknown_dims == 0 && right_unbalanced_unknown_dims > 0) { + // If size of a 'left" tensor computed from defined dimensions less or + // equal, and shape on the right has unbalanced unknown dimensions, we can + // guarantee that shape on the left is strictly smaller (assuming that + // unknown dimension size is larger than 1) + return true; + } + + // In every other case, assuming that unknown dimensions can be arbitrary + // large in size, we can't guarantee any ordering + return false; +} + +bool CompareSymbolicallyShapedTensorSizes( + const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right) { + return CompareSymbolicallyShapedTensorSizes(left.shape(), right.shape()); +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes.h b/tensorflow/core/grappler/optimizers/symbolic_shapes.h new file mode 100644 index 0000000000..a9dcf44e23 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/symbolic_shapes.h @@ -0,0 +1,60 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_ + +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/grappler/costs/op_performance_data.pb.h" + +namespace tensorflow { +namespace grappler { + +bool IsKnown(const TensorShapeProto::Dim& dim); +bool IsKnownSymbolically(const TensorShapeProto::Dim& dim); +bool IsUnknown(const TensorShapeProto::Dim& dim); + +// Shape is symbolically defined, if it has a known rank, and each dimension is +// known (dim_size >= 0), or is a symbolic dimension size (dim_size <= -2). +bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape); +bool ShapeIsSymbolicallyDefined(const OpInfo::TensorProperties& properties); + +// Shapes are symbolically equal, if they have the same rank, they are +// they are known or symbolically defined, and have matching dimensions. +bool ShapesSymbolicallyEqual(const TensorShapeProto& left, + const TensorShapeProto& right); +bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right); + +// Check if two shapes can be broadcasted to each other. Both shapes must be at +// least symbolically defined, and the have valid BCast instance. +bool ShapesBroadcastable(const TensorShapeProto& left, + const TensorShapeProto& right); +bool ShapesBroadcastable(const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right); + +// Return true if can prove, that tensor of size 'left' is smaller than tensor +// of size 'right'. Return false if it's larger or equal, or it's impossible to +// compare because of unknown dimensions, or mismatch in symbolic dimensions. +bool CompareSymbolicallyShapedTensorSizes(const TensorShapeProto& left, + const TensorShapeProto& right); +bool CompareSymbolicallyShapedTensorSizes( + const OpInfo::TensorProperties& left, + const OpInfo::TensorProperties& right); + +} // namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_SYMBOLIC_SHAPES_H_ diff --git a/tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc b/tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc new file mode 100644 index 0000000000..5ef9f65925 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/symbolic_shapes_test.cc @@ -0,0 +1,95 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/symbolic_shapes.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class SymbolicShapesTest : public ::testing::Test { + protected: + TensorShapeProto MakeUnknown() { + TensorShapeProto shape; + shape.set_unknown_rank(true); + return shape; + } + + TensorShapeProto MakeShape(std::vector dims) { + TensorShapeProto shape; + for (int dim_size : dims) { + TensorShapeProto::Dim dim; + dim.set_size(dim_size); + *shape.add_dim() = dim; + } + return shape; + } +}; + +bool operator<(const TensorShapeProto& lhs, const TensorShapeProto& rhs) { + return CompareSymbolicallyShapedTensorSizes(lhs, rhs); +} + +TEST_F(SymbolicShapesTest, ShapeIsSymbolicallyDefined) { + EXPECT_FALSE(ShapeIsSymbolicallyDefined(MakeUnknown())); + EXPECT_FALSE(ShapeIsSymbolicallyDefined(MakeShape({-1, 2}))); + + EXPECT_TRUE(ShapeIsSymbolicallyDefined(MakeShape({1, 2}))); + EXPECT_TRUE(ShapeIsSymbolicallyDefined(MakeShape({-2, 2}))); +} + +TEST_F(SymbolicShapesTest, ShapesSymbolicallyEqual) { + EXPECT_FALSE(ShapesSymbolicallyEqual(MakeUnknown(), MakeUnknown())); + EXPECT_FALSE(ShapesSymbolicallyEqual(MakeShape({-1, 2}), MakeShape({-1, 2}))); + EXPECT_FALSE(ShapesSymbolicallyEqual(MakeShape({-2, 2}), MakeShape({-3, 2}))); + + EXPECT_TRUE(ShapesSymbolicallyEqual(MakeShape({1, 2}), MakeShape({1, 2}))); + EXPECT_TRUE(ShapesSymbolicallyEqual(MakeShape({-2, 2}), MakeShape({-2, 2}))); +} + +TEST_F(SymbolicShapesTest, ShapesBroadcastable) { + EXPECT_FALSE(ShapesBroadcastable(MakeUnknown(), MakeUnknown())); + EXPECT_FALSE(ShapesBroadcastable(MakeShape({-2}), MakeShape({1, -3}))); + EXPECT_FALSE(ShapesBroadcastable(MakeShape({-1, 2}), MakeShape({-1, 2}))); + EXPECT_FALSE(ShapesBroadcastable(MakeShape({-2, 2}), MakeShape({-3, 2}))); + EXPECT_FALSE(ShapesBroadcastable(MakeShape({-2, 4}), MakeShape({-2, 8}))); + + EXPECT_TRUE(ShapesBroadcastable(MakeShape({1, 2}), MakeShape({1, 2}))); + EXPECT_TRUE(ShapesBroadcastable(MakeShape({-2, 2}), MakeShape({-2, 2}))); + EXPECT_TRUE(ShapesBroadcastable(MakeShape({-2, 32}), MakeShape({-2, 1}))); + EXPECT_TRUE(ShapesBroadcastable(MakeShape({-2, 1}), MakeShape({1, -2}))); + EXPECT_TRUE(ShapesBroadcastable(MakeShape({-2, 1}), MakeShape({1, -3}))); + EXPECT_TRUE(ShapesBroadcastable(MakeShape({-3}), MakeShape({-2, -3}))); +} + +TEST_F(SymbolicShapesTest, CompareSymbolicallyShapedTensorSizes) { + EXPECT_TRUE(MakeShape({1, 1, 32}) < MakeShape({32, 32})); + EXPECT_TRUE(MakeShape({1, 32, 32}) < MakeShape({2048})); + EXPECT_TRUE(MakeShape({1, -2, 32}) < MakeShape({-2, 32, 32})); + EXPECT_TRUE(MakeShape({1, 32, 32}) < MakeShape({-2, 32, 32})); + EXPECT_TRUE(MakeShape({1, 32, 32}) < MakeShape({-1, 32, 32})); + EXPECT_TRUE(MakeShape({1, -2, 32}) < MakeShape({-2, -2, 32})); + + EXPECT_FALSE(MakeShape({1, -2, 32}) < MakeShape({-3, 32, 32})); + EXPECT_FALSE(MakeShape({1, -1, 32}) < MakeShape({1, -1, 32})); + EXPECT_FALSE(MakeShape({1, -1, 32}) < MakeShape({-1, -1, 32})); + EXPECT_FALSE(MakeShape({-1, -1, 32}) < MakeShape({1, -1, 32})); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow -- GitLab From e92c2e0c957bc539fc24dffdceb96f4b3955bbee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 09:14:07 -0700 Subject: [PATCH 348/960] Disable over-aggressive shape inference PiperOrigin-RevId: 190074445 --- tensorflow/core/ops/list_ops.cc | 4 ---- .../python/kernel_tests/list_ops_test.py | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/ops/list_ops.cc b/tensorflow/core/ops/list_ops.cc index 0c16abd369..cad617638f 100644 --- a/tensorflow/core/ops/list_ops.cc +++ b/tensorflow/core/ops/list_ops.cc @@ -135,10 +135,6 @@ REGISTER_OP("TensorListStack") } shape_inference::ShapeHandle ignored; TF_RETURN_IF_ERROR(c->Merge(s, list_shape_type.shape, &ignored)); - if (!c->FullyDefined(list_shape_type.shape)) { - return errors::InvalidArgument( - "Can only stack a list with fully defined shapes."); - } s = list_shape_type.shape; } int expected_num_elements = -1; diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 8865e165fd..dbbed39c72 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -152,6 +152,28 @@ class ListOpsTest(test_util.TensorFlowTestCase): s1 = list_ops.tensor_list_stack(t1, element_dtype=dtypes.int32).eval() self.assertAllEqual(s1, [0, 1, 2, 3]) + def testGraphStackSwitchDtype(self): + with context.graph_mode(), self.test_session(): + list_ = list_ops.empty_tensor_list( + element_shape=constant_op.constant([], dtype=dtypes.int32), + element_dtype=dtypes.int32) + m = constant_op.constant([1, 2, 3], dtype=dtypes.float32) + + def body(list_, m): + list_ = control_flow_ops.cond( + math_ops.equal(list_ops.tensor_list_length(list_), 0), + lambda: list_ops.empty_tensor_list(m.shape, m.dtype), lambda: list_) + list_ = list_ops.tensor_list_push_back(list_, m) + return list_, m + + for _ in range(2): + list_, m = body(list_, m) + + s1 = list_ops.tensor_list_stack( + list_, element_dtype=dtypes.float32).eval() + np_s1 = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.float32) + self.assertAllEqual(s1, np_s1) + def testGraphStackInLoopSwitchDtype(self): with context.graph_mode(), self.test_session(): t1 = list_ops.empty_tensor_list( -- GitLab From b6ad189b1a197e454ae527829a01f742d76ba2a2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 22 Mar 2018 09:34:29 -0700 Subject: [PATCH 349/960] [XLA] Only overwrite the hlo_profiling flag when it's not enabled by default. This got broken in 504d103a405654f029e8902d97d4dd8f3aa07513 PiperOrigin-RevId: 190077360 --- .../compiler/xla/client/executable_build_options.cc | 4 +++- .../compiler/xla/client/executable_build_options.h | 9 +++++---- tensorflow/compiler/xla/service/local_service.cc | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index 4ff4da6215..6e3c5cb484 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -103,6 +103,8 @@ ExecutableBuildOptions& ExecutableBuildOptions::set_hlo_profile(bool enabled) { return *this; } -bool ExecutableBuildOptions::hlo_profile() const { return hlo_profile_; } +tensorflow::gtl::optional ExecutableBuildOptions::hlo_profile() const { + return hlo_profile_; +} } // namespace xla diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 85b2cd96cb..11f1098360 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -70,17 +70,18 @@ class ExecutableBuildOptions { tensorflow::StringPiece dirpath); const tensorflow::gtl::optional& dump_per_pass_hlo_proto_to() const; - // If set, specifies that we should record an HLO profile during execution and - // log it after execution (as in DebugOptions). + // If true, specifies that we should record an HLO profile during execution + // and log it after execution (as in DebugOptions). If nullopt the default is + // used. ExecutableBuildOptions& set_hlo_profile(bool enabled); - bool hlo_profile() const; + tensorflow::gtl::optional hlo_profile() const; // Returns a string representation of the build options, suitable for // debugging. string ToString() const; private: - bool hlo_profile_ = false; + tensorflow::gtl::optional hlo_profile_; int device_ordinal_ = -1; Shape result_layout_; bool result_layout_set_ = false; diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 7fd1ccd1a8..5690a89909 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -119,8 +119,10 @@ StatusOr> LocalService::CompileExecutable( } ExecutionOptions execution_options = CreateDefaultExecutionOptions(); - execution_options.mutable_debug_options()->set_xla_hlo_profile( - build_options.hlo_profile()); + if (build_options.hlo_profile().has_value()) { + execution_options.mutable_debug_options()->set_xla_hlo_profile( + *build_options.hlo_profile()); + } if (build_options.generate_hlo_graph().has_value()) { execution_options.mutable_debug_options()->set_xla_generate_hlo_graph( build_options.generate_hlo_graph().value()); -- GitLab From 18c6e42b95dab659aa755242096cda9195db4927 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 09:43:32 -0700 Subject: [PATCH 350/960] Simplifying "is" and "is not" dispatch PiperOrigin-RevId: 190078959 --- tensorflow/contrib/py2tf/utils/multiple_dispatch.py | 13 ++++--------- .../contrib/py2tf/utils/multiple_dispatch_test.py | 8 ++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/py2tf/utils/multiple_dispatch.py b/tensorflow/contrib/py2tf/utils/multiple_dispatch.py index da7a942703..427a936c35 100644 --- a/tensorflow/contrib/py2tf/utils/multiple_dispatch.py +++ b/tensorflow/contrib/py2tf/utils/multiple_dispatch.py @@ -22,21 +22,16 @@ import six from tensorflow.contrib.py2tf.utils.type_check import is_tensor from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops def dynamic_is(left, right): - if is_tensor(left, right): - return math_ops.equal(left.name, right.name) - else: - return left is right + # TODO(alexbw) if we're sure we should leave 'is' in place, + # then change the semantics in converters/logical_expressions.py + return left is right def dynamic_is_not(left, right): - if is_tensor(left, right): - return math_ops.not_equal(left.name, right.name) - else: - return left is not right + return left is not right def run_cond(condition, true_fn, false_fn): diff --git a/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py b/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py index 8d89b6898a..75e8fdd5ed 100644 --- a/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py +++ b/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py @@ -50,10 +50,10 @@ class MultipleDispatchTest(test.TestCase): should_be_false1 = multiple_dispatch.dynamic_is_not(a, also_a) should_be_true2 = multiple_dispatch.dynamic_is_not(a, not_actually_a) should_be_false2 = multiple_dispatch.dynamic_is(a, not_actually_a) - self.assertTrue(should_be_true1.eval()) - self.assertTrue(should_be_true2.eval()) - self.assertFalse(should_be_false1.eval()) - self.assertFalse(should_be_false2.eval()) + self.assertTrue(should_be_true1) + self.assertTrue(should_be_true2) + self.assertFalse(should_be_false1) + self.assertFalse(should_be_false2) def test_run_cond_python(self): true_fn = lambda: 2.0 -- GitLab From 9816741186cdf327e1ee9fb048f1573356ac1064 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 10:10:18 -0700 Subject: [PATCH 351/960] Template system improvements: allow replacing keyword args. Allow using function calls and dicts in name replacements. PiperOrigin-RevId: 190083700 --- tensorflow/contrib/py2tf/pyct/templates.py | 59 ++++++++++++++++++- .../contrib/py2tf/pyct/templates_test.py | 45 ++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index cdd71dc56d..590be68234 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -44,8 +44,6 @@ class ReplaceTransformer(gast.NodeTransformer): self.replacements = replacements self.in_replacements = False - # TODO(mdan): Make a more detailed pass and clean up if needed. - def visit_Expr(self, node): if (isinstance(node.value, gast.Name) and node.value.id in self.replacements): @@ -53,17 +51,57 @@ class ReplaceTransformer(gast.NodeTransformer): self.generic_visit(node) return node + def visit_keyword(self, node): + if node.arg in self.replacements: + repl = self.replacements[node.arg] + if isinstance(repl, gast.keyword): + return repl + elif (isinstance(repl, (list, tuple)) and repl and + all(isinstance(r, gast.keyword) for r in repl)): + return repl + # TODO(mdan): We may allow replacing with a string as well. + # For example, if one wanted to replace foo with bar in foo=baz, then + # we could allow changing just node arg, so that we end up with bar=baz. + raise ValueError( + 'a keyword argument may only be replaced by another keyword or a ' + 'non-empty list of keywords. Found: %s' % repl) + return self.generic_visit(node) + def visit_FunctionDef(self, node): node = self.generic_visit(node) if node.name in self.replacements: repl = self.replacements[node.name] if not isinstance(repl, (gast.Name, ast.Name)): raise ValueError( - 'A function name can only be replaced by a Name node. Found: %s' % + 'a function name can only be replaced by a Name node. Found: %s' % repl) node.name = repl.id return node + def _check_has_context(self, node): + if not node.ctx: + raise ValueError('node %s is missing ctx value' % node) + + def _check_inner_children_have_context(self, node): + if isinstance(node, gast.Attribute): + self._check_inner_children_have_context(node.value) + self._check_has_context(node) + elif isinstance(node, gast.Tuple): + for e in node.elts: + self._check_inner_children_have_context(e) + self._check_has_context(node) + elif isinstance(node, gast.Dict): + for e in node.keys: + self._check_inner_children_have_context(e) + for e in node.values: + self._check_inner_children_have_context(e) + elif isinstance(node, gast.Name): + self._check_has_context(node) + elif isinstance(node, (gast.Str, gast.Num)): + pass + else: + raise ValueError('unexpected node type "%s"' % node) + def _set_inner_child_context(self, node, ctx): if isinstance(node, gast.Attribute): self._set_inner_child_context(node.value, ctx) @@ -74,6 +112,21 @@ class ReplaceTransformer(gast.NodeTransformer): node.ctx = ctx elif isinstance(node, gast.Name): node.ctx = ctx + elif isinstance(node, gast.Call): + self._set_inner_child_context(node.func, ctx) + # We may be able to override these to Load(), but for now it's simpler + # to just assert that they're set. + for a in node.args: + self._check_inner_children_have_context(a) + for k in node.keywords: + self._check_inner_children_have_context(k.value) + elif isinstance(node, gast.Dict): + # We may be able to override these to Load(), but for now it's simpler + # to just assert that they're set. + for e in node.keys: + self._check_inner_children_have_context(e) + for e in node.values: + self._check_inner_children_have_context(e) elif isinstance(node, (gast.Str, gast.Num)): pass else: diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/py2tf/pyct/templates_test.py index d7835b80a7..af939caf32 100644 --- a/tensorflow/contrib/py2tf/pyct/templates_test.py +++ b/tensorflow/contrib/py2tf/pyct/templates_test.py @@ -23,6 +23,7 @@ import imp import gast from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.python.platform import test @@ -96,6 +97,50 @@ class TemplatesTest(test.TestCase): with self.assertRaises(ValueError): templates.replace(template, foo=1) + def test_replace_call_keyword(self): + template = """ + def test_fn(): + def f(a, d, f): + return a + d + f + return f(1, kws=None) + """ + + source = parser.parse_expression('f(d=3, f=5)') + node = templates.replace(template, kws=source.keywords)[0] + result, _ = compiler.ast_to_object(node) + self.assertEquals(9, result.test_fn()) + + with self.assertRaises(ValueError): + templates.replace(template, kws=[]) + templates.replace(template, kws=1) + + def test_replace_name_with_call(self): + template = """ + def test_fn(): + b = 5 + def g(a): + return 3 * a + def f(): + return g + return foo + """ + + source = parser.parse_expression('f()(b)') + node = templates.replace(template, foo=source)[0] + result, _ = compiler.ast_to_object(node) + self.assertEquals(15, result.test_fn()) + + def test_replace_name_with_dict(self): + template = """ + def test_fn(): + return foo['bar'] + """ + + source = parser.parse_expression('{\'bar\': 3}') + node = templates.replace(template, foo=source)[0] + result, _ = compiler.ast_to_object(node) + self.assertEquals(3, result.test_fn()) + def replace_as_expression(self): template = """ foo(a) -- GitLab From aeb9f62e237ae1274482acca2fa09db34aef42d4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 10:17:18 -0700 Subject: [PATCH 352/960] Omit tensorflow/python/estimator:replicate_model_fn_test from asan builds. It gets flaky timeouts. PiperOrigin-RevId: 190084932 --- tensorflow/python/estimator/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 04fcbb0e87..b25f9d2153 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -929,5 +929,6 @@ cuda_py_test( ], tags = [ "multi_gpu", + "noasan", # flaky time outs ], ) -- GitLab From 010e3e401cef883aa0fff334d3f5e56a88e3f5e4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 10:21:39 -0700 Subject: [PATCH 353/960] Mark tensor as const in a function that does not mutate a tensor. PiperOrigin-RevId: 190085757 --- tensorflow/contrib/lite/kernels/kernel_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 21da1daff7..2f407b5da3 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -53,13 +53,13 @@ inline TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, } // Determines whether tensor is constant. -inline bool IsConstantTensor(TfLiteTensor* tensor) { +inline bool IsConstantTensor(const TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteMmapRo; } // Determines whether tensor is dynamic. Note that a tensor can be non-const and // not dynamic. This function specifically checks for a dynamic tensor. -inline bool IsDynamicTensor(TfLiteTensor* tensor) { +inline bool IsDynamicTensor(const TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } -- GitLab From ed0c4037ec47e3a7d1e5d23514951e5256b8a30f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 10:21:59 -0700 Subject: [PATCH 354/960] Small cleanup StrCat related number formatting - Resolve inconsistency in return values (pointer to start vs end of buffer) - Instead, return the number of chars written as this turns out to be most useful to callers - Removes the need for redundant strlen calls. PiperOrigin-RevId: 190085812 --- tensorflow/core/lib/strings/numbers.cc | 49 +++++++++++++++----------- tensorflow/core/lib/strings/numbers.h | 15 ++++---- tensorflow/core/lib/strings/strcat.h | 16 ++++----- 3 files changed, 43 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index f5822fad8e..516decc3c0 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -106,19 +106,22 @@ T locale_independent_strtonum(const char* str, const char** endptr) { namespace strings { -char* FastInt32ToBufferLeft(int32 i, char* buffer) { +size_t FastInt32ToBufferLeft(int32 i, char* buffer) { uint32 u = i; + size_t length = 0; if (i < 0) { *buffer++ = '-'; + ++length; // We need to do the negation in modular (i.e., "unsigned") // arithmetic; MSVC++ apparently warns for plain "-u", so // we write the equivalent expression "0 - u" instead. u = 0 - u; } - return FastUInt32ToBufferLeft(u, buffer); + length += FastUInt32ToBufferLeft(u, buffer); + return length; } -char* FastUInt32ToBufferLeft(uint32 i, char* buffer) { +size_t FastUInt32ToBufferLeft(uint32 i, char* buffer) { char* start = buffer; do { *buffer++ = ((i % 10) + '0'); @@ -126,19 +129,22 @@ char* FastUInt32ToBufferLeft(uint32 i, char* buffer) { } while (i > 0); *buffer = 0; std::reverse(start, buffer); - return buffer; + return buffer - start; } -char* FastInt64ToBufferLeft(int64 i, char* buffer) { +size_t FastInt64ToBufferLeft(int64 i, char* buffer) { uint64 u = i; + size_t length = 0; if (i < 0) { *buffer++ = '-'; + ++length; u = 0 - u; } - return FastUInt64ToBufferLeft(u, buffer); + length += FastUInt64ToBufferLeft(u, buffer); + return length; } -char* FastUInt64ToBufferLeft(uint64 i, char* buffer) { +size_t FastUInt64ToBufferLeft(uint64 i, char* buffer) { char* start = buffer; do { *buffer++ = ((i % 10) + '0'); @@ -146,19 +152,18 @@ char* FastUInt64ToBufferLeft(uint64 i, char* buffer) { } while (i > 0); *buffer = 0; std::reverse(start, buffer); - return buffer; + return buffer - start; } static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001; -char* DoubleToBuffer(double value, char* buffer) { +size_t DoubleToBuffer(double value, char* buffer) { // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all // platforms these days. Just in case some system exists where DBL_DIG // is significantly larger -- and risks overflowing our buffer -- we have // this assert. static_assert(DBL_DIG < 20, "DBL_DIG is too big"); - bool full_precision_needed = true; if (std::abs(value) <= kDoublePrecisionCheckMax) { int snprintf_result = snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value); @@ -167,18 +172,20 @@ char* DoubleToBuffer(double value, char* buffer) { // larger than the precision we asked for. DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); - full_precision_needed = - locale_independent_strtonum(buffer, nullptr) != value; + if (locale_independent_strtonum(buffer, nullptr) == value) { + // Round-tripping the string to double works; we're done. + return snprintf_result; + } + // else: full precision formatting needed. Fall through. } - if (full_precision_needed) { - int snprintf_result = - snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value); + int snprintf_result = + snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value); - // Should never overflow; see above. - DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); - } - return buffer; + // Should never overflow; see above. + DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); + + return snprintf_result; } namespace { @@ -325,7 +332,7 @@ bool safe_strtod(const char* str, double* value) { return *str != '\0' && *endptr == '\0'; } -char* FloatToBuffer(float value, char* buffer) { +size_t FloatToBuffer(float value, char* buffer) { // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all // platforms these days. Just in case some system exists where FLT_DIG // is significantly larger -- and risks overflowing our buffer -- we have @@ -347,7 +354,7 @@ char* FloatToBuffer(float value, char* buffer) { // Should never overflow; see above. DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); } - return buffer; + return snprintf_result; } string FpToString(Fprint fp) { diff --git a/tensorflow/core/lib/strings/numbers.h b/tensorflow/core/lib/strings/numbers.h index 3c45b90274..6b7703be37 100644 --- a/tensorflow/core/lib/strings/numbers.h +++ b/tensorflow/core/lib/strings/numbers.h @@ -60,19 +60,18 @@ static const int kFastToBufferSize = 32; // the output. The buffer should typically be at least kFastToBufferSize // bytes. // -// Returns a pointer to the end of the string (i.e. the null character -// terminating the string). +// Returns the number of characters written. // ---------------------------------------------------------------------- -char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes -char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes -char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes -char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes +size_t FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes +size_t FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes +size_t FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes +size_t FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes // Required buffer size for DoubleToBuffer is kFastToBufferSize. // Required buffer size for FloatToBuffer is kFastToBufferSize. -char* DoubleToBuffer(double i, char* buffer); -char* FloatToBuffer(float i, char* buffer); +size_t DoubleToBuffer(double value, char* buffer); +size_t FloatToBuffer(float value, char* buffer); // Convert a 64-bit fingerprint value to an ASCII representation. string FpToString(Fprint fp); diff --git a/tensorflow/core/lib/strings/strcat.h b/tensorflow/core/lib/strings/strcat.h index b3ec14e448..fb2cd5bc7e 100644 --- a/tensorflow/core/lib/strings/strcat.h +++ b/tensorflow/core/lib/strings/strcat.h @@ -101,22 +101,22 @@ class AlphaNum { // A bool ctor would also convert incoming pointers (bletch). AlphaNum(int i32) // NOLINT(runtime/explicit) - : piece_(digits_, FastInt32ToBufferLeft(i32, digits_) - &digits_[0]) {} + : piece_(digits_, FastInt32ToBufferLeft(i32, digits_)) {} AlphaNum(unsigned int u32) // NOLINT(runtime/explicit) - : piece_(digits_, FastUInt32ToBufferLeft(u32, digits_) - &digits_[0]) {} + : piece_(digits_, FastUInt32ToBufferLeft(u32, digits_)) {} AlphaNum(long x) // NOLINT(runtime/explicit) - : piece_(digits_, FastInt64ToBufferLeft(x, digits_) - &digits_[0]) {} + : piece_(digits_, FastInt64ToBufferLeft(x, digits_)) {} AlphaNum(unsigned long x) // NOLINT(runtime/explicit) - : piece_(digits_, FastUInt64ToBufferLeft(x, digits_) - &digits_[0]) {} + : piece_(digits_, FastUInt64ToBufferLeft(x, digits_)) {} AlphaNum(long long int i64) // NOLINT(runtime/explicit) - : piece_(digits_, FastInt64ToBufferLeft(i64, digits_) - &digits_[0]) {} + : piece_(digits_, FastInt64ToBufferLeft(i64, digits_)) {} AlphaNum(unsigned long long int u64) // NOLINT(runtime/explicit) - : piece_(digits_, FastUInt64ToBufferLeft(u64, digits_) - &digits_[0]) {} + : piece_(digits_, FastUInt64ToBufferLeft(u64, digits_)) {} AlphaNum(float f) // NOLINT(runtime/explicit) - : piece_(digits_, strlen(FloatToBuffer(f, digits_))) {} + : piece_(digits_, FloatToBuffer(f, digits_)) {} AlphaNum(double f) // NOLINT(runtime/explicit) - : piece_(digits_, strlen(DoubleToBuffer(f, digits_))) {} + : piece_(digits_, DoubleToBuffer(f, digits_)) {} AlphaNum(Hex hex); // NOLINT(runtime/explicit) -- GitLab From 6fa811a94f3da0c49d69db9b15ea424f84a6431f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 10:28:56 -0700 Subject: [PATCH 355/960] Automated g4 rollback of changelist 189819449 PiperOrigin-RevId: 190087074 --- tensorflow/contrib/distributions/BUILD | 6 ++- .../kernel_tests/statistical_testing_test.py | 40 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 4ddec73ec8..e9c827a618 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -486,7 +486,11 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], - tags = ["noasan"], # Was found to time out in asan + tags = [ + "manual", + "noasan", + "noguitar", + ], ) cuda_py_test( diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index fc071c273d..3548ac1807 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -31,34 +31,30 @@ class StatisticalTestingTest(test.TestCase): def test_dkwm_design_mean_one_sample_soundness(self): numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] - def check_soundness(ff, fp): - sufficient_n = st.min_num_samples_for_dkwm_mean_test( - numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( - sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - return check_ops.assert_less_equal(detectable_d, numbers) with self.test_session() as sess: - sess.run([check_soundness(ff, fp) - for ff in rates - for fp in rates]) + for ff in rates: + for fp in rates: + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) def test_dkwm_design_mean_two_sample_soundness(self): numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] - def check_soundness(ff, fp): - (sufficient_n1, - sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( - numbers, 0., 1., 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample - detectable_d = d_fn( - sufficient_n1, 0., 1., sufficient_n2, 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - return check_ops.assert_less_equal(detectable_d, numbers) with self.test_session() as sess: - sess.run([check_soundness(ff, fp) - for ff in rates - for fp in rates]) + for ff in rates: + for fp in rates: + (sufficient_n1, + sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( + numbers, 0., 1., 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample + detectable_d = d_fn( + sufficient_n1, 0., 1., sufficient_n2, 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) def test_true_mean_confidence_interval_by_dkwm_one_sample(self): rng = np.random.RandomState(seed=0) -- GitLab From 1a6752dddf387d280a6a13c2dc7e2bebf69dab2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 11:12:10 -0700 Subject: [PATCH 356/960] Adds remaining validations in sequence_numeric_column. PiperOrigin-RevId: 190094883 --- .../feature_column/sequence_feature_column.py | 32 ++++++++++++++++++- .../sequence_feature_column_test.py | 26 +++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index e60116966f..555beddeaa 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -166,6 +166,10 @@ def sequence_categorical_column_with_identity( Returns: A `_SequenceCategoricalColumn`. + + Raises: + ValueError: if `num_buckets` is less than one. + ValueError: if `default_value` is not in range `[0, num_buckets)`. """ return fc._SequenceCategoricalColumn( fc.categorical_column_with_identity( @@ -205,6 +209,10 @@ def sequence_categorical_column_with_hash_bucket( Returns: A `_SequenceCategoricalColumn`. + + Raises: + ValueError: `hash_bucket_size` is not greater than 1. + ValueError: `dtype` is neither string nor integer. """ return fc._SequenceCategoricalColumn( fc.categorical_column_with_hash_bucket( @@ -257,6 +265,13 @@ def sequence_categorical_column_with_vocabulary_file( Returns: A `_SequenceCategoricalColumn`. + + Raises: + ValueError: `vocabulary_file` is missing or cannot be opened. + ValueError: `vocabulary_size` is missing or < 1. + ValueError: `num_oov_buckets` is a negative integer. + ValueError: `num_oov_buckets` and `default_value` are both specified. + ValueError: `dtype` is neither string nor integer. """ return fc._SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_file( @@ -311,6 +326,12 @@ def sequence_categorical_column_with_vocabulary_list( Returns: A `_SequenceCategoricalColumn`. + + Raises: + ValueError: if `vocabulary_list` is empty, or contains duplicate keys. + ValueError: `num_oov_buckets` is a negative integer. + ValueError: `num_oov_buckets` and `default_value` are both specified. + ValueError: if `dtype` is not integer or string. """ return fc._SequenceCategoricalColumn( fc.categorical_column_with_vocabulary_list( @@ -352,8 +373,17 @@ def sequence_numeric_column( Returns: A `_SequenceNumericColumn`. + + Raises: + TypeError: if any dimension in shape is not an int. + ValueError: if any dimension in shape is not a positive integer. + ValueError: if `dtype` is not convertible to `tf.float32`. """ - # TODO(b/73160931): Add validations. + shape = fc._check_shape(shape=shape, key=key) + if not (dtype.is_integer or dtype.is_floating): + raise ValueError('dtype must be convertible to float. ' + 'dtype: {}, key: {}'.format(dtype, key)) + return _SequenceNumericColumn( key, shape=shape, diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index b64f086376..88f5d53516 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -662,6 +662,32 @@ class SequenceIndicatorColumnTest(test.TestCase): class SequenceNumericColumnTest(test.TestCase): + def test_defaults(self): + a = sfc.sequence_numeric_column('aaa') + self.assertEqual('aaa', a.key) + self.assertEqual('aaa', a.name) + self.assertEqual('aaa', a._var_scope_name) + self.assertEqual((1,), a.shape) + self.assertEqual(0., a.default_value) + self.assertEqual(dtypes.float32, a.dtype) + + def test_shape_saved_as_tuple(self): + a = sfc.sequence_numeric_column('aaa', shape=[1, 2]) + self.assertEqual((1, 2), a.shape) + + def test_shape_must_be_positive_integer(self): + with self.assertRaisesRegexp(TypeError, 'shape dimensions must be integer'): + sfc.sequence_numeric_column('aaa', shape=[1.0]) + + with self.assertRaisesRegexp( + ValueError, 'shape dimensions must be greater than 0'): + sfc.sequence_numeric_column('aaa', shape=[0]) + + def test_dtype_is_convertible_to_float(self): + with self.assertRaisesRegexp( + ValueError, 'dtype must be convertible to float'): + sfc.sequence_numeric_column('aaa', dtype=dtypes.string) + def test_get_sequence_dense_tensor(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] -- GitLab From 8991aeea540da49344ceac6e8f5e092778f410a9 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 22 Mar 2018 11:21:52 -0700 Subject: [PATCH 357/960] Eager/g3doc: Gradients with respect to constants are None and not 0. Same behavior as tf.gradients() for graphs. Some discussion of this choice in #783 PiperOrigin-RevId: 190096919 --- tensorflow/contrib/eager/python/g3doc/guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md index df084e9053..11064981c6 100644 --- a/tensorflow/contrib/eager/python/g3doc/guide.md +++ b/tensorflow/contrib/eager/python/g3doc/guide.md @@ -273,9 +273,9 @@ assert 6 == df(3.)[0].numpy() d2f = tfe.gradients_function(lambda x: df(x)[0]) assert 2 == d2f(3.)[0].numpy() -# Third order derivative. +# Third order derivative: Will be None d3f = tfe.gradients_function(lambda x : d2f(x)[0]) -assert 0 == d3f(3.)[0].numpy() +assert None == d3f(3.)[0] ``` These functions can be used to train models. For example, consider the following -- GitLab From cfdd61585769188789280e768fc43fdbba799619 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Mar 2018 11:23:26 -0700 Subject: [PATCH 358/960] Run the grappler optimizer tests both on GPU and CPU PiperOrigin-RevId: 190097168 --- tensorflow/core/grappler/optimizers/BUILD | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index ac29edd213..cfb698969c 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -1,7 +1,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") -load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") @@ -44,7 +44,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "static_schedule_test", srcs = ["static_schedule_test.cc"], deps = [ @@ -79,7 +79,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "auto_parallel_test", srcs = ["auto_parallel_test.cc"], deps = [ @@ -157,7 +157,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "function_optimizer_test", srcs = ["function_optimizer_test.cc"], deps = [ @@ -223,7 +223,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "graph_optimizer_stage_test", size = "small", srcs = ["graph_optimizer_stage_test.cc"], @@ -274,7 +274,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "arithmetic_optimizer_test", size = "small", srcs = ["arithmetic_optimizer_test.cc"], @@ -315,7 +315,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "dependency_optimizer_test", size = "small", srcs = ["dependency_optimizer_test.cc"], @@ -351,7 +351,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "model_pruner_test", srcs = ["model_pruner_test.cc"], deps = [ @@ -422,10 +422,9 @@ cc_library( ]), ) -tf_cc_test_gpu( +tf_cuda_cc_test( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], - tags = ["no_cuda_on_cpu_tap"], deps = [ ":memory_optimizer", "//tensorflow/cc:cc_ops", @@ -464,7 +463,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "layout_optimizer_test", srcs = ["layout_optimizer_test.cc"], deps = [ @@ -513,7 +512,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "meta_optimizer_test", srcs = ["meta_optimizer_test.cc"], deps = [ @@ -542,7 +541,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "custom_graph_optimizer_registry_test", size = "small", srcs = ["custom_graph_optimizer_registry_test.cc"], @@ -577,7 +576,7 @@ cc_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "loop_optimizer_test", srcs = ["loop_optimizer_test.cc"], deps = [ -- GitLab From 7c4cdb8bae0e8760ebe4793d49ea5aee68768655 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 22 Mar 2018 11:25:49 -0700 Subject: [PATCH 359/960] Supports PReLU in TFLite & Toco. PiperOrigin-RevId: 190097557 --- tensorflow/contrib/lite/builtin_ops.h | 1 + .../contrib/lite/kernels/activations.cc | 64 ++++++++++ .../contrib/lite/kernels/activations_test.cc | 43 +++++++ tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 1 + .../contrib/lite/schema/schema_generated.h | 9 +- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 49 ++++++++ .../testing/generated_examples_zip_test.cc | 4 + tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../graph_transformations/identify_prelu.cc | 119 ++++++++++++++++++ .../propagate_fixed_sizes.cc | 1 + tensorflow/contrib/lite/toco/model.h | 13 ++ .../contrib/lite/toco/tflite/operator.cc | 2 + tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + tensorflow/contrib/lite/toco/tooling_util.cc | 1 + 19 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index e4652a3e70..d7993e60cc 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -78,6 +78,7 @@ typedef enum { kTfLiteBuiltinDelegate = 51, kTfLiteBuiltinBidirectionalSequenceLstm = 52, kTfLiteBuiltinCast = 53, + kTfLiteBuiltinPrelu = 54, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc index 093761c43c..39a54c9396 100644 --- a/tensorflow/contrib/lite/kernels/activations.cc +++ b/tensorflow/contrib/lite/kernels/activations.cc @@ -150,6 +150,34 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayCopy(input->dims)); } +TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteTensor* alpha = GetInput(context, node, 1); + + output->type = input->type; + + // Currently only Float32 is supported + // TODO(ycling): Support other data types. + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, alpha->type, kTfLiteFloat32); + + // Currently, only support 4D `input` and 3D `alpha` with shape + // (1, 1, channels). + // TODO(impjdi): Support other cases where `alpha` is broadcastable + // to `input`. + TF_LITE_ENSURE_EQ(context, input->dims->size, 4); + TF_LITE_ENSURE_EQ(context, alpha->dims->size, 3); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[0], 1); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[1], 1); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[2], input->dims->data[3]); + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -388,6 +416,35 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) { } } +TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* alpha = GetInput(context, node, 1); + TfLiteTensor* output = GetOutput(context, node, 0); + + if (input->type != kTfLiteFloat32) { + context->ReportError(context, "Only float32 supported currently."); + return kTfLiteError; + } + TF_LITE_ENSURE_EQ(context, input->dims->size, 4); + const int batches = input->dims->data[0]; + const int height = input->dims->data[1]; + const int width = input->dims->data[2]; + const int channels = input->dims->data[3]; + + TF_LITE_ENSURE_EQ(context, alpha->dims->size, 3); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[0], 1); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[1], 1); + TF_LITE_ENSURE_EQ(context, alpha->dims->data[2], channels); + + const int n = batches * height * width * channels; + for (int i = 0; i < n; ++i) { + const float x = input->data.f[i]; + output->data.f[i] = x >= 0.0f ? x : alpha->data.f[i % channels] * x; + } + + return kTfLiteOk; +} + } // namespace activations TfLiteRegistration* Register_RELU() { @@ -439,6 +496,13 @@ TfLiteRegistration* Register_LOG_SOFTMAX() { return &r; } +TfLiteRegistration* Register_PRELU() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + activations::PreluPrepare, + activations::PreluEval}; + return &r; +} + } // namespace builtin } // namespace ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc index b9a96e3f79..50a84edd47 100644 --- a/tensorflow/contrib/lite/kernels/activations_test.cc +++ b/tensorflow/contrib/lite/kernels/activations_test.cc @@ -383,6 +383,49 @@ TEST(FloatActivationsOpTest, LogSoftmax) { }))); } +class PReluOpModel : public SingleOpModel { + public: + PReluOpModel(const TensorData& input, const TensorData& alpha) { + input_ = AddInput(input); + alpha_ = AddInput(alpha); + output_ = AddOutput(input); + SetBuiltinOp(BuiltinOperator_PRELU, BuiltinOptions_NONE, 0); + BuildInterpreter({GetShape(input_), GetShape(alpha_)}); + } + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + void SetAlpha(std::initializer_list data) { + PopulateTensor(alpha_, data); + } + std::vector GetOutput() { return ExtractVector(output_); } + + protected: + int input_; + int alpha_; + int output_; +}; + +TEST(FloatActivationsOpTest, PRelu) { + PReluOpModel m({TensorType_FLOAT32, {1, 2, 2, 3}}, + {TensorType_FLOAT32, {1, 1, 3}}); + + m.SetInput({ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 1, Column 2 + }); + m.SetAlpha({0.0f, 1.0f, 2.0f}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 1, Column 2 + })); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 369d3b9886..62045f0a4d 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -75,6 +75,7 @@ TfLiteRegistration* Register_TOPK_V2(); TfLiteRegistration* Register_LOG_SOFTMAX(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_DEQUANTIZE(); +TfLiteRegistration* Register_PRELU(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -131,6 +132,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); + AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 9c619f88e0..b7ccdf070b 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -309,6 +309,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_LOG_SOFTMAX: case BuiltinOperator_CAST: case BuiltinOperator_DEQUANTIZE: + case BuiltinOperator_PRELU: break; case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 9d00d965d3..e31b7c03a5 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -349,6 +349,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_DEQUANTIZE: case tflite::BuiltinOperator_DELEGATE: case tflite::BuiltinOperator_CAST: + case tflite::BuiltinOperator_PRELU: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 04387fed33..e1075971e9 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -130,6 +130,7 @@ enum BuiltinOperator : byte { DELEGATE = 51, BIDIRECTIONAL_SEQUENCE_LSTM = 52, CAST = 53, + PRELU = 54, } // Options for the builtin operators. diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index b922de2081..86daeaf5cc 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -254,11 +254,12 @@ enum BuiltinOperator { BuiltinOperator_DELEGATE = 51, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, BuiltinOperator_CAST = 53, + BuiltinOperator_PRELU = 54, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_CAST + BuiltinOperator_MAX = BuiltinOperator_PRELU }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[52] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[53] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -311,7 +312,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[52] { BuiltinOperator_LOG_SOFTMAX, BuiltinOperator_DELEGATE, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, - BuiltinOperator_CAST + BuiltinOperator_CAST, + BuiltinOperator_PRELU }; return values; } @@ -372,6 +374,7 @@ inline const char **EnumNamesBuiltinOperator() { "DELEGATE", "BIDIRECTIONAL_SEQUENCE_LSTM", "CAST", + "PRELU", nullptr }; return names; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index f1b18ad30f..555ea90034 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -39,6 +39,7 @@ gen_zipped_test_files( "mean.zip", "mul.zip", "pad.zip", + "prelu.zip", "relu.zip", "relu1.zip", "relu6.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 420bdb41f1..38de9dcf2c 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -617,6 +617,54 @@ def make_relu6_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_prelu_tests(zip_path): + """Make a set of tests to do PReLU.""" + + test_parameters = [{ + # The canonical case for image processing is having a 4D `input` (NHWC) + # and `shared_axes`=[1, 2], so the alpha parameter is per channel. + "input_shape": [[1, 10, 10, 3], [3, 3, 3, 3]], + "shared_axes": [[1, 2], [1]], + }] + + def build_graph(parameters): + """Build the graph for the test case.""" + + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + prelu = tf.keras.layers.PReLU(shared_axes=parameters["shared_axes"]) + out = prelu(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + """Build the inputs for the test case.""" + + input_shape = parameters["input_shape"] + input_values = create_tensor_data( + np.float32, input_shape, min_value=-10, max_value=10) + shared_axes = parameters["shared_axes"] + + alpha_shape = [] + for dim in range(1, len(input_shape)): + alpha_shape.append(1 if dim in shared_axes else input_shape[dim]) + + alpha_values = create_tensor_data(np.float32, alpha_shape) + + with tf.variable_scope("", reuse=True): + alpha = tf.get_variable("p_re_lu/alpha") + sess.run(alpha.assign(alpha_values)) + + return [input_values], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_values]))) + + make_zip_of_tests( + zip_path, + test_parameters, + build_graph, + build_inputs, + use_frozen_graph=True) + + # This function tests various TensorFLow functions that generates Const op, # including `tf.ones`, `tf.zeros` and random functions. def make_constant_tests(zip_path): @@ -1911,6 +1959,7 @@ def main(unused_args): "relu.zip": make_relu_tests, "relu1.zip": make_relu1_tests, "relu6.zip": make_relu6_tests, + "prelu.zip": make_prelu_tests, "l2_pool.zip": make_pool_tests(make_l2_pool), "avg_pool.zip": make_pool_tests(tf.nn.avg_pool), "max_pool.zip": make_pool_tests(tf.nn.max_pool), diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 5e76e7c510..ba2d259462 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -88,6 +88,9 @@ std::map kBrokenTests = { // Transpose only supports 1D-4D input tensors. {R"(^\/transpose.*input_shape=\[.,.,.,.,.\])", "71545879"}, + + // PRelu only supports 4D input with (1, 1, channels) 3D alpha now. + {R"(^\/prelu.*shared_axes=\[1\])", "75975192"}, }; // Allows test data to be unzipped into a temporary directory and makes @@ -253,6 +256,7 @@ INSTANTIATE_TESTS(mul) INSTANTIATE_TESTS(pad) INSTANTIATE_TESTS(relu) INSTANTIATE_TESTS(relu1) +INSTANTIATE_TESTS(prelu) INSTANTIATE_TESTS(relu6) INSTANTIATE_TESTS(reshape) INSTANTIATE_TESTS(resize_bilinear) diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 395abc5326..486ff1edcd 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -193,6 +193,7 @@ cc_library( "graph_transformations/identify_lstm.cc", "graph_transformations/identify_lstm_merge_inputs.cc", "graph_transformations/identify_lstm_split_inputs.cc", + "graph_transformations/identify_prelu.cc", "graph_transformations/identify_relu1.cc", "graph_transformations/lstm_utils.cc", "graph_transformations/make_initial_dequantize_operator.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 11e5e19f50..640afc7c74 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -129,6 +129,7 @@ DECLARE_GRAPH_TRANSFORMATION(IdentifyLstmCell) DECLARE_GRAPH_TRANSFORMATION(SplitLstmCellInputs) DECLARE_GRAPH_TRANSFORMATION(MergeLstmCellInputs) DECLARE_GRAPH_TRANSFORMATION(IdentifyRelu1) +DECLARE_GRAPH_TRANSFORMATION(IdentifyPRelu) DECLARE_GRAPH_TRANSFORMATION(IdentifyDilatedConv) DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator) DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc new file mode 100644 index 0000000000..30be4ac0aa --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc @@ -0,0 +1,119 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +// This transformation rule tries to identify the PRelu structure generated by +// Keras, and convert it to a single op. +// +// The formula of PReLU is: +// f(x) = alpha * x for x < 0, f(x) = x for x >= 0. +// +// `x` is the input, and `alpha` is a trainable tensor which can be broadcasted +// to the shape of `x`. +// +// There's no native PRelu op in TensorFlow, so Keras generates the following +// structure which does the equivalent calculation: +// f(x) = Relu(x) + (-alpha * Relu(-x)) +// +// Practically, alpha is always a constant in the inference graph, and Toco have +// other graph transformations which fold the activation functions to other ops. +// Therefore, we're looking for the structure: +// +// f(x) = Relu(x) + (negative_alpha * Neg(x, activation=Relu)) + +namespace toco { + +bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { + const auto add_op_it = model->operators.begin() + op_index; + const auto* add_op = add_op_it->get(); + if (add_op == nullptr || add_op->type != OperatorType::kAdd || + add_op->inputs.size() != 2 || + add_op->fused_activation_function != FusedActivationFunctionType::kNone) { + return false; + } + + const auto* relu_input_op = GetOpWithOutput(*model, add_op->inputs[0]); + if (relu_input_op == nullptr || relu_input_op->type != OperatorType::kRelu || + relu_input_op->inputs.size() != 1 || + relu_input_op->fused_activation_function != + FusedActivationFunctionType::kNone) { + return false; + } + + // TODO(ycling): Both Add and Mul are commutative. Support the case where + // the position of operands are exchanged. + const auto* mul_op = GetOpWithOutput(*model, add_op->inputs[1]); + if (mul_op == nullptr || mul_op->type != OperatorType::kMul || + mul_op->inputs.size() != 2 || + mul_op->fused_activation_function != FusedActivationFunctionType::kNone) { + return false; + } + + const auto neg_alpha_tensor_name = mul_op->inputs[0]; + + const auto* relu_neg_input_op = GetOpWithOutput(*model, mul_op->inputs[1]); + + if (relu_neg_input_op == nullptr || + relu_neg_input_op->type != OperatorType::kNeg || + relu_neg_input_op->fused_activation_function != + FusedActivationFunctionType::kRelu || + relu_neg_input_op->inputs.size() != 1) { + return false; + } + + if (relu_input_op->inputs[0] != relu_neg_input_op->inputs[0]) { + return false; + } + + const auto input_tensor_name = relu_input_op->inputs[0]; + const auto output_tensor_name = add_op->outputs[0]; + + // Construct a tensor for positive alpha (double negative). + const auto alpha_tensor_name = + AvailableArrayName(*model, neg_alpha_tensor_name + "_neg"); + model->GetOrCreateArray(alpha_tensor_name); + + auto* neg_neg_alpha_op = new NegOperator; + neg_neg_alpha_op->inputs = {neg_alpha_tensor_name}; + neg_neg_alpha_op->outputs = {alpha_tensor_name}; + model->operators.emplace(add_op_it, neg_neg_alpha_op); + + auto* prelu_op = new PReluOperator; + prelu_op->inputs = {input_tensor_name, alpha_tensor_name}; + prelu_op->outputs = {output_tensor_name}; + model->operators.emplace(add_op_it, prelu_op); + AddMessageF("Creating %s replacing equivalent subgraph", LogName(*prelu_op)); + + DeleteArrayIfUsedOnce(neg_alpha_tensor_name, model); + DeleteArrayIfUsedOnce(add_op->inputs[0], model); + DeleteArrayIfUsedOnce(add_op->inputs[1], model); + DeleteArrayIfUsedOnce(mul_op->inputs[1], model); + // Remove the existing Add op that outputs the final result. If the other + // intermediate tensors aren't used by other ops, those will be removed by + // other graph transformation rules. + model->operators.erase(FindOp(*model, add_op)); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 375848a7d4..676736cfc5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1467,6 +1467,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kRelu: case OperatorType::kRelu1: case OperatorType::kRelu6: + case OperatorType::kPRelu: case OperatorType::kSoftmax: case OperatorType::kLogSoftmax: case OperatorType::kLogistic: diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 3fa0089cba..5199e292e1 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -65,6 +65,7 @@ enum class OperatorType { kRelu, kRelu1, kRelu6, + kPRelu, kSoftmax, kLogSoftmax, kSub, @@ -566,6 +567,18 @@ struct Relu6Operator : Operator { Relu6Operator() : Operator(OperatorType::kRelu6) {} }; +// PRelu +// f(x) = alpha * x for x < 0, f(x) = x for x >= 0. +// +// Inputs: +// inputs[0]: required: the input array +// inputs[1]: required: the alpha array +// +// Equivalent to keras.layers.PReLU. +struct PReluOperator : Operator { + PReluOperator() : Operator(OperatorType::kPRelu) {} +}; + // Element-wise Logistic operator: // x -> Logistic(x) = 1 / (1 + exp(-x)) // diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index f2cc4ef71f..f23249cfa1 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -854,6 +854,8 @@ std::vector> BuildOperatorList() { new SimpleOperator("RELU_N1_TO_1", OperatorType::kRelu1)); ops.emplace_back( new SimpleOperator("RELU6", OperatorType::kRelu6)); + ops.emplace_back( + new SimpleOperator("PRELU", OperatorType::kPRelu)); ops.emplace_back(new SimpleOperator( "LOGISTIC", OperatorType::kLogistic)); ops.emplace_back( diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index ca66110ba3..30dd6fab9e 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -94,6 +94,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new IdentifyL2Normalization); transformations->Add(new IdentifyL2Pool); transformations->Add(new IdentifyRelu1); + transformations->Add(new IdentifyPRelu); transformations->Add(new RemoveTrivialBinaryOperator); transformations->Add(new ReadFakeQuantMinMax); transformations->Add(new ResolveSpaceToBatchNDAttributes); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 2362206a14..ec1770c129 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -300,6 +300,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(Relu) HANDLE_OPERATORTYPENAME_CASE(Relu1) HANDLE_OPERATORTYPENAME_CASE(Relu6) + HANDLE_OPERATORTYPENAME_CASE(PRelu) HANDLE_OPERATORTYPENAME_CASE(ReorderAxes) HANDLE_OPERATORTYPENAME_CASE(Softmax) HANDLE_OPERATORTYPENAME_CASE(LogSoftmax) -- GitLab From 282750fee5e2df502436ca9ef6a95283f8adab34 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Thu, 22 Mar 2018 11:25:58 -0700 Subject: [PATCH 360/960] Add new Ops for ScopedAllocator and the associated Concat and Split. The ScopedAllocatorOp allocates a large backing tensor whose slices may be concatenated or splitted with ScopedAllocatorConcatOp and ScopedAllocatorSplitOp respectively. These ops should only be added via Grappler optimizations on the dataflow graph provided by the user. PiperOrigin-RevId: 190097586 --- tensorflow/core/BUILD | 3 + .../core/common_runtime/gpu/gpu_device.cc | 11 + .../core/common_runtime/gpu/gpu_device.h | 15 +- .../core/common_runtime/scoped_allocator.cc | 3 +- .../common_runtime/scoped_allocator_mgr.cc | 25 +- .../common_runtime/scoped_allocator_mgr.h | 6 +- .../scoped_allocator_mgr_test.cc | 25 +- .../core/common_runtime/threadpool_device.cc | 16 +- .../core/common_runtime/threadpool_device.h | 12 +- tensorflow/core/framework/allocator.h | 16 +- tensorflow/core/framework/device_base.h | 16 +- tensorflow/core/framework/op_kernel.cc | 9 +- tensorflow/core/kernels/BUILD | 37 +++ .../core/kernels/scoped_allocator_ops.cc | 216 +++++++++++++ .../core/kernels/scoped_allocator_ops_test.cc | 296 ++++++++++++++++++ tensorflow/core/ops/scoped_allocator_ops.cc | 81 +++++ 16 files changed, 742 insertions(+), 45 deletions(-) create mode 100644 tensorflow/core/kernels/scoped_allocator_ops.cc create mode 100644 tensorflow/core/kernels/scoped_allocator_ops_test.cc create mode 100644 tensorflow/core/ops/scoped_allocator_ops.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a14eeed1a5..15cbba8285 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -634,6 +634,7 @@ tf_gen_op_libs( "random_ops", "remote_fused_graph_ops", "resource_variable_ops", + "scoped_allocator_ops", "sdca_ops", "set_ops", "script_ops", @@ -717,6 +718,7 @@ cc_library( ":random_ops_op_lib", ":remote_fused_graph_ops_op_lib", ":resource_variable_ops_op_lib", + ":scoped_allocator_ops_op_lib", ":script_ops_op_lib", ":sdca_ops_op_lib", ":sendrecv_ops_op_lib", @@ -861,6 +863,7 @@ cc_library( "//tensorflow/core/kernels:remote_fused_graph_ops", "//tensorflow/core/kernels:required", "//tensorflow/core/kernels:resource_variable_ops", + "//tensorflow/core/kernels:scoped_allocator_ops", "//tensorflow/core/kernels:sdca_ops", "//tensorflow/core/kernels:set_kernels", "//tensorflow/core/kernels:sparse", diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 8357cc5a72..52fd20e479 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -840,6 +840,17 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, } } +Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) { + if (attr.scope_id > 0) { + return scoped_allocator_mgr_->GetContainer(step_id)->GetInstance( + attr.scope_id); + } + LOG(FATAL) << "Unexpected call to BaseGPUDevice::GetScopedAllocator " + << "attr.scope_id = " << attr.scope_id; + return gpu_allocator_; +} + const int BaseGPUDeviceFactory::InterconnectMap::kSameDeviceStrength = 1000; const int BaseGPUDeviceFactory::InterconnectMap::kStreamExecutorStrength = 1; diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index d817c7dd1f..cc5c3881dd 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -17,8 +17,8 @@ limitations under the License. #error This file must only be included when building with Cuda support #endif -#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ -#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ #include #include @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" @@ -95,11 +96,19 @@ class BaseGPUDevice : public LocalDevice { // corresponds to the cuda context. gpu::StreamExecutor* executor() const { return executor_; } + Allocator* GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) override; + + ScopedAllocatorMgr* GetScopedAllocatorMgr() const override { + return scoped_allocator_mgr_.get(); + } + protected: Allocator* gpu_allocator_; // not owned Allocator* cpu_allocator_; // not owned gpu::StreamExecutor* executor_; // not owned + std::unique_ptr scoped_allocator_mgr_; private: struct StreamGroup { @@ -205,4 +214,4 @@ class BaseGPUDeviceFactory : public DeviceFactory { } // namespace tensorflow -#endif // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_DEVICE_H_ diff --git a/tensorflow/core/common_runtime/scoped_allocator.cc b/tensorflow/core/common_runtime/scoped_allocator.cc index 31e7a5e3e2..a26672b79d 100644 --- a/tensorflow/core/common_runtime/scoped_allocator.cc +++ b/tensorflow/core/common_runtime/scoped_allocator.cc @@ -75,7 +75,8 @@ void* ScopedAllocator::AllocateRaw(int32 field_index, size_t num_bytes) { if (num_bytes != f.bytes) { LOG(ERROR) << "ScopedAllocator " << name_ << " got request for " << num_bytes << " bytes from field " << field_index - << " which has precalculated size " << f.bytes; + << " which has precalculated size " << f.bytes << " and offset " + << f.offset; return nullptr; } diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc index d0d05c6d1b..e1f70404e3 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc @@ -22,7 +22,7 @@ namespace tensorflow { Status ScopedAllocatorContainer::AddScopedAllocator( const Tensor& backing_tensor, int32 scope_id, const string& scope_name, const gtl::ArraySlice& fields, - int32 expected_call_count, ScopedAllocator** sa_ptr) { + int32 expected_call_count) { VLOG(1) << "AddScopedAllocator " << mgr_->device_name() << " step_id_=" << step_id_ << " scope_id=" << scope_id; mutex_lock l(mu_); @@ -41,17 +41,17 @@ Status ScopedAllocatorContainer::AddScopedAllocator( } } VLOG(2) << " container " << this << " step_id " << step_id_; - *sa_ptr = new ScopedAllocator(backing_tensor, scope_id, scope_name, fields, - expected_call_count, this); - allocators_[scope_id] = ScopedAllocatorContainer::SAField( - ScopedAllocator::kBackingIndex, *sa_ptr); + ScopedAllocator* sa = new ScopedAllocator( + backing_tensor, scope_id, scope_name, fields, expected_call_count, this); + allocators_[scope_id] = + ScopedAllocatorContainer::SAField(ScopedAllocator::kBackingIndex, sa); VLOG(2) << "#fields " << fields.size(); for (int i = 0; i < fields.size(); ++i) { const ScopedAllocator::Field& f = fields[i]; VLOG(2) << "Adding instance with for " << mgr_->device_name() << " scope_id=" << f.scope_id; allocators_[f.scope_id] = ScopedAllocatorContainer::SAField( - i, new ScopedAllocatorInstance(*sa_ptr, i)); + i, new ScopedAllocatorInstance(sa, i)); } return Status::OK(); } @@ -154,23 +154,26 @@ Status ScopedAllocatorMgr::AddScopedAllocator( const Tensor& backing_tensor, int64 step_id, int32 scope_id, const string& scope_name, const gtl::ArraySlice& fields, - int32 expected_call_count, ScopedAllocator** sa_ptr) { + int32 expected_call_count) { ScopedAllocatorContainer* sac = GetContainer(step_id); return sac->AddScopedAllocator(backing_tensor, scope_id, scope_name, fields, - expected_call_count, sa_ptr); + expected_call_count); } void ScopedAllocatorMgr::PopulateFields( - int32 scope_id, const gtl::ArraySlice& shapes, DataType dtype, - std::vector* fields) { + int32 scope_id, const gtl::ArraySlice& shapes, + const DataType dtype, std::vector* fields) { const int32 num_fields = static_cast(shapes.size()); fields->resize(num_fields); size_t offset = 0; for (int32 i = 0; i < num_fields; ++i) { - size_t bytes = shapes[i].num_elements() * sizeof(dtype); + size_t bytes = shapes[i].num_elements() * DataTypeSize(dtype); (*fields)[i].scope_id = scope_id + 1 + i; (*fields)[i].bytes = bytes; (*fields)[i].offset = offset; + VLOG(1) << "field=" << i << " scope_id=" << (*fields)[i].scope_id + << " bytes=" << (*fields)[i].bytes + << " offset=" << (*fields)[i].offset; offset += bytes; size_t overshoot = offset % Allocator::kAllocatorAlignment; if (overshoot > 0) { diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.h b/tensorflow/core/common_runtime/scoped_allocator_mgr.h index 4d5bc23dd9..effc5f2d77 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.h +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.h @@ -34,7 +34,7 @@ class ScopedAllocatorContainer : public core::RefCounted { Status AddScopedAllocator( const Tensor& backing_tensor, int32 scope_id, const string& scope_name, const gtl::ArraySlice& fields, - int32 expected_call_count, ScopedAllocator** sa_ptr); + int32 expected_call_count); ScopedAllocatorInstance* GetInstance(int32 scope_id); ScopedAllocator* GetAllocator(int32 scope_id); @@ -83,7 +83,7 @@ class ScopedAllocatorMgr { const Tensor& backing_tensor, int64 step_id, int32 scope_id, const string& scope_name, const gtl::ArraySlice& fields, - int32 expected_call_count, ScopedAllocator** sa_ptr); + int32 expected_call_count); void Cleanup(int64 step_id); @@ -91,7 +91,7 @@ class ScopedAllocatorMgr { // consecutive scope_id values following that of the base ScopedAllocator. static void PopulateFields(int32 scope_id, const gtl::ArraySlice& shapes, - DataType dtype, + const DataType dtype, std::vector* fields); const string& device_name() const { return device_name_; } diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr_test.cc b/tensorflow/core/common_runtime/scoped_allocator_mgr_test.cc index 81cb3e7979..38e07e47f2 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr_test.cc +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr_test.cc @@ -25,7 +25,7 @@ namespace { class ScopedAllocatorMgrTest : public ::testing::Test { public: - ScopedAllocatorMgrTest() : sam_("CPU0"), sa_(nullptr) {} + ScopedAllocatorMgrTest() : sam_("CPU0") {} void InitTensor() { backing_tensor_ = Tensor(cpu_allocator(), DT_FLOAT, backing_tensor_shape_); @@ -42,7 +42,7 @@ class ScopedAllocatorMgrTest : public ::testing::Test { << " expected_use_count " << expected_use_count; return sam_.AddScopedAllocator(backing_tensor_, step_id_, scope_id, "tensor_shape_599", fields_, - expected_use_count, &sa_); + expected_use_count); } Status PrepScopedAllocatorMgr(int expected_use_count) { @@ -87,7 +87,6 @@ class ScopedAllocatorMgrTest : public ::testing::Test { std::vector fields_shapes_; std::vector fields_; ScopedAllocatorMgr sam_; - ScopedAllocator* sa_; const int step_id_ = 101; const int scope_id_ = 599; std::vector sa_instances_; @@ -138,9 +137,9 @@ TEST_F(ScopedAllocatorMgrTest, ContainerAddAllocator) { // Cleanup the instances by invoking allocate and deallocate. void* ptr0 = - sa_instances_[0]->AllocateRaw(0 /* alignment */, 512 * sizeof(DT_FLOAT)); + sa_instances_[0]->AllocateRaw(0 /* alignment */, 512 * sizeof(float)); void* ptr1 = - sa_instances_[1]->AllocateRaw(0 /* alignment */, 512 * sizeof(DT_FLOAT)); + sa_instances_[1]->AllocateRaw(0 /* alignment */, 512 * sizeof(float)); sa_instances_[0]->DeallocateRaw(ptr0); sa_instances_[1]->DeallocateRaw(ptr1); } @@ -153,7 +152,6 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorSuccess) { fields_shapes_ = std::vector({{512}, {3, 3}, {2, 256}}); Status s = PrepScopedAllocatorMgr(3); other = sac->GetAllocator(scope_id_); - EXPECT_EQ(other, sa_); ScopedAllocatorInstance* inst0 = sac->GetInstance(scope_id_ + 1); char* ptr0 = static_cast(inst0->AllocateRaw(0, 512 * sizeof(float))); @@ -187,8 +185,7 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorInitFail) { fields_.resize(1); fields_[0].scope_id = scope_id_ + 1; fields_[0].offset = 0; - fields_[0].bytes = - backing_tensor_shape_.num_elements() * 2 * sizeof(DT_FLOAT); + fields_[0].bytes = backing_tensor_shape_.num_elements() * 2 * sizeof(float); // fields[0].offset + fields[0].bytes is larger than the size of the backing // tensor, so this check should fail EXPECT_DEATH(Status s = AddScopedAllocator(1, scope_id_), ""); @@ -208,20 +205,20 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorFail) { // so we need to explicitly delete the instances to avoid a memleak. SaveInstances(fields_shapes_.size()); - char* ptr0 = static_cast( - sa_instances_[0]->AllocateRaw(0, 512 * sizeof(DT_FLOAT))); + char* ptr0 = + static_cast(sa_instances_[0]->AllocateRaw(0, 512 * sizeof(float))); VLOG(2) << "Should fail because we deallocate ptr=" << static_cast(ptr0 + 8) << " which we never allocated."; EXPECT_DEATH(sa_instances_[0]->DeallocateRaw(ptr0 + 8), ""); VLOG(2) << "Should fail because we allocate smaller than the size of the " << "field."; - EXPECT_EQ(nullptr, sa_instances_[1]->AllocateRaw(0, 256 * sizeof(DT_FLOAT))); + EXPECT_EQ(nullptr, sa_instances_[1]->AllocateRaw(0, 256 * sizeof(float))); VLOG(2) << "Should fail because we allocate larger than the size of the " << "field."; - EXPECT_EQ(nullptr, sa_instances_[1]->AllocateRaw(0, 1024 * sizeof(DT_FLOAT))); - void* ptr1 = sa_instances_[1]->AllocateRaw(0, 512 * sizeof(DT_FLOAT)); + EXPECT_EQ(nullptr, sa_instances_[1]->AllocateRaw(0, 1024 * sizeof(float))); + void* ptr1 = sa_instances_[1]->AllocateRaw(0, 512 * sizeof(float)); VLOG(2) << "Should fail because we exceed expected_use_count."; - EXPECT_EQ(nullptr, sa_instances_[0]->AllocateRaw(0, 512 * sizeof(DT_FLOAT))); + EXPECT_EQ(nullptr, sa_instances_[0]->AllocateRaw(0, 512 * sizeof(float))); sa_instances_[0]->DeallocateRaw(ptr0); sa_instances_[1]->DeallocateRaw(ptr1); } diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc index 5aa01376ab..6d8de6a3c0 100644 --- a/tensorflow/core/common_runtime/threadpool_device.cc +++ b/tensorflow/core/common_runtime/threadpool_device.cc @@ -16,6 +16,8 @@ limitations under the License. #include "tensorflow/core/common_runtime/threadpool_device.h" #include "tensorflow/core/common_runtime/local_device.h" +#include "tensorflow/core/common_runtime/scoped_allocator.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/framework/device_base.h" @@ -40,7 +42,8 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options, Allocator* allocator) : LocalDevice(options, Device::BuildDeviceAttributes( name, DEVICE_CPU, memory_limit, locality)), - allocator_(allocator) {} + allocator_(allocator), + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)) {} ThreadPoolDevice::~ThreadPoolDevice() {} @@ -65,6 +68,17 @@ Allocator* ThreadPoolDevice::GetAllocator(AllocatorAttributes attr) { return allocator_; } +Allocator* ThreadPoolDevice::GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) { + if (attr.scope_id > 0) { + return scoped_allocator_mgr_->GetContainer(step_id)->GetInstance( + attr.scope_id); + } + LOG(FATAL) << "Unexpected call to ThreadPoolDevice::GetScopedAllocator " + << "attr.scope_id = " << attr.scope_id; + return allocator_; +} + Status ThreadPoolDevice::MakeTensorFromProto( const TensorProto& tensor_proto, const AllocatorAttributes alloc_attrs, Tensor* tensor) { diff --git a/tensorflow/core/common_runtime/threadpool_device.h b/tensorflow/core/common_runtime/threadpool_device.h index 37cb745a0a..afc5d15ebc 100644 --- a/tensorflow/core/common_runtime/threadpool_device.h +++ b/tensorflow/core/common_runtime/threadpool_device.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ -#define TENSORFLOW_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/local_device.h" @@ -31,6 +31,11 @@ class ThreadPoolDevice : public LocalDevice { void Compute(OpKernel* op_kernel, OpKernelContext* context) override; Allocator* GetAllocator(AllocatorAttributes attr) override; + Allocator* GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) override; + ScopedAllocatorMgr* GetScopedAllocatorMgr() const override { + return scoped_allocator_mgr_.get(); + } Status MakeTensorFromProto(const TensorProto& tensor_proto, const AllocatorAttributes alloc_attrs, Tensor* tensor) override; @@ -39,8 +44,9 @@ class ThreadPoolDevice : public LocalDevice { private: Allocator* allocator_; // Not owned + std::unique_ptr scoped_allocator_mgr_; }; } // namespace tensorflow -#endif // TENSORFLOW_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_THREADPOOL_DEVICE_H_ diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 3ce1b61246..2c87156dca 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_FRAMEWORK_ALLOCATOR_H_ -#define TENSORFLOW_FRAMEWORK_ALLOCATOR_H_ +#ifndef TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_ +#define TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_ #include @@ -359,7 +359,12 @@ struct AllocatorAttributes { bool nic_compatible() const { return value & (0x1 << 1); } void set_gpu_compatible(bool v) { value |= (static_cast(v) << 2); } bool gpu_compatible() const { return value & (0x1 << 2); } - void Merge(AllocatorAttributes other) { value |= other.value; } + void Merge(AllocatorAttributes other) { + value |= other.value; + scope_id = (scope_id > 0 && other.scope_id == 0) + ? scope_id + : ((scope_id == 0) ? other.scope_id : 0); + } // Returns true if the fields set in *this is a subset of or equal to // those set in other. bool IsEqualOrLessRestrictiveThan(const AllocatorAttributes& other) const { @@ -371,6 +376,9 @@ struct AllocatorAttributes { // upper 8 bits in device-specific ways, and ops implemented for those // devices are responsible for setting those 8 bits appropriately. uint32 value = 0; + // EXPERIMENTAL: If this is greater than zero, then allocation is delegated to + // a named special-purpose allocator on the same device. + int32 scope_id = 0; }; // Returns a trivial implementation of Allocator which uses the system @@ -396,4 +404,4 @@ class SubAllocator { } // namespace tensorflow -#endif // TENSORFLOW_FRAMEWORK_ALLOCATOR_H_ +#endif // TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_ diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index fb6d5c69e1..52b9077d8c 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_ -#define TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_ +#ifndef TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ +#define TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ #include #include @@ -48,6 +48,7 @@ class Env; class EventMgr; class OpKernelContext; class ResourceMgr; +class ScopedAllocatorMgr; class TensorProto; namespace thread { @@ -179,6 +180,15 @@ class DeviceBase { return GetAllocator(attr); } + // Return an Allocator prepared for use in particular places by graph + // optimization + virtual Allocator* GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) { + LOG(FATAL) << "Device does not implement GetScopedAllocator()"; + } + + virtual ScopedAllocatorMgr* GetScopedAllocatorMgr() const { return nullptr; } + virtual const Eigen::ThreadPoolDevice* eigen_cpu_device() { CHECK(eigen_cpu_device_ != nullptr); return eigen_cpu_device_; @@ -243,4 +253,4 @@ class DeviceBase { } // namespace tensorflow -#endif // TENSORFLOW_FRAMEWORK_DEVICE_BASE_H_ +#endif // TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 8654437059..9ec1c213c3 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -282,8 +282,13 @@ OpKernelContext::~OpKernelContext() { } Allocator* OpKernelContext::get_allocator(AllocatorAttributes attr) { - Allocator* allocator = - params_->device->GetStepAllocator(attr, resource_manager()); + Allocator* allocator = nullptr; + if (attr.scope_id > 0) { + allocator = params_->device->GetScopedAllocator(attr, step_id()); + CHECK(allocator); + } else { + allocator = params_->device->GetStepAllocator(attr, resource_manager()); + } if (track_allocations()) { mutex_lock lock(mu_); for (const auto& wrapped : wrapped_allocators_) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 2e39f25fc1..f6137fb860 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1666,6 +1666,43 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "scoped_allocator_ops", + prefix = "scoped_allocator_ops", + deps = [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:scoped_allocator_ops_op_lib", + ], +) + +tf_cuda_cc_test( + name = "scoped_allocator_ops_test", + srcs = ["scoped_allocator_ops_test.cc"], + linkstatic = tf_kernel_tests_linkstatic(), #Required for benchmarking + deps = [ + ":cwise_op", + ":dense_update_ops", + ":ops_testutil", + ":ops_util", + ":scoped_allocator_ops", + ":variable_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "session_ops", prefix = "session_ops", diff --git a/tensorflow/core/kernels/scoped_allocator_ops.cc b/tensorflow/core/kernels/scoped_allocator_ops.cc new file mode 100644 index 0000000000..d7b25ffad0 --- /dev/null +++ b/tensorflow/core/kernels/scoped_allocator_ops.cc @@ -0,0 +1,216 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/scoped_allocator.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class ScopedAllocatorOp : public OpKernel { + public: + explicit ScopedAllocatorOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_)); + OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_)); + OP_REQUIRES_OK(context, context->GetAttr("id", &id_)); + OP_REQUIRES_OK(context, context->GetAttr("expected_call_count", + &expected_call_count_)); + device_ = context->device(); + // Precalculate the size of the backing tensor and the offsets of + // the subtensors to be allocated from it, taking into account + // alignment considerations. + ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_); + size_t num_bytes = fields_.back().offset + fields_.back().bytes; + num_elements_ = num_bytes / DataTypeSize(dtype_); + OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0, + errors::InvalidArgument( + "Number of bytes ", num_bytes, + " must be divisible by size of datatype ", dtype_)); + } + + void Compute(OpKernelContext* context) override { + ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr(); + if (!sam) { + context->SetStatus(errors::Internal( + "ScopedAllocatorMgr not supported on device ", device_->name())); + return; + } + Tensor* backing_tensor = nullptr; + AllocatorAttributes attr = context->output_alloc_attr(0); + Status s = + context->allocate_output(0, {num_elements_}, &backing_tensor, attr); + VLOG(1) << "_ScopedAllocatorOp new backing tensor size " + << backing_tensor->TotalBytes() << " num_elements_ " + << num_elements_ << " buffer " << DMAHelper::buffer(backing_tensor) + << " base addr " << DMAHelper::base(backing_tensor); + if (s.ok()) { + s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_, + name_, fields_, expected_call_count_); + } + if (!s.ok()) { + context->SetStatus(s); + } + } + + private: + std::vector shapes_; + DataType dtype_; + int64 num_elements_; + std::vector fields_; + string name_; + int32 id_; + int32 expected_call_count_; + DeviceBase* device_; +}; + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU), + ScopedAllocatorOp); + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU), + ScopedAllocatorOp); + +class ScopedAllocatorConcatOp : public OpKernel { + public: + explicit ScopedAllocatorConcatOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_)); + // This stuff is just for debugging + OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_)); + OP_REQUIRES_OK(context, context->GetAttr("id", &id_)); + device_ = context->device(); + } + + void Compute(OpKernelContext* context) override { + const Tensor& backing_tensor = context->input(0); + // Check that type matches. + OP_REQUIRES( + context, backing_tensor.dtype() == dtype_, + errors::InvalidArgument("Backing tensor type ", backing_tensor.dtype(), + " does not match expected type ", dtype_)); + // Check that backing tensor is at least as large as the shape of the + // output. + OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(), + errors::InvalidArgument("Backing tensor num elements ", + backing_tensor.NumElements(), + " is not equal to expected ", + shape_.num_elements())); + VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at " + << DMAHelper::base(&backing_tensor); + Tensor backing_copy(backing_tensor); + context->set_output(0, backing_copy); + const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy); + const void* backing_tensor_lb = backing_buf->data(); + const void* backing_tensor_ub = static_cast( + static_cast(backing_tensor_lb) + backing_buf->size()); + // Check that all inputs lie entirely within the backing tensor. + for (int i = 1; i < context->num_inputs(); ++i) { + const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i)); + const void* input_lb = input_buf->data(); + OP_REQUIRES( + context, input_lb >= backing_tensor_lb, + errors::InvalidArgument("Lower bound check fail for input ", i, + " to node ", context->op_kernel().name())); + const void* input_ub = static_cast( + static_cast(input_lb) + input_buf->size()); + OP_REQUIRES( + context, input_ub <= backing_tensor_ub, + errors::InvalidArgument("Upper bound check fail for input ", i, + " to node ", context->op_kernel().name())); + } + } + + private: + TensorShape shape_; + DataType dtype_; + string name_; + int32 id_; + DeviceBase* device_; +}; + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU), + ScopedAllocatorConcatOp); + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU), + ScopedAllocatorConcatOp); + +class ScopedAllocatorSplitOp : public OpKernel { + public: + explicit ScopedAllocatorSplitOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_)); + // This stuff is just for debugging + OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_)); + OP_REQUIRES_OK(context, context->GetAttr("id", &id_)); + device_ = context->device(); + } + + void Compute(OpKernelContext* context) override { + Tensor backing_copy(context->input(0)); + // Check that type matches. + OP_REQUIRES( + context, backing_copy.dtype() == dtype_, + errors::InvalidArgument("Backing tensor type ", backing_copy.dtype(), + " does not match expected type ", dtype_)); + const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy); + const void* backing_tensor_lb = backing_buf->data(); + const void* backing_tensor_ub = static_cast( + static_cast(backing_tensor_lb) + backing_buf->size()); + for (int i = 1; i < context->num_inputs(); ++i) { + VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i + << " to output " << i - 1 << " buf addr " + << DMAHelper::base(&context->input(i)); + Tensor copy(context->input(i)); + OP_REQUIRES( + context, copy.dtype() == dtype_, + errors::InvalidArgument("Input ", i, " tensor type ", copy.dtype(), + " does not match expected type ", dtype_)); + context->set_output(i - 1, copy); + const TensorBuffer* input_buf = DMAHelper::buffer(©); + const void* input_lb = input_buf->data(); + OP_REQUIRES( + context, input_lb >= backing_tensor_lb, + errors::InvalidArgument("Lower bound check fail for input ", i, + " to node ", context->op_kernel().name())); + const void* input_ub = static_cast( + static_cast(input_lb) + input_buf->size()); + OP_REQUIRES( + context, input_ub <= backing_tensor_ub, + errors::InvalidArgument("Upper bound check fail for input ", i, + " to node ", context->op_kernel().name())); + } + } + + private: + DataType dtype_; + string name_; + int32 id_; + DeviceBase* device_; +}; + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU), + ScopedAllocatorSplitOp); + +REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU), + ScopedAllocatorSplitOp); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/scoped_allocator_ops_test.cc b/tensorflow/core/kernels/scoped_allocator_ops_test.cc new file mode 100644 index 0000000000..3d36c8b7d4 --- /dev/null +++ b/tensorflow/core/kernels/scoped_allocator_ops_test.cc @@ -0,0 +1,296 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/common_runtime/scoped_allocator.h" +#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/testlib.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +class ScopedAllocatorOpTest : public OpsTestBase { + protected: + void MakeOp(const gtl::ArraySlice& shapes, DataType dtype, + const string& name, int32 id, int32 expected_call_count) { + TF_EXPECT_OK(NodeDefBuilder("scoped_allocator_op", "_ScopedAllocator") + .Attr("T", dtype) + .Attr("shapes", shapes) + .Attr("sa_name", name) + .Attr("id", id) + .Attr("expected_call_count", expected_call_count) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + TF_ASSERT_OK(RunOpKernel()); + + // Allocate and Deallocate the tensors so that memory is not leaked + AllocatorAttributes attr; + Allocator* allocator; + for (size_t i = 0; i < shapes.size(); i++) { + attr.scope_id = id + i + 1; + allocator = device_->GetScopedAllocator(attr, context_->step_id()); + Tensor temp(allocator, dtype, shapes[i]); + } + } +}; + +TEST_F(ScopedAllocatorOpTest, Simple) { + MakeOp({TensorShape({8})}, DT_FLOAT, "test", 120, 1); + MakeOp({TensorShape({32, 32})}, DT_DOUBLE, "test1", 130, 1); + MakeOp({TensorShape({64}), TensorShape({3, 3}), TensorShape({5, 5, 5})}, + DT_HALF, "test2", 140, 3); + MakeOp({TensorShape({512}), TensorShape({64, 8})}, DT_UINT32, "test3", 150, + 2); +} + +// PrepOp is common to ConcatOp tests and SplitOpTests. +// It allocates a backing tensor that is large enough to hold all slices defined +// by fields, creates ScopedAllocatorInstances for each field, allocates the +// tensors, and assigns them as inputs to the op. +// We won't use the AddInput* suite of functions from ops_testutil.h because +// they allocate new tensors for each input. We need to mimic what a +// ScopedAllocator would do. +void PrepOp(DataType dtype, int32 id, + const std::vector& fields_shapes, + std::vector* fields, + Tensor** backing_tensor, Allocator* allocator, + ScopedAllocatorMgr* sam, const string& op_name, + std::vector* tensors, + gtl::InlinedVector* inputs, + const DataTypeVector& input_types) { + ScopedAllocatorMgr::PopulateFields(id, fields_shapes, dtype, fields); + // We don't simply allocate a tensor with shape as backing_tensor_shape, + // because we need to account for padding in the fields. We actually need a + // tensor of size at least (fields[-1].offset + fields[-1].bytes). + size_t num_bytes = fields->back().offset + fields->back().bytes; + int32_t num_elements = num_bytes / DataTypeSize(dtype); + CHECK_EQ(num_bytes % DataTypeSize(dtype), 0); + + *backing_tensor = new Tensor(allocator, dtype, {num_elements}); + int64 step_id = 10; + Status s = sam->AddScopedAllocator(**backing_tensor, step_id, id, + "sa_" + op_name + "_test", *fields, + fields_shapes.size()); + TF_ASSERT_OK(s); + + ScopedAllocatorContainer* sac = sam->GetContainer(step_id); + std::vector sa_instances(fields_shapes.size(), + nullptr); + for (size_t i = 0; i < fields_shapes.size(); i++) { + sa_instances[i] = sac->GetInstance(id + i + 1); + tensors->push_back(Tensor(sa_instances[i], dtype, fields_shapes[i])); + } + // Now add the tensor as an input to ScopedAllocatorOp. + // Order matters here, so first add the backing tensor, then the slices. + inputs->reserve(1 + tensors->size()); + CHECK_GT(input_types.size(), inputs->size()); + CHECK_EQ(input_types[inputs->size()], dtype); + inputs->push_back({nullptr, *backing_tensor}); + for (size_t i = 0; i < tensors->size(); i++) { + CHECK_EQ(input_types[inputs->size()], dtype); + inputs->push_back({nullptr, &((*tensors)[i])}); + } +} + +class ScopedAllocatorConcatOpTest : public OpsTestBase { + protected: + void MakeOp(const TensorShape& shape, DataType dtype, const string& name, + int32 id, int32 num_tensors) { + TF_EXPECT_OK( + NodeDefBuilder("scoped_allocator_concat_op", "_ScopedAllocatorConcat") + .Attr("shape", shape) + .Attr("T", dtype) + .Attr("N", num_tensors) + .Attr("sa_name", name) + .Attr("id", id) + .Input(FakeInput(dtype)) // backing tensor + .Input(FakeInput(num_tensors, dtype)) // list of tensors + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + } + + void ExecOp(DataType dtype, int32 id, + const std::vector& fields_shapes) { + Tensor* backing_tensor = nullptr; + std::vector tensors; + std::vector fields; + PrepOp(dtype, id, fields_shapes, &fields, &backing_tensor, allocator(), + device_->GetScopedAllocatorMgr(), "split", &tensors, &inputs_, + input_types_); + + TF_ASSERT_OK(RunOpKernel()); + + // Check input and output are same tensor. + const Tensor& input = context_->input(0); + OpOutputList output_list; + Status s = context_->output_list("output", &output_list); + TF_ASSERT_OK(s); + const Tensor& output = *(output_list[0]); + CHECK_EQ(DMAHelper::base(&input), DMAHelper::base(&output)); + CHECK_EQ(input.dtype(), output.dtype()); + CHECK_EQ(input.NumElements(), output.NumElements()); + + // Free the backing tensor which was allocated in PrepOp. + delete backing_tensor; + } +}; + +TEST_F(ScopedAllocatorConcatOpTest, Success1) { + MakeOp({32}, DT_FLOAT, "test", 120, 2); + ExecOp(DT_FLOAT, 120, {{16}, {16}}); +} + +TEST_F(ScopedAllocatorConcatOpTest, Success2) { + MakeOp({2, 2, 2}, DT_DOUBLE, "test", 120, 2); + ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); +} + +TEST_F(ScopedAllocatorConcatOpTest, Success3) { + MakeOp({3, 3, 3}, DT_HALF, "test", 120, 3); + ExecOp(DT_HALF, 120, {{3, 3}, {3, 3}, {3, 3}}); +} + +TEST_F(ScopedAllocatorConcatOpTest, FailDtypeCheck) { + MakeOp({8}, DT_FLOAT, "test", 120, 2); + EXPECT_DEATH(ExecOp(DT_DOUBLE, 120, {{4}, {4}}), ""); +} + +TEST_F(ScopedAllocatorConcatOpTest, FailNumElementsCheck) { + MakeOp({32}, DT_FLOAT, "test", 120, 2); + AddInputFromArray({8}, {0, 1, 2, 3, 4, 5, 6, 7}); + AddInputFromArray({4}, {0, 1, 2, 3}); + AddInputFromArray({4}, {4, 5, 6, 7}); + Status s = RunOpKernel(); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); +} + +// This test should fail because the backing tensor and the input tensors are +// unrelated, i.e. the inputs are not slices of the backing tensor. +TEST_F(ScopedAllocatorConcatOpTest, FailBounds) { + MakeOp({8}, DT_DOUBLE, "test", 120, 2); + AddInputFromArray({8}, {0, 1, 2, 3, 4, 5, 6, 7}); + AddInputFromArray({4}, {0, 1, 2, 3}); + AddInputFromArray({4}, {4, 5, 6, 7}); + Status s = RunOpKernel(); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); +} + +class ScopedAllocatorSplitOpTest : public OpsTestBase { + protected: + void BuildNodeDef(const TensorShape& shape, DataType dtype, + const string& name, int32 id, int32 num_tensors) { + TF_EXPECT_OK( + NodeDefBuilder("scoped_allocator_split_op", "_ScopedAllocatorSplit") + .Attr("T", dtype) + .Attr("N", num_tensors) + .Attr("sa_name", name) + .Attr("id", id) + .Input(FakeInput(dtype)) // backing tensor and input + .Input( + FakeInput(num_tensors, dtype)) // list of subtensors to forward + .Finalize(node_def())); + } + + void MakeOp(const TensorShape& shape, DataType dtype, const string& name, + int32 id, int32 num_tensors) { + BuildNodeDef(shape, dtype, name, id, num_tensors); + TF_EXPECT_OK(InitOp()); + } + + // Similar to ConcatOpTest, we add inputs that are allocated from + // ScopedAllocator so that the memory lines up nicely. + void ExecOp(DataType dtype, int32 id, + const std::vector& fields_shapes) { + Tensor* backing_tensor = nullptr; + std::vector tensors; + std::vector fields; + PrepOp(dtype, id, fields_shapes, &fields, &backing_tensor, allocator(), + device_->GetScopedAllocatorMgr(), "split", &tensors, &inputs_, + input_types_); + + TF_ASSERT_OK(RunOpKernel()); + + // Check that outputs are slices of backing tensor. + const Tensor& input = context_->input(0); + const void* lower_limit = DMAHelper::base(&input); + const char* lower_limit_c = + static_cast(lower_limit); // for pointer arithmetic + OpOutputList output_list; + Status s = context_->output_list("output", &output_list); + TF_ASSERT_OK(s); + for (int i = 0; i < output_list.size(); i++) { + const Tensor& output = *(output_list[i]); + const void* expected_base = + static_cast(lower_limit_c + fields[i].offset); + CHECK_EQ(output.dtype(), input.dtype()); + CHECK_EQ(expected_base, DMAHelper::base(&output)); + CHECK_EQ(output.NumElements(), fields_shapes[i].num_elements()); + } + + // Free the backing tensor which was allocated in PrepOp. + delete backing_tensor; + } +}; + +TEST_F(ScopedAllocatorSplitOpTest, Success1) { + MakeOp({32}, DT_FLOAT, "test", 120, 2); + ExecOp(DT_FLOAT, 120, {{16}, {16}}); +} + +TEST_F(ScopedAllocatorSplitOpTest, Success2) { + MakeOp({2, 2, 2}, DT_DOUBLE, "test", 120, 2); + ExecOp(DT_DOUBLE, 120, {{2, 2}, {2, 2}}); +} + +TEST_F(ScopedAllocatorSplitOpTest, Success3) { + MakeOp({3, 3, 3}, DT_HALF, "test", 120, 3); + ExecOp(DT_HALF, 120, {{3, 3}, {3, 3}, {3, 3}}); +} + +TEST_F(ScopedAllocatorSplitOpTest, FailNLessThan2) { + BuildNodeDef({4, 4}, DT_FLOAT, "test", 120, 1); + Status s = InitOp(); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); +} + +TEST_F(ScopedAllocatorSplitOpTest, FailDtypeCheck) { + MakeOp({8}, DT_FLOAT, "test", 120, 2); + EXPECT_DEATH(ExecOp(DT_HALF, 120, {{4}, {4}}), ""); +} + +TEST_F(ScopedAllocatorSplitOpTest, FailBounds) { + MakeOp({8}, DT_DOUBLE, "test", 120, 2); + AddInputFromArray({8}, {0, 1, 2, 3, 4, 5, 6, 7}); + AddInputFromArray({4}, {0, 1, 2, 3}); + AddInputFromArray({4}, {4, 5, 6, 7}); + Status s = RunOpKernel(); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); +} + +} // end namespace tensorflow diff --git a/tensorflow/core/ops/scoped_allocator_ops.cc b/tensorflow/core/ops/scoped_allocator_ops.cc new file mode 100644 index 0000000000..f053a53f4c --- /dev/null +++ b/tensorflow/core/ops/scoped_allocator_ops.cc @@ -0,0 +1,81 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +namespace tensorflow { + +REGISTER_OP("_ScopedAllocator") + .Output("output: T") + .Attr("shapes: list(shape)") + .Attr("T: type") + .Attr("sa_name: string") + .Attr("id: int") + .Attr("expected_call_count: int") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Allocates a mutable tensor that becomes available to appropriately annotated +downstream Ops as backing store for their output tensor allocations via the +ScopedAllocatorMgr. +Returns a reference to this value. + +This is an experimental op for internal use only. It is possible to use this +op in unsafe ways. +)doc"); + +REGISTER_OP("_ScopedAllocatorConcat") + .Output("output: T") + .Input("backing: T") + .Input("inputs: N * T") + .Attr("shape: shape") + .Attr("T: type") + .Attr("sa_name: string") + .Attr("id: int") + .Attr("N: int >= 2") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Acts like a Concat Op that merges multple tensors into one, however it must +only be used in conjunction with a ScopedAllocator which is backing the memory +of all of its input tensors so that actually it just outputs a read-only +reference to that ScopedAllocator's backing tensor. + +This is an experimental op for internal use only. It is possible to use this +op in unsafe ways. +)doc"); + +REGISTER_OP("_ScopedAllocatorSplit") + .Output("output: N * T") + .Input("concat: T") + .Input("split: N * T") + .Attr("T: type") + .Attr("sa_name: string") + .Attr("id: int") + .Attr("N: int >= 2") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Acts like a Concat Op that merges multple tensors into one, however it must +only be used in conjunction with a ScopedAllocator which is backing the memory +of all of its input tensors so that actually it just outputs a read-only +reference to that ScopedAllocator's backing tensor. + +This is an experimental op for internal use only. It is possible to use this +op in unsafe ways. +)doc"); + +} // end namespace tensorflow -- GitLab From 5f50c1ea7d62d12253b56030110e68c8c1e87e7c Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 22 Mar 2018 11:29:25 -0700 Subject: [PATCH 361/960] Cleanup: replace an errant `in_eager_mode()` with `executing_eagerly()`. PiperOrigin-RevId: 190098277 --- tensorflow/python/framework/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b3fa39fdab..de222e1932 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5411,7 +5411,7 @@ def get_name_scope(): Returns: A string representing the current name scope. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return context.context().scope_name.rstrip("/") return get_default_graph().get_name_scope() -- GitLab From ebf8abdb4db1f4224ba61cd1d478e5301ff4bfd7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 11:55:11 -0700 Subject: [PATCH 362/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 190102805 --- tensorflow/go/op/wrappers.go | 2112 +++++++++++++++++----------------- 1 file changed, 1056 insertions(+), 1056 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 16472464db..92370c4f95 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -605,75 +605,123 @@ func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides return op.Output(0) } -// MapPeekAttr is an optional argument to MapPeek. -type MapPeekAttr func(optionalAttr) +// SpaceToDepthAttr is an optional argument to SpaceToDepth. +type SpaceToDepthAttr func(optionalAttr) -// MapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapPeekCapacity(value int64) MapPeekAttr { +// SpaceToDepthDataFormat sets the optional data_format attribute to value. +// If not specified, defaults to "NHWC" +func SpaceToDepthDataFormat(value string) SpaceToDepthAttr { return func(m optionalAttr) { - m["capacity"] = value + m["data_format"] = value } } -// MapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// SpaceToDepth for tensors of type T. // -// REQUIRES: value >= 0 -func MapPeekMemoryLimit(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapPeekContainer(value string) MapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapPeekSharedName(value string) MapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the +// Rearranges blocks of spatial data, into depth. More specifically, +// this op outputs a copy of the input tensor where values from the `height` +// and `width` dimensions are moved to the `depth` dimension. +// The attr `block_size` indicates the input block size. // -// underlying container does not contain this key -// this op will block until it does. -func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { +// * Non-overlapping blocks of size `block_size x block size` are rearranged +// into depth at each location. +// * The depth of the output tensor is `block_size * block_size * input_depth`. +// * The Y, X coordinates within each block of the input become the high order +// component of the output channel index. +// * The input tensor's height and width must be divisible by block_size. +// +// The `data_format` attr specifies the layout of the input and output tensors +// with the following options: +// "NHWC": `[ batch, height, width, channels ]` +// "NCHW": `[ batch, channels, height, width ]` +// "NCHW_VECT_C": +// `qint8 [ batch, channels / 4, height, width, 4 ]` +// +// It is useful to consider the operation as transforming a 6-D Tensor. +// e.g. for data_format = NHWC, +// Each element in the input tensor can be specified via 6 coordinates, +// ordered by decreasing memory layout significance as: +// n,oY,bY,oX,bX,iC (where n=batch index, oX, oY means X or Y coordinates +// within the output image, bX, bY means coordinates +// within the input block, iC means input channels). +// The output would be a transpose to the following layout: +// n,oY,oX,bY,bX,iC +// +// This operation is useful for resizing the activations between convolutions +// (but keeping all data), e.g. instead of pooling. It is also useful for training +// purely convolutional models. +// +// For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and +// block_size = 2: +// +// ``` +// x = [[[[1], [2]], +// [[3], [4]]]] +// ``` +// +// This operation will output a tensor of shape `[1, 1, 1, 4]`: +// +// ``` +// [[[[1, 2, 3, 4]]]] +// ``` +// +// Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`, +// the corresponding output will have a single element (i.e. width and height are +// both 1) and will have a depth of 4 channels (1 * block_size * block_size). +// The output element shape is `[1, 1, 4]`. +// +// For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g. +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// This operation, for block_size of 2, will return the following tensor of shape +// `[1, 1, 1, 12]` +// +// ``` +// [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] +// ``` +// +// Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2: +// +// ``` +// x = [[[[1], [2], [5], [6]], +// [[3], [4], [7], [8]], +// [[9], [10], [13], [14]], +// [[11], [12], [15], [16]]]] +// ``` +// +// the operator will return the following tensor of shape `[1 2 2 4]`: +// +// ``` +// x = [[[[1, 2, 3, 4], +// [5, 6, 7, 8]], +// [[9, 10, 11, 12], +// [13, 14, 15, 16]]]] +// ``` +// +// Arguments: +// +// block_size: The size of the spatial block. +func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...SpaceToDepthAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{"block_size": block_size} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MapPeek", + Type: "SpaceToDepth", Input: []tf.Input{ - key, indices, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapPeek", err) - return - } - return values + return op.Output(0) } // Returns (x - y)(x - y) element-wise. @@ -3383,45 +3431,6 @@ func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { return op.Output(0) } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed Precision at `k` as a `bool Tensor`. -func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"k": k} - opspec := tf.OpSpec{ - Type: "InTopK", - Input: []tf.Input{ - predictions, targets, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Given a quantized tensor described by (input, input_min, input_max), outputs a // // range that covers the actual values present in that tensor. This op is @@ -5092,45 +5101,46 @@ func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Outp return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) +// AvgPoolGradAttr is an optional argument to AvgPoolGrad. +type AvgPoolGradAttr func(optionalAttr) -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. +// AvgPoolGradDataFormat sets the optional data_format attribute to value. // -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { return func(m optionalAttr) { - m["element_shape"] = value + m["data_format"] = value } } -// Gather specific elements from the TensorArray into output `value`. -// -// All elements selected by `indices` must have the same shape. +// Computes gradients of the average pooling function. // // Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. +// the output of `avg_pool`. +// ksize: The size of the sliding window for each dimension of the input. +// strides: The stride of the sliding window for each dimension of the input. +// padding: The type of padding algorithm to use. // -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). -func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. +func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "AvgPoolGrad", Input: []tf.Input{ - handle, indices, flow_in, + orig_input_shape, grad, }, Attrs: attrs, } @@ -5138,18 +5148,181 @@ func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. +// StageClearAttr is an optional argument to StageClear. +type StageClearAttr func(optionalAttr) + +// StageClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. +// REQUIRES: value >= 0 +func StageClearCapacity(value int64) StageClearAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. +// REQUIRES: value >= 0 +func StageClearMemoryLimit(value int64) StageClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// StageClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageClearContainer(value string) StageClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StageClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageClearSharedName(value string) StageClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. +// +// Returns the created operation. +func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StageClear", + + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. +type ComputeAccidentalHitsAttr func(optionalAttr) + +// ComputeAccidentalHitsSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes the ids of the positions in sampled_candidates that match true_labels. +// +// When doing log-odds NCE, the result of this op should be passed through a +// SparseToDense op, then added to the logits of the sampled candidates. This has +// the effect of 'removing' the sampled labels that match the true labels by +// making the classifier sure that they are sampled labels. +// +// Arguments: +// true_classes: The true_classes output of UnpackSparseLabels. +// sampled_candidates: The sampled_candidates output of CandidateSampler. +// num_true: Number of true labels per context. +// +// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label +// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element +// is -FLOAT_MAX. +func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ComputeAccidentalHits", + Input: []tf.Input{ + true_classes, sampled_candidates, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) + +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. +// +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { + return func(m optionalAttr) { + m["element_shape"] = value + } +} + +// Gather specific elements from the TensorArray into output `value`. +// +// All elements selected by `indices` must have the same shape. +// +// Arguments: +// handle: The handle to a TensorArray. +// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. +// +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). +func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorArrayGatherV3", + Input: []tf.Input{ + handle, indices, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. +// +// Arguments: +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. // // Returns A Tensor of the same shape as the input `string_tensor`. func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { @@ -8454,6 +8627,81 @@ func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and return tensors } +// Computes the maximum along segments of a tensor. +// +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. +// +// Computes a tensor such that +// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such +// that `segment_ids[j] == i`. +// +// If the max is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMax", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that skips `count` elements from the `input_dataset`. +// +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. +// +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SkipDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes hyperbolic tangent of `x` element-wise. +func Tanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tanh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Decode web-safe base64-encoded strings. // // Input may or may not have padding at the end. See EncodeBase64 for padding. @@ -8901,25 +9149,140 @@ func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Out return op.Output(0) } -// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// DecodeJpegAttr is an optional argument to DecodeJpeg. +type DecodeJpegAttr func(optionalAttr) + +// DecodeJpegChannels sets the optional channels attribute to value. // -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// names: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this vector must be the same length as "serialized". -// sparse_keys: A list of Nsparse string Tensors (scalars). -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples' features associated with dense values. -// dense_defaults: A list of Ndense Tensors (some may be empty). -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeJpegChannels(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["channels"] = value + } +} + +// DecodeJpegRatio sets the optional ratio attribute to value. +// +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeJpegRatio(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value + } +} + +// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. +// +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} + +// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value + } +} + +// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value + } +} + +// DecodeJpegDctMethod sets the optional dct_method attribute to value. +// +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeJpegDctMethod(value string) DecodeJpegAttr { + return func(m optionalAttr) { + m["dct_method"] = value + } +} + +// Decode a JPEG-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// This op also supports decoding PNGs and non-animated GIFs since the interface is +// the same, though it is cleaner to use `tf.image.decode_image`. +// +// Arguments: +// contents: 0-D. The JPEG-encoded image. +// +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeJpeg", + Input: []tf.Input{ + contents, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. +// names: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) names for the +// corresponding serialized protos. These are purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no names are available. +// If non-empty, this vector must be the same length as "serialized". +// sparse_keys: A list of Nsparse string Tensors (scalars). +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: A list of Ndense string Tensors (scalars). +// The keys expected in the Examples' features associated with dense values. +// dense_defaults: A list of Ndense Tensors (some may be empty). +// dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. // If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, // then the shape of dense_defaults[j] must match that of dense_shapes[j]. // If dense_shapes[j] has an undefined major dimension (variable strides dense @@ -9073,39 +9436,234 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) { input, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. +type ResourceSparseApplyAdagradDAAttr func(optionalAttr) + +// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. +// +// Arguments: +// var_: Should be from a Variable(). +// gradient_accumulator: Should be from a Variable(). +// gradient_squared_accumulator: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Learning rate. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// global_step: Training step number. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyAdagradDA", + Input: []tf.Input{ + var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// EncodeJpegAttr is an optional argument to EncodeJpeg. +type EncodeJpegAttr func(optionalAttr) + +// EncodeJpegFormat sets the optional format attribute to value. +// +// value: Per pixel image format. +// If not specified, defaults to "" +func EncodeJpegFormat(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["format"] = value + } +} + +// EncodeJpegQuality sets the optional quality attribute to value. +// +// value: Quality of the compression from 0 to 100 (higher is better and slower). +// If not specified, defaults to 95 +func EncodeJpegQuality(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["quality"] = value + } +} + +// EncodeJpegProgressive sets the optional progressive attribute to value. +// +// value: If True, create a JPEG that loads progressively (coarse to fine). +// If not specified, defaults to false +func EncodeJpegProgressive(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["progressive"] = value + } +} + +// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. +// +// value: If True, spend CPU/RAM to reduce size with no quality change. +// If not specified, defaults to false +func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["optimize_size"] = value + } +} + +// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. +// +// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. +// If not specified, defaults to true +func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["chroma_downsampling"] = value + } +} + +// EncodeJpegDensityUnit sets the optional density_unit attribute to value. +// +// value: Unit used to specify `x_density` and `y_density`: +// pixels per inch (`'in'`) or centimeter (`'cm'`). +// If not specified, defaults to "in" +func EncodeJpegDensityUnit(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["density_unit"] = value + } +} + +// EncodeJpegXDensity sets the optional x_density attribute to value. +// +// value: Horizontal pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegXDensity(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["x_density"] = value + } +} + +// EncodeJpegYDensity sets the optional y_density attribute to value. +// +// value: Vertical pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegYDensity(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["y_density"] = value + } +} + +// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. +// +// value: If not empty, embed this XMP metadata in the image header. +// If not specified, defaults to "" +func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["xmp_metadata"] = value + } +} + +// JPEG-encode an image. +// +// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. +// +// The attr `format` can be used to override the color format of the encoded +// output. Values can be: +// +// * `''`: Use a default format based on the number of channels in the image. +// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension +// of `image` must be 1. +// * `rgb`: Output an RGB JPEG image. The `channels` dimension +// of `image` must be 3. +// +// If `format` is not specified or is the empty string, a default format is picked +// in function of the number of channels in `image`: +// +// * 1: Output a grayscale image. +// * 3: Output an RGB image. +// +// Arguments: +// image: 3-D with shape `[height, width, channels]`. +// +// Returns 0-D. JPEG-encoded image. +func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "EncodeJpeg", + Input: []tf.Input{ + image, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MultinomialAttr is an optional argument to Multinomial. +type MultinomialAttr func(optionalAttr) + +// MultinomialSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 is set to be non-zero, the internal random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func MultinomialSeed(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed"] = value + } } -// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. -type ResourceSparseApplyAdagradDAAttr func(optionalAttr) - -// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// MultinomialSeed2 sets the optional seed2 attribute to value. // -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func MultinomialSeed2(value int64) MultinomialAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed2"] = value } } -// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. +// MultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value + } +} + +// Draws samples from a multinomial distribution. // // Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. // -// Returns the created operation. -func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9114,13 +9672,14 @@ func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumul a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagradDA", + Type: "Multinomial", Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, + logits, num_samples, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } // Returns the truth value of NOT x element-wise. @@ -14914,208 +15473,12 @@ func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { // // The inputs must be two-dimensional matrices and the inner dimension of "a" must // match the outer dimension of "b". This op is optimized for the case where at -// least one of "a" or "b" is sparse. The breakeven for using this versus a dense -// matrix multiply on one platform was 30% zero values in the sparse matrix. -// -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. -func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MultinomialAttr is an optional argument to Multinomial. -type MultinomialAttr func(optionalAttr) - -// MultinomialSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 is set to be non-zero, the internal random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func MultinomialSeed(value int64) MultinomialAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// MultinomialSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func MultinomialSeed2(value int64) MultinomialAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// MultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// Draws samples from a multinomial distribution. -// -// Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. -// -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Multinomial", - Input: []tf.Input{ - logits, num_samples, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodeJpegAttr is an optional argument to EncodeJpeg. -type EncodeJpegAttr func(optionalAttr) - -// EncodeJpegFormat sets the optional format attribute to value. -// -// value: Per pixel image format. -// If not specified, defaults to "" -func EncodeJpegFormat(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["format"] = value - } -} - -// EncodeJpegQuality sets the optional quality attribute to value. -// -// value: Quality of the compression from 0 to 100 (higher is better and slower). -// If not specified, defaults to 95 -func EncodeJpegQuality(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["quality"] = value - } -} - -// EncodeJpegProgressive sets the optional progressive attribute to value. -// -// value: If True, create a JPEG that loads progressively (coarse to fine). -// If not specified, defaults to false -func EncodeJpegProgressive(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["progressive"] = value - } -} - -// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. -// -// value: If True, spend CPU/RAM to reduce size with no quality change. -// If not specified, defaults to false -func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["optimize_size"] = value - } -} - -// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. -// -// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. -// If not specified, defaults to true -func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["chroma_downsampling"] = value - } -} - -// EncodeJpegDensityUnit sets the optional density_unit attribute to value. -// -// value: Unit used to specify `x_density` and `y_density`: -// pixels per inch (`'in'`) or centimeter (`'cm'`). -// If not specified, defaults to "in" -func EncodeJpegDensityUnit(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["density_unit"] = value - } -} - -// EncodeJpegXDensity sets the optional x_density attribute to value. -// -// value: Horizontal pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegXDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["x_density"] = value - } -} - -// EncodeJpegYDensity sets the optional y_density attribute to value. -// -// value: Vertical pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegYDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["y_density"] = value - } -} - -// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. -// -// value: If not empty, embed this XMP metadata in the image header. -// If not specified, defaults to "" -func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["xmp_metadata"] = value - } -} - -// JPEG-encode an image. -// -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. -// -// The attr `format` can be used to override the color format of the encoded -// output. Values can be: -// -// * `''`: Use a default format based on the number of channels in the image. -// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension -// of `image` must be 1. -// * `rgb`: Output an RGB JPEG image. The `channels` dimension -// of `image` must be 3. -// -// If `format` is not specified or is the empty string, a default format is picked -// in function of the number of channels in `image`: -// -// * 1: Output a grayscale image. -// * 3: Output an RGB image. -// -// Arguments: -// image: 3-D with shape `[height, width, channels]`. +// least one of "a" or "b" is sparse. The breakeven for using this versus a dense +// matrix multiply on one platform was 30% zero values in the sparse matrix. // -// Returns 0-D. JPEG-encoded image. -func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { +// The gradient computation of this operation will only take advantage of sparsity +// in the input gradient when that gradient comes from a Relu. +func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -15124,9 +15487,9 @@ func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (cont a(attrs) } opspec := tf.OpSpec{ - Type: "EncodeJpeg", + Type: "SparseMatMul", Input: []tf.Input{ - image, + a, b, }, Attrs: attrs, } @@ -15513,6 +15876,45 @@ func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } +// Says whether the targets are in the top `K` predictions. +// +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. +// +// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// +// Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. +// +// Returns Computed Precision at `k` as a `bool Tensor`. +func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"k": k} + opspec := tf.OpSpec{ + Type: "InTopK", + Input: []tf.Input{ + predictions, targets, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the gradient for the inverse of `x` wrt its input. // // Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` @@ -16588,243 +16990,75 @@ func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. -// -// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SkipDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes hyperbolic tangent of `x` element-wise. -func Tanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tanh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the maximum along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such -// that `segment_ids[j] == i`. -// -// If the max is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMax", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. -type AvgPoolGradAttr func(optionalAttr) - -// AvgPoolGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the average pooling function. -// -// Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. -// padding: The type of padding algorithm to use. -// -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. -func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPoolGrad", - Input: []tf.Input{ - orig_input_shape, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StageClearAttr is an optional argument to StageClear. -type StageClearAttr func(optionalAttr) +// MapPeekAttr is an optional argument to MapPeek. +type MapPeekAttr func(optionalAttr) -// StageClearCapacity sets the optional capacity attribute to value. +// MapPeekCapacity sets the optional capacity attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func StageClearCapacity(value int64) StageClearAttr { +func MapPeekCapacity(value int64) MapPeekAttr { return func(m optionalAttr) { m["capacity"] = value } } -// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// MapPeekMemoryLimit sets the optional memory_limit attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func StageClearMemoryLimit(value int64) StageClearAttr { +func MapPeekMemoryLimit(value int64) MapPeekAttr { return func(m optionalAttr) { m["memory_limit"] = value } } -// StageClearContainer sets the optional container attribute to value. +// MapPeekContainer sets the optional container attribute to value. // If not specified, defaults to "" -func StageClearContainer(value string) StageClearAttr { +func MapPeekContainer(value string) MapPeekAttr { return func(m optionalAttr) { m["container"] = value } } -// StageClearSharedName sets the optional shared_name attribute to value. +// MapPeekSharedName sets the optional shared_name attribute to value. // If not specified, defaults to "" -func StageClearSharedName(value string) StageClearAttr { +func MapPeekSharedName(value string) MapPeekAttr { return func(m optionalAttr) { m["shared_name"] = value } } -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StageClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. -type ComputeAccidentalHitsAttr func(optionalAttr) - -// ComputeAccidentalHitsSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Computes the ids of the positions in sampled_candidates that match true_labels. -// -// When doing log-odds NCE, the result of this op should be passed through a -// SparseToDense op, then added to the logits of the sampled candidates. This has -// the effect of 'removing' the sampled labels that match the true labels by -// making the classifier sure that they are sampled labels. -// -// Arguments: -// true_classes: The true_classes output of UnpackSparseLabels. -// sampled_candidates: The sampled_candidates output of CandidateSampler. -// num_true: Number of true labels per context. +// Op peeks at the values at the specified key. If the // -// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label -// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element -// is -FLOAT_MAX. -func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { +// underlying container does not contain this key +// this op will block until it does. +func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ComputeAccidentalHits", + Type: "MapPeek", Input: []tf.Input{ - true_classes, sampled_candidates, + key, indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapPeek", err) + return + } + return values } // Looks up keys in a table, outputs the corresponding values. @@ -18790,29 +19024,272 @@ func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_ opspec := tf.OpSpec{ Type: "DenseToDenseSetOperation", Input: []tf.Input{ - set1, set2, + set1, set2, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Generate a sharded filename. The filename is printf formatted as +// +// %s-%05d-of-%05d, basename, shard, num_shards. +func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilename", + Input: []tf.Input{ + basename, shard, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// BatchToSpace for N-D tensors of type T. +// +// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape +// `block_shape + [batch]`, interleaves these blocks back into the grid defined by +// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as +// the input. The spatial dimensions of this intermediate result are then +// optionally cropped according to `crops` to produce the output. This is the +// reverse of SpaceToBatch. See below for a precise description. +// +// Arguments: +// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, +// where spatial_shape has M dimensions. +// block_shape: 1-D with shape `[M]`, all values must be >= 1. +// crops: 2-D with shape `[M, 2]`, all values must be >= 0. +// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input +// dimension `i + 1`, which corresponds to spatial dimension `i`. It is +// required that +// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. +// +// This operation is equivalent to the following steps: +// +// 1. Reshape `input` to `reshaped` of shape: +// [block_shape[0], ..., block_shape[M-1], +// batch / prod(block_shape), +// input_shape[1], ..., input_shape[N-1]] +// +// 2. Permute dimensions of `reshaped` to produce `permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1], block_shape[0], +// ..., +// input_shape[M], block_shape[M-1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// 3. Reshape `permuted` to produce `reshaped_permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0], +// ..., +// input_shape[M] * block_shape[M-1], +// +// input_shape[M+1], +// ..., +// input_shape[N-1]] +// +// 4. Crop the start and end of dimensions `[1, ..., M]` of +// `reshaped_permuted` according to `crops` to produce the output of shape: +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], +// ..., +// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// Some examples: +// +// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 1]` and value: +// +// ``` +// x = [[[[1], [2]], [[3], [4]]]] +// ``` +// +// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 3]` and value: +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// x = [[[[1], [3]], [[9], [11]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +// +// The output tensor has shape `[1, 4, 4, 1]` and value: +// +// ``` +// x = [[[1], [2], [3], [4]], +// [[5], [6], [7], [8]], +// [[9], [10], [11], [12]], +// [[13], [14], [15], [16]]] +// ``` +// +// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [2, 0]]`: +// +// ``` +// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], +// [[[0], [2], [4]]], [[[0], [10], [12]]], +// [[[0], [5], [7]]], [[[0], [13], [15]]], +// [[[0], [6], [8]]], [[[0], [14], [16]]]] +// ``` +// +// The output tensor has shape `[2, 2, 4, 1]` and value: +// +// ``` +// x = [[[[1], [2], [3], [4]], +// [[5], [6], [7], [8]]], +// [[[9], [10], [11], [12]], +// [[13], [14], [15], [16]]]] +// ``` +func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BatchToSpaceND", + Input: []tf.Input{ + input, block_shape, crops, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// UnpackAttr is an optional argument to Unpack. +type UnpackAttr func(optionalAttr) + +// UnpackAxis sets the optional axis attribute to value. +// +// value: Dimension along which to unpack. Negative values wrap around, so the +// valid range is `[-R, R)`. +// If not specified, defaults to 0 +func UnpackAxis(value int64) UnpackAttr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. +// +// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. +// For example, given a tensor of shape `(A, B, C, D)`; +// +// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` +// and each tensor in `output` will have shape `(B, C, D)`. (Note that the +// dimension unpacked along is gone, unlike `split`). +// +// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` +// and each tensor in `output` will have shape `(A, C, D)`. +// Etc. +// +// This is the opposite of `pack`. +// +// Arguments: +// value: 1-D or higher, with `axis` dimension size equal to `num`. +// +// +// Returns The list of tensors unpacked from `value`. +func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num": num} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Unpack", + Input: []tf.Input{ + value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("Unpack", err) + return + } + return output +} + +// Increments variable pointed to by 'resource' until it reaches 'limit'. +// +// Arguments: +// resource: Should be from a scalar `Variable` node. +// limit: If incrementing ref would bring it above limit, instead generates an +// 'OutOfRange' error. +// +// +// Returns A copy of the input before increment. If nothing else modifies the +// input, the values produced will all be distinct. +func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"limit": limit, "T": T} + opspec := tf.OpSpec{ + Type: "ResourceCountUpTo", + Input: []tf.Input{ + resource, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Generate a sharded filename. The filename is printf formatted as +// Delete the stack from its resource container. // -// %s-%05d-of-%05d, basename, shard, num_shards. -func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { +// Arguments: +// handle: The handle to a stack. +// +// Returns the created operation. +func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ShardedFilename", + Type: "StackCloseV2", Input: []tf.Input{ - basename, shard, num_shards, + handle, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } // Generate a glob pattern matching all sharded file names. @@ -19443,121 +19920,6 @@ func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, op return op.Output(0) } -// DecodeJpegAttr is an optional argument to DecodeJpeg. -type DecodeJpegAttr func(optionalAttr) - -// DecodeJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeJpegChannels(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeJpegRatio(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } -} - -// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeJpegDctMethod(value string) DecodeJpegAttr { - return func(m optionalAttr) { - m["dct_method"] = value - } -} - -// Decode a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// This op also supports decoding PNGs and non-animated GIFs since the interface is -// the same, though it is cleaner to use `tf.image.decode_image`. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeJpeg", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. type ExtractJpegShapeAttr func(optionalAttr) @@ -25121,139 +25483,20 @@ func Exit(scope *Scope, data tf.Output) (output tf.Output) { data, }, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a copy of the input tensor. -func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Snapshot", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SpaceToDepthAttr is an optional argument to SpaceToDepth. -type SpaceToDepthAttr func(optionalAttr) - -// SpaceToDepthDataFormat sets the optional data_format attribute to value. -// If not specified, defaults to "NHWC" -func SpaceToDepthDataFormat(value string) SpaceToDepthAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// SpaceToDepth for tensors of type T. -// -// Rearranges blocks of spatial data, into depth. More specifically, -// this op outputs a copy of the input tensor where values from the `height` -// and `width` dimensions are moved to the `depth` dimension. -// The attr `block_size` indicates the input block size. -// -// * Non-overlapping blocks of size `block_size x block size` are rearranged -// into depth at each location. -// * The depth of the output tensor is `block_size * block_size * input_depth`. -// * The Y, X coordinates within each block of the input become the high order -// component of the output channel index. -// * The input tensor's height and width must be divisible by block_size. -// -// The `data_format` attr specifies the layout of the input and output tensors -// with the following options: -// "NHWC": `[ batch, height, width, channels ]` -// "NCHW": `[ batch, channels, height, width ]` -// "NCHW_VECT_C": -// `qint8 [ batch, channels / 4, height, width, 4 ]` -// -// It is useful to consider the operation as transforming a 6-D Tensor. -// e.g. for data_format = NHWC, -// Each element in the input tensor can be specified via 6 coordinates, -// ordered by decreasing memory layout significance as: -// n,oY,bY,oX,bX,iC (where n=batch index, oX, oY means X or Y coordinates -// within the output image, bX, bY means coordinates -// within the input block, iC means input channels). -// The output would be a transpose to the following layout: -// n,oY,oX,bY,bX,iC -// -// This operation is useful for resizing the activations between convolutions -// (but keeping all data), e.g. instead of pooling. It is also useful for training -// purely convolutional models. -// -// For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and -// block_size = 2: -// -// ``` -// x = [[[[1], [2]], -// [[3], [4]]]] -// ``` -// -// This operation will output a tensor of shape `[1, 1, 1, 4]`: -// -// ``` -// [[[[1, 2, 3, 4]]]] -// ``` -// -// Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`, -// the corresponding output will have a single element (i.e. width and height are -// both 1) and will have a depth of 4 channels (1 * block_size * block_size). -// The output element shape is `[1, 1, 4]`. -// -// For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g. -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// This operation, for block_size of 2, will return the following tensor of shape -// `[1, 1, 1, 12]` -// -// ``` -// [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] -// ``` -// -// Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2: -// -// ``` -// x = [[[[1], [2], [5], [6]], -// [[3], [4], [7], [8]], -// [[9], [10], [13], [14]], -// [[11], [12], [15], [16]]]] -// ``` -// -// the operator will return the following tensor of shape `[1 2 2 4]`: -// -// ``` -// x = [[[[1, 2, 3, 4], -// [5, 6, 7, 8]], -// [[9, 10, 11, 12], -// [13, 14, 15, 16]]]] -// ``` -// -// Arguments: -// -// block_size: The size of the spatial block. -func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...SpaceToDepthAttr) (output tf.Output) { + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns a copy of the input tensor. +func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"block_size": block_size} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "SpaceToDepth", + Type: "Snapshot", Input: []tf.Input{ input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -27644,246 +27887,3 @@ func SpaceToBatch(scope *Scope, input tf.Output, paddings tf.Output, block_size op := scope.AddOperation(opspec) return op.Output(0) } - -// UnpackAttr is an optional argument to Unpack. -type UnpackAttr func(optionalAttr) - -// UnpackAxis sets the optional axis attribute to value. -// -// value: Dimension along which to unpack. Negative values wrap around, so the -// valid range is `[-R, R)`. -// If not specified, defaults to 0 -func UnpackAxis(value int64) UnpackAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. -// -// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. -// For example, given a tensor of shape `(A, B, C, D)`; -// -// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` -// and each tensor in `output` will have shape `(B, C, D)`. (Note that the -// dimension unpacked along is gone, unlike `split`). -// -// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` -// and each tensor in `output` will have shape `(A, C, D)`. -// Etc. -// -// This is the opposite of `pack`. -// -// Arguments: -// value: 1-D or higher, with `axis` dimension size equal to `num`. -// -// -// Returns The list of tensors unpacked from `value`. -func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num": num} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unpack", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Unpack", err) - return - } - return output -} - -// Increments variable pointed to by 'resource' until it reaches 'limit'. -// -// Arguments: -// resource: Should be from a scalar `Variable` node. -// limit: If incrementing ref would bring it above limit, instead generates an -// 'OutOfRange' error. -// -// -// Returns A copy of the input before increment. If nothing else modifies the -// input, the values produced will all be distinct. -func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"limit": limit, "T": T} - opspec := tf.OpSpec{ - Type: "ResourceCountUpTo", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Delete the stack from its resource container. -// -// Arguments: -// handle: The handle to a stack. -// -// Returns the created operation. -func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StackCloseV2", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// BatchToSpace for N-D tensors of type T. -// -// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape -// `block_shape + [batch]`, interleaves these blocks back into the grid defined by -// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as -// the input. The spatial dimensions of this intermediate result are then -// optionally cropped according to `crops` to produce the output. This is the -// reverse of SpaceToBatch. See below for a precise description. -// -// Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has M dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// crops: 2-D with shape `[M, 2]`, all values must be >= 0. -// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input -// dimension `i + 1`, which corresponds to spatial dimension `i`. It is -// required that -// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. -// -// This operation is equivalent to the following steps: -// -// 1. Reshape `input` to `reshaped` of shape: -// [block_shape[0], ..., block_shape[M-1], -// batch / prod(block_shape), -// input_shape[1], ..., input_shape[N-1]] -// -// 2. Permute dimensions of `reshaped` to produce `permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1], block_shape[0], -// ..., -// input_shape[M], block_shape[M-1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// 3. Reshape `permuted` to produce `reshaped_permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0], -// ..., -// input_shape[M] * block_shape[M-1], -// -// input_shape[M+1], -// ..., -// input_shape[N-1]] -// -// 4. Crop the start and end of dimensions `[1, ..., M]` of -// `reshaped_permuted` according to `crops` to produce the output of shape: -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], -// ..., -// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// The output tensor has shape `[1, 4, 4, 1]` and value: -// -// ``` -// x = [[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [2, 0]]`: -// -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` -// -// The output tensor has shape `[2, 2, 4, 1]` and value: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BatchToSpaceND", - Input: []tf.Input{ - input, block_shape, crops, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 6ed3cec4ae1a0706abf3c7b82f6b70f6a45a760c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 11:55:34 -0700 Subject: [PATCH 363/960] Make GetLocalTemporaryDirectories() a virtual method of Env, that is overriden by the implementations. PiperOrigin-RevId: 190102851 --- tensorflow/core/platform/env.h | 8 ++++++-- tensorflow/core/platform/posix/env.cc | 5 ++++- tensorflow/core/platform/windows/env.cc | 4 +++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h index 2a114d47a8..a7e9fcb17c 100644 --- a/tensorflow/core/platform/env.h +++ b/tensorflow/core/platform/env.h @@ -291,10 +291,10 @@ class Env { virtual string FormatLibraryFileName(const string& name, const string& version) = 0; - private: // Returns a possible list of local temporary directories. - void GetLocalTempDirectories(std::vector* list); + virtual void GetLocalTempDirectories(std::vector* list) = 0; + private: std::unique_ptr file_system_registry_; TF_DISALLOW_COPY_AND_ASSIGN(Env); EnvTime* envTime = EnvTime::Default(); @@ -358,6 +358,10 @@ class EnvWrapper : public Env { } private: + void GetLocalTempDirectories(std::vector* list) override { + target_->GetLocalTempDirectories(list); + } + Env* target_; }; diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc index 8097624e09..418874d340 100644 --- a/tensorflow/core/platform/posix/env.cc +++ b/tensorflow/core/platform/posix/env.cc @@ -118,6 +118,9 @@ class PosixEnv : public Env { const string& version) override { return tensorflow::internal::FormatLibraryFileName(name, version); } + + private: + void GetLocalTempDirectories(std::vector* list) override; }; } // namespace @@ -131,7 +134,7 @@ Env* Env::Default() { } #endif -void Env::GetLocalTempDirectories(std::vector* list) { +void PosixEnv::GetLocalTempDirectories(std::vector* list) { list->clear(); // Directories, in order of preference. If we find a dir that // exists, we stop adding other less-preferred dirs diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc index 41b2644170..2f54f423b2 100644 --- a/tensorflow/core/platform/windows/env.cc +++ b/tensorflow/core/platform/windows/env.cc @@ -160,6 +160,8 @@ class WindowsEnv : public Env { } private: + void GetLocalTempDirectories(std::vector* list) override; + typedef VOID(WINAPI* FnGetSystemTimePreciseAsFileTime)(LPFILETIME); FnGetSystemTimePreciseAsFileTime GetSystemTimePreciseAsFileTime_; }; @@ -174,7 +176,7 @@ Env* Env::Default() { return default_env; } -void Env::GetLocalTempDirectories(std::vector* list) { +void WindowsEnv::GetLocalTempDirectories(std::vector* list) { list->clear(); // On windows we'll try to find a directory in this order: // C:/Documents & Settings/whomever/TEMP (or whatever GetTempPath() is) -- GitLab From c7d11e1601d5045f5421c465a438a1d9632df78d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 12:34:02 -0700 Subject: [PATCH 364/960] Merges predict export_outputs in multi_head. PiperOrigin-RevId: 190108434 --- tensorflow/contrib/estimator/BUILD | 1 + .../estimator/python/estimator/multi_head.py | 9 ++++++ .../python/estimator/multi_head_test.py | 28 +++++++++++++++---- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 2f7ed7cd73..676d60231d 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -292,6 +292,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:metrics", "//tensorflow/python:summary", + "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:head", "//tensorflow/python/estimator:metric_keys", "//tensorflow/python/estimator:model_fn", diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 0346ddc24b..23d3714c53 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -23,6 +23,7 @@ import six from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib from tensorflow.python.estimator.canned import metric_keys +from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -319,6 +320,7 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access all_estimator_spec[0].export_outputs, self._heads[0].name), } + merged_predict_outputs = {} for head, spec in zip(self._heads, all_estimator_spec): head_name = head.name for k, v in six.iteritems(spec.export_outputs): @@ -327,8 +329,15 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access else: key = '%s/%s' % (k, head_name) export_outputs[key] = v + if (k == head_lib._PREDICT_SERVING_KEY and # pylint:disable=protected-access + isinstance(v, export_output_lib.PredictOutput)): + for kp, vp in six.iteritems(v.outputs): + key = '%s/%s' % (head_name, kp) + merged_predict_outputs[key] = vp for k, v in six.iteritems(spec.predictions): predictions[(head_name, k)] = v + export_outputs[head_lib._PREDICT_SERVING_KEY] = ( # pylint:disable=protected-access + export_output_lib.PredictOutput(merged_predict_outputs)) return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.PREDICT, diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 65ea89ba1b..8e788a9ce8 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -127,8 +127,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'head1', 'classification/head1', 'predict/head1', - 'head2', 'classification/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'classification/head1', + 'predict/head1', 'head2', 'classification/head2', 'predict/head2'), spec.export_outputs.keys()) # Assert predictions and export_outputs. @@ -158,6 +158,22 @@ class MultiHeadTest(test.TestCase): self.assertAllClose( expected_probabilities['head2'], sess.run(spec.export_outputs['head2'].scores)) + self.assertAllClose( + expected_probabilities['head1'], + sess.run( + spec.export_outputs['predict'].outputs['head1/probabilities'])) + self.assertAllClose( + expected_probabilities['head2'], + sess.run( + spec.export_outputs['predict'].outputs['head2/probabilities'])) + self.assertAllClose( + expected_probabilities['head1'], + sess.run( + spec.export_outputs['predict/head1'].outputs['probabilities'])) + self.assertAllClose( + expected_probabilities['head2'], + sess.run( + spec.export_outputs['predict/head2'].outputs['probabilities'])) def test_predict_two_heads_logits_tensor(self): """Tests predict with logits as Tensor.""" @@ -181,8 +197,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'head1', 'classification/head1', 'predict/head1', - 'head2', 'classification/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'classification/head1', + 'predict/head1', 'head2', 'classification/head2', 'predict/head2'), spec.export_outputs.keys()) # Assert predictions and export_outputs. @@ -238,8 +254,8 @@ class MultiHeadTest(test.TestCase): logits=logits) self.assertItemsEqual( - (_DEFAULT_SERVING_KEY, 'head1', 'regression/head1', 'predict/head1', - 'head2', 'regression/head2', 'predict/head2'), + (_DEFAULT_SERVING_KEY, 'predict', 'head1', 'regression/head1', + 'predict/head1', 'head2', 'regression/head2', 'predict/head2'), spec.export_outputs.keys()) # Assert predictions and export_outputs. -- GitLab From 830fc390b76b5eb138a7f59d0e13e83add653870 Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Thu, 22 Mar 2018 12:45:24 -0700 Subject: [PATCH 365/960] Add tf.contrib.framework.argsort, wrapping tf.nn.top_k (#288). Comparable to np.argsort. PiperOrigin-RevId: 190109968 --- tensorflow/contrib/framework/__init__.py | 1 + .../contrib/framework/python/ops/sort_ops.py | 161 +++++++++++++----- .../framework/python/ops/sort_ops_test.py | 34 ++++ 3 files changed, 156 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 3398b3fd1c..cbb68bd3eb 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -83,6 +83,7 @@ See the @{$python/contrib.framework} guide. @@load_linear_multiclass_bias_initializer @@load_variable_slot_initializer +@@argsort @@py_func @@sort diff --git a/tensorflow/contrib/framework/python/ops/sort_ops.py b/tensorflow/contrib/framework/python/ops/sort_ops.py index 8f62f0ea7b..1921a77c1e 100644 --- a/tensorflow/contrib/framework/python/ops/sort_ops.py +++ b/tensorflow/contrib/framework/python/ops/sort_ops.py @@ -14,6 +14,7 @@ # ============================================================================== """Support for sorting tensors. +@@argsort @@sort """ @@ -21,6 +22,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + +from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops as framework_ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops @@ -47,64 +51,141 @@ def sort(values, axis=-1, direction='ASCENDING', name=None): ValueError: If axis is not a constant scalar, or the direction is invalid. """ with framework_ops.name_scope(name, 'sort'): - if direction not in _SORT_IMPL: - raise ValueError('%s should be one of %s' % - (direction, ', '.join(sorted(_SORT_IMPL.keys())))) - # Axis must be an integer, not a Tensor. - axis = framework_ops.convert_to_tensor(axis, name='axis') - axis_static = tensor_util.constant_value(axis) - if axis.shape.ndims != 0 or axis_static is None: - raise ValueError('axis must be a constant scalar') - axis_static = int(axis_static) # Avoids NumPy casting error + return _sort_or_argsort(values, axis, direction, return_argsort=False) + + +def argsort(values, axis=-1, direction='ASCENDING', stable=False, name=None): + """Returns the indices of a tensor that give its sorted order along an axis. + + For a 1D tensor, `tf.gather(values, tf.argsort(values))` is equivalent to + `tf.sort(values)`. For higher dimensions, the output has the same shape as + `values`, but along the given axis, values represent the index of the sorted + element in that slice of the tensor at the given position. + + Args: + values: 1-D or higher numeric `Tensor`. + axis: The axis along which to sort. The default is -1, which sorts the last + axis. + direction: The direction in which to sort the values (`'ASCENDING'` or + `'DESCENDING'`). + stable: If True, equal elements in the original tensor will not be + re-ordered in the returned order. Unstable sort is not yet implemented, + but will eventually be the default for performance reasons. If you + require a stable order, pass `stable=True` for forwards compatibility. + name: Optional name for the operation. + + Returns: + An int32 `Tensor` with the same shape as `values`. The indices that would + sort each slice of the given `values` along the given `axis`. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + del stable # Unused. + with framework_ops.name_scope(name, 'argsort'): + return _sort_or_argsort(values, axis, direction, return_argsort=True) + + +def _sort_or_argsort(values, axis, direction, return_argsort): + """Internal sort/argsort implementation. + + Args: + values: The input values. + axis: The axis along which to sort. + direction: 'ASCENDING' or 'DESCENDING'. + return_argsort: Whether to return the argsort result. + + Returns: + Either the sorted values, or the indices of the sorted values in the + original tensor. See the `sort` and `argsort` docstrings. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + if direction not in _SORT_IMPL: + raise ValueError('%s should be one of %s' % + (direction, ', '.join(sorted(_SORT_IMPL.keys())))) + # Axis must be an integer, not a Tensor. + axis = framework_ops.convert_to_tensor(axis, name='axis') + axis_static = tensor_util.constant_value(axis) + if axis.shape.ndims != 0 or axis_static is None: + raise ValueError('axis must be a constant scalar') + axis_static = int(axis_static) # Avoids NumPy casting error - values = framework_ops.convert_to_tensor(values, name='values') + values = framework_ops.convert_to_tensor(values, name='values') - return _SORT_IMPL[direction](values, axis_static) + return _SORT_IMPL[direction](values, axis_static, return_argsort) -def _descending_sort(values, axis): +def _descending_sort(values, axis, return_argsort=False): """Sorts values in reverse using `top_k`. Args: values: Tensor of numeric values. axis: Index of the axis which values should be sorted along. + return_argsort: If False, return the sorted values. If True, return the + indices that would sort the values. Returns: The sorted values. """ k = array_ops.shape(values)[axis] rank = array_ops.rank(values) + static_rank = values.shape.ndims # Fast path: sorting the last axis. if axis == -1 or axis + 1 == values.get_shape().ndims: - return nn_ops.top_k(values, k)[0] - - # Otherwise, transpose the array. Swap axes `axis` and `rank - 1`. - if axis < 0: - # Make axis a Tensor with the real axis index if needed. - axis += rank - transposition = array_ops.concat( - [ - # Axes up to axis are unchanged. - math_ops.range(axis), - # Swap axis and rank - 1. - [rank - 1], - # Axes in [axis + 1, rank - 1) are unchanged. - math_ops.range(axis + 1, rank - 1), - # Swap axis and rank - 1. - [axis] - ], - axis=0) - top_k_input = array_ops.transpose(values, transposition) - values, unused_indices = nn_ops.top_k(top_k_input, k) - # transposition contains a single cycle of length 2 (swapping 2 elements), - # so it is an involution (it is its own inverse). - return array_ops.transpose(values, transposition) - - -def _ascending_sort(values, axis): + top_k_input = values + transposition = None + else: + # Otherwise, transpose the array. Swap axes `axis` and `rank - 1`. + if axis < 0: + # Calculate the actual axis index if counting from the end. Use the static + # rank if available, or else make the axis back into a tensor. + axis += static_rank or rank + if static_rank is not None: + # Prefer to calculate the transposition array in NumPy and make it a + # constant. + transposition = constant_op.constant( + np.r_[ + # Axes up to axis are unchanged. + np.arange(axis), + # Swap axis and rank - 1. + [static_rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + np.arange(axis + 1, static_rank - 1), + # Swap axis and rank - 1. + [axis]], + name='transposition') + else: + # Generate the transposition array from the tensors. + transposition = array_ops.concat( + [ + # Axes up to axis are unchanged. + math_ops.range(axis), + # Swap axis and rank - 1. + [rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + math_ops.range(axis + 1, rank - 1), + # Swap axis and rank - 1. + [axis] + ], + axis=0) + top_k_input = array_ops.transpose(values, transposition) + + values, indices = nn_ops.top_k(top_k_input, k) + return_value = indices if return_argsort else values + if transposition is not None: + # transposition contains a single cycle of length 2 (swapping 2 elements), + # so it is an involution (it is its own inverse). + return_value = array_ops.transpose(return_value, transposition) + return return_value + + +def _ascending_sort(values, axis, return_argsort=False): # Negate the values to get the ascending order from descending sort. - values_or_indices = _descending_sort(-values, axis) - return -values_or_indices + values_or_indices = _descending_sort(-values, axis, return_argsort) + # If not argsort, negate the values again. + return values_or_indices if return_argsort else -values_or_indices _SORT_IMPL = { diff --git a/tensorflow/contrib/framework/python/ops/sort_ops_test.py b/tensorflow/contrib/framework/python/ops/sort_ops_test.py index d08ae502f1..a8fb94b245 100644 --- a/tensorflow/contrib/framework/python/ops/sort_ops_test.py +++ b/tensorflow/contrib/framework/python/ops/sort_ops_test.py @@ -24,6 +24,8 @@ from tensorflow.contrib.framework.python.ops import sort_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test @@ -90,6 +92,38 @@ class SortTest(test.TestCase): axis=0, direction='DESCENDING').eval()) + def testSort_staticallyKnownRank_constantTransposition(self): + # The transposition array should be a constant if the rank of "values" is + # statically known. + tensor = random_ops.random_uniform( + # Rank is statically known to be 5, but the dimension lengths are not + # known. + random_ops.random_uniform( + shape=(5,), minval=0, maxval=10, dtype=dtypes.int32)) + sort_ops.sort(tensor, axis=1) + transposition = ( + ops.get_default_graph().get_tensor_by_name('sort/transposition:0')) + self.assertFalse(tensor_util.constant_value(transposition) is None) + self.assertAllEqual( + # Swaps "1" and "4" to put "1" at the end. + tensor_util.constant_value(transposition), + [0, 4, 2, 3, 1]) + + def testArgsort_1d(self): + arr = np.random.random(42) + with self.test_session(): + self.assertAllEqual( + np.sort(arr), + array_ops.gather(arr, sort_ops.argsort(arr)).eval()) + + def testArgsort(self): + arr = np.random.random((5, 6, 7, 8)) + for axis in range(4): + with self.test_session(): + self.assertAllEqual( + np.argsort(arr, axis=axis), + sort_ops.argsort(arr, axis=axis).eval()) + if __name__ == '__main__': test.main() -- GitLab From 3158b499c7c811a5ed4b81a2d8341dd3c8923823 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Thu, 22 Mar 2018 12:52:25 -0700 Subject: [PATCH 366/960] Make api_compatibility_test output verbose by default. PiperOrigin-RevId: 190110866 --- tensorflow/tools/api/tests/api_compatibility_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 96f501e163..603b2a4327 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -58,7 +58,7 @@ _UPDATE_GOLDENS_HELP = """ have to be authorized by TensorFlow leads. """ -# DEFINE_boolean, verbose_diffs, default False: +# DEFINE_boolean, verbose_diffs, default True: _VERBOSE_DIFFS_HELP = """ If set to true, print line by line diffs on all libraries. If set to false, only print which libraries have differences. @@ -286,7 +286,7 @@ if __name__ == '__main__': parser.add_argument( '--update_goldens', type=bool, default=False, help=_UPDATE_GOLDENS_HELP) parser.add_argument( - '--verbose_diffs', type=bool, default=False, help=_VERBOSE_DIFFS_HELP) + '--verbose_diffs', type=bool, default=True, help=_VERBOSE_DIFFS_HELP) FLAGS, unparsed = parser.parse_known_args() # Now update argv, so that unittest library does not get confused. -- GitLab From b57af1577f4a6e4181227d105c68463538b8f9ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 13:05:30 -0700 Subject: [PATCH 367/960] Disable testing flaky tensorflow/contrib/eager/python/examples/spinn:spinn_test under py3 PiperOrigin-RevId: 190112748 --- tensorflow/contrib/eager/python/examples/spinn/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD index 98d01ad1d5..5966f1d487 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/BUILD +++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD @@ -39,6 +39,7 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", ], tags = [ + "no-internal-py3", # flaky "no_cuda_on_cpu_tap", "no_pip", # because spinn.py is under third_party/. ], -- GitLab From 3642ae00e9268229db76667150c113b83339d11e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 13:21:57 -0700 Subject: [PATCH 368/960] Allow specifying in the arrays extra info file: - the shape of the array - the hardcoding of the values of the array as a single repeated constant scalar value, turning an activations array into a constant array. PiperOrigin-RevId: 190115218 --- .../contrib/lite/toco/model_flags.proto | 4 +++- tensorflow/contrib/lite/toco/tooling_util.cc | 24 ++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/toco/model_flags.proto b/tensorflow/contrib/lite/toco/model_flags.proto index 867b86f31d..42e0f54826 100644 --- a/tensorflow/contrib/lite/toco/model_flags.proto +++ b/tensorflow/contrib/lite/toco/model_flags.proto @@ -96,11 +96,13 @@ message RnnState { // model that does not already contain such MinMax information. message ArraysExtraInfo { message Entry { - // Next ID to use: 5. + // Next ID to use: 7. optional string name = 1; optional float min = 2; optional float max = 3; optional IODataType data_type = 4; + optional InputArrayShape shape = 5; + optional float constant_float_value = 6; } repeated Entry entries = 1; } diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index ec1770c129..f3f50487ff 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1972,9 +1972,9 @@ void FinishBuildingRNNStates(Model* model) { void UseArraysExtraInfo(Model* model) { for (const auto& entry : model->flags.arrays_extra_info().entries()) { - QCHECK(model->HasArray(entry.name())) - << "ArraysExtraInfo refers to non-existent array name: " - << entry.name(); + if (!model->HasArray(entry.name())) { + continue; + } auto& array = model->GetArray(entry.name()); auto& minmax = array.GetOrCreateMinMax(); if (entry.has_min() || entry.has_max()) { @@ -1986,6 +1986,24 @@ void UseArraysExtraInfo(Model* model) { array.final_data_type = ConvertIODataTypeToArrayDataType(entry.data_type()); } + if (entry.has_shape()) { + array.clear_shape(); + // Make sure to create the shape even if there are no dims, to + // correctly record 0-D shapes. + array.mutable_shape(); + for (int dim : entry.shape().dims()) { + array.mutable_shape()->mutable_dims()->push_back(dim); + } + } + if (entry.has_constant_float_value()) { + CHECK(array.has_shape()); + CHECK(array.data_type == ArrayDataType::kFloat); + auto& data = array.GetMutableBuffer().data; + data.resize(RequiredBufferSizeForShape(array.shape())); + for (float& f : data) { + f = entry.constant_float_value(); + } + } } } -- GitLab From 9b9e54538fa766679aaa60b73f352e975c213730 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 22 Mar 2018 13:24:51 -0700 Subject: [PATCH 369/960] Disable all the automatic optimizations when testing, to ensure that we can properly compare the results of the original graph against that of the hand optimized graph. PiperOrigin-RevId: 190115606 --- .../core/grappler/utils/grappler_test.cc | 22 +++++++++++++++---- .../core/grappler/utils/grappler_test.h | 7 ++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index 6b6cecebe1..1c15ea65b8 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -17,15 +17,30 @@ limitations under the License. #include #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" #include "tensorflow/core/public/session.h" namespace tensorflow { namespace grappler { +GrapplerTest::GrapplerTest() { + // Turn off all the automatic optimizations to ensure that we run the graph + // exactly as it is given to us. This ensures that we can compare the results + // before and after manual optimization, without any of the automatic + // optimizations interfering in the comparison. + RewriterConfig* cfg = + options_.config.mutable_graph_options()->mutable_rewrite_options(); + cfg->set_constant_folding(RewriterConfig::OFF); + cfg->set_arithmetic_optimization(RewriterConfig::OFF); + cfg->set_dependency_optimization(RewriterConfig::OFF); + cfg->set_loop_optimization(RewriterConfig::OFF); + cfg->set_function_optimization(RewriterConfig::OFF); + cfg->set_layout_optimizer(RewriterConfig::OFF); +} + std::vector GrapplerTest::EvaluateNodes( const GraphDef& graph, const std::vector& node_names) const { - SessionOptions options; - std::unique_ptr session(NewSession(options)); + std::unique_ptr session(NewSession(options_)); TF_CHECK_OK(session->Create(graph)); RunOptions run_options; std::vector output_tensors; @@ -37,8 +52,7 @@ std::vector GrapplerTest::EvaluateNodes( std::vector GrapplerTest::EvaluateFetchNodes( const GrapplerItem& item) const { - SessionOptions options; - std::unique_ptr session(NewSession(options)); + std::unique_ptr session(NewSession(options_)); TF_CHECK_OK(session->Create(item.graph)); RunOptions run_options; if (!item.init_ops.empty()) { diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index c7f06557e7..e0c67381a4 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -24,11 +24,15 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" namespace tensorflow { namespace grappler { class GrapplerTest : public ::testing::Test { + public: + GrapplerTest(); + protected: std::vector EvaluateNodes( const GraphDef& graph, const std::vector& node_names) const; @@ -48,6 +52,9 @@ class GrapplerTest : public ::testing::Test { // Count nodes of the given op-type in a graph. int CountOpNodes(const GraphDef& graph, const string& op); + + private: + SessionOptions options_; }; } // end namespace grappler -- GitLab From f088fa2b1bc010fd4e4396a9f1e6e0868e9890c4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 13:41:13 -0700 Subject: [PATCH 370/960] Code cleanup: rather than storing the outside_compilation shape inference graph as a serialized GraphDef in an attr, put it into the function library. PiperOrigin-RevId: 190118116 --- tensorflow/compiler/jit/BUILD | 1 + .../jit/encapsulate_subgraphs_pass.cc | 67 ++++++++------- .../jit/encapsulate_subgraphs_pass_test.cc | 83 +++++++++---------- 3 files changed, 79 insertions(+), 72 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 0475cd9ff2..8e505da622 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -348,6 +348,7 @@ tf_cc_test( deps = [ ":common", ":compilation_passes", + ":graph_to_functiondef", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", "//tensorflow/cc:function_ops", diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 0685036c9d..7fc43fb263 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -334,8 +334,10 @@ class Encapsulator { void ConnectSequencerToCallNode(Graph* graph_out); Status AddShapeInferenceInfo( + const string& subgraph_name, const string& outside_compilation_subgraph_name, - const std::vector& shapes, GraphDef* inference_graph); + const std::vector& shapes, Graph* inference_graph, + FunctionLibraryDefinition* library); Status ReplaceFunctionDef(FunctionLibraryDefinition* library); @@ -573,7 +575,7 @@ class Encapsulator { const std::unordered_set& recv_at_host_nodes, Node* send_node, FunctionLibraryDefinition* library, std::vector* static_shape_out, - std::unique_ptr* graphdef_out); + std::unique_ptr* graph_out); // Makes a copy of graph containing only nodes that are ancestors of at least // one node in send_from_host_nodes and store it in pruned_graph. On exit @@ -949,8 +951,10 @@ Status Encapsulator::Subgraph::BuildFunctionDef( } Status Encapsulator::Subgraph::AddShapeInferenceInfo( + const string& subgraph_name, const string& outside_compilation_subgraph_name, - const std::vector& shapes, GraphDef* inference_graph) { + const std::vector& shapes, Graph* inference_graph, + FunctionLibraryDefinition* library) { OutsideCompilationSubgraph& oc_subgraph = outside_compilation_subgraphs_.at(outside_compilation_subgraph_name); @@ -972,14 +976,15 @@ Status Encapsulator::Subgraph::AddShapeInferenceInfo( host_compute->AddAttr("shape_inference_graph", ""); host_compute->AddAttr("shapes", shapes); } else { - string serialized_graph; - if (!inference_graph->SerializeToString(&serialized_graph)) { - return errors::Internal( - "Failed to serialize graph for outside compilation subgraph ", - oc_subgraph.host_compute_name); - } - host_compute->AddAttr("shape_inference_graph", serialized_graph); + string inference_graph_name = + strings::StrCat("_outside_compilation_shape_inference_", subgraph_name, + "_", outside_compilation_subgraph_name); + FunctionDef fdef; + TF_RETURN_IF_ERROR( + GraphToFunctionDef(*inference_graph, inference_graph_name, &fdef)); + host_compute->AddAttr("shape_inference_graph", inference_graph_name); host_compute->AddAttr("shapes", std::vector()); + TF_RETURN_IF_ERROR(library->AddFunctionDef(fdef)); } return Status::OK(); } @@ -1760,7 +1765,7 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( const std::unordered_set& recv_at_host_nodes, Node* send_node, FunctionLibraryDefinition* library, std::vector* static_shape_out, - std::unique_ptr* graphdef_out) { + std::unique_ptr* graph_out) { // Maps from nodes in graph_in to nodes in graph_out. // // When an edge has fully defined shape the source node in graph_in is @@ -1777,8 +1782,8 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( std::unordered_map dummy_node_images; std::unordered_map copied_node_images; - std::unique_ptr graph_out(new Graph(graph_in.op_registry())); - graph_out->set_versions(graph_in.versions()); + graph_out->reset(new Graph(graph_in.op_registry())); + (*graph_out)->set_versions(graph_in.versions()); // The final input to the send node is the dynamic key, which we don't include // in the static shapes. static_shape_out->resize(send_node->num_inputs() - 1); @@ -1800,7 +1805,7 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( if (w.leave) { TF_RETURN_IF_ERROR(CopyShapeInferenceNodeToGraph( n, send_node, dummy_node_images, library, &copied_node_images, - graph_out.get())); + graph_out->get())); } else { if (visited[n->id()]) continue; visited[n->id()] = true; @@ -1824,7 +1829,7 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( context->ShapeHandleToProto(shape, &proto); if (dummy_node_images.find(src_node) == dummy_node_images.end()) { dummy_node_images[src_node] = AddDummyShapedNode( - src_node->output_type(src_port), proto, graph_out.get()); + src_node->output_type(src_port), proto, graph_out->get()); } // The final input to the send node is the dynamic key, which we // don't include in the static shapes. @@ -1849,7 +1854,7 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( // The shapes of all the inputs to send_node are statically known. We // won't have to do any inference at compile time so return now: the // shapes were stored in static_shape_out above. - graphdef_out->reset(); + graph_out->reset(); return Status::OK(); } else { // Any shape that is being processed is either the original send node @@ -1872,9 +1877,6 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( } } - graphdef_out->reset(new GraphDef()); - graph_out->ToGraphDef(graphdef_out->get()); - return Status::OK(); } @@ -1997,13 +1999,14 @@ Status Encapsulator::GetShapeInfoForOutsideCompilationSends( } for (auto& subgraph_entry : subgraphs_) { + const string& subgraph_name = subgraph_entry.first; Subgraph& subgraph = subgraph_entry.second; // Find all the recv_at_host nodes in this subgraph. std::vector outside_compilation_names; subgraph.GetOutsideCompilationSubgraphNames(&outside_compilation_names); std::unordered_set recv_at_host_names; - for (const auto& name : outside_compilation_names) { - Node* recv_node = subgraph.GetRecvAtHostNode(name); + for (const auto& oc_name : outside_compilation_names) { + Node* recv_node = subgraph.GetRecvAtHostNode(oc_name); if (recv_node != nullptr) { recv_at_host_names.insert(recv_node->name()); } @@ -2012,26 +2015,30 @@ Status Encapsulator::GetShapeInfoForOutsideCompilationSends( // without knowing the shape of the recv_at_host nodes, and store the // result, along with enough information to complete the job at compile time // once the recv_at_host shapes are known. - for (const auto& name : outside_compilation_names) { - Node* send_node = subgraph.GetSendFromHostNode(name); + for (const auto& oc_name : outside_compilation_names) { + Node* send_node = subgraph.GetSendFromHostNode(oc_name); std::vector static_shape; - std::unique_ptr graphdef; + std::unique_ptr graph; if (send_node != nullptr) { TF_RETURN_IF_ERROR(DoStaticShapeInferenceForOutsideCompilationSend( *pruned_graph, shape_refiner, recv_at_host_names, - node_images[send_node], library, &static_shape, &graphdef)); - if (graphdef == nullptr) { + node_images[send_node], library, &static_shape, &graph)); + if (graph == nullptr) { VLOG(2) << "Send node " << send_node->name() << " shapes"; for (int i = 0; i < static_shape.size(); ++i) { VLOG(2) << static_shape[i].DebugString(); } } else { - VLOG(2) << "Send node " << send_node->name() << " graph\n" - << graphdef->DebugString(); + if (VLOG_IS_ON(2)) { + GraphDef graphdef; + graph->ToGraphDef(&graphdef); + VLOG(2) << "Send node " << send_node->name() << " graph\n" + << graphdef.DebugString(); + } } } - TF_RETURN_IF_ERROR( - subgraph.AddShapeInferenceInfo(name, static_shape, graphdef.get())); + TF_RETURN_IF_ERROR(subgraph.AddShapeInferenceInfo( + subgraph_name, oc_name, static_shape, graph.get(), library)); } if (!outside_compilation_names.empty()) { TF_RETURN_IF_ERROR(subgraph.ReplaceFunctionDef(library)); diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 711b1424c7..94481a1fde 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -13,12 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/jit/graph_to_functiondef.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/graph_def_builder.h" @@ -32,6 +34,24 @@ namespace { const char* const kXlaHostTransferSequencerAttr = "_xla_host_transfer_sequencer"; +Status AddGraphDefToFunctionLibrary(const GraphDefBuilder& graphdef_builder, + const string& name_suffix, + FunctionDefLibrary* library) { + GraphDef graphdef; + TF_RETURN_IF_ERROR(graphdef_builder.ToGraphDef(&graphdef)); + std::unique_ptr graph = + std::unique_ptr(new Graph(OpRegistry::Global())); + GraphConstructorOptions opts; + opts.allow_internal_ops = true; + TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(opts, graphdef, graph.get())); + FunctionDef* fdef = library->add_function(); + TF_RETURN_IF_ERROR(GraphToFunctionDef( + *graph, + strings::StrCat("_outside_compilation_shape_inference_", name_suffix), + fdef)); + return Status::OK(); +} + template bool EqualProtoMap(const ::tensorflow::protobuf::Map& a, const ::tensorflow::protobuf::Map& b, @@ -115,23 +135,7 @@ bool EqualFunctionNodeDef(const NodeDef& a, const NodeDef& b, a.attr(), b.attr(), [](const string& s) { return s; }, [](const AttrValue& v) { return v.DebugString(); }, [](const string& key, const AttrValue& av, const AttrValue& bv) { - if (key == "shape_inference_graph") { - // Default serialization of GraphDef is unstable because maps don't - // serialize deterministically. Rather than go through the hoops to - // turn on deterministic serialization of this attr just for this - // test, add logic here to compare determinstically. - GraphDef ga; - if (!ga.ParseFromString(av.s())) { - return false; - } - GraphDef gb; - if (!gb.ParseFromString(bv.s())) { - return false; - } - return EqualGraphDef(ga, gb, nullptr); - } else { - return av.DebugString() == bv.DebugString(); - } + return av.DebugString() == bv.DebugString(); }, strings::StrCat(diff_preamble, " attr mismatch for node ", a.name()), diff); @@ -848,7 +852,6 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { FunctionDefLibrary library_expected; GraphDef graphdef_expected; - string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); Node* key_constant = @@ -861,9 +864,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { shape.opts().WithName("E")); SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); - GraphDef shape_graph; - TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); - EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } *library_expected.add_function() = test::function::XTimesTwo(); @@ -883,7 +885,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, {"key", "host_compute_channel_F1_O1"}, - {"shape_inference_graph", shape_string_expected}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, {"shapes", gtl::ArraySlice({})}}, {"c"}}, }, @@ -969,7 +972,6 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { FunctionDefLibrary library_expected; GraphDef graphdef_expected; - string shape_string_expected_1; { GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); Node* key_constant = @@ -982,12 +984,10 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { shape1.opts().WithName("E")); SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, shape1.opts().WithName("outside_compilation_F1_O1_send")); - GraphDef shape1_graph; - TF_EXPECT_OK(shape1.ToGraphDef(&shape1_graph)); - EXPECT_TRUE(shape1_graph.SerializeToString(&shape_string_expected_1)); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); } - string shape_string_expected_2; { GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately); Node* key_constant = @@ -1005,9 +1005,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { Node* h = Binary(ops::NodeOut(recv2, 0), e, shape2.opts().WithName("H")); SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", {h}, shape2.opts().WithName("outside_compilation_F1_O2_send")); - GraphDef shape2_graph; - TF_EXPECT_OK(shape2.ToGraphDef(&shape2_graph)); - EXPECT_TRUE(shape2_graph.SerializeToString(&shape_string_expected_2)); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected)); } *library_expected.add_function() = FunctionDefHelper::Create( @@ -1029,7 +1028,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, {"key", "host_compute_channel_F1_O2"}, - {"shape_inference_graph", shape_string_expected_2}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O2"}, {"shapes", gtl::ArraySlice({})}}, {"F"}}, {{"outside_compilation_O1_host_compute"}, @@ -1038,7 +1038,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, {"key", "host_compute_channel_F1_O1"}, - {"shape_inference_graph", shape_string_expected_1}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, {"shapes", gtl::ArraySlice({})}}, {"D"}}, }, @@ -1134,7 +1135,6 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { FunctionDefLibrary library_expected; GraphDef graphdef_expected; - string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); Node* key_constant = @@ -1147,9 +1147,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { shape.opts().WithName("E")); SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); - GraphDef shape_graph; - TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); - EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } TensorShapeProto shape_proto_expected; @@ -1172,7 +1171,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {{"Tinputs", gtl::ArraySlice({DT_FLOAT, DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, {"key", "host_compute_channel_F1_O1"}, - {"shape_inference_graph", shape_string_expected}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, {"shapes", gtl::ArraySlice({})}}, {"D"}}, }, @@ -1661,7 +1661,6 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { FunctionDefLibrary library_expected; GraphDef graphdef_expected; - string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); Node* key_constant = @@ -1673,9 +1672,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { Node* e = BinaryUnknownShape(known, recv, shape.opts().WithName("E")); SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); - GraphDef shape_graph; - TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); - EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); + TF_EXPECT_OK( + AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } *library_expected.add_function() = test::function::XTimesTwo(); @@ -1694,7 +1692,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { {{"Tinputs", gtl::ArraySlice({DT_FLOAT})}, {"Toutputs", gtl::ArraySlice({DT_FLOAT})}, {"key", "host_compute_channel_F1_O1"}, - {"shape_inference_graph", shape_string_expected}, + {"shape_inference_graph", + "_outside_compilation_shape_inference_F1_O1"}, {"shapes", gtl::ArraySlice({})}}, {"c"}}, }, -- GitLab From f2b62548edfd298367bc996fb236ea39f385ff76 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 22 Mar 2018 13:55:48 -0700 Subject: [PATCH 371/960] Fix cases where we export incorrect symbol with tf_export. This can happen when both generated op and its python wrapper have tf_export decorator. create_python_api.py now checks that we don't export different symbols with same name. Also, simplified some logic. PiperOrigin-RevId: 190120505 --- .../api_def/python_api/api_def_ArgMax.pbtxt | 4 + .../api_def/python_api/api_def_ArgMin.pbtxt | 4 + .../python_api/api_def_CountUpTo.pbtxt | 4 + .../core/api_def/python_api/api_def_Div.pbtxt | 4 + .../core/api_def/python_api/api_def_Erf.pbtxt | 4 + .../api_def/python_api/api_def_Identity.pbtxt | 4 + .../core/api_def/python_api/api_def_Mod.pbtxt | 4 + .../api_def/python_api/api_def_Rank.pbtxt | 4 + .../api_def/python_api/api_def_Round.pbtxt | 4 + .../python_api/api_def_ScatterNdUpdate.pbtxt | 4 + .../python_api/api_def_ScatterUpdate.pbtxt | 4 + .../api_def/python_api/api_def_ShapeN.pbtxt | 4 + .../api_def/python_api/api_def_Sign.pbtxt | 4 + .../api_def/python_api/api_def_Sqrt.pbtxt | 4 + .../api_def/python_api/api_def_Square.pbtxt | 4 + tensorflow/python/framework/python_op_gen.cc | 8 +- tensorflow/python/ops/math_ops.py | 4 +- .../tools/api/generator/create_python_api.py | 82 ++++++++++++++----- 18 files changed, 127 insertions(+), 27 deletions(-) create mode 100644 tensorflow/core/api_def/python_api/api_def_ArgMax.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ArgMin.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CountUpTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Div.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Erf.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Identity.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Mod.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Rank.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Round.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScatterNdUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScatterUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ShapeN.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Sign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Sqrt.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Square.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_ArgMax.pbtxt b/tensorflow/core/api_def/python_api/api_def_ArgMax.pbtxt new file mode 100644 index 0000000000..4c23a432f2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ArgMax.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ArgMax" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ArgMin.pbtxt b/tensorflow/core/api_def/python_api/api_def_ArgMin.pbtxt new file mode 100644 index 0000000000..daa14f6386 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ArgMin.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ArgMin" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CountUpTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_CountUpTo.pbtxt new file mode 100644 index 0000000000..f41be2f540 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CountUpTo.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CountUpTo" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Div.pbtxt b/tensorflow/core/api_def/python_api/api_def_Div.pbtxt new file mode 100644 index 0000000000..8e5537c8bf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Div.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Div" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Erf.pbtxt b/tensorflow/core/api_def/python_api/api_def_Erf.pbtxt new file mode 100644 index 0000000000..391167254e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Erf.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Erf" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Identity.pbtxt b/tensorflow/core/api_def/python_api/api_def_Identity.pbtxt new file mode 100644 index 0000000000..00f2afde27 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Identity.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Identity" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Mod.pbtxt b/tensorflow/core/api_def/python_api/api_def_Mod.pbtxt new file mode 100644 index 0000000000..48d828ca72 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Mod.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Mod" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Rank.pbtxt b/tensorflow/core/api_def/python_api/api_def_Rank.pbtxt new file mode 100644 index 0000000000..05aa12f2fa --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Rank.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Rank" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Round.pbtxt b/tensorflow/core/api_def/python_api/api_def_Round.pbtxt new file mode 100644 index 0000000000..74428e2f58 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Round.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Round" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScatterNdUpdate.pbtxt new file mode 100644 index 0000000000..ccf4a9cce8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScatterNdUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScatterNdUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScatterUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScatterUpdate.pbtxt new file mode 100644 index 0000000000..e4c41c1226 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScatterUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScatterUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ShapeN.pbtxt b/tensorflow/core/api_def/python_api/api_def_ShapeN.pbtxt new file mode 100644 index 0000000000..b2dbe74b09 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ShapeN.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ShapeN" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Sign.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sign.pbtxt new file mode 100644 index 0000000000..c2ee91dd12 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Sign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Sign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Sqrt.pbtxt b/tensorflow/core/api_def/python_api/api_def_Sqrt.pbtxt new file mode 100644 index 0000000000..59e2dfe836 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Sqrt.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Sqrt" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Square.pbtxt b/tensorflow/core/api_def/python_api/api_def_Square.pbtxt new file mode 100644 index 0000000000..7b39ae25fa --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Square.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Square" + visibility: HIDDEN +} diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 03721c9a68..9850f0becc 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -78,7 +78,7 @@ bool IsPythonReserved(const string& s) { bool IsOpWithUnderscorePrefix(const string& s) { static const std::set* const kUnderscoreOps = new std::set( {// Lowercase built-in functions and types in Python, from: - // [x for x in dir(__builtins__) if x[0].islower()] + // [x for x in dir(__builtins__) if x[0].islower()] except "round". // These need to be excluded so they don't conflict with actual built-in // functions since we use '*' imports. "abs", "all", "any", "apply", "bin", "bool", "buffer", "bytearray", @@ -90,9 +90,9 @@ bool IsOpWithUnderscorePrefix(const string& s) { "iter", "len", "license", "list", "locals", "long", "map", "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print", "property", "quit", "range", "raw_input", "reduce", "reload", - "repr", "reversed", "round", "set", "setattr", "slice", "sorted", - "staticmethod", "str", "sum", "super", "tuple", "type", "unichr", - "unicode", "vars", "xrange", "zip", + "repr", "reversed", "set", "setattr", "slice", "sorted", "staticmethod", + "str", "sum", "super", "tuple", "type", "unichr", "unicode", "vars", + "xrange", "zip", // These have the same name as ops defined in Python and might be used // incorrectly depending on order of '*' imports. // TODO(annarev): reduce usage of '*' imports and remove these from the diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index c893bf9b90..4699e05269 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -180,6 +180,8 @@ linspace = gen_math_ops.lin_space arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max) # pylint: disable=used-before-assignment arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment +tf_export("arg_max")(arg_max) +tf_export("arg_min")(arg_min) # This is set by resource_variable_ops.py. It is included in this way since @@ -1196,7 +1198,7 @@ tf_export("floor_div")(floor_div) truncatemod = gen_math_ops.truncate_mod tf_export("truncatemod")(truncatemod) floormod = gen_math_ops.floor_mod -tf_export("floormod")(floormod) +tf_export("floormod", "mod")(floormod) def _mul_dispatch(x, y, name=None): diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index bb7c3e77a3..183c4731b8 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -23,7 +23,6 @@ import collections import os import sys -from tensorflow import python as tf from tensorflow.python.util import tf_decorator @@ -39,6 +38,11 @@ Generated by: tensorflow/tools/api/generator/create_python_api.py script. """ +class SymbolExposedTwiceError(Exception): + """Raised when different symbols are exported with the same name.""" + pass + + def format_import(source_module_name, source_name, dest_name): """Formats import statement. @@ -63,6 +67,44 @@ def format_import(source_module_name, source_name, dest_name): return 'import %s as %s' % (source_name, dest_name) +class _ModuleImportsBuilder(object): + """Builds a map from module name to imports included in that module.""" + + def __init__(self): + self.module_imports = collections.defaultdict(list) + self._seen_api_names = set() + + def add_import( + self, dest_module_name, source_module_name, source_name, dest_name): + """Adds this import to module_imports. + + Args: + dest_module_name: (string) Module name to add import to. + source_module_name: (string) Module to import from. + source_name: (string) Name of the symbol to import. + dest_name: (string) Import the symbol using this name. + + Raises: + SymbolExposedTwiceError: Raised when an import with the same + dest_name has already been added to dest_module_name. + """ + import_str = format_import(source_module_name, source_name, dest_name) + if import_str in self.module_imports[dest_module_name]: + return + + # Check if we are trying to expose two different symbols with same name. + full_api_name = dest_name + if dest_module_name: + full_api_name = dest_module_name + '.' + full_api_name + if full_api_name in self._seen_api_names: + raise SymbolExposedTwiceError( + 'Trying to export multiple symbols with same name: %s.' % + full_api_name) + self._seen_api_names.add(full_api_name) + + self.module_imports[dest_module_name].append(import_str) + + def get_api_imports(): """Get a map from destination module to formatted imports. @@ -73,7 +115,9 @@ def get_api_imports(): (for e.g. 'from foo import bar') and constant assignments (for e.g. 'FOO = 123'). """ - module_imports = collections.defaultdict(list) + module_imports_builder = _ModuleImportsBuilder() + visited_symbols = set() + # Traverse over everything imported above. Specifically, # we want to traverse over TensorFlow Python modules. for module in sys.modules.values(): @@ -86,6 +130,8 @@ def get_api_imports(): for module_contents_name in dir(module): attr = getattr(module, module_contents_name) + if id(attr) in visited_symbols: + continue # If attr is _tf_api_constants attribute, then add the constants. if module_contents_name == _API_CONSTANTS_ATTR: @@ -93,36 +139,30 @@ def get_api_imports(): for export in exports: names = export.split('.') dest_module = '.'.join(names[:-1]) - import_str = format_import(module.__name__, value, names[-1]) - module_imports[dest_module].append(import_str) + module_imports_builder.add_import( + dest_module, module.__name__, value, names[-1]) continue _, attr = tf_decorator.unwrap(attr) # If attr is a symbol with _tf_api_names attribute, then # add import for it. if hasattr(attr, '__dict__') and _API_NAMES_ATTR in attr.__dict__: - # The same op might be accessible from multiple modules. - # We only want to consider location where function was defined. - # Here we check if the op is defined in another TensorFlow module in - # sys.modules. - if (hasattr(attr, '__module__') and - attr.__module__.startswith(tf.__name__) and - attr.__module__ != module.__name__ and - attr.__module__ in sys.modules and - module_contents_name in dir(sys.modules[attr.__module__])): + # If the same symbol is available using multiple names, only create + # imports for it once. + if id(attr) in visited_symbols: continue + visited_symbols.add(id(attr)) for export in attr._tf_api_names: # pylint: disable=protected-access names = export.split('.') dest_module = '.'.join(names[:-1]) - import_str = format_import( - module.__name__, module_contents_name, names[-1]) - module_imports[dest_module].append(import_str) + module_imports_builder.add_import( + dest_module, module.__name__, module_contents_name, names[-1]) # Import all required modules in their parent modules. # For e.g. if we import 'foo.bar.Value'. Then, we also # import 'bar' in 'foo'. - imported_modules = set(module_imports.keys()) + imported_modules = set(module_imports_builder.module_imports.keys()) for module in imported_modules: if not module: continue @@ -135,13 +175,11 @@ def get_api_imports(): parent_module += ('.' + module_split[submodule_index-1] if parent_module else module_split[submodule_index-1]) import_from += '.' + parent_module - submodule_import = format_import( - import_from, module_split[submodule_index], + module_imports_builder.add_import( + parent_module, import_from, module_split[submodule_index], module_split[submodule_index]) - if submodule_import not in module_imports[parent_module]: - module_imports[parent_module].append(submodule_import) - return module_imports + return module_imports_builder.module_imports def create_api_files(output_files): -- GitLab From 804f98e5bc0a75284f5f92569e5c82fe88b455ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 22 Mar 2018 14:01:10 -0700 Subject: [PATCH 372/960] Supports optimizer arg in head.create_estimator_spec. PiperOrigin-RevId: 190121386 --- tensorflow/contrib/estimator/BUILD | 2 + .../estimator/python/estimator/head.py | 28 +++- .../estimator/python/estimator/head_test.py | 36 +++++ .../estimator/python/estimator/multi_head.py | 34 +++-- .../python/estimator/multi_head_test.py | 38 ++++++ tensorflow/python/estimator/BUILD | 2 +- tensorflow/python/estimator/canned/dnn.py | 9 +- .../estimator/canned/dnn_testing_utils.py | 15 ++- tensorflow/python/estimator/canned/head.py | 126 ++++++++++++------ .../python/estimator/canned/head_test.py | 102 ++++++++++++++ tensorflow/python/estimator/canned/linear.py | 9 +- 11 files changed, 322 insertions(+), 79 deletions(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 676d60231d..24374266dc 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -175,6 +175,7 @@ py_library( "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:summary", + "//tensorflow/python:training", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:head", "//tensorflow/python/estimator:metric_keys", @@ -292,6 +293,7 @@ py_library( "//tensorflow/python:math_ops", "//tensorflow/python:metrics", "//tensorflow/python:summary", + "//tensorflow/python:training", "//tensorflow/python/estimator:export_output", "//tensorflow/python/estimator:head", "//tensorflow/python/estimator:metric_keys", diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index f95fcc8039..42e1b7b68c 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -36,10 +36,12 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.losses import losses from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary import summary +from tensorflow.python.training import training_util _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY +# TODO(b/65403806): Switch loss_reduction default to SUM_OVER_BATCH_SIZE. def multi_class_head(n_classes, weight_column=None, label_vocabulary=None, @@ -489,8 +491,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access processed_labels=processed_labels) def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None, - regularization_losses=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, regularization_losses=None): """Returns an `EstimatorSpec`. Args: @@ -502,8 +504,11 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access with shape `[D0, D1, ... DN, n_classes]` or `SparseTensor` with `dense_shape` `[D0, D1, ... DN, ?]`. `labels` is required argument when `mode` equals `TRAIN` or `EVAL`. + optimizer: `Optimizer` instance to optimize the loss in TRAIN mode. + Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which + updates variables and increments `global_step`. train_op_fn: Function that takes a scalar loss `Tensor` and returns - `train_op`. Required in TRAIN mode. + `train_op`. Used if `optimizer` is `None`. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to @@ -513,7 +518,8 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access Returns: `EstimatorSpec`. Raises: - ValueError: If `train_op_fn` is `None` in TRAIN mode. + ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN + mode, or if both are set. """ with ops.name_scope(self._name, 'head'): logits = head_lib._check_logits_final_dim(logits, self.logits_dimension) # pylint:disable=protected-access @@ -565,8 +571,16 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access regularization_loss=regularization_loss)) # Train. - if train_op_fn is None: - raise ValueError('train_op_fn can not be None.') + if optimizer is not None: + if train_op_fn is not None: + raise ValueError('train_op_fn and optimizer cannot both be set.') + train_op = optimizer.minimize( + regularized_training_loss, + global_step=training_util.get_global_step()) + elif train_op_fn is not None: + train_op = train_op_fn(regularized_training_loss) + else: + raise ValueError('train_op_fn and optimizer cannot both be None.') # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -592,7 +606,7 @@ class _MultiLabelHead(head_lib._Head): # pylint:disable=protected-access mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, - train_op=train_op_fn(regularized_training_loss)) + train_op=train_op) def _eval_metric_ops( self, labels, probabilities, weights, unreduced_loss, diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index dc30dde877..776f0ee341 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -863,6 +863,42 @@ class MultiLabelHead(test.TestCase): self._test_train( head=head, logits=logits, labels=labels, expected_loss=expected_loss) + def test_train_with_optimizer(self): + head = head_lib.multi_label_head(n_classes=2) + logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32) + labels = np.array([[1, 0], [1, 1]], dtype=np.int64) + # For large logits, sigmoid cross entropy loss is approximated as: + # loss = labels * (logits < 0) * (-logits) + + # (1 - labels) * (logits > 0) * logits => + # expected_unweighted_loss = [[10., 10.], [15., 0.]] + # Average over classes, sum over weights. + expected_loss = 17.5 + expected_train_result = 'my_train_op' + + class _Optimizer(object): + + def minimize(self, loss, global_step): + del global_step + return string_ops.string_join( + [constant_op.constant(expected_train_result), + string_ops.as_string(loss, precision=3)]) + + spec = head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + optimizer=_Optimizer()) + + tol = 1e-3 + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) + self.assertEqual( + six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), + train_result) + def test_train_with_regularization_losses(self): head = head_lib.multi_label_head( n_classes=2, loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE) diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head.py b/tensorflow/contrib/estimator/python/estimator/multi_head.py index 23d3714c53..bbbc19cc4d 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary import summary +from tensorflow.python.training import training_util _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -227,8 +228,10 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access weights=example_weights_by_head, processed_labels=labels_by_head) + # TODO(b/65403806): Support regularization_losses arg. def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None): """See `_Head`.""" if isinstance(logits, dict): logits_dict = logits @@ -249,9 +252,10 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access train_op_fn=_no_op_train_fn)) if mode == model_fn.ModeKeys.TRAIN: - if train_op_fn is None: - raise ValueError('train_op_fn can not be None in TRAIN mode.') - spec = self._merge_train(all_estimator_spec, train_op_fn) + spec = self._merge_train( + all_estimator_spec=all_estimator_spec, + optimizer=optimizer, + train_op_fn=train_op_fn) with ops.name_scope(''): summary.scalar(metric_keys.MetricKeys.LOSS, spec.loss) return spec @@ -280,16 +284,21 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access begin_idx += head.logits_dimension return logits_dict - def _merge_train(self, all_estimator_spec, train_op_fn): + def _merge_train(self, all_estimator_spec, optimizer, train_op_fn): """Merges list of `EstimatorSpec` for training. Args: all_estimator_spec: list of `EstimatorSpec` for the individual heads. - train_op_fn: Function to create train op. See `create_estimator_spec` - documentation for more details. + optimizer: `Optimizer` instance to create train op. See + `create_estimator_spec` documentation for more details. + train_op_fn: Function to create train op. Used if `optimizer` is `None`. Returns: `EstimatorSpec` that merges all heads for TRAIN. + + Raises: + ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN + mode. """ losses = [] metrics = {} @@ -298,11 +307,20 @@ class _MultiHead(head_lib._Head): # pylint:disable=protected-access # Metric keys already contain head.name. metrics.update(spec.eval_metric_ops or {}) loss = _merge_losses(losses, self._head_weights) + if optimizer is not None: + if train_op_fn is not None: + raise ValueError('train_op_fn and optimizer cannot both be set.') + train_op = optimizer.minimize( + loss, global_step=training_util.get_global_step()) + elif train_op_fn is not None: + train_op = train_op_fn(loss) + else: + raise ValueError('train_op_fn and optimizer cannot both be None.') return model_fn.EstimatorSpec( mode=model_fn.ModeKeys.TRAIN, loss=loss, - train_op=train_op_fn(loss), + train_op=train_op, eval_metric_ops=metrics) def _merge_predict(self, all_estimator_spec): diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 8e788a9ce8..43cc157a1f 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -550,6 +550,44 @@ class MultiHeadTest(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN + '/head1': expected_loss / 2, }, summary_str, tol) + def test_train_one_head_with_optimizer(self): + head1 = head_lib.multi_label_head(n_classes=2, name='head1') + multi_head = multi_head_lib.multi_head([head1]) + + logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)} + labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)} + # For large logits, sigmoid cross entropy loss is approximated as: + # loss = labels * (logits < 0) * (-logits) + + # (1 - labels) * (logits > 0) * logits => + # expected_unweighted_loss = [[10., 10.], [15., 0.]] + # Average over classes, sum over weights. + expected_loss = 17.5 + expected_train_result = 'my_train_op' + + class _Optimizer(object): + + def minimize(self, loss, global_step): + del global_step + return string_ops.string_join( + [constant_op.constant(expected_train_result), + string_ops.as_string(loss, precision=3)]) + + spec = multi_head.create_estimator_spec( + features={'x': np.array(((42,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + optimizer=_Optimizer()) + + tol = 1e-3 + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) + self.assertEqual( + six.b('{0:s}{1:.3f}'.format(expected_train_result, expected_loss)), + train_result) + def test_train_two_heads_with_weights(self): head1 = head_lib.multi_label_head(n_classes=2, name='head1') head2 = head_lib.multi_label_head(n_classes=3, name='head2') diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index b25f9d2153..5afb5a7dd5 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -265,7 +265,6 @@ py_library( "//tensorflow/python:nn", "//tensorflow/python:partitioned_variables", "//tensorflow/python:summary", - "//tensorflow/python:training", "//tensorflow/python:variable_scope", "//tensorflow/python/feature_column", "//tensorflow/python/ops/losses", @@ -617,6 +616,7 @@ py_library( "//tensorflow/python:sparse_tensor", "//tensorflow/python:string_ops", "//tensorflow/python:summary", + "//tensorflow/python:training", "//tensorflow/python:weights_broadcast_ops", "//tensorflow/python/feature_column", "//tensorflow/python/ops/losses", diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py index 7043da8de0..6382622e0b 100644 --- a/tensorflow/python/estimator/canned/dnn.py +++ b/tensorflow/python/estimator/canned/dnn.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary -from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export # The default learning rate of 0.05 is a historical artifact of the initial @@ -183,17 +182,11 @@ def _dnn_model_fn(features, input_layer_partitioner=input_layer_partitioner) logits = logit_fn(features=features, mode=mode) - def _train_op_fn(loss): - """Returns the op to optimize the loss.""" - return optimizer.minimize( - loss, - global_step=training_util.get_global_step()) - return head.create_estimator_spec( features=features, mode=mode, labels=labels, - train_op_fn=_train_op_fn, + optimizer=optimizer, logits=logits) diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index 85b058caf3..44545c058c 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -53,7 +53,7 @@ from tensorflow.python.summary.writer import writer_cache from tensorflow.python.training import checkpoint_utils from tensorflow.python.training import gradient_descent from tensorflow.python.training import monitored_session -from tensorflow.python.training import optimizer +from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util @@ -134,7 +134,8 @@ def mock_head(testcase, hidden_units, logits_dimension, expected_logits): hidden_weights_names + hidden_biases_names + [LOGITS_WEIGHTS_NAME + '/part_0:0', LOGITS_BIASES_NAME + '/part_0:0']) - def _create_estimator_spec(features, mode, logits, labels, train_op_fn): + def _create_estimator_spec( + features, mode, logits, labels, train_op_fn=None, optimizer=None): del features, labels # Not used. trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) testcase.assertItemsEqual(expected_var_names, @@ -144,8 +145,12 @@ def mock_head(testcase, hidden_units, logits_dimension, expected_logits): expected_logits, logits, message='Failed for mode={}. '.format(mode)) with ops.control_dependencies([assert_logits]): if mode == model_fn.ModeKeys.TRAIN: + if train_op_fn is not None: + train_op = train_op_fn(loss) + elif optimizer is not None: + train_op = optimizer.minimize(loss, global_step=None) return model_fn.EstimatorSpec( - mode=mode, loss=loss, train_op=train_op_fn(loss)) + mode=mode, loss=loss, train_op=train_op) elif mode == model_fn.ModeKeys.EVAL: return model_fn.EstimatorSpec(mode=mode, loss=array_ops.identity(loss)) elif mode == model_fn.ModeKeys.PREDICT: @@ -203,8 +208,8 @@ def mock_optimizer(testcase, hidden_units, expected_loss=None): return control_flow_ops.no_op() optimizer_mock = test.mock.NonCallableMagicMock( - spec=optimizer.Optimizer, - wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer')) + spec=optimizer_lib.Optimizer, + wraps=optimizer_lib.Optimizer(use_locking=False, name='my_optimizer')) optimizer_mock.minimize = test.mock.MagicMock(wraps=_minimize) return optimizer_mock diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index f68204a35e..c9635a9c27 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -44,6 +44,7 @@ from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import losses from tensorflow.python.saved_model import signature_constants from tensorflow.python.summary import summary +from tensorflow.python.training import training_util _DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY @@ -85,40 +86,39 @@ class _Head(object): ```python def _my_dnn_model_fn(features, labels, mode, params, config=None): # Optionally your callers can pass head to model_fn as a param. - head = tf.contrib.learn.regression_head(...) - input = tf.contrib.layers.input_from_feature_columns(features, ...) - last_hidden_layer_out = tf.contrib.layers.stack( - input, tf.contrib.layers.fully_connected, [1000, 500]) - logits = tf.contrib.layers.fully_connected( - last_hidden_layer_out, head.logits_dimension, activation_fn=None) - - def _train_op_fn(loss): - return optimizer.minimize(loss) + head = tf.contrib.estimator.regression_head(...) + inputs = tf.feature_column.input_layer(features, ...) + hidden_layer0 = tf.layers.dense( + inputs, units=1000, activation=tf.nn.relu) + hidden_layer1 = tf.layers.dense( + hidden_layer0, units=500, activation=tf.nn.relu) + logits = tf.layers.dense( + hidden_layer1, units=head.logits_dimension, activation=None) return head.create_estimator_spec( features=features, labels=labels, mode=mode, logits=logits, - train_op_fn=_train_op_fn) + optimizer=optimizer) ``` There are cases where computing and applying gradients can not be meaningfully - captured with train_op_fn we support (for example, with sync optimizer). In - such case, you can take the responsibility on your own. Here is a common - use case, + captured with optimizer or train_op_fn we support (for example, with sync + optimizer). In such case, you can take the responsibility on your own. Here is + a common use case, ```python estimator_spec = head.create_estimator_spec( features=features, labels=labels, mode=mode, logits=logits, - train_op_fn=tf.contrib.learn.no_op_train_fn) + train_op_fn=lambda _: tf.no_op()) if mode == model_fn.ModeKeys.TRAIN: optimizer = ... sync = tf.train.SyncReplicasOptimizer(opt=optimizer, ...) - update_op = tf.contrib.layers.optimize_loss(optimizer=sync, - loss=estimator_spec.loss, ...) + update_op = sync.minimize( + estimator_spec.loss, global_step=tf.get_global_step()) hooks = [sync.make_session_run_hook(is_chief)] ... update train_op and hooks in EstimatorSpec and return ``` @@ -172,10 +172,12 @@ class _Head(object): """ raise NotImplementedError('Calling an abstract method.') + # TODO(b/65403806): By default, collect regularization_losses from + # GraphKeys.REGULARIZATION_LOSSES collection. @abc.abstractmethod def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None, - regularization_losses=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, regularization_losses=None): """Returns `EstimatorSpec` that a model_fn can return. Please note that, @@ -186,10 +188,14 @@ class _Head(object): mode: Estimator's `ModeKeys`. logits: logits `Tensor` to be used by the head. labels: Labels `Tensor`, or `dict` of same. + optimizer: `Optimizer` instance to optimize the loss in TRAIN mode. + Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which + updates variables and increments `global_step`. train_op_fn: Function that takes a scalar loss `Tensor` and returns an op - to optimize the model with the loss. This is used in TRAIN mode and - must not be None. None is allowed in other modes. If you want to - optimize loss yourself you can pass `no_op_train_fn` and then use + to optimize the model with the loss in TRAIN mode. Used if `optimizer` + is `None`. Exactly one of `train_op_fn` and `optimizer` must be set in + TRAIN mode. None is allowed in other modes. If you want to optimize loss + yourself you can pass `lambda _: tf.no_op()` and then use EstimatorSpec.loss to compute and apply gradients. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. @@ -694,8 +700,8 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): processed_labels=label_ids) def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None, - regularization_losses=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, regularization_losses=None): """Returns an `EstimatorSpec`. Args: @@ -706,8 +712,11 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): labels: Labels integer or string `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is required argument when `mode` equals `TRAIN` or `EVAL`. + optimizer: `Optimizer` instance to optimize the loss in TRAIN mode. + Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which + updates variables and increments `global_step`. train_op_fn: Function that takes a scalar loss `Tensor` and returns - `train_op`. Required in TRAIN mode. + `train_op`. Used if `optimizer` is `None`. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to @@ -717,7 +726,8 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): Returns: `EstimatorSpec`. Raises: - ValueError: If `train_op_fn` is `None` in TRAIN mode. + ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN + mode, or if both are set. """ with ops.name_scope(self._name, 'head'): logits = _check_logits_final_dim(logits, self.logits_dimension) @@ -780,8 +790,16 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): regularization_loss=regularization_loss)) # Train. - if train_op_fn is None: - raise ValueError('train_op_fn cannot be None.') + if optimizer is not None: + if train_op_fn is not None: + raise ValueError('train_op_fn and optimizer cannot both be set.') + train_op = optimizer.minimize( + regularized_training_loss, + global_step=training_util.get_global_step()) + elif train_op_fn is not None: + train_op = train_op_fn(regularized_training_loss) + else: + raise ValueError('train_op_fn and optimizer cannot both be None.') # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -807,7 +825,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, - train_op=train_op_fn(regularized_training_loss)) + train_op=train_op) def _binary_logistic_head_with_sigmoid_cross_entropy_loss( @@ -1039,8 +1057,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): processed_labels=labels) def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None, - regularization_losses=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, regularization_losses=None): """Returns an `EstimatorSpec`. Args: @@ -1051,8 +1069,11 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): labels: Labels integer or string `Tensor` with shape matching `logits`, namely `[D0, D1, ... DN, 1]` or `[D0, D1, ... DN]`. `labels` is required argument when `mode` equals `TRAIN` or `EVAL`. + optimizer: `Optimizer` instance to optimize the loss in TRAIN mode. + Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which + updates variables and increments `global_step`. train_op_fn: Function that takes a scalar loss `Tensor` and returns - `train_op`. Required in TRAIN mode. + `train_op`. Used if `optimizer` is `None`. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to @@ -1062,7 +1083,8 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): Returns: `EstimatorSpec`. Raises: - ValueError: If `train_op_fn` is `None` in TRAIN mode. + ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN + mode, or if both are set. """ # Predict. with ops.name_scope(self._name, 'head'): @@ -1134,8 +1156,16 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): regularization_loss=regularization_loss)) # Train. - if train_op_fn is None: - raise ValueError('train_op_fn can not be None.') + if optimizer is not None: + if train_op_fn is not None: + raise ValueError('train_op_fn and optimizer cannot both be set.') + train_op = optimizer.minimize( + regularized_training_loss, + global_step=training_util.get_global_step()) + elif train_op_fn is not None: + train_op = train_op_fn(regularized_training_loss) + else: + raise ValueError('train_op_fn and optimizer cannot both be None.') # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -1160,7 +1190,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, - train_op=train_op_fn(regularized_training_loss)) + train_op=train_op) def _regression_head_with_mean_squared_error_loss( @@ -1289,8 +1319,8 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): processed_labels=labels) def create_estimator_spec( - self, features, mode, logits, labels=None, train_op_fn=None, - regularization_losses=None): + self, features, mode, logits, labels=None, optimizer=None, + train_op_fn=None, regularization_losses=None): """Returns an `EstimatorSpec`. Args: @@ -1302,8 +1332,11 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): `[D0, D1, ... DN, logits_dimension]`. When `logits_dimension=1`, shape `[D0, D1, ... DN]` is also supported. `labels` is required argument when `mode` equals `TRAIN` or `EVAL`. + optimizer: `Optimizer` instance to optimize the loss in TRAIN mode. + Namely, sets `train_op = optimizer.minimize(loss, global_step)`, which + updates variables and increments `global_step`. train_op_fn: Function that takes a scalar loss `Tensor` and returns - `train_op`. Required in TRAIN mode. + `train_op`. Used if `optimizer` is `None`. regularization_losses: A list of additional scalar losses to be added to the training loss, such as regularization losses. These losses are usually expressed as a batch average, so for best results users need to @@ -1313,7 +1346,8 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): Returns: `EstimatorSpec`. Raises: - ValueError: If `train_op_fn` is `None` in TRAIN mode. + ValueError: If both `train_op_fn` and `optimizer` are `None` in TRAIN + mode, or if both are set. """ # Predict. with ops.name_scope(self._name, 'head'): @@ -1373,8 +1407,16 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): eval_metric_ops=eval_metric_ops) # Train. - if train_op_fn is None: - raise ValueError('train_op_fn can not be None.') + if optimizer is not None: + if train_op_fn is not None: + raise ValueError('train_op_fn and optimizer cannot both be set.') + train_op = optimizer.minimize( + regularized_training_loss, + global_step=training_util.get_global_step()) + elif train_op_fn is not None: + train_op = train_op_fn(regularized_training_loss) + else: + raise ValueError('train_op_fn and optimizer cannot both be None.') # Only summarize mean_loss for SUM reduction to preserve backwards # compatibility. Otherwise skip it to avoid unnecessary computation. if self._loss_reduction == losses.Reduction.SUM: @@ -1399,7 +1441,7 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): mode=model_fn.ModeKeys.TRAIN, predictions=predictions, loss=regularized_training_loss, - train_op=train_op_fn(regularized_training_loss)) + train_op=train_op) def _assert_range(labels, n_classes, message=None): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index b5d35c9b45..fe6ee07529 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -842,6 +842,41 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2, }, summary_str, tol) + def test_train_with_optimizer(self): + n_classes = 3 + head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(n_classes) + + logits = np.array(((10, 0, 0), (0, 10, 0),), dtype=np.float32) + labels = np.array(((1,), (1,)), dtype=np.int64) + features = {'x': np.array(((42,),), dtype=np.int32)} + expected_train_result = 'my_train_op' + + class _Optimizer(object): + + def minimize(self, loss, global_step): + del global_step + return string_ops.string_join( + [constant_op.constant(expected_train_result), + string_ops.as_string(loss, precision=2)]) + + # loss = sum(cross_entropy(labels, logits)) = sum(10, 0) = 10. + expected_loss = 10. + spec = head.create_estimator_spec( + features=features, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + optimizer=_Optimizer()) + + tol = 1e-2 + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss, rtol=tol, atol=tol) + self.assertEqual( + six.b('{0:s}{1:.2f}'.format(expected_train_result, expected_loss)), + train_result) + def test_train_summaries_with_head_name(self): n_classes = 3 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( @@ -1942,6 +1977,39 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: 20.5, }, summary_str) + def test_train_with_optimizer(self): + head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss() + + logits = np.array(((45,), (-41,),), dtype=np.float32) + labels = np.array(((1,), (1,),), dtype=np.float64) + expected_train_result = b'my_train_op' + features = {'x': np.array(((42,),), dtype=np.float32)} + # loss = sum(cross_entropy(labels, logits)) = sum(0, 41) = 41 + expected_loss = 41. + + class _Optimizer(object): + + def minimize(self, loss, global_step): + del global_step + with ops.control_dependencies((check_ops.assert_equal( + math_ops.to_float(expected_loss), math_ops.to_float(loss), + name='assert_loss'),)): + return constant_op.constant(expected_train_result) + + # Create estimator spec. + spec = head.create_estimator_spec( + features=features, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + optimizer=_Optimizer()) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss) + self.assertEqual(expected_train_result, train_result) + def test_train_summaries_with_head_name(self): head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( name='some_binary_head') @@ -3076,6 +3144,40 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): metric_keys.MetricKeys.LOSS_MEAN: 6.5, }, summary_str) + def test_train_with_optimizer(self): + head = head_lib._regression_head_with_mean_squared_error_loss() + self.assertEqual(1, head.logits_dimension) + + # Create estimator spec. + logits = np.array(((45,), (41,),), dtype=np.float32) + labels = np.array(((43.,), (44.,),), dtype=np.float64) + expected_train_result = b'my_train_op' + features = {'x': np.array(((42.,),), dtype=np.float32)} + # loss = (43-45)^2 + (44-41)^2 = 4 + 9 = 13 + expected_loss = 13 + + class _Optimizer(object): + + def minimize(self, loss, global_step): + del global_step + with ops.control_dependencies((check_ops.assert_equal( + math_ops.to_float(expected_loss), math_ops.to_float(loss), + name='assert_loss'),)): + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features=features, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + optimizer=_Optimizer()) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run((spec.loss, spec.train_op)) + self.assertAllClose(expected_loss, loss) + self.assertEqual(expected_train_result, train_result) + def test_train_summaries_with_head_name(self): head = head_lib._regression_head_with_mean_squared_error_loss( name='some_regression_head') diff --git a/tensorflow/python/estimator/canned/linear.py b/tensorflow/python/estimator/canned/linear.py index a2f24ef270..e7ec417991 100644 --- a/tensorflow/python/estimator/canned/linear.py +++ b/tensorflow/python/estimator/canned/linear.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary from tensorflow.python.training import ftrl -from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export @@ -157,17 +156,11 @@ def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, units=head.logits_dimension, feature_columns=feature_columns) logits = logit_fn(features=features) - def _train_op_fn(loss): - """Returns the op to optimize the loss.""" - return optimizer.minimize( - loss, - global_step=training_util.get_global_step()) - return head.create_estimator_spec( features=features, mode=mode, labels=labels, - train_op_fn=_train_op_fn, + optimizer=optimizer, logits=logits) -- GitLab From 48b0fb7a524425d57547dc23093d869538b888db Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Thu, 22 Mar 2018 14:09:59 -0700 Subject: [PATCH 373/960] Fetch C shapes for ops created by import_graph_def with C API enabled. If _USE_C_API = True, this change makes us always fetch shapes using the C API after calling TF_ImportGraphDef, even if _USE_C_SHAPES = False. This is necessary to preserve the shapes specified by the "_output_shapes" attr on imported NodeDefs (note that this attr isn't present on the NodeDefs of the imported nodes, so there's no other way to recover this information after calling TF_ImportGraphDef). PiperOrigin-RevId: 190122991 --- tensorflow/python/framework/ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index de222e1932..93edaa0cf0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3455,12 +3455,12 @@ class Graph(object): ] for op in new_ops: - # The Python shape inference code does not support imported functions. It - # also needs access to op.inputs, which is why we call it here. + # Operations created by the C API always retrieve shapes from the C API so + # we preserve the shapes of ops created in import_graph_def (from the + # "_output_shapes" attr of the imported NodeDef). # TODO(b/74620627): move this back to _create_op_helper once _USE_C_SHAPES # is removed. - if not self._is_function(op.type) or _USE_C_SHAPES: - set_shapes_for_outputs(op) + _set_shapes_for_outputs_c_api(op) new_control_inputs = self._control_dependencies_for_inputs(op.inputs) # pylint: disable=protected-access op._add_control_inputs(new_control_inputs) -- GitLab From e3468b56d323783fdfb79fa2d6c24effc58bcaa9 Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Thu, 22 Mar 2018 14:11:08 -0700 Subject: [PATCH 374/960] Adds float64 support for Conv2d, Conv2dBackpropInput, and Conv2dBackpropFilter PiperOrigin-RevId: 190123191 --- .../core/kernels/conv_grad_filter_ops.cc | 7 + .../core/kernels/conv_grad_input_ops.cc | 7 + tensorflow/core/kernels/conv_ops.cc | 7 +- tensorflow/core/kernels/conv_ops_gpu_2.cu.cc | 3 + tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 3 + tensorflow/core/kernels/depthwise_conv_op.cc | 4 +- .../core/kernels/eigen_spatial_convolutions.h | 182 ++++++++++++++++++ tensorflow/core/ops/nn_ops.cc | 6 +- .../python/kernel_tests/conv_ops_test.py | 4 +- 9 files changed, 215 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index e6ae595291..66ee474ca3 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -520,6 +520,7 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { TF_CALL_half(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS // GPU definitions. @@ -1017,11 +1018,17 @@ namespace functor { typename TTypes::Tensor out, TensorFormat data_format); \ extern template struct PadInput; +DECLARE_GPU_SPEC(double); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(Eigen::half); #undef DECLARE_GPU_SPEC } // namespace functor +REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("filter_sizes"), + Conv2DSlowBackpropFilterOp); REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropFilter") .Device(DEVICE_GPU) .TypeConstraint("T") diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 15c55e4d99..71ea0d5d72 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -592,6 +592,7 @@ class Conv2DCustomBackpropInputOp : public OpKernel { TF_CALL_half(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS // GPU definitions. @@ -1090,11 +1091,17 @@ namespace functor { typename TTypes::Tensor out, TensorFormat data_format); \ extern template struct PadInput; +DECLARE_GPU_SPEC(double); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(Eigen::half); #undef DECLARE_GPU_SPEC } // namespace functor +REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("input_sizes"), + Conv2DSlowBackpropInputOp); REGISTER_KERNEL_BUILDER(Name("Conv2DBackpropInput") .Device(DEVICE_GPU) .TypeConstraint("T") diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 47f6907c04..88843e4da7 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -446,10 +446,11 @@ class Conv2DOp : public BinaryOp { #if !defined(USE_GEMM_FOR_CONV) TF_CALL_half(REGISTER_CPU); TF_CALL_float(REGISTER_CPU); +TF_CALL_double(REGISTER_CPU); #endif // USE_GEMM_FOR_CONV // To be used inside depthwise_conv_op.cc. -template class LaunchConv2DOp; +template struct LaunchConv2DOp; #if GOOGLE_CUDA int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb, @@ -810,6 +811,7 @@ namespace functor { typename TTypes::Tensor out, TensorFormat data_format); \ extern template struct PadInput +DECLARE_GPU_SPEC(double); DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(Eigen::half); #undef DECLARE_GPU_SPEC @@ -822,6 +824,9 @@ REGISTER_KERNEL_BUILDER( REGISTER_KERNEL_BUILDER( Name("Conv2D").Device(DEVICE_GPU).TypeConstraint("T"), Conv2DOp); +REGISTER_KERNEL_BUILDER( + Name("Conv2D").Device(DEVICE_GPU).TypeConstraint("T"), + Conv2DOp); // To be used inside depthwise_conv_op.cc. template class LaunchConv2DOp; diff --git a/tensorflow/core/kernels/conv_ops_gpu_2.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_2.cu.cc index b5dd26a9e4..52859af950 100644 --- a/tensorflow/core/kernels/conv_ops_gpu_2.cu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu_2.cu.cc @@ -25,6 +25,9 @@ limitations under the License. namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; +template struct functor::InflatePadAndShuffle; +template struct functor::InflatePadAndShuffle; template struct functor::InflatePadAndShuffle; template struct functor::InflatePadAndShuffle; diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc index a376534bad..2503b475dc 100644 --- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc @@ -1039,9 +1039,11 @@ template struct functor::SwapDimension0And2InTensor3; // For 2d ops. +template struct functor::TransformFilter; template struct functor::TransformFilter; template struct functor::TransformFilter; +template struct functor::ReverseTransformFilter; template struct functor::ReverseTransformFilter; template struct functor::ReverseTransformFilter; @@ -1054,6 +1056,7 @@ template struct functor::NCHWToNHWC; template struct functor::NCHWToNHWC; template struct functor::PadInput; +template struct functor::PadInput; template struct functor::PadInput; template struct functor::PadInput; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index c060b2e14d..6dedb1a61e 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -241,7 +241,7 @@ struct LaunchDepthwiseConvOp { }; // Extern template instantiated in conv_ops.cc. -extern template class LaunchConv2DOp; +extern template struct LaunchConv2DOp; #if GOOGLE_CUDA @@ -251,7 +251,7 @@ extern template struct LaunchDepthwiseConvOp; extern template struct LaunchDepthwiseConvOp; // Extern template instantiated in conv_ops.cc. -extern template class LaunchConv2DOp; +extern template struct LaunchConv2DOp; #endif diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h index 1acbe3a658..a4dff4b91c 100644 --- a/tensorflow/core/kernels/eigen_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -797,6 +797,188 @@ struct gemm_pack_rhs< } }; +// Template specialization for packet_size = 2. We must special-case packet +// blocks with nr > packet_size, e.g. PacketBlock. +template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, + Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, 2, inner_dim_contiguous, inner_dim_reordered, + Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + typedef typename packet_traits::type Packet; + + const int packet_size = 2; + const Index packet_cols4 = (cols / 4) * 4; + const Index peeled_k = (depth / packet_size) * packet_size; + const bool non_standard_patches = rhs.nonStandardPatches(); + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + Index k = 0; + if (!non_standard_patches) { + const Index patch_depth = rhs.patchDepth(); + if ((patch_depth % packet_size) == 0) { + const Index patch_cols = rhs.patchCols(); + const Index patch_rows = rhs.patchRows(); + + const Index startCol = rhs.colOffset(); + const Index max_cols = std::min( + ceil_div(peeled_k, patch_rows * patch_depth) + startCol, + patch_cols); + + for (Index c = startCol; c < max_cols; ++c) { + eigen_assert(k < peeled_k); + const Index startRow = (c == startCol) ? rhs.rowOffset() : 0; + const Index max_rows = std::min( + ceil_div(peeled_k - c * patch_rows * patch_depth, patch_depth) + + startRow, + patch_rows); + + const bool pad_col0 = dm0.padCol(c); + const bool pad_col1 = dm1.padCol(c); + const bool pad_col2 = dm2.padCol(c); + const bool pad_col3 = dm3.padCol(c); + for (Index r = startRow; r < max_rows; ++r) { + eigen_assert(k < peeled_k); + const bool pad0 = pad_col0 || dm0.padRow(r); + const bool pad1 = pad_col1 || dm1.padRow(r); + const bool pad2 = pad_col2 || dm2.padRow(r); + const bool pad3 = pad_col3 || dm3.padRow(r); + + const Index idx0 = dm0.baseIndex(r, c); + const Index idx1 = dm1.baseIndex(r, c); + const Index idx2 = dm2.baseIndex(r, c); + const Index idx3 = dm3.baseIndex(r, c); + + const Index startDepth = + ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0; + const Index max_depth = + std::min(peeled_k - c * patch_rows * patch_depth - + r * patch_depth + startDepth, + patch_depth); + eigen_assert((max_depth - startDepth) % packet_size == 0); + for (Index d = startDepth; d < max_depth; d += packet_size) { + eigen_assert(k < peeled_k); + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = pad0 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx0); + kernel0.packet[1] = pad1 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx1); + kernel1.packet[0] = pad2 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx2); + kernel1.packet[1] = pad3 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx3); + ptranspose(kernel0); + ptranspose(kernel1); + pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + k += packet_size; + } + } + } + + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = dm0.loadPacketFast(k); + kernel0.packet[1] = dm1.loadPacketFast(k); + kernel1.packet[0] = dm2.loadPacketFast(k); + kernel1.packet[1] = dm3.loadPacketFast(k); + ptranspose(kernel0); + ptranspose(kernel1); + pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + } + } else { + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = dm0.loadPacketStandard(k); + kernel0.packet[1] = dm1.loadPacketStandard(k); + kernel1.packet[0] = dm2.loadPacketStandard(k); + kernel1.packet[1] = dm3.loadPacketStandard(k); + ptranspose(kernel0); + ptranspose(kernel1); + pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + } + } + } + if (!rhs.nonStandardPatches()) { + for (; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + *block = dm0(k); + block += 1; + } + } + } +}; + // Special case for non-vectorized types such as float16. template Date: Thu, 22 Mar 2018 14:14:33 -0700 Subject: [PATCH 375/960] Update tf.keras preprocessing to Keras 2.1.5 API PiperOrigin-RevId: 190123773 --- .../keras/_impl/keras/preprocessing/image.py | 211 +++++++++++----- .../_impl/keras/preprocessing/image_test.py | 151 +++++++++++- .../_impl/keras/preprocessing/sequence.py | 232 ++++++++++++++---- .../keras/preprocessing/sequence_test.py | 82 ++++++- .../keras/_impl/keras/preprocessing/text.py | 31 ++- .../_impl/keras/preprocessing/text_test.py | 42 +++- .../keras/preprocessing/image/__init__.py | 1 + .../keras/preprocessing/sequence/__init__.py | 1 + .../keras/preprocessing/text/__init__.py | 1 + ...processing.image.-directory-iterator.pbtxt | 2 +- ...ocessing.image.-image-data-generator.pbtxt | 6 +- ...ocessing.image.-numpy-array-iterator.pbtxt | 2 +- ...tensorflow.keras.preprocessing.image.pbtxt | 4 + ...ssing.sequence.-timeseries-generator.pbtxt | 14 ++ ...sorflow.keras.preprocessing.sequence.pbtxt | 4 + .../tensorflow.keras.preprocessing.text.pbtxt | 4 + 16 files changed, 665 insertions(+), 123 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.-timeseries-generator.pbtxt diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image.py b/tensorflow/python/keras/_impl/keras/preprocessing/image.py index d12f108639..6299445c34 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/image.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/image.py @@ -43,6 +43,7 @@ except ImportError: try: + from PIL import ImageEnhance from PIL import Image as pil_image except ImportError: pil_image = None @@ -227,6 +228,32 @@ def random_channel_shift(x, intensity, channel_axis=0): return x +@tf_export('keras.preprocessing.image.random_brightness') +def random_brightness(x, brightness_range): + """Performs a random adjustment of brightness of a Numpy image tensor. + + Arguments: + x: Input tensor. Must be 3D. + brightness_range: Tuple of floats; range to pick a brightness value from. + + Returns: + Brightness adjusted Numpy image tensor. + + Raises: + ValueError: if `brightness_range` isn't a tuple. + """ + if len(brightness_range) != 2: + raise ValueError('`brightness_range should be tuple or list of two floats. ' + 'Received arg: ', brightness_range) + + x = array_to_img(x) + x = ImageEnhance.Brightness(x) + u = np.random.uniform(brightness_range[0], brightness_range[1]) + x = x.enhance(u) + x = img_to_array(x) + return x + + def transform_matrix_offset_center(matrix, x, y): o_x = float(x) / 2 + 0.5 o_y = float(y) / 2 + 0.5 @@ -265,7 +292,7 @@ def apply_transform(x, x_channel, final_affine_matrix, final_offset, - order=0, + order=1, mode=fill_mode, cval=cval) for x_channel in x ] @@ -436,6 +463,7 @@ class ImageDataGenerator(object): rotation_range: degrees (0 to 180). width_shift_range: fraction of total width, if < 1, or pixels if >= 1. height_shift_range: fraction of total height, if < 1, or pixels if >= 1. + brightness_range: the range of brightness to apply shear_range: shear intensity (shear angle in degrees). zoom_range: amount of zoom. if scalar z, zoom will be randomly picked in the range [1-z, 1+z]. A sequence of two can be passed instead @@ -469,6 +497,8 @@ class ImageDataGenerator(object): It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. If you never set it, then it will be "channels_last". + validation_split: fraction of images reserved for validation (strictly + between 0 and 1). """ def __init__(self, @@ -481,6 +511,7 @@ class ImageDataGenerator(object): rotation_range=0., width_shift_range=0., height_shift_range=0., + brightness_range=None, shear_range=0., zoom_range=0., channel_shift_range=0., @@ -490,7 +521,8 @@ class ImageDataGenerator(object): vertical_flip=False, rescale=None, preprocessing_function=None, - data_format=None): + data_format=None, + validation_split=0.0): if data_format is None: data_format = K.image_data_format() self.featurewise_center = featurewise_center @@ -502,6 +534,7 @@ class ImageDataGenerator(object): self.rotation_range = rotation_range self.width_shift_range = width_shift_range self.height_shift_range = height_shift_range + self.brightness_range = brightness_range self.shear_range = shear_range self.zoom_range = zoom_range self.channel_shift_range = channel_shift_range @@ -526,6 +559,10 @@ class ImageDataGenerator(object): self.channel_axis = 3 self.row_axis = 1 self.col_axis = 2 + if validation_split and not 0 < validation_split < 1: + raise ValueError('`validation_split` must be strictly between 0 and 1. ' + 'Received arg: ', validation_split) + self.validation_split = validation_split self.mean = None self.std = None @@ -574,7 +611,8 @@ class ImageDataGenerator(object): seed=None, save_to_dir=None, save_prefix='', - save_format='png'): + save_format='png', + subset=None): return NumpyArrayIterator( x, y, @@ -585,7 +623,8 @@ class ImageDataGenerator(object): data_format=self.data_format, save_to_dir=save_to_dir, save_prefix=save_prefix, - save_format=save_format) + save_format=save_format, + subset=subset) def flow_from_directory(self, directory, @@ -600,6 +639,7 @@ class ImageDataGenerator(object): save_prefix='', save_format='png', follow_links=False, + subset=None, interpolation='nearest'): return DirectoryIterator( directory, @@ -616,6 +656,7 @@ class ImageDataGenerator(object): save_prefix=save_prefix, save_format=save_format, follow_links=follow_links, + subset=subset, interpolation=interpolation) def standardize(self, x): @@ -628,7 +669,7 @@ class ImageDataGenerator(object): The inputs, normalized. """ if self.preprocessing_function: - x = self.preprocessing_function(x) + x = self.image_data_generator.preprocessing_function(x) if self.rescale: x *= self.rescale if self.samplewise_center: @@ -762,6 +803,9 @@ class ImageDataGenerator(object): if np.random.random() < 0.5: x = flip_axis(x, img_row_axis) + if self.brightness_range is not None: + x = random_brightness(x, self.brightness_range) + return x def fit(self, x, augment=False, rounds=1, seed=None): @@ -828,12 +872,10 @@ class ImageDataGenerator(object): raise ImportError('Scipy is required for zca_whitening.') flat_x = np.reshape(x, (x.shape[0], x.shape[1] * x.shape[2] * x.shape[3])) - num_examples = flat_x.shape[0] - _, s, vt = linalg.svd(flat_x / np.sqrt(num_examples)) - s_expand = np.hstack( - (s, np.zeros(vt.shape[0] - num_examples, dtype=flat_x.dtype))) - self.principal_components = ( - vt.T / np.sqrt(s_expand**2 + self.zca_epsilon)).dot(vt) + sigma = np.dot(flat_x.T, flat_x) / flat_x.shape[0] + u, s, _ = linalg.svd(sigma) + s_inv = 1. / np.sqrt(s[np.newaxis] + self.zca_epsilon) + self.principal_components = (u * s_inv).dot(u.T) @tf_export('keras.preprocessing.image.Iterator') @@ -947,6 +989,8 @@ class NumpyArrayIterator(Iterator): images (if `save_to_dir` is set). save_format: Format to use for saving sample images (if `save_to_dir` is set). + subset: Subset of data (`"training"` or `"validation"`) if + validation_split is set in ImageDataGenerator. """ def __init__(self, @@ -959,17 +1003,29 @@ class NumpyArrayIterator(Iterator): data_format=None, save_to_dir=None, save_prefix='', - save_format='png'): + save_format='png', + subset=None): if y is not None and len(x) != len(y): - raise ValueError('X (images tensor) and y (labels) ' + raise ValueError('`x` (images tensor) and `y` (labels) ' 'should have the same length. ' - 'Found: X.shape = %s, y.shape = %s' % + 'Found: x.shape = %s, y.shape = %s' % (np.asarray(x).shape, np.asarray(y).shape)) - + if subset is not None: + if subset not in {'training', 'validation'}: + raise ValueError('Invalid subset name:', subset, + '; expected "training" or "validation".') + split_idx = int(len(x) * image_data_generator.validation_split) + if subset == 'validation': + x = x[:split_idx] + if y is not None: + y = y[:split_idx] + else: + x = x[split_idx:] + if y is not None: + y = y[split_idx:] if data_format is None: data_format = K.image_data_format() self.x = np.asarray(x, dtype=K.floatx()) - if self.x.ndim != 4: raise ValueError('Input data in `NumpyArrayIterator` ' 'should have rank 4. You passed an array ' @@ -1032,8 +1088,7 @@ class NumpyArrayIterator(Iterator): return self._get_batches_of_transformed_samples(index_array) -def _count_valid_files_in_directory(directory, white_list_formats, - follow_links): +def _iter_valid_files(directory, white_list_formats, follow_links): """Count files with extension in `white_list_formats` contained in directory. Arguments: @@ -1043,29 +1098,54 @@ def _count_valid_files_in_directory(directory, white_list_formats, the files to be counted. follow_links: boolean. - Returns: - the count of files with extension in `white_list_formats` contained in - the directory. + Yields: + tuple of (root, filename) with extension in `white_list_formats`. """ def _recursive_list(subpath): return sorted( - os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0]) + os.walk(subpath, followlinks=follow_links), key=lambda x: x[0]) - samples = 0 - for _, _, files in _recursive_list(directory): - for fname in files: - is_valid = False + for root, _, files in _recursive_list(directory): + for fname in sorted(files): for extension in white_list_formats: + if fname.lower().endswith('.tiff'): + logging.warning( + 'Using \'.tiff\' files with multiple bands will cause ' + 'distortion. Please verify your output.') if fname.lower().endswith('.' + extension): - is_valid = True - break - if is_valid: - samples += 1 - return samples + yield root, fname -def _list_valid_filenames_in_directory(directory, white_list_formats, +def _count_valid_files_in_directory(directory, white_list_formats, split, + follow_links): + """Count files with extension in `white_list_formats` contained in directory. + + Arguments: + directory: absolute path to the directory + containing files to be counted + white_list_formats: set of strings containing allowed extensions for + the files to be counted. + split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into + account a certain fraction of files in each directory. + E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent + of images in each directory. + follow_links: boolean. + + Returns: + the count of files with extension in `white_list_formats` contained in + the directory. + """ + num_files = len( + list(_iter_valid_files(directory, white_list_formats, follow_links))) + if split: + start, stop = int(split[0] * num_files), int(split[1] * num_files) + else: + start, stop = 0, num_files + return stop - start + + +def _list_valid_filenames_in_directory(directory, white_list_formats, split, class_indices, follow_links): """List paths of files in `subdir` with extensions in `white_list_formats`. @@ -1075,6 +1155,10 @@ def _list_valid_filenames_in_directory(directory, white_list_formats, `class_indices`. white_list_formats: set of strings containing allowed extensions for the files to be counted. + split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into + account a certain fraction of files in each directory. + E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent + of images in each directory. class_indices: dictionary mapping a class name to its index. follow_links: boolean. @@ -1084,27 +1168,26 @@ def _list_valid_filenames_in_directory(directory, white_list_formats, `directory`'s parent (e.g., if `directory` is "dataset/class1", the filenames will be ["class1/file1.jpg", "class1/file2.jpg", ...]). """ - - def _recursive_list(subpath): - return sorted( - os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0]) + dirname = os.path.basename(directory) + if split: + num_files = len( + list(_iter_valid_files(directory, white_list_formats, follow_links))) + start, stop = int(split[0] * num_files), int(split[1] * num_files) + valid_files = list( + _iter_valid_files(directory, white_list_formats, + follow_links))[start:stop] + else: + valid_files = _iter_valid_files(directory, white_list_formats, follow_links) classes = [] filenames = [] - subdir = os.path.basename(directory) - basedir = os.path.dirname(directory) - for root, _, files in _recursive_list(directory): - for fname in sorted(files): - is_valid = False - for extension in white_list_formats: - if fname.lower().endswith('.' + extension): - is_valid = True - break - if is_valid: - classes.append(class_indices[subdir]) - # add filename relative to directory - absolute_path = os.path.join(root, fname) - filenames.append(os.path.relpath(absolute_path, basedir)) + for root, fname in valid_files: + classes.append(class_indices[dirname]) + absolute_path = os.path.join(root, fname) + relative_path = os.path.join(dirname, + os.path.relpath(absolute_path, directory)) + filenames.append(relative_path) + return classes, filenames @@ -1144,6 +1227,8 @@ class DirectoryIterator(Iterator): images (if `save_to_dir` is set). save_format: Format to use for saving sample images (if `save_to_dir` is set). + subset: Subset of data (`"training"` or `"validation"`) if + validation_split is set in ImageDataGenerator. interpolation: Interpolation method used to resample the image if the target size is different from that of the loaded image. Supported methods are "nearest", "bilinear", and "bicubic". @@ -1167,6 +1252,7 @@ class DirectoryIterator(Iterator): save_prefix='', save_format='png', follow_links=False, + subset=None, interpolation='nearest'): if data_format is None: data_format = K.image_data_format() @@ -1200,7 +1286,20 @@ class DirectoryIterator(Iterator): self.save_format = save_format self.interpolation = interpolation - white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm'} + if subset is not None: + validation_split = self.image_data_generator.validation_split + if subset == 'validation': + split = (0, validation_split) + elif subset == 'training': + split = (validation_split, 1) + else: + raise ValueError('Invalid subset name: ', subset, + '; expected "training" or "validation"') + else: + split = None + self.subset = subset + + white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm', 'tif', 'tiff'} # first, count the number of samples and classes self.samples = 0 @@ -1217,7 +1316,8 @@ class DirectoryIterator(Iterator): function_partial = partial( _count_valid_files_in_directory, white_list_formats=white_list_formats, - follow_links=follow_links) + follow_links=follow_links, + split=split) self.samples = sum( pool.map(function_partial, (os.path.join(directory, subdir) for subdir in classes))) @@ -1233,14 +1333,15 @@ class DirectoryIterator(Iterator): i = 0 for dirpath in (os.path.join(directory, subdir) for subdir in classes): results.append( - pool.apply_async( - _list_valid_filenames_in_directory, - (dirpath, white_list_formats, self.class_indices, follow_links))) + pool.apply_async(_list_valid_filenames_in_directory, + (dirpath, white_list_formats, split, + self.class_indices, follow_links))) for res in results: classes, filenames = res.get() self.classes[i:i + len(classes)] = classes self.filenames += filenames i += len(classes) + pool.close() pool.join() super(DirectoryIterator, self).__init__(self.samples, batch_size, shuffle, diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py b/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py index c0790b5a51..001fee91f9 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/image_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import os import shutil +import tempfile import numpy as np @@ -74,6 +75,7 @@ class TestImage(test.TestCase): shear_range=0.5, zoom_range=0.2, channel_shift_range=0., + brightness_range=(1, 5), fill_mode='nearest', cval=0.5, horizontal_flip=True, @@ -92,6 +94,47 @@ class TestImage(test.TestCase): self.assertEqual(x.shape[1:], images.shape[1:]) break + def test_image_data_generator_with_validation_split(self): + if PIL is None: + return # Skip test if PIL is not available. + + for test_images in _generate_test_images(): + img_list = [] + for im in test_images: + img_list.append(keras.preprocessing.image.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + generator = keras.preprocessing.image.ImageDataGenerator( + validation_split=0.5) + seq = generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + batch_size=3, + subset='validation') + _, y = seq[0] + self.assertEqual(list(y), [0, 1, 2]) + seq = generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + batch_size=3, + subset='training') + _, y2 = seq[0] + self.assertEqual(list(y2), [4, 5, 6]) + + with self.assertRaises(ValueError): + generator.flow( + images, + np.arange(images.shape[0]), + shuffle=False, + batch_size=3, + subset='foo') + + def test_image_data_generator_with_split_value_error(self): + with self.assertRaises(ValueError): + keras.preprocessing.image.ImageDataGenerator(validation_split=5) + def test_image_data_generator_invalid_data(self): generator = keras.preprocessing.image.ImageDataGenerator( featurewise_center=True, @@ -202,9 +245,80 @@ class TestImage(test.TestCase): # check number of classes and images self.assertEqual(len(dir_iterator.class_indices), num_classes) self.assertEqual(len(dir_iterator.classes), count) - self.assertEqual(sorted(dir_iterator.filenames), sorted(filenames)) + self.assertEqual(set(dir_iterator.filenames), set(filenames)) _ = dir_iterator.next() + def directory_iterator_with_validation_split_test_helper( + self, validation_split): + if PIL is None: + return # Skip test if PIL is not available. + + num_classes = 2 + tmp_folder = tempfile.mkdtemp(prefix='test_images') + + # create folders and subfolders + paths = [] + for cl in range(num_classes): + class_directory = 'class-{}'.format(cl) + classpaths = [ + class_directory, + os.path.join(class_directory, 'subfolder-1'), + os.path.join(class_directory, 'subfolder-2'), + os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') + ] + for path in classpaths: + os.mkdir(os.path.join(tmp_folder, path)) + paths.append(classpaths) + + # save the images in the paths + count = 0 + filenames = [] + for test_images in _generate_test_images(): + for im in test_images: + # rotate image class + im_class = count % num_classes + # rotate subfolders + classpaths = paths[im_class] + filename = os.path.join(classpaths[count % len(classpaths)], + 'image-{}.jpg'.format(count)) + filenames.append(filename) + im.save(os.path.join(tmp_folder, filename)) + count += 1 + + # create iterator + generator = keras.preprocessing.image.ImageDataGenerator( + validation_split=validation_split) + + with self.assertRaises(ValueError): + generator.flow_from_directory(tmp_folder, subset='foo') + + num_validation = int(count * validation_split) + num_training = count - num_validation + train_iterator = generator.flow_from_directory( + tmp_folder, subset='training') + self.assertEqual(train_iterator.samples, num_training) + + valid_iterator = generator.flow_from_directory( + tmp_folder, subset='validation') + self.assertEqual(valid_iterator.samples, num_validation) + + # check number of classes and images + self.assertEqual(len(train_iterator.class_indices), num_classes) + self.assertEqual(len(train_iterator.classes), num_training) + self.assertEqual( + len(set(train_iterator.filenames) & set(filenames)), num_training) + + shutil.rmtree(tmp_folder) + + def test_directory_iterator_with_validation_split_25_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.25) + + def test_directory_iterator_with_validation_split_40_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.40) + + def test_directory_iterator_with_validation_split_50_percent(self): + self.directory_iterator_with_validation_split_test_helper(0.50) + def test_img_utils(self): if PIL is None: return # Skip test if PIL is not available. @@ -241,6 +355,41 @@ class TestImage(test.TestCase): x = keras.preprocessing.image.img_to_array(img, data_format='channels_last') self.assertEqual(x.shape, (height, width, 1)) + def test_batch_standardize(self): + if PIL is None: + return # Skip test if PIL is not available. + + # ImageDataGenerator.standardize should work on batches + for test_images in _generate_test_images(): + img_list = [] + for im in test_images: + img_list.append(keras.preprocessing.image.img_to_array(im)[None, ...]) + + images = np.vstack(img_list) + generator = keras.preprocessing.image.ImageDataGenerator( + featurewise_center=True, + samplewise_center=True, + featurewise_std_normalization=True, + samplewise_std_normalization=True, + zca_whitening=True, + rotation_range=90., + width_shift_range=0.1, + height_shift_range=0.1, + shear_range=0.5, + zoom_range=0.2, + channel_shift_range=0., + brightness_range=(1, 5), + fill_mode='nearest', + cval=0.5, + horizontal_flip=True, + vertical_flip=True) + generator.fit(images, augment=True) + + transformed = np.copy(images) + for i, im in enumerate(transformed): + transformed[i] = generator.random_transform(im) + transformed = generator.standardize(transformed) + def test_img_transforms(self): x = np.random.random((3, 200, 200)) _ = keras.preprocessing.image.random_rotation(x, 20) diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py b/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py index a423d96d3d..e68c171d9c 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/sequence.py @@ -22,6 +22,8 @@ import random import numpy as np from six.moves import range # pylint: disable=redefined-builtin + +from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence from tensorflow.python.util.tf_export import tf_export @@ -32,29 +34,40 @@ def pad_sequences(sequences, padding='pre', truncating='pre', value=0.): - """Pads each sequence to the same length (length of the longest sequence). + """Pads sequences to the same length. + + This function transforms a list of + `num_samples` sequences (lists of integers) + into a 2D Numpy array of shape `(num_samples, num_timesteps)`. + `num_timesteps` is either the `maxlen` argument if provided, + or the length of the longest sequence otherwise. + + Sequences that are shorter than `num_timesteps` + are padded with `value` at the end. - If maxlen is provided, any sequence longer - than maxlen is truncated to maxlen. - Truncation happens off either the beginning (default) or - the end of the sequence. + Sequences longer than `num_timesteps` are truncated + so that they fit the desired length. + The position where padding or truncation happens is determined by + the arguments `padding` and `truncating`, respectively. - Supports post-padding and pre-padding (default). + Pre-padding is the default. Arguments: - sequences: list of lists where each element is a sequence - maxlen: int, maximum length - dtype: type to cast the resulting sequence. - padding: 'pre' or 'post', pad either before or after each sequence. - truncating: 'pre' or 'post', remove values from sequences larger than - maxlen either in the beginning or in the end of the sequence - value: float, value to pad the sequences to the desired value. + sequences: List of lists, where each element is a sequence. + maxlen: Int, maximum length of all sequences. + dtype: Type of the output sequences. + padding: String, 'pre' or 'post': + pad either before or after each sequence. + truncating: String, 'pre' or 'post': + remove values from sequences larger than + `maxlen`, either at the beginning or at the end of the sequences. + value: Float, padding value. Returns: - x: numpy array with dimensions (number_of_sequences, maxlen) + x: Numpy array with shape `(len(sequences), maxlen)` Raises: - ValueError: in case of invalid values for `truncating` or `padding`, + ValueError: In case of invalid values for `truncating` or `padding`, or in case of invalid shape for a `sequences` entry. """ if not hasattr(sequences, '__len__'): @@ -92,10 +105,9 @@ def pad_sequences(sequences, # check `trunc` has expected shape trunc = np.asarray(trunc, dtype=dtype) if trunc.shape[1:] != sample_shape: - raise ValueError( - 'Shape of sample %s of sequence at position %s is different from ' - 'expected shape %s' - % (trunc.shape[1:], idx, sample_shape)) + raise ValueError('Shape of sample %s of sequence at position %s ' + 'is different from expected shape %s' % + (trunc.shape[1:], idx, sample_shape)) if padding == 'post': x[idx, :len(trunc)] = trunc @@ -110,22 +122,26 @@ def pad_sequences(sequences, def make_sampling_table(size, sampling_factor=1e-5): """Generates a word rank-based probabilistic sampling table. - This generates an array where the ith element - is the probability that a word of rank i would be sampled, - according to the sampling distribution used in word2vec. + Used for generating the `sampling_table` argument for `skipgrams`. + `sampling_table[i]` is the probability of sampling + the word i-th most common word in a dataset + (more common words should be sampled less frequently, for balance). - The word2vec formula is: - p(word) = min(1, sqrt(word.frequency/sampling_factor) / - (word.frequency/sampling_factor)) + The sampling probabilities are generated according + to the sampling distribution used in word2vec: + + `p(word) = min(1, sqrt(word_frequency / sampling_factor) / (word_frequency / + sampling_factor))` We assume that the word frequencies follow Zipf's law (s=1) to derive a numerical approximation of frequency(rank): - frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank)) - where gamma is the Euler-Mascheroni constant. + + `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))` + where `gamma` is the Euler-Mascheroni constant. Arguments: - size: int, number of possible words to sample. - sampling_factor: the sampling factor in the word2vec formula. + size: Int, number of possible words to sample. + sampling_factor: The sampling factor in the word2vec formula. Returns: A 1D Numpy array of length `size` where the ith entry @@ -151,30 +167,37 @@ def skipgrams(sequence, seed=None): """Generates skipgram word pairs. - Takes a sequence (list of indexes of words), - returns couples of [word_index, other_word index] and labels (1s or 0s), - where label = 1 if 'other_word' belongs to the context of 'word', - and label=0 if 'other_word' is randomly sampled + This function transforms a sequence of word indexes (list of integers) + into tuples of words of the form: + + - (word, word in the same window), with label 1 (positive samples). + - (word, random word from the vocabulary), with label 0 (negative samples). + + Read more about Skipgram in this gnomic paper by Mikolov et al.: + [Efficient Estimation of Word Representations in + Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) Arguments: - sequence: a word sequence (sentence), encoded as a list + sequence: A word sequence (sentence), encoded as a list of word indices (integers). If using a `sampling_table`, word indices are expected to match the rank of the words in a reference dataset (e.g. 10 would encode the 10-th most frequently occurring token). Note that index 0 is expected to be a non-word and will be skipped. - vocabulary_size: int. maximum possible word index + 1 - window_size: int. actually half-window. - The window of a word wi will be [i-window_size, i+window_size+1] - negative_samples: float >= 0. 0 for no negative (=random) samples. - 1 for same number as positive samples. etc. - shuffle: whether to shuffle the word couples before returning them. + vocabulary_size: Int, maximum possible word index + 1 + window_size: Int, size of sampling windows (technically half-window). + The window of a word `w_i` will be + `[i - window_size, i + window_size+1]`. + negative_samples: Float >= 0. 0 for no negative (i.e. random) samples. + 1 for same number as positive samples. + shuffle: Whether to shuffle the word couples before returning them. categorical: bool. if False, labels will be - integers (eg. [0, 1, 1 .. ]), - if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ] + integers (eg. `[0, 1, 1 .. ]`), + if `True`, labels will be categorical, e.g. + `[[1,0],[0,1],[0,1] .. ]`. sampling_table: 1D array of size `vocabulary_size` where the entry i encodes the probability to sample a word of rank i. - seed: random seed. + seed: Random seed. Returns: couples, labels: where `couples` are int pairs and @@ -234,9 +257,9 @@ def _remove_long_seq(maxlen, seq, label): """Removes sequences that exceed the maximum length. Arguments: - maxlen: int, maximum length - seq: list of lists where each sublist is a sequence - label: list where each element is an integer + maxlen: Int, maximum length of the output sequences. + seq: List of lists, where each sublist is a sequence. + label: List where each element is an integer. Returns: new_seq, new_label: shortened lists for `seq` and `label`. @@ -247,3 +270,120 @@ def _remove_long_seq(maxlen, seq, label): new_seq.append(x) new_label.append(y) return new_seq, new_label + + +@tf_export('keras.preprocessing.sequence.TimeseriesGenerator') +class TimeseriesGenerator(Sequence): + """Utility class for generating batches of temporal data. + + This class takes in a sequence of data-points gathered at + equal intervals, along with time series parameters such as + stride, length of history, etc., to produce batches for + training/validation. + + Arguments: + data: Indexable generator (such as list or Numpy array) + containing consecutive data points (timesteps). + The data should be at 2D, and axis 0 is expected + to be the time dimension. + targets: Targets corresponding to timesteps in `data`. + It should have same length as `data`. + length: Length of the output sequences (in number of timesteps). + sampling_rate: Period between successive individual timesteps + within sequences. For rate `r`, timesteps + `data[i]`, `data[i-r]`, ... `data[i - length]` + are used for create a sample sequence. + stride: Period between successive output sequences. + For stride `s`, consecutive output samples would + be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. + start_index, end_index: Data points earlier than `start_index` + or later than `end_index` will not be used in the output sequences. + This is useful to reserve part of the data for test or validation. + shuffle: Whether to shuffle output samples, + or instead draw them in chronological order. + reverse: Boolean: if `true`, timesteps in each output sample will be + in reverse chronological order. + batch_size: Number of timeseries samples in each batch + (except maybe the last one). + + Returns: + A [Sequence](/utils/#sequence) instance. + + Examples: + + ```python + from keras.preprocessing.sequence import TimeseriesGenerator + import numpy as np + + data = np.array([[i] for i in range(50)]) + targets = np.array([[i] for i in range(50)]) + + data_gen = TimeseriesGenerator(data, targets, + length=10, sampling_rate=2, + batch_size=2) + assert len(data_gen) == 20 + + batch_0 = data_gen[0] + x, y = batch_0 + assert np.array_equal(x, + np.array([[[0], [2], [4], [6], [8]], + [[1], [3], [5], [7], [9]]])) + assert np.array_equal(y, + np.array([[10], [11]])) + ``` + """ + + def __init__(self, + data, + targets, + length, + sampling_rate=1, + stride=1, + start_index=0, + end_index=None, + shuffle=False, + reverse=False, + batch_size=128): + self.data = data + self.targets = targets + self.length = length + self.sampling_rate = sampling_rate + self.stride = stride + self.start_index = start_index + length + if end_index is None: + end_index = len(data) - 1 + self.end_index = end_index + self.shuffle = shuffle + self.reverse = reverse + self.batch_size = batch_size + + def __len__(self): + length = int( + np.ceil((self.end_index - self.start_index) / + (self.batch_size * self.stride))) + return length if length >= 0 else 0 + + def _empty_batch(self, num_rows): + samples_shape = [num_rows, self.length // self.sampling_rate] + samples_shape.extend(self.data.shape[1:]) + targets_shape = [num_rows] + targets_shape.extend(self.targets.shape[1:]) + return np.empty(samples_shape), np.empty(targets_shape) + + def __getitem__(self, index): + if self.shuffle: + rows = np.random.randint( + self.start_index, self.end_index, size=self.batch_size) + else: + i = self.start_index + self.batch_size * self.stride * index + rows = np.arange(i, min(i + self.batch_size * self.stride, + self.end_index), self.stride) + + samples, targets = self._empty_batch(len(rows)) + for j in range(len(rows)): + indices = range(rows[j] - self.length, rows[j], self.sampling_rate) + samples[j] = self.data[indices] + targets[j] = self.targets[rows[j]] + if self.reverse: + return samples[:, ::-1, ...], targets + return samples, targets diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/sequence_test.py b/tensorflow/python/keras/_impl/keras/preprocessing/sequence_test.py index 4529e6e94f..b9bfdd0004 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/sequence_test.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/sequence_test.py @@ -84,15 +84,91 @@ class TestSequence(test.TestCase): couples, labels = keras.preprocessing.sequence.skipgrams( np.arange(3), vocabulary_size=3) for couple in couples: - assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] + self.assertIn(couple[0], [0, 1, 2]) + self.assertIn(couple[1], [0, 1, 2]) # test window size and categorical labels couples, labels = keras.preprocessing.sequence.skipgrams( np.arange(5), vocabulary_size=5, window_size=1, categorical=True) for couple in couples: - assert couple[0] - couple[1] <= 3 + self.assertLessEqual(couple[0] - couple[1], 3) for l in labels: - assert len(l) == 2 + self.assertEqual(len(l), 2) + + def test_TimeseriesGenerator(self): + data = np.array([[i] for i in range(50)]) + targets = np.array([[i] for i in range(50)]) + + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, batch_size=2) + self.assertEqual(len(data_gen), 20) + self.assertAllClose(data_gen[0][0], + np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], + [7], [9]]])) + self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) + self.assertAllClose(data_gen[1][0], + np.array([[[2], [4], [6], [8], [10]], [[3], [5], [7], + [9], [11]]])) + self.assertAllClose(data_gen[1][1], np.array([[12], [13]])) + + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, reverse=True, batch_size=2) + self.assertEqual(len(data_gen), 20) + self.assertAllClose(data_gen[0][0], + np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5], + [3], [1]]])) + self.assertAllClose(data_gen[0][1], np.array([[10], [11]])) + + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, shuffle=True, batch_size=1) + batch = data_gen[0] + r = batch[1][0][0] + self.assertAllClose(batch[0], + np.array([[[r - 10], [r - 8], [r - 6], [r - 4], + [r - 2]]])) + self.assertAllClose(batch[1], np.array([ + [r], + ])) + + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, targets, length=10, sampling_rate=2, stride=2, batch_size=2) + self.assertEqual(len(data_gen), 10) + self.assertAllClose(data_gen[1][0], + np.array([[[4], [6], [8], [10], [12]], [[6], [8], [10], + [12], [14]]])) + self.assertAllClose(data_gen[1][1], np.array([[14], [16]])) + + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + start_index=10, + end_index=30, + batch_size=2) + self.assertEqual(len(data_gen), 5) + self.assertAllClose(data_gen[0][0], + np.array([[[10], [12], [14], [16], [18]], + [[11], [13], [15], [17], [19]]])) + self.assertAllClose(data_gen[0][1], np.array([[20], [21]])) + + data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)]) + targets = np.array([np.random.random_sample((3, 2, 1)) for i in range(50)]) + data_gen = keras.preprocessing.sequence.TimeseriesGenerator( + data, + targets, + length=10, + sampling_rate=2, + start_index=10, + end_index=30, + batch_size=2) + + self.assertEqual(len(data_gen), 5) + self.assertAllClose(data_gen[0][0], + np.array( + [np.array(data[10:19:2]), + np.array(data[11:20:2])])) + self.assertAllClose(data_gen[0][1], np.array([targets[20], targets[21]])) if __name__ == '__main__': diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/text.py b/tensorflow/python/keras/_impl/keras/preprocessing/text.py index 1e3828ccf1..f652f318f3 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/text.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/text.py @@ -91,6 +91,7 @@ def one_hot(text, text, n, hash_function=hash, filters=filters, lower=lower, split=split) +@tf_export('keras.preprocessing.text.hashing_trick') def hashing_trick(text, n, hash_function=None, @@ -187,21 +188,27 @@ class Tokenizer(object): self.document_count = 0 self.char_level = char_level self.oov_token = oov_token + self.index_docs = {} def fit_on_texts(self, texts): """Updates internal vocabulary based on a list of texts. + In the case where texts contains lists, we assume each entry of the lists + to be a token. + Required before using `texts_to_sequences` or `texts_to_matrix`. Arguments: texts: can be a list of strings, - or a generator of strings (for memory-efficiency) + a generator of strings (for memory-efficiency), + or a list of list of strings. """ - self.document_count = 0 for text in texts: self.document_count += 1 - seq = text if self.char_level else text_to_word_sequence( - text, self.filters, self.lower, self.split) + if self.char_level or isinstance(text, list): + seq = text + else: + seq = text_to_word_sequence(text, self.filters, self.lower, self.split) for w in seq: if w in self.word_counts: self.word_counts[w] += 1 @@ -226,7 +233,6 @@ class Tokenizer(object): if i is None: self.word_index[self.oov_token] = len(self.word_index) + 1 - self.index_docs = {} for w, c in list(self.word_docs.items()): self.index_docs[self.word_index[w]] = c @@ -240,8 +246,7 @@ class Tokenizer(object): sequences: A list of sequence. A "sequence" is a list of integer word indices. """ - self.document_count = len(sequences) - self.index_docs = {} + self.document_count += len(sequences) for seq in sequences: seq = set(seq) for i in seq: @@ -268,7 +273,11 @@ class Tokenizer(object): return res def texts_to_sequences_generator(self, texts): - """Transforms each text in texts in a sequence of integers. + """Transforms each text in `texts` in a sequence of integers. + + Each item in texts can also be a list, in which case we assume each item of + that list + to be a token. Only top "num_words" most frequent words will be taken into account. Only words known by the tokenizer will be taken into account. @@ -281,8 +290,10 @@ class Tokenizer(object): """ num_words = self.num_words for text in texts: - seq = text if self.char_level else text_to_word_sequence( - text, self.filters, self.lower, self.split) + if self.char_level or isinstance(text, list): + seq = text + else: + seq = text_to_word_sequence(text, self.filters, self.lower, self.split) vect = [] for w in seq: i = self.word_index.get(w) diff --git a/tensorflow/python/keras/_impl/keras/preprocessing/text_test.py b/tensorflow/python/keras/_impl/keras/preprocessing/text_test.py index a934e331c4..c6a267e57e 100644 --- a/tensorflow/python/keras/_impl/keras/preprocessing/text_test.py +++ b/tensorflow/python/keras/_impl/keras/preprocessing/text_test.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -80,17 +81,52 @@ class TestText(test.TestCase): x_train = ['This text has only known words'] x_test = ['This text has some unknown words'] # 2 OOVs: some, unknown - # Defalut, without OOV flag + # Default, without OOV flag tokenizer = keras.preprocessing.text.Tokenizer() tokenizer.fit_on_texts(x_train) x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 4 # discards 2 OOVs + self.assertEqual(len(x_test_seq[0]), 4) # discards 2 OOVs # With OOV feature tokenizer = keras.preprocessing.text.Tokenizer(oov_token='') tokenizer.fit_on_texts(x_train) x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 6 # OOVs marked in place + self.assertEqual(len(x_test_seq[0]), 6) # OOVs marked in place + + def test_sequential_fit(self): + texts = [ + 'The cat sat on the mat.', 'The dog sat on the log.', + 'Dogs and cats living together.' + ] + word_sequences = [['The', 'cat', 'is', 'sitting'], + ['The', 'dog', 'is', 'standing']] + tokenizer = keras.preprocessing.text.Tokenizer() + tokenizer.fit_on_texts(texts) + tokenizer.fit_on_texts(word_sequences) + + self.assertEqual(tokenizer.document_count, 5) + + tokenizer.texts_to_matrix(texts) + tokenizer.texts_to_matrix(word_sequences) + + def test_text_to_word_sequence(self): + text = 'hello! ? world!' + seq = keras.preprocessing.text.text_to_word_sequence(text) + self.assertEqual(seq, ['hello', 'world']) + + def test_text_to_word_sequence_unicode(self): + text = u'ali! veli? kırk dokuz elli' + seq = keras.preprocessing.text.text_to_word_sequence(text) + self.assertEqual(seq, [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) + + def test_tokenizer_unicode(self): + texts = [ + u'ali veli kırk dokuz elli', u'ali veli kırk dokuz elli veli kırk dokuz' + ] + tokenizer = keras.preprocessing.text.Tokenizer(num_words=5) + tokenizer.fit_on_texts(texts) + + self.assertEqual(len(tokenizer.word_counts), 5) if __name__ == '__main__': diff --git a/tensorflow/python/keras/preprocessing/image/__init__.py b/tensorflow/python/keras/preprocessing/image/__init__.py index b96e767552..6aba5fc825 100644 --- a/tensorflow/python/keras/preprocessing/image/__init__.py +++ b/tensorflow/python/keras/preprocessing/image/__init__.py @@ -27,6 +27,7 @@ from tensorflow.python.keras._impl.keras.preprocessing.image import img_to_array from tensorflow.python.keras._impl.keras.preprocessing.image import Iterator from tensorflow.python.keras._impl.keras.preprocessing.image import load_img from tensorflow.python.keras._impl.keras.preprocessing.image import NumpyArrayIterator +from tensorflow.python.keras._impl.keras.preprocessing.image import random_brightness from tensorflow.python.keras._impl.keras.preprocessing.image import random_channel_shift from tensorflow.python.keras._impl.keras.preprocessing.image import random_rotation from tensorflow.python.keras._impl.keras.preprocessing.image import random_shear diff --git a/tensorflow/python/keras/preprocessing/sequence/__init__.py b/tensorflow/python/keras/preprocessing/sequence/__init__.py index 112f6af5e5..b7a7149cc4 100644 --- a/tensorflow/python/keras/preprocessing/sequence/__init__.py +++ b/tensorflow/python/keras/preprocessing/sequence/__init__.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.python.keras._impl.keras.preprocessing.sequence import make_sampling_table from tensorflow.python.keras._impl.keras.preprocessing.sequence import pad_sequences from tensorflow.python.keras._impl.keras.preprocessing.sequence import skipgrams +from tensorflow.python.keras._impl.keras.preprocessing.sequence import TimeseriesGenerator del absolute_import del division diff --git a/tensorflow/python/keras/preprocessing/text/__init__.py b/tensorflow/python/keras/preprocessing/text/__init__.py index 5bf1a2fb21..000ad68a0c 100644 --- a/tensorflow/python/keras/preprocessing/text/__init__.py +++ b/tensorflow/python/keras/preprocessing/text/__init__.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras._impl.keras.preprocessing.text import hashing_trick from tensorflow.python.keras._impl.keras.preprocessing.text import one_hot from tensorflow.python.keras._impl.keras.preprocessing.text import text_to_word_sequence from tensorflow.python.keras._impl.keras.preprocessing.text import Tokenizer diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt index 04174bff5f..ec0f3d892d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], " + argspec: "args=[\'self\', \'directory\', \'image_data_generator\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'subset\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'None\', \'\', \'png\', \'False\', \'None\', \'nearest\'], " } member_method { name: "next" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt index 41f27d1f74..f5bc04e44c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'featurewise_center\', \'samplewise_center\', \'featurewise_std_normalization\', \'samplewise_std_normalization\', \'zca_whitening\', \'zca_epsilon\', \'rotation_range\', \'width_shift_range\', \'height_shift_range\', \'shear_range\', \'zoom_range\', \'channel_shift_range\', \'fill_mode\', \'cval\', \'horizontal_flip\', \'vertical_flip\', \'rescale\', \'preprocessing_function\', \'data_format\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'1e-06\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'0.0\', \'nearest\', \'0.0\', \'False\', \'False\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'featurewise_center\', \'samplewise_center\', \'featurewise_std_normalization\', \'samplewise_std_normalization\', \'zca_whitening\', \'zca_epsilon\', \'rotation_range\', \'width_shift_range\', \'height_shift_range\', \'brightness_range\', \'shear_range\', \'zoom_range\', \'channel_shift_range\', \'fill_mode\', \'cval\', \'horizontal_flip\', \'vertical_flip\', \'rescale\', \'preprocessing_function\', \'data_format\', \'validation_split\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'False\', \'False\', \'False\', \'1e-06\', \'0.0\', \'0.0\', \'0.0\', \'None\', \'0.0\', \'0.0\', \'0.0\', \'nearest\', \'0.0\', \'False\', \'False\', \'None\', \'None\', \'None\', \'0.0\'], " } member_method { name: "fit" @@ -12,11 +12,11 @@ tf_class { } member_method { name: "flow" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\'], varargs=None, keywords=None, defaults=[\'None\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'subset\'], varargs=None, keywords=None, defaults=[\'None\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'None\'], " } member_method { name: "flow_from_directory" - argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\', \'nearest\'], " + argspec: "args=[\'self\', \'directory\', \'target_size\', \'color_mode\', \'classes\', \'class_mode\', \'batch_size\', \'shuffle\', \'seed\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'follow_links\', \'subset\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'(256, 256)\', \'rgb\', \'None\', \'categorical\', \'32\', \'True\', \'None\', \'None\', \'\', \'png\', \'False\', \'None\', \'nearest\'], " } member_method { name: "random_transform" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt index 4ef6e6e99e..42196ddeee 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt @@ -6,7 +6,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'x\', \'y\', \'image_data_generator\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\'], varargs=None, keywords=None, defaults=[\'32\', \'False\', \'None\', \'None\', \'None\', \'\', \'png\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'image_data_generator\', \'batch_size\', \'shuffle\', \'seed\', \'data_format\', \'save_to_dir\', \'save_prefix\', \'save_format\', \'subset\'], varargs=None, keywords=None, defaults=[\'32\', \'False\', \'None\', \'None\', \'None\', \'\', \'png\', \'None\'], " } member_method { name: "next" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt index d28fef6965..6b850dd6b7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt @@ -36,6 +36,10 @@ tf_module { name: "load_img" argspec: "args=[\'path\', \'grayscale\', \'target_size\', \'interpolation\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'nearest\'], " } + member_method { + name: "random_brightness" + argspec: "args=[\'x\', \'brightness_range\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "random_channel_shift" argspec: "args=[\'x\', \'intensity\', \'channel_axis\'], varargs=None, keywords=None, defaults=[\'0\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.-timeseries-generator.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.-timeseries-generator.pbtxt new file mode 100644 index 0000000000..d9c3215b55 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.-timeseries-generator.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.keras.preprocessing.sequence.TimeseriesGenerator" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'data\', \'targets\', \'length\', \'sampling_rate\', \'stride\', \'start_index\', \'end_index\', \'shuffle\', \'reverse\', \'batch_size\'], varargs=None, keywords=None, defaults=[\'1\', \'1\', \'0\', \'None\', \'False\', \'False\', \'128\'], " + } + member_method { + name: "on_epoch_end" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.pbtxt index 1b01935cc5..cf59f8a272 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.keras.preprocessing.sequence" tf_module { + member { + name: "TimeseriesGenerator" + mtype: "" + } member_method { name: "make_sampling_table" argspec: "args=[\'size\', \'sampling_factor\'], varargs=None, keywords=None, defaults=[\'1e-05\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.pbtxt index d106429df0..50b54fc7e1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Tokenizer" mtype: "" } + member_method { + name: "hashing_trick" + argspec: "args=[\'text\', \'n\', \'hash_function\', \'filters\', \'lower\', \'split\'], varargs=None, keywords=None, defaults=[\'None\', \'!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n\', \'True\', \' \'], " + } member_method { name: "one_hot" argspec: "args=[\'text\', \'n\', \'filters\', \'lower\', \'split\'], varargs=None, keywords=None, defaults=[\'!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n\', \'True\', \' \'], " -- GitLab From 1004396a769ad9fdf350ed28083bca5b6ad00402 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 22 Mar 2018 14:24:23 -0700 Subject: [PATCH 376/960] Remove use of deprecated API from RNN Colorbot example. PiperOrigin-RevId: 190125356 --- .../examples/rnn_colorbot/rnn_colorbot.py | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py index 29f0232454..88fffc962f 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py @@ -60,6 +60,7 @@ import functools import os import sys import time +import urllib import six import tensorflow as tf @@ -89,13 +90,35 @@ def parse(line): return rgb, chars, length +def maybe_download(filename, work_directory, source_url): + """Download the data from source url, unless it's already here. + + Args: + filename: string, name of the file in the directory. + work_directory: string, path to working directory. + source_url: url to download from if file doesn't exist. + + Returns: + Path to resulting file. + """ + if not tf.gfile.Exists(work_directory): + tf.gfile.MakeDirs(work_directory) + filepath = os.path.join(work_directory, filename) + if not tf.gfile.Exists(filepath): + temp_file_name, _ = urllib.request.urlretrieve(source_url) + tf.gfile.Copy(temp_file_name, filepath) + with tf.gfile.GFile(filepath) as f: + size = f.size() + print("Successfully downloaded", filename, size, "bytes.") + return filepath + + def load_dataset(data_dir, url, batch_size): """Loads the colors data at path into a PaddedDataset.""" # Downloads data at url into data_dir/basename(url). The dataset has a header # row (color_name, r, g, b) followed by comma-separated lines. - path = tf.contrib.learn.datasets.base.maybe_download( - os.path.basename(url), data_dir, url) + path = maybe_download(os.path.basename(url), data_dir, url) # This chain of commands loads our data by: # 1. skipping the header; (.skip(1)) -- GitLab From a34a3b2035ca0cfd48488c03bd4b088070bf9a25 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 22 Mar 2018 14:32:12 -0700 Subject: [PATCH 377/960] Fixing the issue where MKL-DNN is getting built when not using --config=mkl --- tensorflow/tensorflow.bzl | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9b0db8a112..8549c34691 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -788,7 +788,33 @@ def tf_cc_test_mkl(srcs, tags=[], size="medium", args=None): - if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions")) + for src in srcs: + native.cc_test( + name=src_to_test_name(src), + srcs=if_mkl([src]) + tf_binary_additional_srcs(), + copts=tf_copts(), + linkopts=select({ + clean_dep("//tensorflow:android"): [ + "-pie", + ], + clean_dep("//tensorflow:windows"): [], + clean_dep("//tensorflow:windows_msvc"): [], + "//conditions:default": [ + "-lpthread", + "-lm" + ], + }) + _rpath_linkopts(src_to_test_name(src)), + deps=deps + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), + linkstatic=linkstatic, + tags=tags, + size=size, + args=args, + nocopts="-fno-exceptions") + def tf_cc_tests_gpu(srcs, deps, @@ -1006,16 +1032,12 @@ register_extension_info( def tf_mkl_kernel_library(name, prefix=None, srcs=None, - gpu_srcs=None, hdrs=None, deps=None, alwayslink=1, copts=tf_copts(), - nocopts="-fno-exceptions", - **kwargs): + nocopts="-fno-exceptions"): """A rule to build MKL-based TensorFlow kernel libraries.""" - gpu_srcs = gpu_srcs # unused argument - kwargs = kwargs # unused argument if not bool(srcs): srcs = [] @@ -1028,16 +1050,15 @@ def tf_mkl_kernel_library(name, hdrs = hdrs + native.glob( [prefix + "*.h"]) - if_mkl( - native.cc_library( - name=name, - srcs=srcs, - hdrs=hdrs, - deps=deps, - alwayslink=alwayslink, - copts=copts, - nocopts=nocopts - )) + native.cc_library( + name=name, + srcs=if_mkl(srcs), + hdrs=hdrs, + deps=deps, + alwayslink=alwayslink, + copts=copts, + nocopts=nocopts + ) register_extension_info( extension_name = "tf_mkl_kernel_library", -- GitLab From 1a99109e8832bc94710d2dcfb5d9525688913a50 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Thu, 22 Mar 2018 14:38:41 -0700 Subject: [PATCH 378/960] Merge consecutive broadcast HLO instructions. As an optimization, replace consecutive broadcast instructions with a single equivalent broadcast in algebraic simplification. PiperOrigin-RevId: 190127730 --- .../xla/service/algebraic_simplifier.cc | 22 ++++++-- .../xla/service/algebraic_simplifier_test.cc | 51 +++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 971c2935c8..88f6ff0a07 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1121,10 +1121,10 @@ bool OutputIsSubsetOfOperandElements(HloInstruction* instruction, Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { auto operand = broadcast->mutable_operand(0); + auto dims = broadcast->dimensions(); // A degenerate broadcast of a reshape that does not change the number of // elements can be replaced by a reshape. - if (std::is_sorted(broadcast->dimensions().begin(), - broadcast->dimensions().end()) && + if (std::is_sorted(dims.begin(), dims.end()) && ShapeUtil::ElementsIn(broadcast->shape()) == ShapeUtil::ElementsIn(operand->shape())) { VLOG(10) << "transform broadcast(X) -> reshape(X) where " @@ -1142,8 +1142,8 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { VLOG(10) << "transform broadcast(X) -> transpose(X) where " "n(broadcast(X)) == n(X)"; return ReplaceWithNewInstruction( - broadcast, HloInstruction::CreateTranspose(broadcast->shape(), operand, - broadcast->dimensions())); + broadcast, + HloInstruction::CreateTranspose(broadcast->shape(), operand, dims)); } // A broadcast of a reshape which merely inserts 1-sized dimensions can @@ -1157,7 +1157,6 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { if (merely_inserts_or_deletes_1_sized_dimensions && deleted_indices.empty()) { std::reverse(inserted_indices.begin(), inserted_indices.end()); - auto dims = broadcast->dimensions(); for (auto inserted_index : inserted_indices) { dims.erase(dims.begin() + inserted_index); } @@ -1201,6 +1200,19 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { return user->ReplaceAllUsesWith(new_broadcast); } } + return Status::OK(); + } + + // Merge two consecutive broadcasts into a single one. + if (operand->opcode() == HloOpcode::kBroadcast) { + std::vector new_dimensions(operand->dimensions().size()); + for (auto dim : operand->dimensions()) { + new_dimensions.push_back(dims[dim]); + } + return ReplaceWithNewInstruction( + broadcast, + HloInstruction::CreateBroadcast( + broadcast->shape(), operand->mutable_operand(0), new_dimensions)); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 451294ef5d..3b80a827bf 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -35,6 +35,8 @@ limitations under the License. #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/strings/str_util.h" +using ::testing::ElementsAre; + namespace xla { namespace { @@ -2462,6 +2464,55 @@ TEST_F(AlgebraicSimplifierTest, TrivialDynamicUpdateSlice) { op::DynamicSlice(op::Parameter(), op::Parameter())); } +// Test that two consecutive broadcasts can be merged to one. +TEST_F(AlgebraicSimplifierTest, MergeBroadcasts) { + HloComputation::Builder builder(TestName()); + Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 2}); + HloInstruction* input_array = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({3, 4}))); + HloInstruction* inner_bcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(r2f32, input_array, {1})); + Shape r3f32 = ShapeUtil::MakeShape(F32, {2, 2, 2}); + builder.AddInstruction( + HloInstruction::CreateBroadcast(r3f32, inner_bcast, {0, 2})); + + auto computation = module().AddEntryComputation(builder.Build()); + HloInstruction* root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kBroadcast); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(&module()).ValueOrDie()); + root = computation->root_instruction(); + EXPECT_THAT(root, op::Broadcast(op::Constant())); + EXPECT_THAT(root->dimensions(), ElementsAre(2)); +} + +// Test that two consecutive broadcasts can be merged to one. +TEST_F(AlgebraicSimplifierTest, MergeBroadcasts2) { + HloComputation::Builder builder(TestName()); + Shape r2f32 = ShapeUtil::MakeShape(F32, {2, 3}); + Shape r3f32 = ShapeUtil::MakeShape(F32, {2, 5, 3}); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r2f32, "param0")); + // The initial dimensions go to places 0 and 2 in the 3-dim array, + // and to places 1 and 3 in the 4-dim array, + HloInstruction* inner_bcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(r3f32, param0, {0, 2})); + Shape r4f32 = ShapeUtil::MakeShape(F32, {4, 2, 5, 3}); + builder.AddInstruction( + HloInstruction::CreateBroadcast(r4f32, inner_bcast, {1, 2, 3})); + + auto computation = module().AddEntryComputation(builder.Build()); + HloInstruction* root = computation->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kBroadcast); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(&module()).ValueOrDie()); + root = computation->root_instruction(); + EXPECT_THAT(root, op::Broadcast(op::Parameter(0))); + EXPECT_THAT(root->dimensions(), ElementsAre(1, 3)); +} + struct PadReduceWindowEffectiveBroadcastCase { std::vector input_spatials; std::vector symmetric_pad_spatials; -- GitLab From 730e69519a93a668d97ea298d52365326c00357d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 22 Mar 2018 14:47:22 -0700 Subject: [PATCH 379/960] Automated g4 rollback of changelist 190021164 PiperOrigin-RevId: 190129094 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/c_api.cc | 194 ++++++----------- tensorflow/c/eager/c_api_internal.h | 84 +------- tensorflow/core/common_runtime/eager/BUILD | 22 ++ .../core/common_runtime/eager/context.cc | 153 ++++++++++++++ .../core/common_runtime/eager/context.h | 198 ++++++++++++++++++ 6 files changed, 450 insertions(+), 203 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/context.cc create mode 100644 tensorflow/core/common_runtime/eager/context.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 841ff48a38..bea5a121b3 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -28,6 +28,7 @@ tf_cuda_library( "//tensorflow/c:c_api", "//tensorflow/c:c_api_internal", "//tensorflow/core:core_cpu", + "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core:core_cpu_internal", @@ -64,6 +65,7 @@ tf_cuda_library( "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", ], diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index a23015c99e..2402a6d044 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -71,18 +71,6 @@ std::atomic_int_fast64_t func_id_generator(0); } // namespace -TFE_ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy) { - if (!soft_placement) { - return original_policy; - } - if (original_policy == TFE_DEVICE_PLACEMENT_EXPLICIT || - original_policy == TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) { - return TFE_DEVICE_PLACEMENT_SILENT; - } - return original_policy; -} - extern "C" { TFE_ContextOptions* TFE_NewContextOptions() { return new TFE_ContextOptions; } @@ -104,19 +92,7 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, unsigned char async, TF_Status* status) { - { - tensorflow::mutex_lock l(ctx->async_map_mu); - ctx->thread_local_async[std::this_thread::get_id()] = async; - } - if (async) { - ctx->executor.EnableAsync(); - } else { - // TODO(agarwal): Currently we add a wait here to handle cases where a sync - // op has a control dependency on an async op, and the latter has not - // executed yet. This wait can be removed by storing all the control inputs - // and waiting for them when executing ops. - status->status = ctx->executor.WaitForAllPendingNodes(); - } + status->status = ctx->context.SetAsyncForThread(async); } void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } @@ -133,34 +109,26 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { new tensorflow::DeviceMgr(devices)); tensorflow::Rendezvous* r = new tensorflow::IntraProcessRendezvous(device_mgr.get()); - return new TFE_Context(*opts, std::move(device_mgr), r); + return new TFE_Context(opts->session_options.options, opts->policy, + opts->async, std::move(device_mgr), r); } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.WaitForAllPendingNodes(); - { - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); - } - ctx->rendezvous->Unref(); delete ctx; } TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { TF_DeviceList* list = new TF_DeviceList; - ctx->device_manager->ListDeviceAttributes(&list->response); + ctx->context.device_mgr()->ListDeviceAttributes(&list->response); return list; } -void TFE_ContextClearCaches(TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); -} +void TFE_ContextClearCaches(TFE_Context* ctx) { ctx->context.ClearCaches(); } void TFE_ContextSetThreadLocalDevicePlacementPolicy( TFE_Context* ctx, TFE_ContextDevicePlacementPolicy policy) { - tensorflow::mutex_lock ml(ctx->policy_map_mu); - ctx->thread_local_policies[std::this_thread::get_id()] = policy; + ctx->context.SetThreadLocalDevicePlacementPolicy( + static_cast(policy)); } // Note: this function looks up a thread local policy. So it should be called in @@ -168,25 +136,20 @@ void TFE_ContextSetThreadLocalDevicePlacementPolicy( // safe to call this function from the async EagerExecutor threads. extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->policy_map_mu); - auto policy_map_it = - ctx->thread_local_policies.find(std::this_thread::get_id()); - if (policy_map_it != ctx->thread_local_policies.end()) { - return policy_map_it->second; - } - return ctx->policy; + return static_cast( + ctx->context.GetDevicePlacementPolicy()); } void TFE_ContextAsyncWait(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.WaitForAllPendingNodes(); + status->status = ctx->context.AsyncWait(); } void TFE_ContextGetStatus(TFE_Context* ctx, TF_Status* status) { - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); } void TFE_ContextAsyncClearError(TFE_Context* ctx) { - ctx->executor.ClearError(); + ctx->context.ClearAsyncError(); } TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { @@ -259,7 +222,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, // nullptr. tensorflow::Device* src_opd = nullptr; TF_RETURN_IF_ERROR(h->TensorAndDevice(&src, &srcd, &src_opd)); - if (srcd == nullptr) srcd = ctx->devices[0]; + if (srcd == nullptr) srcd = ctx->context.HostCPU(); bool is_same_device = (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); const bool dst_cpu = IsCPU(dstd); @@ -332,8 +295,7 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, status->status = tensorflow::AttrTypeMapForOp(name, &types); if (status->status.ok()) return new TFE_Op(ctx, name, types); if (TF_GetCode(status) == TF_NOT_FOUND) { - tensorflow::mutex_lock l(ctx->functions_mu); - if (ctx->func_lib_def.Find(name) != nullptr) { + if (ctx->context.FindFunctionByName(name)) { status->status = tensorflow::Status::OK(); return new TFE_Op(ctx, name, nullptr); } @@ -346,20 +308,14 @@ void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { - auto it = op->ctx->devices_map.find(device_name); - if (it == op->ctx->devices_map.end()) { - status->status = - tensorflow::errors::InvalidArgument(device_name, " unknown device."); - return; - } - d = it->second; + status->status = op->ctx->context.FindDeviceByName(device_name, &d); } op->device = d; } const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->devices[0] : op->device; + (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device; return device->name().c_str(); } @@ -634,7 +590,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, TFE_Context* ctx, TF_Status* status) { tensorflow::DeviceSet ds; - for (tensorflow::Device* d : ctx->devices) { + for (tensorflow::Device* d : *ctx->context.devices()) { ds.AddDevice(d); } tensorflow::DeviceTypeVector final_devices; @@ -648,7 +604,7 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, "Could not find valid device for node ", ndef.DebugString()); return nullptr; } - for (tensorflow::Device* d : ctx->devices) { + for (tensorflow::Device* d : *ctx->context.devices()) { if (d->device_type() == final_devices[0].type_string()) { return d; } @@ -663,9 +619,8 @@ tensorflow::Status Execute( const tensorflow::gtl::InlinedVector& op_inputs, tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, TFE_TensorHandle** retvals, int num_retvals) { - if (!ctx->soft_placement && device == nullptr) { - // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU - device = ctx->devices[0]; + if (!ctx->context.SoftPlacement() && device == nullptr) { + device = ctx->context.HostCPU(); } if (device == nullptr) { @@ -684,8 +639,8 @@ tensorflow::Status Execute( inputs[i] = *input_tensor; } // WARNING: kernel->Run utilizes the FunctionLibraryRuntime - // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def, - // which is GUARDED_BY(ctx->functions_mu). But knowledge of the implementation + // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def. + // But knowledge of the implementation // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here. // This is quite subtle. Re-work things to make this better? (Would it make @@ -697,18 +652,18 @@ tensorflow::Status Execute( if (maybe_stats != nullptr) { maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - maybe_stats->all_start_micros()); - tensorflow::mutex_lock ml(ctx->metadata_mu); - if (ctx->should_store_metadata.load()) { - auto* step_stats = ctx->run_metadata.mutable_step_stats(); + tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); + if (ctx->context.ShouldStoreMetadata()) { + auto* step_stats = ctx->context.RunMetadataProto()->mutable_step_stats(); // Lazily initialize the RunMetadata with information about all devices if // this is the first call. - while (step_stats->dev_stats_size() < ctx->devices.size()) { + while (step_stats->dev_stats_size() < ctx->context.devices()->size()) { step_stats->add_dev_stats(); } // Find the current device's index. int device_idx = 0; - for (int i = 0; i < ctx->devices.size(); ++i) { - if (ctx->devices[i] == device) { + for (int i = 0; i < ctx->context.devices()->size(); ++i) { + if (ctx->context.devices()->at(i) == device) { device_idx = i; break; } @@ -744,7 +699,7 @@ class ExecuteNode : public tensorflow::EagerNode { tensorflow::NodeExecStats* maybe_stats, const tensorflow::DataTypeVector& output_dtypes, TFE_TensorHandle** retvals, int num_retvals) - : tensorflow::EagerNode(op->ctx->executor.NextId()), + : tensorflow::EagerNode(op->ctx->context.NextId()), ctx_(op->ctx), op_device_(op->device), inputs_(op->inputs), @@ -800,7 +755,7 @@ class CopyToDeviceNode : public tensorflow::EagerNode { public: CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, TFE_Context* ctx) - : tensorflow::EagerNode(ctx->executor.NextId()), + : tensorflow::EagerNode(ctx->context.NextId()), src_(src), dstd_(dstd), ctx_(ctx), @@ -866,8 +821,7 @@ const tensorflow::FunctionDef* OpToFunction( TFE_Context* ctx = op->ctx; const tensorflow::OpRegistrationData* op_data; { - tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.LookUp(op->name, &op_data); + status->status = ctx->context.FindFunctionOpData(op->name, &op_data); if (!status->status.ok()) { return nullptr; } @@ -963,10 +917,9 @@ const tensorflow::FunctionDef* OpToFunction( } VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString(); - tensorflow::mutex_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.AddFunctionDef(fdef); + ctx->context.AddFunctionDef(fdef); if (!status->status.ok()) return nullptr; - const auto ret = ctx->func_lib_def.Find(signature->name()); + const auto ret = ctx->context.FindFunctionDef(signature->name()); DCHECK(ret != nullptr); return ret; } @@ -985,8 +938,7 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { const tensorflow::FunctionDef* fdef; { - tensorflow::tf_shared_lock l(op->ctx->functions_mu); - fdef = op->ctx->func_lib_def.Find(op->name); + fdef = op->ctx->context.FindFunctionDef(op->name); } std::vector const_input_types; std::vector arg_input_types; @@ -1063,7 +1015,7 @@ extern "C" { void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { TFE_Context* ctx = op->ctx; - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); if (!status->status.ok()) { return; } @@ -1087,7 +1039,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && input_op_device != op->device) { tensorflow::Device* d = - input_op_device == nullptr ? ctx->devices[0] : input_op_device; + input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device; VLOG(1) << "Changing device of operation " << op->name << " to " << d->name() << " because input #" << i << " is a resource in this device."; @@ -1095,40 +1047,35 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } tensorflow::Device* device = op->device; - if (!ctx->soft_placement && device == nullptr) { - // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU - device = ctx->devices[0]; + if (!ctx->context.SoftPlacement() && device == nullptr) { + device = ctx->context.HostCPU(); } tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); - tensorflow::KernelAndDevice* kernel; - { - tensorflow::tf_shared_lock l(ctx->cache_mu); - kernel = tensorflow::gtl::FindPtrOrNull(ctx->kernel_cache, cache_key); - } + tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - if (ctx->soft_placement && device == nullptr) { + if (ctx->context.SoftPlacement() && device == nullptr) { device = SelectDevice(ndef, ctx, status); if (!status->status.ok()) { return; } } CHECK(device != nullptr); - if (ctx->log_device_placement) { + if (ctx->context.LogDevicePlacement()) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); } - kernel = new tensorflow::KernelAndDevice(ctx->rendezvous); + kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous()); // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def // will be accessed, so grab on to the lock. // See WARNING comment in Execute (before kernel->Run) - would be nice to // rework to avoid this subtlety. - tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu()); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->context.func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; @@ -1136,7 +1083,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Update output_dtypes inside `kernel`. const tensorflow::OpDef* op_def = nullptr; const tensorflow::FunctionDef* function_def = - ctx->func_lib_def.Find(ndef.op()); + ctx->context.FuncLibDef()->Find(ndef.op()); if (function_def != nullptr) { op_def = &(function_def->signature()); } @@ -1152,8 +1099,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (!status->status.ok()) { return; } - tensorflow::mutex_lock ml(ctx->cache_mu); - tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); + ctx->context.AddKernelToCache(cache_key, kernel); } const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); const int output_dtypes_size = output_dtypes.size(); @@ -1171,11 +1117,11 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // device from the one requested above. device = kernel->device(); } - status->status = ValidateInputTypeAndPlacement(ctx, ctx->devices[0], device, - op, kernel->kernel()); + status->status = ValidateInputTypeAndPlacement(ctx, ctx->context.HostCPU(), + device, op, kernel->kernel()); if (!status->status.ok()) return; std::unique_ptr maybe_stats; - if (ctx->should_store_metadata.load()) { + if (ctx->context.ShouldStoreMetadata()) { maybe_stats.reset(new tensorflow::NodeExecStats); maybe_stats->set_node_name(op->name); maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros()); @@ -1183,14 +1129,14 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); // TODO(apassos) track referenced tensors } - if (ctx->Async()) { + if (ctx->context.Async()) { // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. tensorflow::EagerNode* node = new ExecuteNode(op, kernel, maybe_stats.release(), output_dtypes, retvals, *num_retvals); - ctx->executor.Add(node); + ctx->context.ExecutorAdd(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to // allocate it. @@ -1206,23 +1152,24 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - status->status = ctx->executor.status(); + status->status = ctx->context.GetStatus(); if (!status->status.ok()) { return nullptr; } - tensorflow::Device* dstd = ctx->devices[0]; + tensorflow::Device* dstd = ctx->context.HostCPU(); if (device_name != nullptr && strlen(device_name) > 0) { - status->status = ctx->device_manager->LookupDevice(device_name, &dstd); + status->status = + ctx->context.device_mgr()->LookupDevice(device_name, &dstd); if (!status->status.ok()) return nullptr; } - if (ctx->Async()) { + if (ctx->context.Async()) { // Note that `h` may not be currently ready. However execution order will // make sure that `h` is ready before the copy is actually done. CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); TFE_TensorHandle* output = node->dst(); // Note that calling Add makes `node` accessible by the EagerExecutor // thread. So further accesses need to be thread-safe. - ctx->executor.Add(node); + ctx->context.ExecutorAdd(node); return output; } else { TFE_TensorHandle* output = nullptr; @@ -1240,24 +1187,20 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, tensorflow::errors::InvalidArgument("Invalid FunctionDef proto"); return; } - tensorflow::mutex_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.AddFunctionDef(function_def); + status->status = ctx->context.AddFunctionDef(function_def); } void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, TF_Status* status) { - tensorflow::mutex_lock l(ctx->functions_mu); - status->status = ctx->func_lib_def.AddFunctionDef(function->fdef); + status->status = ctx->context.AddFunctionDef(function->fdef); } void TFE_ContextEnableRunMetadata(TFE_Context* ctx) { - ctx->should_store_metadata.store(true); + ctx->context.SetShouldStoreMetadata(true); } void TFE_ContextDisableRunMetadata(TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->metadata_mu); - ctx->should_store_metadata.store(false); - ctx->run_metadata.Clear(); + ctx->context.SetShouldStoreMetadata(false); } } // extern "C" @@ -1286,9 +1229,9 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status) { TFE_ContextAsyncWait(ctx, status); if (!status->status.ok()) return; - tensorflow::mutex_lock ml(ctx->metadata_mu); - status->status = MessageToBuffer(ctx->run_metadata, buf); - ctx->run_metadata.Clear(); + tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); + status->status = MessageToBuffer(*ctx->context.RunMetadataProto(), buf); + ctx->context.RunMetadataProto()->Clear(); } namespace { @@ -1363,11 +1306,6 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } // namespace tensorflow -bool TFE_Context::Async() const { - tensorflow::mutex_lock l(async_map_mu); - return tensorflow::gtl::FindWithDefault( - thread_local_async, std::this_thread::get_id(), async_default); -} bool TFE_TensorHandle::IsReady() { if (node_id == 0) return true; @@ -1381,7 +1319,7 @@ tensorflow::Status TFE_TensorHandle::WaitReady() { { tensorflow::mutex_lock l(ctx_mutex_); if (ctx_ == nullptr) return tensorflow::Status::OK(); - executor = &ctx_->executor; + executor = ctx_->context.Executor(); } return executor->WaitFor(node_id); } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index a79f8ddd33..5b29120b40 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" @@ -52,85 +53,18 @@ struct TFE_ContextOptions { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; -TFE_ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); - struct TFE_Context { - explicit TFE_Context(const TFE_ContextOptions& opts, + explicit TFE_Context(const tensorflow::SessionOptions& opts, + TFE_ContextDevicePlacementPolicy default_policy, + bool async, std::unique_ptr device_mgr, tensorflow::Rendezvous* rendezvous) - : soft_placement( - opts.session_options.options.config.allow_soft_placement()), - policy(PlacementPolicy(soft_placement, opts.policy)), - device_manager(std::move(device_mgr)), - devices(device_manager->ListDevices()), - rendezvous(rendezvous), - pflr(new tensorflow::ProcessFunctionLibraryRuntime( - device_manager.get(), opts.session_options.options.env, - TF_GRAPH_DEF_VERSION, &func_lib_def, {})), - log_device_placement( - opts.session_options.options.config.log_device_placement()), - async_default(opts.async) { - if (async_default) executor.EnableAsync(); - - for (auto* device : devices) { - devices_map[tensorflow::StringPiece(device->name())] = device; - } - } - - const bool soft_placement; - const TFE_ContextDevicePlacementPolicy policy; - - // Note: we cannot use C++11 thread_local here as there is no concept of a - // thread-local-object-local variable in C++11. - tensorflow::mutex policy_map_mu; - std::unordered_map - thread_local_policies GUARDED_BY(policy_map_mu); - - std::unique_ptr device_manager; - // Devices owned by device_manager - std::vector devices; - // All devices are not owned. - tensorflow::gtl::FlatMap - devices_map; - tensorflow::Rendezvous* const rendezvous; - - tensorflow::mutex functions_mu; - tensorflow::FunctionLibraryDefinition func_lib_def GUARDED_BY(functions_mu){ - tensorflow::OpRegistry::Global(), {}}; - - // One FunctionLibraryRuntime per device. - // func_libs[i] is the FunctionLibraryRuntime corresponding to - // session->devices[i]. - const std::unique_ptr pflr; - - tensorflow::mutex cache_mu; - std::unordered_map - kernel_cache GUARDED_BY(cache_mu); - - tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) const { - return pflr->GetFLR(d->name()); - } + : context(opts, + static_cast( + default_policy), + async, std::move(device_mgr), rendezvous) {} - // Whether we should compute RunMetadata. - std::atomic should_store_metadata{false}; - tensorflow::mutex metadata_mu; - tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); - const bool log_device_placement; - // EagerExecutor for async execution. - tensorflow::EagerExecutor executor; - - // True if running in asynchronous mode. - bool Async() const; - - // True if the default value for execution mode is async. Note that this value - // can be overridden per thread based on `thread_local_async` overrides. - const bool async_default; - mutable tensorflow::mutex async_map_mu; - std::unordered_map thread_local_async - GUARDED_BY(async_map_mu); + tensorflow::EagerContext context; }; struct TFE_TensorHandle : public tensorflow::core::RefCounted { diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 8ba560bef8..de10b10b7e 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -32,6 +32,28 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "context", + srcs = [ + "context.cc", + ], + hdrs = [ + "context.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":eager_executor", + ":kernel_and_device", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + ], +) + tf_cuda_library( name = "kernel_and_device", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc new file mode 100644 index 0000000000..0566329f18 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -0,0 +1,153 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/context.h" + +namespace tensorflow { + +ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, ContextDevicePlacementPolicy original_policy) { + if (!soft_placement) { + return original_policy; + } + if (original_policy == DEVICE_PLACEMENT_EXPLICIT || + original_policy == DEVICE_PLACEMENT_SILENT_FOR_INT32) { + return DEVICE_PLACEMENT_SILENT; + } + return original_policy; +} + +EagerContext::EagerContext(const SessionOptions& opts, + ContextDevicePlacementPolicy default_policy, + bool async, std::unique_ptr device_mgr, + Rendezvous* rendezvous) + : soft_placement_(opts.config.allow_soft_placement()), + policy_(PlacementPolicy(soft_placement_, default_policy)), + device_manager_(std::move(device_mgr)), + devices_(device_manager_->ListDevices()), + rendezvous_(rendezvous), + pflr_(new ProcessFunctionLibraryRuntime(device_manager_.get(), opts.env, + TF_GRAPH_DEF_VERSION, + &func_lib_def_, {})), + log_device_placement_(opts.config.log_device_placement()), + async_default_(async) { + if (async_default_) { + executor_.EnableAsync(); + } + + for (auto* device : devices_) { + devices_map_[device->name()] = device; + } +} + +bool EagerContext::Async() const { + mutex_lock l(async_map_mu_); + return gtl::FindWithDefault(thread_local_async_, std::this_thread::get_id(), + async_default_); +} + +Status EagerContext::SetAsyncForThread(bool async) { + { + tensorflow::mutex_lock l(async_map_mu_); + thread_local_async_[std::this_thread::get_id()] = async; + } + if (async) { + executor_.EnableAsync(); + } else { + // TODO(agarwal): Currently we add a wait here to handle cases where a + // sync op has a control dependency on an async op, and the latter has not + // executed yet. This wait can be removed by storing all the control + // inputs and waiting for them when executing ops. + return executor_.WaitForAllPendingNodes(); + } + return Status::OK(); +} + +void EagerContext::ClearCaches() { + mutex_lock ml(cache_mu_); + gtl::STLDeleteValues(&kernel_cache_); +} + +void EagerContext::SetThreadLocalDevicePlacementPolicy( + ContextDevicePlacementPolicy policy) { + mutex_lock ml(policy_map_mu_); + thread_local_policies_[std::this_thread::get_id()] = policy; +} + +ContextDevicePlacementPolicy EagerContext::GetDevicePlacementPolicy() { + mutex_lock ml(policy_map_mu_); + auto policy_map_it = thread_local_policies_.find(std::this_thread::get_id()); + if (policy_map_it != thread_local_policies_.end()) { + return policy_map_it->second; + } + return policy_; +} + +EagerContext::~EagerContext() { + executor_.WaitForAllPendingNodes().IgnoreError(); + ClearCaches(); + rendezvous_->Unref(); +} + +bool EagerContext::FindFunctionByName(const string& name) { + mutex_lock l(functions_mu_); + return func_lib_def_.Find(name) != nullptr; +} + +Status EagerContext::FindFunctionOpData( + const string& name, const tensorflow::OpRegistrationData** op_data) { + mutex_lock l(functions_mu_); + return func_lib_def_.LookUp(name, op_data); +} + +const FunctionDef* EagerContext::FindFunctionDef(const string& name) { + mutex_lock l(functions_mu_); + return func_lib_def_.Find(name); +} + +Status EagerContext::FindDeviceByName(const string& name, Device** result) { + auto it = devices_map_.find(name); + if (it == devices_map_.end()) { + return errors::InvalidArgument(name, " unknown device."); + } + *result = it->second; + return Status::OK(); +} + +Status EagerContext::AddFunctionDef(const FunctionDef& fdef) { + mutex_lock l(functions_mu_); + return func_lib_def_.AddFunctionDef(fdef); +} + +KernelAndDevice* EagerContext::GetCachedKernel(Fprint128 cache_key) { + tf_shared_lock l(cache_mu_); + return gtl::FindPtrOrNull(kernel_cache_, cache_key); +} + +void EagerContext::AddKernelToCache(Fprint128 cache_key, + KernelAndDevice* kernel) { + mutex_lock ml(cache_mu_); + gtl::InsertOrUpdate(&kernel_cache_, cache_key, kernel); +} + +void EagerContext::SetShouldStoreMetadata(bool value) { + should_store_metadata_.store(value); + if (!value) { + mutex_lock ml(metadata_mu_); + run_metadata_.Clear(); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h new file mode 100644 index 0000000000..bc97219dae --- /dev/null +++ b/tensorflow/core/common_runtime/eager/context.h @@ -0,0 +1,198 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +// Note: there's a copy enum in eager/c_api.h. It should be kept in sync. +enum ContextDevicePlacementPolicy { + // Running operations with input tensors on the wrong device will fail. When + // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. + DEVICE_PLACEMENT_EXPLICIT = 0, + // Copy the tensor to the right device but log a warning. + DEVICE_PLACEMENT_WARN = 1, + // Silently copy the tensor, which has a performance cost since the + // operation will be blocked till the copy completes. + DEVICE_PLACEMENT_SILENT = 2, + // Default placement policy which silently copies int32 tensors but not other + // dtypes. When soft placement is enabled acts like + // TFE_DEVICE_PLACEMENT_SILENT. + DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, +}; + +ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, ContextDevicePlacementPolicy original_policy); + +class EagerContext { + public: + explicit EagerContext(const SessionOptions& opts, + ContextDevicePlacementPolicy default_policy, bool async, + std::unique_ptr device_mgr, + Rendezvous* rendezvous); + + ~EagerContext(); + + // Returns the function library runtime for the given device. + FunctionLibraryRuntime* func_lib(Device* d) const { + return pflr_->GetFLR(d->name()); + } + + // True if running in asynchronous mode. + bool Async() const; + + EagerExecutor* Executor() { return &executor_; } + + // Sets whether this thread should run in synchronous or asynchronous mode. + Status SetAsyncForThread(bool async); + + // TODO(apassos) make this return a constant reference + gtl::FlatMap* device_map() { + return &devices_map_; + } + + // TODO(apassos) make this return a constant reference + std::vector* devices() { return &devices_; } + + // Clears the kernel caches. + void ClearCaches(); + + // Sets the device placement policy for the current thread. + void SetThreadLocalDevicePlacementPolicy(ContextDevicePlacementPolicy policy); + + // Returns the device placement policy for the current thread. + ContextDevicePlacementPolicy GetDevicePlacementPolicy(); + + Status AsyncWait() { return executor_.WaitForAllPendingNodes(); } + + Status GetStatus() { return executor_.status(); } + + void ClearAsyncError() { executor_.ClearError(); } + + bool FindFunctionByName(const string& name); + + Status FindFunctionOpData(const string& name, + const tensorflow::OpRegistrationData** op_data); + + const FunctionDef* FindFunctionDef(const string& name); + + Status FindDeviceByName(const string& name, Device** result); + + Device* HostCPU() { return devices_[0]; } + + bool SoftPlacement() { return soft_placement_; } + + uint64 NextId() { return executor_.NextId(); } + + void ExecutorAdd(EagerNode* node) { executor_.Add(node); } + + Status AddFunctionDef(const FunctionDef& fdef); + + KernelAndDevice* GetCachedKernel(Fprint128 cache_key); + + void AddKernelToCache(Fprint128 cache_key, KernelAndDevice* kernel); + + bool LogDevicePlacement() { return log_device_placement_; } + + Rendezvous* GetRendezvous() { return rendezvous_; } + + mutex* FunctionsMu() { return &functions_mu_; } + + tensorflow::DeviceMgr* device_mgr() { return device_manager_.get(); } + + // TODO(apassos) remove the need for this + void ReleaseDeviceMgr() { device_manager_.release(); } + + // TODO(apassos) clean up RunMetadata storage. + mutex* MetadataMu() { return &metadata_mu_; } + bool ShouldStoreMetadata() { return should_store_metadata_.load(); } + void SetShouldStoreMetadata(bool value); + RunMetadata* RunMetadataProto() { return &run_metadata_; } + + FunctionLibraryDefinition* FuncLibDef() { return &func_lib_def_; } + + private: + const bool soft_placement_; + const ContextDevicePlacementPolicy policy_; + + // Note: we cannot use C++11 thread_local here as there is no concept of a + // thread-local-object-local variable in C++11. + mutex policy_map_mu_; + std::unordered_map + thread_local_policies_ GUARDED_BY(policy_map_mu_); + + std::unique_ptr device_manager_; + // Devices owned by device_manager + std::vector devices_; + // All devices are not owned. + gtl::FlatMap devices_map_; + Rendezvous* const rendezvous_; + + mutex functions_mu_; + FunctionLibraryDefinition func_lib_def_ GUARDED_BY(functions_mu_){ + OpRegistry::Global(), {}}; + + // One FunctionLibraryRuntime per device. + // func_libs[i] is the FunctionLibraryRuntime corresponding to + // session->devices[i]. + const std::unique_ptr pflr_; + + mutex cache_mu_; + std::unordered_map kernel_cache_ + GUARDED_BY(cache_mu_); + + // Whether we should compute RunMetadata. + std::atomic should_store_metadata_{false}; + mutex metadata_mu_; + RunMetadata run_metadata_ GUARDED_BY(metadata_mu_); + const bool log_device_placement_; + // EagerExecutor for async execution. + EagerExecutor executor_; + + // True if the default value for execution mode is async. Note that this value + // can be overridden per thread based on `thread_local_async` overrides. + const bool async_default_; + mutable mutex async_map_mu_; + std::unordered_map thread_local_async_ + GUARDED_BY(async_map_mu_); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_CONTEXT_H_ -- GitLab From 63d46266ba5b2a513244e13321f76e7acd03aba3 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Thu, 22 Mar 2018 14:53:59 -0700 Subject: [PATCH 380/960] Move cuDNN RNN ops to core, for use in the internal TF codebase only (not publicly exposed). RELNOTES: Moved cuDNN RNN ops to core. PiperOrigin-RevId: 190130405 --- tensorflow/contrib/BUILD | 2 - tensorflow/contrib/cmake/python_modules.txt | 2 - .../contrib/cmake/tf_core_kernels.cmake | 2 - tensorflow/contrib/cmake/tf_core_ops.cmake | 2 +- tensorflow/contrib/cmake/tf_python.cmake | 3 +- tensorflow/contrib/cudnn_rnn/BUILD | 68 +-------- .../cudnn_rnn/python/ops/cudnn_rnn_ops.py | 7 +- tensorflow/core/BUILD | 47 +++++++ .../api_def/base_api/api_def_CudnnRNN.pbtxt | 36 +++++ .../base_api/api_def_CudnnRNNBackprop.pbtxt | 45 ++++++ .../api_def_CudnnRNNCanonicalToParams.pbtxt | 35 +++++ .../base_api/api_def_CudnnRNNParamsSize.pbtxt | 27 ++++ .../api_def_CudnnRNNParamsToCanonical.pbtxt | 35 +++++ .../api_def/python_api/api_def_CudnnRNN.pbtxt | 4 + .../python_api/api_def_CudnnRNNBackprop.pbtxt | 4 + .../api_def_CudnnRNNCanonicalToParams.pbtxt | 4 + .../api_def_CudnnRNNParamsSize.pbtxt | 4 + .../api_def_CudnnRNNParamsToCanonical.pbtxt | 4 + tensorflow/core/kernels/BUILD | 17 +++ .../kernels/cudnn_rnn_ops.cc | 0 .../cudnn_rnn => core}/ops/cudnn_rnn_ops.cc | 130 ++---------------- .../ops/cudnn_rnn_ops_test.cc | 0 tensorflow/python/BUILD | 8 ++ tensorflow/python/__init__.py | 4 + 24 files changed, 287 insertions(+), 203 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNN.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNBackprop.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNCanonicalToParams.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNParamsSize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNParamsToCanonical.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CudnnRNN.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CudnnRNNBackprop.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CudnnRNNCanonicalToParams.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CudnnRNNParamsSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CudnnRNNParamsToCanonical.pbtxt rename tensorflow/{contrib/cudnn_rnn => core}/kernels/cudnn_rnn_ops.cc (100%) rename tensorflow/{contrib/cudnn_rnn => core}/ops/cudnn_rnn_ops.cc (53%) rename tensorflow/{contrib/cudnn_rnn => core}/ops/cudnn_rnn_ops_test.cc (100%) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index d103da79e3..2d7bbc016f 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -119,7 +119,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_kernels", "//tensorflow/contrib/coder:all_kernels", - "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", @@ -143,7 +142,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_ops_op_lib", "//tensorflow/contrib/coder:all_ops", - "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_ops_op_lib", "//tensorflow/contrib/data:dataset_ops_op_lib", "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index 0d2a6a23db..f7d3c73b2c 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -147,8 +147,6 @@ tensorflow/contrib/crf tensorflow/contrib/crf/python tensorflow/contrib/crf/python/ops tensorflow/contrib/cudnn_rnn -tensorflow/contrib/cudnn_rnn/kernels -tensorflow/contrib/cudnn_rnn/ops tensorflow/contrib/cudnn_rnn/python tensorflow/contrib/cudnn_rnn/python/layers tensorflow/contrib/cudnn_rnn/python/ops diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 998f99ecc1..ed018b4fed 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -67,8 +67,6 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/kernels/range_coder_ops_util.cc" "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/kernels/cudnn_rnn_ops.cc" - "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/ignore_errors_dataset_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/prefetching_kernels.cc" "${tensorflow_source_dir}/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc" diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 59e094812a..d6712aa2b4 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -21,6 +21,7 @@ set(tf_op_lib_names "checkpoint_ops" "control_flow_ops" "ctc_ops" + "cudnn_rnn_ops" "data_flow_ops" "dataset_ops" "functional_ops" @@ -84,7 +85,6 @@ GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_prediction "${tensorflow_source_dir}/t GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_quantiles "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(boosted_trees_stats_accumulator "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/ops/stats_accumulator_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(coder "${tensorflow_source_dir}/tensorflow/contrib/coder/ops/coder_ops.cc") -GENERATE_CONTRIB_OP_LIBRARY(cudnn_rnn "${tensorflow_source_dir}/tensorflow/contrib/cudnn_rnn/ops/cudnn_rnn_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(data_dataset "${tensorflow_source_dir}/tensorflow/contrib/data/ops/dataset_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_clustering "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/clustering_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir}/tensorflow/contrib/factorization/ops/factorization_ops.cc") diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 1e354bf212..31e715b654 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -326,6 +326,7 @@ GENERATE_PYTHON_OP_LIB("checkpoint_ops") GENERATE_PYTHON_OP_LIB("control_flow_ops" ADDITIONAL_LIBRARIES $) GENERATE_PYTHON_OP_LIB("ctc_ops") +GENERATE_PYTHON_OP_LIB("cudnn_rnn_ops") GENERATE_PYTHON_OP_LIB("data_flow_ops") GENERATE_PYTHON_OP_LIB("dataset_ops") GENERATE_PYTHON_OP_LIB("image_ops") @@ -367,8 +368,6 @@ GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_stats_accumulator_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_stats_accumulator_ops.py) GENERATE_PYTHON_OP_LIB("contrib_coder_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/coder/python/ops/gen_coder_ops.py) -GENERATE_PYTHON_OP_LIB("contrib_cudnn_rnn_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cudnn_rnn/ops/gen_cudnn_rnn_ops.py) GENERATE_PYTHON_OP_LIB("contrib_data_dataset_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/data/python/ops/gen_dataset_ops.py) GENERATE_PYTHON_OP_LIB("contrib_factorization_clustering_ops" diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index fec358c4e1..fa86ad38c9 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -9,52 +9,10 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") -load("//tensorflow:tensorflow.bzl", "tf_cc_test") - -tf_custom_op_library( - name = "python/ops/_cudnn_rnn_ops.so", - srcs = [ - "kernels/cudnn_rnn_ops.cc", - "ops/cudnn_rnn_ops.cc", - ], - deps = [ - "//tensorflow/core/kernels:bounds_check_lib", - "@farmhash_archive//:farmhash", - ], -) - -tf_kernel_library( - name = "cudnn_rnn_kernels", - srcs = ["kernels/cudnn_rnn_ops.cc"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:stream_executor", - "//tensorflow/core/kernels:bounds_check_lib", - "//third_party/eigen3", - "@farmhash_archive//:farmhash", - ], -) - -tf_gen_op_libs( - op_lib_names = ["cudnn_rnn_ops"], - deps = [ - "//tensorflow/core:lib", - ], -) - -tf_gen_op_wrapper_py( - name = "cudnn_rnn_ops", - deps = [":cudnn_rnn_ops_op_lib"], -) tf_custom_op_py_library( name = "cudnn_rnn_py", @@ -64,20 +22,13 @@ tf_custom_op_py_library( "python/layers/cudnn_rnn.py", "python/ops/cudnn_rnn_ops.py", ], - dso = [ - ":python/ops/_cudnn_rnn_ops.so", - ], - kernels = [ - ":cudnn_rnn_kernels", - ":cudnn_rnn_ops_op_lib", - ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - ":cudnn_rnn_ops", "//tensorflow/contrib/util:util_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", + "//tensorflow/python:cudnn_rnn_ops_gen", "//tensorflow/python:framework", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:init_ops", @@ -173,23 +124,6 @@ cuda_py_test( ], ) -tf_cc_test( - name = "cudnn_rnn_ops_test_cc", - size = "small", - srcs = [ - "ops/cudnn_rnn_ops_test.cc", - ], - deps = [ - ":cudnn_rnn_ops_op_lib", - "//tensorflow/core", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index e87162f0ee..622241a177 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -17,27 +17,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.cudnn_rnn.ops import gen_cudnn_rnn_ops from tensorflow.contrib.rnn.python.ops import lstm_ops -from tensorflow.contrib.util import loader from tensorflow.python.framework import common_shapes from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.layers import base as base_layer from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_cudnn_rnn_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope as vs -from tensorflow.python.platform import resource_loader from tensorflow.python.training import saver -_cudnn_rnn_ops_so = loader.load_op_library( - resource_loader.get_path_to_datafile("_cudnn_rnn_ops.so")) - CUDNN_RNN_UNIDIRECTION = "unidirectional" CUDNN_RNN_BIDIRECTION = "bidirectional" CUDNN_LSTM = "lstm" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 15cbba8285..2885a9f823 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -688,6 +688,34 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "cudnn_rnn_ops", + srcs = [ + "ops/cudnn_rnn_ops.cc", + ], + linkstatic = 1, + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:stream_executor", + "//tensorflow/core/kernels:bounds_check_lib", + "//third_party/eigen3", + "@farmhash_archive//:farmhash", + ], + alwayslink = 1, +) + +tf_gen_op_libs( + op_lib_names = [ + "cudnn_rnn_ops", + ], + deps = [ + ":lib", + ], +) + cc_library( name = "ops", visibility = ["//visibility:public"], @@ -700,6 +728,7 @@ cc_library( ":checkpoint_ops_op_lib", ":control_flow_ops_op_lib", ":ctc_ops_op_lib", + ":cudnn_rnn_ops_op_lib", ":data_flow_ops_op_lib", ":dataset_ops_op_lib", ":function_ops_op_lib", @@ -840,6 +869,7 @@ cc_library( "//tensorflow/core/kernels:checkpoint_ops", "//tensorflow/core/kernels:control_flow_ops", "//tensorflow/core/kernels:ctc_ops", + "//tensorflow/core/kernels:cudnn_rnn_kernels", "//tensorflow/core/kernels:data_flow", "//tensorflow/core/kernels:dataset_ops", "//tensorflow/core/kernels:fake_quant_ops", @@ -2914,6 +2944,23 @@ tf_cc_tests( ], ) +tf_cc_test( + name = "cudnn_rnn_ops_test_cc", + size = "small", + srcs = [ + "ops/cudnn_rnn_ops_test.cc", + ], + deps = [ + ":cudnn_rnn_ops", + "//tensorflow/core", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", diff --git a/tensorflow/core/api_def/base_api/api_def_CudnnRNN.pbtxt b/tensorflow/core/api_def/base_api/api_def_CudnnRNN.pbtxt new file mode 100644 index 0000000000..daeb5fe9a2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CudnnRNN.pbtxt @@ -0,0 +1,36 @@ +op { + graph_op_name: "CudnnRNN" + summary: "A RNN backed by cuDNN." + description: <
- + - + - + - + - +
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
Label Prediction
5.9 3.0 4.3 1.5 1
5.9 3.0 4.3 1.5 1 1
6.9 3.1 5.4 2.1 2
6.9 3.1 5.4 2.1 2 2
5.1 3.3 1.7 0.5 0
5.1 3.3 1.7 0.5 0 0
6.0 3.4 4.5 1.6 1
6.0 3.4 4.5 1.6 1 2
5.5 2.5 4.0 1.3 1
5.5 2.5 4.0 1.3 1 1
@@ -631,6 +642,10 @@ Test set accuracy: 0.967 An accuracy of 0.967 implies that our trained model correctly classified 29 out of the 30 Iris species in the test set. +To get a deeper understanding of different metrics for evaluating +models, see the +[Classification section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/classification). + ### Predicting @@ -723,7 +738,6 @@ Prediction is "Virginica" (97.9%), expected "Virginica" ## Summary - This document provides a short introduction to machine learning. Because `premade_estimators.py` relies on high-level APIs, much of the diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index b7bd1286e3..fb83a770a5 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -1,5 +1,12 @@ # Getting Started +If you are new to machine learning, we recommend taking the following online +course prior to diving into TensorFlow documentation: + + * [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/), + which introduces machine learning concepts and encourages experimentation + with existing TensorFlow code. + TensorFlow is a tool for machine learning. While it contains a wide range of functionality, TensorFlow is mainly designed for deep neural network models. diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 8612762271..5e9a84bff6 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -506,11 +506,18 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. - If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). +If you are new to machine learning, we recommend the following: + +* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) +* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} + +If you are experienced with machine learning but new to TensorFlow, see +@{$get_started/premade_estimators$Getting Started with TensorFlow}. + + ## Common installation problems We are relying on Stack Overflow to document TensorFlow installation problems diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 7207cb4f2b..55b460e189 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -400,12 +400,18 @@ writing TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see -@{$get_started/premade_estimators$Getting Started with TensorFlow}. - If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). +If you are new to machine learning, we recommend the following: + +* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) +* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} + +If you are experienced with machine learning but new to TensorFlow, see +@{$get_started/premade_estimators$Getting Started with TensorFlow}. + + ## Common installation problems We are relying on Stack Overflow to document TensorFlow installation problems diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 2413bc9cfb..86add74da1 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install: NVIDIA® GPU, you must install this version. Note that this version of TensorFlow is typically much easier to install (typically, in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. Prebuilt binaries will use AVX instructions. + installing this version first. Prebuilt binaries will use AVX instructions. * **TensorFlow with GPU support**. TensorFlow programs typically run significantly faster on a GPU than on a CPU. Therefore, if your system has a NVIDIA® GPU meeting the prerequisites shown below @@ -154,13 +154,17 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. - If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). -There is also a helpful [script](https://gist.github.com/mrry/ee5dbcfdd045fa48a27d56664411d41c) -for Windows TensorFlow installation issues. +If you are new to machine learning, we recommend the following: + +* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course) +* @{$get_started/get_started_for_beginners$Getting Started for ML Beginners} + +If you are experienced with machine learning but new to TensorFlow, see +@{$get_started/premade_estimators$Getting Started with TensorFlow}. + ## Common installation problems diff --git a/tensorflow/docs_src/programmers_guide/embedding.md b/tensorflow/docs_src/programmers_guide/embedding.md index e8027fc12b..d5703e0737 100644 --- a/tensorflow/docs_src/programmers_guide/embedding.md +++ b/tensorflow/docs_src/programmers_guide/embedding.md @@ -7,6 +7,9 @@ with the TensorBoard Embedding Projector newcomers to machine learning or TensorFlow, and the Embedding Projector how-to is for users at all levels. +An alternative tutorial on these concepts is available in the +[Embeddings section of Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture). + [TOC] An **embedding** is a mapping from discrete objects, such as words, to vectors -- GitLab From cf5729fc4710c9e579afa7c1176b00c9c0acec6e Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Fri, 23 Mar 2018 13:20:13 -0700 Subject: [PATCH 443/960] Updates reduce_mean op. PiperOrigin-RevId: 190264873 --- tensorflow/contrib/lite/testing/generate_examples.py | 6 +++--- tensorflow/contrib/lite/toco/import_tensorflow.cc | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 38de9dcf2c..e4ef17585f 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -754,7 +754,7 @@ def make_mean_tests(zip_path): [-1, -2, -3], [0, 0, 0], [2, 2, 0], [1, 0, -3, -3] ], "const_axis": [True, False], - "keep_dims": [True, False], + "keepdims": [True, False], }, { "input_dtype": [tf.float32, tf.int32, tf.int64], "input_shape": [[1, 224, 224, 3]], @@ -765,7 +765,7 @@ def make_mean_tests(zip_path): [2, 2, 3], [-3, -3, -4], [-3, 2, 1] ], "const_axis": [True, False], - "keep_dims": [True, False], + "keepdims": [True, False], }] def build_graph(parameters): @@ -788,7 +788,7 @@ def make_mean_tests(zip_path): input_tensors = [input_tensor, axis] out = tf.reduce_mean( - input_tensor, axis=axis, keep_dims=parameters["keep_dims"]) + input_tensor, axis=axis, keepdims=parameters["keepdims"]) return input_tensors, [out] def build_inputs(parameters, sess, inputs, outputs): diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index a7a50e6fc9..b844e0b948 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1541,7 +1541,9 @@ void ConvertMeanOperator(const NodeDef& node, op->inputs.push_back(node.input(1)); op->outputs.push_back(node.name()); model->operators.emplace_back(op); - if (HasAttr(node, "keep_dims")) { + if (HasAttr(node, "keepdims")) { + op->keep_dims = GetBoolAttr(node, "keepdims"); + } else if (HasAttr(node, "keep_dims")) { op->keep_dims = GetBoolAttr(node, "keep_dims"); } } -- GitLab From 9b9ca14ae9720b7c28351191a9d9529fc68884b1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Mar 2018 13:23:35 -0700 Subject: [PATCH 444/960] Moves TensorHandle to common_runtime PiperOrigin-RevId: 190265301 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/c_api.cc | 272 ++++++++---------- tensorflow/c/eager/c_api_internal.h | 85 +----- tensorflow/core/common_runtime/eager/BUILD | 23 ++ .../common_runtime/eager/tensor_handle.cc | 107 +++++++ .../core/common_runtime/eager/tensor_handle.h | 130 +++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 7 +- tensorflow/python/lib/core/py_func.cc | 2 +- 8 files changed, 393 insertions(+), 235 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/tensor_handle.cc create mode 100644 tensorflow/core/common_runtime/eager/tensor_handle.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index bea5a121b3..d2d8d59323 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -31,6 +31,7 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", + "//tensorflow/core/common_runtime/eager:tensor_handle", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -68,6 +69,7 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", + "//tensorflow/core/common_runtime/eager:tensor_handle", ], ) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 2402a6d044..59432f2ef8 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -161,29 +161,32 @@ TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { DCHECK(h); - h->Unref(); + if (h->handle) { + h->handle->Unref(); + } + delete h; } TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h) { - return static_cast(h->dtype); + return static_cast(h->handle->dtype); } int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) { const tensorflow::Tensor* t = nullptr; - status->status = h->Tensor(&t); + status->status = h->handle->Tensor(&t); return t == nullptr ? 0 : t->dims(); } int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, TF_Status* status) { const tensorflow::Tensor* t = nullptr; - status->status = h->Tensor(&t); + status->status = h->handle->Tensor(&t); return t == nullptr ? 0 : t->dim_size(dim_index); } const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { tensorflow::Device* d = nullptr; - status->status = h->OpDevice(&d); + status->status = h->handle->OpDevice(&d); return (d == nullptr) ? "/job:localhost/replica:0/task:0/device:CPU:0" : d->name().c_str(); } @@ -193,7 +196,7 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { tensorflow::Device* d = nullptr; tensorflow::Device* op_device = nullptr; const tensorflow::Tensor* t = nullptr; - status->status = h->TensorAndDevice(&t, &d, &op_device); + status->status = h->handle->TensorAndDevice(&t, &d, &op_device); if (!status->status.ok()) return nullptr; if (!IsCPU(d)) { TF_SetStatus(status, TF_UNIMPLEMENTED, @@ -212,10 +215,10 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { namespace { -tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, +tensorflow::Status TensorHandleCopyToDevice(tensorflow::TensorHandle* h, TFE_Context* ctx, tensorflow::Device* dstd, - TFE_TensorHandle** output) { + tensorflow::TensorHandle** output) { const tensorflow::Tensor* src = nullptr; tensorflow::Device* srcd = nullptr; // TODO(agarwal): src_opd is unused. Perhaps allow TensorAndDevice to accept @@ -232,7 +235,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, const bool both_on_cpu = src_cpu && dst_cpu; if (is_same_device || both_on_cpu) { dstd = dst_cpu ? nullptr : dstd; - *output = new TFE_TensorHandle(*src, dstd, dstd); + *output = new tensorflow::TensorHandle(*src, dstd, dstd); return tensorflow::Status::OK(); } if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && @@ -249,7 +252,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); if (src->shape().num_elements() == 0) { dstd = dst_cpu ? nullptr : dstd; - *output = new TFE_TensorHandle(dst, dstd, dstd); + *output = new tensorflow::TensorHandle(dst, dstd, dstd); return tensorflow::Status::OK(); } tensorflow::DeviceContext* src_device_context = nullptr; @@ -280,7 +283,7 @@ tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, n.WaitForNotification(); if (status.ok()) { dstd = dst_cpu ? nullptr : dstd; - *output = new TFE_TensorHandle(dst, dstd, dstd); + *output = new tensorflow::TensorHandle(dst, dstd, dstd); } return status; } @@ -335,12 +338,12 @@ void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { tensorflow::Device* d = nullptr; // TODO(agarwal): This call may block if h is not ready. Avoid this if // possible. - status->status = h->Device(&d); + status->status = h->handle->Device(&d); if (!status->status.ok()) return; if (!IsCPU(d)) op->device = d; } - h->Ref(); - op->inputs.push_back(h); + h->handle->Ref(); + op->inputs.push_back(h->handle); op->attrs.NumInputs(op->inputs.size()); } @@ -506,6 +509,79 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, namespace { +class CopyToDeviceNode : public tensorflow::EagerNode { + public: + CopyToDeviceNode(tensorflow::TensorHandle* src, tensorflow::Device* dstd, + TFE_Context* ctx) + : tensorflow::EagerNode(ctx->context.NextId()), + src_(src), + dstd_(dstd), + ctx_(ctx), + dst_(new tensorflow::TensorHandle(id, src_->dtype, &ctx->context)) { + src_->Ref(); + dst_->Ref(); + } + + ~CopyToDeviceNode() override { + src_->Unref(); + dst_->Unref(); + } + + tensorflow::Status Run() override { + tensorflow::TensorHandle* temp = nullptr; + TF_RETURN_IF_ERROR(TensorHandleCopyToDevice(src_, ctx_, dstd_, &temp)); + const tensorflow::Tensor* tensor = nullptr; + tensorflow::Device* device = nullptr; + tensorflow::Device* op_device = nullptr; + tensorflow::Status status = + temp->TensorAndDevice(&tensor, &device, &op_device); + // `temp` is a ready handle. So the following call should return OK. + TF_DCHECK_OK(status) << status.error_message(); + DCHECK(tensor); + dst_->SetTensorAndDevice(*tensor, device, op_device); + temp->Unref(); + return tensorflow::Status::OK(); + } + + tensorflow::TensorHandle* dst() { return dst_; } + + private: + tensorflow::TensorHandle* src_; + tensorflow::Device* dstd_; + TFE_Context* ctx_; + tensorflow::TensorHandle* dst_; +}; + +// TODO(apassos) move to TensorHandle +tensorflow::TensorHandle* TFE_TensorHandleCopyToDevice_Internal( + tensorflow::TensorHandle* h, TFE_Context* ctx, const char* device_name, + TF_Status* status) { + status->status = ctx->context.GetStatus(); + if (!status->status.ok()) { + return nullptr; + } + tensorflow::Device* dstd = ctx->context.HostCPU(); + if (device_name != nullptr && strlen(device_name) > 0) { + status->status = + ctx->context.device_mgr()->LookupDevice(device_name, &dstd); + if (!status->status.ok()) return nullptr; + } + if (ctx->context.Async()) { + // Note that `h` may not be currently ready. However execution order will + // make sure that `h` is ready before the copy is actually done. + CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); + tensorflow::TensorHandle* output = node->dst(); + // Note that calling Add makes `node` accessible by the EagerExecutor + // thread. So further accesses need to be thread-safe. + ctx->context.ExecutorAdd(node); + return output; + } else { + tensorflow::TensorHandle* output = nullptr; + status->status = TensorHandleCopyToDevice(h, ctx, dstd, &output); + return output; + } +} + tensorflow::Status ValidateInputTypeAndPlacement( TFE_Context* ctx, tensorflow::Device* host_device, tensorflow::Device* op_device, TFE_Op* op, @@ -518,7 +594,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( for (int i = 0; i < op->inputs.size(); ++i) { const tensorflow::Device* expected_device = memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device; - TFE_TensorHandle* handle = op->inputs[i]; + tensorflow::TensorHandle* handle = op->inputs[i]; tensorflow::Device* handle_device = nullptr; TF_RETURN_IF_ERROR(handle->Device(&handle_device)); const tensorflow::Device* actual_device = @@ -560,8 +636,9 @@ tensorflow::Status ValidateInputTypeAndPlacement( // We are only here if the policy is warn or silent copies, so we should // trigger a copy. TF_Status* s = TF_NewStatus(); - TFE_TensorHandle* copied_tensor = TFE_TensorHandleCopyToDevice( - handle, ctx, expected_device->name().c_str(), s); + tensorflow::TensorHandle* copied_tensor = + TFE_TensorHandleCopyToDevice_Internal( + handle, ctx, expected_device->name().c_str(), s); tensorflow::Status status = s->status; TF_DeleteStatus(s); if (!status.ok()) { @@ -616,9 +693,10 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, tensorflow::Status Execute( TFE_Context* ctx, tensorflow::Device* device, - const tensorflow::gtl::InlinedVector& op_inputs, + const tensorflow::gtl::InlinedVector& + op_inputs, tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, - TFE_TensorHandle** retvals, int num_retvals) { + tensorflow::TensorHandle** retvals, int num_retvals) { if (!ctx->context.SoftPlacement() && device == nullptr) { device = ctx->context.HostCPU(); } @@ -683,7 +761,7 @@ tensorflow::Status Execute( d = nullptr; } if (retvals[i] == nullptr) { - retvals[i] = new TFE_TensorHandle(outputs[i], d, op_device); + retvals[i] = new tensorflow::TensorHandle(outputs[i], d, op_device); } else { retvals[i]->SetTensorAndDevice(outputs[i], d, op_device); } @@ -711,9 +789,10 @@ class ExecuteNode : public tensorflow::EagerNode { } TFE_Context* ctx = op->ctx; for (int i = 0; i < num_retvals; ++i) { - TFE_TensorHandle* h = new TFE_TensorHandle(id, output_dtypes[i], ctx); + tensorflow::TensorHandle* h = + new tensorflow::TensorHandle(id, output_dtypes[i], &ctx->context); h->Ref(); - retvals[i] = h; + retvals[i] = new TFE_TensorHandle(h); retvals_[i] = h; } } @@ -745,54 +824,12 @@ class ExecuteNode : public tensorflow::EagerNode { private: TFE_Context* ctx_; tensorflow::Device* op_device_; - tensorflow::gtl::InlinedVector inputs_; + tensorflow::gtl::InlinedVector inputs_; tensorflow::KernelAndDevice* kernel_; std::unique_ptr maybe_stats_; - tensorflow::gtl::InlinedVector retvals_; + tensorflow::gtl::InlinedVector retvals_; }; -class CopyToDeviceNode : public tensorflow::EagerNode { - public: - CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, - TFE_Context* ctx) - : tensorflow::EagerNode(ctx->context.NextId()), - src_(src), - dstd_(dstd), - ctx_(ctx), - dst_(new TFE_TensorHandle(id, src_->dtype, ctx)) { - src_->Ref(); - dst_->Ref(); - } - - ~CopyToDeviceNode() override { - src_->Unref(); - dst_->Unref(); - } - - tensorflow::Status Run() override { - TFE_TensorHandle* temp = nullptr; - TF_RETURN_IF_ERROR(TensorHandleCopyToDevice(src_, ctx_, dstd_, &temp)); - const tensorflow::Tensor* tensor = nullptr; - tensorflow::Device* device = nullptr; - tensorflow::Device* op_device = nullptr; - tensorflow::Status status = - temp->TensorAndDevice(&tensor, &device, &op_device); - // `temp` is a ready handle. So the following call should return OK. - TF_DCHECK_OK(status) << status.error_message(); - DCHECK(tensor); - dst_->SetTensorAndDevice(*tensor, device, op_device); - temp->Unref(); - return tensorflow::Status::OK(); - } - - TFE_TensorHandle* dst() { return dst_; } - - private: - TFE_TensorHandle* src_; - tensorflow::Device* dstd_; - TFE_Context* ctx_; - TFE_TensorHandle* dst_; -}; #ifdef TENSORFLOW_EAGER_USE_XLA // Synthesizes and returns a wrapper function over `op`, which must be a @@ -1140,11 +1177,14 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to // allocate it. + std::vector handle_retvals(*num_retvals, + nullptr); + status->status = + Execute(op->ctx, op->device, op->inputs, kernel, maybe_stats.get(), + handle_retvals.data(), *num_retvals); for (int i = 0; i < *num_retvals; ++i) { - retvals[i] = nullptr; + retvals[i] = new TFE_TensorHandle(handle_retvals[i]); } - status->status = Execute(op->ctx, op->device, op->inputs, kernel, - maybe_stats.get(), retvals, *num_retvals); } } @@ -1152,30 +1192,12 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - status->status = ctx->context.GetStatus(); - if (!status->status.ok()) { - return nullptr; - } - tensorflow::Device* dstd = ctx->context.HostCPU(); - if (device_name != nullptr && strlen(device_name) > 0) { - status->status = - ctx->context.device_mgr()->LookupDevice(device_name, &dstd); - if (!status->status.ok()) return nullptr; - } - if (ctx->context.Async()) { - // Note that `h` may not be currently ready. However execution order will - // make sure that `h` is ready before the copy is actually done. - CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); - TFE_TensorHandle* output = node->dst(); - // Note that calling Add makes `node` accessible by the EagerExecutor - // thread. So further accesses need to be thread-safe. - ctx->context.ExecutorAdd(node); - return output; - } else { - TFE_TensorHandle* output = nullptr; - status->status = TensorHandleCopyToDevice(h, ctx, dstd, &output); - return output; + tensorflow::TensorHandle* handle = TFE_TensorHandleCopyToDevice_Internal( + h->handle, ctx, device_name, status); + if (status->status.ok()) { + return new TFE_TensorHandle(handle); } + return nullptr; } void TFE_ContextAddFunctionDef(TFE_Context* ctx, @@ -1214,7 +1236,7 @@ const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( tensorflow::Device* d = nullptr; tensorflow::Device* op_device = nullptr; const tensorflow::Tensor* t = nullptr; - status->status = h->TensorAndDevice(&t, &d, &op_device); + status->status = h->handle->TensorAndDevice(&t, &d, &op_device); if (!status->status.ok()) return nullptr; if (d != nullptr) { status->status = tensorflow::errors::FailedPrecondition( @@ -1306,70 +1328,8 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } // namespace tensorflow - -bool TFE_TensorHandle::IsReady() { - if (node_id == 0) return true; - tensorflow::mutex_lock l(ctx_mutex_); - return ctx_ == nullptr; -} - -tensorflow::Status TFE_TensorHandle::WaitReady() { - if (node_id == 0) return tensorflow::Status::OK(); - tensorflow::EagerExecutor* executor = nullptr; - { - tensorflow::mutex_lock l(ctx_mutex_); - if (ctx_ == nullptr) return tensorflow::Status::OK(); - executor = ctx_->context.Executor(); - } - return executor->WaitFor(node_id); -} - -tensorflow::Status TFE_TensorHandle::Tensor(const tensorflow::Tensor** t) { - TF_RETURN_IF_ERROR(WaitReady()); - DCHECK(IsReady()); - *t = &tensor_; - return tensorflow::Status::OK(); -} - -tensorflow::Status TFE_TensorHandle::Device(tensorflow::Device** d) { - TF_RETURN_IF_ERROR(WaitReady()); - DCHECK(IsReady()); - *d = device_; - return tensorflow::Status::OK(); -} - -tensorflow::Status TFE_TensorHandle::OpDevice(tensorflow::Device** d) { - TF_RETURN_IF_ERROR(WaitReady()); - DCHECK(IsReady()); - *d = op_device_; - return tensorflow::Status::OK(); -} - -tensorflow::Status TFE_TensorHandle::TensorAndDevice( - const tensorflow::Tensor** tensor, tensorflow::Device** device, - tensorflow::Device** op_device) { - TF_RETURN_IF_ERROR(WaitReady()); - DCHECK(IsReady()); - *tensor = &tensor_; - *device = device_; - *op_device = op_device_; - return tensorflow::Status::OK(); -} - -void TFE_TensorHandle::SetTensorAndDevice(const tensorflow::Tensor& tensor, - tensorflow::Device* device, - tensorflow::Device* op_device) { - tensorflow::mutex_lock l(ctx_mutex_); - DCHECK(node_id > 0 && ctx_) << "SetTensorAndDevice should be only called " - << "on non-ready handles."; - ctx_ = nullptr; - tensor_ = tensor; - device_ = device; - op_device_ = op_device; -} - TFE_Op::~TFE_Op() { - for (TFE_TensorHandle* h : inputs) { + for (tensorflow::TensorHandle* h : inputs) { h->Unref(); } } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 5b29120b40..e6d2ab75ff 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -67,84 +68,18 @@ struct TFE_Context { tensorflow::EagerContext context; }; -struct TFE_TensorHandle : public tensorflow::core::RefCounted { - public: +struct TFE_TensorHandle { TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d, tensorflow::Device* op_device) - : dtype(t.dtype()), - node_id(0), - tensor_(t), - device_(d), - op_device_(op_device), - ctx_(nullptr) {} + : handle(new tensorflow::TensorHandle(t, d, op_device)) {} TFE_TensorHandle(tensorflow::uint64 node_id, tensorflow::DataType dtype, - TFE_Context* ctx) - : dtype(dtype), - node_id(node_id), - tensor_(dtype), - device_(nullptr), - op_device_(nullptr), - ctx_(ctx) { - DCHECK_GT(node_id, 0); - } - - ~TFE_TensorHandle() override {} - - tensorflow::Status Tensor(const tensorflow::Tensor** t); - - tensorflow::Status Device(tensorflow::Device** d); - - tensorflow::Status OpDevice(tensorflow::Device** d); - - tensorflow::Status TensorAndDevice(const tensorflow::Tensor** tensor, - tensorflow::Device** device, - tensorflow::Device** op_device); - - // Note that this can be called at most once, and only on non-ready handles, - // and makes them ready. - void SetTensorAndDevice(const tensorflow::Tensor& tensor, - tensorflow::Device* device, - tensorflow::Device* op_device); - - // dtype for the handle. It must be the same as t.dtype() once the handle is - // ready. - const tensorflow::DataType dtype; - - private: - // If the contents of the Tensor pointed to by this handle is yet to be - // computed by a EagerNode, this function will block till that compuatation is - // done and the handle is "ready". - tensorflow::Status WaitReady(); - - bool IsReady(); - - // Id for the EagerNode that will compute the value pointed to by this handle. - // If the value is 0, the handle is already ready, but not vice-versa. - const tensorflow::uint64 node_id; - - tensorflow::Tensor tensor_; - - // TODO(ashankar): device_ == nullptr iff local CPU - // This was expedient, but perhaps worth revisiting ('device_' should always - // be a valid pointer?) - // This can be done if TFE_NewOp() and the TFE_TensorHandle constructors are - // provided with the appropriate TFE_Context. - // - // TODO(ashankar): Reference count TFE_Context to ensure that 'device_' of a - // TFE_TensorHandle does not outlive the TFE_Context from which it came? - tensorflow::Device* device_; - - // Device in which the op producing this tensor was executed. Equals to - // device_ for constant tensors. - tensorflow::Device* op_device_; - - tensorflow::mutex ctx_mutex_; - - // `ctx` is only guaranteed to be set if the handle is not "ready". This is - // typically true when the handle was produced during async execution. - // `ctx` object is not owned and should outlive this handle. - TFE_Context* ctx_ GUARDED_BY(ctx_mutex_); + tensorflow::EagerContext* ctx) + : handle(new tensorflow::TensorHandle(node_id, dtype, ctx)) {} + + TFE_TensorHandle(tensorflow::TensorHandle* handle) : handle(handle) {} + + tensorflow::TensorHandle* handle; }; struct TFE_Op { @@ -161,7 +96,7 @@ struct TFE_Op { const tensorflow::string name; tensorflow::AttrBuilder attrs; const tensorflow::AttrTypeMap* attr_types; - tensorflow::gtl::InlinedVector inputs; + tensorflow::gtl::InlinedVector inputs; tensorflow::Device* device; bool use_xla = false; }; diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index de10b10b7e..02fb83200a 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -54,6 +54,29 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "tensor_handle", + srcs = [ + "tensor_handle.cc", + ], + hdrs = [ + "tensor_handle.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":context", + ":eager_executor", + ":kernel_and_device", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + ], +) + tf_cuda_library( name = "kernel_and_device", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc new file mode 100644 index 0000000000..5bc1700627 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -0,0 +1,107 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +bool TensorHandle::IsReady() { + if (node_id == 0) return true; + mutex_lock l(ctx_mutex_); + return ctx_ == nullptr; +} + +Status TensorHandle::WaitReady() { + if (node_id == 0) return Status::OK(); + EagerExecutor* executor = nullptr; + { + mutex_lock l(ctx_mutex_); + if (ctx_ == nullptr) return Status::OK(); + executor = ctx_->Executor(); + } + return executor->WaitFor(node_id); +} + +Status TensorHandle::Tensor(const tensorflow::Tensor** t) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *t = &tensor_; + return Status::OK(); +} + +Status TensorHandle::Device(tensorflow::Device** d) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *d = device_; + return Status::OK(); +} + +Status TensorHandle::OpDevice(tensorflow::Device** d) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *d = op_device_; + return Status::OK(); +} + +Status TensorHandle::TensorAndDevice(const tensorflow::Tensor** tensor, + tensorflow::Device** device, + tensorflow::Device** op_device) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *tensor = &tensor_; + *device = device_; + *op_device = op_device_; + return Status::OK(); +} + +void TensorHandle::SetTensorAndDevice(const tensorflow::Tensor& tensor, + tensorflow::Device* device, + tensorflow::Device* op_device) { + mutex_lock l(ctx_mutex_); + DCHECK(node_id > 0 && ctx_) << "SetTensorAndDevice should be only called " + << "on non-ready handles."; + ctx_ = nullptr; + tensor_ = tensor; + device_ = device; + op_device_ = op_device; +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h new file mode 100644 index 0000000000..97e67e4652 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -0,0 +1,130 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_TENSOR_HANDLE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_TENSOR_HANDLE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +// Associates a Tensor and a Device, used in the eager runtime. Internal version +// executor_of the TFE_TensorHandle struct and the python EagerTensor class +// (unrelated to python TensorHandle). +class TensorHandle : public core::RefCounted { + public: + TensorHandle(const Tensor& t, Device* d, Device* op_device) + : dtype(t.dtype()), + node_id(0), + tensor_(t), + device_(d), + op_device_(op_device), + ctx_(nullptr) {} + + TensorHandle(uint64 node_id, DataType dtype, EagerContext* ctx) + : dtype(dtype), + node_id(node_id), + tensor_(dtype), + device_(nullptr), + op_device_(nullptr), + ctx_(ctx) { + DCHECK_GT(node_id, 0); + } + + ~TensorHandle() override {} + + Status Tensor(const tensorflow::Tensor** t); + + Status Device(tensorflow::Device** d); + + Status OpDevice(tensorflow::Device** d); + + Status TensorAndDevice(const tensorflow::Tensor** tensor, + tensorflow::Device** device, + tensorflow::Device** op_device); + + // Note that this can be called at most once, and only on non-ready handles, + // and makes them ready. + void SetTensorAndDevice(const tensorflow::Tensor& tensor, + tensorflow::Device* device, + tensorflow::Device* op_device); + + // dtype for the handle. It must be the same as t.dtype() once the handle is + // ready. + const DataType dtype; + + private: + // If the contents of the Tensor pointed to by this handle is yet to be + // computed by a EagerNode, this function will block till that compuatation is + // done and the handle is "ready". + Status WaitReady(); + + bool IsReady(); + + // Id for the EagerNode that will compute the value pointed to by this handle. + // If the value is 0, the handle is already ready, but not vice-versa. + const uint64 node_id; + + tensorflow::Tensor tensor_; + + // TODO(ashankar): device_ == nullptr iff local CPU + // This was expedient, but perhaps worth revisiting ('device_' should always + // be a valid pointer?) + // This can be done if TFE_NewOp() and the TFE_TensorHandle constructors are + // provided with the appropriate TFE_Context. + // + // TODO(ashankar): Reference count TFE_Context to ensure that 'device_' of a + // TFE_TensorHandle does not outlive the TFE_Context from which it came? + tensorflow::Device* device_; + + // Device in which the op producing this tensor was executed. Equals to + // device_ for constant tensors. + tensorflow::Device* op_device_; + + mutex ctx_mutex_; + + // `ctx` is only guaranteed to be set if the handle is not "ready". This is + // typically true when the handle was produced during async execution. + // `ctx` object is not owned and should outlive this handle. + EagerContext* ctx_ GUARDED_BY(ctx_mutex_); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_TENSOR_HANDLE_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 701f68b8f7..55ba509065 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1013,12 +1013,13 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { TFE_TensorHandle* t = EagerTensor_Handle(tensor); tensorflow::int64 id = EagerTensor_id(tensor); const tensorflow::Tensor* tensor = nullptr; - const tensorflow::Status status = t->Tensor(&tensor); + const tensorflow::Status status = t->handle->Tensor(&tensor); if (MaybeRaiseExceptionFromStatus(status, nullptr)) { - return tensorflow::eager::TapeTensor{id, t->dtype, + return tensorflow::eager::TapeTensor{id, t->handle->dtype, tensorflow::TensorShape({})}; } else { - return tensorflow::eager::TapeTensor{id, t->dtype, tensor->shape()}; + return tensorflow::eager::TapeTensor{id, t->handle->dtype, + tensor->shape()}; } } tensorflow::int64 id = FastTensorId(tensor); diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 02eafd42b3..22317a348c 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -166,7 +166,7 @@ bool IsSingleNone(PyObject* obj) { // Retrieves a Tensor from `eager_tensor` and stores it in `output_tensor`. tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor, const Tensor** output_tensor) { - return EagerTensor_Handle(eager_tensor)->Tensor(output_tensor); + return EagerTensor_Handle(eager_tensor)->handle->Tensor(output_tensor); } // Calls the registered py function through the trampoline. -- GitLab From 80a878dddcb04512324cb729a4ef5c92510e01a3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 13:47:03 -0700 Subject: [PATCH 445/960] [XLA] Implement the whole graph execution interface and make a test use XlaBuilder. - Add Client::ExecuteGraph. - Make client_library_test_base also (partially) support XlaBuilder by using template. - Make one testcase in the axpy_simple_test use XlaBuilder. The test was slightly changed because currently the builder does not expend implicit broadcast automatically. PiperOrigin-RevId: 190268658 --- tensorflow/compiler/xla/client/BUILD | 1 + tensorflow/compiler/xla/client/client.cc | 51 ++++ tensorflow/compiler/xla/client/client.h | 27 +++ tensorflow/compiler/xla/client/local_client.h | 2 + .../compiler/xla/client/xla_client/BUILD | 13 + .../xla/client/xla_client/xla_builder.cc | 222 +++++++++++++++--- .../xla/client/xla_client/xla_builder.h | 59 +++-- .../xla/client/xla_client/xla_builder_test.cc | 54 ++++- .../xla/client/xla_client/xla_computation.cc | 26 ++ .../xla/client/xla_client/xla_computation.h | 55 +++++ .../xla/service/compile_only_service.cc | 2 +- tensorflow/compiler/xla/service/hlo_module.cc | 7 +- .../compiler/xla/service/local_service.cc | 7 +- tensorflow/compiler/xla/service/service.cc | 67 +++++- tensorflow/compiler/xla/service/service.h | 4 +- tensorflow/compiler/xla/tests/BUILD | 2 + .../compiler/xla/tests/axpy_simple_test.cc | 9 +- .../xla/tests/client_library_test_base.cc | 74 +++++- .../xla/tests/client_library_test_base.h | 52 ++-- 19 files changed, 619 insertions(+), 115 deletions(-) create mode 100644 tensorflow/compiler/xla/client/xla_client/xla_computation.cc create mode 100644 tensorflow/compiler/xla/client/xla_client/xla_computation.h diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index 02356699a2..5094e5ce67 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -74,6 +74,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla:xla_proto", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", "//tensorflow/compiler/xla/service:session_proto", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index d15ccb0c28..5ce3c45528 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -177,6 +177,22 @@ StatusOr> Client::ExecuteAndTransfer( return Transfer(*data, shape_with_output_layout); } +StatusOr> Client::ExecuteAndTransfer( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const ExecutionOptions* execution_options, + ExecutionProfile* execution_profile) { + TF_ASSIGN_OR_RETURN( + std::unique_ptr data, + Execute(computation, arguments, execution_options, execution_profile)); + + const Shape* shape_with_output_layout = nullptr; + if (execution_options && execution_options->has_shape_with_output_layout()) { + shape_with_output_layout = &execution_options->shape_with_output_layout(); + } + return Transfer(*data, shape_with_output_layout); +} + StatusOr Client::LoadSnapshot(const SessionModule& module) { LoadComputationSnapshotRequest request; *request.mutable_module() = module; @@ -231,6 +247,41 @@ StatusOr> Client::Execute( return MakeUnique(stub_, response.output()); } +StatusOr> Client::Execute( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const ExecutionOptions* execution_options, + ExecutionProfile* execution_profile) { + ExecuteGraphRequest request; + *request.mutable_computation() = computation.proto(); + + if (execution_options == nullptr) { + *request.mutable_execution_options() = CreateDefaultExecutionOptions(); + } else { + *request.mutable_execution_options() = *execution_options; + } + for (GlobalData* argument : arguments) { + CHECK(argument != nullptr) << "Argument pointers must not be null."; + *request.add_arguments() = argument->handle(); + } + + ExecuteResponse response; + VLOG(1) << "making execute request: " << request.ShortDebugString(); + Status s = stub_->ExecuteGraph(&request, &response); + VLOG(1) << "done with request"; + + if (!s.ok()) { + return s; + } + + if (execution_profile != nullptr) { + *execution_profile = response.profile(); + // TODO(b/74197823): Get execution stats for the graph and VLOG(1) them. + } + + return MakeUnique(stub_, response.output()); +} + StatusOr>> Client::ExecuteParallel( tensorflow::gtl::ArraySlice computations) { ExecuteParallelRequest request; diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index c28380b689..ec87646ebf 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/session.pb.h" #include "tensorflow/compiler/xla/service_interface.h" @@ -57,6 +58,21 @@ class Client { const ExecutionOptions* execution_options = nullptr, ExecutionProfile* execution_profile = nullptr); + // Executes the computation with the given arguments and returns the global + // data that was produced from the execution. + // * If execution_options is not nullptr, these options are passed to the + // service to affect how it compiles our computation. (The pointer does not + // need to live beyond this call.) + // * If execution_profile is not nullptr then the pointed-to ExecutionProfile + // will be filled with profile data from the execution. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> Execute( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const ExecutionOptions* execution_options = nullptr, + ExecutionProfile* execution_profile = nullptr); + // A struct to represent a computation instance to be executed. // * If execution_options.device_handles is not empty, the computation is // executed on the devices associated with the handles by partitioning the @@ -137,6 +153,17 @@ class Client { const ExecutionOptions* execution_options = nullptr, ExecutionProfile* execution_profile = nullptr); + // Executes the computation with the given arguments and transfers the result + // to the client as a literal. Parameters are defined the same as for + // Execute() and Transfer(). + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> ExecuteAndTransfer( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const ExecutionOptions* execution_options = nullptr, + ExecutionProfile* execution_profile = nullptr); + // Unregister the memory for the given GlobalData on the device. Status Unregister(const GlobalData& data); diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index de0ed13c43..2e5d85ba68 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -123,6 +123,8 @@ class LocalClient : public Client { const tensorflow::gtl::ArraySlice argument_layouts, const ExecutableBuildOptions& options); + // TODO(b/74197823): Add a overload of Compile for XlaComputation. + // Copy the literal data to the device with the given ordinal and return as a // ScopedShapedBuffer. If non-null the given memory allocator is used for // device memory allocation. If null, the default memory allocator for the diff --git a/tensorflow/compiler/xla/client/xla_client/BUILD b/tensorflow/compiler/xla/client/xla_client/BUILD index 69df15c988..cc5f551c9c 100644 --- a/tensorflow/compiler/xla/client/xla_client/BUILD +++ b/tensorflow/compiler/xla/client/xla_client/BUILD @@ -25,12 +25,25 @@ filegroup( load("//tensorflow:tensorflow.bzl", "tf_cc_test") +cc_library( + name = "xla_computation", + srcs = ["xla_computation.cc"], + hdrs = ["xla_computation.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_proto", + "//tensorflow/core:lib", + ], +) + # TODO(b/74197823): Replace computation_builder with xla_builder. cc_library( name = "xla_builder", srcs = ["xla_builder.cc"], hdrs = ["xla_builder.h"], deps = [ + ":xla_computation", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index ec646cabe9..90f2b2d73a 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include #include #include @@ -80,40 +81,32 @@ void XlaBuilder::NoteError(const Status& error) { } } -StatusOr XlaBuilder::Build() { - if (!first_error_.ok()) { - string backtrace; - first_error_backtrace_.Dump(tensorflow::DebugWriteToString, &backtrace); - return AppendStatus(first_error_, backtrace); - } - - HloComputationProto entry; - ProgramShape* program_shape = entry.mutable_program_shape(); - - entry.set_name(name_); +StatusOr XlaBuilder::GetProgramShape(int64* root_id) { + TF_RET_CHECK(root_id != nullptr); + ProgramShape program_shape; // Not all instructions can be roots. Walk backwards from the last added // instruction until a valid root is found. - entry.set_root_id(-1); - for (int64 i = instructions_.size() - 1; i >= 0; i--) { + int64 index = instructions_.size() - 1; + for (; index >= 0; index--) { TF_ASSIGN_OR_RETURN(HloOpcode opcode, - StringToHloOpcode(instructions_[i].opcode())); + StringToHloOpcode(instructions_[index].opcode())); if (CanBeRoot(opcode)) { - entry.set_root_id(instructions_[i].id()); - *program_shape->mutable_result() = instructions_[i].shape(); break; } } - if (entry.root_id() == -1) { + if (index < 0) { return FailedPrecondition("no root instruction was found"); } + *root_id = instructions_[index].id(); + *program_shape.mutable_result() = instructions_[index].shape(); // Check that the parameter numbers are continuous from 0, and add parameter // shapes and names to the program shape. const int64 param_count = parameter_numbers_.size(); for (int64 i = 0; i < param_count; i++) { - program_shape->add_parameters(); - program_shape->add_parameter_names(); + program_shape.add_parameters(); + program_shape.add_parameter_names(); } for (const HloInstructionProto& instr : instructions_) { // Parameter number uniqueness is guaranteed in XlaBuilder::Parameter(). So @@ -123,10 +116,35 @@ StatusOr XlaBuilder::Build() { const int64 index = instr.parameter_number(); TF_RET_CHECK(index >= 0 && index < param_count) << "invalid parameter number: " << index; - *program_shape->mutable_parameters(index) = instr.shape(); - *program_shape->mutable_parameter_names(index) = instr.name(); + *program_shape.mutable_parameters(index) = instr.shape(); + *program_shape.mutable_parameter_names(index) = instr.name(); } } + return program_shape; +} + +StatusOr XlaBuilder::GetProgramShape() { + int64 root_id; + return GetProgramShape(&root_id); +} + +StatusOr XlaBuilder::Build() { + if (!first_error_.ok()) { + string backtrace; + first_error_backtrace_.Dump(tensorflow::DebugWriteToString, &backtrace); + return AppendStatus(first_error_, backtrace); + } + + HloComputationProto entry; + entry.set_name(name_); + + { + int64 root_id; + ProgramShape program_shape; + TF_ASSIGN_OR_RETURN(program_shape, GetProgramShape(&root_id)); + entry.mutable_program_shape()->Swap(&program_shape); + entry.set_root_id(root_id); + } for (auto& instruction : instructions_) { entry.add_instructions()->Swap(&instruction); @@ -149,19 +167,120 @@ StatusOr XlaBuilder::Build() { return std::move(computation); } -XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, - tensorflow::gtl::ArraySlice broadcast_dimensions) { - auto op = [&]() -> StatusOr { +StatusOr XlaBuilder::InDimBroadcast( + const Shape& shape, const XlaOp& operand, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + HloInstructionProto instr; + *instr.mutable_shape() = shape; + for (int64 dim : broadcast_dimensions) { + instr.add_dimensions(dim); + } + return AddInstruction(std::move(instr), HloOpcode::kBroadcast, {operand}); +} + +StatusOr XlaBuilder::AddBroadcastSequence(const Shape& output_shape, + const XlaOp& operand) { + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + + CHECK(ShapeUtil::IsScalar(operand_shape) || + ShapeUtil::Rank(operand_shape) == ShapeUtil::Rank(output_shape)); + Shape broadcast_shape = + ShapeUtil::ChangeElementType(output_shape, operand_shape.element_type()); + + // Do explicit broadcast for scalar. + if (ShapeUtil::IsScalar(operand_shape)) { + return InDimBroadcast(broadcast_shape, operand, {}); + } + + // Do explicit broadcast for degenerate broadcast. + std::vector broadcast_dimensions; + std::vector reshaped_dimensions; + for (int i = 0; i < ShapeUtil::Rank(operand_shape); i++) { + if (operand_shape.dimensions(i) == output_shape.dimensions(i)) { + broadcast_dimensions.push_back(i); + reshaped_dimensions.push_back(operand_shape.dimensions(i)); + } else { + TF_RET_CHECK(operand_shape.dimensions(i) == 1) + << "An explicit broadcast sequence requires the broadcasted " + "dimensions to be trivial; operand shape: " + << operand_shape << "; output_shape: " << output_shape; + } + } + // Eliminate the size one dimensions. + TF_ASSIGN_OR_RETURN(XlaOp reshaped_operand, + Reshape(ShapeUtil::MakeShape(operand_shape.element_type(), + reshaped_dimensions), + operand)); + // Broadcast 'reshape' up to the larger size. + return InDimBroadcast(broadcast_shape, reshaped_operand, + broadcast_dimensions); +} + +XlaOp XlaBuilder::BinaryOp( + HloOpcode binop, const XlaOp& lhs, const XlaOp& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, lhs.GetShape()); TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, rhs.GetShape()); - TF_ASSIGN_OR_RETURN( - *instr.mutable_shape(), - ShapeInference::InferBinaryOpShape(HloOpcode::kAdd, lhs_shape, - rhs_shape, broadcast_dimensions)); - return AddInstruction(std::move(instr), HloOpcode::kAdd, {lhs, rhs}); - }; - return NoteErrorOrReturn(op()); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferBinaryOpShape( + binop, lhs_shape, rhs_shape, broadcast_dimensions)); + + const int64 lhs_rank = ShapeUtil::Rank(lhs_shape); + const int64 rhs_rank = ShapeUtil::Rank(rhs_shape); + + XlaOp updated_lhs = lhs; + XlaOp updated_rhs = rhs; + + if (!broadcast_dimensions.empty() && lhs_rank != rhs_rank) { + const bool should_broadcast_lhs = lhs_rank < rhs_rank; + XlaOp from = should_broadcast_lhs ? lhs : rhs; + const Shape& from_shape = should_broadcast_lhs ? lhs_shape : rhs_shape; + + std::vector to_size; + for (int64 size : instr.shape().dimensions()) { + to_size.push_back(size); + } + for (int64 from_dim = 0; from_dim < ShapeUtil::Rank(from_shape); + from_dim++) { + int64 to_dim = broadcast_dimensions[from_dim]; + to_size[to_dim] = from_shape.dimensions(from_dim); + } + + const Shape& broadcasted_shape = + ShapeUtil::MakeShape(from_shape.element_type(), to_size); + TF_ASSIGN_OR_RETURN( + XlaOp broadcasted_operand, + InDimBroadcast(broadcasted_shape, from, broadcast_dimensions)); + + updated_lhs = should_broadcast_lhs ? broadcasted_operand : lhs; + updated_rhs = !should_broadcast_lhs ? broadcasted_operand : rhs; + } + + TF_ASSIGN_OR_RETURN(Shape updated_lhs_shape, updated_lhs.GetShape()); + if (!ShapeUtil::SameDimensions(instr.shape(), updated_lhs_shape)) { + TF_ASSIGN_OR_RETURN(updated_lhs, + AddBroadcastSequence(instr.shape(), updated_lhs)); + } + TF_ASSIGN_OR_RETURN(Shape updated_rhs_shape, updated_rhs.GetShape()); + if (!ShapeUtil::SameDimensions(instr.shape(), updated_rhs_shape)) { + TF_ASSIGN_OR_RETURN(updated_rhs, + AddBroadcastSequence(instr.shape(), updated_rhs)); + } + + return AddInstruction(std::move(instr), binop, {updated_lhs, updated_rhs}); + }()); +} + +XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + return BinaryOp(HloOpcode::kAdd, lhs, rhs, broadcast_dimensions); +} + +XlaOp XlaBuilder::Mul(const XlaOp& lhs, const XlaOp& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + return BinaryOp(HloOpcode::kMultiply, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { @@ -173,7 +292,7 @@ XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { XlaOp XlaBuilder::Call(const XlaComputation& computation, tensorflow::gtl::ArraySlice operands) { - auto op = [&]() -> StatusOr { + return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; std::vector operand_shape_ptrs; std::vector operand_shapes; @@ -196,13 +315,12 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, } return AddInstruction(std::move(instr), HloOpcode::kCall, operands); - }; - return NoteErrorOrReturn(op()); + }()); } XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, const string& name) { - auto op = [&]() -> StatusOr { + return NoteErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; if (parameter_numbers_.find(parameter_number) != parameter_numbers_.end()) { return InvalidArgument("parameter %lld already registered", @@ -213,8 +331,37 @@ XlaOp XlaBuilder::Parameter(int64 parameter_number, const Shape& shape, instr.set_name(name); *instr.mutable_shape() = shape; return AddInstruction(std::move(instr), HloOpcode::kParameter); - }; - return NoteErrorOrReturn(op()); + }()); +} + +XlaOp XlaBuilder::Broadcast( + const XlaOp& operand, tensorflow::gtl::ArraySlice broadcast_sizes) { + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + TF_ASSIGN_OR_RETURN( + const Shape& shape, + ShapeInference::InferBroadcastShape(operand_shape, broadcast_sizes)); + + // The client-level broadcast op just appends dimensions on the left (adds + // lowest numbered dimensions). The HLO broadcast instruction is more + // flexible and can add new dimensions anywhere. The instruction's + // dimensions field maps operand dimensions to dimensions in the broadcast + // output, so to append dimensions on the left the instruction's dimensions + // should just be the n highest dimension numbers of the output shape where + // n is the number of input dimensions. + const int64 operand_rank = ShapeUtil::Rank(operand_shape); + std::vector dimensions(operand_rank); + for (int i = 0; i < operand_rank; ++i) { + dimensions[i] = i + ShapeUtil::Rank(shape) - operand_rank; + } + return InDimBroadcast(shape, operand, dimensions); + }()); +} + +StatusOr XlaBuilder::Reshape(const Shape& shape, const XlaOp& operand) { + HloInstructionProto instr; + *instr.mutable_shape() = shape; + return AddInstruction(std::move(instr), HloOpcode::kReshape, {operand}); } XlaOp XlaBuilder::Slice(const XlaOp& operand, @@ -660,6 +807,7 @@ XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, } for (const auto& operand : operands) { instr.add_operand_ids(operand.handle()); + // TODO(b/74197823): Set metadata and sharding. } instructions_.push_back(instr); diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 455ea3d9cc..407b2df274 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -25,6 +25,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" @@ -65,38 +66,6 @@ class XlaOp { XlaBuilder* builder_; // Not owned. }; -// The computation graph that the user builds up with the XlaBuilder. -// -// TODO(b/74197823): Replace xla::Computation with this one. -class XlaComputation { - public: - XlaComputation(const XlaComputation&) = delete; - XlaComputation& operator=(const XlaComputation&) = delete; - - XlaComputation(XlaComputation&& from) { *this = std::move(from); } - - XlaComputation& operator=(XlaComputation&& from) { - proto_ = std::move(from.proto()); - unique_id_ = from.unique_id_; - return *this; - } - - // Returns the "program shape" (parameter and return shapes) for this - // computation. - const ProgramShape& GetProgramShape() const { return proto_.program_shape(); } - - const HloModuleProto& proto() const { return proto_; } - - private: - // Creates a null Computation. - XlaComputation(const int64 unique_id) : unique_id_(unique_id) {} - HloModuleProto* mutable_proto() { return &proto_; } - friend class XlaBuilder; - - int64 unique_id_; - HloModuleProto proto_; -}; - // A convenient interface for building up computations. // // Thread-compatible. @@ -733,6 +702,9 @@ class XlaBuilder { // Returns the shape of the given op. StatusOr GetShape(const XlaOp& op) const; + // Returns the (inferred) result for the current computation's shape. + StatusOr GetProgramShape(); + private: XlaOp AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, tensorflow::gtl::ArraySlice operands = {}); @@ -756,6 +728,29 @@ class XlaBuilder { StatusOr LookUpInstruction(const XlaOp& op) const; + // Internal helper method that does the building for an arbitrary binary op. + // broadcast_dimensions specifies which dimensions to use for broadcasting + // when the operation is between tensors of different ranks. + XlaOp BinaryOp(HloOpcode binop, const XlaOp& lhs, const XlaOp& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions); + + StatusOr InDimBroadcast( + const Shape& shape, const XlaOp& operand, + tensorflow::gtl::ArraySlice broadcast_dimensions); + + // Internal helper method that creates a sequence of instructions that + // performs an explicit broadcast of the operand to the target shape. + StatusOr AddBroadcastSequence(const Shape& output_shape, + const XlaOp& operand); + + // Internal helper method for creating a Reshape op with the already inferred + // shape. + StatusOr Reshape(const Shape& shape, const XlaOp& operand); + + // Returns the (inferred) result for the program shape for the current + // computation and fills the root_id in the pointer. + StatusOr GetProgramShape(int64* root_id); + string name_; // Name to use for the built computation. // The first error encountered while building the computation. diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc index a400e4e78b..10d8fa1622 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc @@ -57,16 +57,16 @@ TEST_F(XlaBuilderTest, OnePlusTwo) { EXPECT_THAT(root, op::Add(op::Constant(), op::Constant())); } -TEST_F(XlaBuilderTest, ParamPlusConstant) { +TEST_F(XlaBuilderTest, ParamPlusConstantHasScalarBroadcast) { XlaBuilder b(TestName()); auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {3, 5}), "x"); b.Add(x, b.ConstantR0(1.0)); TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, op::Add(op::Parameter(), op::Constant())); + EXPECT_THAT(root, op::Add(op::Parameter(), op::Broadcast(op::Constant()))); } -TEST_F(XlaBuilderTest, ParamPlusParam) { +TEST_F(XlaBuilderTest, ParamPlusParamHasBroadcast) { XlaBuilder b(TestName()); const auto& x_shape = ShapeUtil::MakeShape(S32, {2, 4, 6}); const auto& y_shape = ShapeUtil::MakeShape(S32, {2, 4}); @@ -79,7 +79,7 @@ TEST_F(XlaBuilderTest, ParamPlusParam) { TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, op::Add(op::Parameter(0), op::Parameter(1))); + EXPECT_THAT(root, op::Add(op::Parameter(0), op::Broadcast(op::Parameter(1)))); } TEST_F(XlaBuilderTest, XPlusX) { @@ -133,5 +133,51 @@ TEST_F(XlaBuilderTest, Call) { op::Call(op::Constant(), op::Constant()))); } +TEST_F(XlaBuilderTest, BinopHasDegenerateBroadcast) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {1, 2, 3}), "x"); + auto y = b.Parameter(1, ShapeUtil::MakeShape(F32, {1, 2, 1}), "y"); + b.Add(x, y); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + + // Expected: + // + // x: f32[1,2,3] y: f32[1,2,1] + // | | + // | reshape: f32[1,2] + // | | + // | broadcast: f32[1,2,3] + // \ / + // add + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Add(op::Parameter(0), + op::Broadcast(op::Reshape(op::Parameter(1))))); +} + +TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {2, 3}), "x"); + auto y = b.Parameter(1, ShapeUtil::MakeShape(F32, {2, 1, 4}), "y"); + b.Add(x, y, /*broadcast_dimensions=*/{0, 1}); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + + // The binary operation has in-dim broadcast and degenerate broadcast, should + // first do the in-dim broadcast then convert the degnerate broadcast into a + // reshape and a broadcast. + // + // Expected: + // + // x: f32[2,3] y: f32[2,1,4] + // | | + // broadcast: f32[2,3,4] reshape: f32[2,4] + // | | + // | broadcast: f32[2,3,4] + // \ / + // add + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Add(op::Broadcast(op::Parameter(0)), + op::Broadcast(op::Reshape(op::Parameter(1))))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.cc b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc new file mode 100644 index 0000000000..3681792eee --- /dev/null +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc @@ -0,0 +1,26 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" + +#include + +namespace xla { + +const ProgramShape& XlaComputation::GetProgramShape() const { + return proto_.program_shape(); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.h b/tensorflow/compiler/xla/client/xla_client/xla_computation.h new file mode 100644 index 0000000000..5b89747fdd --- /dev/null +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.h @@ -0,0 +1,55 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_XLA_CLIENT_XLA_COMPUTATION_H_ +#define TENSORFLOW_COMPILER_XLA_CLIENT_XLA_CLIENT_XLA_COMPUTATION_H_ + +#include + +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { + +// The computation graph that the user builds up with the XlaBuilder. +// +// TODO(b/74197823): Replace xla::Computation with this one. +class XlaComputation { + public: + XlaComputation(const XlaComputation&) = delete; + XlaComputation& operator=(const XlaComputation&) = delete; + + XlaComputation(XlaComputation&& from) = default; + + XlaComputation& operator=(XlaComputation&& from) = default; + + // Returns the "program shape" (parameter and return shapes) for this + // computation. + const ProgramShape& GetProgramShape() const; + const HloModuleProto& proto() const { return proto_; } + + private: + XlaComputation(const int64 unique_id) : unique_id_(unique_id) {} + HloModuleProto* mutable_proto() { return &proto_; } + friend class XlaBuilder; + + int64 unique_id_; + HloModuleProto proto_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_CLIENT_XLA_CLIENT_XLA_COMPUTATION_H_ diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 6664496ab6..c83da9eddc 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -100,7 +100,7 @@ CompileOnlyService::CompileAheadOfTime( TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(*program_shape, instance.argument_layouts, - &execution_options, *user_computation)); + &execution_options, user_computation)); TF_ASSIGN_OR_RETURN(std::unique_ptr hlo_module, computation_tracker_.BuildHloModule( diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 2037764dae..595c531ccf 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -237,8 +237,8 @@ StatusOr> HloModule::CreateFromProto( for (int i = 0; i < expected_program_shape.parameters_size(); ++i) { const Shape& parameter_shape = module_config.entry_computation_layout().parameter_layout(i).shape(); - TF_RET_CHECK( - ShapeUtil::Equal(expected_program_shape.parameters(i), parameter_shape)) + TF_RET_CHECK(ShapeUtil::Compatible(expected_program_shape.parameters(i), + parameter_shape)) << "HloModuleConfig has different shape for parameter " << i << " than the HLO module. Expected: " << ShapeUtil::HumanStringWithLayout( @@ -247,7 +247,8 @@ StatusOr> HloModule::CreateFromProto( } const Shape& result_shape = module_config.entry_computation_layout().result_layout().shape(); - TF_RET_CHECK(ShapeUtil::Equal(expected_program_shape.result(), result_shape)) + TF_RET_CHECK( + ShapeUtil::Compatible(expected_program_shape.result(), result_shape)) << "HloModuleConfig has different result shape than the HLO module. " "Expected: " << ShapeUtil::HumanStringWithLayout(expected_program_shape.result()) diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 5690a89909..1e2d8eea58 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -146,10 +146,9 @@ StatusOr> LocalService::CompileExecutable( LayoutUtil::SetToDefaultLayout( execution_options.mutable_shape_with_output_layout()); } - TF_ASSIGN_OR_RETURN( - std::unique_ptr module_config, - CreateModuleConfig(*program_shape, argument_layouts, &execution_options, - *user_computation)); + TF_ASSIGN_OR_RETURN(std::unique_ptr module_config, + CreateModuleConfig(*program_shape, argument_layouts, + &execution_options, user_computation)); TF_ASSIGN_OR_RETURN( se::StreamExecutor * executor, diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 0becc9d8f8..04487a4795 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -272,7 +272,7 @@ StatusOr> Service::CreateModuleConfig( const ProgramShape& program_shape, tensorflow::gtl::ArraySlice argument_shapes, const ExecutionOptions* execution_options, - const UserComputation& user_computation) { + const UserComputation* user_computation) { auto config = MakeUnique(program_shape); auto* computation_layout = config->mutable_entry_computation_layout(); @@ -286,8 +286,15 @@ StatusOr> Service::CreateModuleConfig( // ProgramShape. if (!ShapeUtil::Compatible(*argument_shapes[i], program_shape.parameters(i))) { + if (user_computation == nullptr) { + return InvalidArgument( + "Argument does not match shape of computation parameter %d: want " + "%s, got %s", + i, ShapeUtil::HumanString(program_shape.parameters(i)).c_str(), + ShapeUtil::HumanString(*argument_shapes[i]).c_str()); + } return InvalidParameterArgument( - *user_computation.ParameterMetadata(i).value(), + *user_computation->ParameterMetadata(i).value(), "Argument does not match shape of computation parameter %d: want %s, " "got %s", i, ShapeUtil::HumanString(program_shape.parameters(i)).c_str(), @@ -330,7 +337,7 @@ StatusOr> Service::CreateModuleConfig( const ProgramShape& program_shape, tensorflow::gtl::ArraySlice arguments, const ExecutionOptions& execution_options, - const UserComputation& user_computation) { + const UserComputation* user_computation) { std::vector argument_shapes; for (const auto* arg : arguments) { argument_shapes.push_back(&arg->on_host_shape()); @@ -778,7 +785,7 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(*program_shape, replicated_arguments.front(), - request.execution_options(), *user_computation)); + request.execution_options(), user_computation)); VLOG(3) << "ExecuteParallel created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); @@ -894,7 +901,7 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(*program_shape, replicated_arguments.front(), - arg->execution_options(), *user_computation)); + arg->execution_options(), user_computation)); VLOG(3) << "Execute created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); @@ -935,9 +942,49 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, return tensorflow::Status::OK(); } -tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* /*arg*/, - ExecuteResponse* /*result*/) { - return Unimplemented("execute-graph is not yet implemented"); +tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, + ExecuteResponse* result) { + VLOG(1) << "running execute-graph request"; + + if (!arg->has_computation()) { + return InvalidArgument("computations may not be empty"); + } + + // TODO(b/74197823): Handle partitioning. + + TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_, + SingleComputationDeviceHandle())); + TF_ASSIGN_OR_RETURN( + std::vector> replicated_arguments, + ResolveAndValidateArguments(arg->arguments(), replicas)); + + TF_ASSIGN_OR_RETURN(const auto& config, + CreateModuleConfig(arg->computation().program_shape(), + replicated_arguments.front(), + arg->execution_options())); + + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(arg->computation(), *config)); + TF_RETURN_IF_ERROR(MaybeDumpHloModule(*module)); + + TF_ASSIGN_OR_RETURN(module, execute_backend_->compiler()->RunHloPasses( + std::move(module), + execute_backend_->default_stream_executor(), + /*device_allocator=*/nullptr)); + TF_ASSIGN_OR_RETURN( + std::unique_ptr executable, + execute_backend_->compiler()->RunBackend( + std::move(module), execute_backend_->default_stream_executor(), + /*device_allocator=*/nullptr)); + + TF_ASSIGN_OR_RETURN( + *result->mutable_output(), + ExecuteAndRegisterResult( + executable.get(), replicated_arguments, execute_backend_.get(), + "result of " + arg->computation().name(), result->mutable_profile())); + + VLOG(1) << "successfully completed 'execute-graph' request"; + return tensorflow::Status::OK(); } tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, @@ -967,7 +1014,7 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(*program_shape, replicated_arguments.front(), - arg->execution_options(), *user_computation)); + arg->execution_options(), user_computation)); VLOG(3) << "ExecuteAsync created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); @@ -1268,7 +1315,7 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg, TF_ASSIGN_OR_RETURN(std::unique_ptr module_config, CreateModuleConfig(program_shape, {}, execution_options, - *user_computation)); + user_computation)); // Exclude dead parameter instructions for the purpose of computing constants. TF_ASSIGN_OR_RETURN( diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 96352d9096..a76bdd89c7 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -258,7 +258,7 @@ class Service : public ServiceInterface { const ProgramShape& program_shape, tensorflow::gtl::ArraySlice arguments, const ExecutionOptions& execution_options, - const UserComputation& user_computation); + const UserComputation* user_computation = nullptr); protected: friend class LocalExecutable; @@ -286,7 +286,7 @@ class Service : public ServiceInterface { const ProgramShape& program_shape, tensorflow::gtl::ArraySlice argument_shapes, const ExecutionOptions* execution_options, - const UserComputation& user_computation); + const UserComputation* user_computation = nullptr); // Builds an Executable for the given parameters. // diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 7fb7919674..e81e862c49 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -190,6 +190,7 @@ cc_library( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/core:lib", @@ -386,6 +387,7 @@ xla_test( deps = [ "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/axpy_simple_test.cc b/tensorflow/compiler/xla/tests/axpy_simple_test.cc index 3f6fd7c65d..ec3b46acfe 100644 --- a/tensorflow/compiler/xla/tests/axpy_simple_test.cc +++ b/tensorflow/compiler/xla/tests/axpy_simple_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_macros.h" @@ -28,11 +29,11 @@ namespace { class AxpySimpleTest : public ClientLibraryTestBase {}; TEST_F(AxpySimpleTest, AxTenValues) { - ComputationBuilder builder(client_, "ax_10"); + XlaBuilder builder("ax_10"); auto alpha = builder.ConstantR0(3.1415926535); auto x = builder.ConstantR1( {-1.0, 1.0, 2.0, -2.0, -3.0, 3.0, 4.0, -4.0, -5.0, 5.0}); - auto ax = builder.Mul(alpha, x); + builder.Mul(alpha, x); std::vector expected = { -3.14159265, 3.14159265, 6.28318531, -6.28318531, -9.42477796, @@ -46,7 +47,7 @@ XLA_TEST_F(AxpySimpleTest, AxpyZeroValues) { auto x = builder.ConstantR1({}); auto y = builder.ConstantR1({}); auto ax = builder.Mul(alpha, x); - auto axpy = builder.Add(ax, y); + builder.Add(ax, y); std::vector expected = {}; ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); @@ -60,7 +61,7 @@ TEST_F(AxpySimpleTest, AxpyTenValues) { auto y = builder.ConstantR1( {5.0, -5.0, -4.0, 4.0, 3.0, -3.0, -2.0, 2.0, 1.0, -1.0}); auto ax = builder.Mul(alpha, x); - auto axpy = builder.Add(ax, y); + builder.Add(ax, y); TF_ASSERT_OK_AND_ASSIGN(ProgramShape shape, builder.GetProgramShape()); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index a677986cd9..3cae51576f 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -95,6 +95,20 @@ StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( &execution_options); } +StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_output_layout) { + ExecutionOptions execution_options = execution_options_; + if (shape_with_output_layout != nullptr) { + *execution_options.mutable_shape_with_output_layout() = + *shape_with_output_layout; + } + return client_->ExecuteAndTransfer(computation, arguments, + &execution_options); +} + +template <> StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments, @@ -104,6 +118,15 @@ StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( return ExecuteAndTransfer(computation, arguments, shape_with_output_layout); } +template <> +StatusOr> ClientLibraryTestBase::ExecuteAndTransfer( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_output_layout) { + // Build the computation, as a convenience. + TF_ASSIGN_OR_RETURN(auto computation, builder->Build()); + return ExecuteAndTransfer(computation, arguments, shape_with_output_layout); +} + std::unique_ptr ClientLibraryTestBase::ExecuteOrDie( ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments) { @@ -142,16 +165,18 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments); } +template void ClientLibraryTestBase::ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout) { EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, shape_with_layout)); } +template void ClientLibraryTestBase::ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout) { EXPECT_IS_OK(ComputeAndCompareLiteralWithStatus(builder, expected, arguments, @@ -249,8 +274,28 @@ ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts( return choose(0); } +tensorflow::Status +ClientLibraryTestBase::ComputeAndCompareLiteralWithAllOutputLayouts( + const xla::XlaComputation& /*computation*/, const Literal& /*expected*/, + tensorflow::gtl::ArraySlice /*arguments*/, + const std::function& /*verify_output*/) { + return Unimplemented("not yet implemented for XlaComputation"); +} + +tensorflow::Status +ClientLibraryTestBase::ComputeAndCompareLiteralWithAllInputLayouts( + const xla::XlaComputation& /*computation*/, const Literal& /*expected*/, + tensorflow::gtl::ArraySlice /*arguments*/, + const std::function& /*verify_output*/, + const Shape* /*output_with_layout*/) { + return Unimplemented("not yet implemented for XlaComputation"); +} + +template tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments_passed_in, const Shape* shape_with_layout) { std::vector arguments(arguments_passed_in.begin(), @@ -307,8 +352,9 @@ tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( return tensorflow::Status::OK(); } +template tensorflow::Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments_passed_in, ErrorSpec error, const Shape* shape_with_layout) { std::vector arguments(arguments_passed_in.begin(), @@ -563,4 +609,24 @@ ComputationDataHandle ClientLibraryTestBase::CreateConstantFromLiteral( use_bfloat16_ ? *LiteralTestUtil::ConvertF32ToBF16(literal) : literal); } +template void ClientLibraryTestBase::ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_layout); + +template void ClientLibraryTestBase::ComputeAndCompareLiteral( + XlaBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_layout); + +template void ClientLibraryTestBase::ComputeAndCompareLiteral( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error, + const Shape* shape_with_layout); + +template void ClientLibraryTestBase::ComputeAndCompareLiteral( + XlaBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error, + const Shape* shape_with_layout); + } // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index ba0319990b..b553beb01a 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -94,15 +95,22 @@ class ClientLibraryTestBase : public ::testing::Test { StatusOr> Execute( ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments); + + template StatusOr> ExecuteAndTransfer( - ComputationBuilder* builder, - tensorflow::gtl::ArraySlice arguments, + BuilderT* builder, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout = nullptr); + StatusOr> ExecuteAndTransfer( const Computation& computation, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_output_layout = nullptr); + StatusOr> ExecuteAndTransfer( + const XlaComputation& computation, + tensorflow::gtl::ArraySlice arguments, + const Shape* shape_with_output_layout = nullptr); + // Convenience OrDie variants of above methods. std::unique_ptr ExecuteOrDie( ComputationBuilder* builder, @@ -130,12 +138,12 @@ class ClientLibraryTestBase : public ::testing::Test { tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR1(ComputationBuilder* builder, + template + void ComputeAndCompareR1(BuilderT* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR1(ComputationBuilder* builder, + template + void ComputeAndCompareR1(BuilderT* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); @@ -179,22 +187,26 @@ class ClientLibraryTestBase : public ::testing::Test { // Build and run the computation and compare the result with the given // literal. shape_with_layout indicates the result layout to request when // calling Execute. + template void ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout = nullptr); + template void ComputeAndCompareLiteral( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout = nullptr); // ComputeAndCompare variant which returns an error status. + template tensorflow::Status ComputeAndCompareLiteralWithStatus( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, const Shape* shape_with_layout = nullptr); + template tensorflow::Status ComputeAndCompareLiteralWithStatus( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout = nullptr); @@ -399,6 +411,18 @@ class ClientLibraryTestBase : public ::testing::Test { const string& error_message)>& verify_output, const Shape* output_with_layout = nullptr); + tensorflow::Status ComputeAndCompareLiteralWithAllOutputLayouts( + const xla::XlaComputation& computation, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, + const std::function& verify_output); + tensorflow::Status ComputeAndCompareLiteralWithAllInputLayouts( + const xla::XlaComputation& computation, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, + const std::function& verify_output, + const Shape* output_with_layout = nullptr); + // Executes the computation and calculates the expected reference value using // the HloEvaluator. Returns two literal in the order of (expected, actual). StatusOr, std::unique_ptr>> @@ -440,9 +464,9 @@ void ClientLibraryTestBase::ComputeAndCompareR0( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR1( - ComputationBuilder* builder, tensorflow::gtl::ArraySlice expected, + BuilderT* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR1(expected); @@ -450,9 +474,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR1( - ComputationBuilder* builder, tensorflow::gtl::ArraySlice expected, + BuilderT* builder, tensorflow::gtl::ArraySlice expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || -- GitLab From 2219b88a3d5154b9158a1902b061cad6cae2d0a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 14:00:39 -0700 Subject: [PATCH 446/960] Fix behavior of bucket_by_sequence_length with tuple Dataset elements Fixes #17932 PiperOrigin-RevId: 190270732 --- .../python/kernel_tests/bucketing_test.py | 25 +++++++++++++++++++ .../contrib/data/python/ops/grouping.py | 4 +-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index 94f800e8a5..d0131896a1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -468,6 +468,31 @@ class BucketBySequenceLength(test.TestCase): self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) self.assertEqual(sorted(boundaries), sorted(lengths_val)) + def testTupleElements(self): + + def elements_gen(): + text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]] + label = [1, 2, 1, 2] + for x, y in zip(text, label): + yield (x, y) + + def element_length_fn(x, y): + del y + return array_ops.shape(x)[0] + + dataset = dataset_ops.Dataset.from_generator( + generator=elements_gen, + output_shapes=(tensor_shape.TensorShape([None]), + tensor_shape.TensorShape([])), + output_types=(dtypes.int32, dtypes.int32)) + dataset = dataset.apply(grouping.bucket_by_sequence_length( + element_length_func=element_length_fn, + bucket_batch_sizes=[2, 2, 2], + bucket_boundaries=[0, 8])) + shapes = dataset.output_shapes + self.assertEqual([None, None], shapes[0].as_list()) + self.assertEqual([None], shapes[1].as_list()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index ae10d2eb22..36591c055a 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -140,9 +140,9 @@ def bucket_by_sequence_length(element_length_func, batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64) - def element_to_bucket_id(element): + def element_to_bucket_id(*args): """Return int64 id of the length bucket for this element.""" - seq_length = element_length_func(element) + seq_length = element_length_func(*args) boundaries = list(bucket_boundaries) buckets_min = [np.iinfo(np.int32).min] + boundaries -- GitLab From 13ef0af4867477cdda7e0b294e61560c2952df42 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Mar 2018 14:19:37 -0700 Subject: [PATCH 447/960] Fix buffer overflow when fetching resources. PiperOrigin-RevId: 190273682 --- .../python/kernel_tests/resource_variable_ops_test.py | 6 ++++++ tensorflow/python/lib/core/ndarray_tensor.cc | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 2dc993f811..563eeff2a6 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -103,6 +103,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): v = resource_variable_ops.ResourceVariable(False, name="bool_test") self.assertAllEqual(bool(v), False) + def testFetchHandle(self): + with self.test_session(): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1], name="foo") + self.assertGreater(len(handle.eval()), 0) + def testAssignVariableDtypeMismatchEager(self): with context.eager_mode(): handle = resource_variable_ops.var_handle_op( diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index 994af69386..a07e305ffb 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -267,7 +267,9 @@ gtl::InlinedVector GetPyArrayDimensionsForTensor( const int ndims = TF_NumDims(tensor); gtl::InlinedVector dims(ndims); if (TF_TensorType(tensor) == TF_RESOURCE) { - dims[0] = TF_TensorByteSize(tensor); + CHECK_EQ(ndims, 0) + << "Fetching of non-scalar resource tensors is not supported."; + dims.push_back(TF_TensorByteSize(tensor)); *nelems = dims[0]; } else { *nelems = 1; -- GitLab From 18832ec8497a6acc6f828808e5ea3a2859548efa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 14:41:02 -0700 Subject: [PATCH 448/960] Benchmarker for LPIRC CVPR2018. PiperOrigin-RevId: 190276899 --- .../org/tensorflow/ovic/OvicBenchmarker.java | 197 ++++ .../org/tensorflow/ovic/OvicClassifier.java | 209 ++++ .../ovic/OvicSingleImageResult.java | 54 + .../tensorflow/ovic/OvicClassifierTest.java | 176 +++ .../lite/java/ovic/src/testdata/labels.txt | 1001 +++++++++++++++++ 5 files changed, 1637 insertions(+) create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java create mode 100644 tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java create mode 100644 tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java create mode 100644 tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java new file mode 100644 index 0000000000..d0102883e6 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java @@ -0,0 +1,197 @@ +/*Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package org.tensorflow.ovic; + +import android.graphics.Bitmap; +import android.os.SystemClock; +import android.util.Log; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; + +/** + * Class that benchmarks image classifier models. + * + *

===================== General workflow ======================= + * + *

{@code
+ * benchmarker = new OvicBenchmarker();
+ * benchmarker.getReadyToTest(labelInputStream, model);
+ * while (!benchmarker.shouldStop()) {
+ *   Bitmap bitmap = ...
+ *   benchmarker.doTestIteration(bitmap);
+ * }
+ * }
+ */ +public class OvicBenchmarker { + /** Tag for the {@link Log}. */ + private static final String TAG = "OvicBenchmarker"; + + /** Evaluation transformation parameters. */ + private static final float CENTRAL_FRACTION = 0.875f; + + /** Dimensions of inputs. */ + private static final int DIM_BATCH_SIZE = 1; + private static final int DIM_PIXEL_SIZE = 3; + private int imgHeight = 224; + private int imgWidth = 224; + + /* Preallocated buffers for storing image data in. */ + private int[] intValues = null; + + /** A ByteBuffer to hold image data, to be feed into classifier as inputs. */ + private ByteBuffer imgData = null; + + private OvicClassifier classifier; + + /** Total runtime in ms. */ + private double totalRuntime = 0.0; + /** Total allowed runtime in ms. */ + private double wallTime = 20000 * 30.0; + + private Boolean benchmarkStarted = null; + + /** + * Initializes an {@link OvicBenchmarker} + * + * @param wallTime: a double number specifying the total amount of time to benchmark. + */ + public OvicBenchmarker(double wallTime) { + benchmarkStarted = false; + totalRuntime = 0.0; + this.wallTime = wallTime; + } + + /** Check whether the benchmarker should stop. */ + public Boolean shouldStop() { + if (totalRuntime >= wallTime) { + Log.e( + TAG, + "Total runtime " + + Double.toString(totalRuntime) + + " exceeded walltime " + + Double.toString(wallTime)); + return true; + } + return false; + } + + /** Check whether the benchmarker is ready to start classifying images. */ + public Boolean readyToTest() { + return (classifier != null); + } + + /** + * Getting the benchmarker ready for classifying images. + * + * @param labelInputStream: an {@link InputStream} specifying where the list of labels should be + * read from. + * @param model: a {@link MappedByteBuffer} model to benchmark. + */ + public void getReadyToTest(InputStream labelInputStream, MappedByteBuffer model) { + try { + Log.i(TAG, "Creating classifier."); + classifier = new OvicClassifier(labelInputStream, model); + int [] inputDims = classifier.getInputDims(); + imgHeight = inputDims[1]; + imgWidth = inputDims[2]; + // Only accept QUANTIZED_UINT8 input. + imgData = ByteBuffer.allocateDirect(DIM_BATCH_SIZE * imgHeight * imgWidth * DIM_PIXEL_SIZE); + imgData.order(ByteOrder.nativeOrder()); + intValues = new int[imgHeight * imgWidth]; + } catch (Exception e) { + Log.e(TAG, e.getMessage()); + Log.e(TAG, "Failed to initialize ImageNet classifier for the benchmarker."); + } + } + + /** Return how many classes are predicted per image. */ + public int getNumPredictions() { + return classifier.getNumPredictions(); + } + + /** + * Perform test on a single bitmap image. + * + * @param bitmap: a {@link Bitmap} image to classify. + */ + public OvicSingleImageResult doTestIteration(Bitmap bitmap) + throws IOException, InterruptedException { + if (shouldStop() || !readyToTest()) { + return null; + } + OvicSingleImageResult iterResult = null; + try { + Log.i(TAG, "Converting bitmap."); + convertBitmapToInput(bitmap); + Log.i(TAG, "Classifying image."); + iterResult = classifier.classifyByteBuffer(imgData); + } catch (RuntimeException e) { + Log.e(TAG, e.getMessage()); + Log.e(TAG, "Failed to classify image."); + } + if (iterResult == null || iterResult.latency == null) { + throw new RuntimeException("Classification result or timing is invalid."); + } + Log.d(TAG, "Native inference latency: " + iterResult.latency); + Log.i(TAG, iterResult.toString()); + + if (!benchmarkStarted) { // Skip the first image to discount warming-up time. + benchmarkStarted = true; + } else { + totalRuntime += (double) iterResult.latency; + } + return iterResult; + } + + /** + * Writes Image data into a {@link ByteBuffer}. + * + * @param bitmap: a {@link Bitmap} source image. + */ + private void convertBitmapToInput(Bitmap bitmap) throws RuntimeException { + if (imgData == null) { + throw new RuntimeException("Benchmarker is not yet ready to test."); + } + imgData.rewind(); + // Perform transformations corresponding to evaluation mode. + float width = (float) bitmap.getWidth(); + float height = (float) bitmap.getHeight(); + int stWidth = Math.round((width - width * CENTRAL_FRACTION) / 2); + int stHeight = Math.round((height - height * CENTRAL_FRACTION) / 2); + int newWidth = Math.round(width - stWidth * 2); + int newHeight = Math.round(height - stHeight * 2); + bitmap = Bitmap.createBitmap(bitmap, stWidth, stHeight, newWidth, newHeight); + bitmap = Bitmap.createScaledBitmap(bitmap, imgWidth, imgHeight, true); + bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight()); + + // Convert the image to ByteBuffer. + int pixel = 0; + long startTime = SystemClock.uptimeMillis(); + + for (int i = 0; i < imgHeight; ++i) { + for (int j = 0; j < imgWidth; ++j) { + final int val = intValues[pixel++]; + imgData.put((byte) ((val >> 16) & 0xFF)); + imgData.put((byte) ((val >> 8) & 0xFF)); + imgData.put((byte) (val & 0xFF)); + } + } + long endTime = SystemClock.uptimeMillis(); + Log.d(TAG, "Timecost to put values into ByteBuffer: " + Long.toString(endTime - startTime)); + } +} diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java new file mode 100644 index 0000000000..b2dfd8f2e7 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicClassifier.java @@ -0,0 +1,209 @@ +/*Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package org.tensorflow.ovic; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import org.tensorflow.lite.Interpreter; +import org.tensorflow.lite.TestHelper; + +/** Benchmark ImageNet Classifier with Tensorflow Lite. */ +public class OvicClassifier { + + /** Tag for the {@link Log}. */ + private static final String TAG = "OvicClassifier"; + + /** Number of results to show (i.e. the "K" in top-K predictions). */ + private static final int RESULTS_TO_SHOW = 5; + + /** An instance of the driver class to run model inference with Tensorflow Lite. */ + private Interpreter tflite; + + /** Labels corresponding to the output of the vision model. */ + private List labelList; + + /** An array to hold inference results, to be feed into Tensorflow Lite as outputs. */ + private byte[][] inferenceOutputArray = null; + /** An array to hold final prediction probabilities. */ + private float[][] labelProbArray = null; + + /** Input resultion. */ + private int[] inputDims = null; + /** Whether the model runs as float or quantized. */ + private Boolean outputIsFloat = null; + + private PriorityQueue> sortedLabels = + new PriorityQueue<>( + RESULTS_TO_SHOW, + new Comparator>() { + @Override + public int compare(Map.Entry o1, Map.Entry o2) { + return (o1.getValue()).compareTo(o2.getValue()); + } + }); + + /** Initializes an {@code OvicClassifier}. */ + OvicClassifier(InputStream labelInputStream, MappedByteBuffer model) + throws IOException, RuntimeException { + if (model == null) { + throw new RuntimeException("Input model is empty."); + } + labelList = loadLabelList(labelInputStream); + // OVIC uses one thread for CPU inference. + tflite = new Interpreter(model, 1); + inputDims = TestHelper.getInputDims(tflite, 0); + if (inputDims.length != 4) { + throw new RuntimeException("The model's input dimensions must be 4 (BWHC)."); + } + if (inputDims[0] != 1) { + throw new RuntimeException("The model must have a batch size of 1, got " + + inputDims[0] + " instead."); + } + if (inputDims[3] != 3) { + throw new RuntimeException("The model must have three color channels, got " + + inputDims[3] + " instead."); + } + int minSide = Math.min(inputDims[1], inputDims[2]); + int maxSide = Math.max(inputDims[1], inputDims[2]); + if (minSide <= 0 || maxSide > 1000) { + throw new RuntimeException("The model's resolution must be between (0, 1000]."); + } + String outputDataType = TestHelper.getOutputDataType(tflite, 0); + if (outputDataType.equals("float")) { + outputIsFloat = true; + } else if (outputDataType.equals("byte")) { + outputIsFloat = false; + } else { + throw new RuntimeException("Cannot process output type: " + outputDataType); + } + inferenceOutputArray = new byte[1][labelList.size()]; + labelProbArray = new float[1][labelList.size()]; + } + + /** Classifies a {@link ByteBuffer} image. */ + // @throws RuntimeException if model is uninitialized. + OvicSingleImageResult classifyByteBuffer(ByteBuffer imgData) throws RuntimeException { + if (tflite == null) { + throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed."); + } + if (outputIsFloat == null) { + throw new RuntimeException(TAG + ": Classifier output type has not been resolved."); + } + if (outputIsFloat) { + tflite.run(imgData, labelProbArray); + } else { + tflite.run(imgData, inferenceOutputArray); + /** Convert results to float */ + for (int i = 0; i < inferenceOutputArray[0].length; i++) { + labelProbArray[0][i] = (inferenceOutputArray[0][i] & 0xff) / 255.0f; + } + } + OvicSingleImageResult iterResult = computeTopKLabels(); + iterResult.latency = getLastNativeInferenceLatencyMilliseconds(); + return iterResult; + } + + /** Return the probability array of all classes. */ + public float[][] getlabelProbArray() { + return labelProbArray; + } + + /** Return the number of top labels predicted by the classifier. */ + public int getNumPredictions() { + return RESULTS_TO_SHOW; + } + + /** Return the four dimensions of the input image. */ + public int[] getInputDims() { + return inputDims; + } + + /* + * Get native inference latency of last image classification run. + * @throws RuntimeException if model is uninitialized. + */ + public Long getLastNativeInferenceLatencyMilliseconds() { + if (tflite == null) { + throw new RuntimeException(TAG + ": ImageNet classifier has not been initialized; Failed."); + } + Long latency = tflite.getLastNativeInferenceDurationNanoseconds(); + return (latency == null) ? null : (Long) (latency / 1000000); + } + + /** Closes tflite to release resources. */ + public void close() { + tflite.close(); + tflite = null; + } + + /** Reads label list from Assets. */ + private static List loadLabelList(InputStream labelInputStream) throws IOException { + List labelList = new ArrayList(); + try (BufferedReader reader = + new BufferedReader(new InputStreamReader(labelInputStream, StandardCharsets.UTF_8))) { + String line; + while ((line = reader.readLine()) != null) { + labelList.add(line); + } + } + return labelList; + } + + /** Computes top-K labels. */ + private OvicSingleImageResult computeTopKLabels() { + if (labelList == null) { + throw new RuntimeException("Label file has not been loaded."); + } + for (int i = 0; i < labelList.size(); ++i) { + sortedLabels.add(new AbstractMap.SimpleEntry<>(i, labelProbArray[0][i])); + if (sortedLabels.size() > RESULTS_TO_SHOW) { + sortedLabels.poll(); + } + } + OvicSingleImageResult singleImageResult = new OvicSingleImageResult(); + if (sortedLabels.size() != RESULTS_TO_SHOW) { + throw new RuntimeException( + "Number of returned labels does not match requirement: " + + sortedLabels.size() + + " returned, but " + + RESULTS_TO_SHOW + + " required."); + } + for (int i = 0; i < RESULTS_TO_SHOW; ++i) { + Map.Entry label = sortedLabels.poll(); + // ImageNet model prediction indices are 0-based. + singleImageResult.topKIndices.add(label.getKey()); + singleImageResult.topKClasses.add(labelList.get(label.getKey())); + singleImageResult.topKProbs.add(label.getValue()); + } + // Labels with lowest probability are returned first, hence need to reverse them. + Collections.reverse(singleImageResult.topKIndices); + Collections.reverse(singleImageResult.topKClasses); + Collections.reverse(singleImageResult.topKProbs); + return singleImageResult; + } +} diff --git a/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java new file mode 100644 index 0000000000..4af9a65c2f --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java @@ -0,0 +1,54 @@ +/*Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package org.tensorflow.ovic; + +import java.util.ArrayList; + +/** Result class for inference run on a single image. */ +public class OvicSingleImageResult { + + /** Top K classes and probabilities. */ + public ArrayList topKClasses; + public ArrayList topKProbs; + public ArrayList topKIndices; + + /** Latency (ms). */ + public Long latency; + + OvicSingleImageResult() { + topKClasses = new ArrayList<>(); + topKProbs = new ArrayList<>(); + topKIndices = new ArrayList<>(); + latency = -1L; + } + + @Override + public String toString() { + String textToShow = latency + "ms"; + for (int k = 0; k < topKProbs.size(); ++k) { + textToShow += + "\nPrediction [" + + k + + "] = Class " + + Integer.toString(topKIndices.get(k)) + + " (" + + topKClasses.get(k) + + ") : " + + Float.toString(topKProbs.get(k)); + } + return textToShow; + } + +} diff --git a/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java new file mode 100644 index 0000000000..4fd23a99d2 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/src/test/java/org/tensorflow/ovic/OvicClassifierTest.java @@ -0,0 +1,176 @@ +/*Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +package org.tensorflow.ovic; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Paths; +import javax.imageio.ImageIO; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.ovic.OvicClassifier}. */ +@RunWith(JUnit4.class) +public final class OvicClassifierTest { + + private OvicClassifier classifier; + private InputStream labelsInputStream = null; + private MappedByteBuffer quantizedModel = null; + private MappedByteBuffer floatModel = null; + private MappedByteBuffer lowResModel = null; + private ByteBuffer testImage = null; + private ByteBuffer lowResTestImage = null; + private OvicSingleImageResult testResult = null; + private static final String LABELS_PATH = "testdata/labels.txt"; + private static final String QUANTIZED_MODEL_PATH = "testdata/quantized_model.lite"; + private static final String LOW_RES_MODEL_PATH = "testdata/low_res_model.lite"; + private static final String FLOAT_MODEL_PATH = "testdata/float_model.lite"; + private static final String TEST_IMAGE_PATH = "testdata/test_image_224.jpg"; + private static final String TEST_LOW_RES_IMAGE_PATH = "testdata/test_image_128.jpg"; + private static final int TEST_IMAGE_GROUNDTRUTH = 653; // "military uniform" + + @Before + public void setUp() { + try { + File labelsfile = new File(getTestDir(LABELS_PATH)); + labelsInputStream = new FileInputStream(labelsfile); + quantizedModel = loadModelFile(getTestDir(QUANTIZED_MODEL_PATH)); + floatModel = loadModelFile(getTestDir(FLOAT_MODEL_PATH)); + lowResModel = loadModelFile(getTestDir(LOW_RES_MODEL_PATH)); + File imageFile = new File(getTestDir(TEST_IMAGE_PATH)); + BufferedImage img = ImageIO.read(imageFile); + testImage = toByteBuffer(img); + // Low res image and models. + imageFile = new File(getTestDir(TEST_LOW_RES_IMAGE_PATH)); + img = ImageIO.read(imageFile); + lowResTestImage = toByteBuffer(img); + } catch (IOException e) { + System.out.print(e.getMessage()); + } + System.out.println("Successful setup"); + } + + private static String getTestDir(String testfile) throws IOException { + return Paths.get("third_party/tensorflow/contrib/lite/java/ovic/src/", testfile).toString(); + } + + @Test + public void ovicClassifier_quantizedModelCreateSuccess() throws Exception { + classifier = new OvicClassifier(labelsInputStream, quantizedModel); + assertThat(classifier != null).isTrue(); + } + + @Test + public void ovicClassifier_floatModelCreateSuccess() throws Exception { + classifier = new OvicClassifier(labelsInputStream, floatModel); + assertThat(classifier != null).isTrue(); + } + + @Test + public void ovicClassifier_quantizedModelClassifySuccess() throws Exception { + classifier = new OvicClassifier(labelsInputStream, quantizedModel); + testResult = classifier.classifyByteBuffer(testImage); + assertCorrectTopK(testResult); + } + + @Test + public void ovicClassifier_floatModelClassifySuccess() throws Exception { + classifier = new OvicClassifier(labelsInputStream, floatModel); + testResult = classifier.classifyByteBuffer(testImage); + assertCorrectTopK(testResult); + } + + @Test + public void ovicClassifier_lowResModelClassifySuccess() throws Exception { + classifier = new OvicClassifier(labelsInputStream, lowResModel); + testResult = classifier.classifyByteBuffer(lowResTestImage); + assertCorrectTopK(testResult); + } + + @Test + public void ovicClassifier_latencyNotNull() throws Exception { + classifier = new OvicClassifier(labelsInputStream, floatModel); + testResult = classifier.classifyByteBuffer(testImage); + assertThat(testResult.latency != null).isTrue(); + } + + @Test + public void ovicClassifier_mismatchedInputResolutionFails() throws Exception { + classifier = new OvicClassifier(labelsInputStream, lowResModel); + int[] inputDims = classifier.getInputDims(); + assertThat((inputDims[1] == 128) && (inputDims[2] == 128)).isTrue(); + try { + testResult = classifier.classifyByteBuffer(testImage); + fail(); + } catch (RuntimeException e) { + assertThat(e) + .hasMessageThat() + .contains( + "Failed to get input dimensions. 0-th input should have 49152 bytes, " + + "but found 150528 bytes."); + } + } + + private static ByteBuffer toByteBuffer(BufferedImage image) { + ByteBuffer imgData = ByteBuffer.allocateDirect( + image.getHeight() * image.getWidth() * 3); + imgData.order(ByteOrder.nativeOrder()); + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + int val = image.getRGB(x, y); + imgData.put((byte) ((val >> 16) & 0xFF)); + imgData.put((byte) ((val >> 8) & 0xFF)); + imgData.put((byte) (val & 0xFF)); + } + } + return imgData; + } + + private static void assertCorrectTopK(OvicSingleImageResult testResult) { + assertThat(testResult.topKClasses.size() > 0).isTrue(); + Boolean topKAccurate = false; + // Assert that the correct class is in the top K. + for (int i = 0; i < testResult.topKIndices.size(); i++) { + if (testResult.topKIndices.get(i) == TEST_IMAGE_GROUNDTRUTH) { + topKAccurate = true; + break; + } + } + System.out.println(testResult.toString()); + System.out.flush(); + assertThat(topKAccurate).isTrue(); + } + + private static MappedByteBuffer loadModelFile(String modelFilePath) throws IOException { + File modelfile = new File(modelFilePath); + FileInputStream inputStream = new FileInputStream(modelfile); + FileChannel fileChannel = inputStream.getChannel(); + long startOffset = 0L; + long declaredLength = fileChannel.size(); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength); + } +} diff --git a/tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt b/tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt new file mode 100644 index 0000000000..fe811239d8 --- /dev/null +++ b/tensorflow/contrib/lite/java/ovic/src/testdata/labels.txt @@ -0,0 +1,1001 @@ +background +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue -- GitLab From 8e0848160a7d135f728dde2519a32876b8a7e3ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 14:52:44 -0700 Subject: [PATCH 449/960] Prepare the XLA and TF code to correctly behave once automatic device placement will be enabled by default on computation shapes > 1. PiperOrigin-RevId: 190278826 --- tensorflow/compiler/xla/service/service.cc | 35 +++++++++++++++---- tensorflow/compiler/xla/service/service.h | 6 ++++ .../contrib/tpu/python/tpu/tpu_estimator.py | 3 +- tensorflow/contrib/tpu/python/tpu/tpu_feed.py | 19 ++++++++-- 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 04487a4795..4f6a82333b 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -861,6 +861,33 @@ tensorflow::Status Service::GetDeviceHandles(const GetDeviceHandlesRequest* arg, return tensorflow::Status::OK(); } +tensorflow::Status Service::ExecuteOneToN(const ExecuteRequest* arg, + ExecuteResponse* result) { + ExecuteParallelRequest parallel_arg; + *parallel_arg.add_requests() = *arg; + ExecuteParallelResponse parallel_result; + TF_RETURN_IF_ERROR(ExecuteParallel(¶llel_arg, ¶llel_result)); + // The "result device" selection is a bit hacky, but better than assuming it + // is device 0. We have b/76035356 for restructuring the client API to clean + // up the current asymmetries and support more functionalities. + for (int64 i = 0; i < parallel_result.responses_size(); ++i) { + TF_ASSIGN_OR_RETURN(const ShapedBuffer* buffer, + allocation_tracker_.ResolveForReplica( + parallel_result.responses(i).output(), 0)); + const Shape& shape = buffer->on_host_shape(); + if (!ShapeUtil::IsEmptyTuple(shape)) { + *result = parallel_result.responses(i); + VLOG(3) << "Fetching result from device " << i << ": " + << ShapeUtil::HumanString(shape); + return Status::OK(); + } + } + TF_RET_CHECK(parallel_result.responses_size() > 0); + *result = parallel_result.responses(0); + VLOG(1) << "Defaulting to device 0 result"; + return Status::OK(); +} + tensorflow::Status Service::Execute(const ExecuteRequest* arg, ExecuteResponse* result) { VLOG(1) << "running execute request: " << arg->ShortDebugString(); @@ -877,13 +904,7 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, // If we received multiple device handles, we must partition the module. if (arg->execution_options().device_handles_size() > 1) { - ExecuteParallelRequest parallel_arg; - *parallel_arg.add_requests() = *arg; - ExecuteParallelResponse parallel_result; - TF_RETURN_IF_ERROR(ExecuteParallel(¶llel_arg, ¶llel_result)); - TF_RET_CHECK(parallel_result.responses_size() > 0); - *result = parallel_result.responses(0); - return Status::OK(); + return ExecuteOneToN(arg, result); } TF_ASSIGN_OR_RETURN( diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index a76bdd89c7..3b79920b0a 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -346,6 +346,12 @@ class Service : public ServiceInterface { const std::function(UserComputation*)>& adder); + // Executes a single computation which has more than one target device. + // The N devices are expected to all return an empty tuple, but one, which + // will be the result of this computation. + tensorflow::Status ExecuteOneToN(const ExecuteRequest* arg, + ExecuteResponse* result); + // Convenience function which checks whether the given shape_with_layout // (presumably passed by the client to set the result layout) is valid for the // given computation result shape. diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index aaa6f3c2c1..152f8c8c69 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -931,8 +931,7 @@ class _InputPipeline(object): # In the model-parallel case, both the host-side and device-side # computations must agree on the core on which infeed takes place. We # choose to perform infeed on logical core 0 of each replica. - with ops.device(tpu.core(0)): - values = self._infeed_queue.generate_dequeue_op() + values = self._infeed_queue.generate_dequeue_op(tpu_device=0) # The unflatten process uses the structure information recorded above. return self._inputs_structure_recorder.unflatten_features_and_labels( values) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py index 42ac6eb680..604e6600c8 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_feed.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_feed.py @@ -23,6 +23,7 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.contrib.tpu.python.tpu import tpu from tensorflow.contrib.tpu.python.tpu import tpu_sharding from tensorflow.python.framework import dtypes @@ -368,13 +369,20 @@ class InfeedQueue(object): policy.freeze() self._validate() - def generate_dequeue_op(self): + def generate_dequeue_op(self, tpu_device=0): """Generates the device-side Op to dequeue a tuple from the queue. Implicitly freezes the queue configuration if it is not already frozen, which will raise errors if the shapes and types have not been fully specified. + Args: + tpu_device: The TPU device ordinal where the infeed instruction should be + placed. If None, no explicit placement will be performed, and it is up + to the user to call this API from within a proper TPU device scope. + The XLA code will fail if the TPU dequeue instruction is not bound to + any device. + Returns: A list of Outputs corresponding to a shard of infeed dequeued into XLA, suitable for use within a replicated block. @@ -392,8 +400,13 @@ class InfeedQueue(object): policy.get_sharded_shape(shape) for (shape, policy) in zip(self._tuple_shapes, self._sharding_policies) ] - return tpu_ops.infeed_dequeue_tuple( - dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name) + if tpu_device is not None: + with ops.device(tpu.core(tpu_device)): + return tpu_ops.infeed_dequeue_tuple( + dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name) + else: + return tpu_ops.infeed_dequeue_tuple( + dtypes=self._tuple_types, shapes=sharded_shapes, name=full_name) def _generate_enqueue_op(self, inputs, -- GitLab From fce07c395d7c3931bc809183031c232651eb0638 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 15:10:56 -0700 Subject: [PATCH 450/960] add EvaluateNodes to OpDedupping test. PiperOrigin-RevId: 190282163 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../core/grappler/optimizers/arithmetic_optimizer_test.cc | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index fb13084945..92f7cce502 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -287,6 +287,7 @@ tf_cuda_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 3876486d80..792f675043 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" @@ -157,6 +158,8 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { ArithmeticOptimizer optimizer; GraphDef output; + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + EXPECT_EQ(1, tensors_expected.size()); Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); // Run the optimizer twice to make sure the rewrite is idempotent. @@ -172,6 +175,10 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { EXPECT_EQ(2, new_div.input_size()); EXPECT_EQ("c1", new_div.input(0)); EXPECT_EQ("c1", new_div.input(1)); + + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) { -- GitLab From db51253fce5882bf766e19b97131d90f0947d0df Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 23 Mar 2018 15:12:21 -0700 Subject: [PATCH 451/960] Convert the eager SPINN example to use tf.keras.Model and object-based checkpointing. Uses a more recursive/functional tracking style which avoids numbering layers. Maybe this is too magical and we should adapt tf.keras.Sequential first? Let me know what you think. PiperOrigin-RevId: 190282346 --- .../eager/python/examples/spinn/spinn_test.py | 24 ++- third_party/examples/eager/spinn/spinn.py | 168 ++++++++++-------- 2 files changed, 108 insertions(+), 84 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 081b0af14f..591d99edcd 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -33,6 +33,7 @@ import tensorflow as tf import tensorflow.contrib.eager as tfe from tensorflow.contrib.eager.python.examples.spinn import data from third_party.examples.eager.spinn import spinn +from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import test from tensorflow.python.framework import test_util @@ -172,7 +173,7 @@ class SpinnTest(test_util.TensorFlowTestCase): right_in.append(tf.random_normal((1, size * 2))) tracking.append(tf.random_normal((1, tracker_size * 2))) - out = reducer(left_in, right_in, tracking=tracking) + out = reducer(left_in, right_in=right_in, tracking=tracking) self.assertEqual(batch_size, len(out)) self.assertEqual(tf.float32, out[0].dtype) self.assertEqual((1, size * 2), out[0].shape) @@ -226,7 +227,7 @@ class SpinnTest(test_util.TensorFlowTestCase): self.assertEqual((batch_size, size * 2), stacks[0][0].shape) for _ in range(2): - out1, out2 = tracker(bufs, stacks) + out1, out2 = tracker(bufs, stacks=stacks) self.assertIsNone(out2) self.assertEqual(batch_size, len(out1)) self.assertEqual(tf.float32, out1[0].dtype) @@ -259,7 +260,7 @@ class SpinnTest(test_util.TensorFlowTestCase): self.assertEqual(tf.int64, transitions.dtype) self.assertEqual((num_transitions, 1), transitions.shape) - out = s(buffers, transitions, training=True) + out = s(buffers, transitions=transitions, training=True) self.assertEqual(tf.float32, out.dtype) self.assertEqual((1, embedding_dims), out.shape) @@ -285,12 +286,15 @@ class SpinnTest(test_util.TensorFlowTestCase): vocab_size) # Invoke model under non-training mode. - logits = model(prem, prem_trans, hypo, hypo_trans, training=False) + logits = model( + prem, premise_transition=prem_trans, hypothesis=hypo, + hypothesis_transition=hypo_trans, training=False) self.assertEqual(tf.float32, logits.dtype) self.assertEqual((batch_size, d_out), logits.shape) # Invoke model under training model. - logits = model(prem, prem_trans, hypo, hypo_trans, training=True) + logits = model(prem, premise_transition=prem_trans, hypothesis=hypo, + hypothesis_transition=hypo_trans, training=True) self.assertEqual(tf.float32, logits.dtype) self.assertEqual((batch_size, d_out), logits.shape) @@ -421,8 +425,14 @@ class SpinnTest(test_util.TensorFlowTestCase): # 5. Verify that checkpoints exist and contains all the expected variables. self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) - ckpt_variable_names = [ - item[0] for item in checkpoint_utils.list_variables(config.logdir)] + object_graph_string = checkpoint_utils.load_variable( + config.logdir, name="_CHECKPOINTABLE_OBJECT_GRAPH") + object_graph = checkpointable_object_graph_pb2.CheckpointableObjectGraph() + object_graph.ParseFromString(object_graph_string) + ckpt_variable_names = set() + for node in object_graph.nodes: + for attribute in node.attributes: + ckpt_variable_names.add(attribute.full_name) self.assertIn("global_step", ckpt_variable_names) for v in trainer.variables: variable_name = v.name[:v.name.index(":")] if ":" in v.name else v.name diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py index 8a1c7db2ea..f8fb6ecb0c 100644 --- a/third_party/examples/eager/spinn/spinn.py +++ b/third_party/examples/eager/spinn/spinn.py @@ -51,6 +51,9 @@ import tensorflow.contrib.eager as tfe from tensorflow.contrib.eager.python.examples.spinn import data +layers = tf.keras.layers + + def _bundle(lstm_iter): """Concatenate a list of Tensors along 1st axis and split result into two. @@ -78,17 +81,16 @@ def _unbundle(state): return tf.split(tf.concat(state, 1), state[0].shape[0], axis=0) -class Reducer(tfe.Network): +# pylint: disable=not-callable +class Reducer(tf.keras.Model): """A module that applies reduce operation on left and right vectors.""" def __init__(self, size, tracker_size=None): super(Reducer, self).__init__() - self.left = self.track_layer(tf.layers.Dense(5 * size, activation=None)) - self.right = self.track_layer( - tf.layers.Dense(5 * size, activation=None, use_bias=False)) + self.left = layers.Dense(5 * size, activation=None) + self.right = layers.Dense(5 * size, activation=None, use_bias=False) if tracker_size is not None: - self.track = self.track_layer( - tf.layers.Dense(5 * size, activation=None, use_bias=False)) + self.track = layers.Dense(5 * size, activation=None, use_bias=False) else: self.track = None @@ -123,7 +125,7 @@ class Reducer(tfe.Network): return h, c -class Tracker(tfe.Network): +class Tracker(tf.keras.Model): """A module that tracks the history of the sentence with an LSTM.""" def __init__(self, tracker_size, predict): @@ -134,10 +136,10 @@ class Tracker(tfe.Network): predict: (`bool`) Whether prediction mode is enabled. """ super(Tracker, self).__init__() - self._rnn = self.track_layer(tf.nn.rnn_cell.LSTMCell(tracker_size)) + self._rnn = tf.nn.rnn_cell.LSTMCell(tracker_size) self._state_size = tracker_size if predict: - self._transition = self.track_layer(tf.layers.Dense(4)) + self._transition = layers.Dense(4) else: self._transition = None @@ -182,7 +184,7 @@ class Tracker(tfe.Network): return unbundled, None -class SPINN(tfe.Network): +class SPINN(tf.keras.Model): """Stack-augmented Parser-Interpreter Neural Network. See https://arxiv.org/abs/1603.06021 for more details. @@ -204,9 +206,9 @@ class SPINN(tfe.Network): """ super(SPINN, self).__init__() self.config = config - self.reducer = self.track_layer(Reducer(config.d_hidden, config.d_tracker)) + self.reducer = Reducer(config.d_hidden, config.d_tracker) if config.d_tracker is not None: - self.tracker = self.track_layer(Tracker(config.d_tracker, config.predict)) + self.tracker = Tracker(config.d_tracker, config.predict) else: self.tracker = None @@ -248,7 +250,7 @@ class SPINN(tfe.Network): trans = transitions[i] if self.tracker: # Invoke tracker to obtain the current tracker states for the sentences. - tracker_states, trans_hypothesis = self.tracker(buffers, stacks) + tracker_states, trans_hypothesis = self.tracker(buffers, stacks=stacks) if trans_hypothesis: trans = tf.argmax(trans_hypothesis, axis=-1) else: @@ -264,7 +266,8 @@ class SPINN(tfe.Network): trackings.append(tracking) if rights: - reducer_output = self.reducer(lefts, rights, trackings) + reducer_output = self.reducer( + lefts, right_in=rights, tracking=trackings) reduced = iter(reducer_output) for transition, stack in zip(trans, stacks): @@ -273,7 +276,27 @@ class SPINN(tfe.Network): return _bundle([stack.pop() for stack in stacks])[0] -class SNLIClassifier(tfe.Network): +class Perceptron(tf.keras.Model): + """One layer of the SNLIClassifier multi-layer perceptron.""" + + def __init__(self, dimension, dropout_rate, previous_layer): + """Configure the Perceptron.""" + super(Perceptron, self).__init__() + self.dense = tf.keras.layers.Dense(dimension, activation=tf.nn.elu) + self.batchnorm = layers.BatchNormalization() + self.dropout = layers.Dropout(rate=dropout_rate) + self.previous_layer = previous_layer + + def call(self, x, training): + """Run previous Perceptron layers, then this one.""" + x = self.previous_layer(x, training=training) + x = self.dense(x) + x = self.batchnorm(x, training=training) + x = self.dropout(x, training=training) + return x + + +class SNLIClassifier(tf.keras.Model): """SNLI Classifier Model. A model aimed at solving the SNLI (Standford Natural Language Inference) @@ -304,29 +327,24 @@ class SNLIClassifier(tfe.Network): self.config = config self.embed = tf.constant(embed) - self.projection = self.track_layer(tf.layers.Dense(config.d_proj)) - self.embed_bn = self.track_layer(tf.layers.BatchNormalization()) - self.embed_dropout = self.track_layer( - tf.layers.Dropout(rate=config.embed_dropout)) - self.encoder = self.track_layer(SPINN(config)) - - self.feature_bn = self.track_layer(tf.layers.BatchNormalization()) - self.feature_dropout = self.track_layer( - tf.layers.Dropout(rate=config.mlp_dropout)) - - self.mlp_dense = [] - self.mlp_bn = [] - self.mlp_dropout = [] - for _ in xrange(config.n_mlp_layers): - self.mlp_dense.append(self.track_layer(tf.layers.Dense(config.d_mlp))) - self.mlp_bn.append( - self.track_layer(tf.layers.BatchNormalization())) - self.mlp_dropout.append( - self.track_layer(tf.layers.Dropout(rate=config.mlp_dropout))) - self.mlp_output = self.track_layer(tf.layers.Dense( + self.projection = layers.Dense(config.d_proj) + self.embed_bn = layers.BatchNormalization() + self.embed_dropout = layers.Dropout(rate=config.embed_dropout) + self.encoder = SPINN(config) + + self.feature_bn = layers.BatchNormalization() + self.feature_dropout = layers.Dropout(rate=config.mlp_dropout) + + current_mlp = lambda result, training: result + for _ in range(config.n_mlp_layers): + current_mlp = Perceptron(dimension=config.d_mlp, + dropout_rate=config.mlp_dropout, + previous_layer=current_mlp) + self.mlp = current_mlp + self.mlp_output = layers.Dense( config.d_out, kernel_initializer=tf.random_uniform_initializer(minval=-5e-3, - maxval=5e-3))) + maxval=5e-3)) def call(self, premise, @@ -370,10 +388,10 @@ class SNLIClassifier(tfe.Network): # Run the batch-normalized and dropout-processed word vectors through the # SPINN encoder. - premise = self.encoder(premise_embed, premise_transition, - training=training) - hypothesis = self.encoder(hypothesis_embed, hypothesis_transition, - training=training) + premise = self.encoder( + premise_embed, transitions=premise_transition, training=training) + hypothesis = self.encoder( + hypothesis_embed, transitions=hypothesis_transition, training=training) # Combine encoder outputs for premises and hypotheses into logits. # Then apply batch normalization and dropuout on the logits. @@ -383,15 +401,12 @@ class SNLIClassifier(tfe.Network): self.feature_bn(logits, training=training), training=training) # Apply the multi-layer perceptron on the logits. - for dense, bn, dropout in zip( - self.mlp_dense, self.mlp_bn, self.mlp_dropout): - logits = tf.nn.elu(dense(logits)) - logits = dropout(bn(logits, training=training), training=training) + logits = self.mlp(logits, training=training) logits = self.mlp_output(logits) return logits -class SNLIClassifierTrainer(object): +class SNLIClassifierTrainer(tfe.Checkpointable): """A class that coordinates the training of an SNLIClassifier.""" def __init__(self, snli_classifier, lr): @@ -450,10 +465,11 @@ class SNLIClassifierTrainer(object): """ with tfe.GradientTape() as tape: tape.watch(self._model.variables) + # TODO(allenl): Allow passing Layer inputs as position arguments. logits = self._model(premise, - premise_transition, - hypothesis, - hypothesis_transition, + premise_transition=premise_transition, + hypothesis=hypothesis, + hypothesis_transition=hypothesis_transition, training=True) loss = self.loss(labels, logits) gradients = tape.gradient(loss, self._model.variables) @@ -517,7 +533,9 @@ def _evaluate_on_dataset(snli_data, batch_size, trainer, use_gpu): snli_data, batch_size): if use_gpu: label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu() - logits = trainer.model(prem, prem_trans, hypo, hypo_trans, training=False) + logits = trainer.model( + prem, premise_transition=prem_trans, hypothesis=hypo, + hypothesis_transition=hypo_trans, training=False) loss_val = trainer.loss(label, logits) batch_size = tf.shape(label)[0] mean_loss(loss_val, weights=batch_size.gpu() if use_gpu else batch_size) @@ -609,29 +627,30 @@ def train_or_infer_spinn(embed, with tf.device(device), \ summary_writer.as_default(), \ tf.contrib.summary.always_record_summaries(): - with tfe.restore_variables_on_create( - tf.train.latest_checkpoint(config.logdir)): - model = SNLIClassifier(config, embed) - global_step = tf.train.get_or_create_global_step() - trainer = SNLIClassifierTrainer(model, config.lr) + model = SNLIClassifier(config, embed) + global_step = tf.train.get_or_create_global_step() + trainer = SNLIClassifierTrainer(model, config.lr) + checkpoint = tfe.Checkpoint(trainer=trainer, global_step=global_step) + checkpoint.restore(tf.train.latest_checkpoint(config.logdir)) if inference_sentence_pair: # Inference mode. - with tfe.restore_variables_on_create( - tf.train.latest_checkpoint(config.logdir)): - prem, prem_trans = inference_sentence_pair[0] - hypo, hypo_trans = inference_sentence_pair[1] - hypo_trans = inference_sentence_pair[1][1] - inference_logits = model( # pylint: disable=not-callable - tf.constant(prem), tf.constant(prem_trans), - tf.constant(hypo), tf.constant(hypo_trans), training=False) - inference_logits = inference_logits[0][1:] - max_index = tf.argmax(inference_logits) - print("\nInference logits:") - for i, (label, logit) in enumerate( - zip(data.POSSIBLE_LABELS, inference_logits)): - winner_tag = " (winner)" if max_index == i else "" - print(" {0:<16}{1:.6f}{2}".format(label + ":", logit, winner_tag)) + prem, prem_trans = inference_sentence_pair[0] + hypo, hypo_trans = inference_sentence_pair[1] + hypo_trans = inference_sentence_pair[1][1] + inference_logits = model( + tf.constant(prem), + premise_transition=tf.constant(prem_trans), + hypothesis=tf.constant(hypo), + hypothesis_transition=tf.constant(hypo_trans), + training=False) + inference_logits = inference_logits[0][1:] + max_index = tf.argmax(inference_logits) + print("\nInference logits:") + for i, (label, logit) in enumerate( + zip(data.POSSIBLE_LABELS, inference_logits)): + winner_tag = " (winner)" if max_index == i else "" + print(" {0:<16}{1:.6f}{2}".format(label + ":", logit, winner_tag)) return inference_logits train_len = train_data.num_batches(config.batch_size) @@ -650,20 +669,15 @@ def train_or_infer_spinn(embed, # remain on CPU. Same in _evaluate_on_dataset(). iterations += 1 - with tfe.restore_variables_on_create( - tf.train.latest_checkpoint(config.logdir)): - batch_train_loss, batch_train_logits = trainer.train_batch( - label, prem, prem_trans, hypo, hypo_trans) + batch_train_loss, batch_train_logits = trainer.train_batch( + label, prem, prem_trans, hypo, hypo_trans) batch_size = tf.shape(label)[0] mean_loss(batch_train_loss.numpy(), weights=batch_size.gpu() if use_gpu else batch_size) accuracy(tf.argmax(batch_train_logits, axis=1), label) if iterations % config.save_every == 0: - all_variables = trainer.variables + [global_step] - saver = tfe.Saver(all_variables) - saver.save(os.path.join(config.logdir, "ckpt"), - global_step=global_step) + checkpoint.save(os.path.join(config.logdir, "ckpt")) if iterations % config.dev_every == 0: dev_loss, dev_frac_correct = _evaluate_on_dataset( -- GitLab From a41a2975c4b39ca6026deb46f0343317da165ea6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 15:12:35 -0700 Subject: [PATCH 452/960] [TF:XLA] Fix PotentiallyImplementedAsEigenConvolution to use the correct shape as the kernel shape A small bug is found in accessing the kernel's shape of the convolution instruction in PotentiallyImplementedAsEigenConvolution. The bug was fixed and a new testcase is created to reveal the bug. PiperOrigin-RevId: 190282385 --- tensorflow/compiler/xla/service/cpu/BUILD | 16 +++++++ .../xla/service/cpu/ir_emission_utils.cc | 8 ++-- .../xla/service/cpu/ir_emission_utils_test.cc | 46 +++++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 093db020c0..0faa9e9c41 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -670,6 +670,22 @@ cc_library( ], ) +tf_cc_test( + name = "ir_emission_utils_test", + srcs = ["ir_emission_utils_test.cc"], + deps = [ + ":ir_emission_utils", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", + ], +) + cc_library( name = "cpu_layout_assignment", srcs = ["cpu_layout_assignment.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc index 788217aab6..f209a69e3c 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils.cc @@ -34,14 +34,16 @@ bool PotentiallyImplementedAsEigenConvolution( // // To be sufficient, certain layout constraints need to be satisfied as well. const Shape& input_shape = convolution.operand(0)->shape(); - const Shape& kernel_shape = convolution.operand(0)->shape(); + const Shape& kernel_shape = convolution.operand(1)->shape(); if (ShapeUtil::HasZeroElements(input_shape) || ShapeUtil::HasZeroElements(kernel_shape)) { return false; } + // Make sure input and kernel has the same data type. + CHECK( + ShapeUtil::SameElementTypeIgnoringFpPrecision(input_shape, kernel_shape)); // TODO(b/65408531): Explore using Eigen dot for complex64 type. - if (ShapeUtil::ElementIsComplex(input_shape) || - ShapeUtil::ElementIsComplex(kernel_shape)) { + if (ShapeUtil::ElementIsComplex(input_shape)) { return false; } if (window_util::HasWindowReversal(convolution.window())) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc b/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc new file mode 100644 index 0000000000..215f48c4cc --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/ir_emission_utils_test.cc @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/ir_emission_utils.h" + +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" + +namespace xla { +namespace { + +TEST(IrEmitterTest, ConvWithZeroSizedKernelNotImplementedAsEigen) { + const char* const hlo_string = R"( +HloModule ModuleWithConv + +ENTRY Conv { + input = f32[32,50,28,28]{3,2,1,0} parameter(0) + kernel = f32[0,32,5,5]{3,2,1,0} parameter(1) + ROOT convolution = f32[64,50,24,24]{3,2,1,0} convolution(input, kernel), + window={size=5x5}, + dim_labels=b01f_01io->b01f +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(hlo_string)); + + HloComputation* entry_computation = module->entry_computation(); + + HloInstruction* conv_instr = entry_computation->root_instruction(); + EXPECT_FALSE(cpu::PotentiallyImplementedAsEigenConvolution(*conv_instr)); +} + +} // namespace +} // namespace xla -- GitLab From bc1dfdf8bc9e3edb4362314a89a23bb2c827bdaa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 15:33:33 -0700 Subject: [PATCH 453/960] Adding support for analyzing assignment info for nested tuples. PiperOrigin-RevId: 190285584 --- .../py2tf/pyct/static_analysis/type_info.py | 16 ++++++++++++---- .../pyct/static_analysis/type_info_test.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py index 5556a58c02..a969adbeca 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py @@ -168,6 +168,15 @@ class TypeInfoResolver(transformer.Base): anno.getanno(definition, 'element_type')) return node + def _process_tuple_assignment(self, source, t): + for i, e in enumerate(t.elts): + if isinstance(e, gast.Tuple): + self._process_tuple_assignment(source, e) + else: + self.scope.setval( + anno.getanno(e, anno.Basic.QN), + gast.Subscript(source, gast.Index(i), ctx=gast.Store())) + def _process_variable_assignment(self, source, targets): if isinstance(source, gast.Call): func = source.func @@ -183,10 +192,9 @@ class TypeInfoResolver(transformer.Base): for t in targets: if isinstance(t, gast.Tuple): - for i, e in enumerate(t.elts): - self.scope.setval( - anno.getanno(e, anno.Basic.QN), - gast.Subscript(source, gast.Index(i), ctx=gast.Store())) + # need to recurse on the case of assigning nested tuples, + # ex. a, (b, c) = f() + self._process_tuple_assignment(source, t) elif isinstance(t, (gast.Name, gast.Attribute)): self.scope.setval(anno.getanno(t, anno.Basic.QN), source) else: diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py index 0d9d5a85f0..8a8956197d 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py @@ -196,6 +196,23 @@ class TypeInfoResolverTest(test.TestCase): f_ref = node.body[0].body[1].value self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + def test_nested_assignment(self): + + def test_fn(foo): + a, (b, c) = foo + return a, b, c + + node = self._parse_and_analyze(test_fn, {'foo': (1, 2, 3)}) + lhs = node.body[0].body[1].value.elts + a = lhs[0] + b = lhs[1] + c = lhs[2] + # TODO(mdan): change these once we have the live values propagating + # correctly + self.assertFalse(anno.hasanno(a, 'live_val')) + self.assertFalse(anno.hasanno(b, 'live_val')) + self.assertFalse(anno.hasanno(c, 'live_val')) + if __name__ == '__main__': test.main() -- GitLab From 7c57af0c860746e8a91b13bade87bdd1af9dc9e1 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 23 Mar 2018 15:34:47 -0700 Subject: [PATCH 454/960] [XLA] Don't CSE instructions which have side-effects PiperOrigin-RevId: 190285774 --- tensorflow/compiler/xla/service/hlo_cse.cc | 7 ++++++- tensorflow/compiler/xla/service/hlo_cse_test.cc | 13 +++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_cse.cc b/tensorflow/compiler/xla/service/hlo_cse.cc index 279edd4ba8..cd7cbbdd71 100644 --- a/tensorflow/compiler/xla/service/hlo_cse.cc +++ b/tensorflow/compiler/xla/service/hlo_cse.cc @@ -109,6 +109,11 @@ StatusOr HloCSE::Run(HloModule* module) { continue; } + // Skip instructions which have side effects. + if (instruction->HasSideEffect()) { + continue; + } + // An instruction is considered to be equivalent to another only if they // share the exact same set of operands. So to find equivalent // instructions, we just search among instructions which share operand(0) @@ -118,7 +123,7 @@ StatusOr HloCSE::Run(HloModule* module) { tensorflow::gtl::InlinedVector equivalent_instructions; for (HloInstruction* user : operand->users()) { - if (user != instruction && + if (user != instruction && !user->HasSideEffect() && user->Identical(*instruction, eq_instructions, eq_computations, is_layout_sensitive_)) { equivalent_instructions.push_back(user); diff --git a/tensorflow/compiler/xla/service/hlo_cse_test.cc b/tensorflow/compiler/xla/service/hlo_cse_test.cc index 3601a790c4..df8853f34f 100644 --- a/tensorflow/compiler/xla/service/hlo_cse_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cse_test.cc @@ -414,8 +414,7 @@ TEST_F(HloCseTest, DoNotCombineRng) { EXPECT_THAT(root, op::Add(rng1, rng2)); } -// TODO(b/28245743): Handle impure functions correctly in CSE. -TEST_F(HloCseTest, DISABLED_DoNotCombineCallsToImpureFunctions) { +TEST_F(HloCseTest, DoNotCombineCallsToImpureFunctions) { // Test that two calls to an impure function are not commoned. RNG // is the source of the impurity. @@ -458,14 +457,16 @@ TEST_F(HloCseTest, DISABLED_DoNotCombineCallsToImpureFunctions) { HloInstruction* root = computation->root_instruction(); EXPECT_THAT(root, op::Add(op::Map(), op::Map())); + VLOG(3) << "before: " << module->ToString(); + HloCSE cse(/*is_layout_sensitive=*/false); - EXPECT_TRUE(cse.Run(module.get()).ValueOrDie()); + EXPECT_FALSE(cse.Run(module.get()).ValueOrDie()); + + VLOG(3) << "after: " << module->ToString(); EXPECT_EQ(4, computation->instruction_count()); root = computation->root_instruction(); - auto operand = root->operand(0)->operand(0); - EXPECT_THAT(operand, op::Map()); - EXPECT_THAT(root, op::Add(operand, operand)); + EXPECT_THAT(root, op::Add(op::Map(op::Constant()), op::Map(op::Constant()))); } } // namespace -- GitLab From 95a87277174f9fc49b4b5d9c1edbbd149bd0274c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 23 Mar 2018 15:52:35 -0700 Subject: [PATCH 455/960] [XLA:CPU] Update calls to IRBuilder::CreateMemCpy to the 2-alignment form. The single alignment version is going away. PiperOrigin-RevId: 190288581 --- .../compiler/xla/service/cpu/ir_emitter.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 3b8056d505..3405277d44 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -438,12 +438,14 @@ Status IrEmitter::EmitXfeedTransfer(XfeedKind kind, const Shape& shape, if (kind == XfeedKind::kInfeed) { // Copy to the program buffer address from the acquired buffer. - ir_builder_.CreateMemCpy(program_buffer_address, acquired_pointer, - length_32, 1); + ir_builder_.CreateMemCpy(program_buffer_address, /*DstAlign=*/1, + acquired_pointer, + /*SrcAlign=*/1, length_32); } else { // Outfeed -- copy from the in-program address to the acquired buffer. - ir_builder_.CreateMemCpy(acquired_pointer, program_buffer_address, - length_32, 1); + ir_builder_.CreateMemCpy(acquired_pointer, /*DstAlign=*/1, + program_buffer_address, + /*SrcAlign=*/1, length_32); } ir_builder_.CreateCall(release_func, @@ -2441,7 +2443,8 @@ void IrEmitter::EmitTransferElements(llvm::Value* target, llvm::Value* source, target_array.AnnotateLoadStoreInstructionWithMetadata(store_instruction); } else { auto* memcpy_instruction = ir_builder_.CreateMemCpy( - target, source, element_count * primitive_type_size, element_alignment); + target, /*DstAlign=*/element_alignment, source, + /*SrcAlign=*/element_alignment, element_count * primitive_type_size); // The memcpy does the load and the store internally. The aliasing related // metadata has to reflect that. @@ -2905,7 +2908,8 @@ Status IrEmitter::EmitMemcpy(const HloInstruction& source, llvm::Value* destination_value = GetEmittedValueFor(&destination); int64 source_size = ByteSizeOf(source.shape()); // TODO(b/63762267): Be more aggressive about specifying alignment. - ir_builder_.CreateMemCpy(destination_value, source_value, source_size, 1); + ir_builder_.CreateMemCpy(destination_value, /*DstAlign=*/1, source_value, + /*SrcAlign=*/1, source_size); return Status::OK(); } -- GitLab From 084c10784887d7c4d467416430626cf7eb333cb8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 16:00:14 -0700 Subject: [PATCH 456/960] Extended scatter operations to work with a scalar update parameter and added scatter-min and scatter-max operations. PiperOrigin-RevId: 190289664 --- .../base_api/api_def_ResourceScatterAdd.pbtxt | 2 +- .../base_api/api_def_ResourceScatterDiv.pbtxt | 43 ++++ .../base_api/api_def_ResourceScatterMax.pbtxt | 43 ++++ .../base_api/api_def_ResourceScatterMin.pbtxt | 43 ++++ .../base_api/api_def_ResourceScatterMul.pbtxt | 43 ++++ .../base_api/api_def_ResourceScatterSub.pbtxt | 43 ++++ .../api_def/base_api/api_def_ScatterAdd.pbtxt | 2 +- .../api_def/base_api/api_def_ScatterDiv.pbtxt | 2 +- .../api_def/base_api/api_def_ScatterMax.pbtxt | 60 +++++ .../api_def/base_api/api_def_ScatterMin.pbtxt | 60 +++++ .../api_def/base_api/api_def_ScatterMul.pbtxt | 2 +- .../api_def/base_api/api_def_ScatterSub.pbtxt | 2 +- .../base_api/api_def_ScatterUpdate.pbtxt | 2 +- .../api_def_ResourceScatterDiv.pbtxt | 4 + .../api_def_ResourceScatterMax.pbtxt | 4 + .../api_def_ResourceScatterMin.pbtxt | 4 + .../api_def_ResourceScatterMul.pbtxt | 4 + .../api_def_ResourceScatterSub.pbtxt | 4 + .../core/kernels/resource_variable_ops.cc | 81 ++++--- tensorflow/core/kernels/scatter_functor.cc | 27 ++- tensorflow/core/kernels/scatter_functor.h | 170 +++++++++++++- .../core/kernels/scatter_functor_gpu.cu.cc | 9 +- .../core/kernels/scatter_functor_gpu.cu.h | 108 +++++++-- tensorflow/core/kernels/scatter_op.cc | 126 ++++++---- tensorflow/core/kernels/scatter_op_gpu.cu.cc | 9 +- tensorflow/core/kernels/scatter_op_test.cc | 26 ++- tensorflow/core/ops/resource_variable_ops.cc | 92 +++++--- tensorflow/core/ops/state_ops.cc | 25 +- .../docs_src/api_guides/python/state_ops.md | 2 + .../resource_variable_ops_test.py | 215 ++++++++++++++++++ .../python/kernel_tests/scatter_ops_test.py | 145 +++++++++++- tensorflow/python/ops/standard_ops.py | 2 + tensorflow/python/ops/state_ops.py | 2 + tensorflow/tools/api/golden/tensorflow.pbtxt | 8 + 34 files changed, 1261 insertions(+), 153 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterDiv.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterMin.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterMul.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceScatterSub.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ScatterMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ScatterMin.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterDiv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterMax.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterMin.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterSub.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterAdd.pbtxt index 9e0de08267..4eb6eb4e4d 100644 --- a/tensorflow/core/api_def/base_api/api_def_ResourceScatterAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterAdd.pbtxt @@ -34,7 +34,7 @@ This operation computes Duplicate entries are handled correctly: if multiple `indices` reference the same location, their contributions add. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterDiv.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterDiv.pbtxt new file mode 100644 index 0000000000..47148f7b03 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterDiv.pbtxt @@ -0,0 +1,43 @@ +op { + graph_op_name: "ResourceScatterDiv" + in_arg { + name: "resource" + description: < + +
+END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMax.pbtxt new file mode 100644 index 0000000000..71f06d9a43 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMax.pbtxt @@ -0,0 +1,43 @@ +op { + graph_op_name: "ResourceScatterMax" + in_arg { + name: "resource" + description: < + + +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterMin.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMin.pbtxt new file mode 100644 index 0000000000..08e40ee2a8 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMin.pbtxt @@ -0,0 +1,43 @@ +op { + graph_op_name: "ResourceScatterMin" + in_arg { + name: "resource" + description: < + + +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterMul.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMul.pbtxt new file mode 100644 index 0000000000..5c63549d81 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterMul.pbtxt @@ -0,0 +1,43 @@ +op { + graph_op_name: "ResourceScatterMul" + in_arg { + name: "resource" + description: < + + +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceScatterSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceScatterSub.pbtxt new file mode 100644 index 0000000000..e71e60cbee --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ResourceScatterSub.pbtxt @@ -0,0 +1,43 @@ +op { + graph_op_name: "ResourceScatterSub" + in_arg { + name: "resource" + description: < + + +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterAdd.pbtxt index 4b5201f025..9da9d09ea6 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterAdd.pbtxt @@ -51,7 +51,7 @@ This makes it easier to chain operations that need to use the reset value. Duplicate entries are handled correctly: if multiple `indices` reference the same location, their contributions add. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterDiv.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterDiv.pbtxt index 771cf0b591..8e99718c7e 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterDiv.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterDiv.pbtxt @@ -53,6 +53,6 @@ This makes it easier to chain operations that need to use the reset value. Duplicate entries are handled correctly: if multiple `indices` reference the same location, their contributions divide. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. END } diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterMax.pbtxt new file mode 100644 index 0000000000..7b52dad4a1 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ScatterMax.pbtxt @@ -0,0 +1,60 @@ +op { + graph_op_name: "ScatterMax" + in_arg { + name: "ref" + description: < + +
+END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterMin.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterMin.pbtxt new file mode 100644 index 0000000000..721ac0ff35 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ScatterMin.pbtxt @@ -0,0 +1,60 @@ +op { + graph_op_name: "ScatterMin" + in_arg { + name: "ref" + description: < + + +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterMul.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterMul.pbtxt index a51f571b00..b9e293ba9e 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterMul.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterMul.pbtxt @@ -53,6 +53,6 @@ This makes it easier to chain operations that need to use the reset value. Duplicate entries are handled correctly: if multiple `indices` reference the same location, their contributions multiply. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. END } diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterSub.pbtxt index c0d3a4a133..d12b3e68c2 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterSub.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterSub.pbtxt @@ -51,7 +51,7 @@ This makes it easier to chain operations that need to use the reset value. Duplicate entries are handled correctly: if multiple `indices` reference the same location, their (negated) contributions add. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterUpdate.pbtxt index c44dbbd233..4804908afc 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterUpdate.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterUpdate.pbtxt @@ -54,7 +54,7 @@ If values in `ref` is to be updated more than once, because there are duplicate entries in `indices`, the order at which the updates happen for each value is undefined. -Requires `updates.shape = indices.shape + ref.shape[1:]`. +Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterDiv.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterDiv.pbtxt new file mode 100644 index 0000000000..56b5a46d10 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterDiv.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterDiv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterMax.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMax.pbtxt new file mode 100644 index 0000000000..8119bcc6c6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMax.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterMax" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterMin.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMin.pbtxt new file mode 100644 index 0000000000..d874aef3fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMin.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterMin" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMul.pbtxt new file mode 100644 index 0000000000..365a37fa0d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterMul.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterSub.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterSub.pbtxt new file mode 100644 index 0000000000..72dc5bf889 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterSub.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterSub" + visibility: HIDDEN +} diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index aecad0185f..e134e476f6 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -619,22 +619,35 @@ class ResourceScatterUpdateOp : public OpKernel { if (N > 0) { auto indices_flat = indices.flat(); auto params_flat = params->flat_outer_dims(); - int64 num_updates = updates.NumElements(); - OP_REQUIRES(c, num_updates % N == 0, - errors::InvalidArgument( - "shape of indices (", indices.shape().DebugString(), - ") is not compatible with the shape of updates (", - updates.shape().DebugString(), ")")); - auto updates_flat = updates.shaped({N, num_updates / N}); - - functor::ScatterFunctor functor; - const Index bad_i = functor(c, c->template eigen_device(), - params_flat, updates_flat, indices_flat); - OP_REQUIRES(c, bad_i < 0, - errors::InvalidArgument( - "indices", SliceDebugString(indices.shape(), bad_i), - " = ", indices_flat(bad_i), " is not in [0, ", - params->dim_size(0), ")")); + if (TensorShapeUtils::IsScalar(updates.shape())) { + const auto update = updates.scalar(); + + functor::ScatterScalarFunctor functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, update, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params->dim_size(0), ")")); + } else { + int64 num_updates = updates.NumElements(); + OP_REQUIRES(c, num_updates % N == 0, + errors::InvalidArgument( + "shape of indices (", indices.shape().DebugString(), + ") is not compatible with the shape of updates (", + updates.shape().DebugString(), ")")); + auto updates_flat = updates.shaped({N, num_updates / N}); + + functor::ScatterFunctor functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, updates_flat, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params->dim_size(0), ")")); + } } } }; @@ -652,35 +665,51 @@ class ResourceScatterUpdateOp : public OpKernel { REGISTER_SCATTER_KERNEL_INDEX(type, int32, dev, name, op); \ REGISTER_SCATTER_KERNEL_INDEX(type, int64, dev, name, op); -// TODO(apassos) add the other types here. -#define REGISTER_SCATTER_ARITHEMTIC(type, dev) \ +#define REGISTER_SCATTER_ARITHMETIC(type, dev) \ REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterAdd", \ scatter_op::UpdateOp::ADD); \ + REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterSub", \ + scatter_op::UpdateOp::SUB); \ + REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterMul", \ + scatter_op::UpdateOp::MUL); \ + REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterDiv", \ + scatter_op::UpdateOp::DIV); \ REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterUpdate", \ scatter_op::UpdateOp::ASSIGN); +#define REGISTER_SCATTER_MINMAX(type, dev) \ + REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterMin", \ + scatter_op::UpdateOp::MIN); \ + REGISTER_SCATTER_KERNEL(type, dev, "ResourceScatterMax", \ + scatter_op::UpdateOp::MAX); // Registers CPU kernels. -#define REGISTER_SCATTER_ARITHEMTIC_CPU(type) \ - REGISTER_SCATTER_ARITHEMTIC(type, CPU); +#define REGISTER_SCATTER_ARITHMETIC_CPU(type) \ + REGISTER_SCATTER_ARITHMETIC(type, CPU); +#define REGISTER_SCATTER_MINMAX_CPU(type) REGISTER_SCATTER_MINMAX(type, CPU); -TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHEMTIC_CPU); +TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHMETIC_CPU); +TF_CALL_REAL_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_CPU); REGISTER_SCATTER_KERNEL(string, CPU, "ResourceScatterUpdate", scatter_op::UpdateOp::ASSIGN); // Registers GPU kernels. #if GOOGLE_CUDA -#define REGISTER_SCATTER_ARITHEMTIC_GPU(type) \ - REGISTER_SCATTER_ARITHEMTIC(type, GPU); +#define REGISTER_SCATTER_ARITHMETIC_GPU(type) \ + REGISTER_SCATTER_ARITHMETIC(type, GPU); +#define REGISTER_SCATTER_MINMAX_GPU(type) REGISTER_SCATTER_MINMAX(type, GPU); #define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_GPU); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU); #endif // GOOGLE_CUDA -#undef REGISTER_SCATTER_ARITHEMTIC -#undef REGISTER_SCATTER_ARITHEMTIC_CPU +#undef REGISTER_SCATTER_ARITHMETIC +#undef REGISTER_SCATTER_ARITHMETIC_CPU +#undef REGISTER_SCATTER_MINMAX +#undef REGISTER_SCATTER_MINMAX_CPU #undef REGISTER_SCATTER_KERNEL #undef REGISTER_SCATTER_KERNEL_INDEX diff --git a/tensorflow/core/kernels/scatter_functor.cc b/tensorflow/core/kernels/scatter_functor.cc index 7eba82899f..cf5408123f 100644 --- a/tensorflow/core/kernels/scatter_functor.cc +++ b/tensorflow/core/kernels/scatter_functor.cc @@ -26,21 +26,30 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { // Forward declarations of the functor specializations for GPU. -#define DECLARE_GPU_SPECS_OP(T, Index, op) \ - template <> \ - Index ScatterFunctor::operator()( \ - OpKernelContext* c, const GPUDevice& d, \ - typename TTypes::Matrix params, \ - typename TTypes::ConstMatrix updates, \ - typename TTypes::ConstFlat indices); \ - extern template struct ScatterFunctor; +#define DECLARE_GPU_SPECS_OP(T, Index, op) \ + template <> \ + Index ScatterFunctor::operator()( \ + OpKernelContext* c, const GPUDevice& d, \ + typename TTypes::Matrix params, \ + typename TTypes::ConstMatrix updates, \ + typename TTypes::ConstFlat indices); \ + extern template struct ScatterFunctor; \ + template <> \ + Index ScatterScalarFunctor::operator()( \ + OpKernelContext* c, const GPUDevice& d, \ + typename TTypes::Matrix params, \ + const typename TTypes::ConstScalar update, \ + typename TTypes::ConstFlat indices); \ + extern template struct ScatterScalarFunctor; #define DECLARE_GPU_SPECS_INDEX(T, Index) \ DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ASSIGN); \ DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ADD); \ DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::SUB); \ DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MUL); \ - DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); + DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); \ + DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MIN); \ + DECLARE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MAX); #define DECLARE_GPU_SPECS(T) \ DECLARE_GPU_SPECS_INDEX(T, int32); \ diff --git a/tensorflow/core/kernels/scatter_functor.h b/tensorflow/core/kernels/scatter_functor.h index 079f15e101..52666645bf 100644 --- a/tensorflow/core/kernels/scatter_functor.h +++ b/tensorflow/core/kernels/scatter_functor.h @@ -18,6 +18,8 @@ limitations under the License. #include +#include "third_party/eigen3/Eigen/Core" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/platform/types.h" @@ -33,7 +35,7 @@ typedef Eigen::SyclDevice SYCLDevice; namespace scatter_op { -enum class UpdateOp { ASSIGN, ADD, SUB, MUL, DIV }; +enum class UpdateOp { ASSIGN, ADD, SUB, MUL, DIV, MIN, MAX }; namespace internal { @@ -45,6 +47,10 @@ struct Assign { static void Run(Params p, Update u) { p = u; } + template + static void RunScalar(Params p, Update u) { + p.setConstant(u); + } }; template <> struct Assign { @@ -52,6 +58,10 @@ struct Assign { static void Run(Params p, Update u) { p += u; } + template + static void RunScalar(Params p, Update u) { + p = p + u; + } }; template <> struct Assign { @@ -59,6 +69,10 @@ struct Assign { static void Run(Params p, Update u) { p -= u; } + template + static void RunScalar(Params p, Update u) { + p = p + static_cast(-u); + } }; template <> struct Assign { @@ -66,6 +80,10 @@ struct Assign { static void Run(Params p, Update u) { p *= u; } + template + static void RunScalar(Params p, Update u) { + p = p * u; + } }; template <> struct Assign { @@ -73,6 +91,34 @@ struct Assign { static void Run(Params p, Update u) { p /= u; } + template + static void RunScalar(Params p, Update u) { + p = p / u; + } +}; +template <> +struct Assign { + // This method requires that Params and Update are tensor types. + template + static void Run(Params p, Update u) { + p = p.cwiseMin(u); + } + // Same thing, but for Update being a scalar type. + template + static void RunScalar(Params p, Update u) { + p = p.cwiseMin(u); + } +}; +template <> +struct Assign { + template + static void Run(Params p, Update u) { + p = p.cwiseMax(u); + } + template + static void RunScalar(Params p, Update u) { + p = p.cwiseMax(u); + } }; #ifdef TENSORFLOW_USE_SYCL @@ -117,6 +163,22 @@ struct AssignSYCL { p.device(d) = p / u; } }; + +template <> +struct AssignSYCL { + template + static void Run(Device d, Params p, Update u) { + p.device(d) = p.cwiseMin(u); + } +}; + +template <> +struct AssignSYCL { + template + static void Run(Device d, Params p, Update u) { + p.device(d) = p.cwiseMax(u); + } +}; #endif // TENSORFLOW_USE_SYCL } // namespace internal @@ -241,6 +303,112 @@ struct ScatterFunctorSYCL { }; #endif // TENSORFLOW_USE_SYCL +template +struct ScatterScalarFunctor { + Index operator()(OpKernelContext* c, const Device& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::ConstFlat indices); +}; + +template +struct ScatterScalarFunctorBase { + Index operator()(OpKernelContext* c, const Device& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::ConstFlat indices) { + // indices and params sizes were validated in DoCompute(). + const Index N = static_cast(indices.size()); + const Index limit = static_cast(params.dimension(0)); + for (Index i = 0; i < N; i++) { + // Grab the index and check its validity. An earlier version of the + // code checked it and then grabbed it from memory a second time, which + // was a security risk since it could have changed in between. + const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); + if (!FastBoundsCheck(index, limit)) return i; + // Broadcast update to params[index] + scatter_op::internal::Assign::RunScalar( + params.template chip<0>(index), update()); + } + return -1; + } +}; + +#ifdef TENSORFLOW_USE_SYCL +template +struct ScatterScalarFunctorBase { + Index operator()(OpKernelContext* c, const SYCLDevice& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::ConstFlat indices) { + // indices and params sizes were validated in DoCompute(). + const Index N = static_cast(indices.size()); + const Index limit = static_cast(params.dimension(0)); + for (Index i = 0; i < N; i++) { + // Grab the index and check its validity. An earlier version of the + // code checked it and then grabbed it from memory a second time, which + // was a security risk since it could have changed in between. + const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); + if (!FastBoundsCheck(index, limit)) return i; + // Broadcast update to params[index] + scatter_op::internal::AssignSYCL::RunScalar( + d, params.template chip<0>(index), update); + } + return -1; + } +}; +#endif // TENSORFLOW_USE_SYCL + +template +struct ScatterScalarFunctorBase { + Index operator()(OpKernelContext* c, const CPUDevice& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::ConstFlat indices) { + // indices and params sizes were validated in DoCompute(). + const Index N = static_cast(indices.size()); + const Index limit = static_cast(params.dimension(0)); + for (Index i = 0; i < N; i++) { + // Grab the index and check its validity. An earlier version of the + // code checked it and then grabbed it from memory a second time, which + // was a security risk since it could have changed in between. + const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); + if (!FastBoundsCheck(index, limit)) return i; + // Broadcast update to params[index] + scatter_op::internal::Assign::RunScalar( + params.template chip<0>(index), update()); + } + return -1; + } +}; + +template +struct ScatterScalarFunctor + : ScatterScalarFunctorBase {}; + +#ifdef TENSORFLOW_USE_SYCL +template +struct ScatterScalarFunctorSYCL { + Index operator()(OpKernelContext* c, const SYCLDevice& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::Flat indices) { + // indices and params sizes were validated in DoCompute(). + const Index N = static_cast(indices.size()); + const Index limit = static_cast(params.dimension(0)); + for (Index i = 0; i < N; i++) { + const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i)); + if (!FastBoundsCheck(index, limit)) return i; + // Broadcast update to params[index] + scatter_op::internal::AssignSYCL::Run( + d, params.template chip<0>(index), update()); + } + return -1; + } +}; +#endif // TENSORFLOW_USE_SYCL + } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.cc b/tensorflow/core/kernels/scatter_functor_gpu.cu.cc index 52972997cc..59911bf0d2 100644 --- a/tensorflow/core/kernels/scatter_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_functor_gpu.cu.cc @@ -23,15 +23,18 @@ namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -#define DEFINE_GPU_SPECS_OP(T, Index, op) \ - template struct functor::ScatterFunctor; +#define DEFINE_GPU_SPECS_OP(T, Index, op) \ + template struct functor::ScatterFunctor; \ + template struct functor::ScatterScalarFunctor; #define DEFINE_GPU_SPECS_INDEX(T, Index) \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ASSIGN); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ADD); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::SUB); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MUL); \ - DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); \ + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MIN); \ + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MAX); #define DEFINE_GPU_SPECS(T) \ DEFINE_GPU_SPECS_INDEX(T, int32); \ diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.h b/tensorflow/core/kernels/scatter_functor_gpu.cu.h index be18658543..70809e4dcf 100644 --- a/tensorflow/core/kernels/scatter_functor_gpu.cu.h +++ b/tensorflow/core/kernels/scatter_functor_gpu.cu.h @@ -29,12 +29,53 @@ namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; +namespace scatter_op_gpu { + +template +struct ScatterOpKernelBody; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { *dest = src; } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicAdd(dest, src); } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicSub(dest, src); } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicMul(dest, src); } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicDiv(dest, src); } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicMin(dest, src); } +}; + +template +struct ScatterOpKernelBody { + __device__ void operator()(T* dest, T src) const { CudaAtomicMax(dest, src); } +}; + template __global__ void ScatterOpCustomKernel(T* params, const T* updates, const Index* indices, Index first_dim_size, Index updates_size, Index indices_size) { Index update_block = updates_size / indices_size; + ScatterOpKernelBody body; CUDA_1D_KERNEL_LOOP(i, updates_size) { int indices_i = i / update_block; int updates_i = i; @@ -44,31 +85,33 @@ __global__ void ScatterOpCustomKernel(T* params, const T* updates, continue; } int params_i = param_first_index * update_block + (i % update_block); - switch (op) { - case scatter_op::UpdateOp::ASSIGN: { - params[params_i] = ldg(updates + updates_i); - break; - } - case scatter_op::UpdateOp::ADD: { - CudaAtomicAdd(params + params_i, ldg(updates + updates_i)); - break; - } - case scatter_op::UpdateOp::SUB: { - CudaAtomicSub(params + params_i, ldg(updates + updates_i)); - break; - } - case scatter_op::UpdateOp::MUL: { - CudaAtomicMul(params + params_i, ldg(updates + updates_i)); - break; - } - case scatter_op::UpdateOp::DIV: { - CudaAtomicDiv(params + params_i, ldg(updates + updates_i)); - break; - } + body(¶ms[params_i], ldg(updates + updates_i)); + } +} + +template +__global__ void ScatterScalarOpCustomKernel(T* params, const T* update, + const Index* indices, + Index first_dim_size, + Index indices_size, + Index synthesized_updates_size) { + Index update_block = synthesized_updates_size / indices_size; + ScatterOpKernelBody body; + CUDA_1D_KERNEL_LOOP(i, synthesized_updates_size) { + int indices_i = i / update_block; + int param_first_index = indices[indices_i]; + const T update_val = *update; + if (!(param_first_index >= 0 && param_first_index < first_dim_size)) { + // Ignore indices that are out of range. + continue; } + int params_i = param_first_index * update_block + (i % update_block); + body(¶ms[params_i], update_val); } } +} // namespace scatter_op_gpu + namespace functor { // Specialization for a GPU device. template @@ -84,7 +127,7 @@ struct ScatterFunctor { const Index indices_size = indices.size(); const Index updates_size = updates.size(); CudaLaunchConfig config = GetCudaLaunchConfig(updates_size, d); - ScatterOpCustomKernel + scatter_op_gpu::ScatterOpCustomKernel <<>>( params.data(), updates.data(), indices.data(), first_dim_size, updates_size, indices_size); @@ -92,6 +135,27 @@ struct ScatterFunctor { } }; +template +struct ScatterScalarFunctor { + Index operator()(OpKernelContext* c, const GPUDevice& d, + typename TTypes::Matrix params, + const typename TTypes::ConstScalar update, + typename TTypes::ConstFlat indices) { + // TODO(b/31801742): Implement indices range check. The hardest part is + // with returning a value after the range check, as we do not want to do + // device to host memcpy during a stream. + const Index first_dim_size = params.dimension(0); + const Index indices_size = indices.size(); + const Index synthesized_updates_size = indices_size * params.dimension(1); + CudaLaunchConfig config = GetCudaLaunchConfig(synthesized_updates_size, d); + scatter_op_gpu::ScatterScalarOpCustomKernel + <<>>( + params.data(), update.data(), indices.data(), first_dim_size, + indices_size, synthesized_updates_size); + return -1; + } +}; + } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc index 282165349f..0fbde764d5 100644 --- a/tensorflow/core/kernels/scatter_op.cc +++ b/tensorflow/core/kernels/scatter_op.cc @@ -38,6 +38,7 @@ typedef Eigen::SyclDevice SYCLDevice; // Check whether updates.shape = indices.shape + params.shape[1:] static bool ValidShapes(const Tensor& params, const Tensor& updates, const Tensor& indices) { + if (updates.dims() == 0) return true; if (updates.dims() != indices.dims() + params.dims() - 1) return false; for (int d = 0; d < indices.dims(); d++) { if (updates.dim_size(d) != indices.dim_size(d)) { @@ -61,11 +62,11 @@ static void DoValidationChecking(OpKernelContext* c, const Tensor& params, params.shape().DebugString())); OP_REQUIRES( c, ValidShapes(params, updates, indices), - errors::InvalidArgument( - "Must have updates.shape = indices.shape + params.shape[1:], got ", - "updates.shape ", updates.shape().DebugString(), ", indices.shape ", - indices.shape().DebugString(), ", params.shape ", - params.shape().DebugString())); + errors::InvalidArgument("Must have updates.shape = indices.shape + " + "params.shape[1:] or updates.shape = [], got ", + "updates.shape ", updates.shape().DebugString(), + ", indices.shape ", indices.shape().DebugString(), + ", params.shape ", params.shape().DebugString())); } template @@ -122,16 +123,31 @@ class ScatterUpdateOp : public OpKernel { if (N > 0) { auto indices_flat = indices.flat(); auto params_flat = params.flat_outer_dims(); - auto updates_flat = updates.shaped({N, updates.NumElements() / N}); - - functor::ScatterFunctor functor; - const Index bad_i = functor(c, c->template eigen_device(), - params_flat, updates_flat, indices_flat); - OP_REQUIRES( - c, bad_i < 0, - errors::InvalidArgument( - "indices", SliceDebugString(indices.shape(), bad_i), " = ", - indices_flat(bad_i), " is not in [0, ", params.dim_size(0), ")")); + + if (TensorShapeUtils::IsScalar(updates.shape()) || + IsLegacyScalar(updates.shape())) { + const auto update = updates.scalar(); + functor::ScatterScalarFunctor functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, update, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params.dim_size(0), ")")); + } else { + auto updates_flat = + updates.shaped({N, updates.NumElements() / N}); + + functor::ScatterFunctor functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, updates_flat, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params.dim_size(0), ")")); + } } } }; @@ -195,16 +211,31 @@ class ScatterUpdateOp : public OpKernel { auto indices_flat = indices_host.flat(); auto params_flat = params.flat_outer_dims(); - auto updates_flat = updates.shaped({N, updates.NumElements() / N}); - - functor::ScatterFunctorSYCL functor; - const Index bad_i = functor(c, c->template eigen_device(), - params_flat, updates_flat, indices_flat); - OP_REQUIRES( - c, bad_i < 0, - errors::InvalidArgument( - "indices", SliceDebugString(indices.shape(), bad_i), " = ", - indices_flat(bad_i), " is not in [0, ", params.dim_size(0), ")")); + + if (TensorShapeUtils::IsScalar(updates.shape())) { + const auto update = updates.scalar(); + + functor::ScatterScalarFunctorSYCL functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, update, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params.dim_size(0), ")")); + } else { + auto updates_flat = + updates.shaped({N, updates.NumElements() / N}); + + functor::ScatterFunctorSYCL functor; + const Index bad_i = functor(c, c->template eigen_device(), + params_flat, updates_flat, indices_flat); + OP_REQUIRES(c, bad_i < 0, + errors::InvalidArgument( + "indices", SliceDebugString(indices.shape(), bad_i), + " = ", indices_flat(bad_i), " is not in [0, ", + params.dim_size(0), ")")); + } } } }; @@ -221,54 +252,71 @@ class ScatterUpdateOp : public OpKernel { REGISTER_SCATTER_KERNEL_INDEX(type, int32, dev, name, op); \ REGISTER_SCATTER_KERNEL_INDEX(type, int64, dev, name, op); -#define REGISTER_SCATTER_ARITHEMTIC(type, dev) \ +#define REGISTER_SCATTER_ARITHMETIC(type, dev) \ REGISTER_SCATTER_KERNEL(type, dev, "ScatterAdd", scatter_op::UpdateOp::ADD); \ REGISTER_SCATTER_KERNEL(type, dev, "ScatterDiv", scatter_op::UpdateOp::DIV); \ REGISTER_SCATTER_KERNEL(type, dev, "ScatterMul", scatter_op::UpdateOp::MUL); \ REGISTER_SCATTER_KERNEL(type, dev, "ScatterSub", scatter_op::UpdateOp::SUB); +#define REGISTER_SCATTER_MINMAX(type, dev) \ + REGISTER_SCATTER_KERNEL(type, dev, "ScatterMin", scatter_op::UpdateOp::MIN); \ + REGISTER_SCATTER_KERNEL(type, dev, "ScatterMax", scatter_op::UpdateOp::MAX); + #define REGISTER_SCATTER_UPDATE(type, dev) \ REGISTER_SCATTER_KERNEL(type, dev, "ScatterUpdate", \ scatter_op::UpdateOp::ASSIGN); // Registers CPU kernels. -#define REGISTER_SCATTER_ARITHEMTIC_CPU(type) \ - REGISTER_SCATTER_ARITHEMTIC(type, CPU); +#define REGISTER_SCATTER_ARITHMETIC_CPU(type) \ + REGISTER_SCATTER_ARITHMETIC(type, CPU); + +#define REGISTER_SCATTER_MINMAX_CPU(type) REGISTER_SCATTER_MINMAX(type, CPU); #define REGISTER_SCATTER_UPDATE_CPU(type) REGISTER_SCATTER_UPDATE(type, CPU); -TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHEMTIC_CPU); +TF_CALL_REAL_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_CPU); +TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHMETIC_CPU); TF_CALL_ALL_TYPES(REGISTER_SCATTER_UPDATE_CPU); // Registers GPU kernels. #if GOOGLE_CUDA -#define REGISTER_SCATTER_ARITHEMTIC_GPU(type) \ - REGISTER_SCATTER_ARITHEMTIC(type, GPU); +#define REGISTER_SCATTER_ARITHMETIC_GPU(type) \ + REGISTER_SCATTER_ARITHMETIC(type, GPU); + +#define REGISTER_SCATTER_MINMAX_GPU(type) REGISTER_SCATTER_MINMAX(type, GPU); #define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_GPU); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_GPU); #endif // GOOGLE_CUDA // Registers GPU kernels. #if TENSORFLOW_USE_SYCL -#define REGISTER_SCATTER_ARITHEMTIC_SYCL(type) \ - REGISTER_SCATTER_ARITHEMTIC(type, SYCL); +#define REGISTER_SCATTER_ARITHMETIC_SYCL(type) \ + REGISTER_SCATTER_ARITHMETIC(type, SYCL); + +#define REGISTER_SCATTER_MINMAX_SYCL(type) REGISTER_SCATTER_MINMAX(type, SYCL); #define REGISTER_SCATTER_UPDATE_SYCL(type) REGISTER_SCATTER_UPDATE(type, SYCL); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_SYCL); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_SYCL); +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_SYCL); TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_SYCL); -#undef REGISTER_SCATTER_ARITHEMTIC_SYCL +#undef REGISTER_SCATTER_ARITHMETIC_SYCL +#undef REGISTER_SCATTER_MINMAX_SYCL #undef REGISTER_SCATTER_UPDATE_SYCL #endif // TENSORFLOW_USE_SYCL -#undef REGISTER_SCATTER_ARITHEMTIC -#undef REGISTER_SCATTER_ARITHEMTIC_CPU -#undef REGISTER_SCATTER_ARITHEMTIC_GPU +#undef REGISTER_SCATTER_ARITHMETIC +#undef REGISTER_SCATTER_ARITHMETIC_CPU +#undef REGISTER_SCATTER_ARITHMETIC_GPU +#undef REGISTER_SCATTER_MINMAX +#undef REGISTER_SCATTER_MINMAX_CPU +#undef REGISTER_SCATTER_MINMAX_GPU #undef REGISTER_SCATTER_UPDATE #undef REGISTER_SCATTER_UPDATE_CPU #undef REGISTER_SCATTER_UPDATE_GPU diff --git a/tensorflow/core/kernels/scatter_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_op_gpu.cu.cc index 0b43704846..0df329310f 100644 --- a/tensorflow/core/kernels/scatter_op_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_op_gpu.cu.cc @@ -24,15 +24,18 @@ namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; // Instantiates functor specializations for GPU. -#define DEFINE_GPU_SPECS_OP(T, Index, op) \ - template struct functor::ScatterFunctor; +#define DEFINE_GPU_SPECS_OP(T, Index, op) \ + template struct functor::ScatterFunctor; \ + template struct functor::ScatterScalarFunctor; #define DEFINE_GPU_SPECS_INDEX(T, Index) \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ASSIGN); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::ADD); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::SUB); \ DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MUL); \ - DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::DIV); \ + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MIN); \ + DEFINE_GPU_SPECS_OP(T, Index, scatter_op::UpdateOp::MAX); #define DEFINE_GPU_SPECS(T) \ DEFINE_GPU_SPECS_INDEX(T, int32); \ diff --git a/tensorflow/core/kernels/scatter_op_test.cc b/tensorflow/core/kernels/scatter_op_test.cc index 0b8645a2ae..5b3537b94c 100644 --- a/tensorflow/core/kernels/scatter_op_test.cc +++ b/tensorflow/core/kernels/scatter_op_test.cc @@ -185,7 +185,7 @@ TEST_F(ScatterUpdateOpTest, Error_WrongDimsIndices) { Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()) .contains("Must have updates.shape = indices.shape + " - "params.shape[1:], got ")) + "params.shape[1:] or updates.shape = [], got ")) << s; } @@ -202,7 +202,7 @@ TEST_F(ScatterUpdateOpTest, Error_MismatchedParamsAndUpdateDimensions) { Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()) .contains("Must have updates.shape = indices.shape + " - "params.shape[1:], got ")) + "params.shape[1:] or updates.shape = [], got ")) << s; } @@ -219,7 +219,7 @@ TEST_F(ScatterUpdateOpTest, Error_MismatchedIndicesAndUpdateDimensions) { Status s = RunOpKernel(); EXPECT_TRUE(StringPiece(s.ToString()) .contains("Must have updates.shape = indices.shape + " - "params.shape[1:], got ")) + "params.shape[1:] or updates.shape = [], got ")) << s; } @@ -300,6 +300,20 @@ static void BM_ScatterDivInt64(int iters, int embedding_size) { BM_ScatterHelper(iters, embedding_size, "ScatterDiv"); } +static void BM_ScatterMinInt32(int iters, int embedding_size) { + BM_ScatterHelper(iters, embedding_size, "ScatterMin"); +} +static void BM_ScatterMinInt64(int iters, int embedding_size) { + BM_ScatterHelper(iters, embedding_size, "ScatterMin"); +} + +static void BM_ScatterMaxInt32(int iters, int embedding_size) { + BM_ScatterHelper(iters, embedding_size, "ScatterMax"); +} +static void BM_ScatterMaxInt64(int iters, int embedding_size) { + BM_ScatterHelper(iters, embedding_size, "ScatterMax"); +} + BENCHMARK(BM_ScatterUpdateInt32) ->Arg(1) ->Arg(10) @@ -332,5 +346,11 @@ BENCHMARK(BM_ScatterMulInt64)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); BENCHMARK(BM_ScatterDivInt32)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); BENCHMARK(BM_ScatterDivInt64)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); +BENCHMARK(BM_ScatterMinInt32)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); +BENCHMARK(BM_ScatterMinInt64)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); + +BENCHMARK(BM_ScatterMaxInt32)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); +BENCHMARK(BM_ScatterMaxInt64)->Arg(1)->Arg(10)->Arg(64)->Arg(256)->Arg(1024); + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/ops/resource_variable_ops.cc b/tensorflow/core/ops/resource_variable_ops.cc index 0d8cf78cc2..3d0a6c2157 100644 --- a/tensorflow/core/ops/resource_variable_ops.cc +++ b/tensorflow/core/ops/resource_variable_ops.cc @@ -167,27 +167,75 @@ REGISTER_OP("ResourceGather") return Status::OK(); }); +namespace { + +Status ResourceScatterUpdateShape(InferenceContext* c) { + ShapeAndType handle_shape_and_type; + TF_RETURN_IF_ERROR(ValidateVariableResourceHandle(c, &handle_shape_and_type)); + ShapeHandle var_shape = handle_shape_and_type.shape; + ShapeHandle indices_shape = c->input(1); + + ShapeHandle unused_updates_shape; + ShapeHandle concat; + ShapeHandle var_subshape; + TF_RETURN_IF_ERROR(c->Subshape(var_shape, 1, &var_subshape)); + TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat)); + TF_RETURN_IF_ERROR( + InferenceContext::Rank(c->input(2)) == 0 + ? Status::OK() + : c->Merge(c->input(2), concat, &unused_updates_shape)); + return Status::OK(); +} + +} // namespace + REGISTER_OP("ResourceScatterAdd") .Input("resource: resource") .Input("indices: Tindices") .Input("updates: dtype") .Attr("dtype: numbertype") .Attr("Tindices: {int32, int64}") - .SetShapeFn([](InferenceContext* c) { - ShapeAndType handle_shape_and_type; - TF_RETURN_IF_ERROR( - ValidateVariableResourceHandle(c, &handle_shape_and_type)); - ShapeHandle var_shape = handle_shape_and_type.shape; - ShapeHandle indices_shape = c->input(1); + .SetShapeFn(ResourceScatterUpdateShape); - ShapeHandle unused_updates_shape; - ShapeHandle concat; - ShapeHandle var_subshape; - TF_RETURN_IF_ERROR(c->Subshape(var_shape, 1, &var_subshape)); - TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat)); - TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape)); - return Status::OK(); - }); +REGISTER_OP("ResourceScatterSub") + .Input("resource: resource") + .Input("indices: Tindices") + .Input("updates: dtype") + .Attr("dtype: numbertype") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(ResourceScatterUpdateShape); + +REGISTER_OP("ResourceScatterMul") + .Input("resource: resource") + .Input("indices: Tindices") + .Input("updates: dtype") + .Attr("dtype: numbertype") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(ResourceScatterUpdateShape); + +REGISTER_OP("ResourceScatterDiv") + .Input("resource: resource") + .Input("indices: Tindices") + .Input("updates: dtype") + .Attr("dtype: numbertype") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(ResourceScatterUpdateShape); + +REGISTER_OP("ResourceScatterMin") + .Input("resource: resource") + .Input("indices: Tindices") + .Input("updates: dtype") + .Attr("dtype: numbertype") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(ResourceScatterUpdateShape); + +REGISTER_OP("ResourceScatterMax") + .Input("resource: resource") + .Input("indices: Tindices") + .Input("updates: dtype") + .Attr("dtype: numbertype") + .Attr("Tindices: {int32, int64}") + .SetShapeFn(ResourceScatterUpdateShape); REGISTER_OP("ResourceScatterUpdate") .Input("resource: resource") @@ -195,21 +243,7 @@ REGISTER_OP("ResourceScatterUpdate") .Input("updates: dtype") .Attr("dtype: type") .Attr("Tindices: {int32, int64}") - .SetShapeFn([](InferenceContext* c) { - ShapeAndType handle_shape_and_type; - TF_RETURN_IF_ERROR( - ValidateVariableResourceHandle(c, &handle_shape_and_type)); - ShapeHandle var_shape = handle_shape_and_type.shape; - ShapeHandle indices_shape = c->input(1); - - ShapeHandle unused_updates_shape; - ShapeHandle concat; - ShapeHandle var_subshape; - TF_RETURN_IF_ERROR(c->Subshape(var_shape, 1, &var_subshape)); - TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat)); - TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape)); - return Status::OK(); - }); + .SetShapeFn(ResourceScatterUpdateShape); REGISTER_OP("MutexV2") .Attr("container: string = ''") diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc index 7a524b60c0..664f52452e 100644 --- a/tensorflow/core/ops/state_ops.cc +++ b/tensorflow/core/ops/state_ops.cc @@ -122,7 +122,10 @@ Status ScatterUpdateShape(InferenceContext* c) { ShapeHandle var_subshape; TF_RETURN_IF_ERROR(c->Subshape(var_shape, 1, &var_subshape)); TF_RETURN_IF_ERROR(c->Concatenate(indices_shape, var_subshape, &concat)); - TF_RETURN_IF_ERROR(c->Merge(c->input(2), concat, &unused_updates_shape)); + TF_RETURN_IF_ERROR( + InferenceContext::Rank(c->input(2)) == 0 + ? Status::OK() + : c->Merge(c->input(2), concat, &unused_updates_shape)); c->set_output(0, var_shape); return Status::OK(); @@ -180,6 +183,26 @@ REGISTER_OP("ScatterDiv") .Attr("use_locking: bool = false") .SetShapeFn(ScatterUpdateShape); +REGISTER_OP("ScatterMin") + .Input("ref: Ref(T)") + .Input("indices: Tindices") + .Input("updates: T") + .Output("output_ref: Ref(T)") + .Attr("T: {half, bfloat16, float, double, int32, int64}") + .Attr("Tindices: {int32, int64}") + .Attr("use_locking: bool = false") + .SetShapeFn(ScatterUpdateShape); + +REGISTER_OP("ScatterMax") + .Input("ref: Ref(T)") + .Input("indices: Tindices") + .Input("updates: T") + .Output("output_ref: Ref(T)") + .Attr("T: {half, bfloat16, float, double, int32, int64}") + .Attr("Tindices: {int32, int64}") + .Attr("use_locking: bool = false") + .SetShapeFn(ScatterUpdateShape); + REGISTER_OP("ScatterNdUpdate") .Input("ref: Ref(T)") .Input("indices: Tindices") diff --git a/tensorflow/docs_src/api_guides/python/state_ops.md b/tensorflow/docs_src/api_guides/python/state_ops.md index 0d612ee0c7..ec2d877386 100644 --- a/tensorflow/docs_src/api_guides/python/state_ops.md +++ b/tensorflow/docs_src/api_guides/python/state_ops.md @@ -83,6 +83,8 @@ automatically by the optimizers in most cases. * @{tf.scatter_sub} * @{tf.scatter_mul} * @{tf.scatter_div} +* @{tf.scatter_min} +* @{tf.scatter_max} * @{tf.scatter_nd_update} * @{tf.scatter_nd_add} * @{tf.scatter_nd_sub} diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 563eeff2a6..742564f9bf 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -185,6 +185,204 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) self.assertEqual(self.evaluate(read), [[3]]) + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterSub(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[1]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_sub(handle, [0], + constant_op.constant( + [[2]], + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[-1]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMul(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[1]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_mul(handle, [0], + constant_op.constant( + [[5]], + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[5]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterDiv(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_div(handle, [0], + constant_op.constant( + [[3]], + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[2]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMin(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_min(handle, [0], + constant_op.constant( + [[3]], + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMax(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_max(handle, [0], + constant_op.constant( + [[3]], + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[6]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterAddScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[1]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_add(handle, [0], + constant_op.constant( + 2, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterSubScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[1]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_sub(handle, [0], + constant_op.constant( + 2, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[-1]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMulScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[1]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_mul(handle, [0], + constant_op.constant( + 5, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[5]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterDivScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_div(handle, [0], + constant_op.constant( + 3, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[2]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMinScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_min(handle, [0], + constant_op.constant( + 3, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) + + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testScatterMaxScalar(self): + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [[6]], + dtype=dtypes.int32))) + self.evaluate( + resource_variable_ops.resource_scatter_max(handle, [0], + constant_op.constant( + 3, + dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[6]]) + def testScatterUpdateString(self): handle = resource_variable_ops.var_handle_op( dtype=dtypes.string, shape=[1, 1]) @@ -196,6 +394,23 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(compat.as_bytes(self.evaluate(read)[0][0]), compat.as_bytes("b")) + def testScatterUpdateStringScalar(self): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.string, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op(handle, + constant_op.constant( + [["a"]], + dtype=dtypes.string))) + self.evaluate( + resource_variable_ops.resource_scatter_update(handle, [0], + constant_op.constant( + "b", + dtype=dtypes.string))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.string) + self.assertEqual( + compat.as_bytes(self.evaluate(read)[0][0]), compat.as_bytes("b")) + # TODO(alive): get this to work in Eager mode. def testGPU(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index 7cdf11d884..c70a4ffce7 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -38,38 +38,100 @@ def _NumpyAdd(ref, indices, updates): ref[indx] += updates[i] +def _NumpyAddScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] += update + + def _NumpySub(ref, indices, updates): for i, indx in np.ndenumerate(indices): ref[indx] -= updates[i] +def _NumpySubScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] -= update + + def _NumpyMul(ref, indices, updates): for i, indx in np.ndenumerate(indices): ref[indx] *= updates[i] +def _NumpyMulScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] *= update + + def _NumpyDiv(ref, indices, updates): for i, indx in np.ndenumerate(indices): ref[indx] /= updates[i] +def _NumpyDivScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] /= update + + +def _NumpyMin(ref, indices, updates): + for i, indx in np.ndenumerate(indices): + ref[indx] = np.minimum(ref[indx], updates[i]) + + +def _NumpyMinScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] = np.minimum(ref[indx], update) + + +def _NumpyMax(ref, indices, updates): + for i, indx in np.ndenumerate(indices): + ref[indx] = np.maximum(ref[indx], updates[i]) + + +def _NumpyMaxScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] = np.maximum(ref[indx], update) + + def _NumpyUpdate(ref, indices, updates): for i, indx in np.ndenumerate(indices): ref[indx] = updates[i] +def _NumpyUpdateScalar(ref, indices, update): + for _, indx in np.ndenumerate(indices): + ref[indx] = update + + _TF_OPS_TO_NUMPY = { state_ops.scatter_update: _NumpyUpdate, state_ops.scatter_add: _NumpyAdd, state_ops.scatter_sub: _NumpySub, state_ops.scatter_mul: _NumpyMul, state_ops.scatter_div: _NumpyDiv, + state_ops.scatter_min: _NumpyMin, + state_ops.scatter_max: _NumpyMax, +} + +_TF_OPS_TO_NUMPY_SCALAR = { + state_ops.scatter_update: _NumpyUpdateScalar, + state_ops.scatter_add: _NumpyAddScalar, + state_ops.scatter_sub: _NumpySubScalar, + state_ops.scatter_mul: _NumpyMulScalar, + state_ops.scatter_div: _NumpyDivScalar, + state_ops.scatter_min: _NumpyMinScalar, + state_ops.scatter_max: _NumpyMaxScalar, } class ScatterTest(test.TestCase): - def _VariableRankTest(self, tf_scatter, vtype, itype, repeat_indices=False): + def _VariableRankTest(self, + tf_scatter, + vtype, + itype, + repeat_indices=False, + updates_are_scalar=False): np.random.seed(8) with self.test_session(use_gpu=True): for indices_shape in (), (2,), (3, 7), (3, 4, 7): @@ -89,8 +151,11 @@ class ScatterTest(test.TestCase): indices[np.random.randint(size // 2)]) np.random.shuffle(indices) indices = indices.reshape(indices_shape) - updates = _AsType( - np.random.randn(*(indices_shape + extra_shape)), vtype) + if updates_are_scalar: + updates = _AsType(np.random.randn(), vtype) + else: + updates = _AsType( + np.random.randn(*(indices_shape + extra_shape)), vtype) # Clips small values to avoid division by zero. def clip_small_values(x): @@ -101,7 +166,10 @@ class ScatterTest(test.TestCase): # Scatter via numpy new = old.copy() - np_scatter = _TF_OPS_TO_NUMPY[tf_scatter] + if updates_are_scalar: + np_scatter = _TF_OPS_TO_NUMPY_SCALAR[tf_scatter] + else: + np_scatter = _TF_OPS_TO_NUMPY[tf_scatter] np_scatter(new, indices, updates) # Scatter via tensorflow ref = variables.Variable(old) @@ -109,25 +177,35 @@ class ScatterTest(test.TestCase): tf_scatter(ref, indices, updates).eval() self.assertAllClose(ref.eval(), new) - def _VariableRankTests(self, tf_scatter, repeat_indices=False): + def _VariableRankTests(self, + tf_scatter, + repeat_indices=False, + updates_are_scalar=False): for vtype in (np.float32, np.float64): for itype in (np.int32, np.int64): - self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices) + self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices, + updates_are_scalar) def testVariableRankUpdate(self): - self._VariableRankTests(state_ops.scatter_update) + self._VariableRankTests(state_ops.scatter_update, False) def testVariableRankAdd(self): - self._VariableRankTests(state_ops.scatter_add) + self._VariableRankTests(state_ops.scatter_add, False) def testVariableRankSub(self): - self._VariableRankTests(state_ops.scatter_sub) + self._VariableRankTests(state_ops.scatter_sub, False) def testVariableRankMul(self): - self._VariableRankTests(state_ops.scatter_mul) + self._VariableRankTests(state_ops.scatter_mul, False) def testVariableRankDiv(self): - self._VariableRankTests(state_ops.scatter_div) + self._VariableRankTests(state_ops.scatter_div, False) + + def testVariableRankMin(self): + self._VariableRankTests(state_ops.scatter_min, False) + + def testVariableRankMax(self): + self._VariableRankTests(state_ops.scatter_max, False) def testRepeatIndicesAdd(self): self._VariableRankTests(state_ops.scatter_add, True) @@ -141,6 +219,51 @@ class ScatterTest(test.TestCase): def testRepeatIndicesDiv(self): self._VariableRankTests(state_ops.scatter_div, True) + def testRepeatIndicesMin(self): + self._VariableRankTests(state_ops.scatter_min, True) + + def testRepeatIndicesMax(self): + self._VariableRankTests(state_ops.scatter_max, True) + + def testVariableRankUpdateScalar(self): + self._VariableRankTests(state_ops.scatter_update, False, True) + + def testVariableRankAddScalar(self): + self._VariableRankTests(state_ops.scatter_add, False, True) + + def testVariableRankSubScalar(self): + self._VariableRankTests(state_ops.scatter_sub, False, True) + + def testVariableRankMulScalar(self): + self._VariableRankTests(state_ops.scatter_mul, False, True) + + def testVariableRankDivScalar(self): + self._VariableRankTests(state_ops.scatter_div, False, True) + + def testVariableRankMinScalar(self): + self._VariableRankTests(state_ops.scatter_min, False, True) + + def testVariableRankMaxScalar(self): + self._VariableRankTests(state_ops.scatter_max, False, True) + + def testRepeatIndicesAddScalar(self): + self._VariableRankTests(state_ops.scatter_add, True, True) + + def testRepeatIndicesSubScalar(self): + self._VariableRankTests(state_ops.scatter_sub, True, True) + + def testRepeatIndicesMulScalar(self): + self._VariableRankTests(state_ops.scatter_mul, True, True) + + def testRepeatIndicesDivScalar(self): + self._VariableRankTests(state_ops.scatter_div, True, True) + + def testRepeatIndicesMinScalar(self): + self._VariableRankTests(state_ops.scatter_min, True, True) + + def testRepeatIndicesMaxScalar(self): + self._VariableRankTests(state_ops.scatter_max, True, True) + def testBooleanScatterUpdate(self): if not test.is_gpu_available(): with self.test_session(use_gpu=False) as session: diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index 230b7ef937..e90ff0746a 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -80,6 +80,8 @@ from tensorflow.python.ops.state_ops import scatter_add from tensorflow.python.ops.state_ops import scatter_div from tensorflow.python.ops.state_ops import scatter_mul from tensorflow.python.ops.state_ops import scatter_sub +from tensorflow.python.ops.state_ops import scatter_min +from tensorflow.python.ops.state_ops import scatter_max from tensorflow.python.ops.state_ops import scatter_update from tensorflow.python.ops.state_ops import scatter_nd_add from tensorflow.python.ops.state_ops import scatter_nd_sub diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index c3ad5831b4..01fc3182bc 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -63,6 +63,8 @@ @@scatter_nd_update @@scatter_sub @@scatter_update +@@scatter_min +@@scatter_max @@sparse_mask @@tables_initializer @@trainable_variables diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 55b82dd765..937044aece 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1688,6 +1688,14 @@ tf_module { name: "scatter_div" argspec: "args=[\'ref\', \'indices\', \'updates\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " } + member_method { + name: "scatter_max" + argspec: "args=[\'ref\', \'indices\', \'updates\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " + } + member_method { + name: "scatter_min" + argspec: "args=[\'ref\', \'indices\', \'updates\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " + } member_method { name: "scatter_mul" argspec: "args=[\'ref\', \'indices\', \'updates\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " -- GitLab From dd3adb6165605c28f1a993f9093e8f7c99b357c5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 16:13:13 -0700 Subject: [PATCH 457/960] [XLA] Redesign: implement local client and local service interface. PiperOrigin-RevId: 190291400 --- .../compiler/xla/client/local_client.cc | 18 +++ tensorflow/compiler/xla/client/local_client.h | 9 +- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/local_service.cc | 153 ++++++++++++++---- .../compiler/xla/service/local_service.h | 13 ++ tensorflow/compiler/xla/service/service.cc | 41 +++-- tensorflow/compiler/xla/service/service.h | 11 ++ 7 files changed, 205 insertions(+), 41 deletions(-) diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 91396f055f..30594243dc 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -265,6 +265,24 @@ StatusOr> LocalClient::Compile( updated_options)); } +StatusOr> LocalClient::Compile( + const XlaComputation& computation, + const tensorflow::gtl::ArraySlice argument_layouts, + const ExecutableBuildOptions& options) { + ExecutableBuildOptions updated_options = options; + if (options.device_ordinal() == -1) { + updated_options.set_device_ordinal(default_device_ordinal()); + VLOG(3) << "Set device ordinal to default value of: " + << updated_options.device_ordinal(); + } + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + local_service_->CompileExecutable( + computation, argument_layouts, updated_options)); + return WrapUnique(new LocalExecutable(std::move(executable), + local_service_->mutable_backend(), + updated_options)); +} + StatusOr> LocalClient::LiteralToShapedBuffer(const Literal& literal, int device_ordinal, DeviceMemoryAllocator* allocator) { diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 2e5d85ba68..98ee7c62c9 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -123,7 +123,14 @@ class LocalClient : public Client { const tensorflow::gtl::ArraySlice argument_layouts, const ExecutableBuildOptions& options); - // TODO(b/74197823): Add a overload of Compile for XlaComputation. + // Build and return a LocalExecutable object. The executable is compiled using + // the given XlaComputation, argument layouts and options. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> Compile( + const XlaComputation& computation, + const tensorflow::gtl::ArraySlice argument_layouts, + const ExecutableBuildOptions& options); // Copy the literal data to the device with the given ordinal and return as a // ScopedShapedBuffer. If non-null the given memory allocator is used for diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index d4d67872cf..da16976d06 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -623,6 +623,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:executable_build_options", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", ], diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 1e2d8eea58..499f280211 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -69,6 +69,68 @@ LocalService::LocalService(const ServiceOptions& options, std::unique_ptr execute_backend) : Service(options, std::move(execute_backend)) {} +namespace { + +// Retrieves the parameter metadata for the given computation and parameter +// number. +// +// If the parameter number is invalid for this computation, nullopt is +// returned. When the return value has_value(), nullptr will never be +// the held value. +tensorflow::gtl::optional ParameterMetadata( + const XlaComputation& computation, int parameter_number) { + for (const HloComputationProto& comp : computation.proto().computations()) { + if (comp.id() == computation.proto().entry_computation_id()) { + for (const HloInstructionProto& instr : comp.instructions()) { + if (instr.opcode() == HloOpcodeString(HloOpcode::kParameter) && + instr.parameter_number() == parameter_number) { + if (!instr.has_metadata()) { + return tensorflow::gtl::nullopt; + } + return &instr.metadata(); + } + } + } + } + return tensorflow::gtl::nullopt; +} + +ExecutionOptions CreateExecutionOptions( + const ExecutableBuildOptions& build_options, + const ProgramShape* program_shape) { + ExecutionOptions execution_options = CreateDefaultExecutionOptions(); + if (build_options.hlo_profile().has_value()) { + execution_options.mutable_debug_options()->set_xla_hlo_profile( + *build_options.hlo_profile()); + } + if (build_options.generate_hlo_graph().has_value()) { + execution_options.mutable_debug_options()->set_xla_generate_hlo_graph( + build_options.generate_hlo_graph().value()); + } + if (build_options.dump_optimized_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_optimized_hlo_proto_to( + build_options.dump_optimized_hlo_proto_to().value()); + } + if (build_options.dump_per_pass_hlo_proto_to().has_value()) { + execution_options.mutable_debug_options() + ->set_xla_dump_per_pass_hlo_proto_to( + build_options.dump_per_pass_hlo_proto_to().value()); + } + if (build_options.result_layout() != nullptr) { + *execution_options.mutable_shape_with_output_layout() = + *build_options.result_layout(); + } else { + *execution_options.mutable_shape_with_output_layout() = + program_shape->result(); + LayoutUtil::SetToDefaultLayout( + execution_options.mutable_shape_with_output_layout()); + } + return execution_options; +} + +} // namespace + StatusOr> LocalService::CompileExecutable( const ComputationHandle& computation, const tensorflow::gtl::ArraySlice argument_layouts, @@ -118,34 +180,8 @@ StatusOr> LocalService::CompileExecutable( *build_options.result_layout(), program_shape->result())); } - ExecutionOptions execution_options = CreateDefaultExecutionOptions(); - if (build_options.hlo_profile().has_value()) { - execution_options.mutable_debug_options()->set_xla_hlo_profile( - *build_options.hlo_profile()); - } - if (build_options.generate_hlo_graph().has_value()) { - execution_options.mutable_debug_options()->set_xla_generate_hlo_graph( - build_options.generate_hlo_graph().value()); - } - if (build_options.dump_optimized_hlo_proto_to().has_value()) { - execution_options.mutable_debug_options() - ->set_xla_dump_optimized_hlo_proto_to( - build_options.dump_optimized_hlo_proto_to().value()); - } - if (build_options.dump_per_pass_hlo_proto_to().has_value()) { - execution_options.mutable_debug_options() - ->set_xla_dump_per_pass_hlo_proto_to( - build_options.dump_per_pass_hlo_proto_to().value()); - } - if (build_options.result_layout() != nullptr) { - *execution_options.mutable_shape_with_output_layout() = - *build_options.result_layout(); - } else { - *execution_options.mutable_shape_with_output_layout() = - program_shape->result(); - LayoutUtil::SetToDefaultLayout( - execution_options.mutable_shape_with_output_layout()); - } + ExecutionOptions execution_options = + CreateExecutionOptions(build_options, program_shape.get()); TF_ASSIGN_OR_RETURN(std::unique_ptr module_config, CreateModuleConfig(*program_shape, argument_layouts, &execution_options, user_computation)); @@ -159,6 +195,67 @@ StatusOr> LocalService::CompileExecutable( build_options.device_allocator()); } +StatusOr> LocalService::CompileExecutable( + const XlaComputation& computation, + const tensorflow::gtl::ArraySlice argument_layouts, + const ExecutableBuildOptions& build_options) { + const HloModuleProto& proto = computation.proto(); + TF_RET_CHECK(proto.has_program_shape()); + const ProgramShape& program_shape = proto.program_shape(); + + // Validate incoming layouts. + if (argument_layouts.size() != program_shape.parameters_size()) { + return InvalidArgument( + "Invalid number of arguments for computation: expected %d, got %zu.", + program_shape.parameters_size(), argument_layouts.size()); + } + + for (int i = 0; i < argument_layouts.size(); ++i) { + const Shape& argument_shape = *argument_layouts[i]; + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(argument_shape)); + if (!ShapeUtil::Compatible(argument_shape, program_shape.parameters(i))) { + tensorflow::gtl::optional metadata = + ParameterMetadata(computation, /*parameter_number=*/i); + auto metadata_string = [&metadata]() -> string { + if (!metadata.has_value()) { + return ""; + } + CHECK(metadata.value() != nullptr); + const OpMetadata& m = *metadata.value(); + if (!m.source_file().empty()) { + return tensorflow::strings::Printf( + " (%s:%d)", m.source_file().c_str(), m.source_line()); + } + return ""; + }; + return InvalidArgument( + "Invalid argument shape for argument %d%s, expected %s, got %s.", i, + metadata_string().c_str(), + ShapeUtil::HumanString(program_shape.parameters(i)).c_str(), + ShapeUtil::HumanString(argument_shape).c_str()); + } + } + if (build_options.result_layout() != nullptr) { + TF_RETURN_IF_ERROR(ValidateResultShapeWithLayout( + *build_options.result_layout(), program_shape.result())); + } + + ExecutionOptions execution_options = + CreateExecutionOptions(build_options, &program_shape); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr module_config, + CreateModuleConfig(program_shape, argument_layouts, &execution_options)); + + TF_ASSIGN_OR_RETURN( + se::StreamExecutor * executor, + execute_backend_->stream_executor(build_options.device_ordinal())); + + return BuildExecutable(proto, std::move(module_config), + execute_backend_.get(), executor, + build_options.device_allocator()); +} + StatusOr LocalService::ReplicaNumberToDeviceOrdinal(int replica_number) { return backend().computation_placer()->DeviceId( replica_number, /*computation=*/0, options_.number_of_replicas(), diff --git a/tensorflow/compiler/xla/service/local_service.h b/tensorflow/compiler/xla/service/local_service.h index 15e120685e..06567cabd6 100644 --- a/tensorflow/compiler/xla/service/local_service.h +++ b/tensorflow/compiler/xla/service/local_service.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/client/executable_build_options.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" @@ -50,6 +51,18 @@ class LocalService : public Service { const tensorflow::gtl::ArraySlice argument_layouts, const ExecutableBuildOptions& options); + // Builds an Executable with the given XlaComputation, argument layouts and + // options. If result_layout is non-null, then the executable is compiled to + // produce a result of the given layout. If device_allocator is non-null, + // then the compiler may use it to allocate temp space on the device. The + // compiler is responsible for freeing any memory it allocates this way. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> CompileExecutable( + const XlaComputation& computation, + const tensorflow::gtl::ArraySlice argument_layouts, + const ExecutableBuildOptions& build_options); + // Returns the device ordinal that corresponds to the given replica number. // // This returns an error if there is not a one-to-one correspondence of diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 4f6a82333b..1d379f0d03 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -963,6 +963,30 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, return tensorflow::Status::OK(); } +StatusOr> Service::BuildExecutable( + const HloModuleProto& module_proto, + std::unique_ptr module_config, Backend* backend, + se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator) { + VLOG(1) << Printf( + "BuildExecutable on service %p with serialized module proto: %s", this, + module_proto.name().c_str()); + + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(module_proto, *module_config)); + + TF_RETURN_IF_ERROR(MaybeDumpHloModule(*module)); + + TF_ASSIGN_OR_RETURN( + module, backend->compiler()->RunHloPasses(std::move(module), executor, + device_allocator)); + + TF_ASSIGN_OR_RETURN(std::unique_ptr executable, + backend->compiler()->RunBackend( + std::move(module), executor, device_allocator)); + + return std::move(executable); +} + tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, ExecuteResponse* result) { VLOG(1) << "running execute-graph request"; @@ -979,24 +1003,17 @@ tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, std::vector> replicated_arguments, ResolveAndValidateArguments(arg->arguments(), replicas)); - TF_ASSIGN_OR_RETURN(const auto& config, + TF_ASSIGN_OR_RETURN(std::unique_ptr module_config, CreateModuleConfig(arg->computation().program_shape(), replicated_arguments.front(), arg->execution_options())); - TF_ASSIGN_OR_RETURN(std::unique_ptr module, - HloModule::CreateFromProto(arg->computation(), *config)); - TF_RETURN_IF_ERROR(MaybeDumpHloModule(*module)); - - TF_ASSIGN_OR_RETURN(module, execute_backend_->compiler()->RunHloPasses( - std::move(module), - execute_backend_->default_stream_executor(), - /*device_allocator=*/nullptr)); TF_ASSIGN_OR_RETURN( std::unique_ptr executable, - execute_backend_->compiler()->RunBackend( - std::move(module), execute_backend_->default_stream_executor(), - /*device_allocator=*/nullptr)); + BuildExecutable(arg->computation(), std::move(module_config), + execute_backend_.get(), + execute_backend_->default_stream_executor(), + /*device_allocator=*/nullptr)); TF_ASSIGN_OR_RETURN( *result->mutable_output(), diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 3b79920b0a..773f0a642d 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -115,6 +115,8 @@ class Service : public ServiceInterface { // Executes a computation with the provided global data passed as // immutable arguments. The request contains the whole computation graph. // Returns global data output and execution timing. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg, ExecuteResponse* result) override; @@ -299,6 +301,15 @@ class Service : public ServiceInterface { perftools::gputools::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator = nullptr); + // Builds an Executable for the given HLO module proto. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> BuildExecutable( + const HloModuleProto& module_proto, + std::unique_ptr module_config, Backend* backend, + perftools::gputools::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator = nullptr); + // Same as BuildExecutable() above, but builds a list of Executables for the // given computations that may interact with each other. StatusOr>> BuildExecutables( -- GitLab From f54f57337078c93877df5c9a1b126e879f5b33a5 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Mar 2018 16:15:55 -0700 Subject: [PATCH 458/960] Moves TensorHandleCopyToDevice to TensorHandle::CopyToDevice. PiperOrigin-RevId: 190291768 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 125 +----------------- tensorflow/core/common_runtime/eager/BUILD | 20 +++ .../eager/copy_to_device_node.h | 69 ++++++++++ .../common_runtime/eager/tensor_handle.cc | 71 ++++++++++ .../core/common_runtime/eager/tensor_handle.h | 3 + 6 files changed, 168 insertions(+), 121 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/copy_to_device_node.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index d2d8d59323..8df7b56623 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -32,6 +32,7 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", + "//tensorflow/core/common_runtime/eager:copy_to_device_node", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 59432f2ef8..c69635d529 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/device_set.h" +#include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -213,82 +214,6 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { } } // extern "C" -namespace { - -tensorflow::Status TensorHandleCopyToDevice(tensorflow::TensorHandle* h, - TFE_Context* ctx, - tensorflow::Device* dstd, - tensorflow::TensorHandle** output) { - const tensorflow::Tensor* src = nullptr; - tensorflow::Device* srcd = nullptr; - // TODO(agarwal): src_opd is unused. Perhaps allow TensorAndDevice to accept - // nullptr. - tensorflow::Device* src_opd = nullptr; - TF_RETURN_IF_ERROR(h->TensorAndDevice(&src, &srcd, &src_opd)); - if (srcd == nullptr) srcd = ctx->context.HostCPU(); - bool is_same_device = - (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); - const bool dst_cpu = IsCPU(dstd); - const bool src_cpu = IsCPU(srcd); - // both_on_cpu can be true and yet is_same_device is false, if one of src/dst - // has device type XLA_CPU, and the other CPU. - const bool both_on_cpu = src_cpu && dst_cpu; - if (is_same_device || both_on_cpu) { - dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(*src, dstd, dstd); - return tensorflow::Status::OK(); - } - if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && - !tensorflow::DataTypeCanUseMemcpy(src->dtype()))) { - return tensorflow::errors::InvalidArgument( - "Can't copy Tensor with type ", - tensorflow::DataTypeString(src->dtype()), " to device ", - DeviceName(dstd), "."); - } - tensorflow::AllocatorAttributes attr; - if (src->dtype() == tensorflow::DT_VARIANT) { - attr.set_on_host(true); - } - tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); - if (src->shape().num_elements() == 0) { - dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(dst, dstd, dstd); - return tensorflow::Status::OK(); - } - tensorflow::DeviceContext* src_device_context = nullptr; - if (!src_cpu) { - src_device_context = srcd->tensorflow_gpu_device_info()->default_context; - } - tensorflow::DeviceContext* dst_device_context = nullptr; - if (!dst_cpu) { - dst_device_context = dstd->tensorflow_gpu_device_info()->default_context; - } - // TODO(ashankar): The Sync() call below may be more aggressive than - // necessary. It is based on knowledge of implementation details - that - // GPU devices are implemented using 3 streams - one for host->device copies, - // one for device->host copies and one for sending operations to the GPU. - // With that setup, Sync()ing across all 3 streams should be sufficient - // but more than necessary (since it waits for operations that might have - // nothing to do with this tensor to complete). - TF_RETURN_IF_ERROR(srcd->Sync()); - tensorflow::Notification n; - tensorflow::Status status; - tensorflow::CopyTensor::ViaDMA("copy", src_device_context, dst_device_context, - srcd, dstd, tensorflow::AllocatorAttributes(), - tensorflow::AllocatorAttributes(), src, &dst, - [&status, &n](const tensorflow::Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - if (status.ok()) { - dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(dst, dstd, dstd); - } - return status; -} -} // namespace - extern "C" { TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, @@ -509,49 +434,6 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, namespace { -class CopyToDeviceNode : public tensorflow::EagerNode { - public: - CopyToDeviceNode(tensorflow::TensorHandle* src, tensorflow::Device* dstd, - TFE_Context* ctx) - : tensorflow::EagerNode(ctx->context.NextId()), - src_(src), - dstd_(dstd), - ctx_(ctx), - dst_(new tensorflow::TensorHandle(id, src_->dtype, &ctx->context)) { - src_->Ref(); - dst_->Ref(); - } - - ~CopyToDeviceNode() override { - src_->Unref(); - dst_->Unref(); - } - - tensorflow::Status Run() override { - tensorflow::TensorHandle* temp = nullptr; - TF_RETURN_IF_ERROR(TensorHandleCopyToDevice(src_, ctx_, dstd_, &temp)); - const tensorflow::Tensor* tensor = nullptr; - tensorflow::Device* device = nullptr; - tensorflow::Device* op_device = nullptr; - tensorflow::Status status = - temp->TensorAndDevice(&tensor, &device, &op_device); - // `temp` is a ready handle. So the following call should return OK. - TF_DCHECK_OK(status) << status.error_message(); - DCHECK(tensor); - dst_->SetTensorAndDevice(*tensor, device, op_device); - temp->Unref(); - return tensorflow::Status::OK(); - } - - tensorflow::TensorHandle* dst() { return dst_; } - - private: - tensorflow::TensorHandle* src_; - tensorflow::Device* dstd_; - TFE_Context* ctx_; - tensorflow::TensorHandle* dst_; -}; - // TODO(apassos) move to TensorHandle tensorflow::TensorHandle* TFE_TensorHandleCopyToDevice_Internal( tensorflow::TensorHandle* h, TFE_Context* ctx, const char* device_name, @@ -569,7 +451,8 @@ tensorflow::TensorHandle* TFE_TensorHandleCopyToDevice_Internal( if (ctx->context.Async()) { // Note that `h` may not be currently ready. However execution order will // make sure that `h` is ready before the copy is actually done. - CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); + tensorflow::CopyToDeviceNode* node = + new tensorflow::CopyToDeviceNode(h, dstd, &ctx->context); tensorflow::TensorHandle* output = node->dst(); // Note that calling Add makes `node` accessible by the EagerExecutor // thread. So further accesses need to be thread-safe. @@ -577,7 +460,7 @@ tensorflow::TensorHandle* TFE_TensorHandleCopyToDevice_Internal( return output; } else { tensorflow::TensorHandle* output = nullptr; - status->status = TensorHandleCopyToDevice(h, ctx, dstd, &output); + status->status = h->CopyToDevice(&ctx->context, dstd, &output); return output; } } diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 02fb83200a..a619cac9a4 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -77,6 +77,26 @@ tf_cuda_library( ], ) +tf_cuda_library( + name = "copy_to_device_node", + hdrs = [ + "copy_to_device_node.h", + ], + visibility = ["//tensorflow:internal"], + deps = [ + ":context", + ":eager_executor", + ":tensor_handle", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + ], +) + tf_cuda_library( name = "kernel_and_device", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/copy_to_device_node.h b/tensorflow/core/common_runtime/eager/copy_to_device_node.h new file mode 100644 index 0000000000..8a887540b0 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/copy_to_device_node.h @@ -0,0 +1,69 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_COPY_TO_DEVICE_NODE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_COPY_TO_DEVICE_NODE_H_ + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class CopyToDeviceNode : public EagerNode { + public: + CopyToDeviceNode(TensorHandle* src, Device* dstd, EagerContext* ctx) + : EagerNode(ctx->NextId()), + src_(src), + dstd_(dstd), + ctx_(ctx), + dst_(new TensorHandle(id, src_->dtype, ctx)) { + src_->Ref(); + dst_->Ref(); + } + + ~CopyToDeviceNode() override { + src_->Unref(); + dst_->Unref(); + } + + Status Run() override { + TensorHandle* temp = nullptr; + TF_RETURN_IF_ERROR(src_->CopyToDevice(ctx_, dstd_, &temp)); + const Tensor* tensor = nullptr; + Device* device = nullptr; + Device* op_device = nullptr; + Status status = temp->TensorAndDevice(&tensor, &device, &op_device); + // `temp` is a ready handle. So the following call should return OK. + TF_DCHECK_OK(status) << status.error_message(); + DCHECK(tensor); + dst_->SetTensorAndDevice(*tensor, device, op_device); + temp->Unref(); + return Status::OK(); + } + + TensorHandle* dst() { return dst_; } + + private: + TensorHandle* src_; + Device* dstd_; + EagerContext* ctx_; + TensorHandle* dst_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_COPY_TO_DEVICE_NODE_H_ diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index 5bc1700627..328cd5dd5c 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/copy_tensor.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/context.h" @@ -104,4 +105,74 @@ void TensorHandle::SetTensorAndDevice(const tensorflow::Tensor& tensor, op_device_ = op_device; } +Status TensorHandle::CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, + TensorHandle** output) { + const tensorflow::Tensor* src = nullptr; + tensorflow::Device* srcd = nullptr; + // TODO(agarwal): src_opd is unused. Perhaps allow TensorAndDevice to accept + // nullptr. + tensorflow::Device* src_opd = nullptr; + TF_RETURN_IF_ERROR(TensorAndDevice(&src, &srcd, &src_opd)); + if (srcd == nullptr) srcd = ctx->HostCPU(); + bool is_same_device = (srcd == dstd) || (srcd->name() == dstd->name()); + const bool dst_cpu = dstd->tensorflow_gpu_device_info() == nullptr; + const bool src_cpu = srcd->tensorflow_gpu_device_info() == nullptr; + // both_on_cpu can be true and yet is_same_device is false, if one of src/dst + // has device type XLA_CPU, and the other CPU. + const bool both_on_cpu = src_cpu && dst_cpu; + if (is_same_device || both_on_cpu) { + dstd = dst_cpu ? nullptr : dstd; + *output = new tensorflow::TensorHandle(*src, dstd, dstd); + return tensorflow::Status::OK(); + } + if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && + !tensorflow::DataTypeCanUseMemcpy(src->dtype()))) { + return tensorflow::errors::InvalidArgument( + "Can't copy Tensor with type ", + tensorflow::DataTypeString(src->dtype()), " to device ", dstd->name(), + "."); + } + tensorflow::AllocatorAttributes attr; + if (src->dtype() == tensorflow::DT_VARIANT) { + attr.set_on_host(true); + } + tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); + if (src->shape().num_elements() == 0) { + dstd = dst_cpu ? nullptr : dstd; + *output = new tensorflow::TensorHandle(dst, dstd, dstd); + return tensorflow::Status::OK(); + } + tensorflow::DeviceContext* src_device_context = nullptr; + if (!src_cpu) { + src_device_context = srcd->tensorflow_gpu_device_info()->default_context; + } + tensorflow::DeviceContext* dst_device_context = nullptr; + if (!dst_cpu) { + dst_device_context = dstd->tensorflow_gpu_device_info()->default_context; + } + // TODO(ashankar): The Sync() call below may be more aggressive than + // necessary. It is based on knowledge of implementation details - that + // GPU devices are implemented using 3 streams - one for host->device copies, + // one for device->host copies and one for sending operations to the GPU. + // With that setup, Sync()ing across all 3 streams should be sufficient + // but more than necessary (since it waits for operations that might have + // nothing to do with this tensor to complete). + TF_RETURN_IF_ERROR(srcd->Sync()); + tensorflow::Notification n; + tensorflow::Status status; + tensorflow::CopyTensor::ViaDMA("copy", src_device_context, dst_device_context, + srcd, dstd, tensorflow::AllocatorAttributes(), + tensorflow::AllocatorAttributes(), src, &dst, + [&status, &n](const tensorflow::Status& s) { + status = s; + n.Notify(); + }); + n.WaitForNotification(); + if (status.ok()) { + dstd = dst_cpu ? nullptr : dstd; + *output = new tensorflow::TensorHandle(dst, dstd, dstd); + } + return status; +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 97e67e4652..eb69a13c06 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -85,6 +85,9 @@ class TensorHandle : public core::RefCounted { tensorflow::Device* device, tensorflow::Device* op_device); + Status CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, + TensorHandle** output); + // dtype for the handle. It must be the same as t.dtype() once the handle is // ready. const DataType dtype; -- GitLab From 97249979d9a76ae05d590f9cbe199c0b47712b4f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 16:16:22 -0700 Subject: [PATCH 459/960] bug fix: evaluate nodes before swap the original graph PiperOrigin-RevId: 190291844 --- tensorflow/core/grappler/optimizers/constant_folding_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 914a9257ee..6340565bcd 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -1922,6 +1922,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { item.fetch = {"concat0", "concat1", "concat2", "concat3", "concat4", "concat5", "concat6", "concat7", "concat8", "concat9"}; + auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); + EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -1971,9 +1973,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { } } - auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); auto tensors = EvaluateNodes(output, {"concat0"}); - EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } -- GitLab From 202e4f3b3699e8e40e478402462f76ae853fecbf Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 23 Mar 2018 16:28:16 -0700 Subject: [PATCH 460/960] Make _USE_C_API = True and _USE_C_SHAPES = False work with handle data. This change makes _set_shapes_for_outputs_c_api fetch and set Tensor._handle_data. This is necessary for running the Python shape inference code on resource tensors. PiperOrigin-RevId: 190293303 --- tensorflow/c/BUILD | 2 ++ tensorflow/c/python_api.cc | 26 +++++++++++++++ tensorflow/c/python_api.h | 7 ++++ tensorflow/python/BUILD | 2 ++ tensorflow/python/client/tf_session.i | 1 + tensorflow/python/framework/importer_test.py | 34 ++++++++++++++++++++ tensorflow/python/framework/ops.py | 9 ++++++ 7 files changed, 81 insertions(+) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index d096647558..f4a486d330 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -279,6 +279,8 @@ tf_cuda_library( deps = [ ":c_api", ":c_api_internal", + # TODO(b/74620627): remove when _USE_C_SHAPES is removed + "//tensorflow/python:cpp_shape_inference_proto_cc", ], ) diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index cd604538f1..93155998b8 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/c/python_api.h" #include "tensorflow/c/c_api_internal.h" +#include "tensorflow/python/framework/cpp_shape_inference.pb.h" namespace tensorflow { @@ -109,4 +110,29 @@ void ExtendSession(TF_Session* session, TF_Status* status) { session->extend_before_run = false; } +std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { + Node* node = &output.oper->node; + CppShapeInferenceResult::HandleData handle_data; + handle_data.set_is_set(true); + { + mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(node); + CHECK(ic != nullptr); + CHECK_LT(output.index, ic->num_outputs()); + const auto* shapes_and_types = + ic->output_handle_shapes_and_types(output.index); + if (shapes_and_types == nullptr) return ""; + + for (const auto& p : *shapes_and_types) { + auto* out_shape_and_type = handle_data.add_shape_and_type(); + ic->ShapeHandleToProto(p.shape, out_shape_and_type->mutable_shape()); + out_shape_and_type->set_dtype(p.dtype); + } + } + string result; + handle_data.SerializeToString(&result); + return result; +} + } // namespace tensorflow diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h index 13b680b3a2..2d4c8cd9ed 100644 --- a/tensorflow/c/python_api.h +++ b/tensorflow/c/python_api.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_C_PYTHON_API_H_ #define TENSORFLOW_C_PYTHON_API_H_ +#include + #include "tensorflow/c/c_api.h" // These functions can be removed without notice. They exist to facilitate some @@ -51,6 +53,11 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require); // the graph after the session has been made aware of them. void ExtendSession(TF_Session* session, TF_Status* status); +// Returns the serialized CppShapeInferenceResult::HandleData proto for +// `output` if its a resource tensor, or otherwise returns the empty string. +// TODO(b/74620627): remove when _USE_C_SHAPES is removed +std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output); + } // namespace tensorflow #endif // TENSORFLOW_C_PYTHON_API_H_ diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 0e2b980213..acfdcd15f7 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3128,6 +3128,8 @@ tf_proto_library( srcs = ["framework/cpp_shape_inference.proto"], cc_api_version = 2, protodeps = tf_additional_all_protos(), + # TODO(b/74620627): remove when _USE_C_SHAPES is removed + visibility = ["//tensorflow:internal"], ) py_test( diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index e88fc0c01a..70a3d032f4 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -723,6 +723,7 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_TryEvaluateConstant_wrapper; %noexception TF_TryEvaluateConstant_wrapper; %unignore ExtendSession; +%unignore ResourceHandleShapeAndType; %include "tensorflow/python/client/tf_session_helper.h" diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 6593b17184..369669c2e6 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -39,6 +39,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -356,6 +357,39 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d._input_types, [dtypes.int32_ref, dtypes.int32]) self.assertEqual(d.outputs, []) + def testResources(self): + # Produce GraphDef containing a ops producing and consuming resources. + graph = ops.Graph() + with graph.as_default(): + var = resource_variable_ops.ResourceVariable(1.0) + var_assign = var.assign(2.0) + # Use an op that requires handle shape to be set. + var_shape = resource_variable_ops.variable_shape(var.handle) + init = variables.global_variables_initializer() + graph_def = graph.as_graph_def() + + # Import the GraphDef. + with ops.Graph().as_default(): + # pylint: disable=unused-variable + imported_var, imported_assign, imported_shape, imported_init = ( + importer.import_graph_def( + graph_def, + return_elements=[var.name, var_assign.name, var_shape.name, + init.name])) + + # Make sure the handle shape is set on the imported variable. + new_var_shape = resource_variable_ops.variable_shape(imported_var) + # pylint: enable=unused-variable + + # Run the imported graph. + # TODO(b/76173421): make this work (currently DCHECKS) + # with self.test_session() as sess: + # sess.run(imported_init) + # self.assertEqual(sess.run(imported_var), 1.0) + # self.assertEqual(sess.run(imported_assign), 2.0) + # self.assertEqual(list(sess.run(imported_shape)), []) + # self.assertEqual(list(sess.run(new_var_shape)), []) + def testWhileLoop(self): # Produce GraphDef containing while loop. graph = ops.Graph() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 93edaa0cf0..1fa9285e43 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -42,6 +42,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import core from tensorflow.python.eager import tape from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -295,6 +296,7 @@ class Tensor(_TensorLike): # Attributes used for C++ shape inference. Not inspected, only forwarded. # If set, will be a HandleData object from cpp_shape_inference.proto. + # TODO(b/74620627): remove when _USE_C_SHAPES is removed self._handle_data = None self._id = uid() @@ -2472,6 +2474,13 @@ def _set_shapes_for_outputs_c_api(op): shape_vector = [None if d == -1 else d for d in shape_vector] output.set_shape(tensor_shape.TensorShape(shape_vector)) + serialized = c_api.ResourceHandleShapeAndType(op._graph._c_graph, + output._as_tf_output()) + if serialized: + output._handle_data = (cpp_shape_inference_pb2.CppShapeInferenceResult + .HandleData.FromString(serialized.encode())) + else: + output._handle_data = None # TODO(skyewm): remove this when _USE_C_API flag is removed. def _set_shapes_for_outputs(op): -- GitLab From 6e523342d57b175e698bb8379979104e3e0335ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 17:19:10 -0700 Subject: [PATCH 461/960] Update ops-related pbtxt files. PiperOrigin-RevId: 190299240 --- tensorflow/contrib/estimator/BUILD | 2 +- .../core/ops/compat/ops_history.v1.pbtxt | 359 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 359 ++++++++++++++++++ 3 files changed, 719 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 24374266dc..c846343d6d 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -358,7 +358,7 @@ cuda_py_test( size = "medium", srcs = ["python/estimator/replicate_model_fn_test.py"], additional_deps = [ - "//third_party/py/absl/testing:parameterized", + "@absl_py//absl/testing:parameterized", "//tensorflow/python/estimator", "//tensorflow/python/estimator:dnn", "//tensorflow/python/estimator:export_export", diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index b41826d6eb..05d6e02281 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -43705,6 +43705,210 @@ op { } is_stateful: true } +op { + name: "ResourceScatterDiv" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMax" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMin" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMul" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "ResourceScatterNdUpdate" input_arg { @@ -43742,6 +43946,57 @@ op { } is_stateful: true } +op { + name: "ResourceScatterSub" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "ResourceScatterUpdate" input_arg { @@ -48901,6 +49156,110 @@ op { } } } +op { + name: "ScatterMax" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterMin" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ScatterMul" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index af2c563489..274a7fbf75 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -21658,6 +21658,210 @@ op { } is_stateful: true } +op { + name: "ResourceScatterDiv" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMax" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMin" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterMul" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "ResourceScatterNdUpdate" input_arg { @@ -21695,6 +21899,57 @@ op { } is_stateful: true } +op { + name: "ResourceScatterSub" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "ResourceScatterUpdate" input_arg { @@ -23434,6 +23689,110 @@ op { } } } +op { + name: "ScatterMax" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterMin" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ScatterMul" input_arg { -- GitLab From d40c53dd2cb7c0e3ec20ca56f5c3c95038820900 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Fri, 23 Mar 2018 17:28:10 -0700 Subject: [PATCH 462/960] Set the stream in TransformTensor. PiperOrigin-RevId: 190300166 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 03e3e0857f..ab5e6590e0 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -3157,12 +3157,18 @@ bool CudnnSupport::DoTransformTensor(Stream* stream, dnn::DataType output_type, float scale, DeviceMemoryBase* output_data) { mutex_lock lock{dnn_handle_mutex_}; + cudnnStatus_t status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), + AsCUDAStreamValue(stream)); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); + } + float beta = 0.0f; ScopedTensorDescriptor input_tensor_desc( parent_, input_desc, ToCudnnDataType(input_type, input_desc.layout())); ScopedTensorDescriptor output_tensor_desc( parent_, output_desc, ToCudnnDataType(output_type, output_desc.layout())); - cudnnStatus_t status = wrap::cudnnTransformTensor( + status = wrap::cudnnTransformTensor( parent_, ToHandle(dnn_handle_), &scale, input_tensor_desc.handle(), input_data.opaque(), &beta, output_tensor_desc.handle(), output_data->opaque()); -- GitLab From c275f2dffb7423328428553f2aafe3b011b48372 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 17:49:47 -0700 Subject: [PATCH 463/960] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 190302194 --- tensorflow/go/op/wrappers.go | 3410 +++++++++++++++++----------------- 1 file changed, 1705 insertions(+), 1705 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5ddd32ed48..838f4f2301 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -1089,184 +1089,190 @@ func ExpandDims(scope *Scope, input tf.Output, axis tf.Output) (output tf.Output return op.Output(0) } -// Returns (x - y)(x - y) element-wise. +// A placeholder op that passes through `input` when its output is not fed. // -// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// input: The default value to produce when `output` is not fed. +// shape: The (possibly partial) shape of the tensor. +// +// Returns A placeholder tensor that defaults to `input` if it is not fed. +func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "SquaredDifference", + Type: "PlaceholderWithDefault", Input: []tf.Input{ - x, y, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Forwards the input to the output. +// A placeholder op for a value that will be fed into the computation. // -// This operator represents the loop termination condition used by the -// "pivot" switches of a loop. +// DEPRECATED at GraphDef version 23: Placeholder now behaves the same as PlaceholderV2. +// +// N.B. This operation will fail with an error if it is executed. It is +// intended as a way to represent a value that will always be fed, and to +// provide attrs that enable the fed value to be checked at runtime. // // Arguments: -// input: A boolean scalar, representing the branch predicate of the Switch op. +// dtype: The type of elements in the tensor. +// shape: The shape of the tensor. The shape can be any partially-specified +// shape. To be unconstrained, pass in a shape with unknown rank. // -// Returns The same tensor as `input`. -func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A placeholder tensor that must be replaced using the feed mechanism. +func PlaceholderV2(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} opspec := tf.OpSpec{ - Type: "LoopCond", - Input: []tf.Input{ - input, - }, + Type: "PlaceholderV2", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedMulAttr is an optional argument to QuantizedMul. -type QuantizedMulAttr func(optionalAttr) +// PlaceholderAttr is an optional argument to Placeholder. +type PlaceholderAttr func(optionalAttr) -// QuantizedMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { +// PlaceholderShape sets the optional shape attribute to value. +// +// value: (Optional) The shape of the tensor. If the shape has 0 dimensions, the +// shape is unconstrained. +// If not specified, defaults to +func PlaceholderShape(value tf.Shape) PlaceholderAttr { return func(m optionalAttr) { - m["Toutput"] = value + m["shape"] = value } } -// Returns x * y element-wise, working on quantized buffers. -// -// Arguments: -// +// A placeholder op for a value that will be fed into the computation. // -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. +// N.B. This operation will fail with an error if it is executed. It is +// intended as a way to represent a value that will always be fed, and to +// provide attrs that enable the fed value to be checked at runtime. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// Arguments: +// dtype: The type of elements in the tensor. // -// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { +// Returns A placeholder tensor that must be replaced using the feed mechanism. +func Placeholder(scope *Scope, dtype tf.DataType, optional ...PlaceholderAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedMul", - Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, - }, + Type: "Placeholder", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. -type QuantizedMatMulAttr func(optionalAttr) - -// QuantizedMatMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } + return op.Output(0) } -// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. +// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. // -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. +// This operation folds the padded areas of `input` by `MirrorPad` according to the +// `paddings` you specify. `paddings` must be the same as `paddings` argument +// given to the corresponding `MirrorPad` op. // -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. +// The folded size of each dimension D of the output is: // -// value: The type of output produced by activation function -// following this operation. -// If not specified, defaults to DT_QUINT8 -func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Tactivation"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b`. +// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` // -// The inputs must be two-dimensional matrices and the inner dimension of -// `a` (after being transposed if `transpose_a` is non-zero) must match the -// outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). +// For example: +// +// ``` +// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. +// # 'paddings' is [[0, 1]], [0, 1]]. +// # 'mode' is SYMMETRIC. +// # rank of 't' is 2. +// pad(t, paddings) ==> [[ 1, 5] +// [11, 28]] +// ``` // // Arguments: -// a: Must be a two-dimensional tensor. -// b: Must be a two-dimensional tensor. -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. +// input: The input tensor to be folded. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// mode: The mode used in the `MirrorPad` op. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { +// Returns The folded tensor. +func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"mode": mode} opspec := tf.OpSpec{ - Type: "QuantizedMatMul", + Type: "MirrorPadGrad", Input: []tf.Input{ - a, b, min_a, max_a, min_b, max_b, + input, paddings, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// A placeholder op that passes through `input` when its output is not fed. +// Pads a tensor with mirrored values. +// +// This operation pads a `input` with mirrored values according to the `paddings` +// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is +// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many values to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many values to add after the contents of `input` +// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater +// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true +// (if false, respectively). +// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 2, 3], [4, 5, 6]]. +// # 'paddings' is [[1, 1]], [2, 2]]. +// # 'mode' is SYMMETRIC. +// # rank of 't' is 2. +// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] +// [2, 1, 1, 2, 3, 3, 2] +// [5, 4, 4, 5, 6, 6, 5] +// [5, 4, 4, 5, 6, 6, 5]] +// ``` // // Arguments: -// input: The default value to produce when `output` is not fed. -// shape: The (possibly partial) shape of the tensor. +// input: The input tensor to be padded. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions +// do not include the borders, while in symmetric mode the padded regions +// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings` +// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and +// it is `[1, 2, 3, 3, 2]` in symmetric mode. // -// Returns A placeholder tensor that defaults to `input` if it is not fed. -func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { +// Returns The padded tensor. +func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shape": shape} + attrs := map[string]interface{}{"mode": mode} opspec := tf.OpSpec{ - Type: "PlaceholderWithDefault", + Type: "MirrorPad", Input: []tf.Input{ - input, + input, paddings, }, Attrs: attrs, } @@ -1274,38 +1280,78 @@ func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (outp return op.Output(0) } -// Returns the complex conjugate of a complex number. +// Pads a tensor. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// complex numbers that are the complex conjugate of each element in `input`. The -// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the -// real part and *b* is the imaginary part. +// This operation pads `input` according to the `paddings` and `constant_values` +// you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is +// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many padding values to add before the contents of `input` in that dimension, +// and `paddings[D, 1]` indicates how many padding values to add after the contents +// of `input` in that dimension. `constant_values` is a scalar tensor of the same +// type as `input` that indicates the value to use for padding `input`. // -// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` // // For example: // // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # 'constant_values' is 0 +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] // ``` -func Conj(scope *Scope, input tf.Output) (output tf.Output) { +func PadV2(scope *Scope, input tf.Output, paddings tf.Output, constant_values tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Conj", + Type: "PadV2", Input: []tf.Input{ - input, + input, paddings, constant_values, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. -type ResourceSparseApplyMomentumAttr func(optionalAttr) - +// Returns the complex conjugate of a complex number. +// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// complex numbers that are the complex conjugate of each element in `input`. The +// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the +// real part and *b* is the imaginary part. +// +// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// ``` +func Conj(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Conj", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. +type ResourceSparseApplyMomentumAttr func(optionalAttr) + // ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. // // value: If `True`, updating of the var and accum tensors will be protected @@ -2063,6 +2109,47 @@ func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true i return op.Output(0), op.Output(1), op.Output(2) } +// Returns (x - y)(x - y) element-wise. +// +// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SquaredDifference", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Forwards the input to the output. +// +// This operator represents the loop termination condition used by the +// "pivot" switches of a loop. +// +// Arguments: +// input: A boolean scalar, representing the branch predicate of the Switch op. +// +// Returns The same tensor as `input`. +func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LoopCond", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // ApproximateEqualAttr is an optional argument to ApproximateEqual. type ApproximateEqualAttr func(optionalAttr) @@ -2391,50 +2478,6 @@ func Sign(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// QuantizedAddAttr is an optional argument to QuantizedAdd. -type QuantizedAddAttr func(optionalAttr) - -// QuantizedAddToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// Returns x + y element-wise, working on quantized buffers. -// -// Arguments: -// -// -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -// -// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedAdd", - Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // ArgMinAttr is an optional argument to ArgMin. type ArgMinAttr func(optionalAttr) @@ -3741,32 +3784,6 @@ func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { return op.Output(0) } -// Given a quantized tensor described by (input, input_min, input_max), outputs a -// -// range that covers the actual values present in that tensor. This op is -// typically used to produce the requested_output_min and requested_output_max for -// Requantize. -// -// Arguments: -// -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// -// Returns The computed min output.the computed max output. -func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RequantizationRange", - Input: []tf.Input{ - input, input_min, input_max, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Returns the truth value of (x <= y) element-wise. // // *NOTE*: `LessEqual` supports broadcasting. More about broadcasting @@ -3943,46 +3960,6 @@ func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMul return op.Output(0) } -// Pads a tensor. -// -// This operation pads `input` according to the `paddings` and `constant_values` -// you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is -// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many padding values to add before the contents of `input` in that dimension, -// and `paddings[D, 1]` indicates how many padding values to add after the contents -// of `input` in that dimension. `constant_values` is a scalar tensor of the same -// type as `input` that indicates the value to use for padding `input`. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # 'constant_values' is 0 -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func PadV2(scope *Scope, input tf.Output, paddings tf.Output, constant_values tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "PadV2", - Input: []tf.Input{ - input, paddings, constant_values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns which elements of x are NaN. // // @compatibility(numpy) @@ -4292,52 +4269,6 @@ func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output return op.Output(0) } -// MaxPoolAttr is an optional argument to MaxPool. -type MaxPoolAttr func(optionalAttr) - -// MaxPoolDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolDataFormat(value string) MaxPoolAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. -// -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. -func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes gradients of the maxpooling function. // // Arguments: @@ -5247,74 +5178,30 @@ func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. +// BiasAddGradAttr is an optional argument to BiasAddGrad. +type BiasAddGradAttr func(optionalAttr) + +// BiasAddGradDataFormat sets the optional data_format attribute to value. // -// This operation folds the padded areas of `input` by `MirrorPad` according to the -// `paddings` you specify. `paddings` must be the same as `paddings` argument -// given to the corresponding `MirrorPad` op. +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// The tensor will be added to "in_channels", the third-to-the-last +// dimension. +// If not specified, defaults to "NHWC" +func BiasAddGradDataFormat(value string) BiasAddGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// The backward operation for "BiasAdd" on the "bias" tensor. // -// The folded size of each dimension D of the output is: -// -// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. -// # 'paddings' is [[0, 1]], [0, 1]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[ 1, 5] -// [11, 28]] -// ``` -// -// Arguments: -// input: The input tensor to be folded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: The mode used in the `MirrorPad` op. -// -// Returns The folded tensor. -func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPadGrad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BiasAddGradAttr is an optional argument to BiasAddGrad. -type BiasAddGradAttr func(optionalAttr) - -// BiasAddGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddGradDataFormat(value string) BiasAddGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// The backward operation for "BiasAdd" on the "bias" tensor. -// -// It accumulates all the values from out_backprop into the feature dimension. -// For NHWC data format, the feature dimension is the last. For NCHW data format, -// the feature dimension is the third-to-last. +// It accumulates all the values from out_backprop into the feature dimension. +// For NHWC data format, the feature dimension is the last. For NCHW data format, +// the feature dimension is the third-to-last. // // Arguments: // out_backprop: Any number of dimensions. @@ -5411,297 +5298,220 @@ func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Outp return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. -type AvgPoolGradAttr func(optionalAttr) - -// AvgPoolGradDataFormat sets the optional data_format attribute to value. +// Returns the rank of a tensor. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the average pooling function. +// This operation returns an integer representing the rank of `input`. // -// Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. -// padding: The type of padding algorithm to use. +// For example: // -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. -func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// # shape of tensor 't' is [2, 2, 3] +// rank(t) ==> 3 +// ``` +// +// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank +// of a tensor is the number of indices required to uniquely select each element +// of the tensor. Rank is also known as "order", "degree", or "ndims." +func Rank(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AvgPoolGrad", + Type: "Rank", Input: []tf.Input{ - orig_input_shape, grad, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// StageClearAttr is an optional argument to StageClear. -type StageClearAttr func(optionalAttr) - -// StageClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageClearCapacity(value int64) StageClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Transforms a Tensor into a serialized TensorProto proto. // -// REQUIRES: value >= 0 -func StageClearMemoryLimit(value int64) StageClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageClearContainer(value string) StageClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageClearSharedName(value string) StageClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. +// Arguments: +// tensor: A Tensor of type `T`. // -// Returns the created operation. -func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { +// Returns A serialized TensorProto proto of the input tensor. +func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StageClear", - - Attrs: attrs, + Type: "SerializeTensor", + Input: []tf.Input{ + tensor, + }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. -type ComputeAccidentalHitsAttr func(optionalAttr) - -// ComputeAccidentalHitsSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// MatrixSolveAttr is an optional argument to MatrixSolve. +type MatrixSolveAttr func(optionalAttr) -// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// MatrixSolveAdjoint sets the optional adjoint attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// If not specified, defaults to false +func MatrixSolveAdjoint(value bool) MatrixSolveAttr { return func(m optionalAttr) { - m["seed2"] = value + m["adjoint"] = value } } -// Computes the ids of the positions in sampled_candidates that match true_labels. +// Solves systems of linear equations. // -// When doing log-odds NCE, the result of this op should be passed through a -// SparseToDense op, then added to the logits of the sampled candidates. This has -// the effect of 'removing' the sampled labels that match the true labels by -// making the classifier sure that they are sampled labels. +// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is +// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `True` then each output matrix satisfies +// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. // // Arguments: -// true_classes: The true_classes output of UnpackSparseLabels. -// sampled_candidates: The sampled_candidates output of CandidateSampler. -// num_true: Number of true labels per context. +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label -// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element -// is -FLOAT_MAX. -func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { +// Returns Shape is `[..., M, K]`. +func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ComputeAccidentalHits", + Type: "MatrixSolve", Input: []tf.Input{ - true_classes, sampled_candidates, + matrix, rhs, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) - -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value +// Computes acos of x element-wise. +func Acos(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Acos", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Gather specific elements from the TensorArray into output `value`. +// Real-valued fast Fourier transform. // -// All elements selected by `indices` must have the same shape. +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. +// +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). -func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "RFFT", Input: []tf.Input{ - handle, indices, flow_in, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. -// -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. +// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. +type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) + +// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. // -// *NOTE*: `Maximum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Maximum", - Input: []tf.Input{ - x, y, - }, +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. -// -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// Computes the gradients of depthwise convolution with respect to the filter. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. +// input: 4-D with shape based on `data_format`. For example, if +// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, +// in_width, in_channels]` tensor. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. // -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. +func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RFFT", + Type: "DepthwiseConv2dNativeBackpropFilter", Input: []tf.Input{ - input, fft_length, + input, filter_sizes, out_backprop, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -6236,6 +6046,79 @@ func Tan(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. +type ResourceSparseApplyFtrlAttr func(optionalAttr) + +// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// +// That is for rows we have grad for, we update var, accum and linear as follows: +// accum_new = accum + grad * grad +// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyFtrl", + Input: []tf.Input{ + var_, accum, linear, grad, indices, lr, l1, l2, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns which elements of x are Inf. +// +// @compatibility(numpy) +// Equivalent to np.isinf +// @end_compatibility +func IsInf(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IsInf", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the sum along sparse segments of a tensor divided by the sqrt of N. // // N is the size of the segment being reduced. @@ -6918,32 +6801,196 @@ func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, return scope.AddOperation(opspec) } -// CumsumAttr is an optional argument to Cumsum. -type CumsumAttr func(optionalAttr) +// AvgPoolGradAttr is an optional argument to AvgPoolGrad. +type AvgPoolGradAttr func(optionalAttr) -// CumsumExclusive sets the optional exclusive attribute to value. +// AvgPoolGradDataFormat sets the optional data_format attribute to value. // -// value: If `True`, perform exclusive cumsum. -// If not specified, defaults to false -func CumsumExclusive(value bool) CumsumAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["data_format"] = value } } -// CumsumReverse sets the optional reverse attribute to value. +// Computes gradients of the average pooling function. // -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumsumReverse(value bool) CumsumAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Compute the cumulative sum of the tensor `x` along `axis`. +// Arguments: +// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. +// the output of `avg_pool`. +// ksize: The size of the sliding window for each dimension of the input. +// strides: The stride of the sliding window for each dimension of the input. +// padding: The type of padding algorithm to use. // -// By default, this op performs an inclusive cumsum, which means that the first +// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. +func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AvgPoolGrad", + Input: []tf.Input{ + orig_input_shape, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StageClearAttr is an optional argument to StageClear. +type StageClearAttr func(optionalAttr) + +// StageClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageClearCapacity(value int64) StageClearAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// StageClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageClearMemoryLimit(value int64) StageClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// StageClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageClearContainer(value string) StageClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StageClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageClearSharedName(value string) StageClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. +// +// Returns the created operation. +func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StageClear", + + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. +type ComputeAccidentalHitsAttr func(optionalAttr) + +// ComputeAccidentalHitsSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes the ids of the positions in sampled_candidates that match true_labels. +// +// When doing log-odds NCE, the result of this op should be passed through a +// SparseToDense op, then added to the logits of the sampled candidates. This has +// the effect of 'removing' the sampled labels that match the true labels by +// making the classifier sure that they are sampled labels. +// +// Arguments: +// true_classes: The true_classes output of UnpackSparseLabels. +// sampled_candidates: The sampled_candidates output of CandidateSampler. +// num_true: Number of true labels per context. +// +// Returns A vector of indices corresponding to rows of true_candidates.A vector of IDs of positions in sampled_candidates that match a true_label +// for the row with the corresponding index in indices.A vector of the same length as indices and ids, in which each element +// is -FLOAT_MAX. +func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ComputeAccidentalHits", + Input: []tf.Input{ + true_classes, sampled_candidates, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// CumsumAttr is an optional argument to Cumsum. +type CumsumAttr func(optionalAttr) + +// CumsumExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumsum. +// If not specified, defaults to false +func CumsumExclusive(value bool) CumsumAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumsumReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumsumReverse(value bool) CumsumAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative sum of the tensor `x` along `axis`. +// +// By default, this op performs an inclusive cumsum, which means that the first // element of the input is identical to the first element of the output: // // ```python @@ -7314,79 +7361,6 @@ func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, return op.Output(0) } -// Generates values in an interval. -// -// A sequence of `num` evenly-spaced values are generated beginning at `start`. -// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, -// so that the last one is exactly `stop`. -// -// For example: -// -// ``` -// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] -// ``` -// -// Arguments: -// start: First entry in the range. -// stop: Last entry in the range. -// num: Number of values to generate. -// -// Returns 1-D. The generated values. -func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LinSpace", - Input: []tf.Input{ - start, stop, num, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. -type DestroyResourceOpAttr func(optionalAttr) - -// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. -// -// value: whether to ignore the error when the resource -// doesn't exist. -// If not specified, defaults to true -func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { - return func(m optionalAttr) { - m["ignore_lookup_error"] = value - } -} - -// Deletes the resource specified by the handle. -// -// All subsequent operations using the resource will result in a NotFound -// error status. -// -// Arguments: -// resource: handle to the resource to delete. -// -// Returns the created operation. -func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DestroyResourceOp", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // Applies softmax to a batched N-D `SparseTensor`. // // The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` @@ -7822,24 +7796,97 @@ func IFFT(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// LRNAttr is an optional argument to LRN. -type LRNAttr func(optionalAttr) - -// LRNDepthRadius sets the optional depth_radius attribute to value. +// Generates values in an interval. // -// value: 0-D. Half-width of the 1-D normalization window. -// If not specified, defaults to 5 -func LRNDepthRadius(value int64) LRNAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNBias sets the optional bias attribute to value. +// A sequence of `num` evenly-spaced values are generated beginning at `start`. +// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, +// so that the last one is exactly `stop`. // -// value: An offset (usually positive to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNBias(value float32) LRNAttr { +// For example: +// +// ``` +// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] +// ``` +// +// Arguments: +// start: First entry in the range. +// stop: Last entry in the range. +// num: Number of values to generate. +// +// Returns 1-D. The generated values. +func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LinSpace", + Input: []tf.Input{ + start, stop, num, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. +type DestroyResourceOpAttr func(optionalAttr) + +// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. +// +// value: whether to ignore the error when the resource +// doesn't exist. +// If not specified, defaults to true +func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { + return func(m optionalAttr) { + m["ignore_lookup_error"] = value + } +} + +// Deletes the resource specified by the handle. +// +// All subsequent operations using the resource will result in a NotFound +// error status. +// +// Arguments: +// resource: handle to the resource to delete. +// +// Returns the created operation. +func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DestroyResourceOp", + Input: []tf.Input{ + resource, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// LRNAttr is an optional argument to LRN. +type LRNAttr func(optionalAttr) + +// LRNDepthRadius sets the optional depth_radius attribute to value. +// +// value: 0-D. Half-width of the 1-D normalization window. +// If not specified, defaults to 5 +func LRNDepthRadius(value int64) LRNAttr { + return func(m optionalAttr) { + m["depth_radius"] = value + } +} + +// LRNBias sets the optional bias attribute to value. +// +// value: An offset (usually positive to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNBias(value float32) LRNAttr { return func(m optionalAttr) { m["bias"] = value } @@ -8054,6 +8101,65 @@ func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...Resi return op.Output(0) } +// Pads a tensor with zeros. +// +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. +// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] +// ``` +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Pad", + Input: []tf.Input{ + input, paddings, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Checks whether a resource handle-based variable has been initialized. +// +// Arguments: +// resource: the input resource handle. +// +// Returns a scalar boolean which is true if the variable has been +// initialized. +func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "VarIsInitializedOp", + Input: []tf.Input{ + resource, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. type StatelessRandomUniformAttr func(optionalAttr) @@ -8098,6 +8204,38 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio return op.Output(0) } +// Makes its input available to the next iteration. +// +// Arguments: +// data: The tensor to be made available to the next iteration. +// +// Returns The same tensor as `data`. +func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NextIteration", + Input: []tf.Input{ + data, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Fact", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AngleAttr is an optional argument to Angle. type AngleAttr func(optionalAttr) @@ -8672,79 +8810,6 @@ func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output return op.Output(0) } -// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. -type ResourceSparseApplyFtrlAttr func(optionalAttr) - -// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. -// -// That is for rows we have grad for, we update var, accum and linear as follows: -// accum_new = accum + grad * grad -// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrl", - Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns which elements of x are Inf. -// -// @compatibility(numpy) -// Equivalent to np.isinf -// @end_compatibility -func IsInf(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsInf", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. type ResourceSparseApplyRMSPropAttr func(optionalAttr) @@ -8974,12 +9039,106 @@ func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_box return op.Output(0), op.Output(1), op.Output(2) } -// Returns x / y element-wise for integer types. -// -// Truncation designates that negative numbers will round fractional quantities -// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different -// than Python semantics. See `FloorDiv` for a division function that matches -// Python Semantics. +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. +// +// Arguments: +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_buckets": num_buckets} + opspec := tf.OpSpec{ + Type: "StringToHashBucketFast", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// +// *NOTE*: `Maximum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Maximum", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) + +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. +// +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { + return func(m optionalAttr) { + m["element_shape"] = value + } +} + +// Gather specific elements from the TensorArray into output `value`. +// +// All elements selected by `indices` must have the same shape. +// +// Arguments: +// handle: The handle to a TensorArray. +// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. +// +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). +func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorArrayGatherV3", + Input: []tf.Input{ + handle, indices, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns x / y element-wise for integer types. +// +// Truncation designates that negative numbers will round fractional quantities +// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different +// than Python semantics. See `FloorDiv` for a division function that matches +// Python Semantics. // // *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) @@ -9048,6 +9207,30 @@ func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and return tensors } +// Creates a dataset that skips `count` elements from the `input_dataset`. +// +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. +// +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SkipDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the maximum along segments of a tensor. // // Read @{$math_ops#segmentation$the section on segmentation} for an explanation of @@ -9084,30 +9267,6 @@ func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf. return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. -// -// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SkipDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes hyperbolic tangent of `x` element-wise. func Tanh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { @@ -9861,6 +10020,79 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// +// Arguments: +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. +// +// Returns A Tensor of type `out_type`. +func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "ParseTensor", + Input: []tf.Input{ + serialized, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) + +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { + return func(m optionalAttr) { + m["Targmax"] = value + } +} + +// Performs max pooling on the input and outputs both max values and indices. +// +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. +// +// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. +// +// Arguments: +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. +func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolWithArgmax", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. type ResourceSparseApplyAdagradDAAttr func(optionalAttr) @@ -11004,104 +11236,37 @@ func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, padd return op.Output(0) } -// Transforms a Tensor into a serialized TensorProto proto. +// Inverse 3D fast Fourier transform. +// +// Computes the inverse 3-dimensional discrete Fourier transform over the +// inner-most 3 dimensions of `input`. // // Arguments: -// tensor: A Tensor of type `T`. +// input: A complex64 tensor. // -// Returns A serialized TensorProto proto of the input tensor. -func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifftn with 3 dimensions. +// @end_compatibility +func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SerializeTensor", + Type: "IFFT3D", Input: []tf.Input{ - tensor, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) - -// MatrixSolveAdjoint sets the optional adjoint attribute to value. +// Adds `bias` to `value`. // -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Solves systems of linear equations. -// -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. -// -// Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. -// -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolve", - Input: []tf.Input{ - matrix, rhs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT3D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds `bias` to `value`. -// -// This is a deprecated version of BiasAdd and will be soon removed. +// This is a deprecated version of BiasAdd and will be soon removed. // // This is a special case of `tf.add` where `bias` is restricted to be 1-D. // Broadcasting is supported, so `value` may have any number of dimensions. @@ -12025,6 +12190,46 @@ func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_va return op.Output(0) } +// Concatenates tensors along one dimension. +// +// Arguments: +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConcatV2", + Input: []tf.Input{ + tf.OutputList(values), axis, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Reads and outputs the entire contents of the input filename. +func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReadFile", + Input: []tf.Input{ + filename, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // MinAttr is an optional argument to Min. type MinAttr func(optionalAttr) @@ -12088,76 +12293,6 @@ func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { return op.Output(0) } -// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. -type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) - -// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the filter. -// -// Arguments: -// input: 4-D with shape based on `data_format`. For example, if -// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, -// in_width, in_channels]` tensor. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropFilter", - Input: []tf.Input{ - input, filter_sizes, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes sigmoid of `x` element-wise. // // Specifically, `y = 1 / (1 + exp(-x))`. @@ -12888,190 +13023,252 @@ func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ... return op.Output(0) } -// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. -type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) +// DecodeCSVAttr is an optional argument to DecodeCSV. +type DecodeCSVAttr func(optionalAttr) -// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. +// DecodeCSVFieldDelim sets the optional field_delim attribute to value. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { +// value: char delimiter to separate fields in a record. +// If not specified, defaults to "," +func DecodeCSVFieldDelim(value string) DecodeCSVAttr { return func(m optionalAttr) { - m["seed"] = value + m["field_delim"] = value } } -// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { +// value: If false, treats double quotation marks as regular +// characters inside of the string fields (ignoring RFC 4180, Section 2, +// Bullet 5). +// If not specified, defaults to true +func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_quote_delim"] = value } } -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. +// DecodeCSVNaValue sets the optional na_value attribute to value. // -// For each batch, this op picks a single set of sampled candidate labels. +// value: Additional string to recognize as NA/NaN. +// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["na_value"] = value + } +} + +// Convert CSV records to tensors. Each column maps to one tensor. // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// RFC 4180 format is expected for the CSV records. +// (https://tools.ietf.org/html/rfc4180) +// Note that we allow leading and trailing spaces with int or float field. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// records: Each string is a record/row in the csv and all records should have +// the same format. +// record_defaults: One tensor per column of the input record, with either a +// scalar default value for that column or empty if the column is required. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns Each tensor will have the same shape as records. +func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ThreadUnsafeUnigramCandidateSampler", + Type: "DecodeCSV", Input: []tf.Input{ - true_classes, + records, tf.OutputList(record_defaults), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MaxPoolV2Attr is an optional argument to MaxPoolV2. -type MaxPoolV2Attr func(optionalAttr) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("DecodeCSV", err) + return + } + return output +} -// MaxPoolV2DataFormat sets the optional data_format attribute to value. +// MapClearAttr is an optional argument to MapClear. +type MapClearAttr func(optionalAttr) + +// MapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { +// REQUIRES: value >= 0 +func MapClearCapacity(value int64) MapClearAttr { return func(m optionalAttr) { - m["data_format"] = value + m["capacity"] = value } } -// Performs max pooling on the input. +// MapClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// REQUIRES: value >= 0 +func MapClearMemoryLimit(value int64) MapClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapClearContainer(value string) MapClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapClearSharedName(value string) MapClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. // -// Returns The max pooled output tensor. -func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { +// Returns the created operation. +func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolV2", - Input: []tf.Input{ - input, ksize, strides, - }, + Type: "MapClear", + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Deprecated. Use TensorArrayReadV3 +// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. +type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) + +// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. // -// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 -func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a learned unigram distribution. +// +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorArrayReadV2", + Type: "ThreadUnsafeUnigramCandidateSampler", Input: []tf.Input{ - handle, index, flow_in, + true_classes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Does nothing. Serves as a control trigger for scheduling. -// -// Only useful as a placeholder for control edges. +// MaxPoolV2Attr is an optional argument to MaxPoolV2. +type MaxPoolV2Attr func(optionalAttr) + +// MaxPoolV2DataFormat sets the optional data_format attribute to value. // -// Returns the created operation. -func ControlTrigger(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ControlTrigger", +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { + return func(m optionalAttr) { + m["data_format"] = value } - return scope.AddOperation(opspec) } -// Batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() -// -// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// Performs max pooling on the input. // // Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. +func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", + Type: "MaxPoolV2", Input: []tf.Input{ - t, m, v, beta, gamma, + input, ksize, strides, }, Attrs: attrs, } @@ -13375,99 +13572,40 @@ func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Outp return op.Output(0) } -// Checks whether a resource handle-based variable has been initialized. +// Computes gradients for SparseSegmentMean. // -// Arguments: -// resource: the input resource handle. +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. // -// Returns a scalar boolean which is true if the variable has been -// initialized. -func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { +// Arguments: +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "VarIsInitializedOp", + Type: "SparseSegmentMeanGrad", Input: []tf.Input{ - resource, + grad, indices, segment_ids, output_dim0, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: +// Returns the truth value of (x >= y) element-wise. // -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { +// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Pad", - Input: []tf.Input{ - input, paddings, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of (x >= y) element-wise. -// -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GreaterEqual", + Type: "GreaterEqual", Input: []tf.Input{ x, y, }, @@ -14876,6 +15014,101 @@ func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { return op.Output(0) } +// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. +type MatrixSolveLsAttr func(optionalAttr) + +// MatrixSolveLsFast sets the optional fast attribute to value. +// If not specified, defaults to true +func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { + return func(m optionalAttr) { + m["fast"] = value + } +} + +// Solves one or more linear least-squares problems. +// +// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same +// type as `matrix` and shape `[..., M, K]`. +// The output is a tensor shape `[..., N, K]` where each output matrix solves +// each of the equations +// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +// in the least squares sense. +// +// We use the following notation for (complex) matrix and right-hand sides +// in the batch: +// +// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +// `output`=\\(X \in \mathbb{C}^{n \times k}\\), +// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). +// +// If `fast` is `True`, then the solution is computed by solving the normal +// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then +// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + +// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as +// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +// minimum-norm solution to the under-determined linear system, i.e. +// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable +// when \\(A\\) is numerically full rank and has a condition number +// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is +// sufficiently large. +// +// If `fast` is `False` an algorithm based on the numerically robust complete +// orthogonal decomposition is used. This computes the minimum-norm +// least-squares solution, even when \\(A\\) is rank deficient. This path is +// typically 6-7 times slower than the fast path. If `fast` is `False` then +// `l2_regularizer` is ignored. +// +// Arguments: +// matrix: Shape is `[..., M, N]`. +// rhs: Shape is `[..., M, K]`. +// l2_regularizer: Scalar tensor. +// +// @compatibility(numpy) +// Equivalent to np.linalg.lstsq +// @end_compatibility +// +// Returns Shape is `[..., N, K]`. +func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolveLs", + Input: []tf.Input{ + matrix, rhs, l2_regularizer, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Elementwise computes the bitwise OR of `x` and `y`. +// +// The result will have those bits set, that are set in `x`, `y` or both. The +// computation is performed on the underlying representations of `x` and `y`. +func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BitwiseOr", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. type SparseToSparseSetOperationAttr func(optionalAttr) @@ -15174,6 +15407,52 @@ func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype return op.Output(0), op.Output(1), op.Output(2) } +// MaxPoolAttr is an optional argument to MaxPool. +type MaxPoolAttr func(optionalAttr) + +// MaxPoolDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolDataFormat(value string) MaxPoolAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs max pooling on the input. +// +// Arguments: +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. +func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Says whether the targets are in the top `K` predictions. // // This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the @@ -16313,106 +16592,29 @@ func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// MfccAttr is an optional argument to Mfcc. -type MfccAttr func(optionalAttr) - -// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. -// -// value: The highest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 4000 -func MfccUpperFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["upper_frequency_limit"] = value - } -} - -// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. -// -// value: The lowest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 20 -func MfccLowerFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["lower_frequency_limit"] = value - } -} - -// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. +// Returns the element-wise sum of a list of tensors. // -// value: Resolution of the Mel bank used internally. -// If not specified, defaults to 40 -func MfccFilterbankChannelCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["filterbank_channel_count"] = value - } -} - -// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. +// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +// wait for all of its inputs to be ready before beginning to sum. This can +// save memory if inputs are ready at different times, since minimum temporary +// storage is proportional to the output size rather than the inputs size. // -// value: How many output channels to produce per time slice. -// If not specified, defaults to 13 -func MfccDctCoefficientCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["dct_coefficient_count"] = value - } -} - -// Transforms a spectrogram into a form that's useful for speech recognition. +// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. // -// Mel Frequency Cepstral Coefficients are a way of representing audio data that's -// been effective as an input feature for machine learning. They are created by -// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the -// higher frequencies that are less significant to the human ear. They have a long -// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -// is a good resource to learn more. +// Returns a `Tensor` of same shape and type as the elements of `inputs`. // // Arguments: -// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared -// set to true. -// sample_rate: How many samples per second the source audio used. -func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { +// inputs: A list of `Tensor` objects, each with same shape and type. +// shape: Shape of elements of `inputs`. +func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "Mfcc", + Type: "AccumulateNV2", Input: []tf.Input{ - spectrogram, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. -// -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. -// -// Returns a `Tensor` of same shape and type as the elements of `inputs`. -// -// Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "AccumulateNV2", - Input: []tf.Input{ - tf.OutputList(inputs), + tf.OutputList(inputs), }, Attrs: attrs, } @@ -17022,87 +17224,129 @@ func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_uppe return op.Output(0) } -// Computes acos of x element-wise. -func Acos(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acos", - Input: []tf.Input{ - x, - }, +// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. +type QuantizedMatMulAttr func(optionalAttr) + +// QuantizedMatMulToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Toutput"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) +// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. +// +// value: If true, `a` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { +// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. +// +// value: If true, `b` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { return func(m optionalAttr) { - m["Targmax"] = value + m["transpose_b"] = value } } -// Performs max pooling on the input and outputs both max values and indices. +// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. // -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. +// value: The type of output produced by activation function +// following this operation. +// If not specified, defaults to DT_QUINT8 +func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Tactivation"] = value + } +} + +// Perform a quantized matrix multiplication of `a` by the matrix `b`. // -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. +// The inputs must be two-dimensional matrices and the inner dimension of +// `a` (after being transposed if `transpose_a` is non-zero) must match the +// outer dimension of `b` (after being transposed if `transposed_b` is +// non-zero). // // Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// a: Must be a two-dimensional tensor. +// b: Must be a two-dimensional tensor. +// min_a: The float value that the lowest quantized `a` value represents. +// max_a: The float value that the highest quantized `a` value represents. +// min_b: The float value that the lowest quantized `b` value represents. +// max_b: The float value that the highest quantized `b` value represents. // -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", + Type: "QuantizedMatMul", Input: []tf.Input{ - input, + a, b, min_a, max_a, min_b, max_b, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// Does nothing. Serves as a control trigger for scheduling. // -// Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. +// Only useful as a placeholder for control edges. // -// Returns A Tensor of type `out_type`. -func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { +// Returns the created operation. +func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "ParseTensor", + Type: "ControlTrigger", + } + return scope.AddOperation(opspec) +} + +// Batch normalization. +// +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + opspec := tf.OpSpec{ + Type: "BatchNormWithGlobalNormalization", Input: []tf.Input{ - serialized, + t, m, v, beta, gamma, }, Attrs: attrs, } @@ -17110,113 +17354,95 @@ func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (outp return op.Output(0) } -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) - -// MapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// Deprecated. Use TensorArrayReadV3 // -// REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value +// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 +func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { + if scope.Err() != nil { + return } -} - -// MapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "TensorArrayReadV2", + Input: []tf.Input{ + handle, index, flow_in, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// MapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} +// QuantizedMulAttr is an optional argument to QuantizedMul. +type QuantizedMulAttr func(optionalAttr) -// MapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { +// QuantizedMulToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["Toutput"] = value } } -// Op removes all elements in the underlying container. +// Returns x * y element-wise, working on quantized buffers. // -// Returns the created operation. -func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { +// Arguments: +// +// +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// +// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MapClear", - + Type: "QuantizedMul", + Input: []tf.Input{ + x, y, min_x, max_x, min_y, max_y, + }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) +// QuantizedAddAttr is an optional argument to QuantizedAdd. +type QuantizedAddAttr func(optionalAttr) -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. -// -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { +// QuantizedAddToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { return func(m optionalAttr) { - m["field_delim"] = value + m["Toutput"] = value } } -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. +// Returns x + y element-wise, working on quantized buffers. // -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). -// If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { - return func(m optionalAttr) { - m["use_quote_delim"] = value - } -} - -// DecodeCSVNaValue sets the optional na_value attribute to value. +// Arguments: // -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value - } -} - -// Convert CSV records to tensors. Each column maps to one tensor. // -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. // -// Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. -// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or empty if the column is required. +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. // -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { +// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { if scope.Err() != nil { return } @@ -17225,84 +17451,117 @@ func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeCSV", + Type: "QuantizedAdd", Input: []tf.Input{ - records, tf.OutputList(record_defaults), + x, y, min_x, max_x, min_y, max_y, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return + return op.Output(0), op.Output(1), op.Output(2) +} + +// MfccAttr is an optional argument to Mfcc. +type MfccAttr func(optionalAttr) + +// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. +// +// value: The highest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 4000 +func MfccUpperFrequencyLimit(value float32) MfccAttr { + return func(m optionalAttr) { + m["upper_frequency_limit"] = value } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return +} + +// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. +// +// value: The lowest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 20 +func MfccLowerFrequencyLimit(value float32) MfccAttr { + return func(m optionalAttr) { + m["lower_frequency_limit"] = value + } +} + +// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. +// +// value: Resolution of the Mel bank used internally. +// If not specified, defaults to 40 +func MfccFilterbankChannelCount(value int64) MfccAttr { + return func(m optionalAttr) { + m["filterbank_channel_count"] = value } - return output } -// Returns the rank of a tensor. -// -// This operation returns an integer representing the rank of `input`. +// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. // -// For example: +// value: How many output channels to produce per time slice. +// If not specified, defaults to 13 +func MfccDctCoefficientCount(value int64) MfccAttr { + return func(m optionalAttr) { + m["dct_coefficient_count"] = value + } +} + +// Transforms a spectrogram into a form that's useful for speech recognition. // -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// # shape of tensor 't' is [2, 2, 3] -// rank(t) ==> 3 -// ``` +// Mel Frequency Cepstral Coefficients are a way of representing audio data that's +// been effective as an input feature for machine learning. They are created by +// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the +// higher frequencies that are less significant to the human ear. They have a long +// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum +// is a good resource to learn more. // -// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank -// of a tensor is the number of indices required to uniquely select each element -// of the tensor. Rank is also known as "order", "degree", or "ndims." -func Rank(scope *Scope, input tf.Output) (output tf.Output) { +// Arguments: +// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared +// set to true. +// sample_rate: How many samples per second the source audio used. +func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Rank", + Type: "Mfcc", Input: []tf.Input{ - input, + spectrogram, sample_rate, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Makes its input available to the next iteration. +// Given a quantized tensor described by (input, input_min, input_max), outputs a +// +// range that covers the actual values present in that tensor. This op is +// typically used to produce the requested_output_min and requested_output_max for +// Requantize. // // Arguments: -// data: The tensor to be made available to the next iteration. // -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// +// Returns The computed min output.the computed max output. +func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "NextIteration", + Type: "RequantizationRange", Input: []tf.Input{ - data, + input, input_min, input_max, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } // MapPeekAttr is an optional argument to MapPeek. @@ -18911,101 +19170,6 @@ func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output t return op.Output(0) } -// Elementwise computes the bitwise OR of `x` and `y`. -// -// The result will have those bits set, that are set in `x`, `y` or both. The -// computation is performed on the underlying representations of `x` and `y`. -func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BitwiseOr", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) - -// MatrixSolveLsFast sets the optional fast attribute to value. -// If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { - return func(m optionalAttr) { - m["fast"] = value - } -} - -// Solves one or more linear least-squares problems. -// -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. -// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: -// -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). -// -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + -// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is -// sufficiently large. -// -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. If `fast` is `False` then -// `l2_regularizer` is ignored. -// -// Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. -// -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility -// -// Returns Shape is `[..., N, K]`. -func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolveLs", - Input: []tf.Input{ - matrix, rhs, l2_regularizer, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SvdAttr is an optional argument to Svd. type SvdAttr func(optionalAttr) @@ -20803,28 +20967,101 @@ func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { // * If the coordinates are not normalized they are interpreted as // numbers of pixels. // -// Arguments: -// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. -// size: A 1-D tensor of 2 elements containing the size of the glimpses -// to extract. The glimpse height must be specified first, following -// by the glimpse width. -// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing -// the y, x locations of the center of each window. +// Arguments: +// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. +// size: A 1-D tensor of 2 elements containing the size of the glimpses +// to extract. The glimpse height must be specified first, following +// by the glimpse width. +// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing +// the y, x locations of the center of each window. +// +// Returns A tensor representing the glimpses `[batch_size, +// glimpse_height, glimpse_width, channels]`. +func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ExtractGlimpse", + Input: []tf.Input{ + input, size, offsets, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// A container for an iterator resource. +// +// Returns A handle to the iterator that can be passed to a "MakeIterator" +// or "IteratorGetNext" op. +func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "Iterator", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. +type CropAndResizeGradImageAttr func(optionalAttr) + +// CropAndResizeGradImageMethod sets the optional method attribute to value. +// +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { + return func(m optionalAttr) { + m["method"] = value + } +} + +// Computes the gradient of the crop_and_resize op wrt the input image tensor. +// +// Arguments: +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` +// containing the original image size. Both `image_height` and `image_width` need +// to be positive. +// // -// Returns A tensor representing the glimpses `[batch_size, -// glimpse_height, glimpse_width, channels]`. -func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { +// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ExtractGlimpse", + Type: "CropAndResizeGradImage", Input: []tf.Input{ - input, size, offsets, + grads, boxes, box_ind, image_size, }, Attrs: attrs, } @@ -20832,24 +21069,6 @@ func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Ou return op.Output(0) } -// A container for an iterator resource. -// -// Returns A handle to the iterator that can be passed to a "MakeIterator" -// or "IteratorGetNext" op. -func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "Iterator", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ShuffleDatasetAttr is an optional argument to ShuffleDataset. type ShuffleDatasetAttr func(optionalAttr) @@ -21717,47 +21936,6 @@ func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, out return op.Output(0) } -// PlaceholderAttr is an optional argument to Placeholder. -type PlaceholderAttr func(optionalAttr) - -// PlaceholderShape sets the optional shape attribute to value. -// -// value: (Optional) The shape of the tensor. If the shape has 0 dimensions, the -// shape is unconstrained. -// If not specified, defaults to -func PlaceholderShape(value tf.Shape) PlaceholderAttr { - return func(m optionalAttr) { - m["shape"] = value - } -} - -// A placeholder op for a value that will be fed into the computation. -// -// N.B. This operation will fail with an error if it is executed. It is -// intended as a way to represent a value that will always be fed, and to -// provide attrs that enable the fed value to be checked at runtime. -// -// Arguments: -// dtype: The type of elements in the tensor. -// -// Returns A placeholder tensor that must be replaced using the feed mechanism. -func Placeholder(scope *Scope, dtype tf.DataType, optional ...PlaceholderAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Placeholder", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that executes a SQL query and emits rows of the result set. // // Arguments: @@ -23339,101 +23517,6 @@ func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { return scope.AddOperation(opspec) } -// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. -type CropAndResizeGradImageAttr func(optionalAttr) - -// CropAndResizeGradImageMethod sets the optional method attribute to value. -// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// Computes the gradient of the crop_and_resize op wrt the input image tensor. -// -// Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` -// containing the original image size. Both `image_height` and `image_width` need -// to be positive. -// -// -// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CropAndResizeGradImage", - Input: []tf.Input{ - grads, boxes, box_ind, image_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reads and outputs the entire contents of the input filename. -func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReadFile", - Input: []tf.Input{ - filename, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatV2", - Input: []tf.Input{ - tf.OutputList(values), axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Forwards the value of an available tensor from `inputs` to `output`. // // `Merge` waits for at least one of the tensors in `inputs` to become available. @@ -27804,86 +27887,3 @@ func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Outp op := scope.AddOperation(opspec) return op.Output(0), op.Output(1) } - -// Pads a tensor with mirrored values. -// -// This operation pads a `input` with mirrored values according to the `paddings` -// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is -// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many values to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many values to add after the contents of `input` -// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater -// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true -// (if false, respectively). -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6]]. -// # 'paddings' is [[1, 1]], [2, 2]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] -// [2, 1, 1, 2, 3, 3, 2] -// [5, 4, 4, 5, 6, 6, 5] -// [5, 4, 4, 5, 6, 6, 5]] -// ``` -// -// Arguments: -// input: The input tensor to be padded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions -// do not include the borders, while in symmetric mode the padded regions -// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings` -// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and -// it is `[1, 2, 3, 3, 2]` in symmetric mode. -// -// Returns The padded tensor. -func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A placeholder op for a value that will be fed into the computation. -// -// DEPRECATED at GraphDef version 23: Placeholder now behaves the same as PlaceholderV2. -// -// N.B. This operation will fail with an error if it is executed. It is -// intended as a way to represent a value that will always be fed, and to -// provide attrs that enable the fed value to be checked at runtime. -// -// Arguments: -// dtype: The type of elements in the tensor. -// shape: The shape of the tensor. The shape can be any partially-specified -// shape. To be unconstrained, pass in a shape with unknown rank. -// -// Returns A placeholder tensor that must be replaced using the feed mechanism. -func PlaceholderV2(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - opspec := tf.OpSpec{ - Type: "PlaceholderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From e9d6c89aaaf65db2dbaacacdfecdee4a56a3cb7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 18:39:29 -0700 Subject: [PATCH 464/960] Switch Android C++ compilation mode to "-std=c++11". PiperOrigin-RevId: 190306256 --- tensorflow/tools/ci_build/builds/android.sh | 3 ++- tensorflow/tools/ci_build/builds/android_full.sh | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/android.sh b/tensorflow/tools/ci_build/builds/android.sh index 564c5aa148..d81793efe0 100755 --- a/tensorflow/tools/ci_build/builds/android.sh +++ b/tensorflow/tools/ci_build/builds/android.sh @@ -29,7 +29,8 @@ echo "========== TensorFlow Demo Build Test ==========" # Enable sandboxing so that zip archives don't get incorrectly packaged # in assets/ dir (see https://github.com/bazelbuild/bazel/issues/2334) # TODO(gunan): remove extra flags once sandboxing is enabled for all builds. -bazel --bazelrc=/dev/null build -c opt --fat_apk_cpu=x86_64 \ +bazel --bazelrc=/dev/null build \ + --compilation_mode=opt --cxxopt=-std=c++11 --fat_apk_cpu=x86_64 \ --spawn_strategy=sandboxed --genrule_strategy=sandboxed \ //tensorflow/examples/android:tensorflow_demo diff --git a/tensorflow/tools/ci_build/builds/android_full.sh b/tensorflow/tools/ci_build/builds/android_full.sh index 9d449241e8..41dc66dd54 100755 --- a/tensorflow/tools/ci_build/builds/android_full.sh +++ b/tensorflow/tools/ci_build/builds/android_full.sh @@ -40,7 +40,8 @@ rm -rf ${AAR_LIB_TMP} for CPU in ${CPUS//,/ } do echo "========== Building native libs for Android ${CPU} ==========" - bazel build -c opt --config=monolithic --cpu=${CPU} \ + bazel build --config=monolithic --cpu=${CPU} \ + --compilation_mode=opt --cxxopt=-std=c++11 \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ //tensorflow/core:android_tensorflow_lib \ @@ -62,7 +63,8 @@ done # in assets/ dir (see https://github.com/bazelbuild/bazel/issues/2334) # TODO(gunan): remove extra flags once sandboxing is enabled for all builds. echo "========== Building TensorFlow Android Jar and Demo ==========" -bazel --bazelrc=/dev/null build -c opt --config=monolithic --fat_apk_cpu=${CPUS} \ +bazel --bazelrc=/dev/null build --config=monolithic --fat_apk_cpu=${CPUS} \ + --compilation_mode=opt --cxxopt=-std=c++11 \ --spawn_strategy=sandboxed --genrule_strategy=sandboxed \ //tensorflow/contrib/android:android_tensorflow_inference_java \ //tensorflow/contrib/android:android_tensorflow_inference_java.aar \ -- GitLab From f3f58f4486731faf4137fa62cdf1f885dccfc95b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 18:44:35 -0700 Subject: [PATCH 465/960] When import_scoped_meta_graph is called within a name scope, but called without an import_scope, the names of the created variables are wrong, resulting in key not found errors when adding these variables to their corresponding collections. PiperOrigin-RevId: 190306555 --- tensorflow/python/framework/meta_graph.py | 4 +++- tensorflow/python/framework/meta_graph_test.py | 15 +++++++++++++++ tensorflow/python/framework/ops.py | 6 ++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 4bb9941bb7..391b17720c 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -737,7 +737,9 @@ def import_scoped_meta_graph(meta_graph_or_file, import_scope or "", mark_as_used=False) importer.import_graph_def( - input_graph_def, name=(import_scope or ""), input_map=input_map, + input_graph_def, + name=(import_scope or scope_to_prepend_to_names), + input_map=input_map, producer_op_list=producer_op_list) # Restores all the other collections. diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 21963d0bee..5d5fb037fc 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -537,6 +537,21 @@ class ScopedMetaGraphTest(test.TestCase): self.assertEqual(list(imported_variables.values())[0].name, "foo/bar/myvar:0") + def testScopedImportUnderNameScopeNoVarScope(self): + graph = ops.Graph() + with graph.as_default(): + variables.Variable(initial_value=1.0, trainable=True, name="myvar") + meta_graph_def, _ = meta_graph.export_scoped_meta_graph(graph=graph) + + graph = ops.Graph() + with graph.as_default(): + with ops.name_scope("foo"): + imported_variables = meta_graph.import_scoped_meta_graph( + meta_graph_def) + self.assertEqual(len(imported_variables), 1) + self.assertEqual(list(imported_variables.values())[0].name, + "foo/myvar:0") + def testImportsUsingSameScopeName(self): with ops.Graph().as_default(): variables.Variable(0, name="v") diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 1fa9285e43..f264e38102 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5872,6 +5872,9 @@ def strip_name_scope(name, export_scope): is None. """ if export_scope: + if export_scope[-1] == "/": + export_scope = export_scope[:-1] + try: # Strips export_scope/, export_scope///, # ^export_scope/, loc:@export_scope/. @@ -5897,6 +5900,9 @@ def prepend_name_scope(name, import_scope): is None. """ if import_scope: + if import_scope[-1] == "/": + import_scope = import_scope[:-1] + try: str_to_replace = r"([\^]|loc:@|^)(.*)" return re.sub(str_to_replace, r"\1" + import_scope + r"/\2", -- GitLab From 753f99afcd6b814781e19ae44afc6195ff68685d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 19:02:37 -0700 Subject: [PATCH 466/960] Adding support for iterating through a tf.data.Dataset for a single epoch. PiperOrigin-RevId: 190307545 --- .../contrib/py2tf/converters/for_loops.py | 30 ++++---- tensorflow/contrib/py2tf/utils/BUILD | 1 + tensorflow/contrib/py2tf/utils/__init__.py | 2 + tensorflow/contrib/py2tf/utils/builtins.py | 69 ++++++++++++++++++- 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/for_loops.py b/tensorflow/contrib/py2tf/converters/for_loops.py index 4297c1cf2a..8d28b149a8 100644 --- a/tensorflow/contrib/py2tf/converters/for_loops.py +++ b/tensorflow/contrib/py2tf/converters/for_loops.py @@ -38,19 +38,19 @@ class ForLoopCanonicalizationTransformer(transformer.Base): self.generic_visit(node) body_scope = anno.getanno(node, NodeAnno.BODY_SCOPE) i_var = self.context.namer.new_symbol('i', body_scope.referenced) - n_var = self.context.namer.new_symbol('n', body_scope.referenced) - iterated_var = self.context.namer.new_symbol('iterated', - body_scope.referenced) + smart_loop_iter_var = self.context.namer.new_symbol('smart_loop_iter', + body_scope.referenced) + cont_var = self.context.namer.new_symbol('cont', body_scope.referenced) # TODO(mdan): Use TensorListFromTensor(loop_iter) here. if anno.hasanno(node, 'extra_cond'): template = """ i = 0 - iterated = loop_iter - n = len(iterated) - while i < n and extra_cond: - target = iterated[i] + smart_loop_iter = py2tf_utils.dynamic_dataset(loop_iter) + cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + while cont and extra_cond: body i += 1 + cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) """ return templates.replace( template, @@ -58,18 +58,18 @@ class ForLoopCanonicalizationTransformer(transformer.Base): target=node.target, body=node.body, i=i_var, - n=n_var, - iterated=iterated_var, + smart_loop_iter=smart_loop_iter_var, + cont=cont_var, extra_cond=anno.getanno(node, 'extra_cond')) else: template = """ i = 0 - iterated = loop_iter - n = len(iterated) - while i < n: - target = iterated[i] + smart_loop_iter = py2tf_utils.dynamic_dataset(loop_iter) + cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + while cont: body i += 1 + cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) """ repl = templates.replace( template, @@ -77,8 +77,8 @@ class ForLoopCanonicalizationTransformer(transformer.Base): target=node.target, body=node.body, i=i_var, - n=n_var, - iterated=iterated_var) + smart_loop_iter=smart_loop_iter_var, + cont=cont_var) return repl def visit_Continue(self, node): diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index d029289f5a..b53fbb5c18 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -35,6 +35,7 @@ py_library( deps = [ "//tensorflow/python:list_ops", "//tensorflow/python:script_ops", + "//tensorflow/python/data/ops:dataset_ops", "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index d9d8e34689..4e6003c852 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin +from tensorflow.contrib.py2tf.utils.builtins import dynamic_dataset +from tensorflow.contrib.py2tf.utils.builtins import dynamic_for_cond from tensorflow.contrib.py2tf.utils.builtins import dynamic_print from tensorflow.contrib.py2tf.utils.builtins import dynamic_range from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns diff --git a/tensorflow/contrib/py2tf/utils/builtins.py b/tensorflow/contrib/py2tf/utils/builtins.py index 3cb62b55d4..251b4ed8ee 100644 --- a/tensorflow/contrib/py2tf/utils/builtins.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -22,8 +22,10 @@ import six from tensorflow.contrib.py2tf.utils import py_func from tensorflow.contrib.py2tf.utils import type_check +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.util import tf_inspect @@ -54,7 +56,6 @@ def dynamic_len(list_or_tensor): raise ValueError( 'len requires non-zero rank for tensor "%s"' % list_or_tensor) return array_ops.shape(list_or_tensor)[0] - return len(list_or_tensor) @@ -97,3 +98,69 @@ def dynamic_print(*values): if all(map(is_tf_print_compatible, values)): return logging_ops.Print(1, values) return py_func.wrap_py_func(print, None, values, use_dummy_return=True) + + +def dynamic_dataset(iterated): + """Implementartion of smart tf.data.Dataset epoch wrapping. + + The function checks if the input is a tf.data.Dataset and if so then wraps it + so that for each element it returns it also returns the current epoch the + dataset iteration is in, for two epochs. If the input is not a + tf.data.Dataset then it just returns the input. + + Args: + iterated: The iterable or tf.data.Dataset that is being iterated over. + Returns: + Either just the untouched input, or in the case of input being a + tf.data.Dataset then it returns a wrapped tf.data.Dataset where for each + element it returns it also returns the current epoch the dataset iteration + is in. + """ + if not isinstance(iterated, dataset_ops.Dataset): + return iterated + + def epoch_dataset_number_helper(i): + return dataset_ops.Dataset.zip( + (dataset_ops.Dataset.from_tensors(i).repeat(), iterated)) + + epoch_numbers = dataset_ops.Dataset.range(2) + return epoch_numbers.flat_map(epoch_dataset_number_helper) + + +def dynamic_for_cond(iteration, iterated): + """Implementartion of smart while-loop condition using dynamic dispatch. + + The function checks if it is iterating over a tf.data.Dataset or not, and in + the case it is not then it simply returns if we are still in range of the + iterated and the next element. If it is iterating over a dataset then it only + iterates for a single epoch. + + Args: + iteration: The current iteration of the loop. + iterated: The iterable or tf.data.Dataset that is being iterated over. + Returns: + A tuple of a bool that indicates whether the loop should continue, and the + next element in iterated. + """ + # TODO(znado): Clean up. + # TODO(znado): This won't work for unpacked iterates. Fix. + if isinstance(iterated, dataset_ops.Dataset): + curr_epoch, next_elem = iterated.make_one_shot_iterator().get_next() + return math_ops.less(curr_epoch, 1), next_elem + elif tensor_util.is_tensor(iterated): + if iterated.shape.ndims > 1: + elem_shape = array_ops.shape(iterated)[1:] + else: + elem_shape = () + if iterated.shape.ndims == 0 or iterated.shape[0] == 0: + return False, array_ops.zeros(elem_shape, iterated.dtype) + return control_flow_ops.cond( + math_ops.less(iteration, dynamic_len(iterated)), + lambda: (True, iterated[iteration]), + lambda: (False, array_ops.zeros(elem_shape, iterated.dtype))) + elif hasattr(iterated, '__len__'): + if iteration < len(iterated): + return True, iterated[iteration] + return False, None + else: + raise NotImplementedError('Python iterators not yet supported.') -- GitLab From 759e9f874eb0af7902a586e0efcaf53463816c23 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 19:24:04 -0700 Subject: [PATCH 467/960] Fix loop variable type and status propagation PiperOrigin-RevId: 190308776 --- tensorflow/c/eager/c_api.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index c69635d529..eaeb2fd07a 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -837,7 +837,7 @@ const tensorflow::FunctionDef* OpToFunction( } VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString(); - ctx->context.AddFunctionDef(fdef); + status->status = ctx->context.AddFunctionDef(fdef); if (!status->status.ok()) return nullptr; const auto ret = ctx->context.FindFunctionDef(signature->name()); DCHECK(ret != nullptr); @@ -885,7 +885,7 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { // Since input param reordering may have occurred between `op` and `launch_op` // via `op_input_to_func_input`, adjust the actual inputs accordingly. launch_op->inputs = op->inputs; - for (TFE_TensorHandle* h : launch_op->inputs) { + for (tensorflow::TensorHandle* h : launch_op->inputs) { h->Ref(); } if (!op_input_to_func_input.empty()) { -- GitLab From 418ae5ed77f1353c794f93a4adfbf7db02fa3191 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 20:18:46 -0700 Subject: [PATCH 468/960] A couple of small device-related utilities. PiperOrigin-RevId: 190312148 --- tensorflow/python/BUILD | 3 + tensorflow/python/training/device_util.py | 68 +++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 tensorflow/python/training/device_util.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index acfdcd15f7..e6ad564ede 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2884,9 +2884,11 @@ py_library( ":client", ":control_flow_ops", ":data_flow_ops", + ":device", ":errors", ":framework", ":framework_for_generated_wrappers", + ":framework_ops", ":gradients", ":init_ops", ":io_ops", @@ -2911,6 +2913,7 @@ py_library( ":variable_scope", ":variables", "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", "//third_party/py/numpy", "@six_archive//:six", ], diff --git a/tensorflow/python/training/device_util.py b/tensorflow/python/training/device_util.py new file mode 100644 index 0000000000..f1137e80ab --- /dev/null +++ b/tensorflow/python/training/device_util.py @@ -0,0 +1,68 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Device-related support functions.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import device as tf_device +from tensorflow.python.framework import ops + + +def canonicalize(d): + d = tf_device.DeviceSpec.from_string(d) + assert d.device_type is None or d.device_type == d.device_type.upper(), ( + "Device type '%s' must be all-caps." % (d.device_type,)) + # Fill in missing device fields using defaults. + result = tf_device.DeviceSpec( + job="localhost", replica=0, task=0, device_type="CPU", device_index=0) + result.merge_from(d) + return result.to_string() + + +class _FakeNodeDef(object): + """A fake NodeDef for _FakeOperation.""" + + def __init__(self): + self.op = "" + self.name = "" + + +class _FakeOperation(object): + """A fake Operation object to pass to device functions.""" + + def __init__(self): + self.device = "" + self.type = "" + self.name = "" + self.node_def = _FakeNodeDef() + + def _set_device(self, device): + self.device = ops._device_string(device) # pylint: disable=protected-access + + +def current(): + """Return a string (not canonicalized) for the current device.""" + # TODO(josh11b): Work out how this function interacts with ops.colocate_with. + ctx = context.context() + if ctx.executing_eagerly(): + d = ctx.device_name + else: + op = _FakeOperation() + ops.get_default_graph()._apply_device_functions(op) # pylint: disable=protected-access + d = op.device + return d -- GitLab From 0a335dc4fd8cae06d331589eab5858fd0a3ffc73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 20:20:49 -0700 Subject: [PATCH 469/960] [XLA] Prevent using XlaOp from the wrong XlaBuilder. PiperOrigin-RevId: 190312254 --- .../xla/client/xla_client/xla_builder.cc | 19 +++++++++++++------ .../xla/client/xla_client/xla_builder.h | 5 +++-- .../xla/client/xla_client/xla_builder_test.cc | 11 +++++++++++ 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 90f2b2d73a..cbcb747f1c 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -284,10 +284,12 @@ XlaOp XlaBuilder::Mul(const XlaOp& lhs, const XlaOp& rhs, } XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) { - HloInstructionProto instr; - *instr.mutable_shape() = literal.shape(); - *instr.mutable_literal() = literal.ToProto(); - return AddInstruction(std::move(instr), HloOpcode::kConstant); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + *instr.mutable_shape() = literal.shape(); + *instr.mutable_literal() = literal.ToProto(); + return AddInstruction(std::move(instr), HloOpcode::kConstant); + }()); } XlaOp XlaBuilder::Call(const XlaComputation& computation, @@ -794,8 +796,9 @@ XlaOp XlaBuilder::Recv(const Shape& shape, const ChannelHandle& handle) { return UnimplementedOp(); } -XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, - tensorflow::gtl::ArraySlice operands) { +StatusOr XlaBuilder::AddInstruction( + HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands) { const int64 handle = instructions_.size(); instr.set_id(handle); instr.set_opcode(HloOpcodeString(opcode)); @@ -806,6 +809,10 @@ XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, instr.set_name(StrCat(instr.name(), ".", handle)); } for (const auto& operand : operands) { + TF_RET_CHECK(operand.builder_ != nullptr); + TF_RET_CHECK(operand.builder_ == this) + << "Do not add XlaOp from builder " << operand.builder_->name() + << " to builder " << this->name(); instr.add_operand_ids(operand.handle()); // TODO(b/74197823): Set metadata and sharding. } diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 407b2df274..99d1db7790 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -706,8 +706,9 @@ class XlaBuilder { StatusOr GetProgramShape(); private: - XlaOp AddInstruction(HloInstructionProto&& instr, HloOpcode opcode, - tensorflow::gtl::ArraySlice operands = {}); + StatusOr AddInstruction( + HloInstructionProto&& instr, HloOpcode opcode, + tensorflow::gtl::ArraySlice operands = {}); // Notes that the error occurred by: // * storing it internally and capturing a backtrace if it's the first error diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc index 10d8fa1622..57dcfc4d4d 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc @@ -179,5 +179,16 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) { op::Broadcast(op::Reshape(op::Parameter(1))))); } +TEST_F(XlaBuilderTest, OperandFromWrongBuilder) { + XlaBuilder b1("b1"); + auto p0 = b1.Parameter(0, ShapeUtil::MakeShape(F32, {}), "p0"); + XlaBuilder builder("main"); + builder.Add(p0, p0); + auto statusor = builder.Build(); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Do not add XlaOp from builder b1 to builder main")); +} + } // namespace } // namespace xla -- GitLab From 917b79250b0e65aa7856b2418b68292d919cd5dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Mar 2018 22:27:31 -0700 Subject: [PATCH 470/960] [XLA] Redesign: implement Reshape and Transpose. Also, - Templatize ClientLibraryTestBase::CreateParameterAndTransferLiteral. The implementation is moved from .cc to .h because otherewise the linker complains. - Migrate some reshape tests to use the XlaBuilder. PiperOrigin-RevId: 190317960 --- .../xla/client/xla_client/xla_builder.cc | 30 ++++++++++++-- .../xla/client/xla_client/xla_builder.h | 3 +- .../xla/client/xla_client/xla_builder_test.cc | 27 +++++++++++++ tensorflow/compiler/xla/tests/BUILD | 1 + .../xla/tests/client_library_test_base.cc | 27 ------------- .../xla/tests/client_library_test_base.h | 39 +++++++++++++++++-- tensorflow/compiler/xla/tests/reshape_test.cc | 35 +++++++++-------- 7 files changed, 111 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index cbcb747f1c..596f39b4fd 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -401,12 +401,26 @@ XlaOp XlaBuilder::Pad(const XlaOp& operand, const XlaOp& padding_value, XlaOp XlaBuilder::Reshape(const XlaOp& operand, tensorflow::gtl::ArraySlice dimensions, tensorflow::gtl::ArraySlice new_sizes) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + TF_ASSIGN_OR_RETURN(const Shape& shape, + ShapeInference::InferReshapeShape( + operand_shape, dimensions, new_sizes)); + XlaOp transposed = IsIdentityPermutation(dimensions) + ? operand + : Transpose(operand, dimensions); + return Reshape(shape, transposed); + }()); } XlaOp XlaBuilder::Reshape(const XlaOp& operand, tensorflow::gtl::ArraySlice new_sizes) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(auto shape, operand.GetShape()); + std::vector dimensions(shape.dimensions_size()); + std::iota(dimensions.begin(), dimensions.end(), 0); + return Reshape(operand, dimensions, new_sizes); + }()); } XlaOp XlaBuilder::Collapse(const XlaOp& operand, @@ -636,7 +650,17 @@ XlaOp XlaBuilder::IsFinite(const XlaOp& operand) { return UnimplementedOp(); } XlaOp XlaBuilder::Transpose(const XlaOp& operand, tensorflow::gtl::ArraySlice permutation) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferTransposeShape(operand_shape, permutation)); + for (int64 dim : permutation) { + instr.add_dimensions(dim); + } + return AddInstruction(std::move(instr), HloOpcode::kTranspose, {operand}); + }()); } XlaOp XlaBuilder::Rev(const XlaOp& operand, diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 99d1db7790..c19eb47165 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -52,10 +52,11 @@ class XlaBuilder; // TODO(b/74197823): Replace xla::ComputationDataHandle with this one. class XlaOp { public: + XlaOp() : handle_(0), builder_(nullptr) {} + StatusOr GetShape() const; private: - XlaOp() : handle_(0), builder_(nullptr) {} XlaOp(int64 handle, XlaBuilder* builder) : handle_(handle), builder_(builder) {} diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc index 57dcfc4d4d..529287a57a 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc @@ -190,5 +190,32 @@ TEST_F(XlaBuilderTest, OperandFromWrongBuilder) { HasSubstr("Do not add XlaOp from builder b1 to builder main")); } +TEST_F(XlaBuilderTest, ReshapeDefaultOrder) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {2, 3, 5, 7}), "x"); + b.Reshape(x, /*new_sizes=*/{6, 35}); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Reshape(op::Parameter())); +} + +TEST_F(XlaBuilderTest, ReshapeHasTranspose) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {2, 3, 5, 7}), "x"); + b.Reshape(x, /*dimensions=*/{3, 2, 1, 0}, /*new_sizes=*/{6, 35}); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Reshape(op::Transpose(op::Parameter()))); +} + +TEST_F(XlaBuilderTest, Transpose) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(F32, {5, 7}), "x"); + b.Transpose(x, /*permutation=*/{1, 0}); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Transpose(op::Parameter())); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index e81e862c49..26022278e5 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1374,6 +1374,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index 3cae51576f..d9bd1ce6eb 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -568,33 +568,6 @@ ClientLibraryTestBase::CreatePatternedMatrixWithZeroPadding(int rows, int cols, return array; } -std::unique_ptr -ClientLibraryTestBase::CreateParameterAndTransferLiteral( - int64 parameter_number, const Literal& literal, const string& name, - ComputationBuilder* builder, ComputationDataHandle* data_handle) { - return CreateParameterAndTransferLiteral(parameter_number, literal, name, - nullptr, builder, data_handle); -} - -std::unique_ptr -ClientLibraryTestBase::CreateParameterAndTransferLiteral( - int64 parameter_number, const Literal& literal, const string& name, - const DeviceHandle* device_handle, ComputationBuilder* builder, - ComputationDataHandle* data_handle) { - const Literal* param_literal = &literal; - std::unique_ptr converted_literal; - if (use_bfloat16_) { - converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal); - param_literal = converted_literal.get(); - } - std::unique_ptr data = - client_->TransferToServer(*param_literal, device_handle) - .ConsumeValueOrDie(); - *data_handle = - builder->Parameter(parameter_number, param_literal->shape(), name); - return data; -} - ComputationDataHandle ClientLibraryTestBase::AddParam( const Literal& argument, ComputationBuilder* builder) { ComputationDataHandle data_handle; diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index b553beb01a..01aa6c756f 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -278,17 +278,19 @@ class ClientLibraryTestBase : public ::testing::Test { // server, then stores into "data_handle" the global handle for that // parameter. When the use_bfloat16 flag is set but the literal has F32 // elements, the literal will be converted to BF16 before being transferred. + template std::unique_ptr CreateParameterAndTransferLiteral( int64 parameter_number, const Literal& literal, const string& name, - ComputationBuilder* builder, ComputationDataHandle* data_handle); + BuilderT* builder, HandleT* data_handle); // As above, but the caller can specify the device that the literal is // transferred to. If device_handle is nullptr, the literal will be // transferred to the default device. + template std::unique_ptr CreateParameterAndTransferLiteral( int64 parameter_number, const Literal& literal, const string& name, - const DeviceHandle* device_handle, ComputationBuilder* builder, - ComputationDataHandle* data_handle); + const DeviceHandle* device_handle, BuilderT* builder, + HandleT* data_handle); // Creates a parameter instruction and sets the value that will be passed to // the computation as specified. This function must be used for all parameters @@ -652,6 +654,37 @@ std::unique_ptr> ClientLibraryTestBase::CreatePseudorandomR2( return result; } +template +std::unique_ptr +ClientLibraryTestBase::CreateParameterAndTransferLiteral(int64 parameter_number, + const Literal& literal, + const string& name, + BuilderT* builder, + HandleT* data_handle) { + return CreateParameterAndTransferLiteral(parameter_number, literal, name, + nullptr, builder, data_handle); +} + +template +std::unique_ptr +ClientLibraryTestBase::CreateParameterAndTransferLiteral( + int64 parameter_number, const Literal& literal, const string& name, + const DeviceHandle* device_handle, BuilderT* builder, + HandleT* data_handle) { + const Literal* param_literal = &literal; + std::unique_ptr converted_literal; + if (use_bfloat16_) { + converted_literal = LiteralTestUtil::ConvertF32ToBF16(literal); + param_literal = converted_literal.get(); + } + std::unique_ptr data = + client_->TransferToServer(*param_literal, device_handle) + .ConsumeValueOrDie(); + *data_handle = + builder->Parameter(parameter_number, param_literal->shape(), name); + return data; +} + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_TESTS_CLIENT_LIBRARY_TEST_BASE_H_ diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index f7b04debd4..02272d6017 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/reference_util.h" @@ -207,9 +208,9 @@ XLA_TEST_P(ReshapeTest, Trivial3x1) { // // Splits an empty vector into an empty matrix. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1({}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0}, @@ -221,10 +222,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) { // Splits a vector into a matrix. XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0}, @@ -241,9 +242,9 @@ XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) { // // Transposes a 2x0 array to a 0x2 array. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 2)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1}, @@ -255,10 +256,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) { // Transposes a 2-dimensional row vector to a column vector. XLA_TEST_P(ReshapeTest, ReshapeRowToCol) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto simple = MakeLinspaceArray2D(1.0f, 3.0f, 1, 3); auto input_literal = Literal::CreateFromArray(*simple); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1}, @@ -272,10 +273,10 @@ XLA_TEST_P(ReshapeTest, ReshapeRowToCol) { // Transposes a 2-dimensional array. XLA_TEST_P(ReshapeTest, TransposeAsReshape) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); auto input_literal = Literal::CreateFromArray(*a4x3); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{1, 0}, @@ -291,11 +292,11 @@ XLA_TEST_P(ReshapeTest, TransposeAsReshape) { // does not handle zero-sized shapes correctly. Failed last on 2017-11-30 // with an incorrect result rank. // -// Transposes a 0x4 array with ComputationBuilder::Trans. +// Transposes a 0x4 array with XlaBuilder::Transpose. XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(0, 4)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Transpose(parameter, {1, 0}); @@ -306,10 +307,10 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) { // Transposes a 2-dimensional array with ComputationBuilder::Trans. XLA_TEST_P(ReshapeTest, Transpose4x3) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a4x3 = MakeLinspaceArray2D(1.0f, 12.0f, 4, 3); auto input_literal = Literal::CreateFromArray(*a4x3); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Transpose(parameter, {1, 0}); @@ -327,9 +328,9 @@ XLA_TEST_P(ReshapeTest, Transpose4x3) { // Reshapes an empty 2-dimensional array with dimensions that are not just a // rearrangement of the originals (split), but no reordering (no shuffle). XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateFromArray(Array2D(6, 0)); - ComputationDataHandle parameter; + XlaOp parameter; auto input = CreateParameterAndTransferLiteral(0, *input_literal, "input", &builder, ¶meter); builder.Reshape(/*operand=*/parameter, /*dimensions=*/{0, 1}, -- GitLab From f95347a96c431b63183856128bfea3943585f938 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 23 Mar 2018 23:04:03 -0700 Subject: [PATCH 471/960] Trivial update of layer imports in eager execution examples, to reflect recommended practices. PiperOrigin-RevId: 190319480 --- .../eager/python/examples/gan/mnist.py | 21 ++++----- .../linear_regression/linear_regression.py | 4 +- .../python/examples/resnet50/resnet50.py | 43 ++++++++++--------- .../examples/rnn_colorbot/rnn_colorbot.py | 6 ++- .../eager/python/examples/rnn_ptb/rnn_ptb.py | 8 ++-- 5 files changed, 46 insertions(+), 36 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist.py b/tensorflow/contrib/eager/python/examples/gan/mnist.py index 2b7e199fad..b80c909023 100644 --- a/tensorflow/contrib/eager/python/examples/gan/mnist.py +++ b/tensorflow/contrib/eager/python/examples/gan/mnist.py @@ -32,6 +32,7 @@ import tensorflow as tf import tensorflow.contrib.eager as tfe from tensorflow.examples.tutorials.mnist import input_data +layers = tf.keras.layers FLAGS = None @@ -56,15 +57,15 @@ class Discriminator(tf.keras.Model): else: assert data_format == 'channels_last' self._input_shape = [-1, 28, 28, 1] - self.conv1 = tf.layers.Conv2D( + self.conv1 = layers.Conv2D( 64, 5, padding='SAME', data_format=data_format, activation=tf.tanh) - self.pool1 = tf.layers.AveragePooling2D(2, 2, data_format=data_format) - self.conv2 = tf.layers.Conv2D( + self.pool1 = layers.AveragePooling2D(2, 2, data_format=data_format) + self.conv2 = layers.Conv2D( 128, 5, data_format=data_format, activation=tf.tanh) - self.pool2 = tf.layers.AveragePooling2D(2, 2, data_format=data_format) - self.flatten = tf.layers.Flatten() - self.fc1 = tf.layers.Dense(1024, activation=tf.tanh) - self.fc2 = tf.layers.Dense(1, activation=None) + self.pool2 = layers.AveragePooling2D(2, 2, data_format=data_format) + self.flatten = layers.Flatten() + self.fc1 = layers.Dense(1024, activation=tf.tanh) + self.fc2 = layers.Dense(1, activation=None) def call(self, inputs): """Return two logits per image estimating input authenticity. @@ -112,16 +113,16 @@ class Generator(tf.keras.Model): else: assert data_format == 'channels_last' self._pre_conv_shape = [-1, 6, 6, 128] - self.fc1 = tf.layers.Dense(6 * 6 * 128, activation=tf.tanh) + self.fc1 = layers.Dense(6 * 6 * 128, activation=tf.tanh) # In call(), we reshape the output of fc1 to _pre_conv_shape # Deconvolution layer. Resulting image shape: (batch, 14, 14, 64) - self.conv1 = tf.layers.Conv2DTranspose( + self.conv1 = layers.Conv2DTranspose( 64, 4, strides=2, activation=None, data_format=data_format) # Deconvolution layer. Resulting image shape: (batch, 28, 28, 1) - self.conv2 = tf.layers.Conv2DTranspose( + self.conv2 = layers.Conv2DTranspose( 1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format) def call(self, inputs): diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py index 6ab847cb78..4e1380afb2 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py +++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py @@ -32,6 +32,8 @@ import tensorflow as tf import tensorflow.contrib.eager as tfe +layers = tf.keras.layers + class LinearModel(tf.keras.Model): """A TensorFlow linear regression model.""" @@ -39,7 +41,7 @@ class LinearModel(tf.keras.Model): def __init__(self): """Constructs a LinearModel object.""" super(LinearModel, self).__init__() - self._hidden_layer = tf.layers.Dense(1) + self._hidden_layer = layers.Dense(1) def call(self, xs): """Invoke the linear model. diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py index 6b59413141..a28bc8a43d 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py @@ -28,6 +28,8 @@ import functools import tensorflow as tf +layers = tf.keras.layers + class _IdentityBlock(tf.keras.Model): """_IdentityBlock is the block that has no conv layer at shortcut. @@ -49,23 +51,23 @@ class _IdentityBlock(tf.keras.Model): bn_name_base = 'bn' + str(stage) + block + '_branch' bn_axis = 1 if data_format == 'channels_first' else 3 - self.conv2a = tf.layers.Conv2D( + self.conv2a = layers.Conv2D( filters1, (1, 1), name=conv_name_base + '2a', data_format=data_format) - self.bn2a = tf.layers.BatchNormalization( + self.bn2a = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2a') - self.conv2b = tf.layers.Conv2D( + self.conv2b = layers.Conv2D( filters2, kernel_size, padding='same', data_format=data_format, name=conv_name_base + '2b') - self.bn2b = tf.layers.BatchNormalization( + self.bn2b = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2b') - self.conv2c = tf.layers.Conv2D( + self.conv2c = layers.Conv2D( filters3, (1, 1), name=conv_name_base + '2c', data_format=data_format) - self.bn2c = tf.layers.BatchNormalization( + self.bn2c = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2c') def call(self, input_tensor, training=False): @@ -113,34 +115,34 @@ class _ConvBlock(tf.keras.Model): bn_name_base = 'bn' + str(stage) + block + '_branch' bn_axis = 1 if data_format == 'channels_first' else 3 - self.conv2a = tf.layers.Conv2D( + self.conv2a = layers.Conv2D( filters1, (1, 1), strides=strides, name=conv_name_base + '2a', data_format=data_format) - self.bn2a = tf.layers.BatchNormalization( + self.bn2a = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2a') - self.conv2b = tf.layers.Conv2D( + self.conv2b = layers.Conv2D( filters2, kernel_size, padding='same', name=conv_name_base + '2b', data_format=data_format) - self.bn2b = tf.layers.BatchNormalization( + self.bn2b = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2b') - self.conv2c = tf.layers.Conv2D( + self.conv2c = layers.Conv2D( filters3, (1, 1), name=conv_name_base + '2c', data_format=data_format) - self.bn2c = tf.layers.BatchNormalization( + self.bn2c = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '2c') - self.conv_shortcut = tf.layers.Conv2D( + self.conv_shortcut = layers.Conv2D( filters3, (1, 1), strides=strides, name=conv_name_base + '1', data_format=data_format) - self.bn_shortcut = tf.layers.BatchNormalization( + self.bn_shortcut = layers.BatchNormalization( axis=bn_axis, name=bn_name_base + '1') def call(self, input_tensor, training=False): @@ -219,15 +221,15 @@ class ResNet50(tf.keras.Model): return _IdentityBlock( 3, filters, stage=stage, block=block, data_format=data_format) - self.conv1 = tf.layers.Conv2D( + self.conv1 = layers.Conv2D( 64, (7, 7), strides=(2, 2), data_format=data_format, padding='same', name='conv1') bn_axis = 1 if data_format == 'channels_first' else 3 - self.bn_conv1 = tf.layers.BatchNormalization(axis=bn_axis, name='bn_conv1') - self.max_pool = tf.layers.MaxPooling2D( + self.bn_conv1 = layers.BatchNormalization(axis=bn_axis, name='bn_conv1') + self.max_pool = layers.MaxPooling2D( (3, 3), strides=(2, 2), data_format=data_format) self.l2a = conv_block([64, 64, 256], stage=2, block='a', strides=(1, 1)) @@ -250,11 +252,12 @@ class ResNet50(tf.keras.Model): self.l5b = id_block([512, 512, 2048], stage=5, block='b') self.l5c = id_block([512, 512, 2048], stage=5, block='c') - self.avg_pool = tf.layers.AveragePooling2D( + self.avg_pool = layers.AveragePooling2D( (7, 7), strides=(7, 7), data_format=data_format) if self.include_top: - self.fc1000 = tf.layers.Dense(classes, name='fc1000') + self.flatten = layers.Flatten() + self.fc1000 = layers.Dense(classes, name='fc1000') else: reduction_indices = [1, 2] if data_format == 'channels_last' else [2, 3] reduction_indices = tf.constant(reduction_indices) @@ -298,7 +301,7 @@ class ResNet50(tf.keras.Model): x = self.avg_pool(x) if self.include_top: - return self.fc1000(tf.layers.flatten(x)) + return self.fc1000(self.flatten(x)) elif self.global_pooling: return self.global_pooling(x) else: diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py index 88fffc962f..492adbe1d8 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py @@ -73,6 +73,8 @@ try: except ImportError: HAS_MATPLOTLIB = False +layers = tf.keras.layers + def parse(line): """Parse a line from the colors dataset.""" @@ -152,7 +154,7 @@ class RNNColorbot(tf.keras.Model): self.cells = self._add_cells( [tf.nn.rnn_cell.BasicLSTMCell(size) for size in rnn_cell_sizes]) - self.relu = tf.layers.Dense( + self.relu = layers.Dense( label_dimension, activation=tf.nn.relu, name="relu") def call(self, inputs, training=False): @@ -204,7 +206,7 @@ class RNNColorbot(tf.keras.Model): def _add_cells(self, cells): # "Magic" required for keras.Model classes to track all the variables in - # a list of tf.layers.Layer objects. + # a list of layers.Layer objects. # TODO(ashankar): Figure out API so user code doesn't have to do this. for i, c in enumerate(cells): setattr(self, "cell-%d" % i, c) diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index 69cd16d12c..a90048d813 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -38,6 +38,8 @@ import tensorflow as tf from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn from tensorflow.contrib.eager.python import tfe +layers = tf.keras.layers + class RNN(tf.keras.Model): """A static RNN. @@ -74,14 +76,14 @@ class RNN(tf.keras.Model): def _add_cells(self, cells): # "Magic" required for keras.Model classes to track all the variables in - # a list of tf.layers.Layer objects. + # a list of Layer objects. # TODO(ashankar): Figure out API so user code doesn't have to do this. for i, c in enumerate(cells): setattr(self, "cell-%d" % i, c) return cells -class Embedding(tf.layers.Layer): +class Embedding(layers.Layer): """An Embedding layer.""" def __init__(self, vocab_size, embedding_dim, **kwargs): @@ -132,7 +134,7 @@ class PTBModel(tf.keras.Model): else: self.rnn = RNN(hidden_dim, num_layers, self.keep_ratio) - self.linear = tf.layers.Dense( + self.linear = layers.Dense( vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1)) self._output_shape = [-1, embedding_dim] -- GitLab From d8eda53c488683b37ae60e2ecbdf0bd2fd47c8c1 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Sat, 24 Mar 2018 00:24:50 -0700 Subject: [PATCH 472/960] Misc typo fixes in the XLA sources and docs. PiperOrigin-RevId: 190322644 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 2 +- tensorflow/compiler/xla/service/algebraic_simplifier.h | 4 ++-- tensorflow/compiler/xla/service/compiler.h | 2 +- tensorflow/compiler/xla/service/while_loop_simplifier.h | 2 +- tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 02b23c2d14..f9fabd8a35 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -302,7 +302,7 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { // Disable dot strength reduction on platforms where it causes a slowdown. bool enable_dot_strength_reduction_; - // Disable convolution simplication on platforms where it causes a slowdown. + // Disable convolution simplification on platforms where it causes a slowdown. bool enable_conv_simplification_; }; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index f0590943be..c48196e861 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -57,10 +57,10 @@ class AlgebraicSimplifier : public HloPassInterface { bool is_layout_sensitive_; ValidBitcastCallback valid_bitcast_callback_; - // Enable dot simplication on platforms where it is profitable. + // Enable dot simplification on platforms where it is profitable. bool enable_dot_strength_reduction_; - // Enable convolution simplication on platforms where it is profitable. + // Enable convolution simplification on platforms where it is profitable. bool enable_conv_simplification_; }; diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 33e19efc72..b4b53ae2ed 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -127,7 +127,7 @@ class Compiler { // Compiles the HLO module for execution on a device given by the executor, // and returns an executable object or an error status. No HLO passes are // applied to module. Generally a module should be passed through RunHloPasses - // prior to calling this method because the some HLO passes are required for + // prior to calling this method because some HLO passes are required for // correctness. Takes ownership of the HLO module and is free to transform it. // // The compiler may optionally specialize to the individual device diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.h b/tensorflow/compiler/xla/service/while_loop_simplifier.h index d3d55634c9..3d3e1d60f2 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.h +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.h @@ -25,7 +25,7 @@ namespace xla { // HLO pass that makes the following transformations on while loops: // // - A while loop with static trip count of 0 is deleted. -// - A while loops with static trip count of 1 is replaced by its body (sans +// - A while loop with static trip count of 1 is replaced by its body (sans // loop). // - Elements of a while loop's tuple that the loop doesn't use are removed // from the tuple. diff --git a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h index 063e312df6..8763e588c4 100644 --- a/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h +++ b/tensorflow/compiler/xla/service/zero_sized_hlo_elimination.h @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" -// HLO pass that replaces zero sized Hlos with an zero sized constant literal. +// HLO pass that replaces zero sized Hlos with a zero sized constant literal. namespace xla { class ZeroSizedHloElimination : public HloPassInterface { public: -- GitLab From 1aa398fe9801bca4dd8e19c255634d93bc9f5456 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Sat, 24 Mar 2018 23:42:08 -0700 Subject: [PATCH 473/960] Build and import rules for distributed strategy PiperOrigin-RevId: 190367484 --- tensorflow/tools/docs/generate_lib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 34dd419f15..d22a465376 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -211,6 +211,7 @@ def _get_default_do_not_descend_map(): 'tf': ['cli', 'lib', 'wrappers'], 'tf.contrib': [ 'compiler', + 'distribute', 'grid_rnn', # Block contrib.keras to de-clutter the docs 'keras', -- GitLab From 3e4df091fd099170ccb9737be3747b9542a85669 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 25 Mar 2018 09:38:54 -0700 Subject: [PATCH 474/960] Restore dependencies that are needed by the PIP package builder PiperOrigin-RevId: 190387090 --- .../contrib/boosted_trees/estimator_batch/BUILD | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index dae402204f..dcd235f876 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -13,20 +13,23 @@ load("//tensorflow:tensorflow.bzl", "py_test") filegroup( name = "all_files", srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], + include = ["**/*"], + exclude = ["**/OWNERS"], ), visibility = ["//tensorflow:__subpackages__"], ) py_library( name = "init_py", - srcs = [ - "__init__.py", - ], + srcs = ["__init__.py"], srcs_version = "PY2AND3", + deps = [ + "custom_export_strategy", + ":custom_loss_head", + ":estimator", + ":model", + ":trainer_hooks", + ], ) py_library( -- GitLab From 6c1737e6c8c9e5db405853178fb5e42abc080ba3 Mon Sep 17 00:00:00 2001 From: brett koonce Date: Sun, 25 Mar 2018 11:49:51 -0700 Subject: [PATCH 475/960] contrib/factorization: minor spelling tweaks (#17992) --- .../factorization/kernels/clustering_ops.cc | 2 +- .../factorization/python/ops/factorization_ops.py | 14 +++++++------- .../python/ops/factorization_ops_test.py | 8 ++++---- .../contrib/factorization/python/ops/gmm_ops.py | 4 ++-- .../contrib/factorization/python/ops/gmm_test.py | 2 +- .../factorization/python/ops/kmeans_test.py | 4 ++-- .../contrib/factorization/python/ops/wals.py | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/factorization/kernels/clustering_ops.cc b/tensorflow/contrib/factorization/kernels/clustering_ops.cc index dd61f59585..2a6c97e8b9 100644 --- a/tensorflow/contrib/factorization/kernels/clustering_ops.cc +++ b/tensorflow/contrib/factorization/kernels/clustering_ops.cc @@ -353,7 +353,7 @@ class NearestNeighborsOp : public OpKernel { auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); const int64 num_threads = worker_threads.num_threads; // This kernel might be configured to use fewer than the total number of - // available CPUs on the host machine. To avoid descructive interference + // available CPUs on the host machine. To avoid destructive interference // with other jobs running on the host machine, we must only use a fraction // of total available L3 cache. Unfortunately, we cannot query the host // machine to get the number of physical CPUs. So, we use a fixed per-CPU diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 054888e734..8e0ed1d80e 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -106,7 +106,7 @@ class WALSModel(object): # the prep_gramian_op for row(column) can be run. worker_init_op = model.worker_init - # To be run once per interation sweep before the row(column) update + # To be run once per integration sweep before the row(column) update # initialize ops can be run. Note that in the distributed training # situations, this should only be run by the chief trainer. All other # trainers need to block until this is done. @@ -118,9 +118,9 @@ class WALSModel(object): init_row_update_op = model.initialize_row_update_op init_col_update_op = model.initialize_col_update_op - # Ops to upate row(column). This can either take the entire sparse tensor - # or slices of sparse tensor. For distributed trainer, each trainer - # handles just part of the matrix. + # Ops to update row(column). This can either take the entire sparse + # tensor or slices of sparse tensor. For distributed trainer, each + # trainer handles just part of the matrix. _, row_update_op, unreg_row_loss, row_reg, _ = model.update_row_factors( sp_input=matrix_slices_from_queue_for_worker_shard) row_loss = unreg_row_loss + row_reg @@ -220,7 +220,7 @@ class WALSModel(object): in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of inner lists matching the number of row factor shards and the elements in each inner list are the weights for the rows of the corresponding row - factor shard. In this case, w_ij = unonbserved_weight + + factor shard. In this case, w_ij = unobserved_weight + row_weights[i] * col_weights[j]. - If this is a single non-negative real number, this value is used for all row weights and w_ij = unobserved_weight + row_weights * @@ -435,7 +435,7 @@ class WALSModel(object): gramian: Variable storing the gramian calculated from the factors. Returns: - A op that updates the gramian with the calcuated value from the factors. + A op that updates the gramian with the calculated value from the factors. """ partial_gramians = [] for f in factors: @@ -564,7 +564,7 @@ class WALSModel(object): Note that specifically this initializes the cache of the row and column weights on workers when `use_factors_weights_cache` is True. In this case, - if these weights are being calcualted and reset after the object is created, + if these weights are being calculated and reset after the object is created, it is important to ensure this ops is run afterwards so the cache reflects the correct values. """ diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py index c813733915..002f9cfbdd 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py @@ -210,7 +210,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -283,7 +283,7 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 3 column feature vectors. - # This is expected to reprodue the same column factors in the model as the + # This is expected to reproduce the same column factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( @@ -385,7 +385,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -462,7 +462,7 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 2 column feature vectors. - # This is expected to reprodue the same column factors in the model as the + # This is expected to reproduce the same column factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index 98d6434f47..14d4c733e3 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -280,7 +280,7 @@ class GmmAlgorithm(object): self._define_score_samples() def _define_full_covariance_probs(self, shard_id, shard): - """Defines the full covariance probabilties per example in a class. + """Defines the full covariance probabilities per example in a class. Updates a matrix with dimension num_examples X num_classes. @@ -344,7 +344,7 @@ class GmmAlgorithm(object): def _define_prior_log_prob_operation(self, shard_id): """Computes the prior probability of all samples. - Updates a vector where each item is the prior probabibility of an + Updates a vector where each item is the prior probability of an input example. Args: diff --git a/tensorflow/contrib/factorization/python/ops/gmm_test.py b/tensorflow/contrib/factorization/python/ops/gmm_test.py index 00a4734eb6..4fc9c96e9d 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_test.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_test.py @@ -210,7 +210,7 @@ class GMMTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py index 0103cc4439..88eb9cf692 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -413,7 +413,7 @@ class KMeansCosineDistanceTest(KMeansTestBase): self.assertAllClose(score, self.true_score, atol=1e-2) def test_predict_kmeans_plus_plus(self): - # Most points are concetrated near one center. KMeans++ is likely to find + # Most points are concentrated near one center. KMeans++ is likely to find # the less populated centers. points = np.array( [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], @@ -604,7 +604,7 @@ class KMeansTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 4fe22ea26e..62db3bb4c4 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -235,7 +235,7 @@ def _wals_factorization_model_function(features, labels, mode, params): num_items: An integer, the total number of items of this axis. update_fn: A function that takes one argument (`sp_input`), and that returns a tuple of - * new_factors: A flot Tensor of the factor values after update. + * new_factors: A float Tensor of the factor values after update. * update_op: a TensorFlow op which updates the factors. * loss: A float Tensor, the unregularized loss. * reg_loss: A float Tensor, the regularization loss. -- GitLab From 6645609dffd4bfeb33d4d7250ad8e06935c39e82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 25 Mar 2018 11:51:33 -0700 Subject: [PATCH 476/960] Add skeleton code for DebugStripper. PiperOrigin-RevId: 190391193 --- tensorflow/core/grappler/optimizers/BUILD | 32 ++++++++++++++ .../grappler/optimizers/debug_stripper.cc | 36 +++++++++++++++ .../core/grappler/optimizers/debug_stripper.h | 43 ++++++++++++++++++ .../optimizers/debug_stripper_test.cc | 44 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 14 +++++- .../core/grappler/utils/grappler_test.cc | 1 + .../core/protobuf/rewriter_config.proto | 2 + 7 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/debug_stripper.cc create mode 100644 tensorflow/core/grappler/optimizers/debug_stripper.h create mode 100644 tensorflow/core/grappler/optimizers/debug_stripper_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 92f7cce502..601984fcfd 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -500,6 +500,7 @@ cc_library( ":constant_folding", ":custom_graph_optimizer", ":custom_graph_optimizer_registry", + ":debug_stripper", ":dependency_optimizer", ":function_optimizer", ":graph_optimizer", @@ -618,3 +619,34 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +cc_library( + name = "debug_stripper", + srcs = ["debug_stripper.cc"], + hdrs = [ + "debug_stripper.h", + ], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:graph_optimizer", + ], +) + +tf_cuda_cc_test( + name = "debug_stripper_test", + size = "small", + srcs = ["debug_stripper_test.cc"], + deps = [ + ":debug_stripper", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.cc b/tensorflow/core/grappler/optimizers/debug_stripper.cc new file mode 100644 index 0000000000..461f1aa2fb --- /dev/null +++ b/tensorflow/core/grappler/optimizers/debug_stripper.cc @@ -0,0 +1,36 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/debug_stripper.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" + +namespace tensorflow { +namespace grappler { + +Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) { + // TODO(haoliang): Let's remove assertions here. + *output = item.graph; + return Status::OK(); +} + +void DebugStripper::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) { + // Takes no feedback. +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.h b/tensorflow/core/grappler/optimizers/debug_stripper.h new file mode 100644 index 0000000000..1fe25aa1c3 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/debug_stripper.h @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEBUG_STRIPPER_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEBUG_STRIPPER_H_ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +// DebugStripper strips off debug-related nodes (e.g. +// Assert, CheckNumerics, Print) from the graph. +class DebugStripper : public GraphOptimizer { + public: + DebugStripper() {} + ~DebugStripper() override {} + + string name() const override { return "debug_stripper"; }; + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DEBUG_STRIPPER_H_ diff --git a/tensorflow/core/grappler/optimizers/debug_stripper_test.cc b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc new file mode 100644 index 0000000000..d2cabc0798 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/debug_stripper.h" + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class DebugStripperTest : public GrapplerTest {}; + +// TODO(haoliang): Add tests for different removal operations. +TEST_F(DebugStripperTest, OutputEqualToInput) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto c = ops::Const(s.WithOpName("c"), 0, {}); + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + DebugStripper optimizer; + GraphDef output; + TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 6eb2bbc547..47ec16226b 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/auto_parallel.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/debug_stripper.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" #include "tensorflow/core/grappler/optimizers/function_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" @@ -84,6 +85,9 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset( new DependencyOptimizer(cfg_.dependency_optimization())); } + if (optimizer == "debug_stripper") { + graph_optimizer.reset(new DebugStripper()); + } return graph_optimizer; } @@ -134,10 +138,15 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new AutoParallel(cfg_.auto_parallel().num_replicas()))); } + if (cfg_.debug_stripper() == RewriterConfig::ON) { + optimizers.push_back( + std::unique_ptr(new DebugStripper())); + } } else { const std::set available_optimizers = { - "pruning", "function", "constfold", "layout", "memory", - "autoparallel", "arithmetic", "loop", "dependency"}; + "pruning", "function", "constfold", "layout", + "memory", "autoparallel", "arithmetic", "loop", + "dependency", "debug_stripper"}; std::vector custom_optimizer_names; for (const auto& optimizer_name : cfg_.optimizers()) { if (available_optimizers.find(optimizer_name) != @@ -238,6 +247,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.dependency_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || + cfg.debug_stripper() == RewriterConfig::ON || !cfg.optimizers().empty(); } diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index 1c15ea65b8..ee126f4955 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -36,6 +36,7 @@ GrapplerTest::GrapplerTest() { cfg->set_loop_optimization(RewriterConfig::OFF); cfg->set_function_optimization(RewriterConfig::OFF); cfg->set_layout_optimizer(RewriterConfig::OFF); + cfg->set_debug_stripper(RewriterConfig::OFF); } std::vector GrapplerTest::EvaluateNodes( diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index fdf16aa1da..bb772460b0 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -46,6 +46,8 @@ message RewriterConfig { Toggle loop_optimization = 9; // Function optimizations (default is ON). Toggle function_optimization = 10; + // Strips debug-related nodes from the graph (off by default). + Toggle debug_stripper = 11; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From 8561c30ea6538083248b653237754138695702af Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Sun, 25 Mar 2018 19:35:54 -0700 Subject: [PATCH 477/960] Use compat.as_bytes() instead of str.encode(). PiperOrigin-RevId: 190409217 --- tensorflow/python/framework/ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index f264e38102..5e4a884a70 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2477,8 +2477,9 @@ def _set_shapes_for_outputs_c_api(op): serialized = c_api.ResourceHandleShapeAndType(op._graph._c_graph, output._as_tf_output()) if serialized: - output._handle_data = (cpp_shape_inference_pb2.CppShapeInferenceResult - .HandleData.FromString(serialized.encode())) + output._handle_data = ( + cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( + compat.as_bytes(serialized))) else: output._handle_data = None -- GitLab From 9d9ea88abd63d2c317e445e54a4f9c90d747343a Mon Sep 17 00:00:00 2001 From: Petros Mol Date: Sun, 25 Mar 2018 20:13:13 -0700 Subject: [PATCH 478/960] Minor Error type and documentation fix. PiperOrigin-RevId: 190411045 --- tensorflow/python/estimator/canned/head.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index c9635a9c27..bb033d3495 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -887,11 +887,12 @@ def _binary_logistic_head_with_sigmoid_cross_entropy_loss( Raises: ValueError: If `thresholds` contains a value outside of `(0, 1)`. ValueError: If `loss_reduction` is invalid. + TypeError: if `label_vocabulary` has invalid type. """ thresholds = tuple(thresholds) if thresholds else tuple() if label_vocabulary is not None and not isinstance(label_vocabulary, (list, tuple)): - raise ValueError( + raise TypeError( 'label_vocabulary should be a list or tuple. Given type: {}'.format( type(label_vocabulary))) -- GitLab From 668f182b1fdfc31568a44fe650324fe2ddedbbe1 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Sun, 25 Mar 2018 21:57:09 -0700 Subject: [PATCH 479/960] Always cast `tf.distributions.Distribution` `_event_shape`, `_batch_shape`. PiperOrigin-RevId: 190415923 --- tensorflow/python/ops/distributions/distribution.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index c055ca43e8..0866fa8b0b 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -593,7 +593,7 @@ class Distribution(_BaseDistribution): Returns: batch_shape: `TensorShape`, possibly unknown. """ - return self._batch_shape() + return tensor_shape.as_shape(self._batch_shape()) def _event_shape_tensor(self): raise NotImplementedError("event_shape_tensor is not implemented") @@ -626,7 +626,7 @@ class Distribution(_BaseDistribution): Returns: event_shape: `TensorShape`, possibly unknown. """ - return self._event_shape() + return tensor_shape.as_shape(self._event_shape()) def is_scalar_event(self, name="is_scalar_event"): """Indicates that `event_shape == []`. -- GitLab From c3436d6757a77ab1fefd3f6000a1e961a9ab9881 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sun, 25 Mar 2018 22:02:09 -0700 Subject: [PATCH 480/960] Disable flaky prefetching_ops_test. PiperOrigin-RevId: 190416108 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index f70b29c43b..8cfe4a727a 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -479,6 +479,10 @@ py_test( size = "small", srcs = ["prefetching_ops_test.py"], srcs_version = "PY2AND3", + tags = [ + "manual", + "no_oss", + ], deps = [ "//tensorflow/contrib/data/python/ops:prefetching_ops", "//tensorflow/core:protos_all_py", -- GitLab From a5a1e9e43131b387395930f38234fc10b02d874b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 08:52:53 -0700 Subject: [PATCH 481/960] Updated test (but not source) of https://www.tensorflow.org/api_docs/python/tf/contrib/training/HParams to show that it allows '=' in the values. PiperOrigin-RevId: 190470578 --- .../training/python/training/hparam_test.py | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py index 16397622ed..96eff86d8d 100644 --- a/tensorflow/contrib/training/python/training/hparam_test.py +++ b/tensorflow/contrib/training/python/training/hparam_test.py @@ -38,40 +38,60 @@ class HParamsTest(test.TestCase): self.assertFalse('bar' in hparams) def testSomeValues(self): - hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6') - self.assertDictEqual({'aaa': 1, 'b': 2.0, 'c_c': 'relu6'}, hparams.values()) - expected_str = '[(\'aaa\', 1), (\'b\', 2.0), (\'c_c\', \'relu6\')]' + hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d='/a/b=c/d') + self.assertDictEqual( + {'aaa': 1, 'b': 2.0, 'c_c': 'relu6', 'd': '/a/b=c/d'}, + hparams.values()) + expected_str = ('[(\'aaa\', 1), (\'b\', 2.0), (\'c_c\', \'relu6\'), ' + '(\'d\', \'/a/b=c/d\')]') self.assertEqual(expected_str, str(hparams.__str__())) self.assertEqual(expected_str, str(hparams)) self.assertEqual(1, hparams.aaa) self.assertEqual(2.0, hparams.b) self.assertEqual('relu6', hparams.c_c) + self.assertEqual('/a/b=c/d', hparams.d) hparams.parse('aaa=12') self.assertDictEqual({ 'aaa': 12, 'b': 2.0, - 'c_c': 'relu6' + 'c_c': 'relu6', + 'd': '/a/b=c/d' }, hparams.values()) self.assertEqual(12, hparams.aaa) self.assertEqual(2.0, hparams.b) self.assertEqual('relu6', hparams.c_c) + self.assertEqual('/a/b=c/d', hparams.d) hparams.parse('c_c=relu4, b=-2.0e10') self.assertDictEqual({ 'aaa': 12, 'b': -2.0e10, - 'c_c': 'relu4' + 'c_c': 'relu4', + 'd': '/a/b=c/d' }, hparams.values()) self.assertEqual(12, hparams.aaa) self.assertEqual(-2.0e10, hparams.b) self.assertEqual('relu4', hparams.c_c) + self.assertEqual('/a/b=c/d', hparams.d) hparams.parse('c_c=,b=0,') - self.assertDictEqual({'aaa': 12, 'b': 0, 'c_c': ''}, hparams.values()) + self.assertDictEqual({'aaa': 12, 'b': 0, 'c_c': '', 'd': '/a/b=c/d'}, + hparams.values()) self.assertEqual(12, hparams.aaa) self.assertEqual(0.0, hparams.b) self.assertEqual('', hparams.c_c) + self.assertEqual('/a/b=c/d', hparams.d) hparams.parse('c_c=2.3",b=+2,') self.assertEqual(2.0, hparams.b) self.assertEqual('2.3"', hparams.c_c) + hparams.parse('d=/a/b/c/d,aaa=11,') + self.assertEqual(11, hparams.aaa) + self.assertEqual(2.0, hparams.b) + self.assertEqual('2.3"', hparams.c_c) + self.assertEqual('/a/b/c/d', hparams.d) + hparams.parse('b=1.5,d=/a=b/c/d,aaa=10,') + self.assertEqual(10, hparams.aaa) + self.assertEqual(1.5, hparams.b) + self.assertEqual('2.3"', hparams.c_c) + self.assertEqual('/a=b/c/d', hparams.d) with self.assertRaisesRegexp(ValueError, 'Unknown hyperparameter'): hparams.parse('x=123') with self.assertRaisesRegexp(ValueError, 'Could not parse'): @@ -84,17 +104,19 @@ class HParamsTest(test.TestCase): hparams.parse('b=relu') with self.assertRaisesRegexp(ValueError, 'Must not pass a list'): hparams.parse('aaa=[123]') - self.assertEqual(12, hparams.aaa) - self.assertEqual(2.0, hparams.b) + self.assertEqual(10, hparams.aaa) + self.assertEqual(1.5, hparams.b) self.assertEqual('2.3"', hparams.c_c) + self.assertEqual('/a=b/c/d', hparams.d) # Exports to proto. hparam_def = hparams.to_proto() # Imports from proto. hparams2 = hparam.HParams(hparam_def=hparam_def) # Verifies that all hparams are restored. - self.assertEqual(12, hparams2.aaa) - self.assertEqual(2.0, hparams2.b) + self.assertEqual(10, hparams2.aaa) + self.assertEqual(1.5, hparams2.b) self.assertEqual('2.3"', hparams2.c_c) + self.assertEqual('/a=b/c/d', hparams2.d) def testSetFromMap(self): hparams = hparam.HParams(a=1, b=2.0, c='tanh') -- GitLab From 2b078a508b8c6c920db121f676650d7972749bd7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 10:00:20 -0700 Subject: [PATCH 482/960] Automated g4 rollback of changelist 190293303 PiperOrigin-RevId: 190479555 --- tensorflow/c/BUILD | 2 -- tensorflow/c/python_api.cc | 26 --------------- tensorflow/c/python_api.h | 7 ---- tensorflow/python/BUILD | 2 -- tensorflow/python/client/tf_session.i | 1 - tensorflow/python/framework/importer_test.py | 34 -------------------- tensorflow/python/framework/ops.py | 10 ------ 7 files changed, 82 deletions(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index f4a486d330..d096647558 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -279,8 +279,6 @@ tf_cuda_library( deps = [ ":c_api", ":c_api_internal", - # TODO(b/74620627): remove when _USE_C_SHAPES is removed - "//tensorflow/python:cpp_shape_inference_proto_cc", ], ) diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index 93155998b8..cd604538f1 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/c/python_api.h" #include "tensorflow/c/c_api_internal.h" -#include "tensorflow/python/framework/cpp_shape_inference.pb.h" namespace tensorflow { @@ -110,29 +109,4 @@ void ExtendSession(TF_Session* session, TF_Status* status) { session->extend_before_run = false; } -std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { - Node* node = &output.oper->node; - CppShapeInferenceResult::HandleData handle_data; - handle_data.set_is_set(true); - { - mutex_lock l(graph->mu); - tensorflow::shape_inference::InferenceContext* ic = - graph->refiner.GetContext(node); - CHECK(ic != nullptr); - CHECK_LT(output.index, ic->num_outputs()); - const auto* shapes_and_types = - ic->output_handle_shapes_and_types(output.index); - if (shapes_and_types == nullptr) return ""; - - for (const auto& p : *shapes_and_types) { - auto* out_shape_and_type = handle_data.add_shape_and_type(); - ic->ShapeHandleToProto(p.shape, out_shape_and_type->mutable_shape()); - out_shape_and_type->set_dtype(p.dtype); - } - } - string result; - handle_data.SerializeToString(&result); - return result; -} - } // namespace tensorflow diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h index 2d4c8cd9ed..13b680b3a2 100644 --- a/tensorflow/c/python_api.h +++ b/tensorflow/c/python_api.h @@ -16,8 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_C_PYTHON_API_H_ #define TENSORFLOW_C_PYTHON_API_H_ -#include - #include "tensorflow/c/c_api.h" // These functions can be removed without notice. They exist to facilitate some @@ -53,11 +51,6 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require); // the graph after the session has been made aware of them. void ExtendSession(TF_Session* session, TF_Status* status); -// Returns the serialized CppShapeInferenceResult::HandleData proto for -// `output` if its a resource tensor, or otherwise returns the empty string. -// TODO(b/74620627): remove when _USE_C_SHAPES is removed -std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output); - } // namespace tensorflow #endif // TENSORFLOW_C_PYTHON_API_H_ diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e6ad564ede..30ecc477f2 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3131,8 +3131,6 @@ tf_proto_library( srcs = ["framework/cpp_shape_inference.proto"], cc_api_version = 2, protodeps = tf_additional_all_protos(), - # TODO(b/74620627): remove when _USE_C_SHAPES is removed - visibility = ["//tensorflow:internal"], ) py_test( diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 70a3d032f4..e88fc0c01a 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -723,7 +723,6 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_TryEvaluateConstant_wrapper; %noexception TF_TryEvaluateConstant_wrapper; %unignore ExtendSession; -%unignore ResourceHandleShapeAndType; %include "tensorflow/python/client/tf_session_helper.h" diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 369669c2e6..6593b17184 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -39,7 +39,6 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -357,39 +356,6 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d._input_types, [dtypes.int32_ref, dtypes.int32]) self.assertEqual(d.outputs, []) - def testResources(self): - # Produce GraphDef containing a ops producing and consuming resources. - graph = ops.Graph() - with graph.as_default(): - var = resource_variable_ops.ResourceVariable(1.0) - var_assign = var.assign(2.0) - # Use an op that requires handle shape to be set. - var_shape = resource_variable_ops.variable_shape(var.handle) - init = variables.global_variables_initializer() - graph_def = graph.as_graph_def() - - # Import the GraphDef. - with ops.Graph().as_default(): - # pylint: disable=unused-variable - imported_var, imported_assign, imported_shape, imported_init = ( - importer.import_graph_def( - graph_def, - return_elements=[var.name, var_assign.name, var_shape.name, - init.name])) - - # Make sure the handle shape is set on the imported variable. - new_var_shape = resource_variable_ops.variable_shape(imported_var) - # pylint: enable=unused-variable - - # Run the imported graph. - # TODO(b/76173421): make this work (currently DCHECKS) - # with self.test_session() as sess: - # sess.run(imported_init) - # self.assertEqual(sess.run(imported_var), 1.0) - # self.assertEqual(sess.run(imported_assign), 2.0) - # self.assertEqual(list(sess.run(imported_shape)), []) - # self.assertEqual(list(sess.run(new_var_shape)), []) - def testWhileLoop(self): # Produce GraphDef containing while loop. graph = ops.Graph() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5e4a884a70..e579289a8d 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -42,7 +42,6 @@ from tensorflow.python.eager import context from tensorflow.python.eager import core from tensorflow.python.eager import tape from tensorflow.python.framework import c_api_util -from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -296,7 +295,6 @@ class Tensor(_TensorLike): # Attributes used for C++ shape inference. Not inspected, only forwarded. # If set, will be a HandleData object from cpp_shape_inference.proto. - # TODO(b/74620627): remove when _USE_C_SHAPES is removed self._handle_data = None self._id = uid() @@ -2474,14 +2472,6 @@ def _set_shapes_for_outputs_c_api(op): shape_vector = [None if d == -1 else d for d in shape_vector] output.set_shape(tensor_shape.TensorShape(shape_vector)) - serialized = c_api.ResourceHandleShapeAndType(op._graph._c_graph, - output._as_tf_output()) - if serialized: - output._handle_data = ( - cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( - compat.as_bytes(serialized))) - else: - output._handle_data = None # TODO(skyewm): remove this when _USE_C_API flag is removed. def _set_shapes_for_outputs(op): -- GitLab From cc6b2ae837e9c0ce3678671ff5bd59f0f8e53e06 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Mon, 26 Mar 2018 10:25:46 -0700 Subject: [PATCH 483/960] Adding a FunctionBufferingResourceReset Op that resets the state of the function buffering resource so that we can start using it with re-initializable iterators. PiperOrigin-RevId: 190484110 --- .../data/kernels/prefetching_kernels.cc | 66 +++++++-- tensorflow/contrib/data/ops/dataset_ops.cc | 9 ++ .../kernel_tests/prefetching_ops_test.py | 137 +++++++++++++++--- .../data/python/ops/prefetching_ops.py | 5 + 4 files changed, 184 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index 190a6ee580..79d1fc3494 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -65,12 +65,6 @@ class FunctionBufferingResource : public ResourceBase { ~FunctionBufferingResource() override { Cancel(); - { - mutex_lock l(mu_); - while (is_buffering_) { - cond_var_.wait(l); - } - } if (thread_pool_ != nullptr) { delete thread_pool_; } @@ -107,6 +101,20 @@ class FunctionBufferingResource : public ResourceBase { void Cancel() LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); cancelled_ = true; + while (is_buffering_) { + cond_var_.wait(l); + } + } + + // Cancels all pending operations and then clears out the state. + void Reset() LOCKS_EXCLUDED(mu_) { + Cancel(); + mutex_lock l(mu_); + buffer_.clear(); + requests_.clear(); + is_buffering_ = false; + end_of_sequence_ = false; + cancelled_ = false; } // If the buffer has anything, runs `callback` on the first element in the @@ -200,13 +208,12 @@ class FunctionBufferingResource : public ResourceBase { mutex_lock l(mu_); BufferElement buffer_element; buffer_element.status = status; - if (!status.ok()) { + if (status.ok()) { + buffer_element.value.swap(*rets); + } else { end_of_sequence_ = true; is_buffering_ = false; - buffer_.push_back(std::move(buffer_element)); - return; } - buffer_element.value.swap(*rets); buffer_.push_back(std::move(buffer_element)); if (!requests_.empty()) { buffer_front = std::move(buffer_.front()); @@ -214,7 +221,7 @@ class FunctionBufferingResource : public ResourceBase { callback = std::move(requests_.front()); requests_.pop_front(); } - if (buffer_.size() < buffer_size_) { + if (buffer_.size() < buffer_size_ && !end_of_sequence_) { restart_buffering = true; } else { is_buffering_ = false; @@ -406,6 +413,43 @@ REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceGetNext") FunctionBufferingResourceGetNextOp); #endif // TENSORFLOW_USE_SYCL +// Resets the FunctionBufferingResource, cancelling all pending requests and +// clearing out the buffer. +class FunctionBufferingResourceResetOp : public OpKernel { + public: + explicit FunctionBufferingResourceResetOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + ~FunctionBufferingResourceResetOp() override {} + + void Compute(OpKernelContext* ctx) override { + ResourceHandle handle; + OP_REQUIRES_OK(ctx, + HandleFromInput(ctx, "function_buffer_resource", &handle)); + FunctionBufferingResource* buffer = nullptr; + OP_REQUIRES_OK( + ctx, LookupResource(ctx, handle, &buffer)); + core::ScopedUnref s(buffer); + + buffer->Reset(); + } +}; + +REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset") + .Device(DEVICE_CPU) + .HostMemory("function_buffer_resource"), + FunctionBufferingResourceResetOp); +REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset") + .Device(DEVICE_GPU) + .HostMemory("function_buffer_resource"), + FunctionBufferingResourceResetOp); +#if TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER(Name("FunctionBufferingResourceReset") + .Device(DEVICE_SYCL) + .HostMemory("function_buffer_resource"), + FunctionBufferingResourceResetOp); +#endif // TENSORFLOW_USE_SYCL + class IteratorGetDeviceOp : public OpKernel { public: using OpKernel::OpKernel; diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc index 74737bbcad..bd96448d64 100644 --- a/tensorflow/contrib/data/ops/dataset_ops.cc +++ b/tensorflow/contrib/data/ops/dataset_ops.cc @@ -83,6 +83,15 @@ output: A list of return values. output_types: The type list for the return values. )doc"); +REGISTER_OP("FunctionBufferingResourceReset") + .Input("function_buffer_resource: resource") + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Resets the FunctionBufferingResource. + +function_buffer_resource: The FunctionBufferingResource handle. +)doc"); + REGISTER_OP("ThreadPoolDataset") .Input("input_dataset: variant") .Input("thread_pool: resource") diff --git a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py index 1d74afe1e1..a14736ac09 100644 --- a/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import itertools import threading from tensorflow.contrib.data.python.ops import prefetching_ops @@ -39,25 +38,29 @@ class StagingAreaOpsTest(test.TestCase): def setUp(self): self._event = threading.Event() - def _prefetch_fn_helper(self, buffer_name, device0, device1): - worker_config = config_pb2.ConfigProto() - worker_config.device_count["CPU"] = 2 + def _create_ds_and_iterator(self, device0, initializable=False): def gen(): - for i in itertools.count(start=1, step=1): - yield [i + 0.0] + for i in range(1, 10): + yield [float(i)] if i == 6: self._event.set() with ops.device(device0): - dataset_3 = dataset_ops.Dataset.from_generator(gen, (dtypes.float32)) - iterator_3 = dataset_3.make_one_shot_iterator() - iterator_3_handle = iterator_3.string_handle() + ds = dataset_ops.Dataset.from_generator(gen, (dtypes.float32)) + if initializable: + ds_iterator = ds.make_initializable_iterator() + else: + ds_iterator = ds.make_one_shot_iterator() + return (ds, ds_iterator) + + def _create_ops(self, ds, ds_iterator, buffer_name, device0, device1): + ds_iterator_handle = ds_iterator.string_handle() @function.Defun(dtypes.string) def _remote_fn(h): remote_iterator = iterator_ops.Iterator.from_string_handle( - h, dataset_3.output_types, dataset_3.output_shapes) + h, ds.output_types, ds.output_shapes) return remote_iterator.get_next() target = constant_op.constant(device0) @@ -65,7 +68,7 @@ class StagingAreaOpsTest(test.TestCase): buffer_resource_handle = prefetching_ops.function_buffering_resource( f=_remote_fn, target_device=target, - string_arg=iterator_3_handle, + string_arg=ds_iterator_handle, buffer_size=3, thread_pool_size=2, shared_name=buffer_name) @@ -74,6 +77,20 @@ class StagingAreaOpsTest(test.TestCase): prefetch_op = prefetching_ops.function_buffering_resource_get_next( function_buffer_resource=buffer_resource_handle, output_types=[dtypes.float32]) + reset_op = prefetching_ops.function_buffering_resource_reset( + function_buffer_resource=buffer_resource_handle) + destroy_op = resource_variable_ops.destroy_resource_op( + buffer_resource_handle, ignore_lookup_error=True) + + return (prefetch_op, reset_op, destroy_op) + + def _prefetch_fn_helper_one_shot(self, buffer_name, device0, device1): + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + + ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=False) + prefetch_op, _, destroy_op = self._create_ops(ds, ds_iterator, buffer_name, + device0, device1) with self.test_session(config=worker_config) as sess: elem = sess.run(prefetch_op) @@ -87,26 +104,102 @@ class StagingAreaOpsTest(test.TestCase): self._event.wait() elem = sess.run(prefetch_op) self.assertEqual(elem, [5.0]) - sess.run( - resource_variable_ops.destroy_resource_op( - buffer_resource_handle, ignore_lookup_error=True)) + sess.run(destroy_op) def testSameDeviceCPU(self): - self._prefetch_fn_helper("same_device_cpu", - "/job:localhost/replica:0/task:0/cpu:0", - "/job:localhost/replica:0/task:0/cpu:0") + self._prefetch_fn_helper_one_shot("same_device_cpu", + "/job:localhost/replica:0/task:0/cpu:0", + "/job:localhost/replica:0/task:0/cpu:0") def testDifferentDeviceCPU(self): - self._prefetch_fn_helper("diff_device_cpu", - "/job:localhost/replica:0/task:0/cpu:0", - "/job:localhost/replica:0/task:0/cpu:1") + self._prefetch_fn_helper_one_shot("diff_device_cpu", + "/job:localhost/replica:0/task:0/cpu:0", + "/job:localhost/replica:0/task:0/cpu:1") def testDifferentDeviceCPUGPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") - self._prefetch_fn_helper("cpu_gpu", "/job:localhost/replica:0/task:0/cpu:0", - "/job:localhost/replica:0/task:0/gpu:0") + self._prefetch_fn_helper_one_shot("cpu_gpu", + "/job:localhost/replica:0/task:0/cpu:0", + "/job:localhost/replica:0/task:0/gpu:0") + + def testReinitialization(self): + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + + device0 = "/job:localhost/replica:0/task:0/cpu:0" + device1 = "/job:localhost/replica:0/task:0/cpu:1" + ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True) + prefetch_op, reset_op, destroy_op = self._create_ops( + ds, ds_iterator, "reinit", device0, device1) + + with self.test_session(config=worker_config) as sess: + sess.run(ds_iterator.initializer) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [1.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [2.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [3.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [4.0]) + self._event.wait() + elem = sess.run(prefetch_op) + self.assertEqual(elem, [5.0]) + # Lets reset the function buffering resource and reinitialize the + # iterator. Should be able to go through this again. + self._event.clear() + sess.run(reset_op) + sess.run(ds_iterator.initializer) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [1.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [2.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [3.0]) + elem = sess.run(prefetch_op) + self.assertEqual(elem, [4.0]) + self._event.wait() + elem = sess.run(prefetch_op) + self.assertEqual(elem, [5.0]) + sess.run(destroy_op) + + def testReinitializationOutOfRange(self): + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + + device0 = "/job:localhost/replica:0/task:0/cpu:0" + device1 = "/job:localhost/replica:0/task:0/cpu:1" + ds, ds_iterator = self._create_ds_and_iterator(device0, initializable=True) + prefetch_op, reset_op, destroy_op = self._create_ops( + ds, ds_iterator, "reinit", device0, device1) + + with self.test_session(config=worker_config) as sess: + sess.run(ds_iterator.initializer) + for i in range(1, 10): + elem = sess.run(prefetch_op) + self.assertEqual(elem, [float(i)]) + # Try fetching after its over twice to test out end of sequence. + with self.assertRaises(errors.OutOfRangeError): + sess.run(prefetch_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(prefetch_op) + + # Now reset everything and try it out again. + self._event.clear() + sess.run(reset_op) + sess.run(ds_iterator.initializer) + for i in range(1, 10): + elem = sess.run(prefetch_op) + self.assertEqual(elem, [float(i)]) + # Try fetching after its over twice to test out end of sequence. + with self.assertRaises(errors.OutOfRangeError): + sess.run(prefetch_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(prefetch_op) + + sess.run(destroy_op) def testPrefetchToDevice(self): host_dataset = dataset_ops.Dataset.range(10) diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index e38d53a221..1438b5426f 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -62,6 +62,11 @@ def function_buffering_resource_get_next(function_buffer_resource, name=name) +def function_buffering_resource_reset(function_buffer_resource, name=None): + return gen_dataset_ops.function_buffering_resource_reset( + function_buffer_resource=function_buffer_resource, name=name) + + # pylint: disable=protected-access class _PrefetchToDeviceIterator(object): """A replacement for @{tf.data.Iterator} that prefetches to another device.""" -- GitLab From be917027e37c5e8f21f6ba07f24bdbf072cf6dfd Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Mar 2018 10:51:21 -0700 Subject: [PATCH 484/960] Added experimental C APIs to build a stack of dataset + iterator nodes that reads imagenet TFRecord files. PiperOrigin-RevId: 190488817 --- tensorflow/c/BUILD | 2 + tensorflow/c/c_api_experimental.cc | 7218 ++++++++++++++++++++++- tensorflow/c/c_api_experimental.h | 31 +- tensorflow/c/c_api_experimental_test.cc | 84 +- tensorflow/c/testdata/tf_record | Bin 0 -> 417114 bytes 5 files changed, 7155 insertions(+), 180 deletions(-) create mode 100644 tensorflow/c/testdata/tf_record diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index d096647558..426f97b844 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -220,6 +220,7 @@ tf_cc_test( name = "c_api_experimental_test", size = "small", srcs = ["c_api_experimental_test.cc"], + data = ["testdata/tf_record"], linkopts = select({ "//tensorflow:darwin": ["-headerpad_max_install_names"], "//conditions:default": [], @@ -230,6 +231,7 @@ tf_cc_test( deps = [ ":c_api_experimental", ":c_test_util", + "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", ], diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 8593a8eb50..1c809cb21e 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -22,10 +22,15 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/protobuf/config.pb.h" +using tensorflow::FunctionDef; using tensorflow::Node; using tensorflow::NodeBuilder; using tensorflow::Status; -using tensorflow::Tensor; + +namespace { +typedef std::unique_ptr + UniqueFuncPtr; +} // struct TF_Operation { tensorflow::Node node; }; static TF_Operation* ToTF_Operation(Node* node) { @@ -102,8 +107,7 @@ void TF_ShutdownTPU(TF_Session* session, TF_Status* status) { /*run_metadata*/ nullptr, status); } -TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, - size_t* len) { +const char* TF_GraphDebugString(TF_Graph* graph, size_t* len) { tensorflow::mutex_lock c(graph->mu); const auto& debug_str = graph->graph.ToGraphDefDebug().DebugString(); *len = debug_str.size(); @@ -112,55 +116,56 @@ TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, return ret; } -// TODO(hongm): Replace this will a real implementation. -static tensorflow::Status BuildDatasetTest(TF_Graph* dataset_graph, - Node** dataset_node) { - tensorflow::mutex_lock c(dataset_graph->mu); - Tensor const_t(tensorflow::DT_INT32, tensorflow::TensorShape({})); - const_t.flat()(0) = 1; - - Node* const_node; - TF_RETURN_IF_ERROR(NodeBuilder("Const", "Const") - .Attr("dtype", tensorflow::DT_INT32) - .Attr("value", const_t) - .Finalize(&dataset_graph->graph, &const_node)); - - std::vector input_list; - input_list.push_back(NodeBuilder::NodeOut(const_node, 0)); - - return NodeBuilder("TensorDataset", "TensorDataset") - .Input(input_list) - .Attr("Toutput_types", {tensorflow::DT_INT32}) - .Attr("output_shapes", {tensorflow::TensorShapeProto()}) - .Finalize(&dataset_graph->graph, dataset_node); -} - -// On success, returns a newly created TF_Function instance from -// `text_proto`. It must be deleted by calling TF_DeleteFunction. -static TF_Function* CreateFunctionFromTextProto(const char* text_proto, - TF_Status* status) { - tensorflow::FunctionDef fdef; - if (!tensorflow::protobuf::TextFormat::ParseFromString(text_proto, &fdef)) { +// On success, returns a set of TF_Function instances from `text_proto` of +// GraphDef type. These functions must be deleted by calling TF_DeleteFunction. +// +// If `mutate_proto_func` is non-NULL, run it over each FunctionDef proto, +// before creating a TF_Function out of the possibly mutated proto. +static std::vector CreateFunctionsFromTextProto( + const char* text_proto, + std::function* mutate_proto_func, TF_Status* status) { + tensorflow::GraphDef gdef; + if (!tensorflow::protobuf::TextFormat::ParseFromString(text_proto, &gdef)) { status->status = tensorflow::errors::Internal( - "Invalid text proto for FunctionDef: ", text_proto); - return nullptr; + "Invalid text proto for GraphDef: ", text_proto); + return {}; + } + const auto& fdef_lib = gdef.library(); + if (fdef_lib.gradient_size() > 0) { + status->status = tensorflow::errors::Internal( + "GradientDef is not supported in reading Dataset related functions: ", + text_proto); + return {}; } - std::vector binary_proto_buf(fdef.ByteSizeLong()); - fdef.SerializeToArray(binary_proto_buf.data(), binary_proto_buf.size()); - return TF_FunctionImportFunctionDef(binary_proto_buf.data(), - binary_proto_buf.size(), status); + std::vector ret; + for (const auto& fdef : fdef_lib.function()) { + // Make a copy so that we can mutate it. + FunctionDef fdef_to_load = fdef; + if (mutate_proto_func) { + (*mutate_proto_func)(&fdef_to_load); + } + VLOG(1) << "Adding func to graph: " << fdef_to_load.DebugString(); + std::vector binary_proto_buf(fdef_to_load.ByteSizeLong()); + fdef_to_load.SerializeToArray(binary_proto_buf.data(), + binary_proto_buf.size()); + auto func = TF_FunctionImportFunctionDef(binary_proto_buf.data(), + binary_proto_buf.size(), status); + if (!status->status.ok()) return {}; + ret.push_back(UniqueFuncPtr(func, TF_DeleteFunction)); + } + return ret; } -// On success, returns a newly created TF_Function instance from `proto_file`, -// and sets `dataset_name` to the created dataset name. The returned function -// must be deleted by calling TF_DeleteFunction. -// -// TODO(hongm): Support reading the file given by `proto_file`. -static TF_Function* LoadDatasetFunction(const char* proto_file, - std::string* dataset_name, - TF_Status* status) { +// On success, returns a newly created TF_Function instance encoding a dataset +// node stack that returns a sequence of 3 floats, and sets `dataset_name` to +// the created dataset name. The returned function must be deleted by calling +// TF_DeleteFunction. +static UniqueFuncPtr CreateFakeDatasetFunction(std::string* dataset_name, + TF_Status* status) { const char* func_def = R"PREFIX( -signature { +library { + function { + signature { name: "_make_dataset_d8de2712" output_arg { name: "TensorSliceDataset" @@ -217,112 +222,7029 @@ signature { ret { key: "TensorSliceDataset" value: "TensorSliceDataset:handle:0" - })PREFIX"; + } + } +} +)PREFIX"; *dataset_name = "_make_dataset_d8de2712"; - return CreateFunctionFromTextProto(func_def, status); + auto functions = CreateFunctionsFromTextProto( + func_def, /*mutate_proto_func*/ nullptr, status); + DCHECK_EQ(functions.size(), 1); + return std::move(functions[0]); } -// TODO(hongm): Use `file_path` in the implementation. -TF_Operation* TF_MakeIteratorGetNextWithDatasets(TF_Graph* graph, - const char* file_path, - TF_Function** dataset_func, - TF_Status* status) { - tensorflow::Status s; - - // We can parameterize the function name, if we ever need more than 1 - // iterators in a graph. - const std::string dataset_name = "UNIQUE_DATASET"; - - std::unique_ptr dataset_graph( - TF_NewGraph(), TF_DeleteGraph); - Node* dataset_node = nullptr; - s = BuildDatasetTest(dataset_graph.get(), &dataset_node); - if (!s.ok()) { - status->status = s; - return nullptr; - } - - TF_Output output{ToTF_Operation(dataset_node), 0}; - std::unique_ptr result_func( - TF_GraphToFunction(dataset_graph.get(), dataset_name.c_str(), - /*append_hash_to_fn_name*/ false, - /*num_opers*/ -1, - /*opers*/ nullptr, - /*numinputs*/ 0, - /*inputs*/ nullptr, - /*noutputs*/ 1, - /*outputs*/ &output, - /*outputnames*/ nullptr, - /*functionoptions*/ nullptr, "", status), - TF_DeleteFunction); - if (!status->status.ok()) { - return nullptr; - } - - TF_GraphCopyFunction(graph, result_func.get(), /*gradient*/ nullptr, status); - - if (!status->status.ok()) { - return nullptr; - } - - tensorflow::mutex_lock c(graph->mu); - - tensorflow::NameAttrList func; - func.set_name(dataset_name); - // Run the iterator node on CPU. - Node* oneshot_iterator_node; - std::vector output_shape_list; - output_shape_list.push_back(tensorflow::TensorShapeProto()); - s = NodeBuilder("OneShotIterator", "OneShotIterator") - .Device("/device:CPU:0") - .Attr("container", "") - .Attr("dataset_factory", func) - .Attr("output_types", {tensorflow::DT_INT32}) - .Attr("output_shapes", output_shape_list) - .Attr("shared_name", "") - .Finalize(&graph->graph, &oneshot_iterator_node); - if (!s.ok()) { - status->status = s; - return nullptr; - } - // Run shape inference function for each newly added node, so that more - // subsequent nodes can be added to the graph via C API (TF_NewOperation()). - s = graph->refiner.AddNode(oneshot_iterator_node); - if (!s.ok()) { - status->status = s; - return nullptr; - } - - // Run the iterator node on CPU. - Node* getnext_node; - s = NodeBuilder("IteratorGetNext", "IteratorGetNext") - .Input(oneshot_iterator_node) - .Device("/device:CPU:0") - .Attr("output_types", {tensorflow::DT_INT32}) - .Attr("output_shapes", output_shape_list) - .Finalize(&graph->graph, &getnext_node); - if (!s.ok()) { - status->status = s; - return nullptr; +// On success, returns a set of TF_Function instances encoding a dataset +// node stack that reads a Imagenet TFRecordFile dataset from `file_path`, and +// sets `dataset_name` to the created dataset name. The returned functions must +// be deleted by calling TF_DeleteFunction. +static std::vector CreateImagenetDatasetFunctions( + const char* file_path, std::string* dataset_name, TF_Status* status) { + const char* func_def = R"PREFIX( +library { + function { + signature { + name: "tf_map_func_91295dea" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "FlatMapDataset" + type: DT_VARIANT + } + description: "A wrapper for Defun that facilitates shape inference." + is_stateful: true + } + node_def { + name: "flat_filenames/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } + } + node_def { + name: "flat_filenames" + op: "Reshape" + input: "arg0" + input: "flat_filenames/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "flat_filenames:output:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "FlatMapDataset" + op: "FlatMapDataset" + input: "TensorSliceDataset:handle:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_map_func_0cc8c35b" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + } + ret { + key: "FlatMapDataset" + value: "FlatMapDataset:handle:0" + } } - // Run shape inference function for each newly added node, so that more - // subsequent nodes can be added to the graph via C API (TF_NewOperation()). - s = graph->refiner.AddNode(getnext_node); - if (!s.ok()) { - status->status = s; - return nullptr; + function { + signature { + name: "tf_map_func_0cc8c35b" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "TFRecordDataset" + type: DT_VARIANT + } + description: "A wrapper for Defun that facilitates shape inference." + is_stateful: true + } + node_def { + name: "compression_type" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "buffer_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 8388608 + } + } + } + } + node_def { + name: "TFRecordDataset" + op: "TFRecordDataset" + input: "arg0" + input: "compression_type:output:0" + input: "buffer_size:output:0" + } + ret { + key: "TFRecordDataset" + value: "TFRecordDataset:handle:0" + } } + function { + signature { + name: "tf_map_func_74b6b15c" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "Reshape_1" + type: DT_FLOAT + } + output_arg { + name: "sub_1" + type: DT_INT32 + } + description: "A wrapper for Defun that facilitates shape inference." + is_stateful: true + } + node_def { + name: "ParseSingleExample/key_image/class/label" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape" + op: "Reshape" + input: "ParseSingleExample/key_image/class/label:output:0" + input: "ParseSingleExample/Reshape/shape:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "ParseSingleExample/key_image/class/text" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_1/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_1" + op: "Reshape" + input: "ParseSingleExample/key_image/class/text:output:0" + input: "ParseSingleExample/Reshape_1/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "ParseSingleExample/key_image/encoded" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_2/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_2" + op: "Reshape" + input: "ParseSingleExample/key_image/encoded:output:0" + input: "ParseSingleExample/Reshape_2/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "ParseSingleExample/key_image/format" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "jpeg" + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_3/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Reshape_3" + op: "Reshape" + input: "ParseSingleExample/key_image/format:output:0" + input: "ParseSingleExample/Reshape_3/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "ParseSingleExample/ParseSingleExample" + op: "ParseSingleExample" + input: "arg0" + input: "ParseSingleExample/Reshape:output:0" + input: "ParseSingleExample/Reshape_1:output:0" + input: "ParseSingleExample/Reshape_2:output:0" + input: "ParseSingleExample/Reshape_3:output:0" + attr { + key: "Tdense" + value { + list { + type: DT_INT64 + type: DT_STRING + type: DT_STRING + type: DT_STRING + } + } + } + attr { + key: "dense_keys" + value { + list { + s: "image/class/label" + s: "image/class/text" + s: "image/encoded" + s: "image/format" + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + } + shape { + } + shape { + } + shape { + } + } + } + } + attr { + key: "num_sparse" + value { + i: 5 + } + } + attr { + key: "sparse_keys" + value { + list { + s: "image/object/bbox/xmax" + s: "image/object/bbox/xmin" + s: "image/object/bbox/ymax" + s: "image/object/bbox/ymin" + s: "image/object/class/label" + } + } + } + attr { + key: "sparse_types" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + } + } + } + } + node_def { + name: "Reshape/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "Reshape" + op: "Reshape" + input: "ParseSingleExample/ParseSingleExample:dense_values:2" + input: "Reshape/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/Substr/pos" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + } + node_def { + name: "decode_image/Substr/len" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/Substr" + op: "Substr" + input: "Reshape:output:0" + input: "decode_image/Substr/pos:output:0" + input: "decode_image/Substr/len:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/is_jpeg/Substr/pos" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + } + node_def { + name: "decode_image/is_jpeg/Substr/len" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/is_jpeg/Substr" + op: "Substr" + input: "Reshape:output:0" + input: "decode_image/is_jpeg/Substr/pos:output:0" + input: "decode_image/is_jpeg/Substr/len:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/is_jpeg/Equal/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "\377\330\377" + } + } + } + } + node_def { + name: "decode_image/is_jpeg/Equal" + op: "Equal" + input: "decode_image/is_jpeg/Substr:output:0" + input: "decode_image/is_jpeg/Equal/y:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + } + node_def { + name: "decode_image/cond_jpeg/Switch" + op: "Switch" + input: "decode_image/is_jpeg/Equal:z:0" + input: "decode_image/is_jpeg/Equal:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/switch_t" + op: "Identity" + input: "decode_image/cond_jpeg/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/switch_f" + op: "Identity" + input: "decode_image/cond_jpeg/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/pred_id" + op: "Identity" + input: "decode_image/is_jpeg/Equal:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/check_jpeg_channels/x" + op: "Const" + input: "^decode_image/cond_jpeg/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/check_jpeg_channels/y" + op: "Const" + input: "^decode_image/cond_jpeg/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 4 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/check_jpeg_channels" + op: "NotEqual" + input: "decode_image/cond_jpeg/check_jpeg_channels/x:output:0" + input: "decode_image/cond_jpeg/check_jpeg_channels/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/cond_jpeg/Assert/Const" + op: "Const" + input: "^decode_image/cond_jpeg/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 1, 3) when decoding JPEG images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/Assert/Assert/data_0" + op: "Const" + input: "^decode_image/cond_jpeg/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 1, 3) when decoding JPEG images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/Assert/Assert" + op: "Assert" + input: "decode_image/cond_jpeg/check_jpeg_channels:z:0" + input: "decode_image/cond_jpeg/Assert/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "decode_image/cond_jpeg/DecodeJpeg" + op: "DecodeJpeg" + input: "decode_image/cond_jpeg/DecodeJpeg/Switch:output_true:0" + input: "^decode_image/cond_jpeg/Assert/Assert" + attr { + key: "acceptable_fraction" + value { + f: 1.0 + } + } + attr { + key: "channels" + value { + i: 3 + } + } + attr { + key: "dct_method" + value { + s: "" + } + } + attr { + key: "fancy_upscaling" + value { + b: true + } + } + attr { + key: "ratio" + value { + i: 1 + } + } + attr { + key: "try_recover_truncated" + value { + b: false + } + } + } + node_def { + name: "decode_image/cond_jpeg/DecodeJpeg/Switch" + op: "Switch" + input: "Reshape:output:0" + input: "decode_image/cond_jpeg/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/is_png/y" + op: "Const" + input: "^decode_image/cond_jpeg/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "\211PN" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/is_png" + op: "Equal" + input: "decode_image/cond_jpeg/is_png/Switch:output_false:0" + input: "decode_image/cond_jpeg/is_png/y:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + } + node_def { + name: "decode_image/cond_jpeg/is_png/Switch" + op: "Switch" + input: "decode_image/Substr:output:0" + input: "decode_image/cond_jpeg/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@decode_image/Substr" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/Switch" + op: "Switch" + input: "decode_image/cond_jpeg/is_png:z:0" + input: "decode_image/cond_jpeg/is_png:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/switch_t" + op: "Identity" + input: "decode_image/cond_jpeg/cond_png/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/switch_f" + op: "Identity" + input: "decode_image/cond_jpeg/cond_png/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/pred_id" + op: "Identity" + input: "decode_image/cond_jpeg/is_png:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/DecodePng" + op: "DecodePng" + input: "decode_image/cond_jpeg/cond_png/DecodePng/Switch_1:output_true:0" + attr { + key: "channels" + value { + i: 3 + } + } + attr { + key: "dtype" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/DecodePng/Switch" + op: "Switch" + input: "Reshape:output:0" + input: "decode_image/cond_jpeg/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/DecodePng/Switch_1" + op: "Switch" + input: "decode_image/cond_jpeg/cond_png/DecodePng/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/is_gif/y" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "GIF" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/is_gif" + op: "Equal" + input: "decode_image/cond_jpeg/cond_png/is_gif/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/is_gif/y:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/is_gif/Switch" + op: "Switch" + input: "decode_image/cond_jpeg/is_png/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@decode_image/Substr" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Switch" + op: "Switch" + input: "decode_image/cond_jpeg/cond_png/is_gif:z:0" + input: "decode_image/cond_jpeg/cond_png/is_gif:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + op: "Identity" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + op: "Identity" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/pred_id" + op: "Identity" + input: "decode_image/cond_jpeg/cond_png/is_gif:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels/x" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels/y" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels" + op: "NotEqual" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels/x:output:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1/x" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1/y" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 4 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1" + op: "NotEqual" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1/x:output:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/LogicalAnd" + op: "LogicalAnd" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels:z:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_gif_channels_1:z:0" + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert/Const" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 3) when decoding GIF images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert/data_0" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 3) when decoding GIF images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert" + op: "Assert" + input: "decode_image/cond_jpeg/cond_png/cond_gif/LogicalAnd:z:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif" + op: "DecodeGif" + input: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif/Switch_1:output_true:0" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/Assert/Assert" + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif/Switch" + op: "Switch" + input: "decode_image/cond_jpeg/cond_png/DecodePng/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif/Switch_1" + op: "Switch" + input: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/pos" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/len" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Substr" + op: "Substr" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/pos:output:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/len:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/Switch" + op: "Switch" + input: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif/Switch:output_false:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/pred_id:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/is_bmp/y" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "BM" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/is_bmp" + op: "Equal" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Substr:output:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/is_bmp/y:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Const" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Unable to decode bytes as JPEG, PNG, GIF, or BMP" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert/data_0" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Unable to decode bytes as JPEG, PNG, GIF, or BMP" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert" + op: "Assert" + input: "decode_image/cond_jpeg/cond_png/cond_gif/is_bmp:z:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels/x" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels/y" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels" + op: "NotEqual" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels/x:output:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Const" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 3) when decoding BMP images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Assert/data_0" + op: "Const" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/switch_f" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Channels must be in (None, 0, 3) when decoding BMP images" + } + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Assert" + op: "Assert" + input: "decode_image/cond_jpeg/cond_png/cond_gif/check_channels:z:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeBmp" + op: "DecodeBmp" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Substr/Switch:output_false:0" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert" + input: "^decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Assert" + attr { + key: "channels" + value { + i: 0 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/cond_gif/Merge" + op: "Merge" + input: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeBmp:image:0" + input: "decode_image/cond_jpeg/cond_png/cond_gif/DecodeGif:image:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "decode_image/cond_jpeg/cond_png/Merge" + op: "Merge" + input: "decode_image/cond_jpeg/cond_png/cond_gif/Merge:output:0" + input: "decode_image/cond_jpeg/cond_png/DecodePng:image:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "decode_image/cond_jpeg/Merge" + op: "Merge" + input: "decode_image/cond_jpeg/cond_png/Merge:output:0" + input: "decode_image/cond_jpeg/DecodeJpeg:image:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "convert_image/Cast" + op: "Cast" + input: "decode_image/cond_jpeg/Merge:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "convert_image/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.00392156885937 + } + } + } + } + node_def { + name: "convert_image" + op: "Mul" + input: "convert_image/Cast:y:0" + input: "convert_image/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "Const" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\000\000\000\000\000\000\200?\000\000\200?" + } + } + } + } + node_def { + name: "distorted_bounding_box_crop/Shape" + op: "Shape" + input: "convert_image:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "distorted_bounding_box_crop/sample_distorted_bounding_box/SampleDistortedBoundingBoxV2/min_object_covered" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149 + } + } + } + } + node_def { + name: "distorted_bounding_box_crop/sample_distorted_bounding_box/SampleDistortedBoundingBoxV2" + op: "SampleDistortedBoundingBoxV2" + input: "distorted_bounding_box_crop/Shape:output:0" + input: "Const:output:0" + input: "distorted_bounding_box_crop/sample_distorted_bounding_box/SampleDistortedBoundingBoxV2/min_object_covered:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "area_range" + value { + list { + f: 0.0799999982119 + f: 1.0 + } + } + } + attr { + key: "aspect_ratio_range" + value { + list { + f: 0.75 + f: 1.33333337307 + } + } + } + attr { + key: "max_attempts" + value { + i: 1 + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } + attr { + key: "use_image_if_no_bounding_boxes" + value { + b: true + } + } + } + node_def { + name: "distorted_bounding_box_crop/Slice" + op: "Slice" + input: "convert_image:z:0" + input: "distorted_bounding_box_crop/sample_distorted_bounding_box/SampleDistortedBoundingBoxV2:begin:0" + input: "distorted_bounding_box_crop/sample_distorted_bounding_box/SampleDistortedBoundingBoxV2:size:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "Shape" + op: "Shape" + input: "convert_image:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Shape_1" + op: "Shape" + input: "distorted_bounding_box_crop/Slice:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Equal" + op: "Equal" + input: "Shape:output:0" + input: "Shape_1:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Cast" + op: "Cast" + input: "Equal:z:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + } + node_def { + name: "Const_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "Sum" + op: "Sum" + input: "Cast:y:0" + input: "Const_1:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "keep_dims" + value { + b: false + } + } + } + node_def { + name: "GreaterEqual/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "GreaterEqual" + op: "GreaterEqual" + input: "Sum:output:0" + input: "GreaterEqual/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/Switch" + op: "Switch" + input: "GreaterEqual:z:0" + input: "GreaterEqual:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/switch_t" + op: "Identity" + input: "cond/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/switch_f" + op: "Identity" + input: "cond/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/pred_id" + op: "Identity" + input: "GreaterEqual:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/Shape" + op: "Shape" + input: "cond/Shape/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/Shape/Switch" + op: "Switch" + input: "convert_image:z:0" + input: "cond/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@convert_image" + } + } + } + } + node_def { + name: "cond/Cast" + op: "Cast" + input: "cond/Shape:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/strided_slice/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/strided_slice/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice" + op: "StridedSlice" + input: "cond/Cast:y:0" + input: "cond/strided_slice/stack:output:0" + input: "cond/strided_slice/stack_1:output:0" + input: "cond/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/strided_slice_1/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_1/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/strided_slice_1/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_1" + op: "StridedSlice" + input: "cond/Cast:y:0" + input: "cond/strided_slice_1/stack:output:0" + input: "cond/strided_slice_1/stack_1:output:0" + input: "cond/strided_slice_1/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/Greater" + op: "Greater" + input: "cond/strided_slice:output:0" + input: "cond/strided_slice_1:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/Switch" + op: "Switch" + input: "cond/Greater:z:0" + input: "cond/Greater:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/cond/switch_t" + op: "Identity" + input: "cond/cond/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/cond/switch_f" + op: "Identity" + input: "cond/cond/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/cond/pred_id" + op: "Identity" + input: "cond/Greater:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "cond/cond/strided_slice/stack" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/cond/strided_slice/stack_1" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice/stack_2" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice" + op: "StridedSlice" + input: "cond/cond/strided_slice/Switch:output_true:0" + input: "cond/cond/strided_slice/stack:output:0" + input: "cond/cond/strided_slice/stack_1:output:0" + input: "cond/cond/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/cond/strided_slice/Switch" + op: "Switch" + input: "cond/Cast:y:0" + input: "cond/cond/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cond/Cast" + } + } + } + } + node_def { + name: "cond/cond/strided_slice_1/stack" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_1/stack_1" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_1/stack_2" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_1" + op: "StridedSlice" + input: "cond/cond/strided_slice/Switch:output_true:0" + input: "cond/cond/strided_slice_1/stack:output:0" + input: "cond/cond/strided_slice_1/stack_1:output:0" + input: "cond/cond/strided_slice_1/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/cond/truediv" + op: "RealDiv" + input: "cond/cond/strided_slice:output:0" + input: "cond/cond/strided_slice_1:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/mul/y" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 224.0 + } + } + } + } + node_def { + name: "cond/cond/mul" + op: "Mul" + input: "cond/cond/truediv:z:0" + input: "cond/cond/mul/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/Cast/x/1" + op: "Const" + input: "^cond/cond/switch_t" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 224.0 + } + } + } + } + node_def { + name: "cond/cond/Cast/x" + op: "Pack" + input: "cond/cond/mul:z:0" + input: "cond/cond/Cast/x/1:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/cond/Cast" + op: "Cast" + input: "cond/cond/Cast/x:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/strided_slice_2/stack" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_2/stack_1" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_2/stack_2" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_2" + op: "StridedSlice" + input: "cond/cond/strided_slice_2/Switch:output_false:0" + input: "cond/cond/strided_slice_2/stack:output:0" + input: "cond/cond/strided_slice_2/stack_1:output:0" + input: "cond/cond/strided_slice_2/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/cond/strided_slice_2/Switch" + op: "Switch" + input: "cond/Cast:y:0" + input: "cond/cond/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cond/Cast" + } + } + } + } + node_def { + name: "cond/cond/strided_slice_3/stack" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_3/stack_1" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_3/stack_2" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/cond/strided_slice_3" + op: "StridedSlice" + input: "cond/cond/strided_slice_2/Switch:output_false:0" + input: "cond/cond/strided_slice_3/stack:output:0" + input: "cond/cond/strided_slice_3/stack_1:output:0" + input: "cond/cond/strided_slice_3/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/cond/truediv_1" + op: "RealDiv" + input: "cond/cond/strided_slice_2:output:0" + input: "cond/cond/strided_slice_3:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/mul_1/y" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 224.0 + } + } + } + } + node_def { + name: "cond/cond/mul_1" + op: "Mul" + input: "cond/cond/truediv_1:z:0" + input: "cond/cond/mul_1/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/Cast_1/x/0" + op: "Const" + input: "^cond/cond/switch_f" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 224.0 + } + } + } + } + node_def { + name: "cond/cond/Cast_1/x" + op: "Pack" + input: "cond/cond/Cast_1/x/0:output:0" + input: "cond/cond/mul_1:z:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/cond/Cast_1" + op: "Cast" + input: "cond/cond/Cast_1/x:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/cond/Merge" + op: "Merge" + input: "cond/cond/Cast_1:y:0" + input: "cond/cond/Cast:y:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/ResizeBicubic/images" + op: "Pack" + input: "cond/Shape/Switch:output_true:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/ResizeBicubic" + op: "ResizeBicubic" + input: "cond/ResizeBicubic/images:output:0" + input: "cond/cond/Merge:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "align_corners" + value { + b: false + } + } + } + node_def { + name: "cond/strided_slice_2/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/strided_slice_2/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_2/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_2" + op: "StridedSlice" + input: "cond/ResizeBicubic:resized_images:0" + input: "cond/strided_slice_2/stack:output:0" + input: "cond/strided_slice_2/stack_1:output:0" + input: "cond/strided_slice_2/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/Shape_1" + op: "Shape" + input: "cond/strided_slice_2:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/strided_slice_3/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/strided_slice_3/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_3/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_3" + op: "StridedSlice" + input: "cond/Shape_1:output:0" + input: "cond/strided_slice_3/stack:output:0" + input: "cond/strided_slice_3/stack_1:output:0" + input: "cond/strided_slice_3/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/Shape_2" + op: "Shape" + input: "cond/strided_slice_2:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/strided_slice_4/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_4/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/strided_slice_4/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_4" + op: "StridedSlice" + input: "cond/Shape_2:output:0" + input: "cond/strided_slice_4/stack:output:0" + input: "cond/strided_slice_4/stack_1:output:0" + input: "cond/strided_slice_4/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/sub/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/sub" + op: "Sub" + input: "cond/strided_slice_3:output:0" + input: "cond/sub/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/add/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/add" + op: "Add" + input: "cond/sub:z:0" + input: "cond/add/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/truediv/Cast" + op: "Cast" + input: "cond/add:z:0" + attr { + key: "DstT" + value { + type: DT_DOUBLE + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv/Cast_1" + op: "Cast" + input: "cond/truediv/y:output:0" + attr { + key: "DstT" + value { + type: DT_DOUBLE + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv" + op: "RealDiv" + input: "cond/truediv/Cast:y:0" + input: "cond/truediv/Cast_1:y:0" + attr { + key: "T" + value { + type: DT_DOUBLE + } + } + } + node_def { + name: "cond/sub_1/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/sub_1" + op: "Sub" + input: "cond/strided_slice_4:output:0" + input: "cond/sub_1/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/add_1/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/add_1" + op: "Add" + input: "cond/sub_1:z:0" + input: "cond/add_1/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv_1/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/truediv_1/Cast" + op: "Cast" + input: "cond/add_1:z:0" + attr { + key: "DstT" + value { + type: DT_DOUBLE + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv_1/Cast_1" + op: "Cast" + input: "cond/truediv_1/y:output:0" + attr { + key: "DstT" + value { + type: DT_DOUBLE + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/truediv_1" + op: "RealDiv" + input: "cond/truediv_1/Cast:y:0" + input: "cond/truediv_1/Cast_1:y:0" + attr { + key: "T" + value { + type: DT_DOUBLE + } + } + } + node_def { + name: "cond/Shape_3" + op: "Shape" + input: "cond/strided_slice_2:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/Rank" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "cond/Equal/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } + } + node_def { + name: "cond/Equal" + op: "Equal" + input: "cond/Rank:output:0" + input: "cond/Equal/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/Assert/Const" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Rank of image must be equal to 3." + } + } + } + } + node_def { + name: "cond/Assert/Assert/data_0" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Rank of image must be equal to 3." + } + } + } + } + node_def { + name: "cond/Assert/Assert" + op: "Assert" + input: "cond/Equal:z:0" + input: "cond/Assert/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "cond/strided_slice_5/stack" + op: "Const" + input: "^cond/Assert/Assert" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/strided_slice_5/stack_1" + op: "Const" + input: "^cond/Assert/Assert" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3 + } + } + } + } + node_def { + name: "cond/strided_slice_5/stack_2" + op: "Const" + input: "^cond/Assert/Assert" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_5" + op: "StridedSlice" + input: "cond/Shape_3:output:0" + input: "cond/strided_slice_5/stack:output:0" + input: "cond/strided_slice_5/stack_1:output:0" + input: "cond/strided_slice_5/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/stack/0" + op: "Const" + input: "^cond/Assert/Assert" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/stack/1" + op: "Const" + input: "^cond/Assert/Assert" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/stack" + op: "Pack" + input: "cond/stack/0:output:0" + input: "cond/stack/1:output:0" + input: "cond/strided_slice_5:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/strided_slice_6/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/strided_slice_6/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_6/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_6" + op: "StridedSlice" + input: "cond/Shape_3:output:0" + input: "cond/strided_slice_6/stack:output:0" + input: "cond/strided_slice_6/stack_1:output:0" + input: "cond/strided_slice_6/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/GreaterEqual/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/GreaterEqual" + op: "GreaterEqual" + input: "cond/strided_slice_6:output:0" + input: "cond/GreaterEqual/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/strided_slice_7/stack" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_7/stack_1" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } + } + node_def { + name: "cond/strided_slice_7/stack_2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_7" + op: "StridedSlice" + input: "cond/Shape_3:output:0" + input: "cond/strided_slice_7/stack:output:0" + input: "cond/strided_slice_7/stack_1:output:0" + input: "cond/strided_slice_7/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/GreaterEqual_1/y" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 224 + } + } + } + } + node_def { + name: "cond/GreaterEqual_1" + op: "GreaterEqual" + input: "cond/strided_slice_7:output:0" + input: "cond/GreaterEqual_1/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/LogicalAnd" + op: "LogicalAnd" + input: "cond/GreaterEqual:z:0" + input: "cond/GreaterEqual_1:z:0" + } + node_def { + name: "cond/Assert_1/Const" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Crop size greater than the image size." + } + } + } + } + node_def { + name: "cond/Assert_1/Assert/data_0" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Crop size greater than the image size." + } + } + } + } + node_def { + name: "cond/Assert_1/Assert" + op: "Assert" + input: "cond/LogicalAnd:z:0" + input: "cond/Assert_1/Assert/data_0:output:0" + attr { + key: "T" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } + } + node_def { + name: "cond/stack_1/2" + op: "Const" + input: "^cond/switch_t" + attr { + key: "dtype" + value { + type: DT_DOUBLE + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_DOUBLE + tensor_shape { + } + double_val: 0.0 + } + } + } + } + node_def { + name: "cond/stack_1" + op: "Pack" + input: "cond/truediv:z:0" + input: "cond/truediv_1:z:0" + input: "cond/stack_1/2:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_DOUBLE + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/ToInt32" + op: "Cast" + input: "cond/stack_1:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_DOUBLE + } + } + } + node_def { + name: "cond/Slice" + op: "Slice" + input: "cond/strided_slice_2:output:0" + input: "cond/ToInt32:y:0" + input: "cond/stack:output:0" + input: "^cond/Assert_1/Assert" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "cond/Reshape" + op: "Reshape" + input: "cond/Slice:output:0" + input: "cond/stack:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "cond/ResizeBicubic_1/images" + op: "Pack" + input: "cond/ResizeBicubic_1/images/Switch:output_false:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node_def { + name: "cond/ResizeBicubic_1/images/Switch" + op: "Switch" + input: "distorted_bounding_box_crop/Slice:output:0" + input: "cond/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@distorted_bounding_box_crop/Slice" + } + } + } + } + node_def { + name: "cond/ResizeBicubic_1/size" + op: "Const" + input: "^cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\340\000\000\000\340\000\000\000" + } + } + } + } + node_def { + name: "cond/ResizeBicubic_1" + op: "ResizeBicubic" + input: "cond/ResizeBicubic_1/images:output:0" + input: "cond/ResizeBicubic_1/size:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "align_corners" + value { + b: false + } + } + } + node_def { + name: "cond/strided_slice_8/stack" + op: "Const" + input: "^cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "cond/strided_slice_8/stack_1" + op: "Const" + input: "^cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_8/stack_2" + op: "Const" + input: "^cond/switch_f" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "cond/strided_slice_8" + op: "StridedSlice" + input: "cond/ResizeBicubic_1:resized_images:0" + input: "cond/strided_slice_8/stack:output:0" + input: "cond/strided_slice_8/stack_1:output:0" + input: "cond/strided_slice_8/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "cond/Merge" + op: "Merge" + input: "cond/strided_slice_8:output:0" + input: "cond/Reshape:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "Const_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + dim { + size: 3 + } + } + tensor_content: "\354Q\370>\325x\351>;\337\317>" + } + } + } + } + node_def { + name: "sub" + op: "Sub" + input: "cond/Merge:output:0" + input: "Const_2:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "Const_3" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + dim { + size: 3 + } + } + tensor_content: "\372~j>B`e>fff>" + } + } + } + } + node_def { + name: "truediv" + op: "RealDiv" + input: "sub:z:0" + input: "Const_3:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "random_flip_left_right/control_dependency" + op: "Identity" + input: "truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@truediv" + } + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/min" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/max" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/RandomUniform" + op: "RandomUniform" + input: "random_flip_left_right/random_uniform/shape:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/sub" + op: "Sub" + input: "random_flip_left_right/random_uniform/max:output:0" + input: "random_flip_left_right/random_uniform/min:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "random_flip_left_right/random_uniform/mul" + op: "Mul" + input: "random_flip_left_right/random_uniform/RandomUniform:output:0" + input: "random_flip_left_right/random_uniform/sub:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "random_flip_left_right/random_uniform" + op: "Add" + input: "random_flip_left_right/random_uniform/mul:z:0" + input: "random_flip_left_right/random_uniform/min:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "random_flip_left_right/Less/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } + } + node_def { + name: "random_flip_left_right/Less" + op: "Less" + input: "random_flip_left_right/random_uniform:z:0" + input: "random_flip_left_right/Less/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "random_flip_left_right/Switch" + op: "Switch" + input: "random_flip_left_right/Less:z:0" + input: "random_flip_left_right/Less:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "random_flip_left_right/switch_t" + op: "Identity" + input: "random_flip_left_right/Switch:output_true:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "random_flip_left_right/switch_f" + op: "Identity" + input: "random_flip_left_right/Switch:output_false:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "random_flip_left_right/pred_id" + op: "Identity" + input: "random_flip_left_right/Less:z:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + } + node_def { + name: "random_flip_left_right/ReverseV2/axis" + op: "Const" + input: "^random_flip_left_right/switch_t" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "random_flip_left_right/ReverseV2" + op: "ReverseV2" + input: "random_flip_left_right/ReverseV2/Switch:output_true:0" + input: "random_flip_left_right/ReverseV2/axis:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + } + node_def { + name: "random_flip_left_right/ReverseV2/Switch" + op: "Switch" + input: "random_flip_left_right/control_dependency:output:0" + input: "random_flip_left_right/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@truediv" + } + } + } + } + node_def { + name: "random_flip_left_right/Switch_1" + op: "Switch" + input: "random_flip_left_right/control_dependency:output:0" + input: "random_flip_left_right/pred_id:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@truediv" + } + } + } + } + node_def { + name: "random_flip_left_right/Merge" + op: "Merge" + input: "random_flip_left_right/Switch_1:output_false:0" + input: "random_flip_left_right/ReverseV2:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + node_def { + name: "Reshape_1/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\340\000\000\000\340\000\000\000\003\000\000\000" + } + } + } + } + node_def { + name: "Reshape_1" + op: "Reshape" + input: "random_flip_left_right/Merge:output:0" + input: "Reshape_1/shape:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Reshape_2/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "Reshape_2" + op: "Reshape" + input: "ParseSingleExample/ParseSingleExample:dense_values:0" + input: "Reshape_2/shape:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "Cast_1" + op: "Cast" + input: "Reshape_2:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + } + node_def { + name: "sub_1/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node_def { + name: "sub_1" + op: "Sub" + input: "Cast_1:y:0" + input: "sub_1/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + } + ret { + key: "Reshape_1" + value: "Reshape_1:output:0" + } + ret { + key: "sub_1" + value: "sub_1:z:0" + } + } + function { + signature { + name: "tf_predicate_7089b845" + input_arg { + name: "arg0" + type: DT_FLOAT + } + input_arg { + name: "arg1" + type: DT_INT32 + } + input_arg { + name: "Equal/Placeholder" + type: DT_INT64 + } + output_arg { + name: "Equal" + type: DT_BOOL + } + description: "A wrapper for Defun that facilitates shape inference." + } + node_def { + name: "Shape" + op: "Shape" + input: "arg0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT64 + } + } + } + node_def { + name: "strided_slice/stack" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "strided_slice/stack_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "strided_slice/stack_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "strided_slice" + op: "StridedSlice" + input: "Shape:output:0" + input: "strided_slice/stack:output:0" + input: "strided_slice/stack_1:output:0" + input: "strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "Equal" + op: "Equal" + input: "strided_slice:output:0" + input: "Equal/Placeholder" + attr { + key: "T" + value { + type: DT_INT64 + } + } + } + ret { + key: "Equal" + value: "Equal:z:0" + } + } + function { + signature { + name: "_make_dataset_5fa5e1f4" + output_arg { + name: "PrefetchDataset_1" + type: DT_VARIANT + } + is_stateful: true + } + node_def { + name: "TensorSliceDataset/MatchingFiles/pattern" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "$(DATA_DIR)" + } + } + } + } + node_def { + name: "TensorSliceDataset/MatchingFiles" + op: "MatchingFiles" + input: "TensorSliceDataset/MatchingFiles/pattern:output:0" + } + node_def { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "TensorSliceDataset/MatchingFiles:filenames:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "ShuffleDataset/MatchingFiles/pattern" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "$(DATA_DIR)" + } + } + } + } + node_def { + name: "ShuffleDataset/MatchingFiles" + op: "MatchingFiles" + input: "ShuffleDataset/MatchingFiles/pattern:output:0" + } + node_def { + name: "ShuffleDataset/Shape" + op: "Shape" + input: "ShuffleDataset/MatchingFiles:filenames:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "out_type" + value { + type: DT_INT64 + } + } + } + node_def { + name: "ShuffleDataset/strided_slice/stack" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset/strided_slice/stack_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "ShuffleDataset/strided_slice/stack_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "ShuffleDataset/strided_slice" + op: "StridedSlice" + input: "ShuffleDataset/Shape:output:0" + input: "ShuffleDataset/strided_slice/stack:output:0" + input: "ShuffleDataset/strided_slice/stack_1:output:0" + input: "ShuffleDataset/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "ShuffleDataset/Maximum/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + } + node_def { + name: "ShuffleDataset/Maximum" + op: "Maximum" + input: "ShuffleDataset/strided_slice:output:0" + input: "ShuffleDataset/Maximum/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + } + node_def { + name: "ShuffleDataset/seed" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset/seed2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset" + op: "ShuffleDataset" + input: "TensorSliceDataset:handle:0" + input: "ShuffleDataset/Maximum:z:0" + input: "ShuffleDataset/seed:output:0" + input: "ShuffleDataset/seed2:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } + } + node_def { + name: "ShuffleDataset_1/buffer_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1024 + } + } + } + } + node_def { + name: "ShuffleDataset_1/seed_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset_1/seed2_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset_1" + op: "ShuffleDataset" + input: "ShuffleDataset:handle:0" + input: "ShuffleDataset_1/buffer_size:output:0" + input: "ShuffleDataset_1/seed_1:output:0" + input: "ShuffleDataset_1/seed2_1:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } + } + node_def { + name: "RepeatDataset/count" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } + } + node_def { + name: "RepeatDataset" + op: "RepeatDataset" + input: "ShuffleDataset_1:handle:0" + input: "RepeatDataset/count:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + } + node_def { + name: "ParallelInterleaveDataset/cycle_length" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 8 + } + } + } + } + node_def { + name: "ParallelInterleaveDataset/block_length" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + } + node_def { + name: "ParallelInterleaveDataset/sloppy" + op: "Const" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } + } + node_def { + name: "ParallelInterleaveDataset/buffer_output_elements" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + } + node_def { + name: "ParallelInterleaveDataset/prefetch_input_elements" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 16 + } + } + } + } + node_def { + name: "ParallelInterleaveDataset" + op: "ParallelInterleaveDataset" + input: "RepeatDataset:handle:0" + input: "ParallelInterleaveDataset/cycle_length:output:0" + input: "ParallelInterleaveDataset/block_length:output:0" + input: "ParallelInterleaveDataset/sloppy:output:0" + input: "ParallelInterleaveDataset/buffer_output_elements:output:0" + input: "ParallelInterleaveDataset/prefetch_input_elements:output:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_map_func_91295dea" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + } + node_def { + name: "ShuffleDataset_2/buffer_size_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1024 + } + } + } + } + node_def { + name: "ShuffleDataset_2/seed_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset_2/seed2_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset_2" + op: "ShuffleDataset" + input: "ParallelInterleaveDataset:handle:0" + input: "ShuffleDataset_2/buffer_size_1:output:0" + input: "ShuffleDataset_2/seed_2:output:0" + input: "ShuffleDataset_2/seed2_2:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } + } + node_def { + name: "ParallelMapDataset/num_parallel_calls" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 64 + } + } + } + } + node_def { + name: "ParallelMapDataset" + op: "ParallelMapDataset" + input: "ShuffleDataset_2:handle:0" + input: "ParallelMapDataset/num_parallel_calls:output:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_map_func_74b6b15c" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 224 + } + dim { + size: 224 + } + dim { + size: 3 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "PrefetchDataset/buffer_size_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 64 + } + } + } + } + node_def { + name: "PrefetchDataset" + op: "PrefetchDataset" + input: "ParallelMapDataset:handle:0" + input: "PrefetchDataset/buffer_size_2:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 224 + } + dim { + size: 224 + } + dim { + size: 3 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "BatchDataset/batch_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 64 + } + } + } + } + node_def { + name: "BatchDataset" + op: "BatchDataset" + input: "PrefetchDataset:handle:0" + input: "BatchDataset/batch_size:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 224 + } + dim { + size: 224 + } + dim { + size: 3 + } + } + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "FilterDataset/batch_size_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 64 + } + } + } + } + node_def { + name: "FilterDataset" + op: "FilterDataset" + input: "BatchDataset:handle:0" + input: "FilterDataset/batch_size_1:output:0" + attr { + key: "Targuments" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 224 + } + dim { + size: 224 + } + dim { + size: 3 + } + } + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + attr { + key: "predicate" + value { + func { + name: "tf_predicate_7089b845" + } + } + } + } + node_def { + name: "PrefetchDataset_1/buffer_size_3" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + } + node_def { + name: "PrefetchDataset_1" + op: "PrefetchDataset" + input: "FilterDataset:handle:0" + input: "PrefetchDataset_1/buffer_size_3:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 224 + } + dim { + size: 224 + } + dim { + size: 3 + } + } + shape { + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + ret { + key: "PrefetchDataset_1" + value: "PrefetchDataset_1:handle:0" + } + } +} +)PREFIX"; + + *dataset_name = "_make_dataset_5fa5e1f4"; + std::function mutate_proto_func = + [dataset_name, file_path](FunctionDef* fdef) { + VLOG(1) << "Processsing function " << fdef->DebugString(); + if (std::string(fdef->signature().name()) != *dataset_name) return; + // Change the input file pattern to `file_path`. + bool found = false; + for (auto& node_def : *fdef->mutable_node_def()) { + if (node_def.name() != "TensorSliceDataset/MatchingFiles/pattern" && + node_def.name() != "ShuffleDataset/MatchingFiles/pattern") + continue; + DCHECK_EQ(node_def.op(), "Const"); + DCHECK_GT(node_def.attr().count("value"), 0); + found = true; + DCHECK_EQ(node_def.attr().at("value").tensor().string_val(0), + "$(DATA_DIR)"); + VLOG(1) << "Setting the value of node_def " + "TensorSliceDataset/MatchingFiles/pattern to " + << file_path; + auto* tensor = (*node_def.mutable_attr())["value"].mutable_tensor(); + tensor->clear_string_val(); + tensor->add_string_val(file_path); + } + VLOG(1) << "Rewrote function to " << fdef->DebugString(); + DCHECK(found); + }; + return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); +} + +// Adds the input functions to `graph`. On success, returns the created +// IteratorGetNext node. +static TF_Operation* AddDatasetFunctionAndIteratorNodesToGraph( + const std::vector& funcs, const std::string& dataset_name, + const std::vector& output_types, + const std::vector& output_shapes, + TF_Graph* graph, TF_Status* status) { + DCHECK(!dataset_name.empty()); + for (auto& func : funcs) { + TF_GraphCopyFunction(graph, func.get(), /*gradient*/ nullptr, status); + if (!status->status.ok()) { + return nullptr; + } + } + + tensorflow::mutex_lock c(graph->mu); + + tensorflow::NameAttrList func; + func.set_name(dataset_name); + // Run the iterator node on CPU. + Node* oneshot_iterator_node; + tensorflow::Status s = NodeBuilder("OneShotIterator", "OneShotIterator") + .Device("/device:CPU:0") + .Attr("container", "") + .Attr("dataset_factory", func) + .Attr("output_types", output_types) + .Attr("output_shapes", output_shapes) + .Attr("shared_name", "") + .Finalize(&graph->graph, &oneshot_iterator_node); + if (!s.ok()) { + status->status = s; + return nullptr; + } + // Run shape inference function for each newly added node, so that more + // subsequent nodes can be added to the graph via C API (TF_NewOperation()). + s = graph->refiner.AddNode(oneshot_iterator_node); + if (!s.ok()) { + status->status = s; + return nullptr; + } + + // Run the iterator node on CPU. + Node* getnext_node; + s = NodeBuilder("IteratorGetNext", "IteratorGetNext") + .Input(oneshot_iterator_node) + .Device("/device:CPU:0") + .Attr("output_types", output_types) + .Attr("output_shapes", output_shapes) + .Finalize(&graph->graph, &getnext_node); + if (!s.ok()) { + status->status = s; + return nullptr; + } + // Run shape inference function for each newly added node, so that more + // subsequent nodes can be added to the graph via C API (TF_NewOperation()). + s = graph->refiner.AddNode(getnext_node); + if (!s.ok()) { + status->status = s; + return nullptr; + } + + VLOG(1) << "Output graph: " << graph->graph.ToGraphDefDebug().DebugString(); + return ToTF_Operation(getnext_node); +} + +TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets(TF_Graph* graph, + TF_Status* status) { + tensorflow::Status s; + + std::string dataset_name; + UniqueFuncPtr result_func = CreateFakeDatasetFunction(&dataset_name, status); + if (!status->status.ok()) { + return nullptr; + } + + std::vector funcs; + funcs.push_back(std::move(result_func)); + std::vector output_shape_list; + output_shape_list.push_back(tensorflow::TensorShapeProto()); + auto* getnext_node = AddDatasetFunctionAndIteratorNodesToGraph( + funcs, dataset_name, {tensorflow::DT_FLOAT}, output_shape_list, graph, + status); + if (!status->status.ok()) { + return nullptr; + } + + return getnext_node; +} + +TF_Operation* TF_MakeImagenetIteratorGetNextWithDatasets(TF_Graph* graph, + const char* file_path, + int batch_size, + TF_Status* status) { + tensorflow::Status s; + + std::string dataset_name; + const auto& funcs = + CreateImagenetDatasetFunctions(file_path, &dataset_name, status); + if (!status->status.ok()) { + return nullptr; + } + + std::vector output_shape_list; + // batch_size X 224 X 224 X 3 + auto image_shape = tensorflow::TensorShapeProto(); + image_shape.add_dim()->set_size(batch_size); + image_shape.add_dim()->set_size(224); + image_shape.add_dim()->set_size(224); + image_shape.add_dim()->set_size(3); + output_shape_list.push_back(image_shape); + + // batch_size + auto label_shape = tensorflow::TensorShapeProto(); + label_shape.add_dim()->set_size(batch_size); + output_shape_list.push_back(label_shape); + auto* getnext_node = AddDatasetFunctionAndIteratorNodesToGraph( + funcs, dataset_name, {tensorflow::DT_FLOAT, tensorflow::DT_INT32}, + output_shape_list, graph, status); + if (!status->status.ok()) { + return nullptr; + } + + tensorflow::mutex_lock c(graph->mu); + VLOG(1) << "The extended graph: " + << graph->graph.ToGraphDefDebug().DebugString(); - VLOG(1) << "Output graph: " << graph->graph.ToGraphDefDebug().DebugString(); - *dataset_func = result_func.release(); - return ToTF_Operation(getnext_node); -} - -void TF_GetAttrScalarTensorShapeProto(TF_Buffer* value, TF_Status* status) { - status->status = Status::OK(); - auto shape = tensorflow::TensorShape({}); - tensorflow::TensorShapeProto shape_proto; - shape.AsProto(&shape_proto); - status->status = MessageToBuffer(shape_proto, value); + return getnext_node; } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 2fa232878c..a9c551d73e 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -87,25 +87,22 @@ TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph, size_t* len); -// Creates a stack of data set + iterator nodes reading the TFRecord files from -// `file_path`, and outputs the following info on success: +// Creates a stack of data set + iterator nodes, currently hard-coded to return +// a sequence of 3 float values <42.0, 43.0, 44.0> over 3 calls. On success, +// returns the IteratorGetNext node, which caller can run or feed into an node. // -// 1. Returns the IteratorGetNext node, which caller can run or feed into an -// node. -// -// 2. Sets `dataset_func` to the created function that encapsulates the data set -// nodes. Caller owns that function, and must call TF_DeleteFunction() on it. -// -// -// The nodes are currently hard-coded to return a single Int32 of value 1. // TODO(hongm): Extend the API to allow customization of the nodes created. -TF_CAPI_EXPORT extern TF_Operation* TF_MakeIteratorGetNextWithDatasets( - TF_Graph* graph, const char* file_path, TF_Function** dataset_func, - TF_Status* status); - -// Returns the shape proto of shape {}. -TF_CAPI_EXPORT extern void TF_GetAttrScalarTensorShapeProto(TF_Buffer* value, - TF_Status* status); +TF_CAPI_EXPORT extern TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets( + TF_Graph* graph, TF_Status* status); + +// Similar to the above API, except that the returned iterator reads the +// TFRecord files from `file_path`. +// The iterators outputs 2 tensors: +// - A float tensor of shape `batch_size` X 224 X 224 X 3 +// - An int32 tensor of shape `batch_size` +// TODO(hongm): Extend the API to allow customization of the nodes created. +TF_CAPI_EXPORT extern TF_Operation* TF_MakeImagenetIteratorGetNextWithDatasets( + TF_Graph* graph, const char* file_path, int batch_size, TF_Status* status); #ifdef __cplusplus } /* end extern "C" */ diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc index 9ddd65f0c5..49d64d18bf 100644 --- a/tensorflow/c/c_api_experimental_test.cc +++ b/tensorflow/c/c_api_experimental_test.cc @@ -15,38 +15,36 @@ limitations under the License. #include "tensorflow/c/c_api_experimental.h" #include "tensorflow/c/c_test_util.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" namespace tensorflow { namespace { -void TestIteratorStack() { +void TestFakeIteratorStack() { TF_Status* s = TF_NewStatus(); TF_Graph* graph = TF_NewGraph(); - TF_Function* dataset_func = nullptr; - - TF_Operation* get_next = - TF_MakeIteratorGetNextWithDatasets(graph, "dummy_path", &dataset_func, s); + TF_Operation* get_next = TF_MakeFakeIteratorGetNextWithDatasets(graph, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - ASSERT_NE(dataset_func, nullptr); - TF_DeleteFunction(dataset_func); - CSession csession(graph, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); // Run the graph. - for (int i = 0; i < 1; ++i) { + const float base_value = 42.0; + for (int i = 0; i < 3; ++i) { csession.SetOutputs({get_next}); csession.Run(s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); TF_Tensor* out = csession.output_tensor(0); ASSERT_TRUE(out != nullptr); - EXPECT_EQ(TF_INT32, TF_TensorType(out)); - EXPECT_EQ(0, TF_NumDims(out)); // scalar - ASSERT_EQ(sizeof(int32), TF_TensorByteSize(out)); - int32* output_contents = static_cast(TF_TensorData(out)); - EXPECT_EQ(1, *output_contents); + ASSERT_EQ(TF_FLOAT, TF_TensorType(out)); + ASSERT_EQ(0, TF_NumDims(out)); // scalar + ASSERT_EQ(sizeof(float), TF_TensorByteSize(out)); + float* output_contents = static_cast(TF_TensorData(out)); + ASSERT_EQ(base_value + i, *output_contents); } // This should error out since we've exhausted the iterator. @@ -60,7 +58,63 @@ void TestIteratorStack() { TF_DeleteStatus(s); } -TEST(CAPI_EXPERIMENTAL, IteratorGetNext) { TestIteratorStack(); } +TEST(CAPI_EXPERIMENTAL, FakeIteratorGetNext) { TestFakeIteratorStack(); } + +TEST(CAPI_EXPERIMENTAL, ImagenetIteratorGetNext) { + TF_Status* s = TF_NewStatus(); + TF_Graph* graph = TF_NewGraph(); + + const string file_path = tensorflow::io::JoinPath( + tensorflow::testing::TensorFlowSrcRoot(), "c/testdata/tf_record"); + VLOG(1) << "data file path is " << file_path; + const int batch_size = 64; + TF_Operation* get_next = TF_MakeImagenetIteratorGetNextWithDatasets( + graph, file_path.c_str(), batch_size, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + CSession csession(graph, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Run the graph. + // The two output tensors should look like: + // Tensor("IteratorGetNext:0", shape=(batch_size, 224, 224, 3), dtype=float32) + // Tensor("IteratorGetNext:1", shape=(batch_size, ), dtype=int32) + for (int i = 0; i < 3; ++i) { + LOG(INFO) << "Running iter " << i; + csession.SetOutputs({{get_next, 0}, {get_next, 1}}); + csession.Run(s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + { + TF_Tensor* image = csession.output_tensor(0); + ASSERT_TRUE(image != nullptr); + ASSERT_EQ(TF_FLOAT, TF_TensorType(image)); + // Confirm shape is 224 X 224 X 3 + ASSERT_EQ(4, TF_NumDims(image)); + ASSERT_EQ(batch_size, TF_Dim(image, 0)); + ASSERT_EQ(224, TF_Dim(image, 1)); + ASSERT_EQ(224, TF_Dim(image, 2)); + ASSERT_EQ(3, TF_Dim(image, 3)); + ASSERT_EQ(sizeof(float) * batch_size * 224 * 224 * 3, + TF_TensorByteSize(image)); + } + + { + TF_Tensor* label = csession.output_tensor(1); + ASSERT_TRUE(label != nullptr); + ASSERT_EQ(TF_INT32, TF_TensorType(label)); + ASSERT_EQ(1, TF_NumDims(label)); + ASSERT_EQ(batch_size, TF_Dim(label, 0)); + ASSERT_EQ(sizeof(int32) * batch_size, TF_TensorByteSize(label)); + } + } + + // Clean up + csession.CloseAndDelete(s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteGraph(graph); + TF_DeleteStatus(s); +} } // namespace } // namespace tensorflow diff --git a/tensorflow/c/testdata/tf_record b/tensorflow/c/testdata/tf_record new file mode 100644 index 0000000000000000000000000000000000000000..6e16076bfb79ad8151952e96567565e8820b0f5b GIT binary patch literal 417114 zcmeC*$jAT!EE5-;;p%?O#wEtZlbM^Co~oabnwg$aBE%-e!o~E2i7SCi03x28lUQ7= zUy@o;BILv6#pRKlUz(m+q@z$!nwzMjkepbOn44dkSE8c;rV=yr6ml~2^K=vn5;OBk zQb7t6^GY&647b#r%wmP){JiA+octm#H7+rT4f#n~smUe!NlEz?`jxqf6+%2hTwELr zItA<-JnHSaKHp>G;)SS8%}dTtNlg)YaF31a&OJ7+8~51$-}wK4LBPw+(~W_Vk&(fJ z!GnR}|6Ap(fQ8I3m`WH}6ciK`z;db( zxu7joHi5BAR$P9p7WD9#@m-J{^IM1<$Ycga29O;LAQ~jcq6U`Z1CwCFzJUQ^8!wm( zvQrMMQWBNV{29#W1d|LP1Hr1ezzKMNGjWx(E@lnLofL6fiLS#|oHVfaI}C{ij3@WH%^? z7(h`12{$H?O$Ij|g9JJ=j45T7|2?lTaVfiZ=Ff$;_d z1A{n&FM~frD1#qE2!kVoCxah@0)r2OCxbtj<-*{~;K~rhpuiB!;KAU}5CRS*un8yx zEG9tGHyFUCqsYUf3Rx8De2`FHX-*ErvkYuW`K5U&#r{cIASXi&4oWUDWME(bdnzQq zKp`hTFCC;0gnd%eN|5ao|%`LSDMSfz`)}g3^LsxEXKgVz?)cF zlJB0Hms*rql9~cCBc!q*6{?sEq8KCslJU$WUPG&kRr1&A8 z_kan50z4%(EwMDGgn@y9vn;i!1dH-8m@ukx?xb{QP~iy*N@(B;Ld(NvZM9z6^~2-!brkc?>BHAd~*zW@usH;o#ulqD2rw@jxjIy!mRM=?5CNgp1Mki$z(V&A##wJBg z7oFKvCk20a_%XTIRBX|vW;K_|At|nh7MGZbUkZJc`e}2^;mf6uKf9??>X9o? zzAW8(^y<^EVQFP;)0S;JcJ0|WbNBG{^7iS=w;#X${JS6n6B8p73k%py>};&2U?(UV z3b8l_CKf6Qvl=x{6al&5V9>=6#wtku>8NoF3Grmod z-ua*5?0<#_^Oj#;Yjx>s&MS+ZvRd8iHc2M()H+3d_?GikJG=e)UwQw#{~0(IUYr;8 z`{=*vs|$3Ltry)=JL{r%N%6gB;_O=wF6_x|{w$p>AO9=rum2aXWf2@g440x>TQ43s zzG~;Oea+!(e0Mw!@Me@dmh&&^mQMZZ@b}lt|A?;U-O;mS$Mu}O{pG8kpWiy^*2llM zXUDJ4-Tj~8+SV1t>rPbGX)1mTdf0a2&G8LeZ}QkZ+kfTzuPOf-OjcQS-#NYUbK#s* zS$0cz@=W6C&HZxw&+qy#llqQV?dvGqG{^8wsp}_)V(!%n(}mu~{=EOOTJk?beMpY# z)CmV}&d`0wZoKUNbGa?Q`iyV;TIwceN|%=Z@_c&l@z2>a-`w2Wt+~~6)3M1rw(na! z?eo8^`2sucf4XQWc3b`WvPKURvCFa1(yLErtCwy5C;Fek{j-2}LSgZY@;ye!nH;8F z-?}FG-s|KqdD;8j-~227XZT`jdr#P`+13$^-`Qpgzc22;nE$P|bFWZ#>AUn*58STj z3hrXQ`?q#>{k8pp5hCIYo!@5ZX3vh!&Gyh+bt&dj_P4ix-}}z@|0iC>Jtsc6`HS6Z zj!c1PZM$D=-1zg{SGjc?*KAxcTY|HFdd5el=$Cur#0Ex*$H{YcQ;M@&!GJ~d*=&VdwJodi4P4n z{?^-aKZo(v4$H^&yJByD{?BkQWW&BSUp7y#{aNjHCyg!W@k+BB&$eI9`OnaDeC_)G z4BIth>-1DYB3_$|MHkmD5PO|{cXQyE{|vRE=dEp&mp5IivfLZ4yJG^2c=Cm38*lp^ zio2e?ao>WxyKlee-TY$fdh~BH&u)8pt!+|{a?_VpetFf{pS-Q^Z_eMZcfQV>ayx77 z)recNZqFybQ$73a*0*Uj0D6G-P(xw^jMCnA-NQ2+%Qox>;UXjqkXHnAG|ucMcqT zUvhin*Uf*WN|vO`@_(At9sN4a^!f7mJO{gtCsy4r*`3`Sb=y{bYj6J6^*4UoyX~wo zH(k4dH+qXz{VG}Qy(bTyIlEe``)%>dGmGxK^W8-fAQD$ z_4(EOYi#?ZV;tAMvF)&X`7b#5L(MYv^WW<4EVB;&f8lw|^Q0#>vWidpnIjph+rQn2 z+4A~SWui)VxnGMz!G-MQJMZ29m9;LH<<99C+0b%^T8nnaoDGNf-g`Uywd0|^SM5_b z3hP~uwVnN+Vd7gab%zD7-epVuJy+77P$1+f?|Sy`{wv?!+}-{CKf~*;{|vsz($XA` z723_QIwa+sdN%J_|C`Na*VkN%`E;A@5=;Ae;g$F8U%#wx`WLxzernllEyZc=d3?`j z-1%nOy8rcm28k&T)U@AdDcx(@Z7B7sNO#?Hv&q4QISkFAiG4Ts-hU`}?0)z!`{!au z_xg(jzKVKu_HCh^E-&l(vg6g;%(KqFjJ%(5SZROq3*O9(lDTWMv(xwP_r7(sZry_8 z)!VnZvz+O_*DmmW$?9tL@2g+zzO{jOM{Tz3&RBEhRlm+v$1ty4dSJtqSUhZ4{<(B4(=TDqJ6*+wQ-f=VV>b~jkZcqO5_CG_@m;VgY7N4HrWd2Q8wT7Xp zII`=wcwlIP#nz41ImLgo>n-;!zVm$lzkgdJQ}c>vWU7dZi*q*b-czeyJ#}$x(!sQA z)7jq2U!9QruPc3bvEQjM1sma@< zi?tSR>0cE6a_76pFYJ~FMsNSma7_KcMU^tmvYdsj+t_7qADnn#?rz)Dou~84J$~E! z<+{K5u{*c8Z*6UUan6lzoA3V8{=WO?{qyOcv$tp07VceQxuig~cy93Z{p#j#O}0&J z?U6a~dGEUVmC?5!zOL)PoxVN&Z=r;IXjn(di(0W*Z>a~bx7~iVbKj-w*Ivy}uAg*0 z;y=U1fBzYNm_DoZmb?+`t}gq`TP8;Kk?i}-xCz{bdM~F19jh|idS&mMv}2PKzS`C< zev$XDus}X?#_c^k8D zNL@c;A7I+Q_~)*=*VC5SxqWabWcaWTCj99XWIQrL)NpCxR>94 zm;I)E`-}e!&3;zRYb<`0Sv=ldUHGwTcH+C_n6k%@N_m3+s*A2GUmkmQ^R;9D8Cw5p zr8!phullIINdu;=d1YY~56FIcU$?b@M}ItCA! zW0&OJ`Ok27bN;26=3SaEZ}Mzj9cC0KbLjSwIkmsu{1v;Fv$@!I>izgX@hi6Ko>wvc z(!#uW&iy5vzxe;1zIDC&_OJM7R}zl3)~y6Jns_qC7Ty<95;^5NTxy6i6`0BXK9&cRtwdEa~2g^t4 zkfY&_Liai4t0xt1-1ppp!AJN%L(mnru4Qe>t2W)5cZqZ9r56g`i~WPYZ!$|7Wm`%YMl+fx-Ah{?owP^^+txo77MIP_ENhFSfVT za)0{DwTfF#B&dHbvs&>l{B!rbe7EDLHd`wj9Zz~3^Jh-^5q1yL@L!hsm+pQEcYhW2 z{Dr4LQ%M}udw&ee`fz#tY9Kbxzxswc9|1gSV1G43!FOKS<@IO(yuTYIFk_BO`zM`-8{00_Tzj^w+`{v&+)KG{FTd3O zXINTxMQ6i*hRw4+-ic1Fd*&g;!)|)}w)~QNwf&z%=dv%T-ZwsrsFo7?}Y{*3>@ z_4LErR~FYUoR8Qqcl^qQ<1HA}y9{43t6q z&lBevuig>Ctyi#cLF3D<+a!3W<&-_ExBs$#PE>zt-iqt_FPXP89pGz!vGHJf$-cSM zzpXkRa5y_>(cb?I8KrAB9y`f%+gD`gy}6~o-`@W>{qmQU;pQWCzS}UnvnDM#!r?5{rXEp(;h|fU(%cF(5q%6vhVbi zOL1@a2r*bSKmS+M9lTmr{mYw_tw**8F1%`XTkg_r4uxLRr-s=x4&9bhZhCn^*4sHa zZ-W)HXL3n#*^DN(v&(*R5q@FIN7~pt5>@_5t0L z{z8Q!j>o@t2bEUK$tGkwFFmkJJ(#!u$zBFik1g*{tZY1|CvYL=kwwGZ{|sMwLpGlM zCYF-Zoip3J|Jk+!cYEsIpIf&qJp7x$+b=r$3lDJ3j;wwAyzJ4#P@SB2x1Mjl`b++7 zUH2dT7q^98MR$5K9xj%6E_tc!-EH|>MHMXE9&-BsFTc_}z9{C_+iNoMJMye6s-ur@ z4>j&NbV=&^wd+}BTRi*oc8A(T&xkG7trUy4o6PL(z;kuqOx}C%?}WWSy|%$>b?w>f zmveF%w!W91`}6j^Z#w_@rIST(CVun!>+o=Q#_hDf%-b`Q&9>y+_@>9Z^}e6$RgD|gmG<^K#5Cg-YO-ezw8GM8b8 z>B7qiQhV2~zx+*o)5cfK3~eV%m{&V~w_W^w_U@wchU+;BI|P3jt1kGf?>+OcY<}*x zUtYJq-n;(G^}Y4J`5W&ve%Y)4>(<)I!S~KbSl)KLbMgu|!@bur=a)RMOM7i!8(S#d ztW%J1r<_Ukhu(<jqhm^UG}4(j7OR?=ZeqwmWuR_Wt$1|1(^1`nooCzQ^3>x0uV>ruARiRM#E* z?(VyY8`oyNUN$n0PCx9RfB&hC5vXV&K2@6)eoWw%V=cCKXhT{6FlF^t>#ByUwnwnf1Us z>fqb^lkV=DIy?6L(*F#L{2ouaJ?HtJ2_LOFr0o2j8x3&47 zwr&6QlxusN)wTw2wi{|D&F7_S_x)!mp4`E}TYIXPbmwHwc<`U$;(vzK zp|3W6=B?hgwG7-DeBtpKj-iK%e4xW39*@ab${R4`&s;qyXQ#?dFtMLD{%SEzvA*2L9?1&Q|5l1 zW-!t20^7B{x6JR{zw3BpXS3Y>g5=xTa#C+&oVS}^J@flb{l5K6|1)r` zeF}dyt{ubD7?L6=NKQ*w)$M5+HUzb6;(oAAqS{jcAbO4mpnU4HAcN=?E$E0ecQ*1gqh8LaR8 zX9y4%Dz~t{8B^CeJNo6D^w0Z0&RewOOpxU}(@7r}|6QXz%{-I$yII-q^1A%ZNuvA- zEAOq%O?_XGUA8@&VeY;U{~69y2j7l9%spR)*(@vBZTtRr);Iq%NG!XRy-8Q~T+Msc z!yZrjiX+~a@4oq;VaX+ji;DZM2Tm=X^We8M<1V%nUHyOA@~ifDJ*z5hYw%`vUv}>S z!~4MZtuv>^+~Q5X_4YQ)=Ra%j{44m+uwC?5^X$Gu$E{+n<)=-MVUB+L%icNvrTOnG zGUqOSj-H%wd%4nX^+F*>)*~6}vAf%M|LPKnl6c-*ZX<9^l2@R#;?LRL{+n;LhJ7=- zyrXUF*)KOdt{nVx@Lg`{e}+iW^=6JnpB}HcrntC#4{OAQYim1aC~ot}_U7N6cPZxm z+pNx4ey0xHVOy7TZh83j`Q=-8OK#=7W_O-(o3rE>hs=N8^{Zwr-*JSq@XZC&>m8SW zYn1+rpEPN<-ikN3H}*2L?PzefdtR$wwX!}STc=Ed&(bU`?%G?`rG0vLcl_mgQ{4VJ zKX`*+vPB{Ux)Z>wcSU} z*R7eG8aC+{N2!C!gh$ud^4o3ua+mI&AH3J=um7|9*Bz!|zuy*XC?0LDeP8^rf!X>^ zJ*elP8n)l$QK{IRs&4IVdzWqByIwj+I_~)1x2bXWpKE`e`JW+A=}K&WcdfemTZ`?t zSd}ha%s%T}{?h&Pwpz9Q7ydJFZjAbttRMTU^kzNp>x&w7%!RB8cS|hK-;>-r&1%E5 zVE>mgr}v$IWnI5|`bYNPJG+}XYL`snP)|81KkLT7 z^Dl+NPD$_mvdj5vQ{^S==^8ve+ZifYr~PO6e>Gc%cQ=RP;-GV;>Q2vvcjtc#kkZ_C zJa^*O3lA=DVb3=Hvuv^Lp##?iRQxWve7t@8KZ7sJe)Z1Hs#ot=(zRiC7_j#Xd*IqNro>2EYddJ@R&-|*XOMawQOuip_si_RVT&1yE6b*#W7OX zBD^~p>bh_Kp1<{lr;^>& zy8BQ5GdvLv`Nq6+60^tLV=m0Qwi(o#nQgP&xBUGI?uKH{@@0Pyamc%vnf=~3fA5*E zLZNJu=dz=tIleOsJW%dmm|UKd_q?uaYw?n&;bpe27v8&n$-8o(Qf~R87_;9OGP1c( zw{6Q)+AgeQQ@eZrXI8CW6-R5Y8Gp&E%$vF;gYnzd%|D~(y_S0#Z_>E&W>#Zyou>H#U=PhtJ znm3_9@z$2*kDngwm}qaXD^{X;mvMCS!TA^8Hd()$pS{2Q#nQ;zx%qrT7Zvs{Ihi-R zFCljl^cTbG>o zo1Ks@x#fBN*QtkZvh6!n@p!tvr_ z{|vof{{IyV15Mv{U{`25JLr?l& z=Zp0R`d^3s^zzEN#+q{S_MHO1<-7M!-2HN^^`_N)g(W)-XLqvfko{H5yL;Y~Z?)<* zxqmwAZ&m$gSabjPrxoSq(Q|L`-dkz-X_NN4Q+k;+aXXqK9v!I2?_A)g_MA(cUaA%G!+A)ldy zA)lcb%qjpc0l=?9fx(#pY#hXV$XXM22GIO2BO}BAhYFWnD>Bm<7(6|-7(mN4xEc5v znHWF|TP86uFbFU*G4L}mFw6$o#K;U@9|3Zg6eBB`y^4W>K^n^5#K6Fy0aXK924cv_ z#30SUz_5#ffx(E8i2*dRe3gNLL6`|*4oJ<6CME_}1_p+g3=9mJ?JQt3L6gxM%nS@} zpqV0|Vnkh#E$aA16VU{u~3XSegW}>l_mU12a_o8b~jceUFKO zffcIe8508o`y_}PKw-lHWrI=%CzK6Zlfng6{|*$83n2Cy7#bKbE`aC-#S_y4NSNMW zWME*1vQrot7+9ff4rT@hzC{qXePd!^;D@sRF)=U*EP|NB%)r1P4rPO^Vpz-$jwg^f zBa{uYi*X4=J;#HX9O~3yIBx#0KdHEu?Z`VqlP9WMBY=JtQJctPG453@oe+ z&8!TKp^=L!=Enq4uLIRL4N8N|1epU97vO`atL9_?`vpdW7BVq1FoCm2kfj9!69Xb# zL1r*8urV-0(>N$A!SX>FGa?_FTUd9OUi!k@iWx5JdITtd&Om@k5JL+l7ZjB+Ffax%FfbUWq!uSLFfg_- zFfinlmlS}+L8~4_lCnW;CeY#xk)nvmC52?hoRk#q=K2gFW-upL0`l-#@&1_maO zn+sBMQ$Xw^3=9k%Wu>4~9GHC=7#P~hGE>VL7?`gxFfgd(l;&nKFffCT3lYgpO)O?$ zU;%CKS1CzN&R}3*F#sJ6Q4|v7%)r26!@$77la9nrLSlo?lL2|!IlrJ1e87N$MzW@Y zp{1pTf=6n3PHIVsZa`vkc4ASAf^&XuL1JDd0|Ug*AYBYXps-c&bagf}v@|o*HPQne zngNypEi=I<2~$LXj);tc#?R+KaGWuUok55pG$X|F7cnqcd}Ux@o`4Wbn#sVhy_JDM z;ubNe+vV{w)05-5d)={jFOT9D}DX)@^Zb*)MPzSfWj>zr~s}KHv`E&J&=9y zuv2h>oJXMmO4Xplc}j~EiVG5xQx$a46EWEuk(`C(4xOOXwA7;1yyR4cu*}r*%)E33 z=lr~s%#zIfJcZ0WTro_hHY7KIn2>Z0T2Hj(AA?A|9)rMn5eDYBXBb$7xfz&a=7CmL zz~jw_GmHV0^TOqyL()CS1aQg#GY|w*Kj?gy;!N-vCk)O(AqvT*MP(3qP_|@XW#D4q zXAohKWRPP}X3${JWiVneXRu~)U~pyd0v|mS$q>ho%#gv5%TUBn&QQb9$k4{n%`kys zD#I*>`3y@KRx+$(*u=1%VGqMWhGPt;87?qfWw_1ofZ-{_YlaUD-xz)~GBUC=@-hlD zN;1kbsxoRb8ZlZh+A+E^`Y;ADMlvQarZeU-mNM2bHZyiJPGX$JxR7xr<3`4vj0YJ{ zGG1i7$@q}*CF3W?UrfwQJWL`?vP^1B`b-u~j!fQ6AxyDM=}d)8RZJ~ReM~c$7BQ`1 z+QxK{=`_<-rUy)~nZ7bJFmp4DF)K3bGFvb^Gy5|~F{d#XG1oD7F;8P&#JrAq7xQuE z%ghg$-!T7VVPg?yQDD(!v1ajL31vxUDP*Z*>0z10vVvtB%Mq5#EDu>eu>51?WtCyo zX0>AVWQ|}=XDw%KW1YsjjCCvPQP!)hPg%dQv9XD}~8b+1Ie|XTQY$l>G+>H-|ij5r-Q`6h|IM6UQ`; zRUG>`E^$2P_{}N6smf`^>Cc(MS;^VYxrB2U=XuVjoWHpQxzxFAxk9+IxEi@;aINDy z%5{h9GdDN461OFHAa^Er1NRK>4csTVA9DZX5#rI}apsBPDdp+oSNB%heD*kEwoB7Z4 zzY*XRP!n(#NEE0Om@TkV;JUz9L195dL4Uzq!5+a?f+q!E2(b&P3AqZT2(<_;7CJ2S zSeRK@S=d=PS-4qviSSY3ry^`3>LQ*ZSt8vcYemk9d=M29H4zOHtq`3px>xjp7_*p~ zn5S5dSijgNv1?+##AU@D#Z$#Q#n*{n68|nCE#V-MD$ymeUgE06FG&SSSIKP2iIUqT z?@F;qX-Nf2l}pW&Ix6)>T147fI$63)dXw}m873JmnIM^JnZ+_^WWLJE$-2uH$j+2K zEc-@IOwK_rOKytXKDn3j!t!?VnetQQ_shRh5LIwg$WfS~a75vQqKu-4VyWUn#dC_k zmDH6&m70{+E8SJ*QnpY|Q=X!HNcn?`yo#?%jmm13TdJI@7OEMl(^ZeFepgdh3s-Ab z+phLPT|(Vcy-Izx`W+2k4SS73jYS$)G}$yQG_y75YF^M{(lXV`(3-7vUYkkVOgmG1 zuJ%P8Rvk;7Je@^4*L8Vx9d*leSL;616Vvn2Ytq}U_fB6$KT3b1{&D?(2Bro%21^WX z8wweE88#X2GW=wuWt40*+vtihud$nPz3~p?k0#nCsV4JGZkP(2`kJ79th_7CjaxELkm`EbA@zSpKv!vnsLLWcA)!*E-vJmGui7b(>V1 zB{q+3m24Aj7ur6sQ?N_0TVVIVUeP|$ev$nn2W5v8hh+}W9W@=Z9M?F$b24x$a@y+j z-Py{y*7<-7vx~b+m&-XIQPAV=ZZf`t)d}93O1lEM` zgjETD68#ewC4NovOq!GQA=x>3dh+WOhmz(C#615vrU_n_9ERjeRBHC4Eu~} z8E-P3GiPOf%<{;ZpY=W4KYMxh|D5og^|>6m3AsD-gz~cUj^xYbSL9zT&@N~zcu;6n zIJNLYk$2IuV#eaw;vFR-B?TpCN;OJbOCOfmmCY{uQ65&lxk9iaui{LlR%K`9^D4Kh zrPVCe$<>Ey6l)r59@aY6F05m$OR76muUy|!|FprqVMQZXV|L@&CWEF)O<$VBn|HOy zwluUnZgp>6-Nx5e)ONkyx_y2JOGjqMxlZHGnVtW-Qo2rc8+1?W{?n7(bE?;{cSi63 zzO=q`{bv31Ca_J&n{aKS{lpcM1SeHZdOX>8^0p}oQ@W;nnHoR!57RTU@sI*^;m&$Cp|xUAatZS@*KP%k!2$SP``1$V&5-D_6;^>R-*Yx^(r+H8E>0 ztaVwtW1a50h3m!EcW+?WP`csO#)OU6HhFJ4wAo_wx-F_(=4=(-+O>^wTjjP7+cUO5 z*b%wo;!dxfhjv--+PYhB_wqf8duHzy-P^y9YhTO0|NE=O0sR$BYXoCO7in_vNKCSha>75S?DOF6(#1Tmgg5`D;VmTsoQbc6s4qD1-ZCE zjVnq?vsH(>8`M56H`dE9O4m2Ew6xSWFw!?N(k)6!(=D#dD@m--%_~+`hw96WfNMkX zZe|47x9WCWTnb>TTvCgZi!uwq?LLsziAnjTCAR9NrI{&KW=R%iMwXT-y2)l~X}X4n zDVDlP#>r;7Nof}5DV8SYhN-DZ>Rbv?D^n{n5pG4X6eI()RzblA#IylUkb}!`cy~G?l(As8Q0~1{X;}F=|W?cgdE4cG8 zG+-Fzl9`@al3A6SLbO>39dN^fGfGQJQj3BMQd3jH5_3vZ?My9^LkUwJuGO(1wWy@D zC>5g3*Z^4@svKO46S&)$ms(s5(PUy^h7=>{vT$v%m~qNXgla^#3sV-Z(KoRI)g`E| z#E?d4Of5+*%FIjm%}+_SGlfR1FR~z9sSoI=z2N-PqU2ONSlU7sgDZ5)PfpD7NzF?y z$*?mvLkTWqDTMl*#Nv#C%nEoCLrS}7QgA)s^8|xI-9S4-sH>5L;K~9rD^hbJTrzW0 z^T4Us#2l8`(PdDzL?W~po0>r_0Bb>#folQ#Dj+8@FEt`4wKzYg6jV~!8JQTF>%$y} zT?rNwA_<#-tOSb*2m?a%GE3}?p{_@`08Ihhh+t44<)!gewKR%Q>+Cl+(cuhM9~ZfzXnXSX2PY1#jsaCo zIhkMy8+{N5stlR{u$Ts^X52uTOaY{!AhDPphN=7>go{lz_pHBCMbH`5_5`E zp=Lrv!0rO82udv}Ey{xj2b7DD2S*t!3xYWaIk(chWN^kou>dBBP#2V%;#!#s){LwY zNf;D>ka`mAH9IZ^NO&OC<20z-3y_*2C{=qwN}9f*p0PeCDCy!5*y+;Pn>Ph8X{6=` zT+GTNv$!O`sM5|xA5)Az;aHNHhNF!Hl0kAbh->AWn4N0p2Hi_%qmK{+B?1U9KP5HC z4t19uOd(hbuF}6KGc~Uy5u6AOp-Pa&;0lq8d2=IBArH0*Ss1PsxwJ-7i!2OR3(9nm z+5=p48$sQGCJa{|>73+T6p&vG9+AwQxZ!OiPDa& z7G?n0>G?$=`T03^xHo6ofO<_3Z$j15Ck~2Bf>Lp0A6P6DgQ_=uP@)1A@Ik3*%7_k1 zMPh-kFaAi(%>|ca;CS@)wQ@-<$}CGwaVyHtB~XWeoo1Dp2dM}>UF^UeC<9Al!_<@% zGkhH=8+{DD;Q9a}nv$Pfnwy$e;^|@s=~kxXCugQuA<8eP6{f^kfn*pc!a=c!>Pb+2 z2ui{rQ9>3&JBP{mEe40Vl?%c!`lOVK#DXADlfD>#bc1^`#o#EXN9C2040ldJX;BWi zC!dn6pPG{jisE8@Lp?*VTNG?klC9G6i*gf7>>&3qWEG^Q+vp=iLD83zYz1*YDApjY zCC8i+FbBfR$prPxt#T6c(rwi%bfIUTtJ{ILQA50o(xQTD23rF+7@`ifaRCtva&fcc zve5?*Zh!_lAQpqnrX>bt4O*HHHha_(a4--{j0P96Aq7@98eHIDAeI;nE@DFptZp>8 zz`;N)F&bRNh7?%cXmEjpfmmWRxQGoYu)5LU0tW-J#At948&Y6(qrn9Z24acP;377p z!0JYWiwm)WA~i3?R=r%^-tPZx24@ClCJ+FT%*@QpENmVzFCZWQGK3MNlbMBsg@r?ao0D6BWB|S}j4_4r z1~a1|?82}k46+OiOw7nPg)y+QF|)8UaWHT){y)MHB*4JP%*4vX#Lmjf%EH0I%F4vZ z%)%Fl9*Ij#9`bxaiN%V)1*bg8#f&kS2amKbn)Sb;^xJd z)LbS%`uIu0)OGX!BMj1@Q?fueW??`igS>ls7Y`VTVS%GOJWgYaY*AN)C;?gpVBs3a3;$pees*$ zX_5OLB)h6OvOIm@UHX}MzjG7026j?Kvz-6I_;RA>{J_u+`cCn+;-4UYwF9-s$SJsZZ4YYShs z*emwS?aaNdRabqM7&Few4e-(q(%ISeOu6phgI!k~^%!2RD%@fE>9O{z5C4)XIgT$g zn)B@L(x*51-|Q(&HVay^?WL8UopW~i%ZCsCGsv18@6KPatZ}6%+pnfszZLYi-Q+I& zrr8@$JkwX=%UF6%y4|}<(xE+|Xrca$8yENKy=7qQ>Rz1n=yosLzVnGH zhkUc@t zJQRxmEZy1l^vbamwN&$1Ed5WcwD_#>Ax4jDiv~-_u5V&n_teebFA{z7vB;`t`Lh;1^J$zG z-MA-pS{nCXo@YMiT$CMEIf`_57kC}GRlYIyV8NNm6aG2AOWCS)=IZf9=MSR$P{ zxnH6Cd0|)P-MouOKe+6^-`h5A%g1i^y`E<-RxO-p#1c36Kf@L?BbFs*FQwORQ`p-v z{qavNjz_CD&$&|bW&NAS=K}8bW^d|V<=iLUyt_#&b;q{(cN&);idg$`{`bE3nn#<% z%5;t`N%N?+kxaSO(s|uo-R&XYt*N(uPbtikd=Q>iC37)bWWzcElbkEO8*8`CNx8UK z$U#>p^x9iq=6-oS>7(oGCv>gQxxQ-RXX$f6&-Q#)wV2(<$N5$`vF`4!MK7P3ykDp5 zZ0FoRX@T0jGf^etT78?o2M7HQ;BNeTk*{I(-RioE3#+po?r6$!8z$^jT&eF`J5fnl zc+1WjF3y{|7c46`24?B_uXe1eDZ9mSdv=nNd;N?FuFU^zqC;ic7uag1&U$KnF)p@S z;pKYnS{Cc0w_O*SrP!t3wO)|JwrBsNgPtuZX-}iNr>scj(|p^zyH7fydEcutzmp3( zmgl!WJbbdMTXn_~|J#ympPRyVgxz}C*M4UzXLaJrCsM0CwU?jWxk@f-rGNV3l-cUB zIY+*mihJFAdGz3>r@O_)qE2Tm?OE(vrI)grWz~w@;CQWEJ?&|{i(0(ywCl3QE8bnf z|M9S4;hJ@|8ndU)Sz_(TmR?fixvS-3rss;LEUg#YS{AWz0apetPryR72Sk;?CXc&9qe3eR|6@NIrDN&5pZOn~GG-oDM7~ z$o5~rYjXL1zV?P?;nI;RN97}(&!$>yI;R;Qx4bC!^aP`_+LU95T;pD@s{I$EvT~yP zr6X1qXSzcgecLZyx^a7=>28kg?z@__Ufs03e<%5dd z{eaU4_vK|c-QQ}+_$0=JUxqgz)~9c+=fal0tvAvQ+KZe_ujpMA`X1^wRXFte$E3tH zNmH^ezM9l|$~)lT_Gf&z&+|{+u-o45`mA+NZYj(A$usjr+kg@lojWGY(I!l#9 z{fmETdaMuEx|{A_GHXhi!;<=2?yCg@SY3lezSVS}FSy(MQ{P1`>O9xK3s)jaat>;R zL93Bh_gG)iDwz36OW12xE<@ZyZVeTAPDS;vm!%Hh z$y#aic+-;TO5s$Ami*lhMQp-Fo*3S#5|wkg+4J5}=(f_Q=_;!K8K!aTCah-klC?<> zJ|$zHXLQ8l&6bwXlFh#1@#kY=CcTkcQLdeD-0_GZRua!W7I?i2s_e&da(>_=YH>;FcadnFM(#d~|g zSLZWj++A{Os_e8!+pccEp>*w1>xzV`x{oFAKR$dL#3GZjXxr(pl8pw@ zay@Yp{gQ>c0edVfZ4P&aRtfaP6)p8|R$7waaHIRKXWpF~E2U)H9%od=)V>Saez!^S z6~9xdS6#{J+}+o|1}&QIt{`bGyKx(Peu%Hd%jF;cMtt}i-}IkhS>USI@~c92inMvv zG;mHVYl#!w6Dt_3a>ZIh>(#vIjT7zk4o^DZJ#p5ibKy&`%dS3Id1}h>6Upv*mhxM} z9E0ynYk2)J@jt`qZ!sN}|8$+!hMaS9Oo{3_sIhUo)CSjkI@{BBW!|;@ackFdIeAB! zFVh^ql+WBWIqA=@^#S_a^ONrxGHQ3sE)m%hTYYyySLGhV_P^;j|1h3Nn)+Jd^jH5V z3C4t`XNpCOR{EX~+4I`5_nPUHM7KQOW#;F*(pN9) z+Qq5Y7q{}OS?$N=n%VveTE`wbt~Z{gx#iDUze&Op6LUJ&6@JUr5H73uxhG`5_|L+L zHP0S*?Of$*78W_v?~H)tT-j%`>X8#p3cGkMd3tKr-y59f6?5i%wv|`gJiGXa-70TA zYk#>7xBh)SCeZ227=7_jf$MSpNAaH?hrRj1^#4p;*uBDqs}J=XwySP(mXH=K zye{RZ@Nnwy8a?fnpAX(&k!x8S6P~B*t$+FW#j`rf+*|#(s!V%u=O@>#s4Yuo_ugr^ zx4LxSiF(m9f1PJ`o0Mx5tTdkc+4I$nzLOJF&++xWlW)qrcQk&Q^qI%D!pHbDoFk_C zZ?jpj;J}2!+gqo{Fez%U`~2PI?YeyuSKEq}nxng({Azv6dairh4aI`O-M+%{yM?~q zaASOSJ~H36@A_tT`ImQpcy4su^5$pFho!N}1zMYf@^quP`R1+_gG23Zq%<+h$LC^V0g@+vtl6-5xJo))k&t{@21RFlX0MsbjHK$$F-K z*4~|rr=F=~=gSJ;Z4m4`v{qmL=?;Emp~H0w$u4 zUu1TfPHOkfT5`tx@v*ce>3fm_6&Si!p_T4pZT-H=*PR3Iz*@`C2t-5k1oX9FM2_?!^EPEc^m z-FI3sR`;KZ{Ighd>e1=YSC36@)m>HczAiFRw`{$lf?zm*eEY<=e`2~nJW`10ELI6w zsbaJ&-|VG{(KfFK;=KMF)#knX(I=4=R-bt^brL$9Qc;>|CeBg+3Npaa-rnaZ#-;6k?^P5{viq6f7+N^OKC|pt_bfxr_0NWcM>2(HZ}CL`GVH$k)lIoz;~m$n-WN-BbKW_h76{hh zUtFAv9-4%Ewy?9$Sn}ugCN6W?1!L(;MR%@6&u%zF7LIEM}(Be5Kj?%}HgP z5+^PyvEEZ%ez7LuK?)cA;RUqUsDL1A= zOZ!>z)EzrkH5#`6%4wheIV^Pf)t4rVyRIzzc5lK{-!y>u+&#!-ZSep0IYgs*C@Wh0JGhQ&psy+ObCv;tW$*l{8 zFVB1VhphVPvNt#7WbX!Bm)$H;`Gt8o(ObQqeLMfx`B!1X_WD_B{|<+I(Vn@~>tx|G z(>?C5ejNJn^UwAzwOhYSB+AAgFUY&w8uK+`h3n!o#xr+n={!%7{-nI1_gVEVskcA9 zewQ9|+7z&A^@hb$vuAc+GK;Ug<;pgjC9>ynzOCX!p-FmWp^v(jbgpe(#i()n*;WJJ z1$8T1j4GaK$vGFUX%bm>caD#(>i#gxZ;k?6&Wg;{``z($kL|+Rd(kmx46ezkx7Vr! zz7nie^y$|)_p-0rea^oR{rleSuPQ&j^2Lqd?lqf_Z0iK!%vT7P1wk1-Dt)ARbjFm!#nv=w(Q#?B-Jhqgtd+d89ciG>+ork@>A7wM23_pCr zErh4a{L99f%bzKolC9i+MfA+qn6(0op#nxh{~7j(M6c5cPP@jrD$HoD*UC3@*V`DL z{PsR`TN&#)^Xe-*Gwz=L&!FRH>U2hTa;^BK44us};*BT$U2?aG>l(>AR4-bk^`9Yz zU%g)OKf?{xKz_aBYf|jJBv0u@vG3m~Qq&S!!81$HZKmd;!s9dYU9WBs$@{SD;qDKQ zo?hRe_HhLpV(6Tgdh2H}?KGo?5vxSls3?zp3Anw6@Qz zQpsmd-p!f$O}Ws%Yys!eRb5#etanYNH}2T#W7Bf<_`{Con%|Me*aghtxXK6M4 z`|wkJznGZ+Oe16QrDnb-TdtY>LH}U8@b_*Sl3oZ%T7FYfn6+E^_z!?5OgO zGI3$oq5^y>LwR;;x$4bM_YPRT;qlqHb`8gdhgYl`R_IQf&#u3pE9{zu;)`Rle4BlX zEyA*kV$N)R{>ywjPhMh!LN4QmP~lw{wmtgJ-=-GLc;HEvh}86NO}Tv4=S5CU-7rUe zO`qCrrP&MeqStn<%L@B3;bES-fVHy!i|E7;zePpps8TgQo`urA3zug$~cR`*B*FkfT# z?C@v4!*$*7*%{d&|7QQLw+4sA_P_QB+8b_PId|fnfNyK16B<_u&RW-5pgb?=&}Vkx zSyPt9Yy8z=sIsw7`^ed`#p);1e}>0Tw{xW45a3ySMeFN{603+(q1F670fB~!!fLI) zjy5IlmkHePF}@l3TL0je+!yb4pUyHqZ^<`3ASwBsQ5xHij#tl1e)XMSGP(V7zi{-Q z?l(q1P2Zb!*_;&k^wZSHTy92%h4@5)?U}O{ynowD2K8R}ke%;XVz8yfVVfiHGv#qFkM-pM&U;hRgOV$Ayj-)xm_yAx74NwFaO zc&uPS_nn?wXY?<#g`b@&bJX{&>+Gti?7WyuJl-c8y+aqdophDmd(`$wtetJ?`-GEo zoHoxqzeH=v%g}XAwPD*0S6-YmL-6sD(nXVAeP{Nb=u%oJns?ddal*qMw?I9;$OX@C z7)#ANQ92<@V8zF?Cv%&fq#Yj5%43pVwrhq0bNNq?z$-Fw>;EJs@qd{cv$AT=rah+) zn>OY}`t$~O33)C*yj<+!+3VBeylyoyyq6h` zdGjs0E@0bjCdJa@yRJ2H{*3=8ccb^oC#F@(r%tUsTE)v&+oF5*l;tkR0)AVO>ANbD z&S-BwCNe+qrM}lq*Ij1e8m>&nirz|nKRf#+owF<-)CH*+|7Q^YbTt0eHkqfj$?k8~ z-4zR89LavA(WhPY#3#v|6LSkVd4qH9bn8D%<6r0bw_RJeBl9Z1+lFGE6>mdd?#om* zcr1L(;b0UKNA`oIH|+V}TRc9qqgCJUM9$^Kwo1b4{+S!Kmuy}koYdgCYgOl>)m7D( z`6et&)d@h|ATaVAa7k%H9F}mbq&V`B?*AG8xtzu>^f5pB%^3ncV z%zDq(?4KjoKVNaL?@Ev4RWIkU23mVJ`f>GwyPB_cKb%=7(o69eR*7S@v;N;Jw_p`@+>HA3ywjnX)GH&7IHOb^cDjF=wT2US~~I zjdoYoB2iy%y(@|*()vC7-#qVKd%Wt9_=&(x%Azhi^+deG_vtHUD?M)3a&_qaoO*Uz zcZS=6%-SYiBNBGSy(BtJ8&jJ@sgyY7?-Om$lF zHIDo8i!<*-BI|GL_&DK~??;0G2O*(%bxFTJND5f8o1}40xt8L)h;>i$`F_RAJHLr^ zIy4FfcRbP-m}FgYdEs6$VY@4?(`Q8TN9FciEcVYV+MfUL=+q!K$u#b-9!n#wawG(! z#O_L@y%hIXIH|w)bf-&90r#WJ?DrjbyDzL?!WFCL@KiHcBUvW*~&@_o)}M*d0W$Zz|5gEdTq#r!tK z{$j`~TdQQlW$qL1Ost4B+a9s&y0nVx^P_?XTtDpC=oYk>?U8lroU5K^4t-u2#Kaw& zcgfBrW%0swvz9HAu~>h#_3r%sH)_hsr5;=MY&siMc`{8WLdW89%Bhd43>O4V-tGMm z5I^Bbu<4idIZK?*&3yRlshqD#*41lAd5=f$iTB94ak_KWC$-Dl`}O3^?k4|(H0Y3*YxIv2Wlin!L~ ze|InRWHldAuaDBbv5H~ka_(?f#s3Upy7zflwQbq*y#zjA3|(UCXxVZsWydswdDrf= zFgbs8300rnm(=^m@JZe++LCd@ zBB`T!`FoLdy21PVYcd?&Pl#F0ayeGEDyZt*inxBsmlrS0&(r$I5Udi%VwJJ<^q<8G z-pZY6*wtJjwbx14D>TX9Tdrg}=k^|t`L`8v1q1e+ESgmyQy`ysCT5;-%xY6P6LYOg zOLjb8J#WJv z5IuV6cBw!2v{SalTF2X`zn+@$f#J!SuQKkhgd)XjCpY-zX|J0%?UO5?oMwKFIU~=y zV$E4gi$dDw_Y@p+w6wB1dMn|+aE5GRY&@%=u)qG{<1*}fo-LdnGw+P$Pp9fj`>r`# zDa;gfxqM4*?~xd3>!0rbboBE!%;-t4+UB~}sMP(A_?)}iJ!LH4?u42I{kc5-X_<78 zHbZJqR!{1$?2VB-mU8p-r1RRgwRkXaFN$8br`>z+>paF2M~^F9TQP}sd9r79b?#KB z@~lhIYaVp%`pNfxn%I$`Ij`cqefyNYzn@!Nu2B16P=K@^#%Kdx8f!# zGCyAm?PRW86{{2@rPN`i*K;r@&iv3_v4<+zQ}2i8q!jC~SzmrFJMz^A)A=qt^FqB$ zH))CdXmPoH^xNbR?$dw8lP!0g+#eaV$?VW7GbSBVzK4Ptjn67$RDW;&eSQAVf^a_8-L+Ye7WpHP%|x>)t1 zM&YC83r~I5m>q7EOOO_P=%pmYoOSh8g_7Jyd1;r*3?+*_6RyAJ>|wVH(Qd)5_F|@(W=*Z#+KUa4%{tc{Lhef z?X+g->Pu}E&W1~OoRV4mNPkMB+k2<&4RSZ@W?w#bC-TYWOANC_%`ZKxm^$rcNtVx# zVy$Us`vVG}&q}zV`$&vaR&~P`xu}lmQwpcn$^T$jz2UW4mDB2gqSZIGHH}I=C%7(q zzVEh8ROWF{-ke;)SfRaVb3guPFcVziw7z2h%)M4=I_u+>Kdo5vrEbHd#~NR~IJYnt z=Czm}t;%jS=_8a$^nr9!GOV!Bf zvxa2uHu2Nl#@+Vi^q;!$<1+%jXWJ}06BackXlG|$|BQd(>VG!A4ejmmzG68^=_>p3 zZ;4mBv-FA-cb{A)J=0KPbHU`zO2^F`x2c#t6y4+EALQNt@>9~BDSnC_sWOVap5N!) z&6%N4I?biK-pQRuQ1?N-xK0$W+lgYyl9*-pq*-1UNoKFuAtNexieuHvcUP^G*e5u- zM6#Do%fIzetJ0<2JI~i-xBsd8ySZ`>9ogyG|91ZtT{q6ja>dJQ_s)NxbKCju<(&QY z`U>wGLyMAR`uV!}s($O7uRJ)#_xMVOS2{QL90~io`=FsNv=&Qhqt|wlDK2GsPUG0tzGoS)R37>kqeHN?)a+pLV)n;dGAf4WRNESlA5 zc3Qm5ZhQ5kQ{GHVKGhZSV$%%;CEdHvw&%D;&%SD^&9dfu@`+-F&)No#FGY5lInF44 zx!}y*>#GIl<>)HaZl5*bw&UIEdH1vH<|LaQWvtatwQ5;&R@s8JLM3?dZsBE0bIvSI znDW(>+0^{c(eUtTAwWqo>X(3-LP@as+)_}&ejqzdU8_PqsdU+TPcMSV z&*bsj>F)((Gfgf%&^;b3R#H=<`b6&R6)WF#z0Hvej%|5nWhB>fL-9_u+u9eMXL60V ztc%^UV|TKh`1?;Hp)tPI<{_(o>hZ<>yX4<4;(C!oCO=2OH6Zky@|iP#3Vv;>o0-mR zdv>PpGp!4cXKwb^bXIejEb%-!P097~qWK+6h1aEO#Qr_V&^WpyuHW$XZM7sbs|m3M zO0RakEU!!Nk2n>Rkgaq4YqYUc+Qrna`pxny8_X~7e-UPXETU_%ip=s$$>-AzgOlGJ zooz5L*K+-4Ro}4x43n0LmNG3m(^bPkAn#d_u+HhFC+#+|JW;crDAe zGdBrNWxmQ4t2Z~`RhPHUmE?}MJOScfAH!N>qbCM#c=jU3Lg$df)2tQu!k2v&xpDb# zWz5ao`CDW!HbjX{jy2hNSj4vecf|u^c6H%jkGgqZMTZp6)Uws>^tU!zV5`mW^vEk| z=ln#m1A#ny7N0S>baTen#4cMlSH0$~N>lBuMcd|0jh}J)0E@v7y~_!^a&O#wv}(o# zy$5ABTvuNxt?><6{PaJAhpC6 zXZ>t{bx4LadEGqAOPPy~uqCtXa9l>_mR;D>Ii}iP;i0 z`AgS~uC7(Fw;A^yF`5yV@^@}#$(yUwUv5}FH)x8lZNNNXUC#9{KIG4`Q<#5u2aD3; z(}`#Fd_FT@Q$H@VUupU*qtGJnD?bGv3zvM5JYDJgWZwN;7SAJ>ZVMjF56{^?^~UC7 z;R~{E^P65-xLt0m=W>VMHw)iI?>H6rtaH)*<^9L|y^l@X^;90}v(>4fXw7LkIyA4(_&~=T(ZIKh;(x6> z)*tXV$A3a(*bV*TRjE=tp6K=YH02)5%VC!_Yl>#yy^AaPhNn*Q^N;#{GnUV9h}y8* z`%AC)m*kfko2Pj#J~P$1xNKgp-wtzq+vsNtpNY79pD*`Ak82sz!pCxZRnmISzLZwH zrhSuH)7W60r1*)6#lGE&#cf&7PTuNSJnQz4;Jw!#ZTgUC&bq+e`l80unzJvSt;l(E zWeI;^Sf|W|Svy1K1mwn8&lYqkV6a}h_(!++!j|`Is@844A@_H&(OmjCPx%l28nvA2$^JRbLLO}o;r`)#jJ2VQ@7 zUGAKmb8N(3-W?yG8oivm;QZf(?44rwPZnIYIv%`z?i3%Zd`pq~vVGZkOd&@?88Vi4 z>zcpXwdC`*>pJYr%@a?b(_S_sXhPEI(nUP`0*=m|IVGrImO8_UH9yyqam6g1&xstb3FgC?%4qdWRR9EY0|#pUb_yy-#N)ldIewR+d$2=lx~dl~)_7SSh&vo@}e*tAD{V z#{0;IWkD%{GySFF623;9VY{D`*R42VkA$Gn9$yi?4<8@dKRS4_na?(=U#`%kRC7+| zo8Nv;u}+do@6@K3Z$5N?@yqu!Pqv?EI~W@5vEVsZUd`#;1FO?Hc#aks+W1w6&Fy$4 z7;&*_-8xC$M=UWF^XzI%Yj1tf*wh%^9I)gMGx)&{wC`h&)Zj-k8}6ene4QD@v%2jdRpMgJlDRvK|05>c+X^byo);$}v;!D>>r*{4ut}U-K_k5Tz zZOw#xT`O1I+a=#;{;Ou*yn`FgwW;0i&)IZ9a+|)%eQS$}+N$;K(|7%e_zi zUTa_MdT6fd`cv@H>?sv{POjwdX=^%X?IEW%zcu`C1cyOuz*D`I{z=od^b)ye%y_$W zo7mLV#}2%IoVETx!*LyL0Yk}Uev7Uu9ecYOOe;hC1Le38y?mENE=N-nQ+CiJjv-`3>a;k)d1 z^37a%`HzVayrF`xE@_r{U3J>*x5j6sbhY4P?ui_+leeAU#Jlaxk-Sow3whTw*BdED zcQ*JKhie~=Ua(zhld!_&r(#btzb}#d{cg32rtA^#DYJYfE*uT8Xv#dZVXL4jSL&ZD zmhBZre>kUVgiPbU@|SzvgdIQGmVMI7(qrK7U9fuF>p6RGsKqv#w1ykA1@}G92s+c!7yL5O@J0B^iGh{Jc0Eu?VcfVO_O{;%t(YI*9+y`aTxS>Bmr%vizPx1f z-^oYfE;xP8a5xZRB3pfD+U>_(X9U+&ai3HUS{LwF@%)0>7hZl9-n>w-TFKYO_GSCWDQQ=)y&(w13`=9LAHJGAn}*v>Z6lK4H4O-h4j2 z6fqC}i8G|dbut(a7sLnza{F|;hwS%B4{ga8%NAbdwd9J`G9TB8m8WyQEn%`+pBu3D zigiWOHb0+!(d!yN(@oR3nm>Nd_2N0s64Pt6GSfP!RyR~mL+R#3{nq~s{H4d%Xs=QE zII}NJ@zl=^nOl{a%5)}m{oGz4oaUwU;M1?tV^6fc@OjCdF1S~=QseQBhh`Th6fi1Y zSsUbXd7942-RuYcvMf5tr#)_! zY5Y6UTy>+R)9G^C6+4=qZPz_vRTD3!W6Tw%c1|E>=~J7#Og%h1l}bF9JW1aoq!i{E zc*M8nM8hNX-J+3mq^^8izffxPg*&T)^Vz&YgG?meZ7)4LX`)q|?ANDTv+6$@zOtPY zo2>XnYHPAu&w7uGuV$`GzN%5mE+^Tu>BPGoTuWxJZLa%reZNf7+nYaY7~>P?{)$+9 zeb$t-dW#s&G_LaPzP#xYgW$FW8j0JQtnTC~MQN@|_?28H+Px^g(i1|M6UCsUK84IBXW_2eib}XS&&(@NK>G9t@#5x`_=sM3X84= zzOoJ6IdA(JwVRgNy6;>B)K3O#iG-Oox^nRE&~GL@CA zkX4*6f9CdqM+HIKj|jLH9#>Ocv+qo0zqw;exxr(mFN|{&-fvy7sI9BRZ-UUbtERs< zsw|w-m$hq!qh0^z$dob{X>Qj~f?G?OLYU(I<^L=?&XmUKRd`G$t?5dpKJOW$FNysn zt808Whg{5Oa8-GqXY-#S^uh9%QzM-vo0i=Ew`;+L^fT>z&VL`fR$Y9Vw^P#W*xR$F zjGt@2JwClQxKH_7c=xo|eMgu5TG#Qp*XYKFKCZlbDVm#}&HA|})Ipc~`;5B(3={aG z`JGa$w!HF-T<}C{@06dy60cY3o$B1>6Fk3S3v=J&lv}eV&YK>a&>4Swo0d~F$Fgwo zi+|1>f3bDWaqrjMt5(DYzFNF|OG|Z-xy*w(0{i0q^Rm{MUVO9La7CZV{s7jO z>*fauPZNK5`>562_l7lhUs}0J8+e=)OmB(GSv{Lc!ZxI%H_qvCu5x&6-rcudMVqc2 zK4qcd`0|aUjYjg8h|;cwVavBlUpv3)%C@Fw>2CbDpHGxgi+{?AxlW-HnhO;;+aBL|HqlM-#k+MockJ0BSG!g}J$F~q zW6h+X8?^z8EY)MiMi0YPoB-ri37T2co zrefOd+#~zWOx}LPOuU}YV$yqL1mlr!I2p_3!ZFJsmA$zI#be^2H)c+g2|W9psvVv{FFF|72c=w3hR z;jX;xB5`+i2uyl%Wk+WIq1X+k0ZXhss+z-Bd1XA7U%66u)9DM66LOtvJhp^gIjSFI zl)vLY!&_-q-kC>g9)>L0DrdcASFqucRkDExB)&5z>Akp|JG;y8<@wX)`9~XSxNcfF zZFMi2Zp3SPQ$NnXXJyWgEOQS>a|Y%RzgsnNIi7}$drJzJ9o@dNJ7lKH(v0x?nGE-` z%l3tMJO8@ddi|I~tlE65y|Xm8NV(*0p7Y#^Z<7p%-{0fC?hmDT_W!JU+4Ii#pN0Fn zWxVRTsWz+q8hukwsftAQOI>y<>2l23mHv@aV#gBi)8G5#XUkdbnZB_5)4aB&)@JpY z8QV;GZgU>Kyki`>>>&S8nex&x+^BTweeA;ihoC zB+ipt%#xnYSnFhPE46o{*t)?xI}A!`koUPHCG*4DBJFm2 zRqM`0%6tFh=>0hRw;=nmK+)XFH}B`HFur?lDwnL2Psf9sjnZvt-uJHZZVk-3vU^kX zn?cS7EuF7n*|pW%Q^^401S$8S#R z`f{t)Zga98uU$^k#&>3a7nE+ae*N6#KZE||mcsebZ!{O0JbN0PExkcM=T2hV3H{hb zD)+wEO=p%3y*BH#-R95BFE7sLU!rw+w^7ne-y-d0pLZ`k_T|~Cu6fDdldqQ8FR(M_ zu-2V@gyG|@$4-xO|ESc-pV+u3W24!V-h*`x%q@>oRL)Pk>}Q^o!Zb^KiI=U@v=av` z75t9QzPu^-KSP~4*Q;4|;r|vLl?YFa5e~`Su+rSl+Hkgb_YH-ayUc%RZCiRb{9|^R zzELlC%FV;xt1UTupWT{r(bn+2y2_pj0>8hDK0Olrw4KeE*TgTRKI}}Rf!W*4cWx@$ z87p=^%$K!HtYDvTGH9i{Qs3(jO!Lk99_yL-+eq_wuK1qIBH+12g}Ys63#AXjCk}ADy41EytB_aNSJ+2$#*CJg8(x0=%zV~&rc_C!$oJOi^O%$c zHV7VgoB4Wi_u|f`Pr^!1#Fc-wYn`1MeOqEXPx`KEFT)A57H91e$Z(O}$7Aa`F=?-r zt4Hgq&&K-w*|ov>QM^s_Hx*nhy&dlWhSdY7FICYKU8g5ER9#3a?b&l$>7}R2 z?!wKn!Mhe@cWpn^6dk7SBSLU5=e8#j;$@vs3eNfh_wSLjebM!GLw&yjOm@e*SCW;R#&3j@)_5H#w-- zDE)lAE$c7u$6_i?`x81}OuV{Gyy^DI3nJRPKfa0C@KgItoi%&c@+Dld!WV1#_1$?- z1g&89t-8tJawn)S$1Gvd>e46E`_I}%WQWXn%^6g@Rp07D(wQW0<>)+R(G^Y)J2>_& z@XT7ax202bsr;h<468-A?U#{}DQTXyviN1;t1H*nWhLjFNT|8a-P@_KZq|#~KjK{b zRh+{kO`VOLTd#<$v{umlnJ>Nj_VO=36Qq05F5%e~n0Q$FKSRZZ-R1SucKq@Z7xlvk1=wjo<5M5--u&sgPtai_P$yT;1+nu?k09|`Zt`~Fb$lY*#(^5Oh7 zRlyIwG%nxea)JNd8ugUyRW(uSM;jjN9T!-oe`5JwqvT$b^L|AvC1O8W4@Ir~JvFp$ z{qH^TE3^F9R4ur4d!qZEne1#bNfXw(maI)+x*8Z7qIx~H{%HQyqaFoqE6%EDep(im z%(8M4>%KnwfYu!@)md8e`8xLHUu?>k*tO!|)Z^+`(tPIIdAKKOuh}dmKV{h|b06oK znHeT?@0dI;?LKu^)c1)rtMPu1P<9npg$0L~ZI-jUD_^m_M|Ge6u0E^w6(4qntQCy2 zkO@l8G7S1F|MbqzP45yWzjk+NbJ=Nl@M8Z_PT_c70Tw-nU;F(yxn3Q)c`8^vBF$E6 zTS3$+fj8`)*}T)vbu0c3jEN0gc9g|E^k>qh?wEt6Cw|(lvR0g=0 z?~9EYTe(jER#w~|pW1PVP3gd5(>x1t9OcYJARL=RL{bK1$$nCcmN?5X>lc{aD4ZMkb-Q=So42=5S=Cg0 zjt!r=;ruhZjfc$wkFM$0+LY69mCHZi=8Xj|hdAQIvi9tGHose9Q*K+$jEZH~6z@11 zKFG@#pT$)5SV@RokM&IlhhOILewn~0KMobXnjbLZO^`Bwf|7FSe6EFh-S^-7Xz@HU zs+{`Z`9sdQwPoL2S~?hZOcYvQ`r)Il^+S^xMdj z>Z&%YI%SU95#D3@iW^J$MZPB(s;ByE7^S!Cr$iQCnyIco)ms13!;q9)xf@UHIi7zYd+Dv| zIcb~TJmolH$~K)(Sm!AB5$PQP>wi9+&z_J{G^eRB=H(gvt>@werg!y!J|x{XS@G%3 zdA%?9>UzyKOE$SQ=V-!`6H`vy`SH=^BKIqCo`N66rd?A@XYeLpW~(aK+5Y0$`k;Hu zgP!cVW8P&YD8QJ}UA1UdM)%j`q^Qb9;~d5(5$g?SOJ-?!wjF7Y*t>9oZcUV;)VjI9 z|IObmZrdRwckhd`drrotRbRHxo9H*^Vac;V_FKXVybGmPWj<#=YA3qw;qmFujGwvl zR0R|*^*n9(#FZ^+`dZEHRu30+r)+XO{BGY{srTg#pN__v%&*%hx3N3TWyO1siOdzh zrgJXIpUAs1Z~imZBTt_1k7LfNQ2(~zWX`38-YT8xbLvWN7Olyj%)D8l)qcV!F+3~W5h75OYb1bywTkhFc1;4Z-`9_S)$$m%Ni{b0FAuh4VzSJ_X@}#~=T8>P+a~9)D-| zsb5>}H zRyEi#b?esP1BORG38eMQh4S54a_*_=@jC()aR%DwE(L^$dY35wyxpzr6f!M~f5(YS z2kxx69KXwG?}^(?-`Czt@t>y1x!I+v=K7+m@69C^>_4MC7sv*XTME{i7cqR!$a>6|*+26k2}ZWtVl? zsmcC%%HJk0N>cOOmV9P*-1QjMJ_ebpg<`>aUdpd#_4#fRxSe@P?7P5@mAxy=v+LIH zmOY^BT*{X!UnIZw<@yp9B~yib=iND_d!2hvFIxZnUrt&npZN*L^56^irY>{+&#>^? zV+2ScrlxmVmly}nWi%qT% z!i(K*yZGy;Wa`O3?GkkIZzs}+0h6{{~0oWNAk4jzlmJ+LR| zMVzzVKifG!HmaOAGW|M#b}74DyI4px^NpJr%T$YJCQMKWqbHRd8g+p1!x<{9td*K*kr`LPn)%*{4 z41fPAXbX@@DY7{=C2C8-Z=14{hwi7Yvwrk$g;mxZpErM6N`F-H*d!gDZkuyu#Trr5 zSJ~&elib|elK9IRu0K>2z5XYnx=&)QhssOTL_S!GGDLSniJtHeJ5n?89}(Z`ZnV#+?GEJL|#|MU#vgvv~Fzm2@zDnepIe z$-~QY8;=|>m>qagyhEyT?e_;EUAN9{s!NfX7%;))-Lh-3g*%q=MIU${;JW<1s9tI1 z%dGzlHe!t1CuGfhnQb0E^VZo?P31i=jWRDcY*F(rnqk(t_K^3z(=|7;wrz}R%zd77 zGIH6$)1Ncm`^~tsF;}E^y~?NR4wuvWcIn=5S}E%7Vi>h-bNJc@zg_+oXKxn#^kkGS zI{0+a-J# zdkwAGB5kd_(~|h--oLx*l}qdv|4HT@XEgiwO8>Ig_qe%nSI&YNPpax~FIf}(NJmg; zo77)7-y8GWCGI+{G*LF&d_Aye&f>*~lO~;TFP$i^`%X3MR{Q#TL9yG8e=nkcgtEXhSgcyW7GTpGwhbD`u_Qibl}31A|F!9W^QR+?Q~ByfA_A0&`J3loK{Lt zV!O`pUU5gsU+;HEpFhjyR;W37(Q#AB-3^zu$79_FPP3R+{jlptJo%++td|Pp%(%9C=4{O~-BVsz zR;4928%@8&?ezFy_II=Mx?7!IzPCNAE6#fM{MBW(U($qo;wC$J-qzja2G|3jZ^d*=`rz%`0h`v{1?UorTlsbC$(h^D+^x|XGjE}N5F)44ibhLV0EqQ^`&HXqiRMsWbT}t8ebQyCkNi{mII8`!91Qx?hZ|yrFw@)0gnp zZ7GHcQ*WtHI~wY}`(F3*>cXQRCtT1d)_myO?z{R7Q__Olv-OJ<3phF*^nc!ZRNu{e zra49I4&SukH75&RF40pt_LJkNzi@Qv&-P#Y<`^`3FZIfOb0}Ey&*0V)4;o)f|a}_|k2U_6np(S21d7_i6E8Xxe3YpQDK9 zyvL%i7eAW>q*(^v?+E@CmcxEy1INMBwU4Zp{7suwTjf>g0{De8QC7?-YGfEITV*DG(U;SNi?4ANvpO z)Y~Gocw_hyuM=B&+KL}r=zFF<%yQu7+mT=;#dJUW?d|j}E^j|-Jht1D_j%23e&?W9 zeI7dNnL^*5YW7Y%9Bp&!R!8i=>Wa^I#B{zKyxl@;|uQ;*o{SWt#WkR5ZiYE~f1;i3 zzNZJ*$!~~jvHW_l{%2dR(&NHfd;OOk_kRgH37#rQ+w|B?y2rvgMJ_?{L*V=MGsRtx zhPBiutnX@=mUOi1vsr$G>W<{v>!J;s_rGlax9FI=`cid;;F2#3AMW>@*H*mEq1&i7 zs#$HN__BhwSo82GWk(UC@~tP={%4T0mFHV&5Z&|H_{I}8WBCp5^Oihri#_2TzL4w1 ztcu+E=fy=G5A3)pyh%EHnORh|p^a8u(AK@nt~>n5-Tdl(8D~>sjM3T{K60!I3*pf>R{;9)8^3etG%qn=hMEEj7hAvxWCh+H`x$ zoP$BjPMpb|aE?RdgV1g6cIOGtcEu-Yz5JT{^0w$q@3jr!pw zn_f$5T?s1+OIdurEWzkaxw)QzQRzJ_J7Tq=6NxR9gc=f8Pw+J=+OE$1%Z5xIyp z%%o$HpShHe#XG?r7tHdq9!}~sOWsk{S-(+!b=MQ^Yx>tT4T`?nar&lLrL?gu)+uj# zcB19Cl6tMHc*stUjfqp{9k2}Lnk&a^@>p6$#n#y`wzy?qdFXX{4mkG$rh;a?y18?7|B@|WRD^^0=VUnZw>E@>v5 zk9xXEtC;<)>K9p&iPpF5JzgJ|vz~nNCu?+{V219N$7%k|eVfd-y+3ke`Gnw4nMJ?SqRx80YH3@A8|y-@W_OOLbHC|65RgxN5#6 z+x-_dtAD2lrUq($k%|aeekSu(%TBI5i3z+XRQ3q6`+4pA_wNt8k3dCVvGR2@m9&R~ z0(We3Rqgd`x7am1u{KpX2e-dD=0BNthv(Bf(pzHb_ly-EZq3m@#k|Qm>DhYL1@0feyIkJ2%=E|A8*!5b zFa3%TnZD=*!$i?Tg$F#@QlhtU>Nmc={WbB;0fWA{4NI>q3y#{mlg}i4XHvD>v?uSs zb5&G-6x}B1d`0Y(#+0b;OP(uz1)5WrwiZlVBKy_v-d~%$AG{9xO6y*knRqf}`|F~Z zF3_=PY0h#@+B;} zI)A0nUyEPuCzs1`8Y%a0PuTY*UgT=&0rLs%CO`R13_i=UzMSyH`N?0)XX}*q=+-b? z%~So{5fgivV|ovdU)&s%nLJ1Pc{t>DK6PKb_^rUgN!ppKgkEv3dim_=+J!xQ{2^z~ z&0Brp=!)7!#!O$=7-VKyba0*tv6~wIG>={5{c_3$xqI@I_&jg_sW_2mRCv0^UUHxNgu8JP zC*&>qMCTn2^IpTQd8pgK>37S{`j?GioJ|vNONQ+dHaqoB`NQ<5?JE{+HhksF$-ZW0 z0r#sTClBhGeR=H?JL&Wxd(XR_lP7Gv$}gUO`;klMhY8E?whIgOdL=8I54m_tIroVB z@tv8Ee?HpH65MQ&;&rQp|8~?3iJ3E^n6$c9E^GdMzKk)##^<|T-MOFUmhKbHqzzY^ z?qt!kHk&UK#m?hjAbMG`c@%6=4&-_=~iW?j@OWqk>7VLk& znD1!V;aN3XE>A!Dqj1r7FUhy;jK^2HmS1D_T^h~D&BuM|Kf}#gj1j#0CA#keo2 zA$)$))+NT@`)AY(CQ7zf$r~Ow+4x*MEy%lfhD{RG}}O6TR+WSDK#9*`(X~qT*Oc_1mqc zpIPTlKK|+QH0zn7O-E;H3udmp^q}O-mZ!JRaFjgqzx~PAl2hqr`EKqo)+xUe2xzFbFxv1-5ymODQo2RyG z+kF3zNAvzOB&?5o#eZ}9#k1nNf$k<@br-|eajRCh%}?vvwu?zlN^;Ssw@)AUD_;G1 z-u;^Hs{Fu*r^GpC&B(7dRQA?Yy!Ot%&ft^g;|&TfkDUvRl60pF%j+Ng&oEDF4f~px zFW%m9PCgROx47bV)`LRlC1od4%6ca#ZQW?B7tVWR;i^BsA9lB#s0mFwx?OKZS--sf zlz($#?yu<$sGh8nS37m`lqHvsKKI_;*M2)D{oc3bYgp44#+&#|F)8nw?fXiV_iX>{ zoAY z*WSi3>v77F;_Y3!Ejg3@co*l-`smSa&zKqJ&QyYHfN=KpXKlM?H8SA zfBdkdxBBd<5gS5wwmoi?eXM65XH)iLQrXXnhq4bI)tFQUKD-eVFK@_Exxa6RO+aSf zuaGk{Ur1N6tl98ID!b)%vqnQt%gl)f@4Dthzu~jzy??dd9?iw_m0VH=gXRY ze^~RZ@O^B;e7_4vj>#o67*04HE1@xSqPQH#idnA<3xEG-INVaWdhIFgCq?o%zZ-ka z_tsgjfJA7t)IwJe>?LetN-H3Ywif1n4MAP zu~X{x-Rg?Wp13K0pSt}z&vbR>%pC!@?6V#}adG~tnB8Bf_C{Yj+ezXxTkg^7L$Awv zkJPkGzPI1-=$W>8C%&Y=ST?J!?{(w{{k)V;-HVSl`0Od0C84!vzNhH7&)d)T>wh-A z|9Ixj9L8sF)`!gWUuF1eb>h)HAG7owC%2Za=ZagfFX`>_>HjKraxJ+Zmwe&W!=n;g zzH4svkk#HTd!c{At!WEh|MGj2^_z+PT;a@3(o;nkU-G#X1s}MtBwZc{I`^V!*TP!W=SNbn`FI&G z7r7O&>#v8j*!`pssg`A)AC;bH3!bg1wye?@`cad+OIuj5iD%X2&uTm?%`Uxa(Kotn z)%LKknD^Cul2HYRb1I$?WZFCQT{Yn8tW88 zd1G6~-E(Ia{N8i)_|N3su0rN>{HILgy{*yuJG>W(OPcEckPA+RLw}bUH+5SsOQZ=1wRqXnSq1U3(*a zP5a!>-xo;vvNd1YB9!#>#nLN27Vl2YR7hSp(c*&XLPgD2H|~Dv%q}yjxGr|dOHwZF z+|1f*k1p=tI4LVSN7!KHhbIgYTjuP$z*)G>N?zMzou7Dy#m-40UpaM8tqxRmUKz4v zrnK^=I~?kXoMuYCixggX%xb;yKxc8Q+-#PFD{ZbGGETDtoW2T{dM|f~A(;lVUf=*(a|GX}{SU6Y8HKKTS-0!(`p#S{qB2OV#Q; z-uF4*=Ce}Zkv!efPcNEQXT5S=#}mdXHRZUf;|l+Yar4fY-B){gQ1*$X|C!Ap(VQW- z6HG7cUwZBF3>k+13>gb|8O>kwOtR;8cH$;+&diKPi|kc5bN@5&vAj#seSRX-P7Wjjjtmvql*5ZFqLO$E#PRPut0DU*fx6>N!yj(cZzU1W(M?J~*l9 z(&OdI|2(^`T2hM4CQNDf`C53e!ujFbhm8k}_H`lSGLxMR|a+P*mcm1CDY2C)ITj%zsib&_*HoCv%iCmuQ4f}PR>$I~NUWKap zS+|RMPI&h%|Ffg(`#*INYtC>RF(;gw@M5Rzg^C`J*3J6bO-o`fyL`NAa!9oCJ(t;? zhked_%0EtU{m{w9b^D)A^t4yJF(+J426Q>h?HAVHTM&8b>eJ6Z6W6TMIF)cI^6O{Q z86V!V-7S1Bx_OJV(Z(gQrcajG{@F2U-^QInRmvwHnd+R>TFU3sR1}?G%Evlw_vRb7 zw+daA{vG|{+mG((-oVA1Cu#A9EcLL+nG)0~kmB+{O>d_%NA|>tKDV6JTY<3-6^qizM6dI;rOJy|MF}jIe{JT zwg#BSa=ZT2lh~8naw7O#77d(EFb^*PZ3;d~#re@M($ z^qkAn*8O51|Kj%^7owJ3EWmPGn&Se>Ml&} z6rH6bA#mV!>Fn9ur2+{JjdlH{uVy`{D82U7^&7i=`mOf0v)4M5@XYNG&^)wFeDAbq z7MAPrY}5N@Y(5w4xThmS^0nUO=TGOYI3#hl?{L16i+FXAV!#zv)5q&S9CLbZF0dx8 zuKm;5V9nsjvwI}PrKM_Sv|Q@D(R%Kk(vE#>&mL8jJ?yPZS5M*AI%NO0bcW{ES-f(r zd=(bVx`!s6p5DFay~%$Dz17b?{rz$F<(%{xJ=-jw%=qcPN@>2*!OT;Q=FhhuF@3Y+ z%bQ!L?;TUCsyU|>w}|Ln&)3ylDTF7(anc{T>bo-H><9-?o6IzE2(;FtK59w8Tsahk7nJ zdGC{V=9*f9TV~aN*OOO!e4^-gR-kQ!CdCg}iM@RXFOs#;3&1@ypq> zOK+Vie!ytD&Z={jRz>8qzZNPASJ^h4E12Of=Xois$!}WE*1wk&RnIB?-d(*Rf_*~J z(vUSFaTOiE#1>6Xj_MWRzh%1YmZ!s2z3s6advgCOcRcnM`PVm_^#uQAC;L!=WmadF zcPu`u=fQSA@7|XWzyG)t3Or*E&Q`H@1I^9Gav7Y)oEz0uSJz$fIwEU3Dd@UaBySLQ;>X(m)Hp{KvP;{#F0=ZJh(YdA`$PVNxy9==%1bfsHevY34Q|(daDwxc4_B za?1OQE@|7XWesOL5_IIm#}xP9u1zd<>Gq#>T$LsGm-gTM#+SCzJnp1W~3_woqY1l{F)P-{sE;dmnYpU-6zreL|bd>Y0WE2JzcpTFP(C> z{Ao`=x`nSmNccpr6H=k^?PT0Qk zN0f_Qj(reE8tb_wS2dS>7I}4NqD0x1kMUQAaUe^1{TuXksH>A|DMAa{| z>qpC0+mF9RewqI}^Q2Pp(@M!_uOcV@J(790c20;<^zQYY)3(=tn0<^t_?d!f@`Yy6 z!!w>-s(RM_d)iOlJCpB!PZsrgmL%6YQ@oV@P50TtiBWc9KmKg!+IFB&*X&GCPEG~a zwVk)WbiJBCkI&1;Bi-2hrn>LkhdI%oqfe~6+_q}Ijn^}7=Je2I*0S@aFu0!X5NMwI z^4+o8s7I1>p3M(f<26I2;NZl&TUpPqKlC;@zD!**rfZ#tUszAwl@lD>3+~vow2H?> zu4-R-a8v7~318aQY~3U-^mzLuj#btBDgw_q7HO=vQn~CeF}Yl5m--p^zWMdFc5^hF zt}&+WobYhlamO8RZznxYP!fApXFEOa?}sVEPt^Wh3|V%#NT>c@4tK+TkDrsiT)*FI z5g&2x!&f<;mx8w^{Jxi{yKLX0xY+*;N4vEPcXa1Gm6~>vrQ*)7Nfj({j7Covl>f1x zCmMQ#SvGY?X@=OYv=kS;39U~vh1r-^tz5VFyYt=7u)>{uBH!g^eW_T#hvjmT+1=8` zOP7|n&HK-=T*$|#b$R&-SHm^skCJMb?f4ShBPuOHzR^8IRi_bh7!}6A}7ryIQ`O|mP z^94Vzz5D+1%f|!%8T?$=ZB)PH7M`{;s!Al{_`bWUFP(0mYrLiP*^Z-O?&-K27pBP_ zI;S$PKO--dT|Ma4)Y#dpm<{Cme=ju1kJj;Z4W1`=Xa2;;{}~n}Pc7Uy)pdCfwP9J@N3R9-Zg2K_e@Qycer4P1l*fH5S(iNI>@v%Iy!P#4ukJqu zx33!LENuUG+3E9Av77*Fg{>DGb};9zQre}SueYO~b(`IWP#@7#IWHvBrd{bcc&fNR zGWA}z32$A+)%dy%hvTt#wOw-}|RsYtA3)wzc7>uUzj~x&G_H zZ)FEKTa7Z_d^Fcp&h|fKap&cn>GS(qpC+F*c==d!XOMi#9gQXTo+ug#Z3w9}|Fz}g z=hzRAuhyT42zu(RyID@-{E{%aDZ;i}*GuuP-=^YtzQ6AHzZjn~fyk;v|JzKxJ1$Jv zlw=lNyL9=F2@{Spzg9GnWXe6Blw*5ro>0nnp0^6|3rpWVY&^p3u=reCKKJIE6^8}kb9KmTJvQth6_1vy;yR>QtOUh&KjDOPA^3{6^&m8;S>lY?hHsAg9 zRjnM$$)9d7`eGUWR?=i&+3rOj1Anz_xEeUyVO{Evt?o0;6dSrP#S|HEFnIK6D6Cl9 zY}L;izxd&&B!z8B?zXj_3l6UGU+GhI-6ieJoCI+Jn-@DbWGrgg);^JC!y#=prOUU= zQY5E{FYu{-oyKXt(8MVzV~LdRqu9AmOYdLi;*5^W()HZ7_}8`xCwKa4TBp>1Yfp5q zJK}fv?!lbKx`h{dN)E1L^w{{~yPf5+=rj(0yM#BN{Jv1QMMvSUxT@2}onYo2(w zblTG5%OW;MUC&LKyZnqyP50pse?D|p^0KZwXl`0%y6kZ3%f7=sx@Naq(w=j>PusQX zUqr9s-lh}3teZ359r*kF*v(rOt1c;?S<~&@?W^f5aXf$8i<#nI4z8KNn=HzINiFtd zW$w(#Z8d3*lL{v?*>AB+{=z=%>aQ(FPZn-t-X8IA;$Qv1;=R0Q4!)}V@+;ct826?W zqp+SsrOR%oJGx%=m34Zwcb~Ok)t=nC17R;6H*u$mrMz0aHGG$N>vqQJTeg{XX06ET zTzab}V)=iDYX;9&#uaIoceZOUF_&2qk^kwU$>r^;0hd{J2(1uGH!#<((sW~&Pro@Q z<9X@Qvbh(J?EY(E7Q*Vc%wwjVPSsM=XC8V=!4p?7noL(){jPX_Q;hvIky9qI>r-PE zzZ9}}j5~eaC;O(+nd!>C{kuL0EilVix@h_1$DdUXdK>ySPJXU`Q!aV4s{ORhihGKr zZ%cgUxh31bxNvQCeLMd><-lpr^d0M_Z91#7;&MiVYk*)M+nI+YEdg5t*{&Qt^!EMj z9~-_tQ;b}qHa%2sje&CW#ryr=%KM!CIt|yZQ)~Q~xvFT}{UZ^kpEy!2U%WaS6gfj( zb(YK7la7iTc3wYOE&Iyzht1M5$tMB_c20?SE*9)}>`7qDtgH`0iA#L*T`%wY?)RUe zecKLkkuzdnlwY)-_n!I6hCQ-!m#^))RW(iA+aB_E&7Eqp=j!U2O)|#LszN z6Py^6lOE45-zHx4XT#D@yiVo|-{x+3+ro2q)t_+vyYCMdUSl>)-0>uNYwe=2jt@?^ zFY11O{ZUV1P07<~HG z#PPG^if8cr{#=6yt~8CQau&jc=Wp|#v#_qIm^gj+yF*_T4Rzg@-BLB#-oo{q@A4D1 z=0#F!+ybXQKHR>^;jp<;bdx;i9q+aG565ULT9;08UcBu}-GV2Z&imP3+0%SK?%&_S zI)|qlBBx#rUfk0aZEnb4x;;|fQ1bgm@uMCcQn3bWwsGq6I!^bP`{K!QCEe*g4eJD- z?6==*7`NP5yx64o(oFv9!ex&v9_&r{+4|}7{L=V&R{I=tG`1PWPUZbqG$SP@XKvxs zC$H4zFuT0Jf4+=q?V2z}y`8f?gDTfGG@jQE(V-!yzy$`3KzDjfIE z;?dO?1(Wp;YQ>uAu*X&I-El6X_eIRw%g>+2v;MKj{nPAk@$ZsklS^etRL-)Lcboj) zefE~QRdvSomqIs-9NktOUo5wB7muaenX>IMs&08D@4{v&yj}1ke(A4A=kC^A zeEZmZC9`+G{PNk!$r{nx-Hu8kKPLNHFaLL0qSpEM{4au4ubB^?GMUAz*R9oSx752o zC~;!#w1bnfx9Ez;r#bH|TCenLkD>hx=IseT9lP1}g4|{a^SP@%72agI%dPg@zgtBg z<8C+?v#sW+Tr_z~ijj{)l_aCL*i+X$Cys8v7~9)dD}HZs9SRnGEyH z?&g$BWlj)xX?wlJH~9E1wWaTVCKs|jQ9mrk*DGGn<=Vcl8dY`#8x7%uV>Rm_E8%^(5243+mS|oq| z;eiy#Q?nIcewqK~@4QWS>KnFYxn@jQdu#dKuSwdxfs5uV>tBm$yY3@fZC6knu{7gb z;ML8pAAW!Mn(&)hFf*0$#OIehjqU}VZ&$kM3Rj8ewqEh&d&{c#=R?$lK-J9>D~!W3 z_<}`lp6y^M*wS&8_4L%eTSPBE3Ki1-T$tgtG-kf_m8*DflP_E~!+p6;+b zF>kW5zJ=56NgkFiO7EB+Ej^jpwaw|-w%>kXy4x#TpS-Cp`NRI9a{e6?pY<=j{Z7qx8yVYHbHtjr^h@=z3&|u zE_QWtUvOF#|GkD{`6YLL#;5gNjjM^7$hN}NvxjrVh9?^w#l8goICa z_;y)($lN?xGgl2Roj$F1-(K`{_wt8)o0YNOCUv!x+@_ArvUgA9oxj&E(z1Bl!!6E| z-`;9RT@Kw4eyaQF?IXN1wwz5^Sv+U{1!ilzV;&+86bf{A7f6e!%Y6K>rQibh%>2rW zRXINndRhj>7sm7liaSLLOl#Yh-&nCo{O88w3Q;bnwwgq)^qsGgd^}-hNcY)Jt6N=d zEnLqQ#BnWMrX;Y@d#%S!iEYwJdXvreWR`Y*tTxrVF*~Mb!Hut{t|e+UK3%D^=}M)t zX#1TBDSW~y8c%yW7yNjAXT!F1))dAsjW4!Mp7k=Hd*OD?gAdg#81~t;Y+*OKcJ=av z^|QGineKS9YAL_Y^B1N%?JS#4e^B80cK79DM(JEZ#)V>^a#ty3uTcNbptJFX%59$L z6OPL|L*`Y?P0yRX|hIq#(Ap1l3^)1f3rY7YPq5LQxUkaY?~DC)qI-IZ zm5L{=T-)(x!Zz32{Sz(AW9zb84J;O3iE8Lpy6nKD$DPM|Uob|qs9yC*;Hwo;-3n22 z9NsJ4xuc-3(AU(xe*ZS*G}l9|Ct1aqn=UWUZgP6K{pklM-_rM=kHnm~0GmX z+wqdEKf10s>6*U%beVOFqsmHQ`95pL;$Z3G$q6yuFAqL(evxOya^JP0le_n~|GOh~ z*+19Gt!ni1@OZG}b#wJG2Pda#AJxA6*!uoH?@^9R4lytG)|5TkDs}l)-p+lhM?8b$ zmmlXi=|J*^HK$k?{JNl-ay4+qndUsZ2i_ZN zrnk*n*ZnB`+aHT;PxePqdEGNRYS-AF_?^#F`*=r7o}TSQUF|?_k*GYjg}(CEOa+KDdIjqk&8=h<{C^A9`Wt-IxUR6{ zs`&E6xtnMF-M#&z&^@8+v33!f-jPha$J4a!v%ckjIeXl9gW&eQ{i+AAPS{ZS{POq3 z-->msHRtlqUUF*M_aMIdP5CDmRZkZCr5lvLfkQehXX)kSUfpZWDGK{8oG#wcDZ6A= z#m{i|sDIfBr)K&ktxP|2J!&TJ&4=!7J_WtL{kwj0)^Pn)PrLk~WBsF}Hj_G^{PB5a zqVaD_*;+QaINKK+qi?c%N9RgM=k$E>b5bnZUcOsm2X|=Ur&KkU73)kZPbh8iadSKP zUDwX_Vf(3QZf$m>TeTi5Caw1M?Nv)zwdCE@89cB2cD!uZFimRt#8Cb-o=bNw7nDRUGZ!ZDQ}Y z#7zD7Mni>z)1G>@2K9$`hg!Vb`^0Dh6HCf6YrjRA%P)7Vay+y$NNr|X@UyRWYjrr>FF1#?QgEXY`&WMpTQ+= zid>J$cO_wy%@vn34@rONdmt_-DjedXckPzMXW8QA`(Mrs51;+xVEq>JMN!Ka1yx)9 zvg>i+volu`f>3#iJ1OdLcsxEv%{Bt zR_mC;KBMg8hY!at7u>6J4$rVFp4n{ncGp$SXAz3x6J)HTHr8vBdu4A<{AP9fUg@E$(|Y!-nN6`oVX7 zE5uW;9l3h&{cY9>cidji+%%ghL~fPrCDwyE6H*xOTvXlOG->TBKc#}*q2C{+Xk;ze z>}@VH;Z0i1#2|5{T|Uh%d>h*GwpeMX?rMCL;{0&gcF}F8SH6r&{UnfOA-ebdn^m0; z&lVc-EA<60IC{F@D{Bs;Z09*kt!w8V?zuAMPhHlom#xBz3->Luxc~R2_>9tTGYoj< ze{1>Tt!;n7YqRsW(yVCK#%|EPDNH^I@K-Jx5H0+M<7}{L;@ie6A^ZC?`|d z`_!Tvhjxf?{_KAE?gwMJ_rH%D4_VE=YHGqZTnA`wb$={Pd<4v z-q_^H_N6u_XTG_kWLD|Dzy8PW2h$BCJ8tHMl(RX;-92krT+dnl=|6)_jFYyQOvSE7 zj(ugvXZ&a2yZ`EKV=A{%PrF)_xrBv3?;eMIK@r6dzpry-SnO&1q;zVk>`VW>61|U< z9NsCZtgvaKIQhBPS!bdL}hj3{xj77+9>JScxA=o z;{47%{@YaaM88Y#Hw^vHa9OZ*a%o_^q{wsMhlXlLy3e>Mm1lH+j&c3Ovc}`Am-M19 zm2OE)O3#m$z0z2I`tWg%BQxfD&6K~ogB&$WcD=sMqo&%H%D?(%=I!@N z1v#IEQ=HfDj;~`^XZ5SPuoK;8k}8VENrf25ta95^1A5K74ug`?Mh zJY3EBn*FQ4$%mtRADMG>#tI8PY4Ci!d_U*yHLX!CCz%!ew2vt%=?P5NZguyyw^3a8 z;0gCsqg2;zj+f06^QNseezueO$^LGZJ=570CQr?Hx9#>f>4wiqdb0l+l&;;if8H4; zIYG=>)!r!ZNd(JPlaw9FsfV1xG9TW5eE3J{wZ?SfYywhW<=G^XS6f~{t+_`L?)!q9aYa=#YeIsxtGwei3?e4o4!Yt8x61P;;?6`LQ zc+~Z9Mv#s0Z|BrsbwOVi&S(mp=|1OhX;Yfzir?W5oGuq8zY5l6kDq3H-F4j&6VYuF z?-Ke>T)rvcE1S=k<@MJjdU`Va6i{BiEJYm%DeLH)efUI*Gk!zPjv{=_RIdhw3i#@GEvnUw9R5;`vPQ zZrhKZa_J57W&atbx8BN{cmCIpY^I>BqlG(yR8508YF+AuE&X&_U^hcq1rXqdflU%Y42`K6>khz|5fv4 z>x-_HaydLF)z*6Sh3$1&kvGTu(WX`@M1M$(J9NPkyJl#eQ3r!~XR9 zH5d40!pH9zc20Vdr)Fy+L$H#@%^?- zRUf&#o_sL%=PlTC_(b4ppG8kkrysT3RLNbK`R;_VQLo~5helJm7dN=#od4Kx^PkB2 z$?jp=ShDi1m4HlH!i1fw(;8al2Z}d*+avB|cfEePu#(A~K3lV$*8`(6ZfXXpZZlqT z!rCWF@T%i_vD-V6@8%bNuDy}8?NCx%tK!a?se7-dx4iV-xn0gs%JfaoQE7kv_K(@k zY8qlo4^KIA?D0mm7rciGmTc1LWVG|kUZpqh$&q*k7K4|?il;OKUoCieYEqCY>-5+K znG2NGZ~nmJrjTWAWEJLkv+bc-xOAV1_|Bi-jwlvRpDmwK-?HP6Rn10@@ROTXns`q2 z+018b=;Zv)@8Uk|cYAZ6>|gG%({4`SlaIT3>=vw(SP?Fr;Cc7$2Hn$npE)~Tt*E~& z@%fvC);Yavn_I+|3Hw$%=5D)Z?xAbV$?7oC?r-glJ*RY6)jFTO#Aj2P}E~9+nMv17MjI|jsHZUs^d7Lw zURzy!e0xXv!Se1ji8*U6q8qL@{b$f*;)^nSv1k|j{YCY&Ii9Ry*{Wk~#c3($>{u*r zs<3v$jzt-Jch+p2x6kO*l|7H|Iu?H3rs6C5^vy|z8u8dV{&}pkBksNnpYmBH`rW2- zi|1R-{@gF`|6$PGJW=6rQLx?hn{OsJY2BapY5&oB-rffrZx|at%QR|AiJZSPOJ6i7 z?RERD&vF0ke1yCs4)xgVd*HDv@SXLxz>_Z5E{fQj|NeN;<*@gJ!rR%+X@;$vcAs*X z6aH(N)vD5vCkyuoNp3RNc5yu4bye2#)PnUGmH4* zeyPIPIJWb$pD)x{|9d*;E!~+1*Hs;mDZFgHe&=>6BR<&!p7!&) z8}cPKxvSWyoIF~6^WCkNX7x;J`_FTod6qD1aZ7`xy{GuUiOS_DK3hqpXclE=9};}!1wIdu$)J8<}SRQyR_nM*~OfE*^1XEOpj)S zo}9#OH%&FoYs;Z8fm2mlsAWmmdqTxG+<*6< zy&JkbUacs6v8G$;^?wGA4?Bcj=UqR{u*tD~R(*EA^C#m_GyNqtdnM$wWYQy7Mi*KI zURcN8D1MYVTtDQ4xb2VoFJ;d6{W?5Z&R8lh-dy=;9rHP>2Rq-oT@qCJ_cgR+_otYu zXTQwE4R5ffz7*KZBJMf6Aztjso89TsCnn#q=M(R+nD0}R7%H>u()V3f*-I}8+QDANegCMpfX?}q`!nV$&H6c0)clsw>ooOum+vqC>2}Y&HsYzrs^Aqt zx6hQ+^{&bf49fEqFux)x{Xl$jb#6I#bF9Vm))zaxHhc{%vo)Lay7y?AWKY#Z;hf|B z3wknRwx(MDwomI_FLF5CAk=%pQ_(1yt`nsb&d#1$ae^hb^y=ry3ubXGnmT8y{ocb4 z#dBWzPE?xh`$~aXINQ{QMuM zx^>g%|7XbNnLoKG&vougso7qYdk*_gI&sFTZnlfQmF`TdxfA^7p8WXOd-=PCtB@%`(RRL)w&OQCB^f5{P|{nqsQN^(>p}joYo60nc{tu)nQN@>;<>%SN22V@ z&Op%^7w=r|ypP8Vq&FUWQ90Yd+BZ+&;*%4KH#Xn6=OcSKZAIt8=-U&1Jh-~G!QPm4 zLYnsGvop3z@=we&O0jNfUfGnnT-N&b--2onx%RMrkCzW+{%%U?=;e6(fxpgb&1A(h zuP&UIUBunz+{ZcDi0>g^lzdl0)Dr!wi#rNW1;#!0?LDO=KK;|-BE^FAQi13dE$Oop zkFg$RN@=}#rF3hsk0i^VpJIDG70;9iFsVJ+zV(fN-#n|oN7vUM?mK;Ap3`$n6r&FXvS!e_2$^W{35q*;O{z z*J{c+D{paGFY3?#PtM|=!)bwg$|;G0GiI9$A9E}a6cj3dH|u5lYMTRU{Sk%NPFZjA zox4%6(EaqC69ENx7v|~)zB;UZlYN^epXihON3?dm{P)I_T}>r#p@^hf^TZt&Iyj3y zZV9!Yl-OfBS95cdPGimzfm#wlR!?*A+05Q|1+F#p3?G4vDvZU zNb=bfIi(Emi4$Y0GMaDQJH7IeZ@{ZcCT7pJ055&+ICU0*N{-w4rB`i#sIA-ht8r)3 zY~B@mUKVGz%Wn4yX_vh8P0D-9Nr9=g2j+zyGR@hQ%^oCaW3Jr4_}BC$&MT!&-+T?a znJ~{T$9+#!+v&z-D-`Z;{qv{r#m`Ca{cMif_hg)^lnaT-p?A(f% z?|s9CmGmdHHMz>oa{N6}vnVFV!!p9P{QSpt(g7QJTvu5LS=lX-;ohzEFzjH08} zl;>+^tIcJYaAbXYBI|axSr%6H@3%?XJbQe#|J;3_nSAbx&U*>1ICy2RckacR?v)3G z?<9H$u*M2Udk4f+-frPiETbymYv_mF~E?xVdE1jOFvntuFMxco7T zrkt$ZiqnQs>3P9=`;$}H6Lu---QzHOxkC23cs4uNNtO+K(o=4*_V{2O}^%vdhMw@_uKm{+v78|tg{z>yJ-DjgPLR3lxI&Z&itDp zJwrt$UvR?RRoSav%##q4F$tNoCiPTv@ykn9mQ$IMTrw9im#9bYWDKlcx~#jf@Mw+S zGxe)1sm|>&S>9*Q>u#5;p3!k8$2-vUgZJ_NyLr<3if4Bnn)t-;)T7A4+gz?u-cD&Z z-+$D;lOOExK`v=O|MY!wr`NfdxH}iByN640VSp(C@3a#rlJDs+hJ$uSyh2O{J zZ^Y)$$ThqoEE2`K^vS%oFY(6LF5WcQ=Tdp(=^vp>s(goU-@Pw#XWQBhm$gHrS~TA5 zXMEoAxI3xQDE3+0`Oy9G(jot}H(p#|@$$XLk2!l^bLWR#y7IR9gtGYAd((_t z)7B~{CH81{B!5|7aLa#I^~bQ87mpu$Y4VF{UdLZeTi-K17gwnXB7A+`Sa9MxoNHiFN|-lrcrOR1`AG5s&-P{n&czi8RNyKaEPS z%3~4_ig)Nd*>-=`?=KfVbpK>X)jGa-s+pxz!hwZ*v(0uclrIvsHF4oDW7lm7)iL*! z5Zfy3SD-)_`TKOWtpud@3vXc4c5*75U}S~4%* z^1h4uox8)V@T2I@N31uTw=0~IpSY=_^HF&zht3^Q@5|fUI1AVVO-)Y%~P)2mhxb-f=x@u znW?7lZ=d6A(95t|a_QdG7|8`ZJ%w`~r|{=YdtT&sa{a&4%nM)mF!gNbQ3+Ms%5$e( z_S?xN`{y?4E!~xUH#}b`==%4Dd+xuOTZ3jQq_Ule`@H1w)6_rPyz;E#<$ZtE+%gsFauryNs!&mQ)pr)UHB?&sUOS1xPLmy4KP z#>Xjgd~SQf%n991PVyXG^)Gwal9~;_MDo1td-<+1(oo)Xb;lYj$7MHDZc2P~xTAJU zmUqJz_du1nr)?W@m9jERnU!pmjb5Id&e6isvFGld;1@?{CMa%JTfyleow(<-5lhG9 zkG#8n)c%m#aP5=C{I`lP&h31ufAOzg2v_!{)mMUc8GPj5JgLh{{-Fx9|3Asv+U-{9 z){mz>j7{%4Wox(SqqXMrITKvBu{>8QxO649?xFL$9p8*q7^7a!u9u#sO4JzE}VtlTC>&v+*PWsxuc?$lI)6|qs3*MY+mLBluc+2d< zju)5P^KMMYe$aVeSV(N<%Y%nXSGFmfxb>bt#buuPvRMzk+$3gC(Q)r8kybP1^F6%j zFdOzgte+KcpEyg)|@yuVH7DgymXRf$ioxjb!pic1J-oL++pWj*bSuSg9<<6gv ze|x7L`+R=stEWoBXLkB7yUp61!V;C>$$I^$NNDLU?>@cmBQ?Tux0`l7$}60_;Y`_! zwZYjzvnpgtH@X$A zWccFkVyC@NXUZPVzuNw`BcW<5TM6SM#$&IKIO+<!Z`Jr`g3*es~^2AjEpkB zGEPvtyKTaX7rVaw-f%UyZqv$_r*iFPPg!_;;ia3mm-9Vaoiy*k-L=P8eTu31KJ(00 z+cwj@r^jMmM&^{|W(7NibKX66?&VK)n~>-~g}!sn&#CYVE{ru6x>NLU|J2oG&E2|% zJiKiWj!$tYD*II~sp$4oqDye{f>|%`H$IGfBd>78^318e6QA>!Cp`~wdMNbaec3+k z-Tz`VS8sTqcv|Ir#CQMA2^))#+@I0w_w?i2qjqoj5>jfM%w01tb8kCc==IK_%llpY zEH$Y!j8{6=o~S&@8{u@b@YmYi-lf;y#kfj$*r%60o*% zn4R->#}=i~5YSG=`=Z-ki*DG+`l4W6nal(s!L3K~Zg-T3Zg=(PmoZmapj~8@o47r4 zLXOQx;d7G@JI_D%&id(6q;!>@(SJ?c7x2@n=Ei;#SzSp&kzII_YT#Bb%T5RA{ab zHE(FLJ#W=twA)2>d)%$M`qdnXtBzSu{1xxTA7ZO=#^TCNe}T)HJ!YTPe2?aq8oRpe z{MX{nGw+}Eef6EG!jJoeRctQz9O}L%u<3SR$=OY5DN!$)+HNWB*u{Ro=cMv) z*45%Gw?t0nE@^zu`7qDH{ECNBPRohiuj(%B{vpPG&2Y0!Tc6tLyNokBwpiV%3yeGY z*U)#0Oi)YV3!S6tvSo+ZIVvXK*h9vwwIvY^9MQPi4>XRpz#~3F)9! ze%)^!er*4}uRM0?t@_8u%XYAL)taZwur#*`mFiYeypY{%X(eHPI_2G^BmPEF&z9Kp zZ;>thBG-K8C+8EBr?KtxUOCxvY;@b_vOdqqclR2BysNu+y?pwDXj%Nw_`&5B;(8!P6&Ge6`?dq}`lhXOCL%MWK7r8G^I`xf7I@5T}FygOZcFGPp9 zUMaP^mAOutB`zo7yjoVU(~F|HVsSgzC-NR)TQzsRe>dxjxb)CND?BF3>3TeN<#_+) zZ^H3MevPxxU1$pMSsAjPd)teJA&CEnoa=nd7&e^Nep! zn?C(^;f+u0vR6+K^qF=z?p!hd)f3fXHs$Ke;>!+OJ6}AnenN1G6Z37E zd9Qvu^9tDB|IZ-tRVMYrtjsSWaf^3qO*^aDap25PUXeqF`yR=$tW~HCxw7WS`-Q7j zdoEapNql{@G9Zyz=TN`iTwn8@f`vOn)?DQ7{m*cEpU#PMVedt16>a^eMojeVR$tM| zQZDm3&LuoUX6=rxAx93lvE6-96O*iRIm*DyC#E=T&S#G9wplN~Kjxp4b>NbW*h8Dg zYjZk0WDjSjnuBDjALHT z-t;&3%&vFN%T5-&_W!ch=&_5+(?emR^H)0_J-W(gb-1nXHCwsVb&Wd$@2G{YH<-3h z{<<9BKj$-7=geB*v&G2DvS&(}`>f8p*&Z(1%iBvY$J)O)XbX{^c7)-~3Lm-orE?Dz zUT2xJp=EdU<6i~5GkUx>t2wK41>Si0vq$dQC6Nz$r{;bXH+;2l(U}WRjZT)F*(BZD zcjKd?Ad~*99d#aj9{g2H?uS}loA;cx@g8F(hsTUI0eWu%3b%sI*><(6F(RNwOH?lNZO1s7LF z?^JBKBc1p7=|_2OCC!SopZx=sFF~Q)5Vok&J&5m9LuFD)e zw^#qTTY6+x_MehNQI3y%wHC^0_wG8rjcH5gITa^!v0X3rY5r$u7mM$i%sGWw_EmE* zTevkx+ao!fwI-Xl?|N&nLig!(KwD6&wlhMwvW+`p6CmdNY>4veuoYK=AE~VUC zpDxdyYIyFS{RU_4PP-|_4Nq$(TlJV| zoSVcet$O(MVc)jTU(DvDx!paGqO;56@rmrL9qNB=f82Rxe&gw}bdxOWk{MsU4hQtV z`FLdEtd6=52fDu4YfV^K7;k>G=hWS2%}>^e^;=Gl>AGsc5R>CE#YIL~zDRFH_xppJ zbCfn*xLx2fukW(W(T(@i>ZW|kUZ?daRQv11gVS!Y`5o`RWfimb`f-keqH8rDdcux< zUn{;=(cfG|wpvZ#J>=rQ|ER$p2VHkXE9-(=l`j@WUpSiYxOLIb%tha> z+K98X=moqAsDHI<;p{Uf#CFwg;%@u+ro7=rR9CxZH|P5( zB+I__?W*s=ey_GIlMrLqckX%<8Tw$6PhL%Q^o$8*2Uw=G3dZDb+M&IC`me`3WVEm9 ze)aC(_1WOp+I_vp4mx%_UFAJn`u^S1{G%N=WE}ZTLM2z(DzGgPQuVlGN4ksJJ>d;xK%J{#%S?B`Z?lR2y8DeAWBgX|NgkCRMp>8-eXhvQ16=-!X>SAD5p zGi#2Gt&PUJ&?gK&t$f839x$svRdJMF;}X5~`nfu}H-uW1-o3pF!BKwEwmz%`n*KmEYns&#cNU>2g%VXQOdrN+^HR(wWeojILuW$>2Sg1_wEI;{~2!gDs4=%t=by0-hEZrgHm5};mXMs@6|bc z^51E$y`!tW_i@*mb7E>wf2%*)8Tvvm?6a)q%qi}R?41RRPdF+ZjbU+heY5x|cifGG zuVc>l{WA<-w^3c=a&h{i_1-g^FIqHS*DTqY7x`@GiPf2A39Enbh}~b^r(cjVr{$Bl zMQQP+oBSF_*Ll2J7^dH2EQb2sj_MxSTr317VSN%m~+ zp70-*eNRH)i{2|*a^aNF=F)9*ZmC=R6qxE*KXw0VVa2=8b&sxgf2H5^IYoEdx5ecX z8jd~r`9RE%t(`mA%{nyq=I^_bhTAd|8aWrmESR-?)*ZKi)2tR%63^b8TOcVrciFj% zLKn7}zS&;!I#1WWm+RGm>#Ku$AGRMz5#J)+(@`GaTKnrqvSm%4Tf$MDIbx?)SQ|by zJDeB4yWobGXVS7c>-Mhuv9<5uPI>bnh1}ygJ05q6%PA?%ogP~+l-cyPBaJ^e#Y-?R zd*RvL9X>vJ@~wfTad$u5`pLd(+A2PesosUl7+B^AU1>jm_fkmczY4Bx2PC^bKC!&i zvuZ+R*&G99>vr$o71{58ChTpF>~Je}NI12sT6|X5OQW;*KfPonn8oRW{zK6uXgWxs6ajN7?41m5;*o-F0+K9pZO>9N_j z=8WHW8WQGn-M;_waJFv38Z#+TzW#u2xeyD{ZCMz>mLhjkXjYF zsoityr7GjwEk3V4F1@=)P*7at)^FQ1Z38~LvSk~dto&p(UGaq2?f&fxrz@%KbpFqv zqrCqc=ZP)f(sE?f=2ln4l-)l5d(&k<`I8lo?Y3@Lcz4}O>PvG~idygUxjE75JnOgJ zKD}-39)qCCXJ*IrrKM=_?$F!AV!e;`M9W*%E%slIp4orkUHbi78voX(Rd1FolD>8- zMP}Pa(Z_ei55NEUVCo!|GwT%=&R_NGzz$UtpI!VJNmdJ0S1z0TBk|Mvv;$MO&a3{r zWH0v>sXJdbPJL_?t=DlSI_F4d)1yn)i(XBew(okZ#iw6uzsj<&{Qi33#$)RapDsPv ze8t#%zTvZbMcF=EZYRu+-s`aR*sPBVa@U&v+WXlST)Gx3*x)LDD%|Sf(*Fz&LeZ<1 z-)-5*bkXxAlc{G&`Rvkz+q88*JucL$T>Dw;`(Xoaq16{syxJb~I`3F`Ox!&B+C!0# zu5TZI|B;j+u`7ssod#P`wA$%{z_VPJpPoMa?$9R&i@Rz|wUQ@ho|lPqxm>hlyUd?m z{kfatUO!l|Lq*}snRbPs$JcJ9`8+=n-S=_2bO*Qpym|45n;Z`FY!6s{qT6n|jf^Do zo;8y-Pd(JFexbcS|3U1As}c=02i==B<}D7@7S`M@XmdSw;qe`@Dh9kJTLbNSxD!O4 zJlbondBM-+B;UNxygM&%^Vw8pre)RmH)g}K18TXcD+*qIxwHTJ90sk$=X%0a-c8@U z@9yE)lK1)hWqD^Fa+d3NTOIV0o%iLLz$ag)GTt<1|G|CY=Io4)T|1BeXLzY&$dVSf zE-mTo&O=-64Nj(obS-ha(^u%TP1%HXN5XXZ);a9&kDSq3!Iu~v^L6pw>H|Tp8*4ZC z89LoQbCB=T7QZVy3Zz=J=6ke@8(vO4v}6yzYqg=(9j}~<*5Zb{_b#4$GOatY>eHw1 ze=Mqv5`=!re7U}OSF7r(dT%A;GdMU;j=E>T~WA-;VuIs&Tp>smntjeWot(S~9hL&z+wS08YkT0<4rho2<1cuw|J@YJf z+>j9NjaEpATtVr(>W(qLrj#3g zKDXoKiQ35;aVs6wFRL$%_mJFfZanv|lv}US;@`&?iL~^+UcA^>l5th;DovB_XP@iV zM82Hsyg4$xJ=psEyx#iEQ;%~lDyEnvzWFV=-aM@5`lfGCHF_f>Iz$qxHc{^FioRLQTE8(eQ*jVwI(KCgUYm)7!X!QF<>f3?=nvVVGJ^>b&p z)jW+kUYf@yyb-@G-h8gSGW_^ElLN6HYaX4=a0{t;X!`cK=XQ_V?~e$5@ih(guUtP} za>l~AnRCu9y?n}ItHR$Uw#Rw&wOcOmE%;y#Z#MCJu=x9eP+|^TyvRo|CE&OHLSjUrn@;J zM_Z?RvaQT?H-o47g6~XpJ#&O7hv&|$_XG04{d%w89TMXcTl7?4v%|6NO-I`8rNznT9=@CV#r4l*=1_hSJzKVrswozZS9VX>9ruIU!E5LTAE*9Vg7n?wEHiov#`i zU2^E^0q=lOd z6C!*?(m3ZH-qIanec@v8uH_G-3sYyRu|=I+;=aPUUAB{Fc8ZSKiFZ$bf zZtiwgs|CE@nP0g4n)aVz{vnH)<(uE@G~Uz?@hjN0^O)FDfgSHYE?%a>=6NuxAUr;` z+w!$~!s?lG&Pv|DR9Iyxaiv`2%-xW+t99mHRmjiuv)Z|@>fW-$DU0)@FN>> zY*@C*D0-F?n_EwHFk6mF;DsMqS+l0@>FzVzd2_nLtqWx{XK^`C`N@#ez4nNDpTmq9 zlebMyT+b+9ekm^Q*JCZOR;y3(eNS3vNHXbfvo*LU|2w%~_3bN`1-H|7tx}x$`pCXD zyea<~mYK!OO?sEI{LCt!8CO16&C;_|I=hxrIANZd{*?ZAI}PU^43?NQ-&JF4Q<+V& zQ02v{XZ|m?3iNLMdue)aQ^eMj=f4~%*qW_>VzF~tw$Dk;_ui?;m&E6;6_UC)&q^q} z>(ikQF&0zJ(DQHpbUORa_$+w$-NV>jDlWRk@47r%`%d{gY!x$_7rwG;TVNW8;G8LO z7oJRMRNj94mg+yd*E-h1vDr&zJr znT~|hhHmmVlPsPdQL&1x$}F$tt$rA{x$#xaiia{QjFvP{cw(3{al(mRm!JKVvfzGr zr2W-yv9SLPx2MIot0MLF53I5j*N^Zqdc4{qBa(?b3WhHUz2`lR#9e7|b+xpU3Db@D)OW||VT%3G1%e)igaA{?p zq?Eh#dvy71mL*@Cv!}jL71NbmK2MXk_x#QTiF;-*q*QNZdDOUU*tPE9qb_N!adZjkWI_|AEt@y@yz?Joi z;E`Xmck_i7L{2cAv-w%%sV^5#?(5}!G|ShV<@pV@B%A0P374)2s`@?8*Rh?xVKLXv zB~_AN_T1s}k&kv`dNn(DW<2+zUkvx}26~^FZ9tmT*5w+@pTb*-{my6JN;=w z(NAPrro9aeKFoY9aL;+g=i6RSEH*A$aaUS*ieTclIr)y_PqJ6eTDPlcgPlyqOs-w$ zRtc+!2VCJOs^spoQC*P3(5NJ#^v5ZC(YM2*lM;>g9G=(n=-i5DTHYd3!IybY>ewpj zd$hlqX;^yn{i^DJ^IHo2XNJs_K34r|GMnY)zsYjno?V$!&|)ced&%yuTmKpCzdSw| zCe5?b!*qJ`wA&Zv8x&7_e-1B9dM* zPVCt5aaE_rveoO`ZS7?DnC{3>3{jn);;}q+S7T@M#4P{rYQI9ohpXqGKizF3@*^ef zny1l%GZDHQE;tunXMJ>AV8_XehF3~IJenr->CnzIQy5`-%ETs)W8@{i)(=VUG!MbqgvzyN9CP5#4gM%JE{aJqW z*`FP^HZecG((+)ZADcr@eC4r}2^zu-=DU;D*M@ID{pe3bZy?tR&Q)n18#C87M;oz) z>|1zkHt%gmmQw$+`)cnVf0?Eow@!1Xiea{}s%!4Gg>#;*zsWKQnHy|P>6jFk0GFH%^c@N=JzTe`%eb2;Z+#fmT9Fkcc| zd_eGUNzBz!mO>LJ(QE%1bayPeV;8P`O>foXt#%>HHmx-`ysEjtFe|LPdGiMW;e=CV zu^wCAc3*lN-~ZRW;@g?l6Fcqn&U*E*eGWXaJwi55Oz^>FzdPPi+ROW&%$Tq=Db;Lk z(41P?!`e4H1fpKICp_3``jamrjQy%+k$jhtX1;S<=(1yl4V#k}T@?6p)iW@(bbFwD z=99;*VV>>RqT&_hV?~~bTCSRGB$fM7$?@;uzU@a@E>;)Dm!ErowQExI**|^3@sdef zZs!K2I<0q`r{r7OyXQ;yHeQZNA8g&On8qE_%+viWzM(g$*B>F1}^? zj+b!<7G?eIzVH9xp{&@-!a>`vsD@2HyiIgMc7V^T{Dv_dI=X?1xCYn7I-nD39<^1}M ziynQw{czdF?!eW?HfJJqfBo{mnQn5YkbS+{9Np58+)$Ac6~4RsbdnFPpLg<~s#|RS z7p{NZFN_uhWnKOoEiu3ANTOM4oYRZdO8T?1bMI{V^!Vau?G4u>ZH}#2?=_WWh50ej z3E`eq$4=yk8gdK9h+ZuHQF1Kknz7c^xQ1CVFGHWDI@B;M%-dO0E_>_I!Y90;*2_0< zf11Y{Wg^TfC39l4O8Ba&idD~+^Pc5ai#yo=K0wsh)8uc&`PDTA>lL<0)vtKD@8sku zx?Ao&5}cVQx}jd(m|o9c6SQr+z4@yNwSkH!9MxX-X<>$@M5-DtKQX!3>SeJ#)8?^ib=8mlS8{c+271Le zbf3(UIiBXRbbo~P>{}A;ABEj(pULYh{oJ?4g5{8@JF^zJ&O>)cHO3p94qICmOLbckr{XTyz z&*0P6y_*I7`8!z*tzU?-n8dsY+0}eSXi~oA$!95(!+1~anr-(lVsUBZbj5&WNnbjn zR;F&>^k>zPiG7dxgg+jOjK1p{92+if?6f&eY}u2a6HdN-syZ!kv*dP;wo8T=OX7Z& z$NCtaVK^NT_-5DVuWH$buhyGyvpJ{j;AUfXMU>-e*rSh+nv^a0U{r?Ohh3!|ff?tWhv9k_2W;~`dR~{|w1}K_|S6A_`TMoc*lcsoS@USuD0@=MTMV zv{%fgV^hWxW3yu>;Y&2$Z*fw|Sk}?8uhy`%We2j_#jH5)uKSWNjDl{s-3vY^$<`jV^y-t=fLDUs9F+F3NATbhph}>u#;;u^YT@GrGJm*+^R2Su3T(#(LeZ8MSYo_ zMD6RfHm$k-j%l{pCJyI9+0)KgycOnsxBZz=W^RbSywEXu=@XAOR?cEB?YU?talOe~ z)oj*r?wWHZiHn5PuW9YGxE%9re#BpYr?hm}DS_vfs(eqrb7Doi_Ign#UptQqJ*mC& zC!RE0Rxj;m*6ZG&wU95-Tgc?58&AWrw`UK!*oN}DpZ%D0^~cek0Pe6AmP$O0Q&sMj z+1j-6dvVy9s$Ma<94%t|FUB{THL2;uQ>zxMCFXZ0B^@>WyP@*H6DMD;Weuy>&02qE z<5WHF^EZ^dCV!nabB)cP7jH`yo>Z*0IdxoM1IL9U(JFqApI&}=_~~5R#L)f~2H%#7 z{WX16Q@5r=Mp->7!S&(wT~XadgUVu zJeiRf^diqxYl+~)skYl^WKK4?W0W2$@afyWFM*H0$F_Z{Y~Cm#z3l5pk6$x&Pu!B` zh}xqkD3`gu?r5ccO=t%9QHG+G1yN0Yhg0WoHOxLU*+p-A_Axt^Xg{l`G$#ET2iKJuX~Fj^cR0S~Jt(iQyF2%z-#rP} zklqj;zlnU)PES%gU~X0^xO2ff!RQmh_m|B`O^FouT55UISvz%FL_wFeZ{L#C{0Bdk z{{C2Wx_EZX%u`PTp7lV$PD! zN5`EU7Pl);I=b)sz3kF`T(s&={5gKtJ5v=RUq+?O%5zp!xPc(^SIAV znRR*RtG!~*XEtBHI#*Le!TF4ejd|u*t&APN)V=CYy)pZDDD7p*&2{;w8u-i}x1QXv zrK6{nQBeHsvRg0ay~$)Ta6G+vsdN6-Qe}bu+}^zvQ&+@Z;6Gb_dY>X!@ha&r+gFNA zFtjl8URlzRTX=PQC~rXN=bzK=lpfQXaOz}UaCSfQrnt(EcVCaBa4{BD@-UiyeEZ0? zqa{^)dZd>2RliM!roVT*?LJn+zIW9=@f+e5kJ98Ab#~q9nkuHtesxpTv3FB`&)zdT z^H6qzpsBuI-}FBpFE6+`dBq0hGu@FrNyaluBC|A#b{3 z4b!?Rd2-~x&G6PZbmc^`VyxDI%2n1|UMx_RX}F^_d1n^;Ug5l>K?l8DL{fd0UwVCs z^YYDB-)VXV;S)PL)K++w_jl?4DDYYEB&Kra)Y*#;uZhvUbj*3}yXBfJAGzXIuRVQU zFhuN==xdRkQ?gq1wY%?X2gynX-IV3Hz3r@fsK3#TxyL^~=xEyO6cW|zvBZ^Of9e-fU1gOY!Q`&{E`+ahvx5=-LzN;WM+d;Ildbx_-%X~h&n56rY1-TASi~Q0m zk`<)lv+TTbpTC8^dqzh*OTk4K>5#WK{@#e$)uLvb7hG*ucRg#i zza|C2+xu;EK7M{UJ1+AJSDIk4SW1=5iXx>e^P1k2-xgGSWP5Pk!LnTob$^_=zwyXc zx5PB(zufGV{@-}F8TD2QZ(k+WG0!!0`=1*%3%=+ac(&!N?ZOqI-;yof+C*-MesrLz zbJ?Z0w|9i!e|9v_H@;8wRYS$jO}w$wR~BY2FwXoqr{G!KOX*&r7t$GBi{{xsmC-DI zQrjf9T~g`XN{^h_sRs9yc3B#yF1~z2W6ARRT$ii$r@L*^DyL7~f9BWZh?!w)yh;=p z#eIThEaR?l9cO6V5EAnAzJE8n?w8sUkw%NKc<%MnLR?~(^sXqz#naW5 z%J=8LJ1z3lMttelNnSB$W}ezoS(9PvArU4G#+foA6_R<`-dYuKf}HoP@(o8H02 z`>ZXTW7XQ`+HH@WdsPn1m>j0@(_Pa!-QbyVb=|$(9lVX*UQ%y&EnW61YxTMJ7h6K( z>Nf06DY0thH@u>-M&366!YMrs@?)D7Te>sX#{~0!H+3qEr#+G8L z;1z72d`EqzTucx5-nRKc+wY$}F1jVBUWqHUiu>y;AFDGjp1+bv`qOc4`(Cj};!@&l z=Pzfh-Lv8LDfwkHxFakLj^|$s%rxM+nbBsP_50F~?FIaEL)h-UtE{YFF6kY8LF>7@ zw9y~4_(b8;CzGqypPtNnqZCy6>tLvanpf_!N52-Oq|5eQdz@FOd~3r*mWvyd15B^7 z|C3*Jx}Z$lY|qg`vPP>~wL*0Jhaeo zd(w7~Rc`GH?U_f|dH4&$%N;iAiR|>33yV4Tw?ZVceuv(vc5m|~fwC)0-k6#5Ogre~ zCj4Efz4UYY)&C5Y_xaa#v7GVrJ|{9gbDdn=yksSvNjEe$I4JL6E?V4?`|@4rurr%F`fNaSvvx^rSMEF(93jP z&fya79BU)YF|~{*YG+ls|5dL#7v2MD*PidWc{^`WOmM7S#O6~e3;ymp@x;C9Pxw+p z<`geuKjD_IXY*E0TBP*ylm9fq9?dsJxi=J^X$r=#34U?@w_|Ekaq3|u-gPd`>vvS% z{?*R=*ij=aRmXYBg-4g}bbHP_8G1AG*j=T23hDt{1N<}d_W8HJI%qOMbrRE)prqE! z?N$Ghx9QC+`oQ-qdwp;Fznt_v^Xd$Jzs`&Oa#D6@+_cR{K7YF#yx8zrC1c!Ur$_Sr zZ99IMd;FUoxmxW;(_Pb7)jMl4Og*1x>xvvZP;=z(zl(x3FFz#5E1d2#yDMzy*XUiL z=33CYEw0F^Vp$@z4ge6hFP%(BQ&SCEa=j+S<^Y8^=aAMtK1>aKPLJ~ zn57(?tt1;G$L6tEblhY@(kgu1%C5#8 za8q;bsVQZa2FX0n&CYn7=<-{*YtgKt+yi`K|6&T+WUh9FoXU+hiNCbL@6bxq3!ir+ zPdi@Syi2)v=Av5Hwv`N}T{@?hMsAwQAepySb;a8g1^Uv_g6bXul|L(f7U(RxenRf_ znx{Xv<{S)KndX+zaIC@L@%t#F$IUH%W_(#qdM_H6{{6eWjoo0Q;(}l8CVpomx8(=( z{_@pN<=DI1b>*y8>$WXY=+3Z9IgsGHLgm_~J#{-W>vpFs@$0$sd`cthvioKy)?9z0 zr@N=(Va@{4mj%yCMSgsIwa{Pp+p)-e4&mG^r6muvmhZTJoK=-|CT~=!&hqO^Ql~9r zb($+S>vo^7WP;y*dnXkK*MgRcrPuh69*;ImiSuAzZS*u?Y1l7@`-T6c=cThaw3mu+ zh*`w8YFA~;?!?^3{t7(11@6c!Ix#<_d%F6umu}sx%kC1!Do;HB zb@S4U#t9KgDGd9pR=jYnN|aRaZ8?}hh>DMI|OWE|BkGYEk7N5LrpkQ#z$nW9N z%GmP$*;3+lF>mJa2Y6 zhAlYS7}TS8wROdp^4Z%y8Wl`*6x7>xw0M8@t>Zmu*%cbJ;CsAFo5L`v-<|cPHIy`1Vc-| z&C9=fn{k`uwAz-bnIQ#JMSqCgd?#a7owH=RZ1`Kl4ZZt2a^JoSJT^B}W4?pMYu&Vr zttWk^@a<7L*>*VJ{mus0gXz-`zu*4w(fp@}^ykVXo1NdYIAERoA|CN)$6hJ8s(F?- z#AddidvwtxL{upBviI8E;tN9#pU`=3Y0FnVX;si_X(QF>%ez@GD(E_gvP(NFmUq5x zer$HI=LGYsg}04F&CKpvS+6s?^3Znmo>e#BF5O+b^?RkoKb^Evy9IyhqeN$R8GPPo zx_$cN%aIf1izh~~IE8(8y?*S8fA_1S7Df4sQ(1NIXd0XNSjzCu@(=v|G)GsvLg@SN zuRm)oE;g$xng#6GUNlGQ%8RWXZynlBw>;S>#Cut6O(mzu#Zo!PT{BWtX8t&}I()&I zfXBOio87o37+pSN_GP(@q2LCGasgrAu+P`;_6Mr|co9Egdzhfw6L&Gc;7#kM@Y*D( zJziq9eyZ-%`b1Zy>k`|;&!13?@LlbZqiOh!;opUyr8)u;Ib345_;wVO1Ia%E8NG@F<&b2MN}B;Go6#Ao^O^; zSF67^i6hlxu69P3u!ofPxtP|oN1PRQ?A!Y3;cDKN)EiM>@_FC?GQYL7<-{kY%eTd% zC$R8+6}+qVd(q3Kr|w_weP{HhGqqlCp22~0-FCCJ_8wX)7vjAr&m+2h>bX4Tc}Z`x zzWbzjx6R(yG%xm_W4c+$%FlTVFC0p->fN8LeZ=ulkZAp;*n%IK4?lk{Iu@?>>xik> zhjfpnr!-_ej5R!FPv+g$vePJvIb%hW9$zo}<==-6dP%bw%Nu=U?h`dLd$2>tCjA79 zYp$!vRhzrl|Lmw+^4D;o&&Qcdd}qr4wvj&TzRmi;8 zShIy9OVhZXKH;65%-e3WC~W$c%P%iGURZn7(kRVL=$cXZ%7UaT3$L$=b4%NhWuu zY37xJ83&U#|Kk=>Q{ebdXYZ$I(p@Vg_5-y=mY z<^`{3*y6+YvTqyT>IL!36qwGmd1+7PQ7-Lut^NH-{CjJ{>!fupP21P+d%s(tnZ+_l ztEVr&-?3LF#dMpFQAWb;RSmalUwJN`b@$<6wtdbk0#_KFRkCTDvFt08e#b7~cEwDQ zO8p;~-Wp2u#jQyFvw3E5s@k+G$J>N&8$MA^%gy7sr4dzlN0rqs_BzMC#?|7hF5Esn zbJAAtQ_<(#n|*6%uxdFqIEX*)d)cHUv{JKt(;KOu9A~21wPw35JJ-l(l0H{5<%>n{ z2?oY?NmhY7TSHx@Z&1DQ@PMw<0#lC?oB1y0-RpX}IOl$A=I`#?^P@Z@Q2YL;?>`DE z1@_A67T&6rUz@&g*4elkkwDvqyc>tw`q7QKZ_X6`(m|)%bR)6VI#rC_1+Upcl(NP`dq2#N=n|)ezoo8 z*P54MX9Hix+DdN9oHk32?SUE7<+d!f=KHs%_erc$SC>^f-gluRNOR-7Dvin28=q7w zm7JJ*K66pmtp!r$A9V{e9dk}(Y`DEDJ;^*Wtu!h$@F%z6_H}|=TF>wHExCTSt7g`c zAGLqd&;B*W7-!!D;%hNw@2lu+T*A)>m}hyJckcde2TM*ShV+T(DrSj+P5d#OG+C_wmgl9R$F*@`EGyttQrTEJ2!6b zn0t9s2IpfEGfnOfS@9&{w@{-X?JL$EIY*HJeq>-pH>^>7K5+wrhSscA@_F`Eqi5 z^8Pcdd6yijw^B}|KjO+tpHrHy-+rufH`+4oVL+~E$k+ExUw2l9N4jhl^_uY5WT)uP ziMI-3QbT7S{iwWCV7kkl_lFOQt*?Ign6>Ba&Z`aL%2$@jyz4!byOU4;-C;*zcaxB( zzYo4&r+Juj%JpL=;hR%l`kY(rY9LW=s^G9BcEg1kFB8A%*PlPz%lbT9I%tm2@++%^ zly+4&iXN9v&o62Gckssb=_23nb{-3nJQkwklg6lxjA*PJqUvg`YW znYSDHE#a=LV^{M&2n=RTvVPs_@-WZOkrE*>DaPA zh08Dh+b^O0wKrJPNNTmQhGgZj1xIc&6#Sk1etp`-&_J8jr_bJ(ySZ?qP3yCd3+G-} z_mEn0V%@cKuU)p!zR-F0$wgM4lHAhGyVmPe>XpVdUG@4oX~&bQXp@_4d5ms_dvl&9 zvDXTgiuh-6|9q^tzDjWBGe)T?kv@AxI~Z=iy>q5~eGsQwo6PL1q1M?&-`>b>2x#I; z)vY?PLih0eYk6KXl;xauaA)TV-MxKOWS!fzH!)L#`?!9bczw02WzErx0XI2?MO5#inl}uIt-dekQq0)Mps&(A8#S0}{ z%d#dse6F=E@@AXUWBxPW3d+pbTw9)XXfFE9`u#scZI_MdPsvM3hBtJdDXuV#DymsE z@yqRRza_rSPTC%EY1OT1-?Mk=8C10<&hJ@Uv{rw)?aL25a)x!D*@lnzZrH!+#dP(W z(7pexxF(ochAcXjmcqr!9xG!O2YJ%2^5{3?}FRSlOI_c>;_GFo**Z)bf{=iWDGI@jaO*>mri zhOWM{JwxTqiM!6*zV^Jm;iA4^>HVkvJTjIO7Q9{faAokzE7v}=EZ@7|#ZU74-h($; zSwmd?UavpRbHT8tVb0lkEt_)cgFV@fg*}*Uc#x+ucHOMlbzS=u?{mN6`BE==b`f97 zgy5n#RR(9+s>E;XU|p#0F@eMHv+IX%^FI10uFhQE6ZyqH{6dz}XvFYGMJd#?x@ z+_Z9C(J^Zw7gO1vX?N^Zxe&$=)l+oWlQ8>r+KZF1M z{?H5WT3<-+c{n#nF)MJ!Zf_;=7y%*efKG=C><4mx^cL>2H|&~kxXUl4@ZRz_>+PJ{ zZfN9j^v%Be$Mio#?TwZ*>n&#_1T!waBP4tHVg9D*3BT2*SDGF-=4KL?o@nQ?;;qr? z6jR+9@3|Krkx|TdFTGwUVCq{jUB~BSW3twne!mqw?7kCwP6ypluV#4P@zzXg{f^#% z>tB96?0(EIn{LYH622 zSw+d|RRSCvUb`G%|~8s4blvK zvtP^lz~>K-{a0{Vux_|0eXZTrJ??huvk$uk7~0A-&)xO7m%g^z$W!Fy-Dfw1g!yjF zlCo0WmZZ{ppspe=>8kflHkpcGY1^4fM|nSXbH07Dxy)gAFf{DbgOPTGwGE~?&U46?Vc66vnrd;Xe$XE*2>X&>f0z|8+Gh;pWNL; zFSBKi?|S)_sr~D*{Fr~;$5i(Qb*%pR`1z*R=gfk& z&BBw6UcdL4fAM60pkPv{p|lZ$c)&p()4fbrFZ?+XadB!eZ=jWXu)X`W`^p3A2!E_)?)_W}2P zDH|Ehe~djMmXTrFMJhoDWTx_T?&RB(St-a_93Zcyv38$5|D&VFpDdUERq~E;M&DYc zonmc~ZL=9zS2gk&ICkuId1?71_PpMr=`r)gn0xp0FaE*i?%enB;KXak3}=hyL@8GI zE`2SBAaGo)vNI|WcJQ!cW-Vlc6ukiN@>NSWz*|zB6EKBKbF}!DQvZ9 z#s{z2A(IMkY~6IF?^(O6pku-1T@|s%rHs7K+B9E{3C;_iwA`!fvd#%kzv=#$50>nm ze8@XVecz>z@5}GEdItBuVV$x!YOdT|Ih#|vjjB49+}OK=IdvQN+>64`w^?KtCH_j- zEc3LgMLcGv?DamGAaUv76o$M@M}tFr^?v_--EHuRXL8%NWs=w4K~Gar->R zN6%J7si*FqFaNr8{?E2Uzl-D=<(z$Oa!*tqQo3WRRO9Ko#amw`x^BOj^7CJ*F5B^AX=Oe=YWDE<1RwsH6HIQk`^zo)=#ly<^+DBPMG;%Z z3AZoz?%h}z{`k}6$n$$HYTxY0JZC8;uh*9`ad88So|N94i-Ccag~vC(o^!TFbnl- zYH{7K>)aAm7^q$9qph<1Uf6CS_hzm9KrZPsJxXdG@(RA!S$A(W2yzh=y?*Tea$(u} zXFSbU$}&rCd6z!Ar%=$PtzaJK{`W$AKGWXS`%-&D{q)ytj$thM_PSAKMX7?>-$c`6 zPjgS~u3an;>JZQCxa{rjfA1ydSUJsE-`}X(xVTDkP4IaG*QL9w6HGpCnDbd+LD%~F zvaWY?H@>NIEwi6czoFgGC#_d5MN9f%h*hM;=XbeLKV5g2*qVlJHM=Oj|Ng_ad;FI# ze|mB}a{4o`twBqkoVr^$Sz^(3Kf`(EjNh4V2h1)y6dP#y_~er*^@^@jX6yZ$QWQP; z(Ox6Y6Y6pfQx#I|qAbFc-l}hw4twkG6a6es#4$p!r(Ega#JH}HhbQjdrg+1prFPbf zj`?2Z>raa+#`nLL?mL`O#21@&>-2^ndt4Q#EzNRqYSG%KwW9I)e+E6lGn0zsQ;w!- zUD3$hFFLF9%~7_y_qhEt+JmmUUd?-&RQ*A8QlRN%5ebE7UbT9Y3|2iWVse>w@+Nco z20`1B(4QJf%lX^>6nRW4c^R-$&tayW&+=66l-s_$#4O&nCRyw~{W$+%;dZV!x@8Qt zwa&`}l6Q+f@2V+loqp)_(G6kUp@p2Dr8{I}EfXusII zOqcZ&GczXf2&Yx6*X?~9{J3S)j@r-sW?xQzS{kyzs8}lZ-duB)f&!M)uKs*+64~BK zf_vJ`f4dr;-sv_g@Qm``r8jaX*ClbA76gCI-DaK~|JP#u%=G49!-c1ozVle=z&%Uw zPU-vBL;BX5%MUsAeUmOZzTM>XlpF88<|;`GZeO@f??l_%?=fDp8b8H%-^%>aEzo*g z#qn#aS@ptl{8BRKKR``}}?O)qBbP)=H@C8Rc8=yl&}*)%=lkgh9HM@=uJPMUMj z=+Fu43hkpSvu6ER#QvXQc~g=$!-Gr7SF%E^T=ST9r<84#*F&zk z{J~ypndbA=1Wz*cs`Wgvy!5!7PpNg;#8_X8>8*etGGQXo{HA?kPJ1pYj&J)#G$3v)rc7Dbc|3&GlmG-&;ky znLE6LEPJ+luTH%Yvdd<=N^sdzYyZOK4&P%GUDA&Sz9}#J{>W&dXt%)UYiWylUhG@- zGVxqa497W-in|G|++D9a@B6)OJTS9IGT%6KWxo3&Srg?`{l0ng1@4u)ZWP zxI1V{+H7vS&9hqd7Mjib8Z>*)1NU!}T6$_5;vaB+S+{zf+?@0oldOE@Y*ro)TWP$l zy1!@Q=8TS=+}VrNl|KJ?$aV4YZ+6YFjUg*SGB;#y|0>X4?A!6A zdfza4MzQ8cFYQh0EwzVoI#~3!OmVEJ{a$0SQuW4jW4VRbG?nFkd|=!#-lS~D*Cne z$Ko1`_m2g3wk|lc?Cg?Fp^}PMEFMPi9@mcHm%H^h{?+e~%a83!d{T7l8E1P;)0?tv z!^C6#G9GcsH*T?J3ay>*w_@*+4NF6ws85J!y<1{Q5>e|K2zg zsVX1;m*Ie=_rjxPA>}D0Nq%m>4f~p$Qkd7RUS9I|xxSe#Kc8pg;=WCLj-3*b_;N4z zc+NWG+=aW|-W7JfyuD4^#QLbAzmcQFr$v*4%4g+N)v82Smo4kQtNUkz?_-hWq3P0D zE+@sC^|pn5;&bv{ws-G8`|=vsCwopDzodMye_HfRH-o!sx&~FZPX~W`)iEzlB4+P{ zP2ameJi4l8bmr%>&#fV)FZn*~?$TYg&z^s_w5=b%2mhYkY%kvVoj!9^Gc=-TQP1qz zlN{PyGhazbWyCW*nlF8}cFDDe_bdKu zTJ?;7y-3mGgAL_VmYw`)THIm2IO@gwu(RS)>1-oEiCAk;3rvtGtE;`wGq4$CU@1v{>EZB1QZ@iAe; zld`DF*LQ`?IZtnW`{Cy8507tr`RZpcp14{~q_!fkIC+xha+Rg#DltJn+}*wg{8N53 z??1!oKBFCXW`ENCr}XU&M;g!i$*(@%o>lPcQJbH|8IDaHC!L!{=F`ob^WK@MxI@V4qHvW+%dynV8Y=hHph&eE3;HE>RFbRQYlfzyn z?|H#s;l~+$P*=M8)%|A?W*^(ul-zXWaYI$xmU_4&RiC{=-~3} zD{Orhb^T`uP*>n^ZR_+2@OSwZJC`^2ahcMJh9wbOvm$!gRvO$+TJU-+N718m8*bPA zUgzJ+-Cp#c!HippyVUFQxjl}7jgk*F`l|EV97@{ER_}gw_tf4!)=~2$W;;u%g{TM{ zzBFo_*?(B6@TIg_>ypJgSDDTF9>1GGC2(z++^MYxzclkG?kV(`qh{W*ol{_TkZat3 z22;O?cgZuByPxm9K1VY!)9v9Nr~7l>X>vuq*xzdZQ1Hl^u53}Ab&q%YPRjDJ6l!rj zzr3S$)%?q>eTQy!e$2i(N&D=i+A$nFHt*t;kE7Tce2}lHgxQ(4`4IgH8-v%wk2Om>dC1qQcGpe?6~Ur zlG%TGzuUyJb&}GH*1l6$z5IROe}-;3*R09DZEUqqCOnu@IcaWPilwW_j4IDfNzQj! z^Y{0k5!mMvvAT}?8K2c*;njyZXMEWelW|$1a=A3qJGc9cBC>)ZA>r}UbYp}07tGAj zI)1Og;N_9TEuU+H!{#1QUa1v-|8#5IZrhB4w7gKy<&yKcrkp<4cX#%kZD%ICsCxh8 z+x}w3j^(SqaIl9=3`??UPd8p2b9Jrt^`7=d z*2+0IRPtMH`qKGzbM^AuRg(AA^0HD^Cn+sgkkekT_*2Xvx|hrTHw){Uw3h;!@4c2> zv@m&Ly+FUKL$1e=_4sW$Dy%zI$HcY^zt5+m{KRnY!YkX6DJcEDM)zm3)*qPh*qC zKNXkQ>WqE%;rpfPr5_*uds;G^Vajo-wwvy4{LFnuC;O*e+`{rT+s|uP_OrwPi~?Mm zMLA6DBb}#mudHmJq0Z>Jh`ZIN-k#;4IV-o*Td$3BE{)5&_Fmp|QzZFE>&KmzFQ0~9 zG!*;WV4S0893?hSW8F1L-P10Sdpe?3fBlO%A6NQW)a*U`nF&vtBo<3N;Xc1jPf(IG zYUNYEdn$W2R!@#i*y_Zymo3*}wZfN7pBU?lYn0eJ99&JWPr9R?-)C$W5tPDoZ8J*; z?^Hbpqp)fBF5FH%Vi)qq^YJ?V_LJ{#@I}kyEqU+rrCM$+<~bZsxk=n+^HSV(P-aKJ48S*4>}Dc~9Gg zcp+K4Kb7mBb{cI{G4;~e@+Gh_v{&Znv0cmORjf_E-SMjXyMC_8;p>mi=mab})1m7+ z@mt0v6`o1CkMG783N*)F6b(C&v*BM^G54eswQQ4~ta|A5?Buzcl9Whoj(`$hfq<#O zVn6I>XrA<4QKWNb=Zk3psV~@r=4*a@b*uSXQ~sUBM?)*rcb4d1?)N_P*w$h{U)pBx zD9JbS?xj4lr@p$m-qh$!_^aUAedhy`#2Pt;zFn=%n&I=~{^=jPUVi18-V>-9a-;h4 zt$AM}*4yvkIsWYJte4MS-v8P7eBEu^Ms4n7BfDfxzEjny*IW;V-Q!Eqa_R8=ciF|B z@oCTN(~o~031z-Aaq+J$<-QNQp58s8t8~%y($~T$oAl)yRyI~`jYx5c)toqc`|s_%EG~hjdu?h83?9CgSi`u& zV`WIjQxOJ^G|$w>b*DS_-2eN->2FFJ<4)hXVlpQ;&3pFwiIv%hIOT*Jv$U!=E?oJn zNwWKH2RQk-6pq#M#UAA|3>-USY9KL?S`E>5#yxtj0Oy!O| z&s;N^UM|-)H8?0+xbu{*9;?$zF<0+vLCVL{7V_SBpu}WlvP^c}RhO`<{~5#-1)QgD zSK<%JmzdnwnQQ(;*4^*)yN~}FG#(VGF=qC;Pxx?VX|tZ8=)%+&?<+Z%+Sb^VIu-k9 z9^EXi)$FOYx$kVxN5}1Nl^psebTM=%NHKhT|G4~*a(ffwME|O3cdQ&+PEHjoTT`WB z@}FUog08tq+;!#7RZ%hf*30Ogh)SFGEhcx*MZft*CE-%)Dn|==n-e-be4TeyaJ@)8 zF2k&3bL0NExZn$l{#B`#Z*Qo|_#PDF@$nE94Jj}E`D2Y+Q;bQ^Lee)qL;gJoaa7zX<>(8$$`J8)~VOU?oP;7tlRiwwZHMS^x8ctC2!3CGdvUf zWR!OM&D)!-GLN4gPk+oh#qm_ZO{pv8zmB;toolAKI`PE1{XS`>OK%=x+V8kBAl=w^ zm1tUuf%Z0Y@7XaEOPzS_)eRq|+~Z&H`Q_KPRiTSkUt1z_D_GSmd7X1r#fk6~I~(_% zlh$63;Qsk=ZTb0#$Q<@}A`xG%btt>f+;wBmqOw~a?^fRL&vriD^Yxj%+^vIO=R2;m z^i6X(QE}(?B&XAdE^tPKE$aHYcixUGE4E5(dYd_IiTJu?&k0wGZr4ua?r`Dh_)!=B zI;&6k*zOA(g3ma!<%)5y^`D`)Eu5z;O1Y$?;frzH(fdzUSzKN1*7=x0c>3$fZS&<; znqOPnE1zbQJ}>P;!fhshQD0GRe}lWvj*9ND%nU8GoOFVsCc+$$oM6Y+2 z>?@|qUh~~hd3sjqe$!jo`K1R=PGCw}<=l1JXou08aF1=C6L&Pfj9=|rux!Gaj+cgU zoIY2X>fJYQ)ADD%-7@#<#lX_vAAAmTU*){`%9N?$&Ggw@7j4+Hr@Qz5!y^x`r{7bY zvD$gb6|G6@Cgg0oaI)y_v!0$#|LuD%UL>Ao-umNlXkYTqh_%O47o@(f-Pc*wu>Jh@ z`b#_XpKbX2Q1n%ZR15oT^K&vm6Z2kd`zRQ*$aCGS+JA4t;*x@|@!d@id|~j^qjFW` zraK44B`4)o#$4F<*G?{CQf$)FuM;=jIqC1!I$fcZ$5nUw9N}Pd4ws|s+x7(g4Zq)Q zJLhz^nn$YLL|dc!3ty{v&95#DOImW!>q5`k`o8O!>a<|`mw#u*kZKm$Zp7`h2mrqLdpW-~< zRylpuw`eKZC~woGuHVAVc^1uYr<(>9dX{W)KQ5AgHTTRnMXB8^;q6NcPpsd>+aj3b zns1;eY#4CepLJ`g_MRPG7Z&9#e_&XX*kSN;x#Fn}K6lD9kDZ<7-*Ic#%jLDD{*^_C z#9p4MP1r1OMQ-DvM)Adkoh-N0POI15pAqom^<8s=Ylpwu3o>5}TiC5~J?>*ElV5cD zR@s{BSKEHnL>%86F!F;JHg%s_)DZds~0F?c+Lkey{c% zajwY{O2_+@FBv`AuxIU$EpOs4i+m}cE0`)8rCa2_R=v4hBrk!(GJf426Q3O0`+W@w zW&y8yPb}|!&k{E8+j;dXVbfMGZ!f)AV<@$o$?2e9>hhUc>^{~_X0s>W+IID4!P~C2 zGi9Dv$uLSh5uB&ac2(?WR5)k!`sb}rCjUO@^rWiUbmoOeXBU;EM7R9ue#3sk-R-E| z8{?-6M>s?0EZ?tql7VN_g}2?$Zg$)<7kb6Hem3*T{|pzh4Na<+RIcZ6;60SPZKio# zg}KWWPMa6IYQH`FW8J*v&|j_8A7A=Cg=9Br#S0AI?W(=h zk#n+ZW4g!GfEP+<7_$?f-6`2BSlE;uZDT!u-<_EFHPx*eIp@B%EN-ostj9g8vwyJ`u2ZJ_$WSOlgYNM(6kpO+n4l5 zzU8jEa3jH|8#x9k4svx|k_S^CvQ z|Kv=bxJkE-Cir~Y?&33jlh$`>!TM!lPFGenTNb`p>rv(FHBoWd<9FMVk6B2ro_jPf z;P2vDg`ak=e|jb^LC9yqoxp;$Gq=0tJa(Kp_IO+VJtet|HG9_j)!WNhZ!pMht6s`U)0V(FuK-H|swpzAj(4zhZE){~UR+YP)HwMvccR(VC+}X(Qd==C_v67V&!3EEmWyPV zxZA%-(h8_ci&t$?p4&?<^44w>#e7{vl}dBYYtoOA)XMB=E2Fo>%>AMr*P)z zHrcJ0cQwZTXVCjGz4?>CtT{<*0^H`h7rY7$*wNBnWqoGG#s%f#!Xbv2?;mIUIQe^T z>NUN~DP{$(3nraDs(JB3%$8#Z-)vK=^{(U3@(D}Hw|UZ3c=3_$K9}S>Z$)?CefPmf zDSn1k@x?P;zGq4f9+Xqrd!obBGvDR%K|?WlnRkq7kEU-dn;3D&P~5y~twY|9tBkrq zS9N`&cO7edB2=|EruO*4;EZn@l&ntbr!Zw%X(?#M#M!+G{O2xJ`{>B6wuEM-EuY=5 z*}Oi>S@=Xb$sdHCUirIu4QU%t!nubdWN`0cx#!rn`Ye7BUMx^MFz zwGZB)!+mzM^1{MZu5V9?aj(#uqqoyLPHJKH_vu}ao*#SvQ}~T?qNDGA=MR6C{T5s) zmQ6TTs~3CCBgMtTe{-*X{tCTg&wo|Vnfx^8iP(de#zAN2HQhe-J$zGqunc=wi+Ka< zrVm?7L!=Aet=oTFFW)%w!kt+Uz6LyYop*cD<7GMvn4fg7JvM87{b@;=b6jGfL9Iq> zjQ!T-b=z{kv(IqRWKnSHvf{9qyUxYcWvlD?-I7ABKg$0z6lNZ)ob~u#|I4}wMR%f> z1>Ku#JE>rrNoSg~>iXVNRh2vWPv0NixB76Mi%tF?)-X43dBc>-kF`-ps|>v=+Lzzs zZOjpWz*@+5r?UR3%q{z#MW?=)oOh_}y}jo4tS2FhZN1x7kKN7O?s8=A$Bp;yef@BJ z)yui27B7Fc=p1tG_qlm1WYzI+O3l^R>wf3_XXv}0-~Rf2xBM0R>wiQp#=n!C6BQYB z@}#VRt(onTCrxSp8B$u;UC>$;-OqY`S5uMx$LIeUzN8jg+AAL`);6lTP{x+PQ&ckV zmVkHk9=9)k_suTW|EyTf_M`65!pp`hi?jU#m77(wc_*+JZ4u;{U>a|_v~GIrvm@`H zR2|di+PT@>i(~D1$^Q(gIrEx6`?mi1QYkRm*_@srN6_{LpcYE$V z+l`S^U$_Qkd`K~|evPZ^#^Xf^u7o~Kn-M_hxJ8y^gE5B=x z9V;?QR{O3IUme%4I4x4`_RGhO44oEENz+$NQm+nr6H%GN7#?7v*4DK79P4Mt_%!i{%zpXs%B=pN$wn^aw=4IaWP8~mo?oUSEi3|ViOsGxws_V~|bh8X!J#*y)spmm~ zNgm1@BKj71&CdPAxa-5`@W`_$WMSDF|_~63d z{pgT^r0NY{wn?FGSt8qbxB~4jH7`!!c=<4C{SR^8RL(uAU3#%vTOCiCR?SvPv$585 z&b_$LBlD?)i@v4&?3c$`pVWLcow4QORJ&Pu?s>kKIT~1Qh(0~PU*Y+kt^XO`9uAS| zKF^z!YqyXq>~qP*Ifsw%v@0b~uRHxUa^3!U3f=*WuRdS(Y0;G^j!R0LC3YnjFfeXX zmF8p&cYXU_de4sLhRPcYPvj*Yp2u|6vHfP9XO#7M9w|T#4Ua`2Tez&+_ z`04IVPnM+|$kWnU6;oB~6*6=3!ZV$MFS#qleSS||y!vhY+QX~XAAPr-_Z9a>_WeEU zLKTfPo--V6i}|o&t-#ZFAu$Q}_`>xar7A39^G=`dG`k`8GVXzT>6_)xF7wZgXWluV zr+j5$Oy=Fvaps)l-*jfVUEPY)c^;@67615txwsJQR$HZqc3eY4^UlKOc&Q&;Q40p0obt z#GNj8c&>e!&Stw#X2+YA8y<7r>|4iP>hgZ=x7+2lh3~EEqdkB8WWCe5*)8%*@Vo_I zXL_4mT6Ahl=lU&zhPNNy-pl;g;{J3soBrtk30%b_V3h*4&u%NqkF9 zqq`3QIJ?98@D=k#ueK@OikCkvzL2+W!_=)) zi&a@CHyc?vr>|9OJ+|29bgt|vt5*z~C#(NN^v2d1?qp8BaB4Ekg5brnHfgz&Z>t~i zsA*ocyfoy{)c6~MMa~5;FGTlUJALfjLxrDvM8e+IyXj@^$!3kMQ-ze;2Gadu!%>SoZkQmud>;xvKk%YChcEZQEeAS;2i<@RQd*4s^=Ks1O)z4AV`KIl@#<$DnZJ8-_Z|7uTA?=!n%Xj;)7Tv#GX~kUw z_r>2nKlVs+-z==XcmC1a`}?+)9S(1sQJd~)x~kU8Rd(kFl^JLAZ`rK9;}G-ZgP;9n zR*oEnMXp+*o+rMlZ=XNm#g4w?Pn5#Gv$~sH3_mQzv!M6Nti_k?Uu{k~CY+r0u0|nu zlKhuLvyFF7eJ%2zAu2`c2wR`&nq5JR&vo`0sx~c~|602~O#jQbYfsNf_FRmfdHF<= zje+=0v3X&0ww<2sE4+WU_wFB$ul{X2XU(>%;zZVhFVPIKg-sLKD>yc&J`eR@b-w-A z11FsjW9O0#rK|7F&9tA2IX_l7_DD#bXQE(rX59`m))}FzEvG)8?j6XSBRyRr=x*Nh zY?tk7)5Yfo1ls7m?d;h4iEZ(Pdn?m4``_#_owb-(#OzJUf#el$RbP28n0uh9VV-K^ z0bM^2KTGfCzw?$hUs8YH6EGppT{V7jTh_{Lx(haBp4q?qU&1MoDQz|}yJvhBk(uqC ze%-fJKZGgXnj=8>UdhS8%G{5C^vbuhH|Q)q@inV4boR;+ADQJLGhQur7Jizs(6lJ_ zzN^@6>zZvlZar=~5p(*J@-vOXs2Rzkn~FjLb@hZ?bRDaIlz;zg-_3N-@Iq|U>3r9I z<*Sz@e}{DD-HUB=PIFVOP%MnD*;D;{gU*4g?HXITmRLGpjl2HchN{x%Sp%|J`MB&dXjOd2@VK zh~-1R5OJ=nnoR#0g0xiDFM7E;^^Cr6{n?h>%E{0o3?Q+&;?>%!$ZwiVtT**=8`!oIQNpGg7Z3(u#(`^$ySG)4x zxpb$%bHjgz%~E`i;@4mN&(JPf^i$4C^xHS-rw+5tXNzfVipy|`ygPxxPVs)M$ezLn zR?YVEQ`e=LUf&}2eVlOE}(lnX^)tt^)>(>mno{)wB{?5rAVz~b4~jS5wzmJDn;Sx&1QGFYao z%}d+36xq(g1d}|YrqF%Y0G(Trqr#YQG#~GztuRi-O|GL|te&QQ7=9GhJ0+;Vj z}B_Qa({o0wukW^EBPQMqvFs!Z4`3BjYkV&!Jr?!1`QcH-_%&509bV%biLl)hh; z+t;-}T<=Cc-}~cVcM6=SRSdehcGbPTzhCx77*DXV+_X@0y3)QACF?k* zwSsP{(ayab&tE4RcEjmQ<)m3{cgmJU8=v0de#GU^JR3l&bE^XcB)-}N$~le1aRWte9C zXZTukYsv)ixW@|`8E(b9r-p|7{%|a0p+HgEh8q@vx8|L$b(pARyONF=(8Ro(IJj@29IB00we zRb|V#1NJU3K6>3lPqufH3rnnxb>iVqteaVc1J*j8zFCrH$|QZYswHol z?eR*pqeoUf>)Ly!=iSp^HHI8In_qL4tSHG3yLf78K&P4Wru4XXHy0neAf@!`dg9}c zeA~5dA7?OR>xv6K zFHBFA+?-jgd$Q@MZtn~sxhp4kJW6@ENP2Z8e~Qah@9yM|LsPwvhD{Ax>zWmkao!-% zyk|~^2gg+ReRD7GT*RIuzG17XRkiM=zljR^78{RUveY^8s=4>%hlWLJyDD4jbi`QG zc!QHK`&mzDnyq?l-JKa%3KVx-&^6$mDt7zU&z}X^3XGdL-|RSXzI0|^z=>Uz-P*Ui zc?F|)-CmXQZ12{0D?j{`jc}Mev)Q2P!14>P0yr-fKFrhK#-4IKgvB}IPT=+S_Cxz0 z9g=Cac<mqaXf@4}Qd_j^&W;(pfqF|mD=O)|e6zJMZzAJ8 zrQ5UQ=69Xv(OYBm&uCx#s!*c`2j@pGyX)5#D}DB6!4AH6Ta{h>Kfc(%oKf}KZ=QeW z@8w>JcyBm4dg7H$AKw<=$!KZ2a_Peb+t>Z-SAOL0e1?vv6|Rr8PcPqm zN~O#5@%rNU-FsK8ynlDu@?Y+vqCxX7&kem1d?w_lWS!lcj&$Rkoxg(&3)k9+KlTpZ zpX2s8{wu?(Y+d)fSwUSnKk7~=xH#l(-}E!)=U)q*wZEB^mrqq+Sd<)mcq zcExiw+$7dcZpuuWC|>A4D{1Mg{|p;uGG+C-+HXIRp1aI*|D8=r%BNjTU0mOH8)aX< z!90D5%@a*lGZuphLUW!emruzHjeGr|W5)Ss#%DU*&aBA0roys45jQxD>js`sQQ#z-LRc>(6@Z7aM`|;O=P1l^f zGTp`Qm0qq$-u{Gr2kXirysY67QPc3?l@*iDrBra?Ok2D zQJViU!=$YE(thTA!&~3fq^OVXZv8DU#Z0CzfUHBq2VYPvq z@DbknvKaRd4>$cfw&AApN^_<8Im>)zZ9je{V*3K`>yhpj>6!su{ZrMy8XJ9e{GPn$ zyjjQA?KuG#OOJc~J7CM#JZIBa&zG{2d!19iors*l;_{>S8tcF8_E!gr@+S8`!yC?ytfXU6vM)ww+vh?f37;&kJ@1Ju6P|G4;qhKIN~%!Q&54-`vyV@mAjU*N-0w zpNm)}Pkq%nE#@3@Q|I!zotcbhjI$i2T32zc=&JqCU=uli@_St`PUl(If=}+On=JO{ zKLf+EjyF^Mp1heUQ(=DSs{HZt4~JHNtd!fZ+C=Nrl5IyC*?&zGyQNs+a^c9x z*YigPhWrZd*>HDV`@178J8y+6yuN=t{y)QVt^;}pzB;8;&&*ytIqz~_uJDrR#$|i= zP5iSlO|-H>`NWEF`IWh@41FxtNh#Oa?kZh%4QHOPT|(*j@{27grHw|;x!h^|?!wYn zT628G`(GV6IH~c+sZ#x|g$azC6rVjmr26@2+`|715|T}~7rZ;AAl|rB@I;HbrJ&G{ zr0snCzxqt4?9`g?DQ?w%=#hNRwYx`LUiIxax$J*hTx+F~)`ZJh0f&NfMeKSaw;isy z()L8Q`sLbt65ICw;ws!U*@%0Kxr(*qyA`pwcWy7+!8PgU&x+Tdj-+)82nTK9d}cL4 zB1y^3CB;EyiT(85+cd7N{FUxyx^C{h)(!{p!kzD155>=uwpV1Day!9Qf%Dl%flZI1 z$^_aQ`}HRl9@^=BuaxKNcFt8(J%2`@GdQ!o>`w9N4NJUs)z6nZzO4YPJHos^WK@T?(gmF&LUSuY#Eaew0z ziL$u9is!3Ov*DwTdoS5Hsb5`Nt??)JO3uHEJjLxjJ7>yt8%|$-bSZ1pW`+40Z>Ka0 zyg0%dc-U;tn|s?AaQxiB9B{r^;Axawfn6iGF=w zIzFic$G7j~X??r+k&=^wRg>B-ebwzQzcw5Vm|7;}I5(hGPG+UR&5CD++b$B~BYi2P?r+h-pqy`|};&6BW4hbBg(XOy=}7g>qk z>2yB)bbbG9maS^4$5#o)t~5^fz5LefzMPB75{-r1_FmX(tiNC6)uJXo$L5QhwQQdR znRDdXsh*A(@d-P6g7bX8d!v6&f~VQx9y>L=daYFzKOTM7ZhoVeI_>uApzlv@)hsT@ zR_@Q?vTkYdxVinS-i+|WRqQLDMM!!4dSxN|X`z=o>k~QEt;^=utNwgoYHN33qvC3w z6|A-IEiPKz?iEgO^6yt+_PZ}M|4+n*J&C*SZ1h|aveJ}kmFrUJf|(`B?{X$g=6m77 zuxjPU%yp6rzr0}<%H+MGHCZ*JFZpij;yr$>bAtDGX0ET_&9Pqc%NxrT@QCd=etg;%6LTY`n#|=x?v5rQ|7(3O&f|*hIYGy|)@@vMWd4n5pL3eZw9lKpy)&ia(Rt~n zFHBoGoKO9{)bXVusgIAPNLuc*?5bI>+wS*P&fL20Ky9_wHSM#Mwcn(+o-)WOn>BDIb!4t(~q7j-LcC~dD$v({8_(X-Uj341q* z=}nQecO!rOJk`kOuNk|FIA3gP%P`_+UB{Kjez05pH$E2BzE$vM$F%pX@-i`zHAQ+1QctU7Ej(AgFL_>Y<8Wf-E-kB9E4q&#|M_t1 zB%U1ZDU!TbC5x{XGq_aBG_eXPZ4U{#e)`c2*PE=*#JXI?cshgl3xB(~Iky*bDoo1$ zYtWhKaLV+c_@l$~IcK|WV+rNZeYVK6D0A69m&3P~M>>hjSoBuTrf`PfrMJhO{xfvC zhyII7TJ4>FPv6MAb>G~>jq25Tm!kWgbp&%qC`3*@GNiuA5B9j6G4ZcVSgqUVnrwTvXL95!TPc~zGkGG!&OVn?RQjC#=kk9B z{>%N*m!@yH^KjjvE32{<6=%G)-Nv)cDe;+-s`4F!_~{Jq-yLff+o0px6>@!D_g+4o zAZexm0paGcqVJi0hsEcX-Z*`}JJv=d?{c5>($kv91p_=i{;+gb z^#3T-;?s02+vvOG&^lW&os*_qxlumzb}e}z&3fsWc9U_AR>p;&b~4HeH$94tuT0pv zX7;p~8&0kGaZ|&2a*yQe+E=X~@70%Hd)T!=YvS9faeYtIY%BzB{+E!kXeA{al_gz_A zhI^J?y?veL)u7g_b%)laEj}@Gru5m3Y0eeNlgpc{?wI}F`lG&2bdU9{|t>j$M%@IWr}7_c{PDORAJq{O@|6j zu-e`#+i+cS$7=~=_X!59$Nj1e&plKyJM>{ION(Ie*+TKz&%29O@dougUlo|mdO$j< z)MQSs%;S?j>s3BJzRe}Ipyr^`3Rf{#%f|)Bgi~?_J4H{VuA3+O?@ei_hSmzH)A=D< z0baU+4g$o?D>MoS;Q#CY|-)@h+|_&$HJ$fGjLbairr*1dToe>&l%MuqB=pi#|Hr+YB`q^6D>9_gtk1YhEUHep zG2wKZX#Hm>aQ0HIoa(-4!j>bG@9vz)Ebt@LReb-CkAVqOelzBosJUA$)cAJr zpRYbx@;d&N#ECkb9d-g|lnN$isO?~`z!?`4v%(W*BGB@PC1bGSY{y!>!|pMAG(YmlP4?*3C@ zE9>LGKXweXPgpido~Lk&f5SxH)8&W1Cxz`gJf*))yMD>q{|tQW8Br5feGwFknWDg= zyX_(GeX)5G&IVT}F+LAUYCYGly;jlbho|k2s&yMh6Y9MluX(rS`?R<0pAQ;_I=-{H zA@F&(u%g@C7t>tDblT2UHcE6_+*T=@Ju5l8{iNrV^)vp?+N>%Un#E(4clv;ft4jMr zMrZxksXN?H{$w=`wmy5a$!66=6NZzfyxV2;Qww@mWdCPqmP=k+ z6M0vz|9a@D?z{~~HzKN<#k8Ak7TEr7ym-lI+tcsM^L}i)!OzO=y7PX=;zN+1M z`S8w*$+b@<`K3#P+`hd3^HFb!?DnN0yE^#~r&$Vgo7-FvG`sZe@=rBx`H62N&M2N+ za=72{q=1&Y;LeyT ze8asd@wjA{pc^A&_mh_vDsHXsRf0mV_s!6(I2^{|yx^e3k|O36-MftDa{p(j@>sH< zbJ=`d!@`0yDWWV=ao@k~3Tfy$(|gj^Vs@L;g{j|uUq7AssW);xokJuRzt(u` z#c}p<)pMEY+BaFB7u|mRR91NV(eB1Hf!8Yw6sA^%n8n@8eU`ZChNQp=LD5`+Kl`k* z@765-nh<@=Yh|(Sb)y@8>cuAZsootWo}ZsoGzS|v#BBIzVs3f&J>-90`3?}|kSJ(G%1;tsG=o-&b zeySF8#^4tx|A_?88S8D!)$?+FO#(ltT;Klbh}fx8wx+MjT31&VJIxl$vAn4HT%%-% zn}ZO;%r3>6#3L2!_jAduG2qR}6uI^}Czw%jvg)&|&nhN!Ie0FPTAqD1kf-GOtt1xy zRYBgG9*d5bJ}yd$TQ=$8djZ}xk`>$9mzVx$2$SMwPPlYu!MO>4V{YbXY;b)&^V=Hk zO}Q$nrdvx`{0pYd@8(WbYSA<@x~r^q`F^WW#Wj!gy=sOgyF)eJ+wR~qd6fCZ{MnDa zH@XgN>NZmKm?|f$zbqxa`*+^nqKOtR4fC{i@wKd~pQmiL>m*iXzdFH&R>=8?#SF;=p$!lDx|K~rl`dykU)A&ruBzZW!7aNrWP0h5_xb((r=@D_i;lMW4^ToxsUI?M|e_rs9`;HYzbc`dg2;UNz&LoNd|lxO)1&4>57s z8@L~H?rAx*mqp!DuVN;J@E|l>qT`!o~8T8CluwT8!Zivt$I9Ja+1=Pw-3z! zMXgik^S4j_&ma`KjVa6d@}9Hj^Eh@XA3vQnQUA||(;I?NT==jk@vYMC zi!oUayHgiuym~NSI{f{AhRa-qpMu;MZr6z3IYB9>^?+Vn%J&}$vrlNtrmNgptZlPx z>fu9gqJ(OUWHJx(x9m9;(=M7-TC8$UcJJ*2E;Zo~xAVSPc*xSpk@b1h6PAktQ~%@! zUXS6{Vp}k)Vf)1$ZXe8adylxbKMMNB@V$j)-6I#<4GhU^;#XNjw?6sLz%QdaL&kA& z#8mMKd$&K^b-z1)o)TB8+M?&3SFWqry_u{YBfI6tj{gjAg#~ACh-*1ewe(Dd@|S1z z853?VNl-k|p4093pJAF!yVZMl!*?fV*q*9&yeOWz-sbV|JqI^`>Ob$3zDBOKV5?rb zyZ6gf*~^i&#~e<))#h*w*ke=qvoJSxP5YT0cQmJ&d_Ft*kQ0Yp;1!#2!SKhwj%mHt zb6!~*aC>49bKyEG0@P3-3g;6D>Z?-3cw8w|^#XntbNf>WRm5Uwzoj@GR;`pkkWC))Su6 zqW<-DuiKy5I?XoP=E5U4;dRpEoaxgiOjtCtN$bVleHC?AX6#yFrt7$CNjV?udrqsz z8mUXKcV?yR-TNbG6GP*3t5qf9a}y^NL}$z?bw72ZQ?Tr>#@>&q;mOmcmhNbJ5_Wgd z3H4-2iSB$s@i`s)SVQ8ha=#y6Gi6>zp4Mxp&n1Qv&e|7@ILH2JXg3mb)BH~-K!VfcP#xE@4cwCMS-5M*%DC=LzXSq5;>!_8tWxf6tIhjpg(qhx!#(yg?lM(UHD9c`Z zO?7+hhv{_@?>xj(&IM)(91Ah#Zt&e$|wy%x7*om3cM`6LaUjc}!u*j^2mMWlR~keO9|`lzf}j zBedZ1{VZLFZ#{FTIV}1zThC>St01eh|K+|3T%WJzT#c9-v~-W)-L@+;uPomG{n_Uu zpMEUAROh{)Z^t`_2cB~jH($Im|6cFgl_EEuC0+gNX~S}}Yk8qNL#Xh7hUx1pq*&yZ zUfs1)*nEMpT(^J2nJsD#o`-5qL^$vYaJ%rm{-;ySl)$>Q@2%7Pl7yn?F%^-UPrut) z->3CiOL@kv<64sol8%bk_LSeLnaAAy{Jwj~?sapgJ0G95HYagw%<9+fvf@(NZ$7IC z?3D60@7Tt%X!$CoMb-VnJtqx4R5wq|x>|F^bk>u}D-1>3p1bi*YbdYSoZnvjV`F9I zgePJlMN?hp&dzwp?eJkv`MbnN8#?zJHAgMo&-dVN&$4yrBlah%s&0G9=U!If|LusX z>~uW=MQ`_8HGd4dPo>W^^qklCcV?V$u|b;TzDt27N5o=(Idse^wLf)3{b_&Yzp0<9 zxvQOKzF0J;v)5Whh10Th`p@djMaj1}XG^U~UFY7d^mes$*6W}*=~-1$T*uG;R@y(K z_1f#^qtE2h{bOFAZD#y%$0cURL&^S7_H(zFTLkUw*Y;_hwC#X^rlt-4hgx{za%Vh+D0R4JlIIAy>OG?ZV&qpO1Dt zxN3V)tzzE2n?>7YuM}plnJ`E9eD;a5_V9ZB-As?KE!REM$FrlTfBEk7XFci(Lg+13NtG zmx(PjWAu!!TRwGbBF7P3=7(=2dZacnMb0_xoU-6y?sk5q-7hcu#2R0TU2neOq1E*% z-R%z#bsXwh>5>#agERVg!CU#~9pU0xmvy$}ANnjJ@uhy+o%Nw@&d;~=&hvU$$uWJ0 z+Wx9UnbS)ytUbAIi|MlD)_i{dK;%# z{c|n*xarcwsLnsOe>d#9{yXszZ+BF8$kdA_C#QTn`l#f@e})RHu2-GQJGv?!PKXYe zF@1?o+H+NF$!GD~uJ7#RpYC_EtNWyvrRX-16cM>;HXWZL`+lE&$6zM2_S4Tlbv${Im1qC37_h+S$(UY9TI!zB(>yW=1E_tnHdw!7fwB*5~roP zNWcAZ$3FW#p5fO-8N0r3ONke+%wxD`yJgG2h~$bBVr`G}cHH!u^ig7qs%*?22Tz^} z%%LIgI=?4=`Zz&B-@U=rw)yrQr3-Vc^8~*u>CbljRhT8gbL8i#sm{eKx^i|LZo08k z_H6FfknqE?w)g$GuNjMLommldOYPlj-2#u!LnhzA~cb zJJmfNQ$1^6&A0Jg@3`jpg{KxrlU}@+39M|p6E^45-DTUAYOL;eol~5Vu;i$K?B~6c zr|1di^=r3G_MX0vjdw%Nl+~__HaoT}y=-v0ceC1HOULxx-OKjt#9T3)vN&SG=QWoy z3v(t2ci;Xy$Nh$4clWy=e}7zm`6qXtVsu?Ev#}mskh;BN1v06JM+gNHY#v zb~#PZZ_ZgU5ANL?Pkj(&=JwS!Fky6(UwOZDhsQM56NM+^zeX?1Ind{4^X>s7H~%U@ znX+|pXO=BI(0%r{Y}Q1lP_3FMSw5XrIWvxmp3L1VYQe*uDCvYTM*NuobLVk{EU~UKRSGMo1&ycp0J|U{AtP8?)BTR7IR6O``yH6Yvrm1 zN>OQd@9bNn+aRfF61!sKr^Wlz`|f)F+dNy>d zUMe_GvtQ6$&7VQ`();xK!^>wMyJH}8YD-na#l<^qKmWZmEwasbMz`$NAn8p&kc$S4}MCKe=bF z`CFx1&sMo?({I}sFH?T4+#pxa=HNu76wOAN7`eG&|6B_umVNwi=}lUx)NA9NXAIm| zIZ_N|pyR`q|vn6r$AJzC`Vw0~)bA5H1>alPN$FdLaM2~M@u3jSI z;&;ztdT!mOz2Oo2HHyps?s>AwLBBzoK^eY;WBy0i9d_GyI$J^*_VQW1c;M-Ip%s1unM@Y&$uDGpmw{sQcmy2%J zU6J@=(VL*I@`>ippC7wFjiqYCVy)>{ZK9)9E~`Y{{hsLDDOYsjPqK7#>9*}19AXQ_ zj?b=d=-73;Ie&_KtbuGpN=&~-)DH88-2V)E{3^`iDpd;=J5pY6+&*{PYw7K`>bxIz zrN7Q}0gPpW*(jyHz{qH^)BvJugS3Fa7J;7^|pQ-;h@#DPP zcY2rz6yM})lFGh+oMWBTsU6IZ&n-EdXB#DR%wuNhwK7Z2JDu5Mre#pD|d^+pZBi?kqxqNS8z{^wV7I`rbcF1^GdHr3DRr5YH`;Spm_#qw)a_w? zxc-KZ!(-pno5520@8`?u9B2IZI#ks+`1mW(aM6~3N|N6Gt|>{IMV_>7yuIr`!~48L zmL}H<6n(2w-Hps-E8LT=%LwIJaPm$UZ&E6cw!hk)`-CO$-jtjjqRPEW+N$4D`z{n2 zD|zdO>h81qcVj2(HOr7iXUbm1rkY<1Y&l)HsmCGZcU2|#K|`q(#uaj*HfvMwZ8#<( zojh~f*&`2Q7p+p;_rTX+^{ToLMK_|BpPG|o9&xAQ(c_Fh8*{OhH%eSWKfdqk4HunR z6lxX{*~e;OX}LT3=+lkPF3gwz{s^$Q2PGPb;s&F6QlPgMmNmd z&s>?N&KJKvs40alvu^D~Db0TmE_Xz%*q0DnSzEqNSyTELt6K6Di7yGS&1UaNkI7Q{ zyL@q;Twin%=U=7xmj@nb-(a4qv?2MG(^836owyYq6}`TfY9@!RcMZ7^Fm-SB>t{z& z9X?piIn(uI)#8PrX{ma%^$R9Wh)>X$6q>YReWCAt#n=|jgELkg=35&uf!|o;FIb*%q388f=M+J#hoSv6%^@|lS_0dC)X7nOwURzDw-TcY^s?m7;Kpamv7c@D30 zHO`6aQB|DwV{!_E@5)75Yj3TH*`csOd)pI<HR z)$NaeJv$^4XL;wCJ*gDAEPhW>#hl@-*i1oH-kO^4+nbLk@~!w1_QWwQVD{llJ6!nV z^EPPjce)h#{CB~dN-al!TbRWG`xD?RL; zC(RVK=irw+2YqXw9hqmlyk$GvQKP-O(T82)eyP7YDmwXs%$$uHvlrVcuJq5BJ#yRO z24{@+1IJZa+Kc|$-xJfDn%%3Z$h)6_j8^-tz;cC4DrudaP+YP#7P zq0e;-ehpsBKGxX@T!(bMTvmQ+vr#bD+jddjF#FSy{|s-$r)`e-Qa=4j>WjbOe2ZLn zzqh$GfkXF>z;D$b$6wVP`THzf?V;A?E4P*eO}KpC#&=6tbAIgEsk+rK=9~EQByLu@ ze=bGiquuH&!lMns;asOPjP%fk>X5c6sX6;OE^Duayn2 zNKH7k?9@u>GdE@0UDCbV&n{d2_|_TanHmA6$tzvuQcYGpX3qW2zCuZON6VV4zK-HQ zvb&1n`(JY_%{13Y`gP24M#qHC$=-{0T1_d|GW{$s($BKtM7nogRkU9FfSkU8DMdBY_2)Sq=a_5!Wj<@&iC&ekQq z+veN3N|c`0b&6{TMIZdS74jEKmcJX>X-DV?QU9~qkTyg#SH@=hHle9&3_KHb1xTRj)UV2h4i1mlb``dy$t&W

)aHlW>Qa1lecu)P#31X|&%sJ} z{En{A-Dk1sl)%M^Gm@gWV%2)NW_{Srr*`J$+mGybMN~tt+Iad0?f#fp=kTiLvCf54 z+Uqu@u)dwA#ImL7Tl>EGCeAVjOnnIxuZC(*-8ai*MJc1;-pTt@x*qNg-kz@}aE3c% zuj9hQ%l3M=F?@7b5gezf%sQQ6driO<>&4UV-C9`vpMg(_`-HB@?9~?^9yrVJF=X+M zCl$dh2iJb~p1>F?m=}8C>t6e_-BH^VEV(QfwJR+;-EQYOu{(LI>PhC@u0=MNH!Pcf zol*6gSTsXwuEnAa2b_yu9NqNequkWpqBX(MQ!6y9kG*k|XylUEYq#Xgci(BZCJM|` zyJNz8FhI$6%hlYg)oMGK8((p{&vc*YH~(^sE_bgc>l3A=tIFS=_WRMIHCeYgxAC`A z>P{80PeL^gM;%Rlb9GJQ4>u~hx!R;3mR*&1r*1-uXX<Ma zvP)g?{q6FTB~P0s*EeXnMK0>PtS6S-cIG6{);+pA-^ME{h+Y*54>FOqj+wDlA+m4f zCXEGWI@iWMQSZ8ONyhf}+IRP6fp(aSaT}$uo(<*XyEe1z#}WRRFa5s{35k6C_%{8# z=&A>nybeo3GW3IP^=>^DR)lC(L8I%`m0UNm4{!!xZIdG`hV zXYdnP%;xsW&+5cic`?>0IWs0b%zRj4(I+@@w(HGpZmr;%Gn6+xn7r9?V$2T9@Ceh81=Y&GoKAmuW+A$xaBY{qtyH0lC13X{ zvD?27O4sQ*#cXt6QFHVDrw4C4_VX>3?WnC;r?SI?<2Q%(z3B~$>hz!X^!a|1{4={<>td?Y zjKI{=k2g{mUP^f}o6T*y>YfSVN1gBYluZi0k^Y8ywI5;imrauMH2o zS}R}327l6Av-8#WquV+f_3sDEz5bXr-_|fX`{UpFoia@K%KC%y7tA|tc(`t0!*>%ZO2v-J1O+5Jbkmv!}pJSTs* zn`!!Q{ z`|_+F7Oe`H8k>52;nmI;MMgW16}IcjvRvSB%)83;0ZU6G!tZZ*Sah>U# z_V|+0iP!me3OpRVU&iwqtT3-|4eFM&kqnZxJzRKqbLj!4hbt9t-hSlzzWGSng{gK0 zr&6CW`R*?3h)Pk=t#1F3-PU<4ZYmoOSM^o3zUL>(ZWg{%ELh2(YwvnvdHi3FQ=Yy# z=|_u%9gENL?&_Cr=L%C=!N>0>ygkir=2n(%ZqFw;9eMPyzCHeN`>Ov8jR(HitmZc{ zJD1fRx9n)w*(DtxXIkg&Dra%>`&D-IrM}p_gIz0Fr?O3+7E_eNk@#)-(QVrfSY{Sg zWPi5N+Inr)p?n2-!?fkg4|Y$#)syp8db-5szZcr})$iHy=s$yN^YaaxIk{9dcM9D3 zdRk|qO{_Cd-qd$dyOcBLFPG~KkKAxZ?zFyF-I9|IRnn6rlODX9wX^MXMsTh`NS*cv z9oJ8?u50~PUtblNH8rB}fmeO%12xwRg36*s8K)0V*Kz&Wa&*Po!#8)XoRnf+uwC~# z-v>o;6JGIcv)0%12ph1z+5TqXnLdBpt7a}iktqU>pLXbcFM7naM7>38>W$UY>bE(w z?pnNnclWgDx_LSI!hYQ`uGRWegYVhJ7%&}6T4~ECeOK_Sz!USUrg9D<-3}d{*CI?p zZe2c-_hI+;*|J#^)?eY?Vzu-SiAxZPhU_t1RJ=|1gA_ZYgit6pn*I5p@@+lh5S&ks%%)0y5S-4dNA*!$!K`@zFo z52aaUetE#KYI2b0&bPnb9temP5Ik)^~k*B&-3>`zkS@tPR?+qz|^eym$#X^rR=D2UH`k!N-6up z^rszLud(?XfMW|NU}x#`5dB!;p(44QZLy`RRmXZqiUM_<&Ioywa#E#s>O z+vy9p6K`Lb%V=mAe6glRmwQ7bZ|XJv(~G0lht2uAB%x0rCMn=O>!QH3r5|H$N{<{* z2%2`_b0yzW?=?0Tu58OZ`>FbNK@HQk<)-YWd^up{&3Vv+KsDdqV&M=B<_XY}op5ctjQ`&{E8 z-v!%8g1ZiWu9u&uxv3|5q3@ZCN=s!=ZoK)5?{-J@rPuX3M!sueqZ8H#eQCVHJ@M8g z!6)1NjC`ig==ZPxvE$L14VrHY4{Kjv=HGWmYqFY=S@o8~5i2DHhyb zH&1qh#(MvVwtWY##V)zN`Co}tPQ>w7x1w78)I!>iRw5Yqr*d81?1BtNr}d7l$a@D4mdKxOd~@^`8$m z-jMonuFK}s!m3LF)fNBBx=%QNf4%6T*UIAsqRjzWQ+@=*N#B$dyz%$MCq~uXhvTR1 zxO{K2EF$-vFX81k!Xok zbG3|hjb+R3hA{Z%{j+=L-E~Iyu)r>`O=gdiIzOnj?cMsOd&M%zfC%{ z>8R4y*FjIe*YlJn%zxvq?bpoa>X@{fcdPiE0-xP?R;b^;-yZT@uGfFXbhQInzJ4{l z$D?}vEb~lGyJoCv{NpiCW81R*mwki|+TYlAVkt|<(V&GEPtE4Uyld*{i07|=*zn}6 zLtx&f+nJ%Ox*dACbrrTXKG0a?vMc+0S91AcX_)|1wMO3+VOfrvi;R{Z%8V+G^Xj^( z)wTX~pigFBVf)2YK9M!YmAJcJ8C}Zd=*(P@^)UKp7W4OS9}l&iIG=rjLp>nu!qkU3{PURq^WoK5T`Lz{T)5?1(1ceLyZf6g+_V>Fb=+Lm{b{G^KkcqJ#c#MTC2bBp z!GC>jj#QoDZiU&o8*kU#&bShKL?txtpOsej`vWHesI+eW;5|Z0Yr*)!z>qvSw|v7uMexmX`hEkE2Mgo_9z&>uztCE0gX9?^@q~XO`v( z3EE8z7Coe&dRdFbjy#|yzhu{PmiDs~==L}p&vEECn^)x1Bg;kECU14@yL z_BiZX^lH_%Nz3FGc;ud)ID5eww#?}!3KtBxy6>=t#|HbeUjI>hBb04N-Fn`{3v4-= zMbU+FDk|b8%z>p-T?75BR?W;_q~0f_vovn`(N?jgvfJJ>)UFjg7pKIu&%V6=Je%a7 zvqk|gFU@^9(<#Gh>!Vd|89H29U8`^Ex?VJSH*4Jvh8Xh~k9zq^wU!#*KKgF^Ij)Xf z46G|3KA5hj`87gK{)_QXl`md2> z&%L^ZO7qswGBQ@&x;!b=`D5r#@%^fo?|x78GIi71JwKsC=?2GM#zQ79Z&tBA>hFIc z@@F!C!16-mqG9ZlIpqh2KgqX6aA0y>a8Ed}J9bXTR9ti(90nK7D&P z<4dT%#%9fl*WP-^pXUxT5DJ{S?A5weav||72bh}dYF6wQZOxpRk^fO!=>7AqaNfNC z4DS;CLefgt?^!=-cIwe^-pG~yW)hmBmQA0iwH9}# zOB=^-QcxA${v#=2$=Qyp$75oj#~CZT3+Bbve$W1VJx1!Rvx&UR*OzauOx^2vq*ici zxS*$l+gaVK)k&w$x_Q0-_w2lu^Ad&djHQp4X)jEPTrYX5?Kl6_L%bhjg=5@BQYx$3 zxz^4I6Ioiy5dX#@N__s$gx5dw;~#_^EVga+S#()*r{9Dl91X`^ew)oGGY@_JE;zn3 z`0l6UGoIgl`L+I&_Dr_}+n6TtMOrLrW4PVq})e)us z<*>ye|F>!zZBJZKK3Vg`>`w1Scc1RSr;qPX|9D+Y&Cb=KU)002hwGv98FszP(#UtBV)pWKPf>Qq5SO>N zrHWk_mCMQ$<$AenoV6}Po#j@U<+fEUtfzNr>Gz8q3$Ba&Q{V7tX`PBCPw*;t!HpBm zX7PC#y82%BGhdbcEJWmLVS?JMuFKLnPqL=&tv^pleE zEA%UKGxLOagt)jk8mBtjE$9@m$F8m-H?abuZdN&u{fxXKdoEEfZisp1nJFb1LTpkj zTue8Zxs%wnd@T*Czlw^AsMNe=@}&uo4zw~X~Sa& z?AwymiV`7lE>SMw+|i3A20}bxp}%VFfuYScrbV{F#NyG;0)q1!T}Q#GZP4~Ff%hT zv$C+Vva+zSu(GkUv9hwWv9Pdlvaxf304p0O7bgc7hyyYNq@Rh2iG_)Yg@cua6{LbR z{Qm%hAO~YP;}>Q|K?Wv4MrJ|A|3?_)85o$Dm_dd!f&!C)fq{*km4lgulZ%Owf&2dv z1{VPaMrIIT0y&e3jggg+iCK_^Rgr;Bh~3apSSgSrQN*aQkyF`eqG-^KrLZlU}0ruV`O4y;NS#VBFMnR$jrpd#>~#l%)-RT z#LmdX%)lZjq{s@kBC)WMZQ?=^qm2hIKK#J0>=cyrQB=jasA&?1n8_k>Rp(8YT>jr; z;9+KDU=m~&WUy!WVDk9N>OwB>6#iEa@6_!oFV%6~YR4_N%J9qLSt-dKZLBFl6QVi3 zio^vi^_`R$U;Llpy^{zR-=I-^wD4dA*j_e};`# zk;lu=t@Dq{`*!=j@0yVBU14`uq#lz>{lFErU(3^@r+MOxIgW2vuE{yvR=uL}b5!h2 zchwWAH~8hEuUp^nF8TV@H^f4$osrwI-~aBUkmT*>Odhu!>y(>Ref-?Ed7@!yJ+F>N zPcM6SFY9IQuHf5~^0by`-aow3i&5`{s$=bch9%Vv65)xxijU`5zFCr~UUcA#qPcml?pvZ-xAqT zkG|tEi+j)UN!=`&X45tI&(D%|eb)?5t=Mo(@Je1yw)>j9{>!bG^=$Ga{LKlDuc|_2UvK-p;$Asp zd|@!>isehvIyW^;urRY|;SgpD@z;AAY{KSW$-ki(Z{4R+c{J8eit=Crza!a?G zb4}glCXyog$<99Rm)t!I-E(2OA0M8Z8GEv}z|>lJMdNly_h{xErA!mV2ZPiUz8 zUH&_B%XO_aQi=_kKHJ_c-j~gyF^S=6*(V?w61kOo^gHV)D6l( z{!_gd``=&xlH305v&MgUzclAAkxC6>Sg~R2AF&G_jM_72{`|W;i{ojFp83D3x6v}C z>#wc~o6u7D!R2w>j$Ql;31(4umQCzBvX84_;=I4Vv#d8u%e~ExUs24Wmj2HAl}Fye zxHDl(CZ5{FuH?s4CcFP~#EyM>*2<^0x9`5Uetz)u&lXcU-|@)b_Lh55dncqpcyGP= z5y#*O)ny--l?g=e`hLIA*`uXoaqRYe-`TF!PG9@=D|f0*;&lHNml{4E`6j=l;BAXWcYK?Pk^h@}@ zUz%UfURKL=ExPma>)Yk)ruYhoEW5jH$NJ!_{wAuPj*;^Yhq^!RRTi6Q(F`MTj!l%Nm#g(=kq$X{C)j-|K{XL@4I|&w|_~F?^%!k3=g&5mUky#nHqA=Kh@xs z)8reHKa8J;t&{jDc>5JY{SKKwt2S9UAG`bZWsR=yLa$oy)FmqC^;qL($TN9r9aF2n z9Iyc0hxHL&+_z&gT4Ful%XgjriU>y57FE z(00MvUB9ftV^>toysGIHvQUeSp`rSbL3R1HMeHg@eD8yUVmFi~xq1Ey3(x6xH4GMI zJT@nsX=!!H#QO@58w{WS{i+jeGWmu3sjj%tdzHITfZfKmYP}{%={S$wxZZ zZq@oK^PfR#rf$g`|6SKFt(>(y|5E9-zphS``hpKgzOxpT-}7(t!rnc3yV{Ozop*3+ ztMjStx?8TAd$)>iuvpf-PE~nAgYeVfK8wkXQfIDv3km&HDLU}y<)qkCm&$DVAO8w% z-(D(Rl+!(R-rGy7uRVJCGNwV zm-I9HRg~V{QYhTO-ym}B)vsG&hR2HT*1X(Z`daVst8_PWvBP(24YwNauRof(E@b!A z?bqZNByt4%E!0&})%KdS&y_o9si)wPrqG_F+4q|nty(hs+xD)!yNm9;Df@Jk)%w_{oA#c zdP_}S^L71`g?^f;Ti1F&3UTzb*)#w4x$YekgMYhEmHd{da7CLdJ2s_d%S(2<7af-t zG9*uqK3sBfUF*}RmvW01g={^>`FZ|-hN_uUibNR#{8&Adrmeo5nY%E{X}{KygK|DU z-m&#}cT6&2J*UMWc+&CGl8i}DO-qH7yX3rMw6j|K7Z(XR-Q`lO6ynHJsGm~%lwDw=vfnA)X zg4JdOS+uR%bYFx~h0k7g_v0_uc^>`IGM})bUH9R`c;^rE6kan(9{LeFXKViAfZP*B zkABVP2ogM5(0tnR>CZ*>X8aZJXPHZG+QrbDu;4#~{Ijoh8}_!&Ecvur^I~9SLpml=3!g-=3MEI&sq>4 z5xvuNfB4sv>z0P@I-9m+`z50d_Wzb@`L9}T-dipe8r#|BFsZ+ZD{u$o;ykyy{`?J%Y7VcPg?clQQE4AX6JWj1!7!a-?e7j}E zfxiC?0wt;K3|li!>`vUKGly&I-KgztRi%Lixf!mh zT2Yl*Ia5Rq`gzO<7g+b7Vf)N0+rwAqmTfq3M7g5)+XAUGi!;*>eo#ryw|d@N5cG~w z%d z;-P7`GhMZ~qS{3pCUG<~)SO{_>}|39h<#de-p6fsZWPH+KR4^cIdPQ<6IU=lo%1() zxpmb2@@;RUZk5N%KX#iqA%cE*c7E_ZtvcQ^7e`@B8# z7amwWwLPm#U~bCib9Yv#uev@l&)2kV*UoUU*@>c_EKjG)Jn|}ToGkIleQsp#=1OjF zO@`*)*%n_9u3WdM&v`))-*WZ*Yb*QytuMTGZ`szXYxS1&CB{^E?GY;CYM1=^^WbG| z>6uBB-+a_tVEKgi?^)qP58GJU){-I7(6Q{2r3f*ypZbTF}TC@b02 z99z8eQ9zW3!bGo19)XOP52m^Bp0bzeKk(i`yt^ph&-m>0{|urs&0QXhOx6<^S&ipk z@UM+b-JI%MCN*c`B&Pn$@>)|ZBp6IL1o<28_?gzaUOKqo=4-LrXIoSiST$u5{Lf21 z{?FjctS`MoLHiXS+n;S;MXElu=RQu{ZaDX-{~O_$*=Lzw2OZ2>x_!y&Kg&3U{1bd0 z)o)vMC(4si=<)l~V_7#3zPXh#S6y1L=gB9zugYH)i;_OpZjM`Vw+az>AcpULz1@7Lx5+f#43wbuN+mh0v>Df_41 zHl|ClT7d^_-0QFZ+a<-;qo1LwxL3Pus-xEJ>yIwKVv-6}m@ogm*WL8C-KnF!715K- z)^A^t^>Wg}fYwbe5)qm|o9wD(Ir>*CdY*5~z7)BsXX6^bs+I}cC-E?Tx60VObUbowlH%#^NQ7bZ{IX$7gFEqov8IG<-7V$ zD=iMu=^LkP`?LK{m9V>yhM(b=a{ae0f1NeQYl`qI(dD03f1UT^Kf{R&=G)&YvvN1O9ZWe< zx;ZmHAme$~1*>20L|6GgydcG+(jfZw)W$g#6OvhupPL>R^UX(b>Is{dYs4m&?OI~{ z@ia^M-i(E}R2NNa)inxOvQWEp)BO$1x2zl1XxZ=24NsY}d-}o4kAt@!-ZLX{$-EiI zPh3#_Xxe9&Te70Cb;^H+S6q`gIG#S~m+f=unQv8_D&msucH;TIt1ms2y?9lUt(iY9 zGo74#CAw_VELZFE_qQ{5EL)s?p>@00RIfQ6GaqgK8_)56#?_?nkN7Tq-o2%eDO%RQ zeS7=dmj_Bro!&+l=s|CLoz zBjl>E{o&FNM^`!LZ1S!Ca?WhJN6v&+hV&=(nSYl@Jkq%ZdB*)s)?L@~^FpSs zP@}>JQ&aoSlB+<8Aq<3mZz)zGm9Z%D5UA z_Az>n*W@#TQ@;IY2>!TI{YRXn@}Wtbue4tMsa?W2XY$Ka^L`7jH#NO_a;fiJ9~04o zdPN6(d|V_9XIjjcxGLZEpTR#@p>=kg{NF=s=2$=G#A#4oleb7 zo>~$r^3rMU%G_3+4-x^7npF;Ju^ahSzwY{Yzxmm=xp{Bzyn3;Nq4LSUtykaJuP74_ zThpFY^&};EUTxOQ($Do*OTAZ0&#ts+3E1#8soB>i)nhqxmq2iX#Zpb52h4`|N^krV zd8o!6sHtAB^Dg_jf5(s8-j%9HdYI%~dM;hMnfssNT2MpRtYu-}*<`*PWOZ?1^S|?5 zu-~fo$dcdIWo_~A`C6oWjqiAho#U2so*FGX`^){bwQ0T4yB>#}_GsDd?#ASKz-Dft zOT(s6mBP=@lw;ES{xgKAdX_}H9Sxs(yZGPT{W8}t-RJnv;CyxM*N>V$d6tdM^XHms z|C_RnH`=uC?cbB7o5OzjNoAEs<@#DIX0%(iKvdC~fr-RAdw8Am_e3ch!-@TJCF)ztj!s>*t?l_u>-rC`TQ7wg`ZVo&!vE^YM6EZB#vSte zD(3U`-TiITaF1c)*JX3;QWzWsm~HJHudUm6Zre>Q%@;RCc+LGzcQ`R6PsmbnTF~O- z;W_!Ua>(m@BA>icxDI^1QtT3O>jY!a!ot(c^Y@y**f#ri=iFCf9Rq2NPXAo2{x}b zytkWvG3KoEmC|*&Uo@6p+4f{dtG3b7r0N+z1eJ}KO*&ta9QPvigF4HvS&ezt(hVEm zdb#FET`~B(Gt3OF)~&2*l~ulLIn~%|mp?ct;L%iC{a|{z zjop>b7|(eQIqrLl^E)gAj18u4P5#qvKmV=At9O&`ywEEoLgS z6*E=;XIN)y_3^^Q7gI8?KA(ST=jwvY!W9!gFJ#jk7Va+WezbE|l zmwKYZ0*Xv4dta+=UtYd@YOO2lyqUkdPZf7-Bre_c(YX1jq{Me0!IdhHKHNT5`rW(e zy#2ykGa_$zDxSDm^Z8fkwj1?_cfZbF&AA{y>q~KMtj-fr?-%jwqWqT~ zizNh~{G60^`tAM)?OzzTUO065!Yt{}3qJWj&YHi%!e{=uhg=6ID;rw$aRGcssxoL;d|S45|Bw2=^0%eB?3dk}a7mGUo{q?~(1~opMxKE`PO|T~zF_B$ z+@&}87Cc)Lpm^Qn-P8-x8o%maO3m?7(Er0UXPyDS=A&Xo^{-M#zMoZONqG4r zyYlPv0EPK7VYBBu+k54$UAp%I-xMn*Cr7g~Heu$@;|){X?|-#gclT6vqRxVzuWWl% zW^QTz5yBe(;odKUT>`7lFFV(_Yt!U+Vqs2GR-RC^VXC`qRiE=-;@VC-Nz<1~x5aa} zoAIBQ9&?}m;k0Y`E-cy;_+``6ZL(383(I~lbxu^jQ_=GI`120=r}E71H$Tp9lbtQ( z@X=CUCnUz{tcpV`!6=HU5wI6~EQF?4p0JsMuVMl2T(An!4n-%aZyf zKUD0+W=>JzpS0Yk( zRUoS2*m%UV|FY|?8rOpV4F8@mvA65be;d)=a9g=tn!Vm?iLLuTX8X8kSJPD+qpI?+ zlu5NaZ!s4&d9?T9&scW>mj*}sdn$AJWw*@`4ZT$3_@AMR>wTm6q1EmA!druDFMg5# zy8P0z`PtRAhF|$6E}Ie*?(LQxwAIbu%;ws9Z3~mjGor;U6K4@^+xZ8 zUEAu=casZlXx-g<=goC)-R!Iuh%hpUtlnUh{@6SBdFz4D)4x9dskh>? zj-KJ>H6bHv*~`M5%WIFz8h8z15fX$_vU9=NksD zs=W1l)>_Z}`GRZ}8k{Q0%6kr|OI(?5;}ll+tzQ`}Z6z?XGRtF% zKdtPWQ}XZ5uBx-o_fC2p9owV3O!&|1xt{T#UPejIR05e*W7><%23C%fwfWrrCy;f@>(q3~gR*bF+SzZ7?p=IWxOhGHoK|&H)~}uyz5aeqt=o0# z?YiYfCILT}Gb&tF)YRnQie#$zrvCU#<37L&B^92PC7V0d>~fy27R%~M{abf@SaL#`l``#9N}}Pb!iYC!zpmWR zzUx@JDk=1p=Aua=Q?(oz%HQ6on{m;6@*^JM=IF?%Gm|$;Cm&xcxHL4Jd%X$k!efVO zFL65cDF_+MTb#IBV0BPJ;eF`peez|Wd2L>K#@g&qT)pjKDC4^AmrP&pm|ryM%?bwp zBzI+-Vgvt#(yi-{eEileY5Vx`R^cNSv450f4;=W<5bl*8{N>W!%G+UPd0Z>DhI7|u zBy3}mS|ae+SD#f;Th;TUjGNo!?MDSW@2os$ICoFh+U%m4##RXx&lToR<-GqZxcK(_ z$aQCyEn7Q1^XHyVf|`6yoFNRg4Vso={=c%~q(8QlJ3hAgeEQT}-*p~+lbCE<6lz1v`s<7FQ-IB z<(=8a@KcK8mSbj(($u_w@HL`}*#V3UOPCS_Y93Tx?fVxIXzecb*Z*zwS;M4Tp&qaJ zUx!UjeP{OV_nKg-_RJNrxw=aiY?$=QUbFvGr|g1sOJ^pn$l_3^uRaI&<@`2TH-DX?a0{&!~9}jxDR>XJ5>Qp7)+E^zc4qY<`k29%W3LZi!Pf}XvOC8l+ z&OhzN_nF&d3+L_pu=@RhJ4Vv?3?-H07u*Q<&Uo`qY}N{=t}IT~uL89zn7k%+8r2BT zV90)Y+WOv^JuE45$%pUldy?tA&P0QGe&?c~fUXpkkgE%SJk)fs34Nfd@a=c)V%e`M zdwiz3_!!Bx=U+G3s~?cXW2%28B>B|c+(k3HvTk*K*>=NeeysbFvV1M&NGFXauV#XBmH}hBs-fu{`oMjX6{#sLdPQ?L!25u*~eWd{l&Antg7kj$Mp?q z3FZ^}v;H{E@tX8V`Vs$D1DAjI?WJ^LJy(~WUvO)Y`exZhJZamcp4=?GHA`q`epmG} zLz#{D_y5X>Ua2y1^NFgEZ@ZZld*=L*2|fMGaf@}y`giN{yY%9@)M6w0Wwrtuu( z+dFk_>M@spP1}}aQr)IwCG((W)y)ro=bY=6kmj46UT?T_-Hnpq*Z!}#7sgusXZVqw zkAD5ZU%zB6T2Zm>k)@_r#sRtIA}drAn2h5dTu&-tyI0G|b|AU< z3ER!jrWt*Ph3e@>jSXy`mkRjY?(}`}4ZOB)+E=#kJM*mPFP!b4altl!g}2z-)yHPd z>)LVItZfdjrUBEu+~apU?au}fwa9?_}S;V?3{#^CPtK?YiWg z;}cfzGnJ`X@jWBIEqCp8o1^FMU5a0{YP+VX^3~i|<&uiN>wB0cS-QCh%$dXV@a@GX z9x5H#uXi5&E+=YTSb3VwMZ)IW&mgA9uCd-RdeV8@W`3LS@#}=C)d!wWU_8~wsD503 z+nVyrx1$#Lop^rK^WC2p)ial?OF1}SpURXUy<*v;zlSDXy}Dvbouckk(IPdW;QKQB z4yC+wXWO=^&)H^@Sn-ula;wk3bS(IN;z)~^`DgjL>e)Je)-HKkiCZ<+x@t!-^a$~} zs48*VuMwVk`Si}@SSAOF{|ueSCe`}u&yBKh+9Y@Bk=;dp-DB=29xYkdnNy~@~72P!j0T#ie%^WxlNAVwXbX5o2Q}L zU;VGFzJ12ZR+Z6asclckvFfrTS=NX1@^(LLt1;{^KG!T(ZshXIlX>Pj|I_wUnU~FL z2^L>@qcmhyY20jwsbWiG&m42`|IhG+KPRi&PEO3JG(oQ2!tQyNnD*=LiY<5aZYNCQ zFtO`SEZdmB-`($o*CE;OQ&pW9PMz~VXn4QF>RO~=9t*cHgP-}y^UKPX3dh{>Ub~pN zE^243#^S6m{VQ#g?_QY}H#evH+uQZ=pDtZxHJa8U@o%YulTD|ZO~=LQd-CpGb6QvW zJB7b4)W?sv>ij)s(c5+n{~1{9Hr-phMZM0}eZ#D>xmv4MIX8xKR4{XL%zI(PIWPD@ z-_^)}%5U=U_yWYKi_vZfh zCriEf!a`WKP1sVXu+*YS)Oh}VW~a^~mrMQc^5@NDpRRxU-h#QgR~Ylz46j?>TmRhq zU*EpDRrXGI)%~sPFTY(O{5H5JI_fKXp10m{J(r~{E1nlLz}hTmC6tP zsh66bG-aWXw3ifvNP`l8rTU4#HUD-UU3>bFso^zuefy{*KKzEW&OGMy>YfuK!lT1e z_{O-WT=K2`ijEvT36Ipo^UFM#KdqEAKEu2E&4zidTiLEGZdx#j!zlgRXUU62clMk! zP0?R@{CaZa;Uk7~UcG!6b=4%`)(z)Fb6-RSEwX;qgX)6^8mu3gb*WwAJouUU z+s`ZAf?8ElfhF(j!tZ|Pdww&zt84Ds8WQW6_C9SQ6LIw*vE{coE zu3>!~rhKCR+O!Y%6xa6T?&eqiad|37rk+{Itp_VqHMAb9oM^E2?ChzWd1*!*$BZWP z&(?n$mt8u3i-9*Kx5a&T(`3eg082GKAOG9Gvy;y7XZRUM&bsT=ylP)i-zz>7fu&U| zBYm_yCM}jvR6coZ)91}hYkTD9pZ>C9cYye=a?x#J2ZC+FmW7CFav3Ds+4Sf4y6Ya8 z?zhB3rt|jql@ZeqHJ{ryFO$`4S&+!qB@=3&Fx5Ta`}fG+d0C8G#?+~*IsH6!NsA{& zUgs%b&9b?yB{*Ca~;ORABsIY`gK6iL$@aN-e*I z=VkV}=g(zRN_z5mu1n~}EtgfVpU9oEWL>yS0ORpJN$&N+pJ!g4U8f|Wb#~5t8Nn|% z0)=k9kY?<+3sL(#yX?`!Wv;O+KZr2o?3u*pzPbO~f%Ca(zb+npt8};Kz#q#?f5c_4 z9bc-o{rhEo=c=VHZ+F*zS)H6$RnFveGs42Ea<^tr5OTk^)A!o_e=>E8qodcad9glPcXsE7i7$fkwBirz{P=Ej zrup`Mo)p2q?jHY?rg$#=z~1)YpTt_dw`)bKg9Ul+&DEY*oBL0qKDw4QZ|&db7tY=+ z-)uBloLkZK&3v1^^S*Qj_-HDeztOzf>W`!F!ro*2eHq*xYJzP6UO&=yelFX1SGTlO z?A2@UDWNBW9KGjE+7^1JRi$C&XBLmk8hNFY8r$wAe_RqSwrSm^V_Oev3;!9GaL=8*VlUTr)3P;D>$hcX*Ue@96sQQ*K2y zRW(OLULSt7%GY$?wBB4J`Jj81nSA1+PnLMjzv8yeETLV0z50R=>ykg5=ik-$`uCE} zGRX(3e_q-9>ZadzsnDH_lP(3dYG_P6#9$e6aH9RjpI6MLU%Gw5WwzMM*tmD<8Nb~b zCjNOE^{vp|JuNi2%lX{CkdVsiDQ*g#lX!S!%70$?e(9vjEqAYAE{BJeuU=2yztu0d zD(P0%>VV7L-rcHN>ObfFn9TmJqQQpmSM*U;9+&hP>)H?e@ZJAo9ed;7tV21gLc6?* z!+qW!>U`jJv!rF=CxxkHGn(o>nvEP66h=Q5Y2Tb{@!|fr<)1%9S5HY1>{VW9$$ojM zT--L-+JEyu+;%-Cow;RtsLRhsQ#4X-Jee5#j!W2Eab14omUOmYIrD6u+N{=N_RA(Q zr9OOBZU5y*Rr2R0SN}8A-T#&(=k~1dRQUC?(^gKH(i+O=Jjvz!W`?5w40k23etsCo z$@*sD+mC&fDm=N{ZMYuGc(>m$g7r z%ic{X317YK^;`G&?bjY{zIA`W>by&bP37A>S4S{^oT~RBQpfl|gP||O{P0`zJU!K8 z`EI@9_6#hYcjj&53Hd|2(wW9IsuM@q45(i__EQyR2Ky9;-`zwmKfmTzbD0 zJ!AYid0$n?xj#3)%DQg8_%3&$f2e!mw&2Y!QSN_sJ$)8*>x7~npO=EX#0S2`5C1a= zxidtYr)u3i=k%l^_srF(dk>DAe`g4_|GRIB-B)Sf%eN)&h?HAN-HUIpoIcAamesjy zLha$s_ZG*hn@sD})w2?}rfNQNQ2wwwWV4INs%86E{tE5>m3(vCE0%ZNA2LktM)wB) z5@HH+P^o56f8?MnP{g-0{i}A@-EZ3mSFHl#s`DXo%6`~3DD{~7#blM7i2dp~Rbt2d54-d9*rZE8C&`iN9F ze{ksKiy=Xl{(+t+Ecop-;txJysSCdGpJ7c#^-Cc!1qS)U3|9)1LK!B`=jlJ6b#ry* zsj1r@r)|92a=3Mqi=Izwy85;{WgmY52F6`BVkvqxB}9Ap6$Ovo^G4?YyWYjnOucmwyo3fs?|X9QuVqs&1xl+uRQ*^Y{Ffo zWdZq@e_mR5{gUhQ!ndz%FBg`b4bQ1s>XCo7bSi(~lK%`Y2?8OW{oB}9$;*G#coMp; zMXkZe)6!l`xjFJwkxJ6~Cxzj&mVK@$3|ppG;%hNUL|j8NXaZkIf>-*Ua(M%`orUv` zCSTJ1eNW-hI@v834YC_o)US9Sv^$d1X`Woy^;a4z^_Z6Gc{aZPc!T-ZZ439O$ zcvSvi{1x~)-raO3`|Mhcua{k&-89!tJN7zkN_#Z_$DrsL{&LBN@7~U^Qz}V4yM^)U zmL9n!5Bn!Ftq%I*s+4p`|4&ft>PK4Bi?&4ty_E~uH8tfVm%$tdPa7pS_nfp`ugIh` z&i?c5^L0Y5J#)3cBQ5*w!&)xgv!S^w7q4Dgdo)yZ;ekuPS8L8wKA&j2sZ@45@9sCU z)8cB2e?4p6yI**J+Mf1)-4&CI!}iwk$G_dOHdv~}?DdyZO;h)9HSJi&|4ezCvYOk% z&}qy!H>KN_{V|-smbbj*JCD4dU`=JfU$NI&doP^5cH3duZ7=VK91oVvd%{!uJALC^ z{$p=LG7{R)t54r*@?ht=Hk&>6Gkyi_F21)`OFx=f&(QCnm%yZioeIyl_}ATS|M!;r z(DF&o9J=qOzV4~Ds=n^@VEVO#{$X-9FD)xeS504a;nkgZ(}lXqjsM~}=BdA9D!gU= zU8=H@v$Hef>&|1+*8Fkvy{=lE*}T~$q``i^zSPF~{`;Nw2i*@6PhwmZHU$MJ2<7>>q#qkeURC@HxFK&_XY2bO}p=d9>KXW6uu};Ow!#p(~ z=dTUOx0t2Ua@!^R%$)wIWs8^n6@8iSrR#IEs&-S^vn$(KmMj%9>2Ty=vVG;jTEQT+ z=E$FkOLG+`JoOX*d3mkA_$3ef9g!<{wWv=DyUf@ybpls|g1pvzWy={xb-(P4g_Syv z9a`k_zBRzX&gS)-g}S;27I38`r7&$lIU2|xOJhPfpboJ zfa}Czr6DO(Dzi_g2>1-#)&!&hV*I3&ZP+TeV|etq!-^)s?$2 zef6D0u18r*HJ5WuP`(n#Fv-$svXTq){a}~V1w1)Fn2#---y;{;!*zG*o!Ni$cD%lA zH*aa%l@RXG&cc|U2mMCuU;33FJ1XjH9bT=FeregJU76DQmm15{<6oFXuL+3Tw(L#X zoA+DWT*XyFR`k6r5MoL2IPt^EN;*a8wfgjSeXh3e-Rpc3gy(Zfzc_Yd-<|EIpI6SF zwQ0_^7`fla_dq$fX~_#Oc${*nOI=N!eBkxkhUaR1u zWxMWsef9|r54;>|F+repP17Dmfps_f?wPJ(-jsKq*-p8B=F7Os5D%qQYpE(jbGFl$56hk% zc@=u;XZ?SMR{d+UqPUi8+*zG^E#_Ga?@G;pfPa(qYTOK^ysI-JcgMVB>hOLLz2Wt9 zm+DXVckA_By!dwZUfU+FDW4nH*RsVcf*4A7O}I6OcfEp!E@!x~ao^30mky~ot5hENxa#ZD zQ=MOSO6}Wndsk_A{>I>F*R-^!yYjv~3Ol=Y{}Hw`p3~X42-FA}Rr4F%T(_0A_WObF z%ZtwMOrFCpe`ErSAJ*vwshUCudD7{*a|#BZHU ziAjDLwJ>wSdIhZ+rH@~vzf4~L?fT1KYcKIG+#ymh>)4A~%U4Pqn(&Ds=8ud__SvJU zeI;DScRw-zcI@lgCx=`aWgF!lzv8VF|ED_NNtXGS>#J|4w|~iczjJlWF5}4P_{`O% zro7RYnp(C6yni-nxt9}PqH(`i*khjq$DT*n9J?v+C3o+5^z7!%Uro>NHtpJ8c>QwZ zT26-@p}RD11)n>p5VzW`NT4R*4cJ`@nTtN zH#5s~&resG-Pv9(cX8v&+s6La-)gBli!HHK|C6>yjW@gDcXy`U3wur}*F(7~bC2Kp zec9}dNZ$*Y=@$1*a~{bArM5)fTKp|$V{fO-`K@8Are3|Lr6;a+yVbY&ef9mqpEB3N zR7A_!&);%m+oiZYsm--VOYhFVIXkZ!g_q@>N@Mwz-+yaO{N?3g7ccW(jCGHD?ECg~ z)1|vXf0dlG7+O{;Gb)90{bUj1@pyJ$;*Xua?pHnT{@r`6e#G~4OZv!M`26MLy4hD2 zZF}g`-OK9B>MH&6nQSI!P*_KQ;oHY&KAp4n7fspK`@!k>{5@xPehc1PdYgNz;H$t1 zRehncRr@bauWC=NTG@7G@43@4EAKjUI+QUga?d&DG5N;7U6qG^{P|;7IxR=y-0eDx zm!a=Fc9`ycwc)mwmHe@t!8@b&M(4HNjP%%L{vsw?Zq^m2*#&h4${Sf6uU{-PKdGbj zc>Q$0smG=+-8$Rv0OR(UUx7;(ZJly!&Z1IH-{2)e9*-V9a4J%8==?e7)y5;%&gz*L zf)8p*2N-y7k7}>Gw@ohX#=;*BElrOarfMbmB%iQf*ClSd=v#BLON}M7$qwZwMwth< zm)%xsogNf5<$%*vO%F*H4ViC`Zy$WWDqZl=E%`H7pSfo!@w1uyyz+GCSKEM9SyqiN z&F1?k&OG>o*`Q`V^Y6^mM{})a1fj zzR-JfZ?gLC3@u(6yYko36)c-B#V{T57t#^@;&~cWPgiqs)vL3)ul!YcC|AXEwbITKxo1o_K7vYSxGs9+S`?O zp7{1M=#S&a;PVEJS3LePC%%5`{(8r%>1nrhtxGwUr7qzL`)jiNhJVr4zEf}C1sJQ8 zpLlX@nz8Qhn6r}lXJiCFubT76slYKvLblPqeXIMtIoI;#gW8LtXBAgyPxWOzcdh*0w~A{&=JYFwb;(Pc_05|<*W^J=UdY5;y;JM_p4Xq~(tSC1(){$? zDY@(ieUHjqdu}7T&(E{jNagwdZR#QGyDfI#^}BRp-t*;W!tdI1mI+3xm>pqqx9LAT z|Jsbx(RsUOMSZ`ttKUO()U!YCI<_qYL^$^9u%F6dt|Zg_@AHimzr*yCaU~a@THxsU8%KifK!Zj zxYRPH&?8G1+-fvXS+OG3edpi0^w95*zI(R5+1U8!MYg!whkXfcwzk$+n(zK+aGk5P z-c;?T^Nu&%kr4;=3gY+`)DoB(JbPSTznE!f`RLn*hBce28Gl~q$!|`WZlDk(z4EyS z_x=mDN&@$PmYcW z=0$&Yt$cp$#;lb)qxG(smcC*P>MCVg!0~Nax?)g`|GW!1d%S;5{njcJ^1fAX=4b4>x?bYt67%b(re~HWpIIFB=jGJxYpf^Qb$IWQDrvtkA!vfs zDKbo|~n!T9TSd5}qtA?cJOhLP}lo?k~9q&H6yDKa-ck8BA z%f(K`@36Wga5HGbg-HuIKU)|jq@Oe4eZure*+ymJ<(ho&yJN{x=}d8j8W*c|)tQf^{BGW{@< ztNw1;+qb1t_Xm3QF|B%hH1~3lLV0liM3zSNK5NNdoe9(bJ>53JeXi-&@Qz1rS~lTD z+57%8c-=p^dEsCE72Dt4TNm87GJi>1SYSZYDlr+h1k?WD1MCcqw+$Cgjf-Sf{P%p~ z`O9nbi$AKo{=38Np8CvnMGtCA zlFutdLcLdrtP8un&iD7^pj%u*kGPH(S*kj}HC|)8ZN|6B9`;ZCYByfm{^0x0;9N_U z-WM&UQB&tF+k4q-x9&|*KQr0oYl19I{w!F1$Nj0tn~(zgT-WS$&6d~F6%)5#R)6V} zyZ*{fpIqlyzoqwfe=fRI@}D8cZgt$f)Fq+!*70reH_@xrO5uCO#h@8bFjM%Qa@@m9 zhcntEPRs4JkK*oh)oI@p7uvag`71Te3Ae2umtBq#ubMwQ6H8kFPRtmIe+gvz8QCKZn&@K^g1ufHt5x*1&!tS1R*kyH0=&5$6*P*6{M@uFI1vs2)RQUA9&nCM% zS>cK!W4nRn@2bne3^fn(Kdsuf`t8cQzazK5()n)NAJ;Y6+r4;k34f$xOu+I0+a(%( z&(x<>r?`D8I6b9*>%_)S_KzL@&hl7t!RqIQ@Q82uG1}&-n;vxA%Dr2-Y`dkWcc54Q zPw(JKkGnD^RIb!?a!ELn?(=3{sL0+U`?u;o3Y&D&Yytnd8UGplekXQ2?7sKzJIngA z>n3N7R?dm!UYxpVlR%Jz&6KTcz4~)l4j!30{g$l3@;k+ge(0{&ex18=^3~Y(_&uVo zH}`SYj@;kOe07$wFQ-R`Fy``ZW8bT*sCqMg%N)k%;T-Rj8~HrWPqg@> z$zx*_nQF9OIqcrW?Yh!cjk9YP-Sy4fzGYI|?ZhPu1=5uzEsbg(pDPWSR-SWaxx}AW z$3?d!B!ou(TDmbXa&4I3f?IRV{+@L$@YV@g8Y8krM`I;dj|Rg{yO#!&7=BJ}T{zQP zJy)f4)px;dp}8LAi}zmMeQVi`*>2xUZiU;P+O=y|iRP}dDO)eC^Vv~xPH{8y$&O=7 zek+S#*p&74_J_0Pxr&apFZ%0C?XGT^x?OVpf?Zd478ID=S~=^Pl#;!Lyv#ZKz{f68 zAJQw%G^~)5t*gzl%y%nk+u@6)UonY?jgM4;KXyW!P#7pndLM;<|Uz zP5Jp2(TkU@SRVABfp1k=*Uo(ho>VNoK4)d6QT@}kD=n@soWI3(O7F_>bw+h{F{|!X z_*}fHm8q><^mN0|YyB>}Y&&Mr3Qhj}e)PIHtOTR3xcroqt z%V&?a-JN%T+bf3Y<)xxQD_>>35~_u;_*QVXSySwb?w(qP; zkCydqooe(tvWZKCsYCu1f34w`?VESTYW;mb;b(|;a?$gUK%R={qL=(>`Ly+g(aOb*y~!<{W^O2mD};v5!-dGS$*ya{SGzd?aaKjMUYFjQOnUkzj;pf zx_ZCoVXX_wuP*=a>Acs{n!v*f4x5h|&UxSS_+QpCrIu}xrH+oC9+i&lceX5f{)VH+ zz2fTRt*1B0wD*Pyy?e0fTZ;ZX!}-&$#BSSm)%DuORjXHP`f_(?Zf$s@b44ITSoon0 z*WzE(*o&9`iTk{%VT=C0uNNzXn-08Y+>>=gIY;>epUs2v4=;nfGruZN;fvW*?b!1D zU9v&)jMGO{dfPlse*blO<;5qp##SrrWU~w+FI;~9;jP83zx9E-A60g^Z+Y@uiHGxq z?Cuwx@5TM*d|51a$U;O*Blzj*9~uAd=SHvLcU3&nBC4=ZAXHh#bEfTa!Q1CnZ5Nl4 z4l8v3a67tXg1xi*l(T_v=Psr4DVm{(0%H zH=NHO2mSFqs?_Pd&&~ew&Q-;mvSe25xnr(+uyo4RS5H25XFL*$u_^5#TUuW5A${|?8eZ$`JItfYCdSNO6~Dzk;PI+u&b9NLxSXCe z`OI@Ed@KAQYk%1VFR3dAq5i!88Dv7%)~l5LXGl@s{n}w_`Ez@|)-b(|q3(D6CU~6^ zJ;D^M}n8jvJMIdEV9ZbiK|6p_%N7PoKYcyE;EOEOuwv z7SrObVOKUXe)hY?Jyn2Vs;aBO1ireD9~WDMAGkZ?5No^4CI|NGewo`3Z@ZrM*tu2v zJlm?T?W=xIPrW;L$trWN)2^#q_*yA+|qb! zeH;JvxwWhxz8~3qdqV|VNHNn|vAeIX-Pygc)HIx}*WExiL8mzAn8ir}hSuL373#z1 z6;^*R>0}6K{O!NHdB@jt+DoHrf;gmRJoxTRt{KeV)Cen*GM zm((;{{Pmf<;9Z?~wsf)XsfK±WYGB&pR zj&weq_x#h0Pp0wbD<9A8NPWV|{PEt3Z?cQFXFZqr&ydR%kh|qVA6M3-h0hM;9+}d| zuhQ61lh9~;rtX*3y$3Ohos+dHo9?%_nsh8#bwB<1x&xP!mgmH0n!V+o!gE{C#M7%* z<&w?xy&}tRv&zqunDItM`@yU0Tsm3XF1iV}2YKdHORe!d6nSptjNE7DtEwvs-kyED z^VX_uv)raln`^h&E7ya2#=OW!X?_YP!o@V){IJA4WyQehwoF6@Z zc5x}B^&Q?dHG5mgm1RBpljfCjd5N?*DzTR`xqnyQk~!y^)6p=of49wbKH2TKH;wgN zkmAzz&vo;)l5+O7?A`Z$*@autEfwZ-JF^J{Nk#mb5Af*g@oG&*GV~(?8H>OQFZX?;n*+mtMYe>&)?h(*F#qhk98)ojmqs ze>2-0qs5P{n+o?9tL@Lb8u)(F>vLwSg0{Ab3JEQbetT41i{)ceq5ZNoX5tr}-18Qs z@MOx_*=ks>5nT3Gwf5YRgpxHmK^yemUkeY9of`CMD*p-|VMfhH10lx7#eECc7bxB%BoR0AOT=Tludrx0!sPgK%$F9w^+IuqkgJ_)H?UbnqYiFhc+2rW`p zVvtnvc;r*rzpc;v#Mu z_r~vmDZ3SqI~{lwetb@)7&80^Z=vnMg#j?ooRQEv@lP6dHPOWX-dSWz3<|t zwB8rzn|D{g`qZR1yKZ~$uDSI}uQV%9I(*%mP5l8K>bE+NR3GViaavNn*Z=wc#~y0C zJI>Bqd}`)xtCzd-OlS99-o3_SLD9_bqMi{4`3&Rz7A+E)Xm_-w@uW(^LE)v_1%(*W z&S`#0vCt6-nfg89?djX+_eVRmX(zPUU5xfqpZG?siN7Yzk=aKldW)0V%eP-e6t_G1 znVHyF6~AJ8`N}-)P1!xJlS|4LFfEz*&TAUW@$(bqb49atc)Z-q(b*Ex5LQ+bB$is zzE6zRW83%Fvt-+{`Ii&hqynCJH9p%=Eobq_pi;!+{0{cXjxE01<+GVD+g{%i5Zhx3EOmAlikCBj)6 zuYX;;wA10eTuH|(bGKWsbYI;Luf0>6cGOqPck2?7ZJTdQa`QBtlxD=hyz|>k=P8Ag zFH72M`4%3q4qk03a(UUdz~G>*UZ-=nN#rtaaa3?naB-=${c7YXQk3%fi&9tL&4x>N zT2*zV%j@2pPJ6rS-Rz_G0kNV<>+ao(Y~6O_&rb`L4jYHYW9n9=rhkO_-+r&3C$i7F ztR;1o!C&u^g(4X%%6=`2^Ze3k{N=UxCikZb&0HT`9!xN4dUZFp+j#x+FDu$5S~*yL zCtN!8`f7O9&ho_@a=Sx=G%f~eJaboF!OFDsm{7}-6*HBRSkAE7SVbhp&fZf~*uPeP z)`QrSH`Ul`io+&uxpd+7%gDR{X{Ugx1uPm43{TFK8Gn1f!0xDcR_yKl5BGlvUU1Cf zOOMdLug{o&^wlQb)7IGjwCn7SS1)>fef{n)jFR)J=UOVNw)EK~`<1g2wk($@d?F*n z*0p3hQk$OcZqLk1JG7vb?rOQCDr}TfLN=`Bq<7|BZ-ZP2C5+p4WPf;^n|KV7w8%K9FO95a=ou%%O@Y}T1usPMMc@_)Eu{$1ltQt|}t z-CN!Tgf6`Pa#iNt)CjJW)*_38J;L+;{;Lt%bobC^G3k2y*+E+qjQ8EEzET>yMU!vw zl|t`;snwl-FxKqBV!Bl;_eF4m@!ueh3u50rZpW;!cs_wwyr zgO|=F*L~M}v~2ax^0KhHoK!z$vZ9s{n~m6_MAAJ6BEu{ostSoO}sky_qO%7>lQDYVsc``)kRO9zTCA`wWVd7 z*HrBX7Ax5lJ_Q)mOkTJ0!}PyK3Tj>_u&QOw7p&3vv@S&6U)cMH>G!my%aS~varN$-VxWKR)#=?Qwk-_(vQ}gJE61ic z)lDM&bvG@prdr6n<}~J2|M1~#*pKsHldLO!JX(%f%?X)$xFFgQGQrASz(^>np*bdb45SqA6xdj?U`w= z`%L+5KNdurHeGvaTk4s*$}Mzim6xizP2H0NZ*LiYoR;1d(|ogk=XIrh6>XWb+)^$b zc~eyO-Nea!E7xhCkSj*hoV(rkFiogc5`M?7aNzxI-@H1}X$oihmYCaK|GY^0p=N)P z^J-b~3E!VbS)c5(niX<~HA_RrXXT?!PKq9Ln2Z$?Y7$;avv-`ac8c7?W?=ZRcSWD{ zp_FOqv5}!mb0>Wcd_Dc`cd^Xapjw4RUZ)o{pKe+r@`uUK{r&3PqqBBxoBGYvUPvt^ zI@G%NT-D=qbAFo_U)uEg%8JEl8}{@cT6=V1)|07y+J6;Q-KT$hyQ<`mW;)BAxfaS2 zUswH+=eZ}n@CY}HU(89-_;2^U%KhHn|Hj4Tz0`Qhn`_hNXctV4UU8^1xOvJ__e;XE zOBO#fE#LjvtZajz!p(Lc-t_a+v=`osKP;mZaMbB*{HJfym$QpXE=mV2dS1Py+xFLW zpP#v+9M5(wF?UE+KvA{*=~?ANAj zi`MH6k9xU6Pn2WVJ{Q$18ahSqn^aUzu)6PHK6z=McEJX>W6um17f-04xy**UId-0^ zjM#4dAjkXnr%J|HoO7~vryluwOp3$7Z_eu1ef)pz zdp$Us^CNV!KAfr8*XJ3^ak4+huG6X9tu`^8dkYC4N8*MRn>7Tsq z%sfZ>b|+t!pQ|POGHCURUyrs~EiYPdZ|bz!bwAf`4_P={DreiO*NY9FGD#KaPMI#r zscjVgE@s7Nl{w6-1=6OT-IXel(7asw*Yi(nxYwo_#w4wN+PqZcZD`y(t@ZyIm=^`- zEb25}J6&=G*LV46Dt%hV`Afb_?YQIcR&3gYvU|MOFYeEZsz^&?QE2(j z_ur1MUZZ+_)`~w?O?zEG_CH-$A?o>Wg+h5Re@2{zlx6|vhm*=57ChN8?^@}_`^RH) zox@+%Uc7Sl{0{x=zn7jbxVtV?|LWB{dZuiP_CB}Pd>%V#`CLU#nJXcS$3G7Pc@FGjiFaAXth1urSn}%ZF=>hHKD1Z`7456EYtAv zR#X?N6g}TCS$J;r4wrzld3qB&W6z!LtNYKeG~mar1JfmRO6B>sGH%{we}#XA-{M=_ z`n}d(w|@6vbwKpW<^E!so(fgn;VDamIRAP!uSk0|U8>~v&V1R8Tlv}9C%R{(DxcbG z{a3P{Z{NIHv&%twvN!Ik{djdVqfTw#f=_7;9a^qh)f|R;o%0^M?D-@Su{cHkqR5lQ{M^Fq=>ZIq=c9haI%!H2Ej%uA znUP`ACWU6n<2TN~;O~17b!g+c;Q4L$=Vh#T$CF%oLax;J{r*s&{|ueGy|?C0`=06i zly`T~Cf=xLBFipJ5IxMU!gX?0C>N_SeYvgQed*=J zw~t=hbL zb?1wh?%c%TJg@8LN4A71eb3q7_SC8R(GL(IN$h5cJ zyK;65`cCzDq6Z?F4c^D^h#>VnckMpmM?Uu2zU zJ<`p$%yi}D=(*1eZs|);da_(%@w(N8r@i&I3%X5F)re#?n)HyXgf&gfc=2)mH-*jn z8yDs%>8#aQ=5(!T!3H52gTe=elj_pX|7R%tY~`!FCq2>U;pAm=!Y|1esx0Jw)auB< z_oMTCL<=vT3 zXFB1y`@Q3zFKy%MS2+DTN+e{Vrro1D4+RIK^!_`qyl0jOIZd^*Q2(dVZ7H#mrTS8N z)%$&OqPM%OOhLtW>OH?5DZjY-Fju3}yjuG?D->7k6HkoR z)SYfQPtk!daDo~~v*n4OSGGG$Tv<8mV$zkPGZLf@wWe%pSkBbI%ymx5#phqfJN36~ zgWN)8XPmBCC9y>>#98HS}>wCUmy!P(R&g7--(~dP+mGhX( zW*Y_;>=22OKl8|Aj%4%O-ye_J8Lv&roROttRQbN^6Kn2`F!sz(9FM26GwleCxdT>1m*$<;k^D zy9z#u`~8a6W>~Ob#Y*D|juy?o>h_s<9bV_ucaY;n)wPtK9e2_u-d?}VY}Kz@vyW~y zDG}fL^q#NkwRi)MXOo4hQ&rq|nYeDfbBZT1q_X$?`L&Gp{4siyn;%9^2@S8^b!bKW zjVp~?KH4o^qTakGQQ_maIecpT=c?V7wcW7VJd^FtF~jrctAswr+I^6eS+(JB@L!jE zW^(7tE^aqF`*QBl(k`#`RTkTh3WX%>+B2E0qKUnDp2CT{6}77vN|SAiCmH{`VtC$o z!iw2HuJ`|ESl#>Qe9*7iWqId=z2j4sMNi8vx_NTPTE2R_wSn{U?L=B0$otKmppvp{ zFqoPb@elrO}{9AY3cU0w#m0|SC>cK+x6vo=GxiY z-ha9KK;N`(nTEhtH-Co7&zrL>=d{anPx5XqdcR`(k01OoZ$B+~pRcQ%80IQh9lnEi z<@T!T%8i#}bE@mkbZKq*)fuojXDQ3^EtAw(7@4=rJ?M2+N#0_pEZi?R@$*@Am+hBt z>6L$4@%rAW(2jK>(0 zV)46u{Eyv2`fk6DaCH{t2@5_EC6ntIbKv+n_oPyr0HIkb$``n1u?KlP5Bv&RLmxC{ zx;Crcim+41{DuuN6SO8fTH^E5P{lq%>wJ~m;l=L@qVuS_|X-7q~1V`ly8{v~iP zy6kFI+{xwt8T!iRdixs&oJzQ6c4}G z&+X5Zp7^`?&0hbIL$|_B&Agr;f6Cw@;4$+};i@ZtBu<$>k6GUlZM`8k`isRMS97y{ zPh)elyVosVv$9}$bI6~jgFy{4T}h1p8P>mEwO#08i+;Lrrpo)qF8#)vXC_Uyd>3T* zGUlsn@zPhvB6xQ^a?sZHQjL3Tf5m|R`0?I&-L*0YZ!S(@+;RSa=F;nF3xmYZ_xq$R zTaowm&E+c>3@)Ej_2TenE&4Y@z&&IB`=z`YpS_mWeP0?@|8jd-@wW*IN&bOb{qp{< z4o~*o{*TX9?ZL{3!Uv!E*n_6bd*~tfa`iNh@SpBYCWqVaOr3JLva##M9rK5Bnng=Y zBK|X6&kDZ3_V&FOxhw97Toqa2J#Uu9C%F|`!mrdPs&Fze-acQmZ|fr&OU4YR*RqD^ z7S4V!Pn~H}Fw1BAb0tqUe0kZbHsh+@wATWsLfU3!2HfIunl#U)P)%i$`i>LLOJvIX zjJ;Op%BS6au9E+-F=Xq8Nh()9Zu{#bN<8 z#4&+qa+7~9*Mu0s+dR`>Hg4a!WI~~zrpPlP-Gzs*ef!{Dvu9WHRJq&X3m=MC&HZf_D&5;>_;qQk+RwEs z*R2Ybe`LX0rRkih@I63Li?hRW#=Dwi-HPcP+}W$Q2Jrj5)4DFYb4%RSi|48s-ELQ1 z|EC{u?s?r_z1Ru+IGzQcec&SCa=`9}Yzy;A4pP}tu z%AsvbtB9@M`TLvtx&s+EA2-hO_|n9#BjkT3TzTKd?e0OZCskeR z-S$4RbmmkH}_uuRA^e12|oeeR7TXACDk ztLF<{GV5!Q=3@7v)OW8n6@GPvr4-u!E_*-kgWozEvwz0TdTW(`UawkyHdAK8nwAyn zw(QRxet-I2wRYo9NAJLZ!;M+19{qOo+%~aeRl7j{1E&Z@bBiGuOZjwWfVSLvqtw=$QMTil|rHFC$)WR|IWQUwW5=y$o3BZ z%zM-0Zkl9I*5IqkT0iUD$~~{L)|fq;vm$zx&J}~LtGid(aY%$Gv$+>feDFHX{@ac( z9!7_A`|Kaz>N9Yc5_)_3@YKwlxW(6Vc0b+rLhP-!?(r3|{%bU6S!ik)pO-zTGNIT~ zg=OQcM>UZNQ*Hmeb6-}zNOwo#B-`!mho?rhl!QIKF1I?jI^HYm)*bF0fg58>cWl`f zyL{XBci-&-Di*Y?G`O|oiJG?t(5y_4N5%@7`bTzsz^w zSJf?@o7;M~P3`WU6SL!v6GPqBODd-)zjgEAJZ9aK9DUJdxyQT42Zg_DRyEcaT;5*t z;791H+!h74TOkczAwd&4j%Ut!&bGK0}{&j_IkL#?#+6P3=cTPWdcV(oc>6IrI{iZ6)w@DdcKtrL(s7o^#6B

qz50~O?|yJBs8b$E=smfm(x&6S;5bLY3;e}>qieeb>0d_6-|Euvnn+<25n zc~?kFv+jJgVr8El%`q_tml-hpOqx(|nj`VF=RKu4(#|Vo6?O#2zUiv^o$neY>K*Xa zTRV%9L9cq+pPp};^S4d-yYQius=e~WugkA}`>;Dx-B&*NOVGK69%n^Q%?$gpW&Ys> zMpGG1PJFA7QpkCtUw!AL7Zz*Ilqg)PDT=n6XOi&Xu+@!6ebcv>i{0M5s6A!t_RLkB zs!wBL3R04%EVWd>rO?1uA1la{w`=K4hP)Ob{!=qM z_$X30`11M-sWYc4TCy-DXclY9Es z`}Xb8(xo0e(Ua!;b*d!Dy9l4*kO)-%^x)XkwZ|gPh;vl-Jhz*#e- zo)&L!tCu*e&#F0P#ad2=%Bf2|e@;0*QM+Aa>y?vi-uec2?Lw^!4*zGE@T59$|Kmwh zRkgR&@x2ed=iXLgs3`qHrw!G9wz`vq+{qgrACBDD5nu=`koW$Ly zB^1J-lF}*vC9H(??7He33jJS&*k1-7>r-Mga^HAwhxXU}uhXyFc7AoNN%Z zZ931kN2ayzBkv;kIc6^z>R=PdwV(#ux#wAOmeb@S%1)e`7V1sC*{iyPuxrfn~=6>o^JZ)^+#>COj@d!SC^zYYxjT zO%^THJrI1<#^aG)^6~p`rKV-{^vG7#?4LJv;Y?8M1pmjWX2}QS^QitE%ecuE03S zy8g25zS3PQWKQTl54z)W!puZH@~GWU65RrMdaQx_w=UPh?@e6QlK&=#o`Ir--uHxGj_efDXU9Wes4=j7DY zyf~30aq+~@e_x;Hox56E_t>>`<%;{+ zRZl&bU^OY*<3~x*HkIjeSKms%`29jxmibrFjq0E$zjMAOEuQ10OkWx%(JCWAm%!4qB(0cUggF?gN+cj5BH41ys=b5u2g{64{`<-oDCp3H%*!irUch$mz?I$%HZc8<%FO#nN_w3Qu z!0T7Szv^8K*jTkDqb;;(WntXrexsWzRhiMNa#VU)_$T-rs$6tpIt#z=e+Dm|8;fN& zy#I0SzW?{I$1ihF{;ImF_c}rUKf|k)EmOBC9A4o$Pm5)yha|&9mgbdl zv_fIWG)}{=eP-3Yi@xVB+Z`=(HFW2Lsfr02st2r3C^<^V?t6aXQdyhP&5j1!*2ogk zp5&^(RlWLV6Q&rXEIDYiiSggxGyfUZm8$eEGg8hyZ#?JPi(UJ^YxS+(w(XOgqs%N7 ze?x}f%X=PK^HdrxNbGZE-kiko`p;7*=RB6Boj0@QfBpU9@{+$L3wEsgx;=MGbnCTK z)1!Zc&3&$={&VdL5et=h4_5f}GFBg6n>YKdkb7tNi668YwmQBrr>u%yp~f{+|nOp#L0eS7kRiMy!%ng ze+Hr50yZxff2fl`GJz$>@`;VT)}#3WziO}4O^pfl-oI_`<2=y`i-Uq>mRB$pOYe*e~>AIDktY*8(~^YPdf*UM=ywyQRNiz?mKJ=K2a zaVF!V75@7S5(FhDseHd=->o|7o@;{1+G0OtjqQorW{au-pH+<>ZoGyF$hxXK&j~ctdTDOnyG=(Pr01kRed?&2d;8h>9andj zEM2`~+t!!A?u0Qc7k(J~&(E;$mzDR6casGScrRR!|Ka!U+P&1iKMXsbc^s?FoSV7A zY3{Ne0Uu4eS1eb|h*_etRx?0l#*B&T+w23wcQ-}Mc0Fj-VYjEZY|0K##~p4`?lI41 zOz)0ewCjEG>W!jHS?k-K^^Alx?|b!#%v8U8;L&=U{sUKQ_2xb2iYg2ID>F07ikpEY&W zN&ectpKiNuySv_(mDRUuw!`{DJ(EjwzZ|OVeD=v_h7F_o#;ZBCyW^%z>{PbYZS41X zwj$VcjmPR6-#hOxPOxEhw!HkSHP|#Sb@g1;`&ZWHu1bI5tm<=l*<3y0sZKk?MEYMo zy42G#De>*8hf7%9`y9WWU%r+#|H#|0Q1?!^bS6OU zY<##qN`A@hspe+?86+L^@8>N%&>nVmSF5PbCCv=)c@s+g7OZs8RgJsIrBX51tZZBV zZqrlRRTJmi@a_w-u^02q+1q`4&64^FwFrgc*kw)OMc4Ie1Db~p9tCeIxlxayKrcz*Va$oy~})9R(P=Z@v%(2 zG4I#L8y~zY7G2@*E4trT8T8`e)Q@Z3Y6?&P%TV1VWgwS1YZhyjXQ5W`iKqkq$!D|V zZ!pVlnRSHmmdh!wZHL(dp1%$*cjdh=;@Lt^RrgQ9KG#T&2iv(TuFaP=v9UAy}7xj zTW&<~u8@k5^PeJlu!*N!u1a-7|EAwAjFoA#odrr`HRr~k_fmVA?qq&F=KI{)>2IuD z+F#v!8uZY^OJkN-LvWu~0)zUyv~Suc8~*?#|t+alp{w^#hTo1TogB%1-%dLBeeVkk z-G~&fl>0Z=t=@cBBX=s(yuG=)IxCN?k}B!Xf6Z=C?RjnOAtfEzJI@bKvpsI5{pO9Z zoT=-t!i#tBX1@yUFO7YnwL*IPx`m~sqI~VF2al?>PDu+rneF+cc7ckTfx^tI4wrVX zU#qrxHcOZJ+cHCGQnJ?{0-JcvY*xUHYFvd|$`j z?JqKZrA@tO)#2>6sq4w5o6EKAzBS+X zVBeb;A4Cprsu$^NySH8LYT~x_(`@$!JU)9TZe1qhu_+ABF;U!sK}=1Z%Dh!M9-al{3o&e_BZZr z_iz6-yj{loXlwuByP+|0%a-MZaL;dVSZcxHE~p^js4ft)^vNPN?;VZo6N9(!jecXt z?4HQ&eaGf(@qdQp)|+;o-5y@#_Hu7f{PdWYg{Q*1vx_G$eEy-8KYY@0qm=4~&-Ydz zwb*+^CuNE@=eQR;b&sywvOglO@WTv`a3K#*{~VVN+1)RbGr!Gfj*u%)oThjB3wLWfJ|79J2=-0y~eI_S^^_o06 z1tv8yGI6E*?~~sX(jT%SP@Y^}EmNDa$9Sv&*a0 z?|-{8C}Xa*nbpRCohh~HYxC+f771~(F*SNj;5cW$|M=^*L7`41JzPi5EY92dY1NTw zm!@5q?o@pGxPGVlw~3!SEv}_i%nZHhdA$93^t30eb=}Qc)1wuGRs`*EWa((KsY$87 zAK-iQh_hR>`ibVR>UKBQ>|y-9Z0o%>GVS7wkrrx&znPo=Zd`NcprLP;mP+fvL+>M; zUl?&oJ9l<+`LL{h9vyviOZSQp@tjp`Eoa@YG<=%o#i1JH!lBaSD0n|`@9xdXt3G{R zDHof)q(^GO{@_~g+ESl4vx5&loe}(Dp?1gtKMod0Bb#KYuir(J<)+>I&){|W&1L~> zz1W)C)u+_2^@ptbweae7iOpek+WAq@Wv8>W0^%PD+`jbJjpy|GQ$Yr{X3{^E{p;kn zSDy~LILGi=mZo6He}N349seXNG)hDmU6jl};R*m8yu1cMm zD*qYk{+R~1HpE>xdH#r~+)cH{i5J}eGxSZmo4WYU)@`#rZg=kpx&5C(C^YV<(3GzB zi3elYw|rLeoWP%?F17JPd8g6OgSIu9p`SNDP`eruTM%fLm*pBQ`o-TYMpVmY!V;ZC zhZ84{ADpc0w_szYf4PM9ldng$gqwc(8-}qRy(E2f|Elc!WnTk=Zrl&vD$4sQ=x$-Q z3)?A<{xeLSvOAUwTgCLKy74ZzU&=MdD8252Y-@I@+vd_6`6t(H(z+e&`J+H;`Ko13 zx1twRFLfwljbv$Vuz6{sY0~92ajWOiyTQzBt<3zMFS=`8)b-iZFY~tMmun~gTshaU zVA)kc87~G23E}To_>RurJe8UMOW%}m-ili?E6<9q)eNf8)GT$F#H41g-+8?MI`i`= ze<8P90m-M9Ws20kD_u7~Nn}c!(}lvLPV70%{~7APTs&Eo+#e~kf6gk8#XP0@dRsfc zMFgZim@u_zjmXKz57`+WO#D^%>+6-ji&iT$MWcHORByfyai z`2}XXs@j@+?Ksu%Oyc?Z_OtMu+)56GvcpQ7wrj64TF$nU^Q-3{%UipGug6i!Q>-Sr>ehdV#KFQC(T zqmcE^kD~Q9u@S!*wzaRym(^Ie^UL;9>wvFTE5f#m9QA&vCJ?1iRp2haQN`t(al?Ox zkoSwGzqu1-aFKVDn?%j$uL~^Be|g^ik-I1{aN@)HC$5U!-)+7&R%uPy>eajDwk^)x z_UPsQqneV>eLg<&YB|9fRLSJRVPP-zs>behOT77ihRb)0s;WL6E%Gz`;4&@j*J7Ra zX$2B>l5#)4E=%{Tnz&WFOzijOh*dh_RjsM?3#&-c=$~HMP_y)u@yo zQF-*Af&VPmF+C1-%kvNS2ItJuR>{!&IscP2|E(q8=Bo0&)W4}c^3R?==wmepdbGCsou%JIi<<^$h=>zUya;8rCa6HVFF<3VD}?&MEm<}+KeQZu;o$y5*T?#V2#XV!!Z%V z>&okz+2JLs&Yw=0Nc=m@qZ;X%cwygs-;MjXM(sFzZd;S+j230)oX2M_&egbUZZ`MU z>!MB*rG?qClG}}zY?|H_w9KJYD=_t{rh21V*fUNQ=4a7iZ_>?PO;SAOZSLnj&uqz_ zsC$=qJU$ty&o(=_@nO*R>Z6Y)T`t(YXFIpwvYl~Dliz6aUcR!eb=%(+eHD#=G;6ZWa!Ilpe>Sr(xwD;Uyw)~t;?cD1>&&3e-Qf*-|qYBk?J7usTY zxKxAjenMsL{Ixoo+x|Kq{B=tA_|BElwa>+tYz=t8(kkNlFJY1slFPWfHwnmcw+ zywXb#$St>%SabGBa>jvBmi4w9Kdxw?_qx}ry=(W+z8k$zbjr40`d)`)J@Qu9 zOnh6v;p?)|iBj*85*G6nBa3IQfBD@=yD@ja7FU4)sMokL~{D^($dR347Az z%PXgZzR!BH?dhc*oLV9O8BXVK|1^|#(zcr+uaDc?T#8<6ws+${ zhGWn7UU_#qdJF%jCF{1b{!^Nn(!lBDIAx}VrusXR+RdAuD4hCn!QAke{qEE9=R&8g zE{xUKmb}j&d_wfG=98Os=Hr`peo7|sF@#?VIEFn~C&hyaq_;>ML zw)U|vq{Z&S2zx7?fi*N**X5O;d^ZR3?AW=FF`e$=vA{AW-NO7riTQ#Y&Y z%Z!5|RyCFrYTXR%N?eyrm*AGT{qwQUxwW;fAz#dw=)CV+oNIe@M>+RZjm24a6PIa4 zu86do?6=ar+xV!_)>TX$LY`OkJ3h8<3AR1JqQ`eQe`)*<_QT)97aUn5vh8M`^L5Gf z_H&*7seb=HYv-HW@_sM3)jc&8|qXvTKh%wOwl9e6(be4<|djRblqW>O$^f z+byw=PfT9&o9o3L;a`rjiTuFQr~g^J6kmrwupz3k?WDD%8) zi*+yAOu4{H$KFD=wnE?Ea0r(f78^+mJgu zKIrAkxHa3CO>27dR@t;=sb|)viT|_|)qnb#pJbjCI-&FEwP$xYw)Ql!6;7Vu-gt~- z#ezp}$sJ!7R|x*Rbg924G)iVgwyI>Um?QN_(!}k@mEwM>T?H@rv(!}5@4ws{d2i-DjV!$>;oNag z{T>%Bb$r#8VixVp>1V+@`GKE%;iHnS{|s%h(hI(G|LtqeKWuwzU5(S4*C$qdy8rQL zmGZCDUtP(o#j<|;zB`_F)BJVx{I;YUXOqI@mi8yDy#43tmX?oN3VhCwtzMXk2b`EY zdn)sSt2@4k9r|1n1t?mGRb4THS7E4S;x8_ z)ta33IZN7jq>dF>-=k8sZ5NE zjYxidYR_>F4`x&Oe+C~5nA$F^P&rg*!&KmMy!hM4m5FmEI5>W`UzXugW_0~@xQn9U z$;2I8|JDdvcAPvpS5&6&XSr1PQWrIiTj5h#OSM;A*{z>>C!gO!YpZ|X4yY=rx-pU?(y6BmvdD}zLEmJ~QvHWLf-8QQ< zBs|8WXW6uw+hZDy9?fu=v~bbGH&22}*bYkbRle-kmzs7@GR`L9L8;8~wUyu9uHO01 zyVrAvhQ@lI%Q3I#Y`-x(I9u<^6qbHh2IdnB`OHGuxl4G`bPE6Y%LLE3AzCWyd8v4N zo2KuXy!k&mk{?xeeV_bA_RH7QU#`DmUS4;TSsHk^W4Z6;qEOan%RUC&C|W+v+WhFQ zlBR-bJt=cT);%pQR^qkdmHsxhj92cA)}8$|IVQ5vFB4WOh^{=)^3A}}cCrD(d^wgE zMJ|h7dV9k3a%X#36+3*lTV3*PX4=H+%{faQSw8p%i3KR}2^9(o)!FYlr&=!(#L^Zx zZ+F_#{&ug>*yJ0RF5LemTzAK7{hD1j=I=K7lqC~&>eA`H29F6F1VZ?QZyXf9@0;}F zm_^fJ)nk+YS;*V`2w4B&%ir}`!VJN;=KaoIp{aB2@-?#n&!{ZR2ZBF)o~3_(yCU_G zzy_<7EB--cKLh4}c_gth>S*PX#XACaU5zzZndtd#aZ8``4oBPN%+Gy<-Z>p|FklZp z)wFYSXwNifhrbqn=hB+iJ=}V4{*$o|MV(9zOf-#z3B);{kO+^rcJyb-SOLR z`LQXtUvl?d*eaC2_WXYa?l)&o@3>huk?+!l-N9#X23aljgjZ|v zD_Kg-dhyL+hv?@`<{o#ev+fpX1m}y0h6Ya&kPduryGTXycjH<;#Yv`&;aYqBquNfr z@|>{h>r(L`^J)KHe!VsC&8k(>ZTD+dYGxQo@#}naNop- zj^~d&Yiz6JD!lRTZJsyly0o_IBVJLAftdSt&GYWJ{zRCZ7h`5H0*yTc=@l% z$+U343%sujUnPBb)Dpb%fTF?{+XFsYHqSfRkF{4HQDCzF)%*9g$K#+~v*!l)uUNn2 z?%ZouUc2XR)q8vP`3nuDty2YN9XxXUQNRRCmq}s@OU^f>*y$!`+EiRz+8?}cecqJo zt%W~zBSfEGudBOyaNnA@XWw3${rB?n*pS<9>s~$J@V?r+RAlRw3!WOa9*R%A`kxA| zILVSetI$vAj>YOJ9A~PR^6}mIS+eeEV!iAa$Eh3kNgVsDJ@;?wFT0oeAtI~a`@c24 zYjXFhzvlHjOFVpyuG(}jZCE2Cd{mXun5iU3{+gcl;dMuX_^#P*GP_eL^W1LH>OaX3 zIPydn7bzECm*n05_+iYSyL07!HN5QD5q4{3_}(`SJEmOSwCMAfm2N>VbLtH@u}FO9 zNf%tV|B#*KgFPvoveoA532x@!PMDslcz@t>wBO!Y;&STKcZlrYlwa-kzZIKx83!BBQ=KvV`(_pTF&CY^dQN8p$S)`OLdpD3hd|zIAE#d#`s=%Dce!?e|JvbJbBe!@pV|7{9liGPkF*8 zwg-PX8nJI?Y2E429l2R63;l}(w9QhuJ%>&I@BcUbOT40-IG# z``6rN&$|0cpL1_X-gy?)duom8yT*k`!3 z^7mD%JMjUt7k$w)HH+O@z*)97lxyWK8)wCvEFXB5+I{j=VR93`WqLKIj%`bu!CH$O zc?)>{e9D@AeE-^O8&2^UBs8AvJAC`&(naR};aOAOhORGo>z%E)X2-IlOZC0_CoNSx zgRZs_lOowfow=zri|E z*|`r{nY;6*wyag@VVY!bGxK1_v69-qNB1tgxLVo%k>URNesv=2PK7Z{l~BHte&brN zu0v*b^_4Actr}cja}`~7tYDuwalUQ)m+zNP?C6fwU~<~QUc2S@m-DT>MU%Yu9QUj2 z()5kJXtM4jN8yEr*Ij%PEE82GS$PI5f4a%O;n1DnmU zdZ~u(wsF4`j6Fkynk*ExG^YwatZhGN`=4Ro5}WehOP3}Ex|ay~Wo6_poRaXyX4|@D ze+5eTw#}P3Z*9I*$?Q$ty>DNIhXu_NOA`Cx*&tKEG_%pz_^s5Y8tsG1t=5_1y6SgN z{w)`E|1rD!*z0eq7RQ6H?P_+re8%V!SHLZ&N_Ga$;E48=MRt$vxtVs6~L{iRdP@tcZVm6xO* z*1a}SdXwAwO6mwo$+8INZ^Wte2ipl+z!WB9FC+Dv+x@&1jsCbG}= zJh5`^)+@WK&n-H?BO^3*cQJEQyU1h##*ham{@m?6XPvS8w8PnnlWH~Ru06Kw(%Gdi znqFScom{qBoa?sg(pO(rYV#{D=!kOiFzAtx@lRG(e(+*W(zAQEM&CET+?uP~CI04# zMBxKX=lDHGo2%@dD;KZalP47wE}1m1bE@v`UQ0!fI5vw3K?^({_dS@)FD|%!s!y?5 z;c034=`RD<+?_1gSM%b2Ua8daT{0`vX3hO2J=bVT?6NhE&MTDvGc1`YkkCJqJpZB+|*{bzWz_THuUn-1-!a+;4z11bOOuD zp!H|`pD{4Mom&{+QPDCvrsFFEA2Qc^9l>s20oj7VwKMhe^r^L`WGhgBKuly>(nkxvP_0lIFg`GK zy;IX9OM}aoIi9hbtTIsfX}kRU**(XkYn9*ie@&6k3Rc@6V4wSwc*83X?n}W*(UvnUm^q`QI(W$@7<5 z?Ag%ubV3i)o&OAvwr9S(c68}PpSQKHM>QF1=FDVudFUx2+n#yOgxlG8vgxh1o=^9s zuTF0-_?=z%Nnlrv*}1ai@1BNTT=Syu-0tAqmfPl=^LDK|y;b**-RxHNb(M3T=aBXdCC+u4vAyZh5xQSbyfMK{Hp9w#h0I-L=L_Jn!-7W{4UnSZKV=PuPLD7^5b=1*qrW}6kE zw}XB>33po?oBV3Ix7p?CYp3shnH&Br_o~e*P2C=;>jAI28uWKfU8|zfqOwGdf#J-# z)qj*b?%a-9UTE-KYU3pKyT z4Z1OZyNSlD6(ZX$R~~86C@OUB@>*7*roQK9#hcCD4IBOBC2y>^SL_mR|DbU|G5J7# zUEEgIP4Ro*+?skd;Kn-Dtqd|ND&x;N_nA4}(PKWdgX{cmwUl$|p^HBnNpj8jZE1gN zalXxC`=}jJx1$e>b=EBi-S|T9r>w%A<7-dti2mO8aI?sNhPK>Q$5)9*7DaM7UH#E8 zRc-3By+tpAI=LcRI_D<0#hv7NRHiuP)76dk%lGDgR7tB~=WS25f7-Zx{o7c}7vdjn zZpNnEj}Bfw{omZXW}&$U?c2;Jm!#e<4*fSXF#nFw7SlKVp)L(xj*@d@*X;OX9y}xE zP-XM{TRW%A{F`&h9#!_V?|CNEQ%4V&j%Enqp7 zdi~bMR>uO(EY3+wJ>NfO{&VB~$F&a|y&l$m&0Bps;P$PpT23B36I!+uvI;Cp^Ev-g z_}{FvXZLg&Ecy99ZpUDQmfM7&~kd;GfX`8{d7 z!yZSEXBY*imoC%R?EWqmRjIeHRWVh6+vR?<8$m&fV!U}zC=_0w71pQlNr9z)!ii&b zQyg~furdJNdm~(_`;iuY2(~_1pWxy;d*1rvGR^>DAQU@-Jlh7Di@?x9il8?la#%uUsjh zhEYP^uxnpbfyPy%?o%_utW|ei_7nN+yHakJS3~$#l}d)n?+GsDKQ4tl3c6kUx>e`S z2mjYW-5K4P)0cbQSh49+$d2_19z~8%X7rr=_tAL8^4+_+m&Yx&`QZO}){%+Ms~zsh z=O5eec5luVE8ur%J%59 z`=Yns%fGVHLGIXVu_=p>OR#r-{q{t7X#;S-NBx)NRf`s${4 zPoJGH-x(A$KVECOz{`l*h*nM8n^+gHrEy?wQ5>edB|wrd|=dg1Zh{|pKqnjV@SohkzS$?F=~ zkB4nOqrCU&g{H#&z2V9~oFcBrZu{}|&x?OA!!O6}y}EU)XyN|7dD}y|PIZ;d4(8aQ zqOw5IOJT(p?zVtQlXi4A&g)D#CBCKETYcjF*P%0ie(S*xqHgPcvYA__IcYHbJE-ApaXjfmx)MY9Sb?H8Ac)Rr0 zaNB*au61!QwYBuMetU4rE|ryA5BYDIz%R7$aSZ>X^=)}NOYAc`rPO|}vHSS-XvsU? zwxJI|cPm#)H2n*gc zsa|>3=I$rDb1m#@RyEan|K-hH{rIYR-JGb_?vf6+nN#HtUR%e*Fh%{Rwh^bwlo{`o zW9uLKzuNnB&LZwdCiB@?eka#dU6t?qqb&QnbJ6Ag_{&+(g2Sc%o!oJ!%syb&j$86u z4W61Bi*f{*oNF!qGDYFUB(ECx_OzSyn|g1M%!InC45^l+54@!x&>eqZ&==gbezqGtgt*ZbD^&pePS z_uSO!w(GNn9I?Evh07RJHMvVf9NtvO-?wbCaoaWf+|BpAe>Qe)J$NwpYC)gFzlz+r zbGmPhUfFYdM;*2G?os8cyk|4hqhZn%W^GTEd9ghXMr`{Gwr^l;$vPr_UnSXp!MU$r z+(o)xCSU2i_hH4~1GBPTS>2K=dmfj&bjQ>c!mlJJgdX`_yN~mXKyvi?swl@?20h_< zb>#FUYY4Xbb&vfoA&o6T{=Q(*YF-UwU-Mi?n$ys5w zro##5f&E&AvfDO^tXov|eEOy3W;Yi_uFP8+lP&b7?G)n#HU0vg6Ms!pwmi7bCRn2K zz9@HB<)!YHj3<^aFU_+Rf7zQK@@sCw#^9CB8OxpGc3v@RJh^OArHk;)sT@oWA168c z2_B59ev%$@WM0Ysp7=FlOCv<{bzkqe<|VfFOHSa$%VH}7rYz)T{3$T8QQpEo!^r0K zI#$8eI_X+fMaP$0JquEqv>;8AdF9HDfd@G&J+x$;_8P4_C1H2}bL~f|Bjx8hr?D$9 z_?s0rOP$Hp=8xIa$V!$Lfk(0a`~@l+!u>@j8}AEMH`v$iyR+1Ohnw{A89&3MRC8pW zTyuD?;N_s8@|%gvYqBsy^81N5R=mzwtEl2Rq3}d+$C?B0%A~m*OP}X1+ny6s&dYCD zzKAO-WV(e=&!m~hm6;h5Y8M;cI`{CL@#iHres=>515YigoA>=|+Vh(SXWc)x^p*O5 z2Da8+`?7+#>lOutHY>=n2D31J@SOKda*@>9-#edKUe1%ZncG*mzt=0bYjXYl=w)9H zMT>~!nC4_YJ?Y)AGF87%Nysa>f#saFgMa!8?Q`Zb4Hm~Q^>5qA=eKt6tJSqhrHbcf z*l2xo_6^A2y<4p}TwS5N)+j~uidp@%<+XPl!|cy0G_HDoEMDZii2dz;mD1P!R=fY` zekchS61PY&+#dDt-?vw4Uw6M_y=Ok{?e4r)&tA2BwKBq!Q;*qTMq~dMsV8&7 zJ5E+=Ssf9z>iNDe^p?_t(=saRhCMv#YZY`&!?oY$p1nQQ$eC%c|Lzz3J{oIlI{f8x z_!L?ilo@y}ntN~Gc5R6}`=8;C@vKev4jsC~64*rU3;rf<{Bxj8fM zd-kf`;a8ki1%xbZ6WOAERM@etwsHExxwvsD_q-W*9<4QS zV4G6E_u=Ef+wIvA<$vd`yY%Q_Jewjik@1irt7eB_zg1RG^8D@mo8F(! z3ptu6#d?T~b#KX{zn89FzjiyXG&F3h*|ZB1IU;*zJP~;0`AzwD|GUD4m-|#dXq<8G z+jD&9ZFwz?Ek3)R^s_(9a9%N0nz=Nx{@}0fEvsHcuUJ+zS2R~q{~zxm$0<8iJylvJ zG5nja@bLQK?b9dvsCZU)Rh2H;6a8@iwqr-!Cx(jsy_mh%*LwQd#XB;;t4incAG);f zR`c2g?jk<_5+*lGeimNWFkkYv`nTn=3iFa{>aDJ7{829zdSdls>> zIZhW|m)-f#u-r26h1s8s)#ns8xdd)yY%;R!o5}JdrR2u@3s?3oPcK#L`_uc{d;6Mg zR$aGFt=?Sr)cnw_UOu}=mu?A6VsOxYFs1Pbhh%T@>hnE6QqLT_6{aK3to^uecIYHM zd5x`1HjST`v-?=BJ4+-6x!>1!4{O-w|GRc3pTo38-gu5JyYe7};@ex8D=QtF4lD933-d!@wlaVR--}=k) zb2;wtr0?TSKR4s5Xz8?Vt0tY!T)A`6+->)~6xeHn7R(Gh7$Rx>y_davM{wwg@aqqZ z>MBKbUJ0@!=aksmYHhn~zVG(tI-%R?wdK-NlcvmCHalnAVZWm9W}yZv4<2!R``oAg z#i=g605r1z1?2L~s-t&&@*4x81Yr@w?7p$$>_cv(5?c616v!-flIOlF_`On}L z?eTFP`*Az<-!Z+VYN39HPhW?h&fv+sal&N&`g8pn-!?N|yX@;HYc$Vc;+^?Bj!%A& z9%4OD>GS!S(i--ce_h^~uToKDWb=Jx*DCAU`IBU$;>s))*C|X|zk%Jhzt(4to9cwx zg{N6wxN)4CKi?KyvG_-Hm-g1%o^w7aOZ+TdE9VgH7{6BVw(Noeai$3i8jo4lB|kYa?+Am( z0r^U)CJx=X_cGb*E&elj+9iCEyV{)BlJPXye`9`qhs+L5ms3S*MxK9<@wLr5b6Row z7R|mazT=0hCa#r8XU=LcTYl-IW{}Vwri%1Q#~lA!&%cuKkK6X!;#-Cqg^g$ayojD1 zv*NtfkE~fQb1S<}X`R}|HSM8d$t%rgDuOB!kClqQ|Llzpf0TDb_X^L)O>+bs0rf%J_?O-DVXI&Y~;t81|46vboYTt6-}IiZ2T_@*}q67 zFEzS#!e*QGh3|)r%3Yb>`%X;0_pPd%?a{W)vAOlE+)ZvqOXfZ3Fjzn1VvN^4O(mw( zYy8e9sw=Cm{gLjBaaPRu70yw&_}}dE<8IsMik_P_@%6WZ(XJupW_#C}Jk;u{n)07v zj>@4B_WiP(*tvh4Zt%=koFRC#Z!)9X_1>!xj( z?yJ0Z$L&}C%eFQxb*j;*4w*2=M_xU2=d!M9qtCoK`{w&|ZMkvRt^bBm@2;z{udiRW zjt>y~Vp(?k(82wgFHVbyp55o2`*^bB7qyUWee8K2PYsgz9sbR#Nfezuo7(f2-=6Qw`FlHi^`>`Tsw<~Q7OwE_@alLX;=Y%m z|BO-TmmC>C>lG~Djt9wHXMX%gDrCZK5rrSUM`O8iFXrE^c2CK*J-BVHxb@UsjV3Es zt$h)F2o<*2Z%R*wtu@&Nb#2(#|zISES++)VuS3D_OwRi2(ySp}|=|8dh z)wko5QoroVO=gE)t(z|8y6u6cS76Yz`4Nw`6x}%+Rvw%i65?$#vnMnEtL!HGxtdln z9Gz1aC-*tC=PymDI_Mp`W&QPRE1g4+J-r^yjJ=u_ZhHJlv+~UIP3ne{0@7QjeH5J8 zB$qOQeZyn%N&UAPC2=>RCC7NbUU8HAK~`9e)PFaSZ$sD zchBlIAQSRH7%cr_*%H%xJQ2yn1 z)$c`awmZ+QnQs^zZvB~U%BIU)t5dH`SQim+d`GLWy@fsV^Ptm5!W@#6ohs%ue_HVN z;iM}Y4<7vd$KOATYxRzR?b$)Ef-Y&vgj5S26!7_d`PY-lwjXbY@rfTew{=HbEKA?w zB>QezH+kH*GprYV@~yhI?6}(OmS4HwJ5Qc3K9VB1+I3M> zilG#LyUy>fB_Ge3ml`K*h&9dEE9Z(1<@L=??h<*kWqZdHuOzwJMRO@8RNorH zFC6 z{U@lu(1=sEdieSJw5zeZG`Da6WF{(icY9#?4e7Oo{&(I@?XWqNz|d9Jp|ZZ8Q;k7o z<^+b)@}rYN?x(G^D4la@oy5!4iD?_7&89A09lDZtv&SJJ#>XdJ3#YC)veGrkYxi?0 zx8J9@&F!b%D@|xSCK5b(wQP3N{G-8ZOHFpK&b(>1H`94bX8A|H?eBxUuFQ1mc&znU z?huC&$5rRPn8g#$G^{I}SKs#aavgV*$z8^S>+&xKlq$XTuUc})`Y!jTYyTNuvM+R- zVzDhuWtk_7_C0~n`|P1|hO+ZZe=EooCC=%eTe9h0NNeq$&GV}oZqGHV&M^5}wSDi^ ze7yxh+w%%1z2en8bS>V0izB0=iqDZstJ`mvJuvXM#XRh`7ASYUeCpwIL_gWSiT34vyfTum?4HxS2c>2UrSvqP zm)V*7d3CZS*NPS8um3#tlH2`m!|UmO^Df$0H@01LJ9TA) zT;>Cg$sIv8qWm*E-h>`MeA>h)PVwsMX_kLAG%5~ynl!JtdRiyXC8z7xs?}3Dw_QHm zBf5TaS3ZN{p#(1%Aw|09)$W6}8n|)TP7;O`f(&mWp5mlL}pv7~N<%FEcq3ble@r?WnvIDcx(OY_;`ychlMF1we^*hm_@jt^Qw^i$N)0Q0MnQ{HPf_#kZ%b3ZV&oId_eqFv-?bI!C-S%DIXY9FV z7m@eok!+K=K$&<&bO4lylKkIJ6^7;P!KbIW$(+S_Q zH*iP3qJ~CPx6icY7n3Sl6Xq8-)g;W{xAn332DgP%H(r<~$-G=|-Rwss8I+XiJCuzUJ(wAsM$>zT|LeRahTwPS|c!^7C3; z!(XMGyxpaTGv404p=a%#wM%Q=)I%3O``QNxvtM#7pOC{}IAzHSMg!|tXFrs_-ShF~ z4&BKo-WYbRE0ojvW{{oH(^K{3*Y(wQ9bXPF->Y`R%(n5;GjY#NYoA{Bn_OVJ+Iw}> zw1;1>cxVMp@oFhQ!yB_Bp*8;Bebsj3Np(|C?zsEe>sZk2c{~1UUR!pph3Sz<+k;P! zX3DV(&GGNQ)9ZcYBg>8nweR&Wt)3j+VD#se?G;r;5AROn1_vYeIr2UK8TM^&6-ii~ zA~$u7k%!x)+d=bJ+>L#?-dlRReto_+-=)yvhUk}C_qtXuI{8T0*K5kk>r=Iuq`ekA zmbwh|CcKTEJ-c=XBR zS}_gI9-a-CIUkh&ARv|Iv{Im ze#n}&Pq)ZbT0D7m@}2*r;@fhjpY`P*Z8taCGI_Vh{Ff~93)js)RFSmA>lD*u|JT7D znUjQ0Npc1n%sW)gWOrcxAE`x$HA<4%1Z$+FD}Fwk9CuG^GrB6kd8Y~Kxsr2Kt zOFsYFaHVpa$n=nz3?=7|+r@?GeoSZ(KG(e3e5&=;`wbgSo}Rq;bB_O?W&O$XU!7n$ zp3m}f>)f+DrYzea@I_hj`(KUpcMNOF*GA>`?pVL>*Vb@#;e*C6H>vlzXa03FS-Cvo z?kpD8%_ld{&q>>wDbd6D-gQlh*VK~JzW$HqtjQ5&75A80{`AO^gUtKS=QYcK!CMw@p{pLA$KQ0A@Tj;OtxymmhCnJ8g@sjlSS>wcYg+>2f+vj?y|{?Cv& zc~)fIqdoN5}6q{0vMB%iK*@pA4L zhU22je$oX?ET%_AspxJO<<$KBeIi3yzwPIh`pakTw$%Bwknf*`UHsYttFcRH<&~>>ca|S|InZ zLRCdVcYfCHn(PDmk?LtT-Y*W5 z@)xn1@h^C7^G&VuZ#WK0y{%6bvf6oo{m_c*#vbe?ftzjzzg6W7Zq)WF5bcZ47gXr^ z`{kOeyAim z44E9l-rI48ea@XV_I08Dt5&~?<66WK&0~A? zXw^FT=TkYqGtEnJ@JsNVWXDi;-67NJfIZtX}zvC`>Y8*^VEtbFwU)R|IZMh7i89%XKlRCu=z*YYVop- zI*Id){xM7U%go#PX~k;x%l1xdUVnXMCc9R&Pwak>tz3~1SI|5m{~a?i)ccyDATfBxB3n9JLIdF@)iSGv}9$CmC0^#9?-E#Um# zCiH}^pC_jWhup=r*8sqdG!`|fHxs+9e0>%6X=+r4$d7G4PmN%*AsRpQYk(aCzOj|FBjM;H8O=+XDl z)p3rg+AblToHO}hsGaqP>AbsTCm&$i{>$OO=kxQe0`vG6%)MT;JLF34+wUPq88|)v zgzm8j*{3jRYE{R#CyZ}p`_>xP_$&2D9Q@C4Wr96l>6!F@lOx=oCQT81p?S6{tNHp; zXWq+er|z27bY{(m0H5VoA}l1|vOlQ2db|BY_6mjYX#po6Ucd86UhBp5J5G_xg13Lq zI&t^bwnZPq&Rbb6IKkVpA(VH?!9N!q8tqv={my(Y&}4q(e6zj}PfGQZ%79JxEuq24 zGc|Op>NU9H|pxa(9Z2@b2Gj)1U?R#z|PRmWt1{w(&lfBU;i#p4!Gn#HM}DAW{Ygh zs@u8g+m9YGt=@OdG;2Xr>TGYFJ`Wb3U9XnL{H}JFP~Q2en`4d0I?t!`=1tAnHfL4# zajW>q@9WOIGW)(WGGChamG9FOl`X4w)r5SA{J!l)Yq_TK^q-UOGyCuq9dp_}SAChi z-d2?*nMMh@kArJXU#+U@F5MTIx7+CDwsl`@np(a5Mf(EW6OTGHzdh6O{t@>+K^>v) z-Iadx7QBrvc)jDfQ)FdD@WfkXla_~E&J1$uSMkvd;A%K{K413vwvyeQ8VC4))mB}$ zxF|AVrE~eT@L<VTahAE$u8nf|#cD7nb~+)cv2~N%ZgC#}<0C z-P+am*CfPj_L&te}*dOr)6G$uMCYV z|H~Ttxghe*y+zU-Ihv`ymzFm;#xO7~)btR%>+($@Q_H&J>Y2IEUd}%eP~);a^`Fzq z(34?j7w%m9K5%=?UFE57Lc?}U^|IhUx6aF;$^EON`nh#~j4Jit&hWdTko<1)6Rq7U z&lJ`@d39{ffg4{|ujl-+>fF;KTz>BDQGQ>OW`qE*@|CYDR|+(Vzts2_GXE@-k=^mAkGpq=cPe^*Vg5I--pjnvQ9es`!sidl zueV0tJzu}^+NRaqTuXMmala;$p?I=#Np>Da|8I!Uaf*f3IEQ`?D(UvDbF(rZt@l)|OsRT)MAyg@i%a zyeBKp-*TJ$;i}n5g$Zp11!lM9;!0L`8lM(=*{@J!qUpWuSAWKRbLqFrQ$^l#ZK7eV(iC-)br5nCPj#Orz=YmdhFYL#|)C zx~rpo-j``yS)sO<_$v;~Z>Tew-BGx*S|EDI^1mUETU_26PISBSa>KQNYxc!U_C=js zx+-YO;fw^`{NEg`s$K$&A#+u>KYZqp_Eu!mSLW$cC6DA76zzGPHtoyWExqS2KVK2I z$hnc%=J%Q6XRT(U2buUhqKXO>`W{C`Z~3%syGpW!#IFm&F0u^A3NIgPvTDDoZdSXr zccWw0((MuN7YCk{+J(crIZuyNxA=ljS0>Dcpx2wC=foU7N; zxoO?FT+}yYm-%+tt+{4lyTS@?HQ3DxICbKrlIUq>mTz;Dez$R-U}9JmJmcffq&2=p zlU6X#wRtPvJ9FpT?PcrCW+1vobAJ+sm;#S6#v1$@5!{+8z%_PtB`VYyLs~bpR4YV{M#R!9V?qs)=+yVX)l+= zAFITpo7C^TVPdIx-!gyx{@bydde%AKeBNK!Ykof4(b@3g)5kwqO|`{;6L0OCvMWA( z*I^NH+vm}NKX~_MSKpc48feszvSf<-=QT{W3!ZsQm$7a$I(f^dWP98euEP_TORjUO z<@&0>==c1JclXwP-MunYy8Ttr*Ps`!wGW?~JhhhjyJ+TB`KL`Y895%cUHr3U@lNfE zoxM*4y-Zd-iB78j8O(96vE%XmD|J&ZoIQG3Y{d&LbH4LCYzr3z_wD=Kw^rkU%12Ji z=XPI<yuw zISAgjnsnh1r{VY6r?Z->bCzwW49&O`y=&L|vbxJTQL~o>wtI0rTjSU`FRq~Q*UL3E z!CQKkIC}movA!ZSy(iD;pUCwa)>UnH&T8icU+QRORTVHO{58jqK|+4l$DUR99!gru z$nfnmU3_~^_G;DKWk~}2>m?bgIa(et%>2OfcXtNo`cpCA9wj|L!Jm`sqP;qSx&K^; zEO(Ht!mR@gE3=}4RBaZVzb%)-d^;e%P*dIcmf@AX9M3;3IwJV@t=(6)*LmS~tD@2l z=eB-~`ViFQc+7r9^X((AtVJ{DNBp~IHK9K0-ZziOKC|X6T#z@Z*Qafs!wKD%gDyn^ zJik2{{OA1pwfXiY6QAUsvh+Xx_v}g|OD`~H{SCX7Tk=(Rr<=%BQH?20Tt4aF-v0eF zS<3(5vNz27X%Z#M6BaGo=(+pnjZ4RVZ_N~Xu5f!zE6+-;?`r%7%Iw?o_bs0mz#!Zf z@Hc#&P1y<`!N%oFvo8ed-P+>asm1x2$k@C0qpS!8Vr#tIZrr%C-*PrUN?S)y# zlfd^&jt1)#DCjT*@lQ?Yl;8jR#YAD_?A{Ic*gvmyIi^2ok^VsDxJ z%jvRdbB?_3*Lrqe^uvoD$6Xi9BX@P?zHARFQ+>B--qu%A+q5+GUZqa8Sd|s3tGGO@ zrTHzNSH+*!)*n8SC;(^y3BVqEbQVM-L7z< z1-3#PHG}@13hX~EP;qWsTjg&vt=;uD^} zf3fS-MTI6C?Oz@W0`5tSEN&ZZXWenT=;7%6%5LVe!hLN*4h8O35AR3U&C`pOx%1|d z%GK*{Po0`l-x6-t5VB;MXKtpK&lLt4)d&Wrg^kLMHLf4FYzvoKm=L~@h4~#{_PP)C z9by>_dXAr;zV*mI7_~Y>qj~*Bv*}^>EVt%4INk9Qw-KHye{RntwiKC13t3yQba!|? znfrUDy%37Ew2)qcu^ z5C)s6Y-#K}4qQ}jyM0`4XRP_NGRZHdOm~e>E@qi8b`^;;%1)1<1cIHu^HBv&nrCDwMP2Xq+6|Fi{`|v zipkXdl^e3e9>(pJnWKAmDb1&JYUWr*+ zTAYS=joj+vch0xE^(Z@0Z}v=?C%HWnCq9kPW0CENkdI$l+W(_6M@({C-o1raHcYYl z>Rk4_BI3c4;Pc9p68ZoeXQG*wh<-PxUmshX_2?w>uF?Oz{|;PIZ5 z{cZZg@Z!_Lw|7t4YrycIVb+?s=##PsXE1Qy&I=Ja`k&#|?qY>I+rHgjy8ZUG{?J)f z%LT#$7!LYxc_Nf@j^h}E)NMzZo|GL|xBu>t*OI;6Wz%}4hllq+gVwFv_XBR1InLB-mRtG65?Ln<1-98~%JXnA|T*Cvis zcJ=jMyRV(vUHhoByO;Uos->lxK_M)k9l!IJxy;Y=?n!Tqq_8kG)wz?SX=(b7 z&$c_0_@=KhI_20ip|5-=Wdc(-%0S(eX!r>v5Y&W-nf`7tc^XVp?Y3@?*9)LvQs>=Dnxo z?q;lC^SgQ3naH)%rLVo*8Xgfe>2To4VKQu?Y$Adp> zsYH?b)R;UmVeyRHo}s({X(Sitw#Jx!7TvP#SJm_hkMekbv0gB8m{hy+neaV#on!M{ zKkS)c(f6>gM&$Wx54}9Oh%d#=r+0srT$OrdR`k7xRj=;Kh6ZlG6uc_n0=J`s(Uhhc zGL?e=e!a5c(#(0Hxjc8@$Me@#UVrL$s&?n!tgX4+v%m4a-lk`=F7&os-p2W%$AYyV zzv7Dtycsb+^S#v0Q*SPsP3~#_^Dj4)V_rmlz^dezu`lk}8f}VQk*Rkry}A2K|Bu=z zp=Gy&dL$lBaQwviZBp_289|?3ajF=&2{7N&DW3E8h0VrUZ;by~sXOW(d-C&wdB8Wt z^n`ocMR!eGclXMzg4b0|uEEW-&zg7erP$%^xAL-AY+Yj#S$pV_lh~t6o{t<^6FxdBan5Xh z?y3CnyNsWk*~8O9^$)57H^(v>7;l>*-@iOcr@XS{=CfS0xqEB!KAm>4cMUt`z*Xve zuP;$4z>a5P-_0w+HOfa8s7$e4@Pz&OTc4yyi4oHzl-Ct+pYYQ8ZQR>3{p050#ib#u z*L>B>47%;TW}CwVu}uLC$3&NveZS-PrX#&Q^~0S%J8LsMEliiZzdgC-_0GQoatVhAAGMS_jZz9{SRSwFP5Rt&&meSH?0K7Y8XI0q2rfMIpJDa! z+e;(w%`BO}ey;oO+3S7HzUszGi<}CTJ@%+&(WG8)r%TEcY^Ja)&G2lR^J0ZXu(z;f z$5W4UlN)MSxsUC*y2MaRoLScDqHg$yd%u?CZprIC9G`I~l)wE|_RVEGmQJ6vz*EtS zllN4Qj>zOj_I*np^&WZTTXyY{J_CpPl^@SmPyBdWJjYp)_nC3j)%o7me$&q9?#ZjY zygTctuF<6>yKa=8D(7*`F&CN;wD4#g&m+dGYOf!8%37~tX?~xQ{M0w+O;p%Lp}+5T zK7V^@t-JGohBm2R{g+nzeY#njv~ug+`=x%%Ccbi65~O<4Yj#bmrkjIT3!hC$Ldx7I z-HO~hAG&YK@3Caw@$=H^{tml?<~QFdosF04tmoO=oO5gY+Wl8&hn{-AGdReezhvQ> z(iUA-C9!h}6CBxRSackD@lW#5%qvHY+9v;X{yBfC{{9~)Z+x5ZW^3gy=`U9||KiPB zyJ+L}JLasp5xKiot!kLEGLhBG%fWxELXmxu&*YUq+?lFty*w4(?+vz7{Lyevqd;@z z&b%`>-%jV6)oSe3b}3f-&%Rksj#^q4jmN)8{J47PRBGDQ#)n%&)h6$7u)Ama;PsEm ze{-(Yy4QNW%lfVA>U>x4_BqS+4QvO3^OtU&@HODbl3nYLZ)Z{Ck&Sz}af);E$vy3_ zooqg{9PIaBZnZ+JZ~6A1w*6llbhW%Ec(4feXUaDA%kOC@HNJ6g*UJe{Z4B(AB<`5> zu9!ijvPG_e4XPC}Xm;88>oZL)m5s!FMC(KPyNH)P1nx7S-Emi*`i06vZhR%#w8upVK-xb zgZuAu`_zAIIBR>^MA>ih4t4uJ?X6Lp+@;pruZ{n0xsW4qTdgPu?~>N$1MW|K1pD`` z+AcP|DDm^%m;1ENCp}o1!(U~VeKk7f%CtCz8Aq1%@4s(o6>-PPqcLD-+7siTvrDH; z?ppshV%OgEtyirsw^(cnK4H%2X=GjF*!=DNBax4sa<>dNcyXKGmn!_aa;-&{*Mp=9 zwiXdP3RX-N9cslYtJrF?t3%D&w|RC1|8t-D{rQ)*7DtR0XRe;U#XEP? zDJkcze#bXhR3{vKdZ+yDviC=PPA+WrT6_NQ{&OX~yK)z{efhX`yWgkw1x=4RefXZp z-RQSuciN+MsqBQOmDGT$oJ3>X3m9y5~`(P;ddN0GB zZF^(4ewSPB^h#4`eP9-ocjkM;2OR%y)#e}GP&nt()#)DZd0(4u&6jf5^ekm%zU--1 zznX#P=lOXzE_1BSXMF2;t$AH#*eBPOr6GSs?afb3i1iA&VN~S4<@vOlnSZxzUDI{v zwf^$gU)M$^UtAK_ntJr@vWI&wZpoULxF}jp^rVe~2a8wnuY@MI-R1|(GrntcrX@dq z9CP}yr?qxjfq7ia{wThkG5OZZm#3G$^LAY+KK0&<=29NRmt70Grm&RHWD0Mnm=Tiw zDs@{$;R4~_KZWr&{sJ?_UOBc|u6+HU;qr8!a@KQ;9?W)?RX5hkF}l_CsrJ+p&L0Pl zb!>ZlX+fssjA;j$<=0D*Kf|-wbL$Xntk0G^0>p@KqzDjqv6}eBZYPs zluu5+m%02x<;ID7KYl)T(Bku^|QWnO*yPE)&BT&|D5ad@2$@{ecsvpY~1NPUX@+XF24$}yUnmY+FenzcHJl8 zBdk0kGBv`VpFZcGcI?58^QRYD{(MqV5uW@|oaqN=#KT?hdn(f!GQW$?z3|!pjzg}e zp6b;N>pZ5G{h7h?{Cu);^U`gX4b{vPZ~yW8{iE07!)xDpH(_2X^`g)+Y!4(Gk_-9%|iyD`&%9h?_ z@M>wQo`S|9$1@8pp0aPV>HK;5w4b(+GIyy_(aHPyUQypxNX+>8pTRPGVgGX8C3ELq z+&eLo*Pv@^pyKx>AGx9~iBv!Tv-gvdo!{Dz_c;?-+wNB1{>}5BfwkCA=gf7VGi5US z=Vl#HJf|ol(yG$p5VT;mg#Q~3yPBHEV%wA73E%x>ec_UE@07Rks~-r>4^f|>BCK`h zr}FWqlE43cH8~My5bE&FR42@E#rxu=`$NhO&z_wT`*KC*%6q$vmR!5!wPfYD^Ck@w z5_}vjk9S@aYYI4cvDE65rSj)BF-Kdu{0f6_>;3iJS@w6)Yp<57yls8Ht&A_%)qd>z z#PNPkg_T_O%+9MuPyYO8c-Gb6!X}Y!+FbpHclQ-<*0Q^I#7;$>N>@`T2sQen%O&a6 z*xawYX^TzyrSzyC`3Vb`Kb&~VpiETu(%YG_8_nkO-P{qr>dmV3Yg4QyZ3t@BD?ZIR zzhA=VRi(9i&u(j`s-}8j*AFka-W(5wYz zR%Qnl%xwp1_PFvObWyh&0Y)%a}OC(uUm~5W2oRs72FY^7*aJ&Cl``!m{R=FIO3te8b z^-^e%z26p-w^|c|9u)C(9N%WIBX>N?*lod6Z=U?Q$4|BBeK^KFcQ0$f^Vild#op)E zSLOekcl&L1d5&6ZsJP0u4<4-ATv`rM3$%@mwPr9gWX(RdaN^>t8D_zA^845Q*`sy) z!`wHQ?kMTZIIq*^wC7u1?DM=OX-$jz=5;Om9<)_srH_iml=Tl)7$)CU5uX2kNsQUO zca<}jpX?|(d9>E{_zvE!M<*HD{wbEOGnL%_?)olo*?Vs@qE>4*^?IMxO^p#^_%7m- za&V^n%FB`Kq;9NT&3k|1zq+px4`d8(dmLoU_+#MklXnGQd*Ri%6?&(a?Npn#^3RTz zEeo=)PO5Zia4@{3!1&m2hKbQ4ujP)<_g1}cjW+)-z&QW;TAtubIlo@#JzR3fM11{^ z{%-DVyS=9E6?zcm-Bmx~`SHu_{mcIrT38iKH#}x~q{uR1Ui^8h&?6SAGaB|xobxmP zljwzW{k8r9QM&hg?r(n;o~beCi)(1oq3|H)M7c#lE41Dj)L!C{UGiDJ!;bgud>@5L z<_DghKXAEEurm8bD^CDlO8>QO-}NKP@`K)pSDjtBYr@xJuHe(zS07B>FlA=AC_~V? z4@%X^#tYy5lgXHyzUOA!0RjJqk-HuldbH;5ezx-2_j|K@|8Cr7_U=-7QQ*2ezqjg& zsC*9AJG4|!hEYMJQBci#Zb_VIa?T=86~!sF@AVz(y%rukrYF$KY+CRux%TGVpyaIG z`{VWm#=W?9>vrmK@A~c%MNd7}iVz{ov`U%m8HL$yTaEnlIN1~X`MLH+i0zni%57eB zO33*u{C;J7f1OQSyG#3z(yrTaw|Q(tmTvnbws0y#(L*6ehLnnHbwYb*s_SruD6hNy z>$8Zz#Lhk!j&kM0Y^%-)}EM^S(0KKmWs{dJ=Jw(lZOY(zyAz1S(gu} zPxvS{^@;5?z5>Vl_p8=_%sZKT-B~pC<;49fm+Hp-*d7`fz4ANjsV%W{onFQ#`mT&S z;J?*tew|}eN>fcw*4=NF31_Bpx7yrJsB>DJr>k~w{wuAECI0`U*SvhYYu4HLs3{>9 zp)r>OvSw-;Eme_WW+*#2@%wet$Gv6wGvb6!RW)z;dEMz@+SRqEj~%`7`P2H){|wGk zci%7lVs|=l-x{y&yDrbkz1lX{WdEa*r3yOOpF|jyJBe zwLc&^bE)IR>$;OasjdH3FU)a4=6k#T^Qhty=_S&Klk&K-rt9jzUAyHnU+|t*EukNs zUKNs082jrB)c8Cu-*5jRv%#}*f{pR^(^GXG-|l@w501&%Mwv02SxV@ih=X) zF8KcA*e=tp3v+hs>+_$NzN)+Q^2XRBubTJfmjKZED{iMJp4=koKUxn1px zs}vQS99CMgj;mf$e{EZp`z5b+y>FtoKeTGzb#vun&!v%Hb-&oKKhjcg5tz=LInid0 ze}z@y;hd{;7#b%O-}w4viFwdUh6x2V+GqGLPrvv*)Nc8^-wSQcZ(YB=blQQ#`uAQ* zczJ9$Sjpj>n8oNFFDkNK&B^of>eDkl6*LZfVE<^k#cajJ>26OiOrEvIx<6xQ+MVyw z%sQIKSG*3p|48_)Q&S3Ol z^nB)j2J^g|C$8)%%URU+PM*Q7wkFHN#Ng1nui^g|WLyt+`f97KTEumQbIRg<3Or@+ zS1lLYy1mW*cTvQ=gvowCO#U<6zdR>2;pMeQTW{X*RW0=nx!PIKV&?eBXNITxP9>h& z$J-We+tQY8mT9!cO7{G@jYr?8&dN=n8ohUC-5*n1sR^%QSeP2mOlfkz&t`qj=HD!} z!yS7k2xMv*KH|539wlWRmNUt0wRys>OY1uMxu(m`F!K3yz+Rq9&g7J2(cU>aH69A` zw;K966@@;%yf(w+yIFvz)6QLmFBZlZaWbDXwqyT(A;#?)$Ay#9vhF7mIJx7QCmPwon{PJHfPMkh*>T};58wTcQ8M~)NCbROf`Yip%v_o;Ko;%B; z>OY*a$Lz2Bsy|{?I}l@dqsZd>0+k&ul7+K+>!z-Jc4py0E(I;l89zSvD)rs^WGQpJ zjrsVe&ubI+^0hQ}^>VIAt+}JSZ>j)?=cxt9Wy+Q8{xj@bW6$>|@SA6e(e$fl9Zs%# zF1j^c>a2dA$o%HrVQjMuLS%iGak20-ZxN{9e?KfYXv?`XJ>mi%ek%eS{HlCL%XTeWh#tiJEu zRX68#i-wp^xvIzSqIuqPxsZp4vifw!+g87px5RiFPwFYUbFQ%Qa`L%-wU54K-pC9- zZ5pbxH|G_XXVCYpl{>x>jsx0KqS2@R9e z`k1`V_V}f>;eJ)SQ(p!9T~f_8-SJY0_t4C(zd9U*wR11Fnk~L@TxWutiN!XoN{V$(gy#BAr&KnoDL~n1E zC|h;7NGtr`tQCtpov!dqiyoLnfI+9b6@}TeffQ!8n zpDFpe-degn^(%Y+J9Zs$+vPhi*DhLZ^7YEmrB>5o9gPAWGFVJ}x#oP{F^7Qb=gseD zZS9@(musVaMY2M5#iw){;!I+zUGGg@*rI#h%cqk|Wrg5; zrVx(5COm%y*kLf`#@&b< z&z4UfjPXY7bEUTLv6ufIthaBgdxf&PW$LH1F3VPay8cM$k&_f_jMMV21_PFIpPqyi z!OyGT^L2U3yJWFhO)C5m8tP#CU}wJ6w4^2B(Sc#hnKGR$HM2M!HHA3-7Ff$~WcF8j zFlo!)U24l@{xc-cKelO(r@_Tdy`{!m);=%E@12~quxQy!_of6&mT-*23>dSimu%>cSGBh!@k>^ z8a#a@H0K>@KH>0V^6E1keLH`zv8=o*l%5+kZ+UI$49`ptuGWte8$7==3NiNmXUO?+ z|ii-LUClrYXfLqmnS#Xi2fjLI)M z*e$r46@Aip#T%=T<^JZ+p1gee?YZ~fms9p`E-bOjx^U^%AK~XcJ07zoa?4ceD{4)0 zk##K2SJDtr(7rBfTkP{A_hFK?W8&r>63l|+cLJrLQx5#?(g1hqXb|{}IT0Dj0hG5M0e~Q(q zUHd=89@)O`s;0=h0HvvlaWl2QtEN0CY7taF#xRw)czxf4-bdckuAJI4TT=P9(ysC^ z@6y_mjw{R;44MC;e9i62%l?Khcqz-PcegzCoz>Qrud1f%?&`8y$KL&R^>pFW9jq&4 zI%YnZxb~m?+^w2Y1yx^v2KV=_xt5ao<*|I!yHM{Fg^EXa1aYS-v`$)DH`$^@yTB6N!WIV9m=fbC)A&YbiXQITg$lc<^6zf`-9(It6sXd?ECH5jYk)( z+IUe$I%DR)f=GWMl_i~Tmx>!5xA7OeQ+o2?vKq~N-&!|eKn9$L{=t7o_``^fFSq**&Ms`8^opIT|u!x_Ct1SS=pUSaoB;%E5f zmo=LlCbGPhmVWYRs_D1Am$T}NOTL{9-M%_^t7zMt>s5A>R!umsc(z^MJJ00S-lHBt zj$urbI4-_7-I2T|yYv-bp>y=iu9pg|uluGbdnWFR(k=V7YTB!tTg9h(-7UHqnW(R- z=&E7J6~H2-BEa+FXWpb#Ar;sD%g<+JAAF;GVMF3#nLV#EZhvF_cHe1tcJ!LOX%UsB z>(?%kidz0tW$TZE8PS=VPej5E)~QI$70fQZaZK>BY^{51W}%7d#>R=yeNi~hF9~b3r-k+zj z+*xD!pW3Jk3pPm|3{MF2<71S&@&5PADIcR-|1<0ep7?NItt;zc9qv=Hm0TP4THe`R z_O@tK_`9d;uKYEck`+Hy?2tg8b5nc4N|E-_fnII!WvwT)XH3%FB0 zKcDmI#NStXx4l;j&W-7}xxDk6-=1q;WUv zu1=rJ*L8K)?D$nNRcEoPYEr-?9(To#OS>;0*|CdhQ)b+PPo``3q{#{K6ediLoAOQX z@}1jZN4MqZT)lWT@WN$NpTz-_Vv6Q2VJTuYeBWU#;4}GKWu8Uv23IR1GmCSNkDuCk zTwvCAxf&lAnJlIpC}{d2F$Zr=R${`}XW&$pMC@~#@tGWy3-8c) z{=l;OKf~FyXNh8GjC{l8_LeqE^Bs9@_Qkg3&eu~q7cXsD=N3;!9q^0KFDK6?A>%@6NHjezgYt2q*P+Mmwk3gOe5)t~S^xI4p{^|rFdM;*8epQ*VJ|IW8-P51c8a@?O+{%ujxySdq3e_v-W-Dw?kHPt%%yL{H< zsXU&XZ-s203$N!DUby2J$H&`WSN5KXUFKK(GUngDH@~dQUaj3e!Jcc&lBV_fcCnY{ z{M#>QU>dUbVJ1s$7yHSdq06kFY2T{VI{2n`zQ-&PMgGu&7@cjmm)eAIp71H3BO%rG zpP}XMzDD5<#|r_{DwNLfo@l_5|$LI+px z3Ek6XZ#?VB=VM1Qdi0AkMW ze8JwMenQJ$d&MwnupVJzZ1O+mpa1>pG|wd8a0T9or*l{4p49l|vF6*f3ul9E4(BR& zP2h3#U@EBpT@qrpX|dMJJ3or$|Lo6_JY8fVH#;~xHBzed)Y^klAzRcUG9 z{5=2chr;Xom)|k7T<3daxe5=@=dUcuAJ!Fq?EN;QN%Q<(J*#i6mX}xGb#~NrHDC0( zHJvk4f7{Zcp2njr#}E9NP}jD-AnC~Zxi)+D6jfc9d?aRdY^lV@kKfj>+Piv7-f36Y z)4937_XfzHs{CaZ8{y~3n8oIzz5SfwV%_y>p;`@$4|~rq`(oI{8W3qzS8vs{{LXH@ zwhz5)9fKV`3;mfgHE*?S^Go>XFS1JIMr_3@Z{3yr@sQZ zrfs_9JZJBc1;UdiYcYAb*YEt#V17Tc@v@t{@deOomT6z+JfDBz+N{_F)kHqkqb!Fm zKlGZ+Y_WVRwiyVb=bFIm)!2V(tCq%hrCz&>T|*M>8u>Bl@n_FKOTJ6cvQ<{ zrU%Qvm<#qR{xk6FoXS@`x3Fi`CuQ4O%d4v&>@yZo-!vub04({y6Etaf@uZmgR?Sa1u^KD#>6rt1~AuUBo2 z77n=Uyjjt~OWF3?6afy-p7{bSFM>t3o4xt?AmhX9x6a-xgkpDA@P#T(4Olz*WyrN_ zYdtv&j3(SH_@-!cqNCA3eOpb&4vT2J=@Z^r>D~A$xu#Px*l*(N$MGyNucmCz-4;E2 z%H^L^!+)MHa#2V)^Yi_k7ttq{yygEc{_FIGf#v> zC%9J(>&O5 zrl;n7-vPPLh5LFs4r|=`eExIQ^YC!R87bFJFI5WIa;noaNa*I2d?3|BmBwT200Bg7C2B0J{AYwh0{adsluM3|a!r6SMvRR5LLs~I}skZF<%K6SG8}6*Eyz*||4XgRvwaoTC zSQ;8?bYZE_KQgU?ja6%B0@ zCWI`Ne8lmHtK;g$d~eOEm(>p+KJa***4o1rp1UV#&E0G7e{Ro?ulcXoSJ)=qeZ4PF zoA1W$+|Vl)tE^o$^mg=TiOATk;44&gP_5vS=LozkC=@K^i_*UP`lij?vi4XW&-#liRXjM=x!)V4$X^bvekaYNBFR>G zqPPEY)<*xjHXo zJ32OJbG44fs+zCiXuaT{3ycg1pCb?1~&d z9DnH@dgq#ez^bEdA31zh1sNOuJ>TmyZOL4Rz|N9Ydz*WjefQ4axvg8e?8D5C&of15Sl(vSK4y5A}{zcbmu;{4~e zTnpRZPnUl8a?PI5D|udQS7D7|SNgnCk1|QtF}6Cg z99NlmL%WYT^_b|}c|Jx0cP}sb{OLx&p!bdz^RgEXmi5bDyVUCcnsno~d-ayHZa3~u zUpIk!a`t})-kq^wU!6_jHU95FFs}o1_{r@f8W|e}|uF3Y=r>#fvOt+uB3dGSlW+Vf}qXAqjY)N77Y8)ulh)zkDxr`vW4`A)T&>h;*- z^XphG`Np+Tikg$3?u~lCOn)6Wdz5+U^QF6I_07&L4QlHQ4-L5RYkknwfYz3(&Vo}> z#aFaEGt>M_CePUZLC@~<)2DG~(&ZF>Uv+;J-#+O^(~|s$J^!xeZ?}6^bm5eiZ0WY# zqpGeSZEl7<;PGmF+b@-S&gT1dE5ZjVI2nisvbuvGft2uH@GL43qkP*fkc3t+7dd zI8%9xlyKn3*KsNJ9rLf*{%2Ubd*9DZYfD3vm!Gq^%pu=Z*%~tO-GkU%N5w5uiylu{ zOAuSwXO^kEMOCv$e6y5XX zrVMO0LD|}T*Y0HPoSgGGFy8*z)ipNq+oqh>Jd(X%Z~ysY>z-f9`?dA2$rrudFJCVg z3wtZFW&Ot7ul?(H0&x1UxW-?*>k)%v*{ccq_~$tyV?GL>IEuDa*9}_H&qE__EnQZNJ#^Hxu56acBKa-n+Fb>GC$; zEqT_frp;Yd6%w+mp6{avr=GYF)1^Y2iZk>6`Rn~y+^D*x!MSJuJ%vf9Ki~NC@;+;e zhqrY?@a>oPUstD2vvcS;UfW#*ZVw~rm-CtZ5-q+jl(yv?(orkVu- z8ov`2zs>iW61LegNX6mje8ERHC$HU6i`Z)HG3&&uCF`#z2i%F8o>bVi=^In*SD_Bx z?~hzGEB-Sa{=3-Gu=0uR8ODbb4&NzWWAlrFeX3s!=MvrUHFmFrR?cYhIWSqtJ^$;a zvUwWom@}`i8!)guKl0gB`}nPeqP8BNx2tEW9@*C9JR#6>j-N~AxmhuPZ2vQqZRa`S zR)1WjHZ$mBtl`ub8q4Ntryem{c46zM2Qws|O1@V(DQ8z_yxRBZ{98=Ng?N52)oXce z?%xxwwq17D<=ZcXwtM&Oy2Z%5pr|nYjs5=m?eA^hT)g3=lJl)Ohj;et$Y{@Xsh5{C zHJhz_z4N|)bW`7=dFwj2>2}H6_P7Ag@0^E%Y?>VGYqA&D=pKAJV@;u1sO7x0ReS3GNw6)CVfNRanAA3PmX^xe zZPFfI!85$vGkVT9RiF6#_3jRRuD+Y0i81NNlfSNWKI&8C5!p5?I)259btR!&MBl92 z)VGi;Gb2*VlZ(Zz`g{MK?^j>l{B3r6y}n#%#ExT}X;-Y0b3*g)Y+w8M(%jt@b@NVq z`*VAX&)QRQrkY8s=1!>B-nWu7!ol!_zzX(TKC)43mkY}(@NT#6U+{6s^*>G-4^LRc z>(xD*Xsf^F&a2#2uf9Fld*OPN+}FI$Wm{EC^F=iiR$r}VjB(%G-l!Q)9gv*WMj2`;O=nmFDbVV!Se za=g7@s^)G^{!UAIvwJh|_*pBs`LF+Ma{R@0WPDoVW}|6Hqb!;8Y~+(p07P8D6Mny&uozz6p^ zX(p__ud@udt9^O-+MVGBx8SXf;g>I6-yU?k$2j27r7arP?;JdRG_~0rJz0KyoXGG> z^GWKJ!wI)#dfxt!n)#$uzB)fRM)cSG7w7K3Q2P~XY!ddWc5&R+uv?riA_~R-vui zW^E1*X@0d(LvMKp=ZQ&9K8c5Oaf9(cHOPwm9wJS+X!u<g17 z32m+3*-&t2G80SlZ50MfhwB#t-8qj4zBzklp8J1>nCk|+dlLihyfAv{u}eK}^^(+C zEh`GXTnp+i{BxPf|LHsb;Im`FDXswK{t3-IOoRS+V8)A?DBS z-nb@JvPAcHt5?v16*dBG%dZ4YousT~#Gbt3?HqSS1HHDKclG?wuBwCypDS@Ft(f(R z|Lgp(j(h4hR9%hUmGkz>?QfxL+4}Qt-S=OzBv4K@l%Ma?PmThGe*shaS?21kv(`?S zuc@9RA^ng`eBCP?+E*t~Ri+_L^_$Cm5wPIqN$&U6mha%R=y%h4x2r>Ji5 zWZp`dcMnwYtaL~>hF8q%6E$RcE=DbwfjIU5RhAO$#&)*ty>g?94x~~6=PazRzsC-s=_Z_0Y?Kr|7Q{wMn-J2&nxd;X}GDO!2DTp(5=FR#hc@j zX3q^;cl*jU?$V9BqO4n$Ivz~fdcq=&L9<6oMfj1a^TRzB$4ibG|2_K5@nzK68VRFQ z_F7CkOU}*DzGzrdbp6NYkabFPc|K_5aUN=v-CcCN@p;&9Kl5CpNA8EY;u<8a=eK3Q zTfB5j^uCGRrTdNsCi0$Yp6dLwPi6TT#yc%@I?pw1d-T~^r=R&-_v47{%90m3`qLQy z7<%%4yj1!op(W(r?7D@Hjyx@srn=SZ`6cgoFSY+x;YvM*6EWs7{g-osJl`yB?JJj) zZhU7PZof4Ci1Dk7`{o9g9DAI9b8r65Yg5e5ivRIVbIm<;=a7R(?yL=4?Ai`WOx0rS zJHV^;qq+HppWRvOspoa_`+Z7m%NM+PubVL8q%>>2&dcv-OY;}VN7eg>hOJt+Fh*Cp z{iXk^uqna23$|ZTEziz-@ujh=%4*rAIHxO>0;dlu6su^Qyl%4pN2%@;`NS!knRlpj zu`jdy^YZI@NnPQ?rCg`vSACY7V)-@yPj>l$m z;$N~yqFxp6wi}8zQ^H?q8!-09>+HzgdM&u)82`WJd;67thx*-abI*B|R%Co4rEcos zyro}n7nRmM*){iAZm6iP$+XOnu7Z{gOsB3aSyC1>%~O4Uf&#DggZ;b(|DtwkRNQc@c7gf94U0S&T6!3*ZdHqIQ+oHbja}8^PDO6a zqeH<ioVRd@=NNaWpDHIqIJb%&6zuQyqkU5D`!X8>p5qQy#ti*Y;Ex9 zOI1=}T(w>P(cg~WuFHhOZks(`#<2KJ!rK{bxeg^CmzrI9@%QxVz3I!#)-9OLx>9@h zs+G?!ZOp5zne_MI@erej8H?sFyUeJtVnS+Q@TAY3_XP7cKD0mibMl)qlYhdwX;E+c zW=bz_P5ZU-wcPFd$L@dmxnc(nCDV^s%^#} z=i=f_8Mh-${)Vl87(~<=9==vCn0 z;Fh&k*WvuErHUeX3#O=M7z9Nt6nZWAUBe;Uw$uFB9p1AyqKy~2EBM{~Q&*btBfryR z+K;lU^L8Awf3}Nx!><0#=Da(1brrqcw}NHqxBI7V1%|lmoth)~pTRdob=zhZ%_%8) zH?7kT1a7+NEqQ&;&nsWw1ca%7>7JSY(KKt(Mnv~-SF7-brnaryW&K0;S7*t~}ua%(A^2F1nY?s*Dl^#)(Ez6;LCcm6ArPH!M&)D*>$uCQVZ2{ArSw0=m{c=`! z$HQ=O_O1QzE-o%9o!Xtqv8Hg_8M7&Ehxi3=-@h~Yh=4jnrps;j-FJ!N!SQ zmx?Z2Hh*$;%jMjq9*Qn1pES7?669IRKF^;hnkRXP<59Xy$LmRjZygG)uTPUTO!)C^ z^M$LML>{VoIi;#}TFy{cv+-m~mR;`m_8og>i)4@RiU+@IGrX%4WgJfTTxrhP95U66 zE6k6vflZ-V#jAZzxk63meJ+pcf&<^(^_gEgesth^>od2}O?OIp)s=bj9l9+azpr%R zd@_HcmhGMyAJ;GX&#){;P`N9*qfO?=oPQGQY(IvcV!pBSX|(B8)zm9Hwk%O$@OdKh z>3hWoHDeY3;~^f=5w4d1YL1^{wESxAl8`PJ&3vwe#is;j2DLOAq%hC; z@JhPYX#3scCocKv<{h$sn|E*i^r-MJ#rZL2`>yY{jOuRx;gIXE*eaaXDz4!drDc9KemVAK)gjg2mQyCLs0`a0?m6R*=aM_pZePx}-Hy4i(nRmFctrjL zttS&WG~fB~c+5Dr=GvSJ7n73$YaCO47%iE}KGn>sa_dAKS<0>r0u1oMf%%2}^j?H7mu+)?l9u2bcfu6B1{hf3@n9PA-^nUwMA}_PJRm zZHzxdi+;^~aaTOzS7PPH?H(`hd~yHdc5PXRRMfm2`KQ13q%vOlZK={EdZ3fjYE6yv zhnqXLX+-b8G_Sx;FL#|w_L8m#UTeawR6|2K0SVcbPfy{F=N#$OLX}W!GfBrK(+Oy4Y4ohr*?D0$M?tl1`)p~CK zu5HeX*6z;kog~KQ$mBTj$Ye>|$4BO-H=K$pFRM*IJX3BiFZ)N)Jq1o8b3VU}z3sre zHbHU2Ca12@x7@!~gavPJpZM7K@r6^SefyuDTdCOXzVX)nu*cc?U2?%0dMs!BWsa*> zsHty1r&Myst7~p|=Ax~06}UL=%~oozyd|2uqV$&f5{t9WN+BmdxYxxovLEU7KKx^{ ztKH=DE04O@=bSU1`)cQ%U4i>`&xKsMd@)kL;G{;WDPud6y3o`~j1AQi6PI23xY~Wf z7N?1Q@`cTJ&hLNfYHP4HFI)EAx2wPNLT&&dfoG#Z?#q9UVBfu{^;b_Nn3tp*}W0!7gCnvd6_TVaMHr+ z1H0GOsPurVp|@W>eie7{%lS*E_kG_Ta>=@?G^y>pPxX~B3w{BiqJT+US8P<(6*5aN zR(yN+XiW`!^XW=smuBtx{!1(F-Cq;4>-LpdYp+(^tN$4Hb>CH~+1+1Hg_=Kz)6#tK z)q6tIr6B26^`E)QCmy*?jVoknlJzpGVqL#xhy3Ay=-W#I8&^wDGW))E-t?{h8?D-R zZtHbeuqW<6!)&i$zu*3wF0TyWlMm2Xc%tK3tA^lDPqvcjJ*rplc0Fd6w0LD->{!@$ zN<2i%$aqed-QBX+Oa0z#yJjDvJ;U4JluVwpw-!U8GNs&ZjdkO*<8^SeVOFUZISw zL}!laG=&Qfw?CicE0q_f*x|bc7IT>5v& zZb_HwOS!15oRXZo{ny29*8=X!JS{Cfas1R&^S4_)Q+L<~=It~saEcAqDAkB^wK@^N zFj3Lvhq7-@b&zP6h~@1cnY>?TY@2M=)~}WLCaX2q>-N@T`=*w5ma2LL2KeZ)Z;d@8 zST)@bKa7PhAf4W9oQ49>7- zytcgh_`2MsQl<8t+u9zy@}K_N?{t>ao3E>Lk7=##D-62yLS+RLpG~up!^GR~muhpq z<-axEmH--_Gv`_?WOIkX{nw(k8a-q&3NEq??&SJhR_)!=;MVfmBck9)5`o@$tO z)_ezht4XFAmfLn6uDkU+?b*$RS;wxu&OO#7Q@CtzdS+kk%TJRnTmF4!b~vwkZBBwt z`;jYc!q%K7NvnAeFRuP}v}{+*HrMV_t(jZZujpP_QRNxp6_TLFIY+DJ@wr(MH37N1 zx2u+}k4ygf<5Hjc;iM}m6`!ht%i6QI`+KHuHgydQ4|^qYb7iT~_Gt`c&HY*1+R z-~Co_b?5#!SGKHCIUndK@S-s|&A#HwUcH;^mh8Bxl{@v+?Zl~7E7oxpES-8#)l=hp zvsQ!q-x3p>(n)Il63*vsUgyb|=}B)cIafLBwfn`oYcHPNd!c)K^|ZGSvo?RdHtTo5 zM^Y`7&^;j9}I&a?YOQwy-Wy|(DIx0H%upCtQbV8=Cgk5BBc>8a&AMcAT z0^gsxGRL@uRdun0@XBX3`?gm8ntSzZ@0XpIZeRERW_rIg|MjD}%d$RKttxdDukOqd z5$k{IYE&t~$#Pe&(;B<1f`; zmap6&6#aA8@%+55@FuNt)0ZX>w_Mn?iG4YvB43E0BBMf+yQ+-m!lkd{V?47?9a5<5 z-T0qjt!>v=-Q3CBBq#0lFWJ3pw)e7XqqCd7)y@@r&B|(#HFryAOv=imYYqmty1IH< z+O$kEP2IV9dowquy4}q0rhd^k)25u&zi0nJYkh4}iPP%x$lLqhzTG$Lb#rQbx5*l> zQx?m+PSqQBC|UPR?hUO|dDy$a$c?+_=iEQVjbB&0vp;IGG+&n(y(W6z-M~Q4Xs^HP z_RYVteC_f|?yn*Xk2jt2U`d!fQGVUN(jQw+%7uAsoL6!G@(=B;5g$9|8F(&>yMDiV zc82OH)ortTt)>V?uhtG<$W}l7-RIjIe|}s#pZ||{Zp!@hnZ}AwHsybk)tIMxxb+#6 znbv`u-|yYo_fO{XuPIxzb3UX#vN!lH()^_7kB97*Tc1Sco;s|kWhFR&ZMby1XWnAv z-H#u;Ph(|Y&mR`{E91+xDI2CVC-{qwh`{cBfBLj!fisKdM|NhA`?U41ic=tT@xPGDcng0y81^K4LM&^n>Q7}F> z(=({TjYIo*=9|L%8)sZi??0*XgsFP*wCDMSbAtErW`sPSxN^7M9bfIlX^gH`3s`tQ zPAF#a6sXVp(W}#UXM;dz_1x&%eF?#yR~zqFsinV%PXUM zqYGE-o;)6KnrmHJZw8l_cW0@ijmil@tw05jj)_+8ANfVAIfWj3#K@%AyuXl8`a0<5 zhbNcLsuh`^c#)OVbU)~6^yC-+1SV`P@@JXS;xfd<=;EF!w>A&*M@1P?843*>et=`OMso_^- ze3rv+^=wb}Bhe6zwwdcMZp>bo)HIP#bIXL=3KBLB@4wqVQ&xHX;va9uwGR&~>ut=| zxz`$&oMpMg;OwfP={ln7_@*2AdhyAO%xs+7aFf0gu_ray}Q;AcIctZH{{hyzE>8;(`yJ}-@-GU1x zuM6&2CGOFhnzdv}rC*fxv`MqdGp?=kE@G}eJU=J2eV~lH;YViUQqG&%TmoU zZ&z&LUK2J*5ZiOKFQB4WNR?lc%tB=8qaGV3T~I#zmhdz_~Ywa zSTpx&`csc}6Tiy+ExNP2J?(mU*gGq)D6?#-RT`|v4hFg}dUfFYDu)2A6U{#xv{>VB z+jud{_A562G1+vulPzgY@x!p(?Lt#m9&DX|s=#*hOgsPj?Z02l2v}n+Q~964UMtmGA^>|gP(%aT^k;kWczzDmaH zK+r)ifBp;w_rCuO`B&!F8p{iHvt3r)_|LUb{YJRCkno%*%?~{O7&=E!I`MRg$%I?t z9`Dry9(gtxc}$)tEIG4z4sZC=ADu}PZX77t_@5zQ@#4L@3JgUr_g`Kbv3~*A*Py?y z0ZXq%IA!U+Yd$ToDJ4bj(u~Ji?E83LT|YQ;Vcf;*Wv>kTZIX|loAxKLOs#Y3bl%I; z>ZaabKIxWhP&oeD5bscC=g6JmCh?dlp8s~4<6o(Ag_1Z5hSPtNJPeBVkgyuxk5 zuj38Mc3SNA#`PJKdzaERV=4b0>&GY}a{A22>vOonHLtqaJE`r# zq@A~x>BL&@EID7ez9Qdx@9xsAMVv{S_Le>L`zySJWob=gQ>%m5g5pnp$xiY2woGX4 zEPUDMdb{AKxRu}6L_LPhC+4XhSmZRRW=Rs`gaY>b$qS~xdwBlywK+#T3)Y_7bERG) z`rV_uzgMS5NID-onjhS1)35SR(WB|np<+gbk7|xjdnTC7@3`N0`OPoK^E#sT2IivI z>z9YS-PyF`%GoE`{pDKjWuXgiuif`OJR(cuTKhB!ACr{Do!n16EEPHoYO=Pc7upET zNb%LVmvBAF{LDk2H6cHrzhHkp%lpf(wyo9E?iNPhiJbH(OjX;*N8;`L{@#e_?xlP5 zI(yUC8GGvQX#DNK_vEL!{oL49;(FPOb2Yg0mfm#k;NvqeV(~nv#{akUoa(Nb6DQ7A zu6bJgRcY-X-N(mN9oYT3er&!Sxq5Yw)?}B{y_zyHTvJ!9mQd+AR{h-LNMVDiaq{}= zuZwv1XsRVL^h-_N6KuFeKV;2z&!|=Ak^JkHddXzoYQ7^X7gz6hQ>nyvIghaOZB9-* zcFS)odS2h0U=V&>;_nOP6}wN}PTsCNcW2?2YvDVXELJ|69<8Usa)|Niud+8bnfI9= zbiHqiP5vxl`Q>P6#oS*BD(r!Fi$#JjX8X<+c^j)W+m|(9Y4&WMm0o#E)4Zl_SpP~j zvV)1uMEz*Kl6l(u1$^%FmqzX|QB}TheE;+Pr@G%ohi$vJT--nCZQT8pE3_(GbyZXr zIvlz!q2&2!;rF+z4t+d*tmANzw7c8EpIzZccL=}IJ1!UfK1$bo!&T3Mg-e`*%b2f~ z=rB|=Kk+WY7_OTi6xP2wx)9w_1N%6wfz2|0@@>~%K-F9g{ug#RD zsxlpFK|&Ud-x)ZMt^Ra-PoG01|i*&aHeHZ+$u|XUV0&wb!nP$81@dRhaG1 zb|pdNxTdGa{PXuG*e(t(c+K6cEa7>{@@-?V>2J3iyxU{jpJ_*@2j%_}-+wqSls6=3 z<@zVvZZ8lERTA6MIpsO?fu2{|t5tT}wwcWT%71S0ysr|iZYONM9kYEo`AW9>k9(Jw zuD!W++pN_O!iD;rFMIJ>oD550wtX`3#*?WZewj|a*LicURRYgnR+UE|tem3m=l@(0 zyYWARQ0}w0rPs5ptds6%Uk(abqCG>2-{ct|o8P+akIrRsxs>QT9ajG$QqQ($l0^M3 z^A8^v+g{$fw&Y6Am4HBttKkZdPh@@&eAjoN^1br8(o>;4hnJTA4m~xutL^x@b`dYx*1Eie2TUA4CnWD%?VFv=+!oHb{lqc%eU;Z+ zr>qsLvAn4FtvGw>ZNJ;D+S^_qUaPw+YnilH(375`fH#hBRD3EvU2eH`%&thOm<-P$8~ zg17&2R%ERVo0#{yeZ0Q{?)|tU_BwavY3JExD@79>ms(7_DB#|-G+FYD|M|JH*{LGu zLYvucyq)uD6-zM3jgAy$S z`JwWsxA>iM#-(FNzcSUB**%{tm%DbJPR7B{$Df9aFWw&(J^$sTTV;87)@=^B`Mz6o z$?{dHd5gL>%&|L^{&~VW8$QvuM|!Ia7{zMDqPeps>Y_g(h6rQ2U# zeyte&Y}dAf*I3@#PIzth{-c@R*HEo(mt&;4IT?MghDL5g$I}2U0(L(d-2Wfeb?50wP!EbewBS@#X?uD z2OI6Li1s)NN?ewdJk2S3EP5JW@zX0jLFt{3CS?1ht+;>XxlindcV4xBT|K0?URO2Q z`pV12=)7KZ;ek|87j2Wc5kn=k>sE zttU-QOmu_HPiV!cDl{!xC_HBlzn#ykvjvKMMh@KbW?IVI?khbjtQPmde)^e!_W}Es z?VGzx+i!c-{Ht% zpN3`R_H-|G^Pg(EtMDq{{IB^L7vIaxdv{yy)2;QNoU&d_Rn&HTv}KjQ+O^4jTnv5g zQlYh}N@9Jb2V+kt^xM8T@x<)qOtY_xuD`kx_E6LN$(jW-nhYG2l?Ci$-OVKgXI#$x z^FmT-vc-%iVIeEl#l}o!S!TOT&H6!qPQ}MxnHzTN>lM!VbWHoQj`!aa-x8|un<6iOfN6FHlNi5}oLjDJPUayL-_s`8=8@fJZnsv{-#iw?j7yYs1 zsD9e8e{IgMZ^CO`g7+>dOr0PgUCc zW9o8t&tn&)$)^|6EH}XJ8a_rt`ifRo+aFYRXfd@jO4@V4`%pwdlm1Uoz>h zx9Uxu`|sg7yQu=NuD{atYqt$KIp2uKr>42*N3X^99VexemYo(>Bl9aBsrz^$W_M zEN%aG>{s2=BO=YdVW&M}y(?5TR(Ww|%rD=uFs^Xfx9@=_JqMj+H(seN_B_uuUE;P} z%lu1SzoPa|-!^~6%@-T)Y^(cJ(lu*V^~71-T@8~yX{q-*xg6j;R(dTHX~a zLc|{Yp0tFq?~~_y+vRUcRo6U_W%B#pc;HX=Aw%iM0ryW_t^T#}WvO|XmFtc*CziQ} zKI^rAG;N9I3a#2Dnyw6dM>Dh?<(C&TT=}QAd6vhPDXf0azuD{yx&Dy*%eH+?!L2sm z*GUCkx}EuQ4zFq2f|cbzd$&ys3R2Ye#LBl4pMJEMmrX)a#?PtW7vOf=cV?5gW^-2u3ZVblaaA@ zYjUe`;)}{Hhk_MS?d7NV{5hWYtxrB%``ATKh4XHg=T!Av(abOK&)~a%E~NU)g~!## zuk4dVm-5!>9``J?KOHWra$rKGAZz6Dss9{Zbp1x*1U2L(5toDdO0rh+wbyIrgl?88iQYGE}1fE>f%Wr z0St2g8C1SsoS_KBoTnjO5fdr&k3w*DZcsyIy3*+3{TFyn?pVfve8HvyMGIVE43WMdpfcwh6x4)TI&1UZT)90@BOYg$zZ($Q- zQY(L4&9FARn!Mv~-nH&p-D#a!2A5aQ;GM*E=&Jpc4wcHEOeHDXSDGZDGBdV%bNB!G+Tegw&l`=Q~R`}+S4+yxby zncv zl}QE7FHg1#Fc{zd{qpSIsdI!)4D|NeoGbZf8ad5{VY-#CvDxA3@Z!tX+u!%a>s+@A zV=R0os`~s1^9eULAC9@&kA%9N^4_)`zc;fWV|n7eUl;72zxe$-ZtbaGxo5+^X0LvB zxWC?%OYkYnO!X-Y)fs-WA9Ig9aW$&XUT3B@^KR5Bk;M=C=f8BhEuVThEIPLCZ(w=u zmCHI?oNc<_o%kjycvMMU-GINMS6ub&{=DRda{eE4Pkg_%b@MhBmK%Xfx95jyPLter zb;n=j*LIVf*GYOAB-kKb{rq^PYfuDBStWDdd zeGd<=PI@85q4UwZm3P;I$w#dCGgKr6-180xM918SVsq56+R0P&ea#)o6kj)6i_d4( zHOkrE?T@KHtS9nr|K>}d`$DfocX?!L22Q)SGhp)D#~>0YV9le^m0qt@7-2wm#t}CvPrC+Yfc19=j7HyOUnN>~|;qv-iekMQ<`rTAs^zB;3rS_4xitjt4xRPP`*2``Bkk zNz+Xx2Zy(oS@%U$wz(=_GAg_>>-o}O^Nz<~J)LSBTN{-l`d%@kbaQL9pOE!RmGdX1 zUYVJP8-DnnDbpD|QArySM$^S<@=@tT)b5tczfN= zRRK}C&t9IIlX|$%=E8!c>Jtag7vI{jUpVI5$-F)Z^NxdZmZr^Vx7!`gKHmRumg2&7 z(O0idPoKR#AUMD)Q<__G%g5uUtS@82n1$Wrhv zY7ui)cQ`4N$>uKHH zC#~6d>R!ek{3sW)=~{U2f!-o_d&cN!;dOg!U#i`>E5_HEU$-nO^IpRBJ8Q3T9lvmy zXGKA%^9rG9Cs{ea*6!>K+nai{+M=Q5TlVD1=kLhmx|y?^7tLgP^^@`KywcafFDIwJ zliq&G+}&(>*!QYmb9cP@cGh@0_g3e3R=W;u`RuhfbSZbFLlFClEv*_$WX{cr{qTOP zn@jJ?__|7kN*=DK;XKJZOL`03Pd|@&@%nOpNx^;H=sD|Kjf6C(wM<&I?8-U~4G*>u zPbM}8_BW-Uw~PEx`nzt{t@~ap|1&5R_QzGMe$TM~+%%i>j{CGGzyHs$IjtaDem%npm^qSevMf`WWB>M|cG2)Qk+e>wNdC6%&tq4nnKyWek(w@5uR z?K@L?%!;SaV|L$|ANf1z@Rc+@(Y+???@hCh%(vLJ>UzYA=#_Gd!lrBzWlEmXtmt*B z^s36g=JU2Y!&i+`qYceUvY2u)flth z*~Kxt3NJG>`pp)dcwN&q;iTHygMW^DswAvp+ttqUZ|2SD1IwO@-YM0x=b8JukNd0V z%`3cni(~$l7r)b@4r?I{e4TZvC(27tZa=>E>Nr7`J*??#}fk+ist2Jnp%CP2tzJi6K*F zc&v0cnecqel#cU>$Lv@BXW%$ko^niZk?ZU8$}6mkKE0@Yxh5vg<9M2g`OJHJ51IR( zne;hVlOxc}y~gvi<QnHZ`$26pDX#z-8rjIzquZ6CYb59 zXU=|M2Eo4;LhcpkkL_YV@SlNW=JU7f7(!(^R`nl|sTH!-*z@P*pWWR&ttRcaoF<>N z37BN<{^jSS8i(_ZR`0)+&X=T z6Bq7Ap9+Wy`Q+52p_y(|+<05a&+gU?het|}B}JQy>dwi0HZl72Vag6kFM&xLmd;(!2a}#gZld0=0+FB-`&>zGcVUZk2Ye8n>^zJ)Uy)_Gavu zH0f2)mQP^;t#f#OFw}4PesSZWUnN@egT3z<%YI++b)R(k^iq|=z@P>eewMx?}ejapS=V7BaRv{DjRCNnZs`<9+ z@K@dX+f$@1qT{%VOG0eU|45Q5sMB2Dbiw)T3W4ay3alIk$?xPMqgt6&_a3v``O>qZ zTT4)ZA^p(8wR;P8zUBH}w(s+%xcAees!Bo@J$9S6GjzimRoA5=hmI7+%RS-Y3tX$0 zv-nDikVfG1o!=Jd)+vQpNeDmpmz{p)?$Z4ct5@8fF4`JwGwoI8{7j#ai#q-JD=uB) zQuE|G<&!WsQRmUrx^Mqv`d@xstovie?lTv7Ry^PT)@Mo6R>fnpRAcsbYS}$GpLwQ# zf?x59BO9_+n!+WHzjyT$onmILKDY8e!>`%5)R)y}+?%b|ckzccccXZ8@yd9Cx56*= zT9yT|ER4CWu&w|b?ooPICtG9hQh`__g0_HJ+!jD zxzsE$V4cE}H9J=LcPKdi{Oft$)>G$B)-H$sD+%-0%1zRyI zj~+X&!zyHV#8dM4k=HNO9-QzxS#n0U#(?wNx= zx5nl9`wst#UiU*26?gG`5{vyccdwz#ji%+^W{0+aGkYA`ymGdA(a!%2O0ymu?ptdS z{*2Xy|5N|jd$pTN?mcepF}YQGe4hLIWoBPjt_#UX>??g4HEp43S#@nep}*p@OTkBu z8#4D5CH&jxXPtT4aYpmRCG}e>ioUG#IkI?b-O}~3!tYdC&g>N8RQR{7!20ol8CRO4 z*X=aFdL>zDVQ8k*hFw7$9?1r;T_ZF_Go|OF?MAia?YHVmS@-UGu1(%+Ov78&;M;`}-5-nX-< zhZ5xDZ|!`4#dhnCZ;RL7e)rx!cZVU@;+j^SuD`kL#>?Za##a8j^ka)m#C#72NpFv=HP^PZU|Pq=e?rE~fs<2V zvatH&ZOyZ8?~$`)<2ZkJd5zuUx3O;+`)mHJdb013fT&-{(k(tK?b$qL*zP`YX=fh$ zoe4tw?QgBQ_G(t6dh+8_Q?1{f_uZj$y*kxf@sxXHkjx~9C=Vr(!Uxa4eq6fRVyCmj z6~3aokL==1?o8pE6Z{*VcDu zH~l|2@=Eb-`Cc9IS0Tr&Pu=%Q zyZv(AZM(y+kvXfg9)_IJZ*@<*qa_d^q{hhpmEALWZtsh^%4cfY47WdbKPsLkWN`oH zE91!g@Y#Qlor+vm+2u8>-PL)u)7}*-q5>?b#YTn-{FejV4_uVFr}RROeg65mg5?Da zOhOzcxiz(`Uh}G^R=3`I)F2!p!@FdOLdzuKMy2{a8Py>@KHnL#j_Wko?OAbJWui#` zr773G8keqi-8b>R@71OamjpST8k!h8`hK2&z3k#iE9bS+9`-)J{)McF6*S;6s!IR1 zXt%uo_kUk@T!}Qhx^RiTU(@IJdp>0HF5l5Pzwl(^Bz51*eIIV@sNg$#(_g`yZFy*U z(UZom)O_D&f9JG?RX1Jz6YuHfz7Kb84?ccvAMmkZlDeQx=ZQCN93MOF$ zlUY;I;oH@f@7Ac5Y2H$lId;^4-m#|5LKYIo{vDed{b=!utG^$etoPkDvv=EW`-k>| z2UVZ!n`{*Ne1h_kM%xXg?>LWIBu86qnpRNvSwp2d>{G!{-8`2pqqVPtMf<};FPZFK zYHO-BKVyQ!13AkH>pmLF?`xMaJG*5|?rPb>=#QK;r!26(bg%6{gLD4v>$0moW*_Cv znja7=YikvwRj$&>eCH;U;}3&sw{@{y=bASb9IPq)`7boA%W0yB?~Tu^**BDjE<2cW z+dAq$gMZTPvZ;ZZr%Mmm|47qv3UTy4GQ)XNb*07eDZw8i)plfFOB2%gJyYVik5IkT zt`DAq${~6CB3HileYDhiXDDx|-Cb>$6-(O>dfit4Ty$W9|BU13T6))2t7Xm)e8PCp zWIlX&A zk560IY+E&7c3MTuqCDQ>lDB_PZa3#HnVzPoBRd|aZn)9NmFuQd$??(f^Sb@T zRXp~o>MpteEDnDESG3A)rgcop^XvIpTmQ=JU0!_GYdv4++p1$zq*7KsU8$cSlvF&k zFV6qvD)%GbCc9=Yy5}I0-%~fW?7>Stu4$IHXG(l~8UCs5(d{iSZcho{b!yhC1tpNpbKmPby&>h&&lhZkp>kH&XYEeO-9J~0e@XeQ zYnB)Hyn3-W?6z}gT#K#-=bzVd;l}UYIlf(J(R9mX-OTrWA@Qtjo>R0_0$u<8^-W#6 za(U=o@z>4kMAt21j@v0Pqr+(ZmA=cXs#LFjJo(>%8vP`rhT+n{tO^Rj{6EYTCR7NB8Qg2+WH=UFj9Sv*;d!$Mv9X z4?aaJ9eaECxc=_XAJ@r$^!;GUKD@McckN5&klT}1oi7UY zdyu)QUhFDUV85r}`y5NInCeM(cc;fi9WFLIUZ4Bd-S};+_=9sQ+phC0my69lW%u^g z-|kA;>*0l4?!?{|)l7(#Sx{o^>$We&MlNHaS0R^Y%f~1#ju{~bc7O114dN(0^{Zy* zv-L6V9cvn0x1HH*`L{A`ZN9(xvzssP9euU-+WE-5+tRJKPFcIY4AnV%=90Ycw_CYd zUjt5sEa2|)s(dDr@N(W%)~S6{UoiZsc@eT@lJl))-ley5{!J0x^XgZ4$m}AsbJy>d zS?^MbarM2r>)1RS2iC@yikXv7x@gS!kn9o}ai=Vy)Aae@9mh}4Z`GQ3+9S+y=ZC); za@P|>3o1WG`kXfknzVI(A^WE@2V`D*XC#N}pZ=G5{xa|B=*z3$ z>Ym!WyK|{`%H^v8lV4xhk+AJ|*5?BUn{V*Q&pLj@Y-OdiyKr8u`Elom4}I4KK2m13 z{BUpY`~M95{;SV*)@Hx`a=Xm$>7A{==jTOd+}ppgda0UQcf4M%^6S$10(bT<4t}jjt*m+JGGWOMn^_B2#eS6x@N$~A$nzbCfqy;w&gT14Wgpo3S=QAU z?mxF6``lR%=|^p0pDxutJg`7m%R`92UjFv>V_Ub+lruX1uIs{?b&iR5SCrq4o`1Uf zDyPtr1v-6^pMTZwf4yj}(z|WTE%Kt%S~8t;+{+edO^UJ!5$gNkUw7m9x%B>A}I(;*XTFu1g+vj9gOR5Ck%Fur8eX1>p^WLhn z$J3OB9JAIXO+pJ?trfj>oG9XyV3uIi9l|D!FPpNW~$ic#y-yl>F#Fl=)I-Y{#>)%_p$_Qtjvg-;@P@B+>dDblN1grJW4>s}t}eAdE#bh*sX6)h&WZ0NPqyF63GVt#33N%Imjxf!>!Ci~rDeY7*X>e)UQv*rBySxg(IERkS-{M;?Z?+iiN4 z_FU3t^JMYw^8Gx4W#9Y7S_?xSul~;vIhX6kx7|lK{b$%Cb9?trlg7hYtG0)%eReN6 zsBKELWB$CLNzM~z8t}Yi7kYf{XcWgf=j9G=lRrKGx^DV`I?+evAIqj)EN8sG{@M=F zOKYdr|7w3-_kLTh?CmYzu06W5V&~m?(#-{G*F7U&Y}n4tnx~rEF5}^y>B*qez*BDG zt-W&zf4=srudE&S?=-)RcT?T5xp~`#k`=KHI>j5F@b6z5n71(U-ko(9<4$DFnt#E2 zW&WfnZ8wf*lhQl_Rc$|hyPPL}{DG@D1Ji>CEAD8@mVI1cah|7GQMKXt`afKrcP_g6 z#~1Diuuj;%GvfWS?o#dil_oF$GZ=-vjM@J9U(ytj>c-oP&(_+W-5ALbdd%ZK`?-~8 zbFJ*0oBP_oEz%Bq^>yvbZAuGYb(zaF$dum@tEN$ z3B6jp>0*4?s_PlwT9++a`>N~7q>zgRt8Y!3_*Q|_t3CNw-7n1@IjXlbxA%NHuBGPs zX3qD`TYZCrFW#4meRy3<;88TM(G;Q9sZEnO`)@o`;{3Q^jhuGnnX1k5I_5T4J}IAG z?rsy>v`MM4cd2{v+OkEvUIw&Y-WqhMU8{6yO^T+W?4IWv<+GmN@{~O+!`kt-Z>RL- z@`^T5|B4{1$2ofqg9|lQuiI&3d%}C&gX8*H;rCc7-~O6YYR#wiDT?U{tNcrb`RlTl z9#PmoL6$vB>UO#Qhwa*$?@jb9{XQ}tyTj_0(s)$UVSdKsI|m!@I5x;8?(8v`>>;Q9 zM^tpyxo~5-{|u*J`sVI9cRy{}&6>q`%uZ=txw6RHLG3mnc>P! z6C94XE6V=F(gluq2cZ|gfT-GAp!DxRO0;k9qs#nQ#w zHoQ&NN}FL3k(&{qx|K&WsrKT3hRJK<*qd(X*!0gh|1jXrp{GY0a%wCCf@2-FZZBTG zWyfigt&^m)Rv1hX{oWby#?XHG_bYeo+h$xbh`Sv2Z{8Mtp-uN=B(79mTlXRF;ij-o7Yey_TSghg8U1e>% zW~p@F*RVaNo_>#47AVb?oy1=8e)9co$?lFHW#4R9Rd8^)KVg|cfp~=XwB&v(fg9C7 z=gocBy;U@8{nqN*r;0Z+v&%o6mgyH+maydH+sWT9bGJ`=^J?e2$EPM2TsnFx{XfID z`%zo3M!#Nk*X!2GH4E=dnfANMU1Z(TrlSsp1{MVYD$Enq_xWV4E=CS(vs~ZK=_GhDiqVGZ<=AGS5Djp2BRLF#o&U z_Z@#UwEcKgrtg|5r_>Vv^07#N%*vT%JH^EXw#^K7|5WrQ$p00`$41#N_S}!fiwt#) zmY-Vqs4VE1hs(j2-oJ(Ky52HBdu9q*am&-r|OZF+&+|i=#_cW_PMv$*BakEXLFPWFd*2z3{Ocv(tpLza5(%SDaWw*_u-Q6`J7Htl_9+J}~J~i5P z|H|*yp?9V|WL@vvmD{MGD*teucaO-56~7yT;9nEc6YC>? zH~O_}=>FB;r{~rN-Tj@NcYVLhZLzy&3tcPIZoTGI-8SW{Kt%X@#`%Fy4yUk+@|-H& zb!ANo_bv0g2N|AU7P;K{;ff1?esBCsw#{FpUvaPc&)^-t_Vm@biC@LuUV3}>ht%w( z`IT7A8(u9 z)nDBIPo?PUq}4gKsX5-)-tFHu%Plwj?acf{Q`46Zgg&jlQTt?R{wL0;u=4`@T(!I^ znv&)}xZYvd@znfTL0$6CtwF~RzbuhB^oqZ&{L8wBf9>jp>lfRvynS|i#+UD_M9pen zN4-qyI=S<0-mTTzb7U&F94MG}Ct`xd(&Ag{2D>gQb+Amfx9Oi7y5xbl*~6I|9@g+{ z?LKF7D`=j^BS$WuGt7M#Z~XAU#YRa~L1r98{JGfMZomVE8gN0t0%7!_tb;_rX|A?4eZZAaSrWUYL8O)4F`8tZ3; zOc$AUQKDe-jmo~CJ&&K6L?;>l{p4vUzc0(IAtB|8Z9&9x`?Oa(cQ@02DZCJ8biSvoQ;X%X4 zZ_>TRd}paWcNI@p4VU=zJm`W{v~TN5&6A!i%RK)x=;u60-d^NCmz%?2-?FDcu`6;| zp9f9JdbaG!!zRHalO`ys-<~nS|6kc<^^EA`b8Rj~$EMf^&bI$&`occs&Dt%GufC0r z*>P#1N~_YM8SE1rRUBJ%jSac%jaS`2a!2;r=Ct1Lj`QlLN2Pz*dpr8htJibF=YOnR zwoKb;{<=W*`2xQZh1A&hZ=ZiZ-q`m)LrB^c`T4)IUue2)e0(OYHE(*cC#Rm~@{T7> zH~%wCR@M@}mGbZ8^-E_`I|LRpeEG1>yD}*xdwbsd-<8)dM8@hW%lGi9+tfa2{kQzg z$xrUhH;(dmMxNTv6}V02w$0x?I|IsU9MYXrsvouLomqIQYWr<(UjL_@jzM!HEFXWI zc|PaW>Fl&m#vX=C9xt~l**;-$E4$^VW46DpXVkaZ2LzYBtUaA#wf$mj<|<8@J1h&g zHmy`Be{U$PFq7fse+Hg6i`XPouN+@yS($!n#eE5vg}Z$o?TXnVDl|XPx4~iw>0^L%>E`ZOgT_J%4o>^N7_FLh;A$_h~hk)>Xq!V?N=_tY<`ZnJUtd11P000VQK zeV^v+J+&$ea}OqM%kwtP-Spn;Xz0!-EguRQ<7PH0&99W-+c?Cr?98{`K@SEWw^w8wQ zO!-#DhnqJG&A6F){mS2&d(FMxYzXy^sDG8S7i(io@;gE_pWmfM5l_Dh|CF*O9>G*duCK2jf%vDd1DSNxaVI(sqMy!Z8{ z^;@@Hy}fSFouZjXFWYd1H9ZdS_O=L-F9@1cP}Jsqd$;67Rfj${ueU2wO7E@zX0BOq zccazi<3FE!&V87;M^kR9?DhPR3yO?RRP89QAO@BroCZO~ce0lh z^r@$pvZr%T4Za}z$tJV(I^*0a6Yk!({kJ85r9TDnPUV?&wkmS|@wfDfiBd{es1V{mXOQe1aZknX(uoDHo-9b8>Tb>{uub%n zt>J$L6VIG`;b$-Vzq*sbDy!|`#60D|LQ%sTD;QZFdgOnaK;V_EVPmD=F__mpZ!q@CGxLc|P;MG1#S> z#LSp#z}H_TQC)sJ#?_fWI;KD|t|L14)TH%XTuSNtcbqumR+=!o#Z|e?VB5lf@6S!} zRd#zEUr}*o_Vl$gixzITj?G)NFJL?G+U){~%BR9uJkI~h=FFMqux9mgn;AuNGtT;} zewp;2A&+^hYn*HBxeH&-ZU*1>JLk#MCKnZ(yuqtB$T7$z;mF^QYkcdaTtw@4R85ol zYxwYawC%yx(#GkcpEu~wzMZvqN9L<_@7{HFPc>RO(KG5}i%Xx!438Mep9lYzOz%|* zPn8N4T=iLD&Yz7gF?T&)NU{~539x&)H1!p8fvc^m(VXJ;W%5%knQbOFRVVb_uZ_$} znLEWZ=jha$m->PeIp3R2j;hWszH&>i>~-qxf}Q`E!y0a~YCQMaFo$X3yGM1;*-VjehS8Q5(DKntmA^K#`>M6-K)o1wU znm#laI>{*{>smSS#IGNhqmOeuy)!NNG<$tj*V5%0tIfq;xB5mnl&W4?>bO5+%8H*& zJ`P7J->+DhdbLwLXuFLJ|G&Li9?vH;?*Df`aN_)`**BIKl`8!$x&0;S+3u;bx~_-T znwWjhy8B0&_gCLu-Uq50UU!0~GIDyVG?e7N-E(dA3`T}ewS@xsy0#mq-B_b18nWZnN&O}Mi4rGO+;|SYG3n!PU9@A4kaRwC|5}rskG5TS zdujgc^is>6zbXTFl{0VWs&0O-x$)&93z2F8A;*))zn^%!R3%mS*!`l;&@8D++i>UDC_#!x8=hIToqsked zdtb-?UV7-kr!3L@72doJ%yG+?#R$}-8I-=xlbWbs(L8^~8q+hI8gH)G+BjF}*WCNH z9A$64vleZ;XI7i^v%WQC*8($tI{p>pq?REX2Df?q76U#w~D;uL(q!gcIp5e@@ zoGY4Y_B{UES#_7Jy7}+-+TAecQa3j;w~n5Fp(6H1smaobZn92I+$<~Baiw{(xd@00 zKRmPXk@Aj}Z?CBAHe+wRQ?sHt`KH*sTW-{NN#MNKm#&|XD z+b3tS2|tb-_WY}#xb#kX&xYxnWu_mVnj8D_or%uQ%5Cw&QE#{HkP2y2z3#*$GTq{S|p-6H8Y|f1PU1b>Y!} z2HBwbg}HpHYOyg7j`&wxjB)#W?m_nVqb0YkJC&Qg=9Q#e<_No6cHc8wJ4#hcQ$f>x ziqOJG><2%u+Ot;ZPTP!^-_L5cz0kR(@;Ykw{EWTvTfWQP-kN(`R_V@{RUv;JgV$|f zsy(~mp&Fyzi_V!Rm`u}uRQDZ8Wnr6qhH-xF>lrnrSt&(_XMI}rc>A~3>`;@iuBa_{ zw6460iU+aGVJ5?_n~s4hfkF+N!szj`ghmH+j;Fp zk=teG&Ca+g3|u`Yq*hx@2kgC`K~($cz9^g-?CQLtfu@opgWS(&9Pwme(B~ z3NKW3lQFPPmOS2Dzj(g)JLzfLm(^OVznA}O_tw+A+|xyQ1Vb_}#IP_qD!K-_Prk42 zRIu0S`WEYn+w7lLJuc^4UvS9n80Vpj*6XTcSd1nGg$gqo@K3(I<6ZrI*TY5fnaQ`> zS(H;<-L8I`^gtszE3Bf4d*USb6DR*3+g@F?X{N}vXR*;za+O-8F|{shlUSCd8b1H_ zw~oIi!+?8VyC_3zV6W3vRnOqA46*b(OU^mWpMT@~@5~tu4_IzB?b+%Q>%7*{MuB1L zN>zuI2JeR=!mk4GIPeb_`Le)lrH4Em|LiAP95h|ML<{tvIv zBhTAGLWa?63QcBaT`AZ$EhfT?>2l{AHG56wlal@0wr%scTP!x=@vTKu1CIq~3Evg} z9d~W>_51nxY`R>co|{>WwO#45+QGT4hr&W8D7x_Uy%aK*GO}*pJTVC2c6s}Y*;{W%PHLN|!W6<8_;ixzwkucdvhE7{4dYVzbqC#ZNh}pbDX?S zGpsn}U-+p0&(A9#RRrcNI95}6CF|~d-B4@){|swNO+vXG7p%z4*&!=;Jz8POGCqq5 z9+M4Bx9nY#uxjocnP-b+0yvEhd=`BsUG?Vmw5xY_PtV<1_(yq)IoB#D3mdMz=F=J$ zzYRIzVXQ8=ezL{X4cYBRdOvNW|1*4Dzv#i^lQ-2DEcBk+J3D*5($cA6Ruh~i%zyfc zMfmo?j(*vHrg!pwtl>H&;XV0b@88dRi&&>vOFWeB+p{8b`>J_mPwqNDohlG^;!qmH zp9bOY3XDB}<_f&H<$L5u@AT8f4=0q#Yb77bTYUa|c$n1$gKV!i%U{ms41YK|V9UIq z8BrTl)#@1D_WfsAwz%v4HrW|dp8kA1efsjP5xR>alizwb_dJ%keQT}xhi|Wrnw@>U z)OOLT4MEZRy8nbPF*<5i-&s;5ZMD&qqKHgXjwik{_}B_yYyDii?`of zJl(P0-OsF@S9#wer?4q&8XAOWSXwd*B-HRXhOaAC9C z&P(SffBmbmvFMpjt)XY_RDZ9k?knaYCqtd~bF}R6cozL=k-`KXj<>f}PN=(`)Go`E znpg9)E<{CW)hovG<)*&1y4>=e>sBSroUs09H-kzFrV0l8cS^r1_s4?v+|o z@+z06p4+4Q+o-5|`I%s&8H$S}*H!B31YZtaK--iJ^Mdx&vwNh+%NLC zElr#u)0Xqk>gK)Szp{7Ux?UQ3?P8>R&JxD+zCLU9t5ya0tY}~`RB`F(w0brzXm5ju ze&_|BPs}QBuXMe>_>n`eAW3=e!+o}e-u^$NR*IN&>ArN$xD#EqZf@D{($H)^ab2$| z-dq02$jE;C$Q=;SIpw)u(&GyU1t;B8^K{p3y!z$9pO+uDbPDL4Ic3v!{NB>4(hIux zcYjx1zw~YR-h$hrUp@Xa@FcCjbJ}l3!&EZ|8;&WT7$liE8WqBSOzyhJ+G%E&!uZL4 zYIbdA!nJ4LnYk*PDyxcD-@RP_(0cc@w{Ls@7AI##Pq{Ta`Hy|`x;+oo?=2OUVPSP@ zP?7j~_0#!>ovPaQBD3S>c`bNSZ}T~3@gw7?^LsW_$qzvmfEdMMVU*jEK4T-b&Qyw!ECku_MbqVAdOI!ACI&4&raFV9GN56 zyx{wizq*^=rsZ6+jrq^8V7UW#q3TxtG6yxL<;UNzzR#2So=ug3-*~I`qzjA6zsVNv ze|&1`(Mh|e_1=1I6k5KBF=bo$(HuP$=I1I0j~r{6-BCNs*W8)=-vvWg$ApEY3Fd1V zZ@zdx{nV``ccs>C4OpO=XSD2Rkst#{r|p9I!mE9<8H0`H_Up_MetT`=$t0ECN0vMB z?b$D7mUY_O>(Gj~>n_hS;8)qvv(R0I$za~y&%eH372Gl9;~nm_D>a?5pZ$2761ElD z?9Xcd&mg>7bM2PcXy0h9DI3~i9z}b#7pd==bHu$jQtV!A?GlF7>ZU5f``gUyDi`KH z54z=W{CPl+X-uT)nY*vo&g)*txQ<~8BU3^%%YTM7S$gWFI#1K)I_vwq{k&#*N}q=* zYxUAqJM8qD)~?WMSae5Mi$#q=b}GyHoLA=${Fr@Aidkw+tj){PyHkohr%2S#SirUG zck7BpeY1PBn3sA*&7Nv`{GcTRvs_Z~0_CQgSGFEx`OmQY`}075xe%}B{Z~rXh;tv- z)!e@B+|p%TK~t(7y>sRMZtS~YXZNe=()+;FZBp(_O}aixmp=Vbx5azs+v?EYs#{MB zR?hmilWV!)w0kze2c3$~Oj>EozJbYPp7+VHgeAF;U3MK2FFjY&eyt@#e&?3IwxObu ziz>{2R88HsRMDT6$zsw}rUed0la&mmbbrirIC07BU)ttOJx4A!cYP_Y-8r%+_q<*= zHEizwioLr|^M!^;xruD_yPOoZlx32W&}^!)WrhdFYm@Nsyr?OCBQr)iO=|B-n=MQ`pCp7iYD(sRcTOD+$K@e^rw@-Qu3 zb^6NhtDV0OWvvb6V_EBX{K5*mM@}sh73ST1FK7Mo`r$ifk7PZ5Kb~l7o9QpMSMX)R zfxbo4woZ?lefZ$+Th@I-@Aeg>FUnqwsRLHzjmL-yF21_ zd}rPqt5-ro%VU^4&P;jaA^-XpuhOF!&tNV$?L{BXv9&3rN#qM}75UF_xSU-yU|E*r zquX{Xzp{uXGcQR5Jtt;fDK7Xrvr#=O*7kto zEyh!eEL9wmf83`1tX6#)59iTU$4E=AGlb?9y{vH#10j`P$k?X(AgK zEIxl=ueea;tr_bs@!{N;8y`OBS)ZNyuk^Xw6zj)DZ{0F)F5Z1*n&!1oUrt6}jhRnc z4`>=ss4HF|bkA~zP(yW4+UDLF0&@?4{jzxO%j>qcuU1bjyt7^{)FsQIBIbdM(=+zm{YQK7|+`~UKSQ1_y@3+^weydOaL)w+!j`=1bFQ=Xo z@bG*ll2)KpU3RtV)77POY@F>Jia#B-{_Vo(pM5v=>2v?rwth#KZM$UdUnY90__DrD z+rpA}k_?=kKCAcDWUW7DQF~D8sW1Py>oZ~Y?b6{@gW zWr@GhB#X5U&t>ZPcRY3~;8dNJ)AvBOlKGw1)+1+x*Jspyx+;FhO!nZmyt30b7O!5q zWQW1&mHrD_)fTFph|W4Mpt4%HbH??=hvA!IoW4EvHu_}o^w+bMX~EMB-_BR`lbd(x zTAkIw>Sf!n-hOvwD#!Z8doHWR{_&Fe8X$09LG+zhLf?Oe={p&&R$P6bYRp^Zamx zdeW7Bqo&HiCx(J+bz_+%RT3hDVN{s(BydHN~}lF!!06P9Bnq+J>p|O-t%q#bytsB{i$+CLd?yc zGT46(*y5-dg=UB;)Gt?`2nR=36b!2o??3TIQU< z9@Nry+wpVjni)Q4$|g3wU6**wXYRubxn10yQLofkZ7-h*VqR?PysM;b-Q&x1zn?8T z#np7wG%E1KhE}bh3)tgh$$d6l|cM)b<-N89F1a%yTcaL+uSDNuO( z`!%6At1GIWS220lOl+KFSHHBd{?)9#)!ys(dTurMFYvWmEf{iX<*NxTx_Tz9=Y>8g zoa#8xEUZ@g`To(lub=DJDA~U}cx!h(OTxEPXM28M^}Bav?X(rsoaAhF>~NgQ>i*^X z@nqQtGTH0m3R}B_4zV?!-Q4^`D65n8>}K~{8xLPHv*!&7xaqL^k5f!^|yuItxNAr^sA^|vO(bR`^v{t-QRInfA>nixh(Oh9?Mh> zMj>mLqBr~%=WFeLlyPti<{c1uCVk}dQSHZmA&DZ@0lq7LKRa-zto!Y@ttHc~v|8+J zyEGZOzbo%yz9V;!=hb(9`5mqr>sog)?6j+`(OP~u)@7HRk>{u5*Y;+9v962WHTyE} z)}1e2WxV3P(zEi&lu4EiT$gMnSf4+(>D6Ro8~wirFT8b#kCs@HuzJJiWk!=0tW@Su zSWz^|UrzZQOWD6$vQJZcEEzjLtyp}x{&CcW6{*vuybeV&tW;=amb7jEK7Vq5&(Wf% z?HM;DUe3R@wplSCe2%hL?Y5DQ*UfH(PzT60@ zs)_s0O>ukReP?NYkh$o&=X)}~9nP$LwQJXuOBW6NKb`8B^zdyV%dUCMkzHO58R^WDEIo8*Vxd@s^}=j0QiN8DN-3=>+`vP?=+tDehccXrK$ zTQ!^#_MI!9#B#1Ys8S+*;Xi}Vug!aPs?U5`y~@Pgt6a|UKLg*&RjkSGkrRHpBj%A5m`XwwnQ_5B5sc)EJDEW7@ z>)Xdd%{`eX`Tgmv6{)JZ z$Nn?;PtAzmrgrO2S=1}(r@N-TbbF+lSvW5<$ijU`eIS#E;G>#X(Vkm0xr*n@pUL+* zkj5D4sC`8yT)ArbADh{C_B!skl>2vUMDPBH{|sjHm#S{`I%lb3wergWoBhibmhc_O z?u(DS|DWNi^O|J$n0m8ybF)9HcxLSS_v7jQ$B(_^R{v+>fm_x_P@1GiLVvkl%4}(`n`A+(N!&na`K6^q=bQ z?uw2rcdvAxry-wxQ@nn2{Slt+J7YKOd6~Lud+sWclupJ?k30qM2H!Z*ytF@4Ca*L_ z;ir7njKUqkzq8Cgo35_BwR+37Eq9;w|Ey{H+17Pw-X+J+%8z#(Z(gKlr_h(NTf>Oq zmAQpoU1>|8eyvqqeXmxweCq4Dhq~r!EnS_%I%Vmn$D$m^Mfn^A*oz-OGRb{rC@m|5?`Dx|p>X5A=nW2Z~=3FUIJW?DqtKgup!czsIlw`?Ew`X_9 zCaK*Fo^bX2%b?n}WVt{N_UDdM<~J|fwQSb!B=gyfzF=MFBbZ(v|I4>1?PlTOSu<9#zOA*!zC;> zR`9v`2lvQNReZF^%{J@Qj-z~+u3U-A(0(n{RqV7>q@_b;9ec^QMnUB>FRC|ZU06~( zP3C!Rbe*W?-ib^2m)reVGdu0aUEbo-WixO6vOjFc@YPvMRNIrYV1|(p|I>f#H!?f? zXUMGIa(cyOkGwf%PK|+AZ*MN-QqP_CdTp%L6p^NFbAp_12Ygozs6DAHP*A-1;S?2z zO)t4Gd|g(y+4jJd)q8c!Wmj!4v3Hn#wrD?t%(DgJKLZRI?d7T;mApIMzjU{;yTr*) zD^h-_+?g8P(_r~^QB`-Y^|p(dn{Q{W%x?DJj1sd@ah$Z$?&TAXzUNjxcd|TXP8;XW zbL!mpBTc(RTS9>^IzV@M+LirLq48d^xr=J|-3V%US9nA6Kf}jm>yJ$h5qTZ@n~z=O zlV&QjzEANCEkxpz3=YN$={&!3Ltf)m&3gRb^#W z`*yZVE?MU7tGMxq#ZsM>F_oHLK~q9JRbHN5VepjcY1Mn@wZAehgcpBV@H%(u>t?M= zovO~DGdin`jBnLn-)F*m^hNlEJ%6-Iwz{)aT;QSUDKJH-7uP z#$LE}Iv3w6#gs<37Ux-~-tKCh<^G>R>b1$K3?{J&oLqLvJKrhQnTo`CxhV!ZvY*B(VDNOdWXF>=PKav%92u4ba4~mZ&IoFo$-6=+($=}nNM8WQgK-1 zKD%szJ@1S4ABtR~q*A9#D<)WQaQX0OO=6gkkbdUG$0av2uA~;()VExg*iobAce%h{ zaqhm7-*Ig~9hyQ# zQx5bU4=sDdkd-Rwc=^l9+&r~eNjDDt%JryQ^h@WK#>(5xWs3^7v{wqgEK+m!IMkFN z;4mauNV3suD%q#y!L)ztibK}xxZ%rHVJpr zUZ;3vXTWK(XVYY(-!-@x&C`%c@0#LN{owMCC2zhrTNcje>D`mLUU+wOwUcP&lj5V* zd%PwsYL!_tKg(&Un~TW5iR=$*w^sJ}E-*~xvSN&}^=eg^Gk>o0j!jc6A~dyw92nH< zzpeN-e}BDHgHN}?6od2aR#%r^GBaHsVEKv5!Qs&44=x_Yw|{4B*(vmH9hw)eN5&h5X=Mk@;Yn+o_@@?;d=~o$GaLx>UQ+EUQ!MZ(RXyC?efPvW&bneO6_r7 ze9%Y0W=~Ca#lw^{d9Fp1U#I76`OgricXer)m}p?$;!>}bTbC{Mo9}1IDl$*`woJ-F z&&evUC)I@JZHn8yN44`62lLaem-l%Kr5n?Bg*?8sdh`1)i{4+heO1m{_jJow%_YH> z+b{Zhv}f2ReE)oVP59QLZB7%Ws!!2bL&_uj1xyGtJ%9#4Cc{B-B{RXexO&)H_% z8shaT#rSB>uG4;In_8OD`26VLzC^0*kd zZq~ewIaQ_J>`ej_k7_guRUe-?=O~3wHo!Pxxx}0VA82+5Q zdgcB@r`MkHdUsc8uhl0liIOYP6MlZz6_~Qp?%k7We|_6O2Uf-0KKX6Wammm+mzDQl zM(ld{;k1-j`1R?J&+YaP*R`8&>#}sK6XOGR{g=YeRVFJxj@Y4OexaUgZCUnuha11R z`lCX&-Q_cX#pEtp_+8QV@$`(7?-#ybQg`Ode}>1=n>N(4JmHePBj%m=czb9-Oy;Dg z#tW8wvgva4@d}kw=?lJbe#VszvF>xTZtZDfKW}QDaKQjUn?YMW-3eXsuyo-59Z%7 zi_SfD<3&~Pm8M&q-<#@}MjALCZ=8SG;cTX+?6e-1CuS_aq&Bqm$%s6C8u)wj^56AU zt4;?;v~G`g4!fEw>zpQ=dqiO4J4wzXf*);<^oq(pF^XndEcbIwpR7b-VUN+L&x^%w za;I*oS`p;M)ih0nN5G@WroZ37WZI((N4)3cUnySnaAwSg^HB$~u1pW~jC-7@EjKC5 zN7c=!$v-vu>(5KOtDRM5O!i-D!nzsnvvko0=n zDaNX~2`er&)!z8G?(*-l-s1SKH);<*iOrL{#QbkU#P2UZ!y|uG_Z%;laJ)02NPVr! z+imUTau1f=lQp^@?rqdGTkqoKoh{#4xgF*eRnOnH^FIUs!*an`?tPNVI=TN;of1wncYit*>o+#T`=8;rTCq5re8;2g|vT>Lagb9Q>FReP`1o z!z*I9Pg^{H8hh^ggXrveG7Eauk00A@ySdz!?bWSoz2{^7-mctr%WSo`zeliU5aXjg zjK>w8q{qr{>U+j_C$acZy<$w;yDKWom`XEOvfkGH>Jb`hu`G&TB06$Pxs2$Cqy%XU0y1)+(x|Msq&@!uO!%?27kDHdv>Mh*J;duJkJ)r zUn75*z2+vj_RFtY=FwcNtUL=vUVHV~9?WiSMNbKdzS@hfUqhWxdyci&$8r8C0uv5VMsyMvCtljV}1tNgfZ zz55AYZ*k6pzUuT3XD$05Nt^e_ZfU8U<&!7ZlRtOei}iP;?Tg=B<6;W_@yb0q_ehV% zq022Nl&4RY-E!`I*8&GAW1;OzZeMhF-S)W|7W__YTb^m)tcxx!HW`!FpJ{0=I%w1Q zCMiYm^RgIAuc(<1)qLuw+f+41-&y1H%OuCD_hs;oh^z@AyY%`lYifP_mHj}3gGnL5 z$44e}>fVjhu17j9`Sa4VD>g;xdsV;q^B3o8F`Qw{t9a>FJ?m5X$DQw`zrHNpz3y?)-_+1zkr%oC zcKf`ex`L)G5Y=`0=;hEzV>3ZyrtEaRyq6;=m zc&@tEVWnH~Be(SZ`ExCd_C&mOPoFP;UVg6r#_e+#+0HnpQFl9aP3=RM-)q>UgLY`9 z2q-`37iQyI@}FUaTUFMtIujr76)$W1=jP|tP0w4qH`3MN?e6~!QNN@rw=B%CTNPrd z(!|HfB5}m8&Xk}3N1J4)_69k(#(5W)+%lZSxnhgjuk?Sk_uf(~2kqS82KN`$ z6;_))F1|c1dzPo7IGgd@;|XiG>#SQg{i8mY+5}-my)~l3eGgLPWBEbx)!A{?;r@9{6ShBV6m+x8+ zhp0=I<~@(3^#(eRA^7b}(>1 zUnzLsUh2}Tt518ktUSy&Lxapq?=6VY?EuUxpt9Uj4Vc8}7Z>OJ22x#X)&vF%ug+` z40Mgy6|_XXLq&+|f|rA)g2a&-AD4alsFClq&y9a-)Z4XU?1s;SW|`cYow_w-YjoIO z?`0tyK4@~WGaJsSDWAORxa6ZEy`+|8nX?|E0Tb9ZKkf^;ymD(-IP0ZJGiNPt5{VXN z*s;WedB*%#67N@7gf?-6Ro-U!a`4Zpjg$SO!VTs?nT zer@r|4rY&Eb)}pkFCINCiq~X$P&nCArDOj7%;UMqTewPIviwT;bG9-!a=NXS?xCgP zT)}Vq%al7UexGxZwdlNYyzfl&e}=UQH>SQ0$X}y*wBOg!k=5nElgZ5ce>4C4b&oq$ zr}~IIPx4FW6R*EZywj*#YQPx&@XCE7OIBu1^R3$(RJSe23*RDps$*x&{H*~Y*Q@wLbz3wKoURD^AS_@#`I&v7 z?3QchF1dLn{Yr(O|7KKow{6|dRe3f4?(bjSE6ozOzBS`L>s`N!=aVVVulk2aG_`^* zZSaV?B;*t_aq@gA%Um%B??i?V@@FoYJ>oXJw0r*ZUL&)mb&tJ5&iX#Pyi+%4t7jyx;2TELJuJlYW^V`K*KGK?dea%3i;=`;+#$a_i-&>)U7N zUR}Cm*+jivzijj;T@hf~<8u6bZZ!MhTe)ouWaRdo`0`=t^-Wey=Zt-P`y=;#H?@Ab z`)cUUoE0hRR|9!=h$uANbP#a)Wc;n~XJ~QOL@zNJp5i-`YF7)cm{=w%e_1P9?&d4i zL%r4ftINE%X6ISVUV3cj>E3+{SDxhiwVJ2Nv1J9%#Ks%P%N9K{c-5T$e63@i0{=}9 ziJz0h?oRvbdbwq)C5!en&cJt-k_KtTH;x~h;x28stg!iV?ca_5c{BimvIeg^wO+g(UMFoDPlc*DHBOvA<5*Ml?2{rw2CL6AtKVOKY4xWwALVZR zXDH-X{jefx%gS3-ORs2O$W3%v>FSzf_|C{u?uVq{-~JzElKJ@!)2AC$rr%mRDN?C* z?$n~1vR~J(@`majnm4cW%i5GJOHNs<921#r>Df5H|8i`awd3t?*LkAlA79QF^qBT? zo$kjCU;bv+mv31&@xtq@+EdFpy<$8ynfssm%Y4`T{OxzPaUpv-U&Dsn2ATGkP9NS$ z3uy{}{m)SLpF!)|%k9D0yZo>CZr``UA^C%;%sf6X@3k5tYc-i|7H{bJsKnB%^SC&s zY)R}&!#>B;OtuEDCY!hSFo`^=>c6x%?aSfX=&IQp*X-^)ACi@+b<=r;LMxLPoA84t z6YqZ)URZbG)XWB5iO$_Xdo7CpmY2YaLNIBr{h9ZtI=YRhhPHoOo zZF$nuwZUxG^LJvuvaYXKXQFR0`ITY79bK;TTMinrpRjGXB+%6tWb(dLi9OdI z32|_G-O*Na?E9f?zot@3?^G(gGw&)>ht7!+t6xa&3cOa-C( zB6;Sg!ijTiD`bMW_lfNK9c-dJi=XSIjM~3@AKS}a=5oBM*II8n*KEPvaOt+cFLQ5g zx>*=?Y1ZkeD-mC0xda?onCuPbtIYe%JR!bDY2=cg0>kspmv6tfcFJ__SXbTDjbBtVqP@7h8ktZMoFS`cLQTW~?sf|8edU$AL)e#WVgh z%!+z@f6?8?t~rZtUC-Mb`|4NOVU^iRkp+TXGkwAYA3^3@3NEjYKb-VHVain5 zP!GAEYBSW22ArI3nA#uP|1|vBMQ88itru>tJnQsgB)t^2d4P3vT`&7geey^FCj{E&5_baO3)SRehXJ7Yg>LinQE8mseTI|d6dHs_e zAr~Ip_R?K)sNz_)$LEQb=QBR9nD;^ay~2U#8y8EZ%HBHgRw>SlDd3ly=Od0eb&0q4 z$iH7O>y~+0iUsfKd9^EUl#Pl$FP`XO>iI@QM{8OE@_VYL`4e!hb!;)%N2QUf1gfv@Yw^*UcaO$=>h3OK z3cBQ@xG639^NMfppFEC{{x$z&zR0{k#&b30bvMXDk<~?~a z_0Y0K*ORL&a=+hjUa@1{T(6wfm$`xyWDeB`vFA&jXMYr~+OV@W{kW(6+^Fl_u12q< zlJdD&UyA={IB>mh{yi=8SpoV}R(}cfn)v5?rTd>P6I3NW&i{6;PI2r0%1IIa#kubF z*ILcDdv})VO+Hf*YV=CA_3z&L9U&37*XU|Ab){eA+tMN9bUgFGkL%t)syjGN%{cet z#;31q)gOI4QNX}uw?%gQ_R>u^ixyND8y2rf)o0!BBl=>;FP0NumE3F&{x!MWQLs}l z#6CkpuEO&8iZwS2RTNu%m;AVP=zVq3qGj{eu69)Q*L3g`nECC=zkH#(U;e7Cr+9g8 z#Cz4|T5UJGQnSHc=HlZOTTOOJb=#^QdhGUEvoNzzantmqkSP<2It4hJ(yCv|Oqz6V z*T=9QmklR51U~ebVC=;Bulon8swJY|2UzbkoVJ(qh)@2h?tAzusjk4Jq?bBm7t zh}s_PcSxj2#-_V-rqKMC_WAqodqsDatqXO_I-75vQX)4mey-Q`n;Q4@L(HC~HyJ+m(N#+ggro4J%Rgf< z@>OT)H?di>+~X39ZZ&E=^YT~GVB)yiS3RxOM9JQ?I5A>^KMUE#<4hrc^H%%mG1e&4=& z`Jw9vJw4fMG{XKf)U1ES_i|^9blT&P8}mIK^g?dA|G9epsr)S#zn%>ncWiCoVJe*5 zI47xp=OZ`EFV9&n3r)|uZ?-lt>%Et7+%2K&ix%tl(FEv^zsk^fMRnz7F3b1$kv?yJVyl;g#7jZ4#x@EEaA;Luz94}DkFa(}cUo-=sL(h9Gp{cE?**Ws7&GXzw+g&A}0nyLGGmX_hA2jW-g`&$Rteck9f$v|5GtpMG7L zc*5|msJ0XLy9KJcJN6a6Y+`cgQ{ER;U&Ht-``VjR_FKvp`rgU8!Ca-f>?`wHvsc~v zq6>;WlsJS1Z#=0>e(>$erW2Pm^tbWdt}7|5Y|6cR;(1K>KZWR3*Rx)K*dBT}@7wpN zjRE{#bH86|X%bt*GDG(3=j+Gk3q-05mCk;jd*yoC5B|pXho{c$>ht@0_M*@8f6G>$ z3cnH)B^|)=jZyM@&zzrfoIkI}%)487`+f62EpfTu&tJye@;v3H`zvf|1n;Hg&0pRz zg{;%GpQ<2tp?1lHI+n=|%E1xQ?&7t4bACwOdUa7sM&C4hAX?FkE0PRk3&e0rq!ul4s`?T=Tl?wi%t=o{IYqoUF(P{dPyUeR7b z*7?Y~rVUN%Ji_l8H_A!aZHZ?QvT0S%`|u>wdFQ0ZrCf#6VnWSD z`Mcby&`u>yd5e7cyqjmEb9dMX`pA5?waMOKe^~le{2#NIu^0EAE?6C`n0ni4nuFFG z@7#%kPa2=M*Zo>+dqj3)=ZuyY_e|7}$e7A}o2dW!`r7i)Z?l3#UVC}3)#KxuDtAL= z;?LxhzmuOwZrRql%u-!W@@Tm34H@R|t!^z#UAf~$KKd(6S*fYp@$peH3$L*9vHm6N zCO)1sKkM#t*#-LhTeEb%Gqybl4DJ+~6s%-;9_3*q@e5%R( z>bKY1%KjdDow`-;eQ_?A?xvpBBgwZX_sqMy*h=Z%jTpJxVbX`%r$zjBRI>~y_y5&i z^J?FfJ%M2t?v%~FyDns%Ri{(XLr+!@t`qjlj7nZ!lUBcAE|PR?yTH>j)76*Gc3#n) zTCx5=gWTOydn066HeJ-yX`RFn7<7J1@qTT7OKN^|jx=t-M#zx@*}ars)#G^Dq4U z8aufq`pEM)s{;E2*k8N*A6d48Vcrz&{lDV3?3$ReW?PTgRaDKsi!TF;4%@w-O`FMHyoT5WC)(JUHH(=w>-~Pr#V!Qg@HT+sLpQYv`XPujNyL5UW-{M(bJ6Rn4>Q8?% zvd_(xnQ)?7Okut}*USiw)1@n}XKKn7)?8X7CK4h#n%BbK zc>()=wg1{~ucdV;l*{w?z4>#CUOh}+vOM^AGsn2e)kXfx zz0|uk)bi3!^=0)l7TkKfFjw_>3{Mr8f=KhvGfjrKjg9ZiZ(H7V&0?|9Ek55h z!VmFYXXOq(nGLe6k^i=Q4NuD3H^)3L@7do&J8lb27Y=LLwq##GCEM=LJ`=eP+UFK} zzf6BqZBe{Hu2Oz&si&Q!v20fMxnn(A9nHfX9yW8-LK=iD4((<< zYWRIp;bOrDm0SNFsN|a(v}KddaS!He`(8_wP0q{{({^3Ab9VcpxZtx(pC@uXX=vwA zl~i&{m~oCJ`+k1t^v%5n^;aypb{_xmM`*<)pI32nH9dS6u96cudsS(z*0ZLB1?qP^ zIGRFLmK!tp%~><)gvp7rl@{%s)6|4M2fCHzzPntbyEP}UD?HLUv+!8uj#HE8XV~s^ z7xFMNWZpN`o~7mKu3z&n%~qZ?>r?Tck4JOj-R_0GjFP-NMLf!NaaY!b(`zq>>UvJP z=hE>erJVV|nLk$S0u~dJ{k7J7`fS*zvbZT^or~tdk9K_S-;@Jl&uUJv`kpcS^wS7KiIWShpU&Df z^<3HYJGqK3(<=Nsx&HFFzp#0A{@@csvz5n|e~>x8HZWh0OK(ki6_?QV=#a(;HC zxO7FmexnEH@roWvBcBufR{2NrrxmZfU3h81!xw9MF7Y1MN_e|k*GlA3_my>bgpX+I zum0H`B2mbEfW^~@KUZVk$JqLvQ)D^pdo3ScFL)t!f~ThPcW~QDpW9QztxlVUminq{ zb7kK5yl{NgR)tR<1`CB4dnU2p_YjM&P~(gDw>^2%R>RNW#Kw7Nzu%s3SCYLcGX9TU zcHYJy*mklX4J@l{e(n9MChZr!tVz?J`rfz`9VBot^~YE5ITJ29 zOj5L*z&GQ*N0YOiQBB|cWq($d%yE)Cxkc{b^XG9N_sx4Ro3#5&{Ico%E4WkpE?!-( z>$|&h`Qp?JFD*VVvF5kV8(C{?=bU`3&}*=5hgRb|_S*gYhhN9Ice)CD9OB+cUp#A-Ni18@wOwe-p+kK*69B z*2}ia1Z#SF22E_4%D^)57RT{_0yQrhVz*9_SQ!0g@q5dt%c0J3vpbv*E)Y^SJgLT> zP(M<^+x;a!eq1f?I&RV)@ce~+)W2JMg_bT~yKG0E`Aa=tqm4hR ztirr9Q`MNRc}^FZvEN?%?b>eaHpO-#oUvFKdrxP5nQ>qh` zce_Aq!Rlz6uFqVFcN&(3IIzms1u8gabMM}1w0Yuut4al}na{RbZ!Y&$dv*KU?NgE~ zmgGJ=b>-4j)pn*GYq$CoJz5Yvzl_m}>0797M+0xr@p8$Q=hORbsssOWzm>CJRkG(Lc5B@Pz)} zs%ua0JbS>Odwy?P^N z%=|Im-tyQs@7Welj54my{dT9eY^B0x{w+l*6>tADm@utAy=B_h+xIf_0#=Ai&y4N} zR9Nb%DbtjeQu{otj{V5BM+%de_{!e#uVh+PFI@2Ap`(ufy^jq4O0~CNJA2T5cFJW{ zE)7l_=Jqef%KP&khm}=G#x8Z@komar>ykZIcYAgOG1#2@xx)5h!dfqZ#~sU^Q&@Ff zQykQ7e|O5+zhAQUh;pwO+k11{oyi9tSG~B_ByvnxF6E!)Q%5oFueTH9-rin!HR!f^ zl~>#R%uMAw3pT9$&LjEp+rcIitGKe&okeReWZeF+@_j{1K6}Os4*N$Ox6kbju2raY zx@q#bQf)lG_isrvRO!F(eAC?`;lUHRnEh6MNv;RIF09V)(DcYn5t??KPle;i z_nx1!i=*PFO#R|_EBZv*lD3MATMf=n)h&Pbb5qH`W2>*e6cy!_$lW$0XYKVVOBnfTiHeATwtZ@V8?pSbpRdfc<8Msp$+ zS(p#%PIVNre#Eu>xo^eN<2=9hI&KSneK)DDbm`n9J#Sa5Uz%$^JMTm8e}<2|yK8be z6unGLMIM%%YElwoh^juWXm95euqZQN73&r0mCrQqgzIpew`Y#JR{KM=&~bc3 zRd++B@$c@Jxn~5XUE{MYllPh>Ca}(5IX+iJGiY6Qj*`(z&m&9HcfS8_zis*+Ls>)1 zck|M1{`QJ(SMuLu7eQ zvcE0lvzzD7$OO5@I}8MNw2Xg0GCB59PWAnIyFDu^)zov+ zwl?Z+d#=>7Fz!(jSNp5NkH6esuXwB6@yzwke5si-tS3#EefTnMrT?-OoZ*b`p72ck z_HWJOFIIAqTlS|dPkyHD7T^}PSUyPPR#U}}84pi!N>1jqxBSnrtb5Cwg~gL6Ki~g0 zP$zr2?zx?(zXo2M`c7!=+0134p_=x;x{4-ocPa>}+8$Bk{2_1El`-?&GO5`Ia@?QB zZJp(!E+2nC>&dl$i&iaLTiDfAajpSedG_$L*4PD+uS+st72e4g`_ zi_<=r?>_8lo3*M`eNDA=!nLs5ukWlYymrgG+bi0;C`Dkh8h-`*`FEy!mwwT?+rfS% z&u*S+`NkiiPdiRrTETwg$=~ehv61(u%dLJ_o@cJL>Q;5oRk2sDJw^?QnvIP;la2gu znP&GobhHFHKJ)rF`|^)PzYUJ0I(`%>-uSEkinZgFs->D8QJhNNNq3GuUaIIVL%SaDe#*B473&p)Rm-x;v& zRbH|&Gr&_$q(tbpuW-Aho#a0* z=ym5=v3{|#sM4KBVLnM)Uh7}CdV1~hR);Ru#gk$jEnf$%2j#0OSkOLs{EK`~^{p8aTdo<+vf9SnyE1pBzRss}o)eGM{IuUTt>9+plj355+mmzZ zoaWs-w}$QX_H+AnPCJ`suHBQT^)1Kb^&jhXsw}2wEhRhNsGl!fb6zX9KFdv^g2A+A zRm3`_UdxNGnx$9rZOy3Lwo*~MaM^`coqZ1frZ8|#m1J^HoE!Q^mv_qECHddo?pglP zobuhQ=$PYVshZ<|!k6(sjQ?YjT{nNO{}uh{T<`Q%q9qT-w6*4ma&9zwz;b18hiJBk zz)VGs&st$?PnsS%vng(MmMqsgzS{ls9gkSNm+MQq=-Cb>_88TqZ2l?%}&(U|qkiZQ3KZ$uk%#yQG6BS^FeZ1g%~%``68_D~}3=N9^7k ztQE|$EJHWL$lX}(;*5izd%la!lNXrEbK}G-`$vs?cecN4JNSkD)2j0PqDSu^zBA`q zeO33RH+O7ssrU0!{>c^%4$T(FkInJdwCHJI-)2|yZ)?=1J$E+z-M8i3o!H!c7ruPm z)fVn>?3rt^*{fAsH;LS?>HL>3(-6XxJmC>nhjB0Skq3<&Lf?xoytA8qZByY*VJ-K4 zTGO9GGJ(tzlg!}8_4kNcXJ z+e=MvkLA44^J#hA`+H$$x2D8KP3W5S$<3U%I#xfO4`!I0Qcz);5h2@X`$zbM`@Qs7Cuxayu}_j$@)lY?o1x)Q_Tl(* z=j~l_CP(;VwHP$7G`6U)3R(&p**sz^(UF~=v+ae+sxQe6c5x=oH&!jrT#{0emsuMn zuxi4=j7i(tSN^#q6nI-+VM!yqp#0uGJJlQJiIV61j$8BR-O6Fa)*<_+y=((G>pPqjm=e?!>w@$fDo4v4y z>{XBJCVbky?h2wOiTtHX#2ZzYxC+20?OKzJy#d}GXB-J z`>$>OZ{N_^Qs>=HdlSXkx)vC4HER2BD-=rNv-_m|RQFo=&g7m4<$eDdbVAp6{LxEo zc_q~$^Ref5_^Q84cjnxFpLbh-n|r3V=KNGq-j!RMCMzF2_^7V>k+N^lEw!ADER5&v z?4-ka9!>Mv^vyXZWcHNPnOV8VE(pjR2%Pd=g3EH^?}WMuihEK|B|JXR^Zdf4FAh7^ zmumf*yuto4x58o`m8)itVx6vt1fTIwp2KOEB2-s8twlpIy!i2nDlWdaJk3=hCCj=V zZ+y4icW+JNv70Mb=x_SZAb8X@_~+EW4gwA;M;;u>HaB+Ge3Q!Xc>UX1*IT{j7qduh zUSn-@r%;ApWOb+gzT_|GUoJHd(lHW|&~iJprpv)`o9D-cwWs&$%ubvbSi0id4hz*S zOkp1t4ha4gsQLD7{@jW^UJSi#{&w?YUsV3+a9rfLRn1&I;#Xm9X4u;Ed29Yiw|rRf z#d4dG?U5yk4r~02jBlR~+t%aW#*%(ssinWm<-v1ix$1wSrW>oe^B1mLv@!G@TRCg% zlcgr&3?G9VW++bI$5R(NXClMFYkeD5Jq!>3cW1?KKSgO7vq$qYzg$~(>D9r8xV{3$ z$|@F|MuIp>(;8A&{wm!I;GvD6+3;Nmy`dp ztu2x=UK>xy9W!SC{$4;i&@Q znTdJYJ^Te;Y{_bcH?rr{xPCl4pF8hR`6YhallJY;W91*NZ(6qBDI~PwVx2?Y;_BORUbmxg<#XbDVlSAb+dA5CeF?p_k zE0avn1@VBuW@FpM6FK~b&&^qw@ii!9CBp=!PhKv~ zw@;jZZ@b0HyG_L~=hIn_bIz=BTR%Ks{$lIf6DlVrsR-GZOteZ{v3}>xUrE~xS4e+e zak}|r;`S`LSZg2kJ7G&7IGnU!BKZ52N@Mq%;>b*cLa(z&uli0+Tr9e^{B+gJol{hO zSLn%{VP%|PyX3hFv(%BmLo1ia9(;NFb+l4@;nHoUNBnlwi{45p?Jv&F-Ja>2?;}-f z(i=OeqtsO6Nqa1>gI2;C^^QLmW`3566+g1-@{EaYmMQa3e_z+0|JPo~@mp`~!!zNk z=FhJl`&_(Y_Tz&4@7ODD+^u{Xe936TZBMyJ_xY15mfvDD2=SPy(qy!GO}NxS9)9N& zzp8?F+DUEwt>qE2^7HGMzkA;QJ$TjX*Xp(Z9_DB5yfyVij^ehPf}9?LHbFD|it6J& z-rMB3D&t^46<@qoD({vew~cxnueZOC&^EhlvhJU4*DLAOM>*V=`t9H}oarga!hHMf zf(MTUJZ2Uh_{mdO;<88E?QEOe{JlIe@663bcGqs3vF!!>RnCbgcu@Yprw z+ozUJ`7!_2!bim{{W})anet~AefA9a{GG34=^~aLwi};SR~o8IGW)1*-8lElp9Ql`Cl%`?FiAmKCMMTRWo?iq(lW3{+1V!Q%hd@ zPAOTK-spO4hqvGZw=4XmRSVmn^*s)Lv1yVNtBVNp2ZLnwFMs(uw+E_3*5qF5+P!B2 zuS%_b75B=Z+tnWz_8;A~wIJ$l$SMv6_xZN}lKqq}Tzqj-?sv^f8=cVIo)XEYt15S2 z+WW*RbvssMJ(KPJ*cX_J8ACYH^IGCYg!o$)zl4GKJMKt zDS1AvX8$}pJ5htj0g_Q1>MZMwmd`l9Jw|@5?3dTC`YXQNUL@=Dt9C=lSDS${qnQ8%;JqttuBU8&o)V)y0!qn*xg();#$^{ZXBTlXJ& zSL?Xx+OFxBU5nR#Ikaxtv8)NxZ+SID`>8Tm9<<@R6ni8%xZLcSy}ev?vcz+5y&Wcg z$KHoU$n9=$t9x;d%~9dURi(70PVc>cuw})q{d&7nD@922MBtn11OFNJOyw9}=` z3AVqkMhcp&*cow$#Mp)?^8uoW9H1aznxWo@OiQk z+v|Br^QUK>j=Ou)&ul_qw4YSRgw`vQ!j?T%-oPN=p!_^Eqe5`vVRuEl$Yb_Vw>xKg z=PYKMY+`=v_1RfZJ#)8ST6-jfb)i57>+yt{txhR@Hzq#jNqKl}!oo~Wi`R4H=W0y4 zD$Kcltp}f}*Aym(9EZ|Fj`C9~ zmA!N-%6TPbO1wJfd3EEbgCgul_QY>$ezTzIZuZse_o}`vmzQcq+MHlneb9*im7j{+ zn$58-wNZWR=9ZbBUfLUz%689m!6aJ^&Wx4m6N3CW41XreHoh_P+cHV*L~)N?^gE5% zEK7Z@?y3bEx(n`$P1<;PQkNIcPs^Dnk2E;4N9i2B?W|W@wm8XQ@53`!wEMPnM2Vd! zk#Rh=T!20CHUs~bSBXq6*6syT3!CoQT@DVBjh+*9Q!Ch8U6W<<1cxC11cTBiQyZ?Y zV9L@|+J1Gq%2!!fddf})&OGQ>f5!dW+m&l`W73=s@bg>=wpyWP-1w;K3TKI? zr}tD2uM-SYIu#rQ=X?{qFL`Wnn$=zDqW7 z1k5RKd@Pjl$nda+*gJ`eqF)!TmmPCqS?9l0wcPH&t!TBWyZ#=o-g9N?ap&C2tJd-= zW-B;xH)=LEr5rE5FSTKw+Cjbu<5 zBdXT_%v;>~-R@DNO%a2-@okk1>pG=Ri``x=mg$i0uiK(}<)RVmv6^#NKbLN}^d&y$ zo%g=0cecy!F8w2Qr|hoZmTN)5YZ<*B&5vZQyPBYq(&3+AlELtwp|U}JqRNBnOPx*! zTCe9^=6-p*%y3W8s<(%x>q-BcHFbL2>VlP7mCFMaHW^Jj8K4m)Ea335S83YEz$M&m z(s_)sna9~mD|QDN-P@X~7A=N>=4brw(!CD)c?`V5<3ZZbISe4&60zuO~7~+MnIG#hD-Im@`Mhwt+0n4qMYHDJ^gAWC zlp&;|WAZyKH5CSqc|U%?s^hc~Y~!0G>|Et^{HwEXad&mUK>=i5>-L#Xn{=fB!(?GN97 zZE0(@KdQ=h-+SH5xx20l-Cg)y%)$A>Bx&QHli$Am{8)H}UVgG*n)#c{Im`{k?_W>*p^HFEM9R{%}Kg+TXb-m09Tj0k^{pJ$D9PC+7zM7DJ~H3-}Esr>od z({ES$IBy-#jaY@;6X`!Pn(aKwZ2oBN`s)y_w^h3K^wo{~qn5T*Wu|I9KX9V6L(S9n z5f4vK#&3bi4DU+b?l@ujvuMelLKWqlo=?Zje_rdm|EuWs`dgZF_QrnKefH}yhpDMv z4425m2lfg+ikwEtZ|3+scCMDP4Dng<`QFwzo=XmF<=@WgRyBUPUB6&S?BS%i)%R)@ zU3sopIA0L933%l2#8B?#@9)=+cW%A-CbG8CzE^G6!(xY`C6_-hs&%QZ$Xy!!?nS8F z>jn00JB_voEZAtU!^oKVO!?ae@w;h^cNqGQtuqt9*^nLk^8U-E1q-CTY&R|D`Yupp z!*TrkU%%=5c{A(x9j@E4e8Q5v+cJz1mrn>WNj_nC!zuf(&e~z#e};@J=7QN0j@##D zES%Uu!jO-g@m}(g1TeI`6v(=?TZ2vQ)z14l_ z@t5uChRE;Cer8j4-1X*+H+g>0|Etx6)OFkELTa0okKz_p1qKES8HbN^lWyJO&~2ULWy08arhh4SVYRY% z`}@i(mz;#As_I={>mbq>{7(KKC!0ZavTxW;qn$PKLcGj{(#(%)eullyW1Y_J={@15 zO_0J0#wYF__CHEH#iL(No<2SP^1Wr-cvobJHtgExnxnB+#pm1nk`r zm8;#YAFj;{b&&|Q`IOzgyJ>%U?8P1Cxwm?sXwAH06~M)jsvWa*lP9YelZu+3%G|JJ z4-5C6TDpTndUi_w=k-f2EGc_n^km}3gTJmOtlfJ}^;FlaXIC_t{yN-@V0~%)UQWoc zzd=20_H1nn8P_}`cQfnfmk+IoarZWil=%HGWc#*z{6D%^FLRPy9Kcm5)8WN(BJu-E zgZlQ`2&ISr88UiWgZp#1Jd+L?mCW6f(R6#4c+|8TTuvF2maM*2`=|btpHZ5_`&C!w z2r6|hkz8$h(+83%q|xjOx0p5nCGU(_!Zch5cL!(U)md|dwR^E*k_5s$ScSaSDiG;+PZhAQ;%-+_lXN$v(#ozrI&*Zll{s7AvT}P($*f|6YDf?e^+f= zbY;)-y&)SeU-1YGy7p$vgFp8LWm_Km%x`y@`IgUimC4?+{=lbCd?Rf8udKaR<@M~~ zzLr*@_=YfvEd^2@LlUtK!CDCqvXDW?mUF5K^_YyGm-EqOu%BTJJ~ z%CRL6?!1Iy;_hSboNdul!(BbLuZnZeA z^7X46JRINo%NYyH-Ji={nxZfmwI))5bfp8y~T^-OajqI3e?H_uqR{p8X2{<+<|p z{D3ljpR%y~Yx0Y_7CpW8*6VkE^zALx_FqGJSFj5`5?QM1qSM;P>ESsw^Te+0`?ott z`y8KP-TbmE@?-25v%)ODdH4m55n?YODO0NmB%-s2(|?-m-&_@q9h={`r?9@Lta^S^KH8fBL?4(XXf8p6g-LP^NLri znkrVF%T@IXY*>?)#(u(r`I()v>v4hp{|pAqw@o{B@66qK;rDyC6JA|G7A>uUeaF=w z)YmIVNr=whT*E4}ZK3XmJ;$=R7ECn>d%h~Vw^{|yCbmpnspY@5#=Ymq;~U@lzh1czr|GoL{L`P0Q3^Z@!)L5bRn5|jo;SNQ zz`{{M;e_xp^*#S~-Oc$rL4t8U^Y;Bwt9poZ@Lq;@IQlK28-FWdn{8_?N7hi zP~^yvqE%FKV8+9QjePIdKlgsJ=Ncaa`;p`irs3PQ=Iyn*JMs9cxK*o4_z!L3TBR*| zLR5L5P~k~8yOfj{r!B8o+brIF`TW;ab5H6VGfkWvc6@QI;fh-!Q_H>e~MWm^C0NnxX*$0ZL_*~S+Ib}n_L2@hlJEH1OZue#c>-R$#Hmd`;u zdic%>3*X+K*uQUDyNZAx|G7EN%!bD@wzf@q6qC_*YKp_jsWJ`{eF~iCCzV>e8#hEV zHHJtP+&r)4U7P;yRP4%&)^XRopNd}p^<~q`T&?hbQy0Vr8f-|`V2thTJhn3aiu7mE zZnp;Uyof)qo_^4YO3!3^&My7wY*E$PRpud=rhEN9^?=>f;?DYgi4If#wM_o|^-%64 zH-~elmLK+5w^#oNi%h27omG44teP4HesE4c!CF71|1|Ta^xNvf^FEbc|IyCVn|nln zY58GMw;gY0+fMiA3vHjfYAg3jQC96Be$ihE^Ma3P2q+pKf8Y2#@Pi*)Qrb*s_h)={ z2~W7z$Ea-gp1dS!diA+&o96OfHvM7M&>%8t;)$KgcjkB{e4lT4!{Oq^_|BNdC7Zq; zJiqV9ufQ1Pwiu66HU@saE&Lffw#EkWCax~kRN3YAd!@R}ZjCUdF=8nBeE;FlKSh&T=LHL1ybsRz*}5m=wpZq=S2CP`sxvh` zIGI&5**H7wS6kK7^^S(b0SQ5NaBQw<_`|%cad7#9htcx^#m8w;Lsp{H>g@#W#?XF7w3Yi{ycACIuPMagg_!v&uPpO-EMLfJgbc)L= zcEkB6E@wMW`}gSmwGSCeKq6m%F!q;U{*^%BU4{^+u2z}&vfOT`rV?6QzsU-e$}I;YgLb^h@4tzcV%Yh)7uAF9N$}A ze9oLPqd)%IvZffbsrU9xd%>v1%D5qvG5)gpmPvN@+uF=s`7@4N{HwmWu`R&!Y5dw< zO^-c29G3?r@G*CJ?XFK|^LZ`1dGqUI_w|n}?eSnX)jC_hQFs4~)|HOzla?rS{<&v; z{I7b1xW53$$t8dOGw_5joaD6Qc=@qyx7K(~x#_)LX9cTLQ0;4l&hx*8toeK0OS~=g z%6|rmoxG*jv7LEs&h7x_JV|Y@et{jQ7V!wq-y%O(V7imqf%_&tmyI^9lv(}cimKz4 zeV4UZgeEy~22NC%e5-K&w$0zxGORk<_M%4H<5BCUH|0zGWAk&O%1Zw|`54_`;uya4 zZ0rJw=G*prUTr*<&b2=ya?|5=w`(<@HX7~lIF^0J{wnXb#g*%>9Nk&6Z>wqfBZ12c zrmR)9oa6b(c>byM2d@^*343{C<0bab>-CqG#J4B^wzfK9em3y#?X7pWN37evyH%+E z@XdlvA3V6c!WMXW_I;MO-5<-%dt|#fw?u;Q%5~c-k6hGc)a~I5b$nNLWq;|~wQE-R z?wgx6W!sd=M?F~*61vM%xdb5&|Ad#}BW2(lg_HJeQh;qtoj1_dlM^x@ur% z`|ExBiluvky2>Bwgqh8c-SWfhkf+bbDD|G6nfB7$iA8O$$L8zRYXxR2yK25?s?nHr za_8%{eJejLVGDQ=WWisYW_Y8Vq3_rF^I4lbA}gs{f&v%HgH)23{j zV#&gnP+>f$T)ueK#)mtKjkz|t9$V6OH}}B(wQM`4TAcOPFZ!D5>9fo}sIGe6<$trP zruA(6yKvjrRd2rwUpgo~SLn?;{cBpS9405S|7GnpSm)xt%~;L7UjDYj$8!pYF3e%# zvzOZTM?B|}pR~z0Tc=m-J~`3vK5keqdfRW^>y#C@r*o|m->VvSJIh`CW!M7sON9!o z%Ti;CN+Tc0L*ENzmJm6)!@5^nvao2BtJ9YnKVp-^yTbJ_IpS8%@uxhKM$N{Hj2mbP1 zoW`r(o=)@CSiSsS|Kv?m0wq0NI`u#5HfLyKCm6~cX z1@lfa`M+J+`=)bh;HEz-xJ487q-TCzmUeMlaKPQnYoB~mC+aP^vcuDRsxRvk$2T0T z>Vk6ve>@kxewyJw!}hF=a^i*(d2x0kmDg`CU0fIwa_#2rYrjR8ty-8r#bmO`n-wZ% zXJ$STV32K8bxRlA&Nl6o{h#dnA5Yr1X{tW?JNcDiw*A@HZ13)cy<4!+e@}Vn^rYL{ zd2P4OPTuk@TiI+?XxlB_uJwNNn`|q*D&_1xsr0-$c*J5&kBjmy8^-t5mu^c)G<6g{ z_55Zw_jl2r%h#6Je3kjGEdF+7^!psgLQz~nPws`jzfn?Gtiy7F*t?7nww z_imn*PWsP~J!2E!tH;xyyPEUO={j{ct?=S7RfBZj# zm}}7^>%>i;KCj!j%}MBm@&ttyeFhv&F8p)KtTyg8$-X)H&Wr3>8!P>mUhX>c)aj|7 z@SlsCtS;5X#-I2nu;;uM^m@fu=qgfVwl;lLW$~k>G85)_20op?|L?~|Q%RN_5`}RaM@mD-#wNbG>D$asPg7akg4N&z=6gp3JM6*o>bBi26Tc2I20vvtr8-&=l%ni|)=(pbYb;Y8IC;1G0WoKUgZFh0$Dqr2;C6}R@@{hC?#a_dy3m)pzEGK;CrNdMXX$5GWkR9A-M>5&~%M9#>&E8j9b zxX=Dkf3b1&;t2scmRVo&_G|sx-1wit@1A`7;(hu}&(&66mHTivlvgNcTS$w>Hs*I5 zS{n>h4fYu+959^Y&|CEL%o@j8OmF+Lcl`WusdeM#{|uJqE2OvG*S%sOU4K#4lTW>q zlSQ3V;sgtijQgCq<=UDm(UZc0+vE#AiJnPhXWwd}dudDN+MboBCh8XwzfD^2GnKVf zW0?_~|CGNUf4yAKKUeQzd2dPMgu}@zehKlPkP5Fl_bIc<_E*vO*WR&5jyj7v%{(fy zfyvo*Q^>S4<>%CI6|+1KIZH7_bv1xgx zrTS+39X*1A1Q!1}bDnupl4JNC0rfq$KkBsSopNZAjpdNAkNNazncGJxN9lD-W!Z{q zt7&bxb6!tjyJx69ESYFdc}qS6V5`)XDj?A^U*!p7qh z|1(IPPQMdUU1OiSV@-g@niVrOb1%6_dc5=OndEtl!9{*?V6!;{BOF zuHKCQ=DGZwH{<2wc-I=<13`8Y8(HtXcQDKT`a3aib=||;SEq_)KUJRJxqL{s}% zZO^fNTef8JD_!FZzN!fmBPKL7JWhGA%9Ew$?2`7&pryeM4<@o|+A&PD@i}K?_;L9Y%l`}~S8VTGXPssE zcX2>hrk2Ytv85UgPuBSG9B(Rqyg%bix6K0n_sbSX76=!fTR83Vs-W3=(?u3JxJWYZ z7p?sGoAdhG$FquWJV&konA1-Q!+)elBleu}p@z zB+L7!{amS>;Z6}!m)7xwT(b1r@}EKHl%NII#CtAGjLQFJUW{F`ilzHML-(OAJuOZS ztDFm#?D+D%^jfgNqm`L=>aQ)S@N#@Qsn4N9$+7ci<6N7r7gPRySoOy1g7=!Qt7|uw zJJ0Sq>>nQ_)ZVXR`}R@D2FCvkw-^OdI40W*%z8RQa{}Y?^K)Z#^VoXT*GlYq?H$13 z5tJgMuFH8KJmGR<^2e>{1{M8ma*v#jp$>(n1jy89>F{&ACq!mq|n zQ#m@H8h2W#KYzP+-Qu^BDS7ke{3`vp-cws~q3!vMMR&ryr>hl{^&li62tNOG2#kiOO8ko_KetvBwEjJay+TD{t9uYG9Eo|E!S zY5eEhliXIuXg`=;9r$ux7Yd|5t+zelHYQP`;;pMG6u+BJ1!^cLG4 zFYYZ{vuwx01e2*vQ&l>&SOt|I3*M_q`_FKoV(Z4*jH5FRC%8T8o1(Hr!S??0ol9ij zXDEu?u6!O}{>S%H(Eb4C6F#krB?1-x?!O%_clK^i>&Mmg{!KT*duI~FjH7WgS zX!aNH%wzp>4y)>Q90Qb(r9Z#Da{1P)J11C*26m;qEuYk>GrOzl)`vt>Jq>e>pey?n z6Zm(Ye^tM)XP0%hpQ4~}2^Acy?)j2OuuzdZqa_^Q|X?LB2L%im#C{%RmXGmw>KY3mG$I?w6 z_1o_-FFf|2K{+<~fwaojN|kfRI?DZ?Ec2ZyCerq5YDmKiuOR;=Gft>0tg7gr8*Y)$ zQaZ)ML9R6QM|fE=t?jJUEFLK_xWq*5A!2Tnb z)$jU=gm_u{8*m;oy}Cwa>JvdXFSO` z{-qzIUoTNS78G1jlB+(3-GdtGI(! zfb0e32lDnyWW#cQSnaa*-toMLb*-M>-T8jo;;km!oBr)pWZqoO!b{dI3a2v#7-aV+ z|9X)pm@d~=8u#Jp+}DL=>*lsZ&D@ij@wM&A&)9YMuGT1~`YrO137z=({QU2iJ$7u# znss-BujRuz(iMl~LK9`uW-tB{9rd4KYq{8?y~ZJuiB8)l?ke3Fc_=HI^Vx=F0bxur zwhty5u+4~gS!sHLucw5$-mdcYc~NRb)f7!KL-p^L_RsEI!3^5vJUV5Q# zZp%5=zS=HtI_1g~-|FyS4n-}S>zpxtE{~-a-}5a$!hSP%xlY3zw%c-c@mkqOmu`7z zqpRe4@>tpPH`$l<9shOilD;^rI`&w9_U-R|Uo$zL*6-S)l_g!FA+&7L`cxYMhDS^b zIE~c()>ya%tUAh_Rk!5pe}=9H^#Xrfi>nr<*1dPIx0l-0p_M7Kl*2%RcZg;fO)!AHJ$1cwGSP%T`rJPH0OK2RomxnF&bC9gi^1} zT$6hE=marOmyV##-&HD4+Obz$6gM(!4ZQp}`eLDhvgcZ__MiagQy%WA8fA9;+uz!6 zua*5~5a4k*z>b;u`&#vjJ!y+pt@^7jw(OEnWufuAc|U&3{AXBFDK+ob<&BApjEVzp z-7U*1KD8irRi8pv>q>8yZ;xs|$+>XKueh*EBF!}I{xbI05uGz<2jytHUJqM*q*b@Y zDP)3?!jXo;2A|sg4ncY0J((dcWyiNaoYk?d({$d}RS&%8s#%&QbMq{-r6izZc(~xq5rf-wn6pKe`^7B&#Oq?57kT(sMwC!Oy2A zv$u8kO5NqoeK8xK|2e9^b7}~SbI9Sbk&7=Gm=^rdJZgDumC44lc~S`iMO!`wKAPmSM95lwr+wZ1_bace91P<6 z$s_!(*x`=*O162Se`b|4IkG4)a&X41kYL~`le_nyVck}bROWzV?tOOq_onK8)thr= zud3DUXs;lDQ8TZO6D)E^(rP4sOT1l_w#&mI<@fXXZv)sTZ{e`y_tH@gt@E-tf5UGFzwo}uC)pd#w#Yx-koff6ezzaj8Me=E zHJ$WEgn`e7)hD#n^{CxbXR}=?zmDu!vUZDh=cE~)XF5E7eEv24j?}Tp+{3pVZt?$a z`&aY&$?Dl*?i%kzdN+1$;ke`|`E%%wbolbFZdU9oo0^chdz&&oesF{NKQ+h^YFD z+*DrqNfw)wGcKR7&hPl)=g|eTmrnd}dh+9dDJK`Nskoq++d6I9R5@2q=TxQWXC6~0 zTFMC}B>Sve^6%5_^)=5`wiUT=yP7=h;o`iDZvNBmx2jrohCJAHEi$(=YuZc|1#`yC zsSNx|#`il9Dhu|^4Y4@3oM)qp$hx`@H-1TNuQ9I5d+s{jLcMVE*6hIBD_82S37qfA zz;>TCRbs$|CW;tOhNq5)cgydr0=NDnY6xkYn9ymt-i&{(ziF}{`tOO?cCkw z?ta@8XPy0)YyPdg#Wmp(t3?iPd9-P+V^(3JQAkL-q56R-^Dl9p zW1HigwDY7>@%g&AsVA3lt)3jvcXcV}muU^t+3aUdQr{54&L%3quFmvj?r%0BhxWa5 zPd@$0b&u0i@@1gj6%~=`bG|Kg|FuN!?!F)Lua|b;X605&biC2~%2Usj*YK^=^eO8e z=dD@#CpVOTb63d82TOz>%U|(JC_ixI<>}qZZF89_Qr^_A(rlSkFf}-T_VwbZyi)7< zaxU+~8lJ)Vp++LN*UprmA;2_wLVaw6(i@2^H_I>VAC`!VFI^(wy_s*zD|lz*m3EkPR_n*G!(SO%@vUNDntV&vy6E1Lnq56%yQj0*2|eU; z3O}Oh=&)p_CHwij$5Mv6k=CDh|NIR3Xw6q*9vd+|V9`U9cjr|_TW0>7r|@_4B9&Ut zDHdvrCnxpKHoBHKrsizBz)x@W1UZ%-YoN?EKf7RhW zKWf%|nD98v#!)oZT-Eky-oodCfq_1X8FI_FIG#Mm|Esd^R_Wo0*E-F`kE1@eUAT4A zvO@T0NX4QZaj!F_mOr>|(&02gL2#1)`E%As>eedFO+4ks=HDS2XL#V^w~pP{dSfSs z?K-+4+g>>0X1m}K{*q@Y2j3aFZJMx?HPWwUe^jJ~k*wigtFYPnx~E#Jw(41jRJ52) zao=jmXgHsZKY7uDZK~}#T#ZjBJDj+#@*?g-T*;L!cUQ$OHF=TwOl%3mRIyJY^B%gC zo#@}+c;CaK|8Rh(;$)q}$C*E@oBZKFgY1zs;|a$P?Gw%2dp+>Vp1V==br#Drf%cf&p}O(m~Iwa2PG z`nO~qOzXKYSFqvm`Y7hELh0@%ffT`K8HYAbd-Y1-sXJqe9Gq?;Q05; zl}Ath-;H|P7nbiD0#;ADC-LXSw%ZQxMgM93XK)dn=Vvm{lcDzaGwXE6*e##D+a&_e zJpQsQ;BJe9>hq_we(sp?_fg}acCCy#3e#u1G;~^iRH>O^sr-IL+T660>MXgFDu0I< z*_Eg2*cL|QThE=%`gB*9_@w30rzBQnYI%iB@>$EitLDfYj$YG;8@&bJ&Ed~K)z-&0 z^{bu3SLN5fe}mQD6lJX~U3C;D3XMevK)`L$YKPAD^;NBjNfPrDx93Ger;)OcTex6L^G z%JQabQ&g|2H0|?bjA5%eU%$V8Uwd_U&9T`Rl+=Q!-`gL>?O5E*n71IwqOYjHsK|Ow zn%lOm4<{-AXUHx4tnkmYd&7~`%4?##rtIKUvc0f%v4%q5A^yDQZ>=VU9=PE7=Y?8n zuaoTAOuLUxm2B4@r8G1!KI}XGq`EF4%j3aw`n7qvLHM)SzZ9sMU1-L>SZC)y?WKa*1O zoSbntV*Ny=?FAB<9se0@U%TvRYU_wPA>kFo$YrncN5aB(f2{8^lZ1nXuZq8}jqzB# zdvD^Gev>KFPI)mX{C(bkr*r=PzFXaII$}0j^X)USIrB-zTrtYO{Z61aqej6KCH6g! z9GME)l>hybd_K8SId)(A>!?_dpYQY>o_hQiwc?L3@7!g4Y^m8TV>gaJYlIvXJSLxE za8N$-{o)3fPwuzuO7-=nf;l6Xsy#@WdHw9lv^!UFFU7vNxhf)P$sElcK8Y^h!*d>M z9(lQR@(KP7f4iy%)G;Iwq7AcZl27O5*}4o zHu?W772R@DG9~5Ie4Scf<;m@>?-}$O%u;`tn@R1t6Sxdu~&H7Z*Bj4C;_RF1HmJZQW8aX`#&7oXm{!B;bm&y1h- zKHB8%{D8H*OMMwEnLAbZF5W9{{`~#Qwc;ZYBDs;Dj-UU&_VJ8kMzaG;x80j+vwW|) z*j9hVhbn1>xBoL7>$5Rdo!jTiYttYb@!VbPb#bX^Zt#{{VYh->y7Y3^0IrT z*@Q)S-nNwfVv?QdWa}R1Bp%W4>*c-o${v4VwQ7!;Gv+j|zAyRUf~Q8YCiCCqg!*|? zPvt)2@Y?XcvM1ohiZ9u-O%p@;R!<0LWodrumMlE+li|b_%e$^@a{bnPtFMCPNyW6M zr%x@AaXg?}VAFT_!`fr7Vt1HFI<0EIr;$B5xyv(ff~TP{3!}Wk_qk5GdOW!zIWf#v zQsiq_%+6kxAR1t3f38F=>)sa4=c_X{wPoU%=C-tTHYwR0;rRETp;~qB*)su~&fitZ zZ9IK%bH$~JH%{z|_;l>csypFQzm{4o{b&%hVcLPI%M4k5o_zc5z{OATv$wQ(E{!Nw zo?ZR#%CS>d-_91Qebuq_ZmGzb{|qm@d99cx%y3ZjR^pt%z*7BBjpxV3>w8>PYN9u= zx^aB`d^X~7THI6Tkl^hRmtRNwhuX3HXE5LCFC==>CCSlt^9{*M|MaF!`}Sm4yRDJ` zz8fDG=~`@Z)LxSETISoQ7a3MBb8FkhRUYtE#{kY4dM(z}oGl@3?EP^2E%~yvBXu`-%x-j>^9#IR>$B zv-aDVn|fF;^tR3#yLWsJU!TrDwQFws*Hi!QvY1c&HczGGi~UyV*lij61Gg7$|0kJM z(9q7<5#Hx~-~juX^qAjWoQb>Mq$aTY@V>e&{>4cpZ2m_{`vo|n3k!ecl_YCKV~a-Oh2+myEEFvoW&ro^hN)R z;vYT#8IDbv_HSNkWJp=8o&D*>w4DyF1oZqXj0H5EOEgi{P;kMCV|E@}qjP?|n`zp3;c6D^- z?VMlXZaX#GJz17aIlyK3GtphB-mtmXsJllralyo=e?+4nm8H47m408DAJVnC;9cL` zn9Kg9i&uL4TdY$|ZcgC;mlP_%p5b}0v)5?W-+7^($0k)3{p9_(Y{EhFq?-v&^Q97E zt9mb=z3rZS@A~bZ3B_Qf?s`ARUsfoJ3G8v`{tIHR82(J}lzx(xjRpY+GH=1t@ zk8#Yc3zgipZ%NMd@0ly#-)1klrTKE+gVXbNF~4NB44Np%X7PN-WWj}x%KkH)Sk8G_ ze#VOTKei-fr5&r;>^?he-o;(FBW};$+dj`JXlrncW@CAq&!kCQN9G82oGW>{W0w0O zOY1XM6&H(cd7R?v?GOAF+@1K=>+jO)mHY3mJF%)ZciLpX1CJQgxTdOkH6E++n;2gx zav*3%^7F}`mQO1@%RN_jzOvu`ADz?gmS#nGu{jt7%d7vbf4y`Ov)nfBiQ8^|T5NZ1 z=9SxF?D~xwLGwJKySyrygzqUFIQj9;kDpikCZ;H`O#ZS`d56@d%CN5Q%qdotTedc( zEa8seP+fORRhUI-ne_8$M?=P$d|5_|3lwyZi#!ULcc)sY=0OTm{pWKF=Bm3N zmg(ox*s>(uV#U>XQM(7X=4iHj`uM2uM(2Nq+}zxpTh_O$FI+NN7BuszbLjRIs}rT~ zR8yuhpHShKedK7Dyr)-ggQ4u2EdM^nZ zzNX-Ln@xPm$M2l7SgXHqy5aZRXP*2H?QW^@E@n2Hlknwf%-dJ}myZ9DS+!;1t=Fd* zzKCq)e9%yMX0hCO*k?veg9lt%m)NI+v^LICNEa&8_p1_scrJhx#p8EHf!+ zx#NMJZ?a2jUY%4I=z1h~QHAvn`|F5Gfo&Z(L}k7|b(n5leE#~5t4^#=AuGSKG&Ly* zsC>V;O+06nfb8G;tmck`MrW5PPR*a1ynNgCEdI5oq8%%P3z{As?D?1Xx25mby*hQn z0?Ce}Ue|n?lH-iOtli~(R9`Df>)Vahx%<4IpYBwdY*p6 zF17xXYQ3^hD=?=>oxS77U;96=qjy&-IoN$$FQw->&*4(Who?Rh_vVzH-6OTEuxs}0 zf?JwTxhJugI({hr)f=YpaC=(bzn8CjYBi@W`Lt+i+A+J5^b4y?`JUeEC@#wVt@k=< z;qrQ6=fa$wKCigFykc!U0-{|rHH{gboX^cYs`rFPY1@q-U3QfdatdyK_^Z9_KZ94j z=c!lcLv()U=id;_x>2I2cW-@;s``gFtnSNhp8p(vg3tEBIm0C}TBn{>8oYXZw$1&q zx4y-RrW-RO#9#3Hdwtn@>0IlV^d*_8|K^sQPI;SkZ1$J42UnTQn$)0tH(;ONF_kAj zXHGVI5oTSkFLw6&?(h3{hui*VSaT;vEWN1q+aImt8v|r=M3-pysXRHRe0S!&{|s*h z7cHNvCmqET5@?mlxO3K|%tKe?J^4g9xjYyIWzT#Qn5gn6{oLX${ky#R-lloTvpbv= zmG&>mJU(}ECjTezM)yf+hD@CAZy$gCae?kNjvKE;d5`C8OzjYzc|2&TjqH;)cKytV zCrw7`Z{N(2&zx(VcXYaa+!mqM-mml0m%8nF#V+D>$;&~}i<9GnvEhT69~)|TIdxVn z$UMpZcgOL4zFFUuBg3qJg)Mbo${jSJa)m&pz-EPVixcmRtley0r@!o(Q~Edi$jhK; zcAGufOp`o3bX%SJ{~Xj*NlU0a|2w({`vXa+7Lz7T}_uRZJe{3 zLrpEL@X3$&j{|Me%uh&H<^~-%=`v!iYuOfbdqv6`)zliNv_cmDnaA(3-#1D#t+mwI zC$TVLa)eU*`|GoOH>a=P9dqq**sHWFpSvD}1vE_Z>OU~S&-k%L+qQ3?&K+WT*FV)Y zzlqy>!_sx9UiF=QCF^_qpG@%lMX<(`7mKga_*z!{PROauE z`8H|&LhClfHJXb@==fYS&)N_coTt5O$D_HKy1_mFQq@_5>UECaack~VN-5K+zH6EM zacQ~usXpsHe?I%Jb`3W7%C0@}t2g+H*@6ih6b<_=9#5A4cdUb3r${Gx;>Y_U=QJHB z7(A)^5x7@YYKm<9f~azN35!oIlRG{dT25ZuH1CS=7tx(XuOj&U`ET`UtPHs{<+SgD zNh>%w{~SeGt zHDk@XD@qwpbxrEf4hn6!o%!T-yTC1f0tHlBHT z$&oUz;u-jAzb%$pdv^P)+}uk6mkoY}Y-Nkf+!r%hiN!5J(q^}VavRkG^&u`6R^4xft=x;3Y&Z{?L%3+eEj2_ord zj`0*q{C?3b$5^a$C?oCUlLE%0Lf0P|aBq7)HRtB4RhkPIhHcmK2oi{`nDRq?qFU%P z&bRm7-4Ev}D#j%}c%H*AKiBBticid%6Br*}xsrP`Pq$qpZl(A9DJr@as!Af8`~`1J za!aW_x1dg<#3**NS|uy9+>?JHF+zHh7dyYabl6@MqB!NM$>&mT`ID-8<>!+o-&qzq zC*5$VbJNtvn*3U+lGoMU(a+8n1(Z!y)a0toQuFC^ zk?>>eIDf8iHM{VSkQnv6mThdFHv~T`n@qXqp08hFUjFmiyzAbns@}Rg+9Ll*w`ffG z$;aa`iN(>lqmkix$UMd5%DhABQo+;iSKpgqE@Z_#!NIn0{#vHZY3sLzOn-aTbj`j? zmp-TJeTaxkSkkKM#L-ZlqYL-!$VI4YTw(})oYa|y{g|9U$^s4ctG7WyPbMr zjINt^O)XwsH&eqo=IU%S9dF2+{{!dxa05Z z$uY_Y6Ybk_-`{Zl%$b*3XN%NRbvWZH@BFRT(#qsx zWLp%wTj>6Zed}^zOH`$^gn~C=+=E5AI$CQxANUx`65q4 z_s$AU?F&nmYI}EhwI_43@!7}Ebu6v5PuMk^WuCLe_mAJ#OB)=I>Ef^xnR@p(b52l@ zY01Wjw^z1T`TKqov$y03P+?#>zQw1!|Hk>!n&i04^L0=2Fh65Ff2wuI!;8tu`&&hJ z`fs)BOgwv$UT#x2^A2`FbKBr@zf`E1B6(P>3Sl27_1=#9pUbfEd zPrcu}ebK+=d9Ru3vZ)Dyww~E!!$LQeZmp?CtW#3rIXE>{`yvRD>L3BPt%A^IB zE;1>?jq`5&yfD3Z_sI!Hwye^t_lfShVtu<~>&qoOriCghYKD9|a#B85ywb*sLCY;e zW``3?^uAOcouZ*WX-Wv2 zMFZ3R3nv8aEw6-6H&XF7v-|n-`|66fY>!DXRB|=3o!--Jyg}SY@YXb z?=7LSQYP=Yv2zV}D<~us-7`IJx8lpL`%&$&uF84qpZB=&2=E2f9cKT2t)h0?G-Edg z^}~sgnVqtCa=)%J+*fuq?Ot({(<@1Zd72X^-cfn{K;ozJbDv4(oy{8==5r+<4BvBi zo#BPv{|r)5mO*z@wNEZ9JRURO_g&`H72iUhReQW$7H+uWNYC-Wr}IzEIp?h=T~N!) zzVDFJv_&Be`y50TG|anT=U@589UY?5qbetMVuBTbwdAp-|C%Uwz@fS>E+o z3%e~Y`#n(dF`8pOt82=huwAct)^;`f+}Bh+XOki`!HwgId?j;Y*pp4mwDo8GiF7lwDQF*o_JKlL(OGUiu;McT&8Vd z5Bt_LPW~OVa>tyon^WFgeR=)m;U(7o3+I||$&uaiG`84x#g))Ib2Bxn9tNygdGurB z6BU*7B8}{RYjRVzOHG;l%+0PjIp$O4(c9V6jM&#k-3p&HPlh@3OQ%4HkO#x@oOze6 zW(Ae9`=nnMmD<bIkt{0>=6Ttr z&lfJKT5mBgU`kU_$^-v96YB3f>n=UKk?ThC<5TTiXZ-xGew*6R?pUzW)iWw&A^SJx zeMRYeUUcN7N^fS4Gd!lO4?mn&7 zRhBEX`{m+pesGjHQa5W>^kymc9C^q7pG%J^u>YsD~fi# zo!Y+tsf%-=RC8m*E6s}`S6ywhq}D7C@>j9s_HORc@?%e&(*NG-(ALWyJUcJhXrv{C z9Q!TL#j^43+sSEr;%?qH`)#}E1N*)&htBEOd}e_aSW zFJ#7XP-OLvTUKAL{b%q^-S$0e#jR5-ST<`g7$pb%PXC=H9#b;_f92wOw-xN>mO$Xye;BuWi+<1C*IoxN^6t*AZ4Zvzv1$g(LS36rwKnBXgeIybq)ad>o@?A9?wd2i zl&3~dsc~|Cfac@O?S|g|9?boUn)#O}Pgzpf(%&d-_)YM>m)_xT3uh<$sPwwj&dB@t zLFC}}V6~}=FLICe7kCD=`b>UfX>3^oLJQ1Mim(cqseE3#hO$fMx#fw`-U-YT_h zy|K-*=ko`qJr8R0Ydv!7Hb<1V2krW{Zth#X)~oxYw^liPxRA-);9k=gk zuV1s$Yi{AJqKQY=NOCFsWNB<};BRCOy7A#@_wR2(P5x#tY&uq)GER-(J1^z!kAy2} zUzcvXeItYGc$U_rJ9ZpiO^$Le1o$;4-jG!G|0mvg?@<^FgS+AB9~sU`0)1yc?6s_$ zxy1Hb->hY;r$0Pc-jV65=<2(|ajNhQ4khd7dmcyZ+iD;>O?b!4htGWSUhito+3~`kW#yr%A6Y&rvbell?31Ex zyuYG;a*^DhRpFNooLIm7UgNsmzn(g*cl+w;x+A*C*8C3ZQiUUOY>X!-&i~XuSMab9 zTi)fjZsxjY1eg42_5IJ_>ol>0&Am5q**$5^SM%MrmP;2oSxPdr$p5|ZpCMx6udMQi zFD6;Oy}CqZL*pqd3!saEeK_Gs@Blr=+Au5{C=U{(k;83 zMbETvlR2pVvWrVI#`k7&;(7VG{IR;`Rw-P^HShb{Fx}AU)06wC;OPJKOsBk5bo${I zk6Z7U7%xmRp1X0*rFCi?(*=^l?uqb*2W3V@MBS`0i!NZB{chnr2YDlrijL#A!?^u~ zdBPrD%G^}E+OLv7itUq=?CyO2=W`?c`tqN@o^o~F&yc1i6WXTn@8z7p*my_qxYVTc z%?_7ME^TI2pI1CTd6sco{06HihmU{yPtBQn?Uv}iM?0(XPR&tuVw^I;U#Q}sfv3eu ziDRqxIJcG+rQP%4nbXAa`nB(&!z%5b=cnt0*gw>L7h-bh(rI4}xvWPbOIF*q_Y_Uw zZ(h5j=Tnr(6pLegzvcOyWwVlNns>-cyxh53^J4vqxmPc#uXsDvv_IBtb+<<_XJpik z#wL+8$B7EZ7~f7_YGdAOx_4(~X=2|2O^c5<-@Yx8iGTRwiR2aYPxfm|FE5{)ddt>( z^VWFBqNS^zE%B0x3)E!e;(WsXl$ohCapGH*kU10YJ>AFo^?3;U!%5L63U@4?wan|n z^vu5@mnSSa8CARYcTL{|zxsV0n}v*ec$F4Sa#$`Z{-ZCOQ)?1~fS(Ypaq-a0pN`Q84gd5d*zKUpVCn)leiuXp?IchW^h;+Ox`ZROY$`o7fk ztLIfNuCSkZVfSx;4XSn7=b4q@!7b6S-oDW4-ks>Ie~+~Sy$rOosC&uL2~1j3OQrMglzozkHtoOm zc52h({9vp3L9>i(H(4&;zN3tRU&^}O-=sxG)Gi_UxtG)}mvhqGf1dmN>ng3fU2yCC z<@@HQd-feZG{fqtgVS^d!MAD-{~2C=o>Z(HeQ&1G!6}San(V9WE(p!NcJ&9pm+mXJ zSJ@?RpHJSpUDVb!+AA=3+XJQz9Ja@lZTrvH>UD4E?cCPx>hGs}eIG2TQ(X1@N}TrshJ^=?3-Bk{S6-ZKbo=mP$+XXZc3xlm?CSj) zbNaaw_lJb+_w}|{o}f@U$@o|QZO0`+2A7Tpe*CyBSJ3hN%fS55nF>0JDyl9T6OY>e zjrcA9<^9SVeme?g$n3nfid&b1a{U;q5qv;0D*W(fZMcxgr4(;K3f`B`Q*b%9CM?j&t9DUL+TuKcB`<@| zAKJe%_iKhjNwBEZQdP#f36EZR^c->jm+o}dM(+m8Mwu1u4F_(@ua#eYan-hE_da{G zcNhgSZ;@#J7kNCFF3-HJKOEPFz>uT$&E`lBVgcBbFcL%p+Drq2#aS?c*+)aAfO z*)#laS1#Vgb2NGOo|=u@qZph*RvjvyS1I3GlK*n{%eqy+7g{ zuO=mvwg9jW=c5vb*|j zW-iawUC5Ej$#;N3)ctFU#)|K@C!3dD6T2uXsQ3QQhoiyyd8PX+-iAA8#ZOtLdAgL( zYTgq!;TcLsDu4Ws&DnEqW2=s>{pIMThbFHmS{}=La(OhvE|EK@F0paBRCk{25&rmZ zm-i9f!2XPAm0$n1xE@wnoc3+etndk)fqaX9RW!4B$nNv2sd=p+alqio_n&`-d^g~=dEs1f|K0ASv!NARbAR`}T$$?Vy(BSr=~V@fKrXvW zOP2YR844+t#CEy7-fzEtuXglArdN4eq+|Ez``$A%dQ%;?=>2xRr@mLOu9$Xro1D?4 z*R~HPESdRs@;4)Wp2K2AhE6f@$08Ve|CQWyx)g9nMQ4Ji-Gm^aWZ6A41O;zfMQvPs zcEL)8icgELNql*(o-f7rO8v*CZEJs;Wo@{cG%F(YzF);TmG5tCo;6{CnWLrM&w7P zb|y{Uy?p-Mur0xBuht61Gj`u`TEOmN|xq&DQM3Lo$FD; zno(T&UUuhZ-B9z}M>CfuDzj!POq|1DRHu@a%W-a*t!w|~>q|2$PIE~vS@rtD@2#&3 z^WQCdm%UiDWbMAE3Yn7}bQY*O6op8J3vqUISez?;6gxBTtg|s+O0rzUl0SNJ_l=sj ze?GQ$x9-ZkHL4kns-z}zhiabg^zqhM?SR-M&ZsDb+885DEUC1-fb6$Bgh*w~26vESlYiORN58B*cuE3clY z`Zsm6nwyGsO77zrwSVgW8A7Jp8f zB3~606~8o1l~mL+oS^#HL;sp~+P@pmx8Jjqe7(1p^TU6JFWMb<3fdp}n0WSj+28)p zAbM=Iyw~J?uO(llU3>N3)to&x=l!j=7l|3Qvx@^DP-L>mg*REM^*%$dQ2T!qzE}GeXIjCjNDUsyMPaeqKb^I95+xNMB zIO1>*}^wr=;2sbEPKe>ahw5 zA8!%qILLC6WzMmEjE}!vd*-%n8!!aht+(00HX*UhZi`puM4#=N0hjfc zE*0t2l3_1kP)g#kJXd;t)`J+iOQ~sjk*lkt@9f~^`d;X@_RC$?b*q+b)%;_nD7QHE zoYzyC{`~Ud$A-t>%ZDBOFvTfpd+g;O;g4h2Y&!HLTct8xzE$UX&6>1b*LG!jl>{tQ zVC{T%!BAi-pE8@>ruhqq4&&0#7Eo_gsI=x3PKSuLbu%B7` zl@2E_f1x0kLk)pHjnceIvd(Iqz8Se{(aEL=*@H6Q7V2J{)7DtB-d{gT=+?E>^B(Vu zdSg=dyZ7FQuTc_9mo8ow{Z-UYPe?j*Nz-z%r4xjvv}NAHKb~KG5Eyw&tbM_ph<9rh2ZwHJz*Mw%G24 ziMlIyU3w(4;8D;LjRl<(=BcV%^_{x*?w?xR6^@yOyIvl*{djd{y!1oCdxu3%6)Kq> z`N}Tyr{3%HiU~!jyc3ms6kJwg1gW-<9ms z?55;SnmVI1c5D6iso`D6CSAHPb=!Iu8-IPDKIbPbjRpz+FWh@YzeOi9owl1$$&+G! zT0-6`llRcKCwB3&{)#W7Mb_-vr51Kod1{_2m&e0-UXNTjSQS?A@6Q#kO+IwQ>*yEt z4u+jyzbsDm`1)b@~uYI_}_Sx)R{b9-ni!dHxX6DC1u19p< zPV(99wc~bJfi0Vcu*_u7JGK|UZ!q-HvgQxfTKo1+x$6GpI~kgnU$5@=3SRHP;F-hG z$gF-o?-y5{`r)YTG}j9?j>?}_ofqxpnW>Wdz^|U`&yBn&^V-t|rvF5)c0J6EVqVJE za(U-TV@-C2#uLB&9A&@e{g~b^(X@x>hRyZEPiL(xc&GGn-%{45(sOprrpasb+j~V7 zcihf;8Ni*pGw`MbgOLCBw<#v<}Sij|eYbBDcf(&xedv zFSjRN4Keon>|3?e%Wh_?&Ua12$Hw#S%QRk%DfsauShF^3cc$e5ySOb+wqzV~?2ies zy6r06v+HO4L#L@nEc!DYPJC2(=6}oPad4S`zC@+0(>%|~`=@Ii34d=a8u#(zhi{Aa z&cEV&dc*4%>%MW%Soo-7*ZtkiE1SIRxT^o0tM+->yLZnA57j9zaVW5fA__L&GD-;bFbGiuG8{YyBjIM7{aM8yXTvhG0*%?2Z=QfD`mRN zH~$Lt?yj->Af#knCt7r~?q)^es#WfnVuJz#gxp1cJnj2zdH%qemvcUPbFmkg%xku- zk6QD0!s1tTrF^eMy_}|=>HK!8FPSNs`SDjPPQQsOwoSMfUHz!D>%qq3t(jI|t!938 zUaL5#!Q+&&{6b0l+U#y8>-GC5+`hN5xFq1!IW41YO_SDMRa-P~;uoEju6pVdv@DyI z)t?!DUZc*|`i?Pi)t*mn36U@2o^C2@jhgPaTXn0a%9^Gt0)>wyXWCa@yZlk3v~l^n zAHA zudw@xuD&#IlleNsiP5JO|N}<8uw&xg3_jiS#p>EYI<3;a0XhabNzm`_?ShC zze4V<#M3G-|7b_MPE%gn7q(>94KuB>=4n$yZ-?Gg+_hANS^byK0of(z=iSm0oyXzw z?aZ3?`!5Rpm7V^6xBhv>`P<#cxq($T>z3{eRMmXyAJ*i@X0eiany0$4;Nl05e9o?V z?d1A$U#(k~x{>y4F7sEGA)NPCEUvipE;}~ueq6|`2Cth60`&%OfA0BrdEugMtQ-tX zl|seaCrJN!u~2Jq7Vn(DTD9LYUvp_|T=mKPwCr?1(1!_03IE!k+go{??shx+rHS1} z&-nB4s59cb=LF2H-KeLrz=SEiX{V$zTLO=aVCV0O3#+UXZ&_#^o)-N{a`B}2PsLxB z{=J=T^7E*5W`4h(R-o2IqeV5(|6N=Zr?P`dG-ms+P4!-#Ptz>UDw&7-M}?bPoYwXi z*s^#Br%L0E%K5j?ZL_h7T9=Y^_R5pTPy41WIv3g#^=i$#f0Mhm$@!)Vx4LS-xx;9? zQ{wGUhaV?@UP=^O+3XNv_m}^A);(*7wYRj>R;B&?RkEV+c$Uu6+qFqz-f1FH0TVSF zjO7Jf4qoQgUU|kM@%1+2_vc@_E|;Ci87gTf^V09VPq}a3RL_-LY_=zCV^$0hGh$Fa zf8fBuD~*fFulb3uP?&#eotR9J#r2qp`JaDV$*S%>fEY&Q!p4Jz2{;~kSSuQH&XDbwUml)Gc&wx6q~ZZErRcK^95-;`EO z^?UOiIMn=;pVYiq$Is=qR%+X#D!EXZjT(wmp8tH_Rhzr3{g&pbkmtq+TKeZRBvtg? z&%7KVI!|~_OTOc;D>EhTtz2jb-N$BanX0RKKxC<=%L$)v zJ`<9+sZ{pwThJk}i&N!iSgZ7wAYMPSADlsV3QU-K*37GU@~Eb!Q}*#2mFgCwz7Nth zFE+frm6PRKk2b4%RZ z0C^8ASU z{+z7s{~7!<=A4l(zO=-c?fI!`;rnLXTD`YwmU-*ePY?8#Zt(W%>?uF_sIW(pE!oA! zG9hK5bnB$jl}B0b?8@B}HqqC6cb?g|YYtvBRz@*d9C6>$XK_LyIlyoKGkO2%KYoX> zY)rnnPcyvY#_ssM&`Yy-Z;$=EPcHmlTj#H>A=+0DdUdFGF*LtZW=h_YaZbAA+){_I zc&757bz4?dcsHz7^H?o;dF_&SH@19Qy~5Y`*V?OBuUoip*;U>0pJ68FlqE8CafeSB z&G7z~ede>oQS%K223^H{`$VpZoN8zg+4i47Q>I=)a^kNv6}K9fEQeW*4#r)H#Rap? zO7;DfgG;u3=eo_Spt@v&LzFIN8YQktSt zSSmBSNod8l4?awe3jbz_Dg+subn!Pncbz`vXtR5mwe}M8fNd8}Fr189YQ%mdg;Vyo zux;D2UXi>%yK+x0PF%_)yZOAVZ*lnR>93vIa{}&LsbBJ5b@a#97x(netQT=~VfMRq z@IS+xg{LQ;XIRg&B&+{p{;>%M1)I!d*Dt%Ve*J%j1@afncTcl3KbCYxmhn|pb;Wh2 z@1_mmSNB&ZdgVR1&aN`Sh55uMk9Wn2u^;DKJ&ZB#sa^D7lR=pCnchuhxAr}IY|phm z;g(>4V1M3>PqNxok9Ti-{C)20OKT5*Rli)<_cnX!i~Fy)mA%={zGn8Nwx^zcyZYmn zdIl`7<<9)i;CaqdQ)_}c=dbGz|0!I$#m>dt*mQ=?f7#cc(tkZ3e!lfFz0sssum16s zOa8C@Uxt0$_RD?yihr}Ma?AGC+UBxltuN<&)O*=?hgNo_f@h(xuAiab1+NgPOhs1L zEKPsO%d1QJ9x3(Ruw$OE>P-1E8I$yVOUezOyX|Hyi#@z~o7>ymDQ3HN$6B9Yp6C&o zsi|gU-8_HJndk9hHI4^z-{h-#s=KMoNw<{#dV6Wz;Xgu$UN=R@=>KAmsnwhs>~z)t z;r*hmUo4hQ3K7i@Zka!6DbM+O+uvtiwc1?UxG`Mo`0=cr2en1m>*t2w+I1{1L~Cu{ z(qGeJDs&gL6i#r|v6VC(on2n^XN9pRg-C zYi-&k9RGUH;V$P!&8X{|Oq>=>ERF)})!(nSzH{uw1|Bx+x4+-Nb+{9`Dz{;P?eslKk-5z?m;;Z}0a^K54 zwO`4s-w_^@?G(Sh*6Yc3naV8ff)z8=>e!EUKIAW&xFpPj@nfy~e}+#gVdptAqsoH& zzkHFK@igMt)t8OIhh|N^{VBqWGfL>Kv(ADhVY^wj2J;^A^zf>c?4Ra#P4v9qjPLBC z&!=vjdV>8-P~zOZDQoq*w3GFWE;$BGx*4LBJaOJJuVD2Kl|L_gHg7UIsjRf~`LuJk zM&&Ed3v}@|&5^gcXuCK&IA(oM1n=XVNNusx`HG>})n2}Ai1@RrurzUoPT<1`Tl=~JG8$1UA4ZXWL599Lm{*JKTfe~ zU9i$YlW+S8xqHeRe_Z(1RD5em$C)Cx@ciY9ubm?c)qA~pz1V%^&$U&xCokKkST<|# z_2m6QZ8C-Yx3yvmCo%ang|wZttLgjTBRRh@(p&b5-^JF%6NV=i=pEg1aptE3p>Z!{ z_m+ykzRF#4YDqzeS$Npgl@2D1Ej$iB^?@hPB&jd_lUQyjI3qs!{JZ{jT8#(Zd^G92 zm6`Um=U-{j{6$Oh+V)MK=p-E#G}U6`J0TY#KJ}f_`&wUY_EgS%#BWr@G^o)c32yX*Nq>sFCJ9!4rJ^+mQ!lQZ~S@L?UC@Yi^rZY z{Aoz2t30`$d+Vf_Ij!FoxV+u=_I2jbe~Mh(-YYc?<=%NR#<6_+cq7C4$ZpBs4&ut! z^8>GSOp1&=wD81-*YCspm;7f4ik_alw2S-fp;a%_qdPrv4^g?i(NfQlsl0m(S{!eVcIE z=(o5W9sT{?V<~stb9SK*HIn$PR*A2h9wWC_b8FD1M_FM)@lJbJc?PK4E)xCw=f~yc zGkY5E*-wA^`+`yH#4o$9F5GH1JC@Z}ptw0_solGvMH38fJm`?!|8i;h9T%g7x3Art z^Ax9~q-Lp?y-U5J6UuM2EM{f6*oCR{6ob@6itW{mJbzemNiVDTttK7t-B@_Xs>Fiw z6~AxoJQWpv_S~kGhgSZ}nsC_jaUX|^e_l}X_DRXEH>34$-gt4Y^6kSvFTEA_?=X7d zcB3kgm*3^roZtLfi|el5_ByC4H!nyUnpb%6kWA1;b+he`T z)Xu{GtQ$*j%uT%MXj&%yblZOh4-v~CyNfxt+$TJ^8Wz8Md*hK`d!Dt*#4_V!5`iMY+Mq=2!5cqk+NY3-cB|%r81!;`rdHbwjIS{1wjw%CblPE_S^CAyV}4vT5oE zGEJVGQt`F4{JP-m`90t2?(YhY$~Twt;@a-y?RrpV@~5Rf>azPvp0|HEy?W+MCB`kE z726p8{9Pp=sza1ws_q=_;}yH zD^X=#M|O$`R^GUh+RnYDS7jnw;gRY}o+QUPSs@QQI{)tZHqX*#b;gg?tqOC06O_o*Jvg1YY zwxbMFSXQRS{%2UFzO$*o&+f+c^~p&t9k=@`ERGubsdR7J_Ep>YvbL!BvSlwHFxY-@ zC_l4If#a>52YaA%(T4J44t6Q}*P1e{W4;|%>(wY*D=M<>tahHJ!Zz&|_d|&(aSwO4fvC4{z#v*ICgfQq%nI!(|{LTm?J3hu^QxE5?;p}x|?^Kyz zxMA&rHiyf#~;m=w;z2_V!yRzTfhbRh}a{$x@?5Sf%4FTTOOGMfHht z2FA;O_8w)M6KBSsZyXdj&wz9X*iKpCmsD6JX9mt@u(!EK| zQ-E^S{$D!Z*#>^GNx(g@y^$NW@dB$MF*ANdzN}xF-`ga`{TaQh6E4h zM}9r~ri%2vdp_gO>+tN>pFwpG*PzlvV#K3kQ0+tWI$*V}#NkBJ^3Tg+8hS5Mk~LXG8{ z?3ZKbo_-ZOAo;S%|38C1%ActO#quRTguf{&! zl6^}|I(hXTyHC-}x362cYRSF_JuD4kjW^`f4<7H$&CWZ&ZO+6~n+vNxHYm!zvcDDn zZr}Zk8})^`Ya6%A1O=beTv_>P^2`|wYDLKk@|IVOmX+`7SVR=fBWxQ%p}(+FbE!d6KS>k-u$A!3(L9`7(lE?(JoqJ7dji*V)O5 zZ{IHV`MEV{)9KZZxL0Wf_pSI5)br`oRHI~tA68Naj^>?N8XoxV_y- zrrnwHOTjx!Q;=mz<8j5{2WRS*%vW}}zd~<`Kt%d7ajX=d9`i|H}(!ZMB%L|K(@r9Visc^%CxHaEz9 z@-s%|dkc2Voavq#^Xk{3U)$GgdRsm1+>58l)4o`|%6Rqez=VBTQ>I+D;Aj={|6C}n z;3IOqPXF=!;(S{p3IA^;12i^_G!O3+Ki(THc>wdHb{IuaAcnnm35O{?4v=SHx?vF-SHV)o)v4_(x}>vB@T_zC_Ox-|XzR*nV+m*>|Ai*nbAA zpm(R%F5e}RBkTRr-LGn5-r|Jd;Lz~fQ_kjUYue2A($>6k*GYL+hs~AL*aeL1CV#L$ zE<34A$!FC(AIA892hwkwep-G!{(>Fz(RH(4IVG1_s#ZQf&Yp8=-{i%zqFe9X+g)v&t#o_7R}%y|A8e0l@Mz_n?au#irR&;-(-d2u@3V_D zJzTN>KSOu1t3sZ%d4~J1-D~QW6IXZyg4p^ z%WqWigg=pC%$?w}_O8sX$M?E*%Xf#qoAuh+)8}ficc$yc6K2Z}`Z-OVDd5rfdi81Kv ze}+q(J^~y^{AQl$RB5`CIeXLlVr7{wrbTuY|^?eoy&e9_YuZN4s_oMxQgm-%9wYuq*CCG!PmvDH1^ z~Gc7=d%X{rYg$XGFQyC4vGw_Jj@ke=5+qEV5 z#2o)wJb!=sWoUexzKYS#9fzhrS+l#pFpN?B)$+LI>y|A`Jr;bzs(tF2os%aiF|zQS zX`Dafd5q`&mkU!HCdn%{+?Tao_2ydBb)E$YpAS5Jxwr1xo#4WC1^a_ucY8CQ^YC~O zwA3N|KSPd$@C0SYNZTI4ndv$8GH~gZ0d2aHX)F~X_zCM5Jt8E^zR(jsLRaawt z_q~$Q2-$tYqM@Lok(1eg#piroAnSLo%0Sr}?7>ZU4$95dl3df0z4T`1x0S!Q-CtC? zv2gJ!4{aGwubt|Tl{Zz-FHkn@nVTk^n^Sti>=j$#%iy(#cqIAk+Mjzz-S*C1vTVUd zj*QkFQ%{EQ2-F>z_xxSqlJ$o!KDV7H|GVgCU~$ahiOa)mwy)T&`|b7iX{+{bFJF`T zElfn#vzftPAc>`ri9NCJR%-5z?~C}n`I-0ceCh7ZQ|=h7(zoDE*4>iruP$v|9=YJ2 zt7Z<1iI(Fxl|;k0|5lz)NGraj8zy|yXGPkK5dC`0v1_n@-a9*A&NDEb8#VCUWM0z4&iJ{|Q_;yWr_6_WpM=Ee7?W@%N=z4q8Qcd6=OpF>@OVV3Dvj_;^*WavCs z^J3{Vk0lqncGXnbT7J^#HHtA&a8S0-m~ynO%gC8c=}4&7%wz;x4dQqy9U zzLWN;b-z>}N8E|gEBlbU{dVmt>BU;749q@$_?xZwi*eMY zr^jAJ*q&!{(KzDy#QogXkZ{|lyz_;B2Xe^V(TmP9yj$kgy>;TTl~YcvbgEpSYcN&Q z$X&ydsrr@oj5t!$O(`=P7~4b6We z)VB!UIk$LsJ8X8mjX=E+=EFj~Z^739$QgZ=G)hR2ax9tPaj%oO#OTWno*ZU3DK zZ*Frj-xu%NyFO`0+={oR;#$ilO=spPRrF>S+9YhCQoo|-+=9&y4^?bRR+#@OILf0vsL-kZ)|4KQs3F&UiV3vrQ(wM!*%&$1wWV% zJpOcV#u|E>Z7SzDwTbjXa2Q^OCQvk zC@^WR`pW{~#9C-itd~S%ol2x_cg$Y*k z>h-o}W}b6yK7DG#?(G55flnH?R_|z;!N$n4{ITr5e-7%-cMkA)ZA|zluQg9|hS_$R zS4toM$!}l#{?58k-OE7+H!H7;>}+Hy|5V3+=f#d6#hw=OB@@q>xA$Giy>hW5#*m9Y zc;l)!eQVy`UhABn+q>SYU8X>7?yTjOJA+#5g4mcGCn`E6Z|mRs@Y*9|mGjf=A4$CL z7w$i`%J*2vhBq4<&#$X}c`ka0u*&!O!o3=Em5WzB zEbjlr%>Mj$h>^ANt7G@0de_~!Eh+N$O8j2wNH(sZ%`aElCCIG)Yq(-f4$Iw->NWKf z?|)gKm#aL{Y3hH5x92~twf3F9_>I`Bm23E}USZYbTIp+VG-cvcO|5qZYx?(<7Cx*o zRr~k&?BDY-=`SBf+4F4=Na|@-pC6U=<~84s8&g;PXK2cwF8XG>=Ze&pmOYvm5!kn{uI0Y{$Qw5=Sk}V`u+Ux-D)U@g+=dS+Hi4 zlSh!3=dqN7f_=ZfY+$*~9TZpaJfix?@xH8?xzc=3_82@}b$M%+#_Onpi}nG(y>dAV zHy@a^V=70}jQmd zuI!QKWId`eX^DRW$NSqI%8$L?r%m3X(Y(ve)^_C=K`XT%=a*@`dobax^p&?}%>hev zHPml0ElQ2-`SZ7Jn)0Gkt8S{)?5$bxaZA;Cd#)u{)-5}EZSVTxJEvo9wPqJhT|PID z)z)i@!!w^k6+g$B>TgU7r!F#-xcc*VZPsd;b$X8KlB+MeoC=ZFS<31DWu?cr<%SH5 zH|jTD4Sh1vXu-s1{tv_0cg^+SC|`L@Gvu}_!)Kd=jF+m9&w0M%uNvF-JzJ+vFxsF{ zYI9cl+jq{tmv^pA)6EEHWtwN>Rl#8Go~*2X!Y(Dt*TAp!j>dV#(-OCuCW=n>o^$2G zvgIjNxqEDbDmv8f75SX;E40tIVw@}JlG2}lZnN@{=ex4XZl@IO{B~u*mQNnxf-DnH zzf(AWKG#Df=wQTyim+8D__e0*kK0!6qr2{2x$3=Hy&NrV-t*5E9AGGvpD|N;!#g9- zi)MHG_L@E4`}tRH+>T9=(j33s+qCD(M*nKLGdFnI>ctCcZ}XjZgHs9_6R)Uc;<*xHKRb? zgT;?z6lJDNycvFYuHC~qS?}BLmR&o)v_0_rbFbc?7guxd&M)q^HUDE?7;4?O;kW+U zO}BRNsd7%>uwi`6=4hcFdQbAN@k8ArpNY@IJD>jw{iOf;{E=@D40fu!2k#AzoAP$o z-7`=BUEHxgZ*S=mhpL$|6`GE&PDzZGoCX|kze&x1^q=9SP1!6TE~n&z`*x%$S^{;ozTM6FZw#{tb!&tUQTeMbc$1nE}^|7pKmhtpStKGYC=G%3l z<_CQ3f{rv;t$X~;zUu8Ab5eJ^%Lf*=Z|3T&XLw^wLBBprX23yU^=D!jH!=eGGqdNwfIh!a09kT^3E7*7r2f z@6g+j{JH?<1OX!^kB~`zehwCIB}INDcUfOrshqq}I{A9;tDEWNnQMD=MC5AvPuH$W zx^`;qm!?T=&(?ct8NcH5;`*W9m(2X_%K7|E#jCgL|ET;evgb>DJ%9DMrL z`ER$wrK4NswM_16-W~gT()M}I_3bQ6r))dj>gCWl)#F_9_MiW5^~ot0T(?@zQaO3! zSDE&$M!TBC940>LIxy?iuU%S4yQa-r65+|K>Eg6dVWokQqy75&nHTo>w@Ys|I`XWt4UkhMT!y_@8ORHz z^RM*dwHebTPMuhlYR!}TFa7&eGIy_a^9Ngm}?>)k?N|c z364tjPH85&y?c*{JFY8S(C1t@-*K$Nz3!aRp=g7W84`=_*ak!mOFLSx-siWIt z{+&5lEw?W*o%+vUA~ZuqD0zyH&)+taN#8UTb;J~Q$UnXJ{N?32VB-ADO$h0r6ulb_Do zUG2T*c6mvV!LqE3td$`%pEOi{QdXG6mSiZzVZUsh!QW4BX7n$V`Fl3_*~W?uGE+Yv zT&8q%(Ozw{SJS+giAb?4x^VjRJ*i**cF9xaW6NXGSKN7g+hWam<^>GAagR2J1WZxQ zW^CwZbPD1#{`a5Z&Wp}mljBn)&+$ExKVBte$mKv&2nd%P}!4kB|OaaiKwx6 zj!4Ogg!-+Z~bcQUgst*_qlvd~~bT1xXf_WgM`B$b^$yk5EXsi4Qq-d(-!m$O)& z8Js`SmG^eurl?h!hpW%ce7nAcp*=LD={8Fz^X2%7Y<3^-e7m~$&7=DFJLUFow^BOw za;8CwsDAua+u458UatMBUT}Sf(3Ifl3o%ibJeluIWVT(tzf)?}haK1BBu~iAu6Z%B z(jxHYHqFreX3K+qwQpUYzwfEuy~WOUdu_LJ>REkJOFWvuXz_r}W=^kl_Oos09`-x$ zb6;k*bWfD_?gbGO?k&!0dKr4}a#rnDdtZN#mTl8k32pVB@`~d-N1scA+qUhI2i}?| zU-Q20q~k->odWf5~?PzbR)^jdqq<=sv1% ze(t}pI3P z^Ixm}+Vs}3Z1G}q>u67#8D5Q=F%!>fh7^6iZ~52cdt>Z@Z@-pU)fWCSQ5Wib=6kNz zY+>>)y=kXziQIW3I&F9IYYjfDmfus>om5q0IbdJE@nX1)(Xrs$(H^GjC*FVRbxW2Z zx54W2@nz9xBFs(g?k!(axwsPwWy`nYg}3FuT`6`uweSO9t(K>3`K1j0@-OYV`LD`@3zmJk z@~D|lcjM`aA$2K&ERD}uYF_M){H?+%^__iQ-IRI%E~>^lJbYce{IA~rH|s)@YhvGT zGpP>t4>}dLq`~v>{TZDH*bPMI9eL)Q_xC~j=U;E5ZbjQwuibw6>vC_C4zGYEOLiXd z3Rd|p$3O4>vWZ7E-7c<<{JMDE0wad=8-HGzs=IB|f^AZcsdCpYB%C^^W?<*>_TS$v zSx4TO=G3mv)RQtWxGiP8@K&y&e_(WR#PTJMTHz`mHCgx$OqNi1e&pXhF@_0GitMH? z+mqV%ctJ#^ksv$SbB+?7wE3L!>rmM}G>*#6_@eg+(!H)&zmhwX8SjE|;HQSS&!SMz_` zdHw{`ZKrF*Uj}O~7k~Nd0D}XAWqo>V>dBuK*DcrdguHpTNy@x6o3m`~ zE%ojW-F{!MCtQs}>?(af1e8p#RrCn?9y)XC;1XqaYr+17Oa5573q6^{_~Dt&#)yB5 zbHDTiUbto(FeSw7L(nA;%>ZMW-!pueKdty_`E)<;fu-|q#jKk9iQ)M9xiLHDtTYMS zGJDt0i9bT*4&4o73(c#|{b#o3_q70iv5OsA$}BV4zx~eQGd-}7OGRsq^lACOdum_Y z*r#3@%GJbG)qkkFob^%He}=QWW4A89Q@E5_mvttCV^Dv|1pf)flciRF+~2nLp|ec@ z)9w1{52J6?B$hHJP1k8ZAhy=aw=s{WInZhSkrkhcZ99eKx3Rxnb>%+;`z?=K_jJ$C zjm_D=HeSS4qF3+PfiHiv=bMU@oL5`;EA7>_1=3jqdP`8ymJUHUA-{mWeKgzjwu9ctZ>J%ifse0n=EysIa@@(^) zEK=P1x`)YM&mlir_`T2FMU~z3 zwjW>N^Zik6tjSyadrEFAUpHlYYpu1@?9{baER&oMi3Jp%VMulIDepAA@8T>IwAfSV zL0|ge+jDc3&K+lc75U`K71c>y%j29XK6^!KYT2IfnZaxk@L{gY(I?w%x6ECV{AbGq z_ig1XoV?l-m!I2q>94fOs>OvK%d#hyY6QfnT6{bCyWirXe|@Uxf`HwV=N|Xhy7%m2 zRL<@x`L+MHHs%HCwPvS&Ulsj`(>rI%?@84&XPjeN#_}p!PLiW{Nm*#v{mX~HFBRut zo_cvnzd-SiuPc^m%vzOO(#w7+I`ZH%lC=zx;Wn-a09d zLQTuuy&9RbgJhTaYM=k7w?Q)Kbc~mRi}r6OLj|7)yrPxNn~rTh(-&Ntxbw?_CCqIz z=Ci!1pYtnd+5McQ3+{+dV)YL7wz>1HA*#A7L5sca^6hQLz17_{VXE80B1@*+SZ6pt z%jgiBcUwjE@*`gtEzi80=&`O)<`tuMsiT5s@f{Tp2AKzSv)08##5fzwd)dKWVE5wt z!bzD=Yo71DKG%EEm#(to)>ExH8MzEL1^FC^4>LjKmYQlC7Y}W4gN;$T0J=f!3u`qGdtVee5je~rw3s)4j z_`2GQPSwBkgkPav(ZBKUAOFg0(?3t|u6g@lx%}VFQMZpT%R86cUv;zUMy!|YzoZG1 z#9nn(tkqPue7EkNz2>!RKGSt?-ag(Yr|_ia4>p}CR&F|e;`xndy|dfzcpQFXq5Rs1 z+v;21(`74HZn?YGbakYsNAAhM4h04uWO1GS z|MPPDwV9!ByJpvB_Xk~GTFO^Fn_1C7;ZxsZ<$XPEx2|21O|{=q`Jdsq7nAi_qbW&7 zRef96dWV7_T;z5MFw)OCWni)j+S-u?d!TbRFJxNi1$cC+03&wmWPO{V55&-L5x za{HiPz}4VgtzN;W*i{98PU2%P{JHp?OU9B&nX(?kI0wP~+X|lszS|lk$D(@R(gn{) zoGhHj1s{BWV%>3WukJ)4elE>-9_PYY!a9TI%n#H^^-@?Sl%nRw!ZG=%3gm&%eA>ScP@^mb|Io zKCABPk5zSbDJXO}@=Ymyf4=bAuq&<_iPQ7Fv^TmP+4(eZt4RK~o23~QCnlcgQ&?)d zvBQ2(`9-69@;es$x@%1N&+u5%>(E=NrjRAuUN-S>mDw@hcKPx7+mbx>LcBTJKsf-Y-BTJv|cMH1DG%tX~@ucj*AGTj)R~_wG^KM)g%?Nm8enL*tKIW42 z(SsJhWMQIgRf*Ef5yqKWF;d+qxoIVcz$J=TA@g z8GPGf(J`szy_z0@UKWL(8ZwnjIRCO7ubk^N?VWI3^<5*!+qXqBJ3TKMH$U{6d-GE6 z&K>jXwsNoC7Cz7t{XF6Ytg3KB~Jdy?SSd*u#R#=kvER-}F>zIk|B; zcL~?h#4X#xZfAM~F?b5F@Y%B;;s5$(vEt--n*`e_~I%9e8QZL;@ zONto0etvIYV}Jco%4F~H#0Jq?^Gh{`0mt(WmajiGOG;~(`-(i1lBo>y&n@w{WZ(Xd zulhejEpN8U#M=g()35HcdBqXhGb^UtH~Z$cZFe@^sXwBz=F1tYn~Ypq^E5e`RGw;m zmOUx^*oyPrJC(Hbm`wX}3*Gs`Po2(ZZ9Ls#UAlbkyl2rdQ#ZCQN>^AhqhaN5{!69_ z)Bb$(iY>4)p7r&}Q@`M<;v0IaJaSuieY(xU6p|qN%%`UQ3cv7r&jN{F(n$++@& zZQH7q2V_-Q4>1N!*H%%fcCa&Gk-PD?bnTMDMRVhOs~#$PGVMH;v8_8Z_)@#pI)A09 z^W@&82)IwOySvQe!bV4viLLc}>~D8ni{2T?l9KNg`}3>n<+yoji@NSD&C|Xap>Vlj ziP1x0u9$=0z4mT$`0Lj>%;6MnHP z^cAQ~;1ujT@OU5Rk(pm&JcDEndIZlOuaCMG9nS9gq*miq+uguIr?(;efh*SYiqGd@ z-t(M^rCFZgPlo&!jmDtDdv^aB0#7vvip}=RyjuLyD5H$|cVDB4nYNeimh)z(vg6kj zJ&lmn(UbLd)DfL{QlnrgqrnuN85gxV+js-N@7c=SpTPL*;@-~WNnax}R=tkjAG0@Y z+Jsg%r;|Po2FZ4p)%VOiBD_#}+GEF4O*LMRwoSU}^(5at?ANimi+37^`)Xxq#!qf} zW|^#3=fhL`C3U@I@02ASr^0Vk2VKq!`TjH1;#b>^@7A%e&hB5a+WXq7CmaQZ6CR2G z3^52+->786R4g0(@bry&cMiQ-9B|I_yuH@ixWh^(RD+!Qxvt&6W|jMP`>a*V)`_er zZTVFC%ITq3kIR%McVQ#r`z){LMQeTd-9GbRfuHs6zuF56Ui)u%P~5^$u{kL2^rg1k z_w5qv_V1eVbivuF=d^7YRK2y>pQ%*z8!66zB<=mh?4{h}zqN;IIZxPY$<;Tcw>f<` zYPkP9`Q@)?y*k&l%(UkRi@Y=2vU2m0RSrP~D$ERw>U;iFT;lFd^XOUUZs+oT`&9LL zXWZ2*Qhr<(U6p!y!-@P4r*b{DJP%A%DN~o-U0-u<`}~g66PNHL|26r?J@3K$>l%~J z2IuS#47n=pH+9;r%$Sx5lO}mNvN5Q5C{^?QShS)i`q|5nA9_FK|1?;vo;bJV?;Q8Q zSB1{+qjr9Xl3BE5LG(ff-90LNA5SH?OkyiyKel!AzS5ovY{t*(meh%))PyQcmc0Cg z{c-$?+kWB_(WNJxL?#OyIvDX@B~A3kM23m?3|G`Cdg{rz%HPY27XH!baI(bbtV%?_ zJmbrs*Y+Md^wmn>mbUmA4PD1vjkR1_e-5AT`6*y5Wny}tbK9GxNBSxgg!^L}`*SWB z-}9Wj^rZTKhKFl6yZp?*vSL-~t;Mut;lGu+B;?j{rqoO;7cS z!gFQ{J}c!t&fa&=b%*r!NzXqX%{`(o;<4$pn)~MkXZ5d5kG`|z%RwH^*6O{b3%xZP zZ*MfdrBY#)bI0vo#qYQ6A5%{C`=pB~aa_wg9eMop%V^P%n0YHJbEK9wLPI~J?XP*D@etTPG*oBWe%QM&WRi+l1eO&Q-u4xvxSFFSm$EXQ0 zDnec@OBjp|4Ql*zH6C62cvdRJ zr>LpQ!py?__&r0DolS4#jyaQbxa5o!mG(JDcz!AzQ}cKv;C3X2dHs<&8gjE|^_?rW`}Qtmy-nAxmCM)`ZoBR6 zn&EHs>5RZSMTdrZvt;&zz0z6VUQ13CjGXsI(w;ScEvQ~o{G;4s*|ooE?t#c8uDI2 zWuXVFl7nWSvPx&*F}6ZuHq$wQp1e`d*cw7?f+b8FI zZOffo@3y?mZ11|^bs%bQe${8??H%XkCEl*&+}pU@+`{pJ-Pg#={gNWuGRe!IyM431 z_U85y>C|(3KOSIN8298rGpFQ-IgN7mvF2NLu_Y=^ivDYHrpm@MOSj6#CCw_UTln_( zR;yLgt1{MR{b!hSxw&g6r-W@M!}rcPDX%gvs!JtU*r^0fX}ub`xl1c;(eH>?_UwsOa{~4yspSI`hpL~PCRQo`a-kDP&ecjLZ=DC%oTNfR5 zv&~vid0b0AY1-1Q3@?HjR9%kDVW|1H@$Z&`&7CnjnOS&`8^3npuDBqJ-k%I6E zKL)#WE349oyQRf>wOP(3y=t9oS37fq*B-mew?$OdMc4AgV`JOnXX^JY-dL)aQhN32 zX}PO5-8W@)Ys4-*KD9hGqm+|nE$gNbr|Kzxc%C1tyVWI}_vYnt<>Q6@c_F+<5AQFW z%~1LN)A~M<39C{LYXp4fcruwyVN!vM{}I9O7sGn9IXPB8Ztj;lm06->W?g*Z_^Exe zi3_-{T-FZqTB>HLuD~H=Q*-|9g8Ut|3*PO&FWuZ1YPxKT)5RRtnx)HZU9As3*;T=m z8sz@7Em@HH%VoVo3=6*eX9z2Bbv4;>_xt*mZ4u5(y%J|t`0xJQ!NhR9@y4HTA6KYs zFm#iR=6-zP%5~vy9Zc6wG3d8VSuRpBRbc4~nMs`X>at&jJ=U$gxjlWJ%lrPH&z+~2 zDi%h+kt%6j-dSqHFj09T^X(5Rm4E$j2j21&34B>;YWVDig89l%Yi!PNs&%}OW!2M7 z)wZel#311Dx$hqTji2wAotZTuv*xEzT?^Ndv!`uVPEFsvV2!F#xc!tRlNiHi{<$b< z@o`1kv>V?WuetloKf`rVNwwP4VtLz@eW4+YFP1(jj{NS~B0QOWr_1?+!u@kSu6y20 zNPjx(?3R{tcdhyxnc1%+6c~OnYO$)ERI_frRsG|@HPQamHN`U9j$hicYeEy#<*e@9 z^|!X)&DBt;-YLW9RMG!d;Y`_2E72c4fg6|;Z0p-Ls%P9;o%ec|RsE(JpgYCw>)o(664lY^sdavEaZ)ZDpdv02*7pvtx zqx44ZqD34kLY#I_Ch;WM=lXxExbe+(x_$ifW!Y*W=eA28pBj?;@Vf5XE9$bow+j}{ zYd9_?rSdDoXUV^OEv8~Ei}$xff6w+5b&i(ReEitWu~Jg@;^*gHR>vhGze+gOrkWK@ zX@0f*iLg=n-)i2h?_Pb@yRV&ANS^yb>~m3;nDiHyCa;isjsFab_We_i+x@8QNYi7M z(sNpzGOsS}&G0;Zdy43ptC~lR8XmRm?hguXnN)wLe~)9Yt4ybzsLGXn$@Rs-n+=&y z&9m0~^Z1-;!PY(Ay&D*U{GYJ1-@W0M($oBL@$=o=dmB5;UTJjX9yeR|gwN-cP)xa%T!G5);z4gq(CJCS8S)#ElWC5Sa5szg)?%&g z_2wxtiA-f$*{iX+E%U+|MytxcnEwn?Mv;#clZB?KuMOc_T3Ysg<5c%;Yx$*?uPght z+xSb}B$=#zZA+^BOg@QC^|EAG5Tv51+*f+Bj^{^0A*W-BR>PB5Pq+OJ(o_AY(NN}B zRGDyT^Rigi#9b*luEE!Cn#W4jZmnBs-=95ytA&*U9&ho_lQBzPTDZ!>;Xk|DWOMvxR3}Dya2%DFs#-uvog-mwJ7s}r4kqO&w>OEnWcqccjm^geR4_c$GY;UF*koB7C& z!(Im@8s)72O|f0%@$TUFc(%o_Gd6aGw5M)&)C>x5nq=zVm-o0)pzK9B!| zPv8Gz*8a6;+KZ~UuK4?`_qtRr;zZ=MvboxseO5Ohy1F*SBW z`P1zWqqOgYeG8Lf+Iihy!IHstk(!77_Br*r5eW~4Pl-k)ot}Kp=7YUF@5{AEzc?m$ zhO-}Au-ZK7+UYsDIYI$K(NUd@DsIIeRv!HL*z5jd?~v(l7S2ds|28lteoN{ymZp7d z?Q(aI?%FPtF?(sIccIEQ&#$3JI#g!L+1>nG5>md+oN=jJuF8{s;rG$*1shk)i~FFS zFZ1j38jd|H4@uiC?l)=T@N#fenO|tcdEofW_Y2=w^zLjnFg~(pn{wfNu6eHSg-x;w zjOXxN>1FzO-F#v=f$!#f=FpCcBbS8w3)|%1s()Ov z;a+r<@7fu2e)3qB`{r0_yMQ-P%!5r{}uilBwYJ z>6fQ+9^0qjp~|MXvOkE`^tz{$^zzDSA-krs7V6D&(DQivn`h=R)8(SAjXK84+aA{{ z)NJ%se#Co2$W8j&+7hE>^U^OS@`+7j?g+DJn&2)of8Hf_nQA}N=q-D9O>}As^>aLV zwcq(%$Lwx{0Fkh$Q-Us;thPoWmrfn_2%ZppTcz<9gR;tVtNC3H&otgHn(%^MVP5-3 z%eB3qw#l|VKAimduXftS;$^-Gi;JeM)NK%X-K`n<@Lj9Re}+J=pUOfe+_C42HJaQM zcsKbi{Io2vP~*nmIqS=RUZ3l48gzN7$+jipFPCO=7p7=5sid)Q@t5dle;#$?T#(z= z>HAI;uAX*p>Y8mCKecpY(RPV@C z*FMAMIn6tsKPdLP+#w#YdgB`XO@~4Rr`jw?sY_aJ{V!R_!&v>~mB_wVmWmxOG*2^& z)Fduq>|FZn;dZk~jzS-6pHmepEN&c-WMMG=JlTHbrRbOIuAPlM=awxLn;m!4jp3P1 z`GRc|58m2cFRFNCLeNavX%)?e>}(v3cjiW}{V`dh$)i~=y4LOMl{d>%JB;RJeP3x7 z?W>z>8zt>8#xEk$G*7_mfPzL5`}|vWznVL=JPsZ&bKZ9Q`JIc|mA8()T&{fP`N!4e z={d10GVW$hi~Abs!>8c2qep0^ZKIugFLP;DSg{0y0rLW7yRXOf@^3y(IFOQiBIMGG zc$0HRAH3i0xH9SGOqt*l$zSL1dC{GHJb3>-=Gs<%(fv2Ccv#B({AW5hRj-1CewX@BUAQmqslR5U zvG9$5yRr|x^-uq{SyS*UvsM4jmC+q0Pdw!f-$r*7?bWthYOCE5(z4LQXQiX7t4hZ~ z%l{0YmmK@WxH9e3zJeW3-*=_-q*xp{zhF^!>AKH>r!Vx~*8lQ-QT8uv6vVqVywK zC-f^CT6s<}F<8oezcqF1!-*TTPTbbr>$Nn|Z`UfeYtnpER$f@DDB#C&%+C1sxz*Ec z{Jf*v`odJceE3UY*sZ+S*o=8ac${n{g?<{-#N@wAfQwbl{_dJ;>IAh)g(>>cK zuJO0lJA3-n!mCS`eXX0zgx+2VyOnuMrDg4vUz);m)Cw&Aw%OSo z_Fi-5l26Gt?Ub9FEW?EEs|kd?(9GGv#ib&A+;IMkAAi4^PWe=p<*&ET-mbJWpW(jf zr&-|^s&?y5K6>>V+PGK#p8w}uXPu(i1jiedTwff8NgwDoBep@j|12C7{+F>h}R#jnm?+rSo-xms+)?LLLJCo_~M zo_xbK$<6WP@niX_eK#zx6<7vb^i$@Scp3cmZ##?|ShFLs* zq~u{CX)(!Te@1Vo^1O`K(YguKY*x4z)Ovn3F1oZUuqt-hJQZWNTY-P}DRBl)ev%;A z`0I>kx#|(uE3D4%=bs6i;BGDMFLP|_ z5(JDV-&SYwFkJbrqNdz2)`7j|?X`xtOcT!UTxff2iRM?;Y14KkZS`a1_HS6JaI#-O zC0p>r=ez^LdnG0}o}QbKH0i$Y%U!dJzsf9h-1MJeqeGeHiKg@?6YqPTebQa>rm`<2 z`P-=?|FY#8_x48J&6{g#%ILE+=vFA_&zW3u{`dEFZhY*hs!;pb>A*A3O?TRLTRm@o z-}^l$=4HOw>8sbS7CBdx^?sL%P~W4Wl^ENh(xf85c*Q&>_ra9TlUDoQnH@jX(Y-Z| z*}5VjWZK(bZ(f~u`4xF()~j=yzp@>@ylZv&L;aAmYyH$VE}F8C_k+Lc(irDlJ}wW9 zDVmNfue{}qE7rNV2fpGwu=w#C^&Q*`*y47qs|_tpuKvn;&1!MQtu?u;o&7ClSH-ST zdvbMuZZ);ZGo4#B2(!P)30a-F@>z>6_En2LaCscgmqn65?$t&{ySxqYwJX&yO z?OBi8x0Um=+HN--6nPes#Q9;FZP>%5c1=^=`F4pc@#^sOI5Oj}Lxq3fh?sdLDiw%1uwIZaDimLF8J|NO|LKkdEM`?tZnPA%u~v(5g0lS`UQATu{H zJykzBKPSJaxF9h(RfwI7m5VvZ-HA(+iw`21mYI{9mzbL>B+ezuC7fqqZeea{VQ3I< zZftI%=M~`U&LzVo22q-yl$DxXqMwwMU!h-_o0%uXB*cJSwjwvN0xT=a#SPJ0o|#gT zA;c!d!o~E3nM;{V2qK)ElUQ7=UtF12oLVBp%f-#b33mZ5lcDxXaPdHlNXswEO)L@O z;9}!q0lQb4OAsOhF+3+RDK!UT^I>)_SuSyiB(i59N|>Y=xWr+>lAMv4mzSDTEW|3s z%*Dw3#`*eBwR9#15XcDnC&{(w94D6;%n2E(nduoN5Iepzae2c79_+i4)QS=zb1qXZ z>M8uwOPa~Ru)URmLE;ud zOl1KB1Ahwx!?yEC{t*MEmyD8<0xNy}^73-M%+zE(P=LZMBB%ha5;p_MK0T0q@UT;G zNli;E%_&g;rD{+?SX!h2E(#TN(GxM*8j+lZ4Dp_^d> z!&HV@4D%V5Fsx))$FPZEJHsA^gAB(QPBUC!xXN&w;Q_-_hSv-q7``$5W@Kb!XXIrR zW|U-W;9~7V6ikBnL?OinbMgGnW~stnEIGzFfC$Q z!?ca*Ak%54t4t4=UNe1VW?<%K7GqXq)@8O}c4qcxj$%$@E@G}@?qZ(Cyoh-n^DgG& z%$J!TFu!5`$->4W%%Z@e%VN#q!4k@n%u>iw$I`&<%F8Ol zs?BP}>d6|xn$B9z+QvGKbs6hc)}yRfS)a0gV`F0zV^e1{XY*i-WXoc!X6s>_%eJ2F z0NX{j$82BO+1Vx8wb^ai{n-=QOW51kXR@zh-_L%D{VDqo4sH&44kHdXjwp^ijwX(2 z9IH6?aa`hf&heX5fK!#ziqoGng|m{gpK}T4F3$6uPdR^c3391(*>Z(&WpOof&EQ(c zb(HH4*Jo~SZY6F@?m+HL?gs7|+#9%0a6jb!$s@$0#pBEq!&A!B$FrR00MAXH&%C_6 zYP=4-QM@I*eY`7q5A)vR{lO>9r^n~Xm%>-iH=A!8-zC0x{G9x%{Eqx_{8jwZ_&4*P z=YJ!>DWE3cERZNrCoo%Jr@(cAuY$sYhJyZrxq>}{s{~I9z7S#;QWJ6&N)c)iS}b%} z=&>-fu(GhTaI$c-@Dkyp!cRrmMASt*MY2S?Mb?U(6Zs%2Bx)iWCR!mnTXe7J12JYX zH8D@I9I<|}O=8!?eu>M9JBp`@cZ#nQza;)$LR!K>B2}VGV!gywiC>ZmlCF~3k`pDj zOWu`YkCS~5X0)iR4^&d7X~m6LUsEs&il zdsz03oS2-0T$bDvxqWgk<%Q+#%2CTwwZRO z_FV0YI;=XDI(a&abgt|2=sN0_>8{p&s3)f9qt~RjUGJT~ihh*-ME&FX{|rnGatxLj z+%^<4^fGKR+-3O5NXsbMXtvQ6V_su7<9g#A#ve_zO;SzfncOfHHuW`aH$7zf*Ua3k z$ZVb2OLGx7WoA`kwaMzewXSux^(yNZHtII1 zHcM6+Ag$xV5eZ0V7I{TfxV)AqWvQKM-IvkDGtjVo;zwfW;w2LeCK4~ROGbP z>ASO)bFK3M7iJfCmoArcu0pP%uCrY4xhc7&yRCNn;BM+(?S8<6)x+CkqQ`YlSnDWSmr7PTbyj{`kcB%?Ydt z;R&k}{v`S*E=v5GNe<}*8Qg^x#v``VegFI z|9xqF=lae1=S^UnkT>DlMEi*=CJ9cen)Gd9#))8`nJ7%iPoG|mxtc+P#XFJVaKSyRx=bZ0zQ|4Zp=P+;GeA)Tk^M5VKTySHd`@(ID z)D}%!%(l2}@v|jiOO7wKSh{kV)Uxhnf0ySif3PBG#gUcfD_5?PS=GOqX?5x9muq6y zTv+R}cE>v1bqm*vt?%Bzu%UFrtBna8uWj<)bZE21=5+39U(OwVjRYkGF;IkR)y z&YPd#al!J!u8TGo_g%8TbnvpvclF}6&}%oY$6SAKBjv`6n>jZ>-YUEG z`*y<});nEy1@2C}Cv$Jnea-vpADBJZ`_Sd#nMc8oZa+?b{Q61plRr;ep7B1L_FVq? z$`?j2_Plg^dEr&`tEaCEUjKg6_EzxioOc@Uw!C+Ef96BPho>KlKK}pI^I7uq@-HS| z4t)*ydhc8Ax8L8pen|dU@zdhx@n7M;p8qcY!~SQ+U(LU}{(1kq^FR0h|NplcoI(8z zMmS(%VrBvX7G`E9W>ywfR#p}k7FITPHda=4HWn5(PBwN95MX8FCftd;9 z8b%HV&i_XkTm=}I7@1g^SQ%N_SXmgEnOPZ`m{|lFScMb~*&Nx0l>!qBMT{CJE>w2f zc<|x}4$+{8AH`IRlbpp>i#R2Mn@p06Fa1BlAjb%`1>73~S;foxc>|av#X=)z90NO>zP;d?+cONw<_rz zJ;Swfd92;F_6^6SH#jUWi@tHY@%Z(>{|X*VV6dv*Z+83a&Lf^(3Td~$wl&$%vlTU3`nxG^i>At?=g@;haTVW>Gx5`YMkBk4_W{p`C`413Ito1WiZe@|HY@Jltr1+r!eZ$DJu{u#ZG+029Mj@avS*Q;&kacx<-X>XLs z#$X*iS^jCfjki6-ZZvN{+J9W;MysZ(Qp?}$fcu8exA3gZpQ*O-bjPU;TC3!9*M!tX zzss$E+P*4g&zZ=>vqE3zg|%|3-IaJVFG?!orf6u}laJN$_5CkvBsCSEF`PYpfA;J= z(|(m5s&CoUgg$yO2}p=-*q)@DczgfDdp~-FCn}nXe3z@)`*jb8LeHVS+1EaP@?Q5! z(Cd}f0zM9#<~twvZ)5x!88l~w&{w-%rLS3Lr-`Jkzwxy~tK*hK7)wv2S7UbB{_XO% z$0a5#nILlY?(3I+TLhRUo%#5oCpa^kiTB&q{$t^mhd$F{$^ZiaeEm#9>u-@3b*Jics+ulde2dud%!nU$Y9-5V=v=I%{s$niImJude{zr>SO#Zu;9ecGiD%m1l9d}Wjw zv@mDEuLkz)=-hwxd*7|sb^U6$=P%n?#-Ny|eY+NK$v8SegVA-H`i2cVEF1(xLe5`b ze)-&nNmm@ytB-q18|yayy0h_44%>m`qne?DfB9cc-%_o}v`n_O^-q$*snj{ z{;lAqOWG;R>rWl`Yj9|BZMFSnAh71vdamOqt?w}Z*=LaV#DL}3t$hJYxVgSw_my$x z+*0MBFo{vsf8X!lwU$5j*-li{b3gg@?`yuO(3002Nt@V%R`qYx%@*DJO}hBwHZ~!d zpsCC6Pd_KBso4FW!G25mx4Nw6B@+*9SNi>*p?-Rfte5HO?|W=iBjYSnIAj_km{0xv zcwBzk{lh$(o&nN1bP zZ^yTIZsG51SxmgGGXJ`teLAD3Pg~4Q z|Mkq_Pn!4EURBUon47fB^~(+6GUc6nw?FQ$F%-HX{HJko^+D0H%c~qtI!fP6Ql6k& z;JbJC{h$B*RW@At?cSsvp)FGVv(QE9#>BI2zZ-AozwhBRYBpNsutVHl|8_=T)~stH z+a|Qd3JE!=baQjR&#%7!a+}o7RMj`En!jIuz9wm4`cU`DE{&oS*Z%16OMiP_`0?%L zr&F3dHkf~TdV1AC;h6_=6H_NW*rIzvrMvg+PkHm_9}Zk#7MLV;ZT|0m#lH{n31=jF*(1#cf+5B3gY?6 zpTFhL-l7z;WbQ0G!%ows7o!dc*0EiCyZ!h3wr4J@r>njD^X>BMLN4AZtDQ`aEnKom zX>H?X?mK_V-XA|Nk>TWc!t!_Kv)}y|AM|eRpTq9Ze{#Zcp34kj2iN(Y{G;RcpW*HY zlc_>W)=mBScBjqPJtEUt)Q+(}b5rJWab4$-8d$hry7<=|%c4&!C+U0byzG#@eZf1FR{N-YI{xfJl zR9)HNAo|O89rt|mYc>-Y&y>7gePHXPYnQmPzEQA_3*R7rBiNM-CS4OZ%>iv zve=`iaiBfwpj3lL5^vMnqkZhgyI9f{51YRJWB+yyZht4%Fwo{Qb8e*N%V$#V)n%3kHyt*Paf!E2xfeE zZR*WW)*W}LS%padnC0;V-ed4maL;5iKQxk{%zmC zeZ%4EIj7F?-@3D{x@|?phUtbbcRO$AZP~uZAS*$1nd+&nk6rg2`~Ku@+WnW=9~(qe z4|wxm{qyG7@BJ1Q^M6_W+gB(Y%Hp_`No_9UmAvgL-7yT-1$h?7cTb6&?fCln>e<%8 z-bbg&Wikjm=k5G-O+iaz1Mdnj~=%t16 z`zEU$4%j!lNPp{Y;r|R4_on`c5IA4W_4Br0Pr=I5tPQrMpW6#pgd{2ZTb35pdR^Zx z_q)%|AoH}&Q=dPxu6%y&(YIZNmp``Z@UG+)dzZ8sJwL-DcWv+abASF;M{K*g zJG}68K`z5A=g8aZEsCEpzkIOq%96F6cj69jT~lXpQR`;=-MyCAr|CM*38&au0?wZ4A}uTKWM#Sht(7it#w z-k7tz-5@T1`rZvKM=vi|uL#-wS?-X+x1KB$xF-5!5Lm)rK;hcyim#`OmE^9$Sdg*^41V9CFB?NY_1xP>z zy4kK5D*k-EMp83_X)EW2C>2$KI|`SJ*ssqq^w7)v^Jn`lk<(eT4^BKe;q}#QftPUX+vcQ-z2O;c5EpRnI9 z-a3qJgG2G3-v=|Fc&hX)zN6cyBJauGm;Xb?(DTDH6T7gjEA zUT%V<+pqrPiTUpzX!=*gZhG;%=HCa_r!2g$KCn47tn1`|uguNB{>@+RN6wk3No$tR zx7(gEfiaRzP&-p=O`M6{1c{hF-TLIuAK!LbluTusaDH|4&rcbiiJj)JS2;~Iomy+u zzCpUXo;&yc?bkJ2QA{3d>yo{C?RSIzDQEhmA zZQlpIk3YYD?(d0en7L$mpIhK}-`)c^Cf>;1_~)36_`_3k&2GHloAYnO!YIQfnx33X z12#onR}p21d3$@(?Z*ei&zOF04Hl06^Ec6>!|TST$t$OB+fbai^POJB^BPXC0L%Qa zCs!|D_FZ+oZKcoS(+hc3qb;4vuO=tgwu{+!{7TqVf=-#uKXWc^`nd7;>D=Q-*XjRw+t%L@*n2zq@_|bXk}cBT%D(DNdH(KA z*+20=1xF@1@*S|hwb!IlkJsv7<$=I%oxE4ycSv>IxOb;k&)!Ee+i613&= z-xm!%m;Btq*)3RrJ}wvrj9besj{T&I_DUKX>kb zSorbtF`o7+uC1%TE!ckR!+!<=hWzF68L!yby)&;Y(S2W3@!(%X(sPS56P-`Jtqiu3d;DB3VVOdAywl%I z9?mneZX3VOzNDg)w7+)!#+|Z%`=7pTJmb9b+`fHtzJ0$ZZmuux%bQZo5}I1^_8IT` z{pYvmZkQ~hCN$MRu{H0$o6X9nLaOSzrQ4VA>~WbAEHX*sO4N}YzTe-z{b!gveOJ)2 zlu1H=X8yH3*cudCUGw_S0SR%Y11A#`a^LBnKX=W3x4t7d`Fk>`#*tA0)G&N2(kJ)z#ecK5@uJ;!Es<@&yl z?GZFK+I{^sr?yOzzHvI!@$8t}J7oA@{zyJKV}Xo;RDDYOzjv-8Q#Iyn{KV$5S1@ZM z)AL;41l{eIdn~#2XR7Sw=AK*nG+xVn%95!@3-^5J6xp}b>}%B;rvAgP-?l3yaup{z zXP*6=S37G%ijTsE*Y}jQ9#=9gYfIm+xORo`iErZD_it13p0#MxlKxuFzy-BmFR7i- z$=n`yNO?(bfQyXLH19U{21hgV`@P$LJo_P*$}(m1)&28&%^6&lJX1e>eT}inugI^b zQYL?&{oVJC+x_15`pZ0LAL+Wx)j$1nt<)?VPvb+)4Tqm!`*3a51c9`k1xdBxY;QMw zxqUy+ZudKbwz@1I@wBX8?{2lPo8_qTi2G@%d(qj)N5ioI^!z#+KS=Q@_V79>FJF{b|3k<%T zaxP!;qb_MmdX@V8D@S{pHmrGaVSC(MiPg^pTZKh=RC8w?$n)*}ms`Kw-p75$6qe@! zt8LxpIo$g9NN)odOXGsAfA@DJ%NV+KwFFx*CtYYW`*mM*bAe~DpjTeq$?!A#9z?0G zs|>98vQ+PR#oT~y4c!gWb9djLe_A?a^&uw?-Q}@a`7&iyZttpoy7aJ?9+R9Yvv%LR zZ`+R_&;7RH;4{@qr#rIW_D^w$Sa|#V>kD7oUoG~yCOD;=g+YLK@9w|$w|Qr6V_bi3 zp8vM?>S2x&V_c=?_n?U-J#w;M?|I7XzSB$yMJ9hx^T&}gbV)rg_kE_Qn-k-mY{q9qJz4Q6oEk5uZWu6{i{I!W`%_{F{Toqz>-rs&e_vbJDuD?Xqvm_ww!ge)X@7lCO z*EYvze=>Xgvi*EOR>-Sw<@-Z;7c{g?iuZ7F<>usDa718bg46rsFK_RYzB|!nr^o*3 ztBq$DwfpcH{dxP!=HG;GMo!k!(V@5QEv{cCDlFjPxubCH_Gh_*&#W&^=v5V6b0S%N zs`-Wcr*GF6|5(Mj;@Y3v@i$vr<2Yu{?B4!ac9P@DFi(c+n2331JM5RAJ%3Efaiy#O z)YrAQpXMg2F4?^7;`y&z`Z~8wti95|{`w31Hc3Oz@E?=o_15z$th(Us5)_u3t7z5t zy!(CPjrx6Z3s$Kf$n&d=56t?#r`oEK_u_Mwu3wRkZb`SC`W`pdzn@d-`f732e+K2V z4~j!%7&#`!x_&Rak*VyV{@eEFoy5+W3m4pbWA>%})C~Q+{T-1#Z}>Xe7U_uGTxZ`e zk>!@pz4N*M{JHIGjQ6}e?#q>QEtSt>gVgSqkL&B-By@U;Fs=VAn{)BWia4IlGaj?f z40g`qv6MgayZBFm5T|eVQHc*vx*qqcg5g7EQ=k8b~GD66`%bvO6d z_?e&O3Pjf};!bMWw&WV8l5h0HeUEQX{`oL~LDGMF)X&4!C*;-O{fa#@YQ2pGCKPe|FubUO0b_ zCZDXs{=Wb?RWSJc^>Y0j0cMB0l};A}qbwtu zrfoaGWG;K|-hYPkEOW|tJpcUZ_tgbn(LKAqZqd+sA*6L;bv1Wh$nDo(|GjNI7s#%- zQnvcN+?m+^x0jux+xop$uc_wK%QN5m{ky&N3W7^NurF4&f@Ie z?aYO@fB(8K7v8nRLTq`gxkjZH+uoc6o^xDxJ%ij=CxpEJ_5SOT&Xx(q;pgK5+*!rd93=S7q64E`E++~u)BYr zozwIApZD+mA2_KrY5ffn*3?+$5-wc-_V@kuXa6%i&nQ}MD!Tmi>@61RM~+-#+IToE z*7bVfPUVjq@Ap5b(_5A;a&fO!)+c`csLnMavxOF9M&G!bn|ObozxBTe&zX&9{`@&_ zZ&lo?)|X%V>-TZT&-qI)hG@t{_paS=C;#vJA`OxGqG#t{TH33<|C;oN36C~RYd^cm z(#m5utN5)yh5IGq&#KRq&A!l|rumWkkZPxMWnSQI&#Pw-su$LGC*G91&wF&{>*{~^ zwkw3cYO%O*M2Op5?w9`ZI&hWXEiK z_fsDZF^6=0eb4meh1rCk)34|s-+KJxx7$4xOEVJ|di`ezuPHI_EB6VBq9C+BDad`rpxE`;~(F)2AZ%PG3#~yeudesGlONC+FkpP9G9ZlCoH%#->&1=OLpM{J73Q+nlvlm^wXpIb9lF8Ze)llyuDu{D{V#l*@w#)?w?w=Z%Xr` zD(0O_T=w?#d{$!k&rt5&Shn5zWBt1yJZDbMU?@7jf8IRb)A!U`1?Kagk$t4J$+Rq+ zK_s^*ci%Pj8~3-rZr@<1()G#KLS}2XB)4Yrir0$;O!bskeh%5dpkemqc5!0fe})f+ zPgIvaE9X!9WHV!~YvrCuPq&n(%yqRj-@BgX#O2DyWSNR;)s>6+PUwr`*ICOzh~*1F_wyH!j!pLfxb`FDq0oX;=`#KE z{yaD_r)}-8=W?yC32RrHEL9{a8S!56@AWw+q!2~{S2Zfh27So-kl zOvloIB{!yqZ3x{d{wwPKato79ye8lEoj?6_NpkI$e zGX8g0uO}U1pE<>8e}=;4d`l4<-8*alGjNx0`_HgWCM0aJa#Y0YbG?6lIy6|_DXo*th)teT?pNq1|k$j!YQpR`9C z)Uxdsa4o5J5;~&&t?&3tjTOC5Cb-{W>6R{kP=D^)@o7y*T`F2vXUUsCPktKlY1M;S zKaZD-3r_t0j#KS;-re{6c%}+1Vpy^5x14WAQq;_aw|`FD;K_92JJ(X(_Z#=!kAA=L zkD+XEamD3VZ*JY5!?I!Ha*J2HW^!w1#d|RJ-xc5X?biLbEpd{WZ2P4D=5D_vlO=J_ za)n{`E8SYBGk%Y^pRYg9=he+~u>bshXF1K~vG#8n_s>|>^y<*JD`rY}%KtOuh#GZx zcq+`ddhf3twXg8WG?m%Tjj~r5wKdg~>Rx`oKfB&9|HKvljtRg1dMtbHweyxl*s1Mt zu@dDn-ZDN1cH|wbulp{h@}chCkDNn`7Omd=)?c@r_j|{yfS}ck+|rR#UhmLdIqzs) z+2iWwwk4~Lmvt5V`MK@4?SggL*F$b`uh8;%b2MavZ|fI<+^yd>iv8SX)6(GJ`sMld zJ1?2|GVHVeT~PQid-b~Y_ths>|GwQ|Z~Z9fP-FQ*k-IzjYQy9Lmi)0T{p~#A^ctO> z{0#}LZ%;nnFVSzBmE^F5wc9G8Yn?KKP>-XuoW-5b=U=w@252W2UdW9J_}rq+8rq~e z)%lcfb8&&K%P01)|8`GU>DqQ*uRv+aqzei)TcVF8vmIzmOfuX5{a2!zu4|Wn{MGsy zmZ@Uz9*@b#Ou)oksq+{A7z)7!T*s%6rzJ(()E$m?zG zcDZYhAJ?ClEOJywtHgBQF-F6rV;gqrTsSHqV9Eu)Ez5n^O}O(UBJ6%`*{cb%UQ1r3aFt(P_kHKKBX9Rj+leG^M)Hj94fE3e?NG;M&j&B^L5u=*Cx4_Pg~i*>7lp$ zs^Sivx=Qmu=O4DtnBP!p@z+DlPV+AZ-QId^-}P_JihsYqUgfa0)oO8q@4C|7w&Q*E z_AgtH82{MwGH&%--o$AeT-j8#l6aagd73fE=l9S1{h%>1`;70G=YM~?Ph0-vZx~C& znt)fVIfp~fo!*~N*ZiFC9K)CPlrtXtEvsp{_HQ;aj!Cbajg>59@_yQ8ou ze|!GZ9}jtsoZ*{xZEJ4TJ(hIOlJ z51Sr6c>lxWIfe`>osFSSB(DFs9Wg=3z2~0YETfB4T{p{is!#CW_wU>BrM_-4EBDR$ z`SWw*qO_>5*EU?R2&oih+;PcUdj4$w-VdfL*8JGN=?nAuD3=A^+hZ1l3rn1u8XBCm z+w|?9{g2rl1GwC`$Eyhh8Z15jPKqUEV$7zyV*eRxec$B2-FNqc(vp>?IyS4nyl~#V z;GWiT}}h^3=Y*hrC+uf#2$}7(b{pa+*c*)0F&pHvM2TDFW>#-5XicE{j|erQZHX~ga(v0 zZri+4WfSjKp-P6i+uv`y_ABvn$b_3AG5OzC{`%ac@=E*q=ZMVQvcwk(*_*oG?j05s)zP@{_pR6b zGP$emoq0yR^%-efoq~mI^z?4cKFz}ViG*Xyyui2KGB$bmt8zpKfCK^h{Qxa2YL4EZ@+fE`Ltz{h_fN9<0S_xzi&I=zQ6zR zwvg9M$!X_)W$!({K*!Q1%FE%@(`~O83S2t&ZG!r=Ro^zt-w!Ik{p{MG+`5NqBCH{k zf@4jV+U)h)x+Zd2Qc$eF|FnA-xmU%Ob>_WdRy$&#^1DtxzRuw0r0h8t?|!-X{>!tE zNghhezHfHsRG;f%Xjn0WY2U&PTi2dHCT&y|P_*iw+xqZMj_I0fKmAj#ZI4Nr6qXw2 zSUpwV{P`raV{Ow+{xjT?j?U7rtqVN0jAcbmdEU0BEj_0`PMsR`(D#H@?K?Z&qyHIt zKPv3VbaM+d!)@ez3A{2#vA!ZcmHRQ@$gV!dHMODeOHuB z3Crt?O|7+8lN4PK-8**V`}5v@i%;?zsl}h}uYbnkeCO-+&{olagPr0mLgx0)_0{po zAK$jA3j}cqabJ77{@S;ea+zJPCVp(#vsP+>R;|{yyJgSkCz<~(c=JMob2t0))0Zb) zlS!Mi{;wl5ulDyDeB}!LZ|bWbhUqob>8s^d8K{}}FXelEaAEL>q< zw(ggcDy!P|_8^vs10nq$u`0K2`M%xu``fSlrjx9xo!_mpe%-&vG%>8vM$KF(LX7#6 zBlE4>eb@ItZ}XhNp%U}!LVlEN<>AYQm)z#oJl;M@d1dk@{(D=0-~0Wb$7@q-@PvB* z_zW+>s)Hi?r|50lx%KG5t#9=Bu08Iq*D=&!f3$2X!_`*LoxcxrB%S-P%%!l_SLqJ- zn{DF%)bHPB=3qT4yX)^mmP<2=mo_sS>=3ib;lI33rKKYJ?<(ojm%cw`m=U;q2z)$sg)@j8a`$&2O(S zZw>F-?I@}zhqVCPeVIPSw=Kbj8n5pAuc_XnGAaLh)WOA9mvXBViau!GcOLpM7n3&R1Hu=kdlBmtHj{>v(nN>DDLy=A9yKTfcw*Azf2v(bh2O=}+dce_N&Ab9C=~eR=&9UJ=&xo;6uI9xA+U zcjQmcpTADj$Y_Px?3#UUw+~-8_@lM@tH)!bS4OQ2>{q{+Z9o1(ui%M~YSX&XsyA`J zUh-+?^O#QbT9~hMqiUVM+0RG+8K%jFPPz7H>+4tFn!Rt#f5&uM{msQI0vtOPr27w7 z=M)^c8Xx*o=--8aEn5}LLX);_;7!Wmd@y(K`TD*a1rF+(s^{n0+N6U&ogOjrRwLBwK$TgEw{Ig@FWa}EqhTHod|K49C=>37`0K09y!G8u0 z)`fY3PTI25p6LB&V9#InpW)sQsqmRDjZxfDe>1+dEj{~^Gc`o2X|v(peYNk;-TwCT z+rgIWp%QyFpZcyj)^_2;_9fh2tKE`&_OC3n?QMPkrhc7haYNDKptsevnQIpYZJzSX zOu&~#f1#6MY;UJ@b*)}KcS6Pt@2Q#BH&^Yc_}rqw(^aa9jLgab;}vSkl8D*I4E!z?)<}kOh#a;>hxN(`>(lDd#)@gG*w;4oca6q`<*iV z6L0g(Tkvke<3)dN*H=5oXx`bv;dZc=@ts)!yV&YvOf0I-sb|VTS7Z~rrx z9T2-Y_v_`;dD{iL^qz!n`|Z@$#^jr|O6~{ZCuM z8yPjLxQjnYz5Lpysle`I&a0@mT9I=}uhLTowr_v>etfzh)T+oHAG=rb*BXzM4KEJN zGg$Fkq~LzR?2S8aZ~M>i{x(bC%79y6zW1kp47#rse@dET>qf?Iy|oTmZ|~33egE~_ z;R#z>m@*D-ySu;aYxBxnw>6^4j10S&!mjOYJNE7Ujz|9)BshapRjS_o6@GS>H>l(L z>x^F}4rLL?uE+bX-7hcpZ~Fl?eFN^S(8lfS_Sy($6+Wt9^;*4oLiE+!0nZ=Q$uGN+ zv-4nN(5ijYe_cN(=HjTui?2hf%byjqkD}KVO}B@3_P&s{Y!&3ul|yeT!JFoYvk>I$q@e z%)fZwj~>x75z|jM`)}WfecC!{vilnIiQPwRPIz@kJ(!psKX=ESHwLLJ-+MyytAEGW zdsKK!m0n%AdGCd+LY1~_sT=R#I==08*k2$uup9ds1t4`K@|WZ^14gx?U8^wPzCJlFR@`*&p|}%S(Kk{L6#)_ovNl zHj9q)%~|;Nc|=c&Dc>ofO8q-g4|vZVyCxm?gXN^7f@IIp@0a>=l(JivOp0FnZ=z_( zRzb;bCdPmh{>!uH9(#PJIwG3S+sCp~&+Z!Q-AiAVpL~=vS*M{?yrc74u%f;~=i2YC z-+sLm+b1EasyKDZyu8O#B9*eD`rk{h+AXp{?P2-Wb+7CGGpv95BiZMu<5`*i49TaH z&shd~l)sSMU#qY$IqLS`4<7#+0>y50$K-y#FC;KwLx|4Vzp^%e`4+KHU3vX_h2Dwn zU-={M%ys#AV&A>{*O`J@Q+++wvhJOKrg=rOlZ*KyliR0wxE8%kRB|&9@J!ItUw!-0 ze}=<+##~b$*_CbGc8w+I+4R3XR$S5n38}G7yLtQWe?I@^z*-lnYlr=IeqQ6ATSkuYYCY znk<&)o3Lv8>g&}XYxnz@MY6DRY3I+CcTdVmtno-Q;XnQUbJJ?=rdwjps#jvyMBlrA z_~RkTlH7fLxzDRQzPhXRi6)8H-+WtS{yc%>=Y}8B4>nEHVt0?v{deJz^Y*w8&1;OU zCT8D=U|#ZvyOw>Q(u#>m@5^q!4BobHUcuKD0!PART^f%q=vnZqY3uf_?~iZ)dVE4- z(1ZP7KQmlv{TZVj9B}B*lB5!^)Ctlrm5y$EBFDLP=b?q%P9B^~v!AD5zcp{;qqRB8 zPg1X|J(R9h55Ied$$t7duAK`Qe#OrZ|GQA+SLr#~NYSZg0gI0A)H?S2uWtT%-k9rG zfAR00zsHSRLCDsGrS3hmmOVx<9*ylzVR4lWsAaKlk_N*yudr zQaYEro8NxpH31=$=3~2fqIc`fjxJ4bd~~}$rRG7FUdSX5J(Hz7dH1Ykn4R7~|1@KW zo7BH4tlQ<|uK$y$C=pm|nph!IwmrYb zR`Ci)wX@K4Q-ie*s_#117Jt81yYD|k zbpFinKvzjy7Q#V4*QbGly5o0}=8ssE?m ziB(yaflVtUukn5T0=xUC<%G7cc<%r7@1BL1nx`o->$C@-`oul&N#oYv_5L}gMhD9L zx0_wR^Yh?@Hbu?Ls}6@}&(643@>}pn^X=}K-w$4EKB`iPwbo-H2xZzy&+FuzHnnEjH zCheOjwP})bY-{V@?K^M$XE?{PsQIDJm1X<()+R2T5_@x7Ys`wUSXGJH`QO(T9)Fx& z-CSqzjJs6#%NMDvdudYKU$4)pRi3<5U{n9R`(pQNc(saWhJO0D^~GlArl^mnBNt9p znDA>yn3d=Kug5#$e2#3)p0;#rtJe*YN&hB(&gv|g63Z2#vG>HI`~8=HCfe)u?32EH zYR<2phghElA9CteoE$TIu4{POAO5G?ZX`Ieww_;HEdI6XX7llBYgfwX9Z_1u(7SQR zzCYE`_0Q^A9at;R`W~(McSufk`_V$K&nD|89gGYv{d>FjUG0B{=Nv&28saRo?^yq3 zH*fLEddBu|!?8EhE;-A2>}UV{=id&=C|BXY_}ZG=m)dW>i16J0U53e2m22H>hTre= zmp`)GFY)P;mq*%_s-O4cW*pM>%DMPSd)<+Y?3}|1=5If4Q}K!NFa68iy5{yjv*Xj2hA{fJ+SU6i{ylr^ez9!EljX~&xhkJY$YYjn z&R=%8{!P-loR3R0_|GqVxx`jjXe;9uo-J*z;e2_ecce4F%U%0d{l4S!Mn_GRs5^Tr zKkp9WII#5BQ{E%*TzY?dG-#b$oo@4vf5+C=z8l}`CT;lsNoLpcJ(58|lCMAgnPbw?%d+)rv&iZzq z4&R;RL#{ziwc6=RzgpF@2o`mf>@nL}!0>6&jk&!?|FK*Do`0O3Lr!7RSGn}_X`TGB zzh1xPn7eq>+`@fZ%@t}7R_EoO6qEh=r)_G}v-{pvd*im1ZK(=N%zuB0>5|9Msqb7H z)dMzel;J%ofA2=uPR&!E&p-eEYkBz00`9*3Gg1}YFRjs!*X+u!6HzaLtu#lfZ8cR%Vg(`}mw?-_?S z99yu{Xmw`@zc;JWtK6eM%)i!GCvdasZcGi&4$h6weJgBmMxx*S^_z?GVF#PDgN2Ox z!)GkIU41{#Tz%)^368iYp&8oE$@nh6}Kn3UOK@L(fI86<-KpN-HrrPK7j{kqc> z_k{cAI^|jAq3rSg`zBP}T$I4JF}a}Z!S9LZ)xRFu##QLG|MOc9uA81(%PxE}D}1_% z;mCHaC6muTz0c(2s8z*VvOM0{*Tpr0|B>C7e~+Iggr>OpD%XbZaL_CNX7B&FMoD1G zgjOb3*HZOYmD#66tD5WYzvg>r7s;gYb=lfO6U^Cr*MHo;Phi)iVr}0)tC!l$zs0iY z;gu+{O=|mfw_0ewV(H5(|JB&GUP{POvGIDWZS-4?nJHHdp3SZK(kK2|$*Sqjnl=BGKqtnXBw4!9c${XK* z*3}6!561fX$L;q>zH{UF(Z-2aG_EX|{Ay?Lk8ghsm>d*6O}?zYch|B-e~R744;<#c zJlF2XnQyyh{XR!PI5^%R=GX0evpYUsSiOFWrs=5@$`9V-TpBMtioH{LcKA z@#zwSYQV#&Psf&f_H5i?x1e0Qe_r|Fkgy<+g|hjd?k!WEExd+jgOH)h3-isgeG|9t z7i>RuZd&%`*<$zKFI&gZs=MIz4}-6=k2fu!vL|9T)1DhOpMUh-5V`bdy4{^Wb8ayN zaTfh8YIRjmbC|zLN6&1>5fHH}|5>_6=|be$`U;b*HPZwQ!ps!?+5R&u z-FYC?aEiy`BHnFZS2O-T?5P~>D0pM*e})g!9%$@8f9ZFfgqCWWbCc)amv5gl2Z-&F zOzsur(%u|npu2);!ur_|_zWImw^gl+}pXxs+n5p(R`|`n4`LahA z9mv~u|M&NQGh7s>L|mVLdi8;j^_*L49!wTGH?d>>8QmX+$6wZ8=ke4m)xTZuc5iCG zcgmGaPGu+dYo|o=css?d<8S}|eo$3Y;r8og7ats%F0eB`;P0Xd-@i^_kh}JCzjZ>9 zF~iY28NQ}%-VSUN)SOtRg|G>%X%Pm?z6J$T9W#;5$c$M^HxZ09`lR6YMONa5wmEo>v?t1BmM;W~mWM);C?z|mxqc+e)&0`_{A}j~OB3^z1(n%D19CTY7vH<{dH;3Z zNz(#7_%hdDpQ^dpa*vW=bdc4isZR5jB|f#+F8^9%ZM;EXM}$Gdq0JG6)w3H9{<#0+ zZO3O--GgQ8Kc9bI5h|d)fBTZ!T-8vf)go74+|c|}yUyTe(W$oD>~ph`Ce1&euYu7>3X{?tv{ z{y3@f*?)%nkDJT0f@0+3_E*VD@m%5B|DO4}%q)|gD)$cdHj3Xa{QLIDLk?BFFPEok zS?+t(&#$R?P;L>| zqW)5+rikXGYw|CD#l&yj`$5G!k}2ruo7=a3HJ^y%ZJ)5^ht#*Ww1B*gS)p7@w*P8) zTzqir`%jfJ55uPAT(PQ(F#Yr8YbTc@i)B&#a~_qcUJd)E_C4&BpMUAVe+IuBMpxqX zKOOe3iwfA{;lvR@&{rUE90P?AHCb9}IJ+Jd3Em6MgwDld*@{_Vw%IBhp?fEjrb>grnrm?mYjx zt0kzI+C~Sy1n?lKXW%6ZI~te=vRIH9u%ei3nKRTe`;MAV}{SF&F<1y#W zdG?)0eCO&5Cq%wH`Es>zz=MltqfCDMVN$8T9sG5zsUvp+p`m1sov_354h9n_wMQvj!BE+mek$*5FpZYW@+@?D&6Gj z8~k(EpA+y6(9x>Dl4Vk_^(iAM_C~e+Ihhr`I?j<>Ufp40uie}E-CicQMMIlU$JT$^ zKi|)Sk|#w!HNWnD^RZRQ`u^mkYcdbdE!8*pzI^-juxk?cl)k=x%i5~<;I!NnqX-87 zy4-y~VnUv9U+LG+b!FeTp*wPe>GieBPYRYcOufJD`MXKiZtxfAb~277-2R2y500Uh^HlMs}G0y1(y-$jXOal?^*CUAx*FeRhF!a^1G3lV!VR zax><>t-F5v{`0>Z&Yxjr^SHL?-Y%{`e-~ulT;Q?Fe?j||JB$Za%KmISaAj)G3dPyC zr+>?2(^@Q1ab!V|{f9Od20iKO_dn+ubk3S0bADQTRptwu*DnR{+Z*#mEL|fpHCsbf zkL~XJvz_1nS=elO#&+L+`ZeX2>7V{RxX3ZrRQ0bz_aFXK{~7K-&^g7xni=@)jok0w z*H}#^)a={vk#EPuPL2IFLU;E$KB@W7ka+oukY8Zju55>C(?WNBZd$dZY=o0a~0e1k9TSfPPSSG>s+q>bNgG9H$%_* z>_c2t5tWTgcH}YcROpue&%j?J!5}-?o6q-5RrNjLJ*y}3*(~wik)d2Gw&U;pY`N#J zTb!XN1Ej$bN_ zn8iGoaGcnrk(m_4ktBA!zxe&%0#nrvA#ZEDt-t>Kd9cE{!m;fepR7lchgANKy>0&) zqRY1l?=$*0eckqd4`z0}T*xQ+E_7)S>w%5G;=b>E{^76;)2$1${yn(;oNJQM`3#=I zxsGdevo&lc=I-17vEOIj$<@85OTV^HI?$UaJn2a6DbqFXsek^>zi&~pRO>tYozL|e z8Ud_5JXLdM`ksyuI=nym_>MmxK7~whD*1Ign)!T2!GDGTg*VrFuWV-6w^VFbbA5mE z-)~2PEjt)OKJWj}FkSik7LK?HrW4}QuDp^ub}on*6|0OPmdm92(<|Va=tmV}5Pl zuHI+P{`h&1wgSK80;`fY{_A!*PioLg&+oaK zajycOp0m&?){Y%-cz?M?z4{0ZzrQv{zg=&s>NzqPez`GwcAlk#)Mgct*UxJ;lA=;CJ~Z_D&M&ul zqq1*fws19%)=7m)Rm(p=eaf*i^M{XCRs-Mi-YZR<8+JbWzWw;?UkRS)I$N06J$}9T z^S%i$gJ#X%V=nwcM>>3(Ez_fe4(}Mu_kG*`C!zZ=)6aIr=hb|N6YOOaBCPi$TkseAg8fEq(% zR_eW{5#1}^<>xQh|Gd$|QE}e>*ljD?Iem=7?q8Af>J%`Yv+L#E-0F!t73$P~?~@3U zYCc-N?fX|g#T5>^ZU*VcAs=CjfelfWn{cDQ{ zi$)V~)z2fjF0C(*)aBmwo$vS1JzMV1e+Ju+;(~2HM<#69<+=FQo>gCey%qlOT;)`k z#5yMCQ!Dxwuqgdrx$yzxLHXW|+>D2&s{EMgrN_UUecAqii2P#15H9sPH{pPk-XmGJ z8pQT3e&0FwRA`X%Yv=m?a}2pWMSr{PW$5(Kc(9o_+&upT-|_sX$4oq;j+%%imgT=V zZZ%6}m7t?X-B|_a6^l+VsP=x@_Mc(dk$)9pCmogEeqTR_@nHIjUye}*jZ<{GU$IPQ zPW}1gKSPdG%u&{S+cX0m8eH%Co!d-KxdTnRFyrk@L(tWeHEnZok3%;(rK7*w? zvm!xLb^F&B`4L5%I1YPA9x%VU?MZ+0{rc-HAx%$pSO0a5IK}k zbMM%H;@SP^8W}BBgM?|OH~yCVx?ji_>MU^N>xEVw->0uXT|cd!Juy4_^S`4XE_WrC z>(t)+=eEe!F!TKMxV?(L>ns^}a@A^a?AwN3m`yU@{H=OJ+X7b!aj7&3UMJX2k=& z83%7nu-Lyr{B6@p1r4FNeKGy<(T64oWf%VrzGSB-;;}uA?bx-gt-Jrl?)`b6B}gcE zhT!>AUp`&=o5;%%zTIwrhsipxV=f)hNsB$L+n`HDAE0DWAfeuJ1kj_WsV>k9iU& zJX746dG+vr1_6VoCv|0oWPO+wq9=)eHj~j4yxe++Kg3tBJpn`Qh$&R=*OZ}gvj&hAlK(Y*WD%WKc4REV|*USru5)A+K5UCXxdhx|O{nGF0Km*$^;%lGc( zUWFAamhL&4C}92K&aLm;Wn}m7{qXg1kVIBpETeCtq}PI#&oc_Y%@7Mq2r;ytRQBU` zlmF)SKa-5)Z|{+)Tg_JP-wHES6^Cuim@jqH41!2Z4dap&Y^8f<3yv2UOAzq@j# zy%>HKI{<77Jd*|!i*H;(bc%^74ge{d`>< zzIX4=L(5`gx(-ad*pVqBA{KD{4)^c+`aZd-78h<9M-CzF2?wg&!S^jD9#P__8DtlW>C1YmXOj5|< zYT)17Cy{2LB`$q??YD0&MzJf}6=x`)%$U`@P%0A76ff>Fb5Ludhvbb?12G z+=vY-dIy?xr2p^~FlxpQknfa4#w$8vt5>JPW> zIq~y?vq+cW<@dZNw3Al|`LDkBD1e*Jy8>b#aV@2Qv@3_-Wwe!e}Ys!%KV#@BbHJC!{QJPIRnm5n8J zQhLjO-`~CA*1g}kZ(GhkRJk{&|BLB86-9;O){|?m8RP`62@N!J{CDG9!E6h=r2h=g z7h0U71NZPAnHK8c5IsS6b@lJ!`UUUo3T4Em-KwrN5tn%w5R_Hgcv3}3yvuZBSWYc_rXaGrS*^Vw-`-r)LY}Tafh^!yW0ID^*l;O0v=L3Z_PJ4aJgd2gk?)P z+TI;qvWtaba@y66H@cZ`l^FU-#z*LRAV|(6S zpW`uAMPwJNZE}1_@Pao7Eq48Vn0xlP^!&%i^a@r*vi9=w*7?*P*H1h@)xEYZ&i~qn zZ`vmn1KPd0rz#!(`@Y$8np1IevD>m;k*l5G3tp4py5hXO^P0B>->Kcg1-j?%-_K5* zoSs_7_>v@0bn`*)u19p3f1?cZ|*jYYbe(=TmhPX6Ph>7soo zdiBqPjwwaBh(x9iw>L!&OFrDQk`{oVt8{x}{-pZbf&ueB{0v=LmGL)U&TDh@? zX|_*?Cxv=$u9mLl|GrO#;q(kPUB8`|SdEV?Shw?d;DNoTSLp6wn9ZHzzjd4V&*Mv% zc7>e(lj-~FJXh>X)@Y_}>0M^l6O;Gv{8Rny&&S6zHl&9g`O`oB`V^KG8?T9}a2(K3 zeE(9bQsM7w-u3yt{~2Pe!W%4unf>SPeJFNJ_OM3MEYDAGN*cFY-+9=esh(GUb>_QY z7q@kM4y^yy`gf^ocjVdx&Rf5$I;}i8Vfl zL{<7$b?7cTdeC~ix$ODv9}bIb2u`h8@Be9)S7@#7@jGAQ^0f7 zQ|Kh)+V@`E+kHN?XwJGIsoJthVei4aYb+V!yXAgJc&@s8r?%wwul5!VrqaGnVUN=E zu*AKyi};BSJd zP^MJhOabnuHRdv(?B!20dv$(jj7yGRV&~K?Qvb@uRnhcGF>%)G)-XY@BjA5W-GNn+-mkLVbi5AR>#j=3sulsE#E!2_xJ663~8&4 zwN{DWnp<^f$z*2^qhAd?M;pH8*?Ql&fB)}?b8mDu-#eb~%)-I*tM)%bhDzV{uQwMn z9VlY3w@wsUrkHtozW#>iOZcLkWj21vkuu$Hw(rodozitN4_U4XT(2wpY5q3j%aw}G zmg%Zpoq7vh*Hnl5?iH(y-i5f`nSN)OFbD~NzU?*1a&UGy87%@ujAAF6=u_J-Pb#`;UJLLYX!>Hzz*(b))*=g_g~+3)l+Qn3~kZuXHfus@IlL^vvSw znwxoMhJKM`>%+}EjW+H4bU(rEd4cWDC-Z7I9A(v+qf!8a#hO}`r8hR) ztW`+5Rr2@kc569*`{PeH32=ETFz)>;^UAKY&D+DH@aea{B}aq=_HJ4G`+jm=8GD_z z+zkz9nVT8f;TJB4-#oZO&@G#*l6~8F-LyL z(h0lv-@CnIgZfwHmyJt3wchGB0wA-QVi!BPAWb zH%}1wdqIb_?a#-p-^BNu9cDN)Nzm=e-f!P_UQ<)`WSai^_1it}r)3)BFAHm`2pqX} z?Dw77vL}B$zbzN?bgIgNiPv}J&Rg_RE09n2<<*6q(<1ECO+wDA-%|_9%isI2?fajC z<$SFTYi{nlx^>IHgi{RDJ(o|(4t#Sj!IO0kt3ut2`$_ETb=CL9@)$Xm_FoSd$@;Uc zN9D?{$ooG#Eqbm6L`>3SjuAc}`>#EJ8Q03pRUY~8e_nnokfs&=EaL~8(P{Nr&#q}) zXIXo+_x+Cl40Q!_ZwH5~-MVn$PyeY4#?v)CWi#1yUYR)x?2zib!#}(1pM}$^RHb<( zE6Q3txK`TDi?54vo?39|V1r`$rrE#0KmNVlBInASWk*+72JY4Oy7Y`8XhEFF*;S`j zdA)p>o8SDm&fsa9hrY|-y}3KvR24$QUoUGswaJ5_HZE~ftCk1b?T5vwdC&fIeA>Qz zw)EjAElclRUH#ipP&W3G%iPB@H}^f*Zu2%j|8+T`Q{etW$0h-P=xLEq28S=*~#`>e{*QKKsw;9!u3#8;se1*8EKjlWUGpNGw{oX4}@8<-a28 zt8eprsG9PX%SOLt&gT(QQx0mmHR0&-R}_I4z-Dsv$S`WSrQ) ze>Y?fOw|xyHg&CQ%^5>Smr03o1w0~6A&keBU$9^Km(Cp=%6;j%|C5^45k)}{W&}>K z(ET1#wPWjc`S~($+c;abRpnw{$=`o-NP6l@uBvH_xl0>WA_SMrE@R&M<5%L#Nz;G* zd(Z2$Zl?pwG9>|V-X$xQZvU>gKVDOru=aG=p5y!eELh4PEH8PZNr`W78fVk z&A-2mBgFGmL~YyRibPMn4{j^u3dJ1-7|Y}LL1Up$?2EZDh6vG`2KQ1 zzJ|)C1uAcJ82++<&*9aszEFJonMeNo39T+$ohwsSGWF6F*mm6By1rx8BilQd8YF7> z&rR}D5j&glVydaZRfQzeoQgNOzY70+2tBR9|MmWQ4M~<|6UtVG1VqM&swnl{Z~M=1 zy@oA%n&Pfs^13w-7e%Vh``%{k*LpQC{)X_5Ux}Bx7!_T&KdW23>PTmhr&!;saI>A+ z{dc!7-~Qp>-)WHnD*Kma?_BTCbLtS^q;7^clAZgW9juPe&;EWmRAtWb3zuKMJiBn! zwAfa=5Bz2?lioybmE%32Qa$nf%YGeAt+Fe1*IwAZGjdj2_;nZd!nfA`gE3FP|3ca1tw|~hmQy8P1N*dpo`~G0w|Mf_VYKY^0 ztad>^Z&--0C-Zc|_VXSmR$%I+bU!ntH( zc5mC$_Uf3weD{9j91)fKIA>N!@Pwri8Ho=jEm)DMJb7)mfTeVUgLKrr{|vt0549}y zIwk$>``#XpDGeMI44+zjf4gxfY-c+1dE4)@?fYcs>?=55_4!q^M+JA8@+DD?U3)J( zrYeR+y|K$b_-{vF=V1%kXuo><3I`qg_0J@|b#ES@A;}e4{c8Jh`TOUdO25p?3q#i_R~1;`0iY6*6F1H_0^WZQ_!D*5AY=w5G^ROtAYUzxwAPgK`5-*~jT_E<4_^ zo!x)@Y5wvDmvvoLM6E;K@7rs9FrdMCk7~&J`%Gsz-XE{4U4Q(f0uQh8tXW8K-s~S6amfUIlxP9xt9Ys?l&K7Yz&D;B0=n`|-<8`x? z)jH+e$_!onez{mR{F46Ik#&3ngVoB@H!l5WxDuf%vj3bJ#|0MK%U2eEOMAsA`*(-f zjhb&q56qh6EHA2e=hdyf?z%n2GrQ~Gzr8-i&a?3o#|4RKZy_@kz619^e%o+xnNK>; zb-69q=gzpdeu;~w;<*p0DOXgsH5aZhDq_$7{!Mnn%^UN%SDx6tSt69x!OCxKd)|MB_s3+KLL%;Pn_azrZ%y_Z zUd_NrmCakE-+C=w6x5~1t(m7|-KT%mmo!sR6`!DzY({DPV&10(>|2lp}>x|`=t(K+V+t&xRPBB|L9vu#-7-oWIz8L%gjcFS*xe6{r#3v)7;=@ljIFH zqsawgPvURyyAv<;)IUn-!PZ^R`B_?an^;V|IAw~$%AYs-^tTtbDKhQ}P5Sn##o^Mo z5HlSWSFTyH&Z61>s%s|yO!8*u)GD{Do0s7Dxn<@3yuFGJlh*uVjGoOae#wD_|Jr|s zyBi|2PA~`DzVM$x{)j>2^*ORCy$)&GxzW7Jm-lX95Ghs+HT!boYlO@Nuh|(73%2vL zXncA5^8M`>2A#8vEWhuccYVFCB3Jy{e;+0)@dwx?zZ7S+SNog$vpV5bber_Oxh2mg zIL$I#@M_}qsohnz`qlCi>L>jA7m=WNV#oC}Rqs#VvzhWLTb56PS7-Gb-R~X@AMEP) ze)y`vb3*AqLtSm9Ggrhl2AQ9;&P`*x{rk84WsW0FvrTVT7uzkKV}IIjZ$jf8WwxFM zzqKk265qBTw=Ud%j!8{Z`^=;xaV_s(Z_WGGs>srH?QU_4HvgW;YfhDyx2>y}U&d)y z)D$vh!j;dj=V;#g_ufp=@yq` zkL{Z^LrG+zgTiY~m+NZ<7<4>u{AZ~B&%mD}=%t#!dw)n{d7|6YMw3+`YZ3};56aKo z{hwi*+`9=Px##v<_s`p~&?K2jl(jjlCzCnNZ2N=S_dk-X(oz(PH++6QM|u}i(=#qL z`ORMraqQi}{OS9_Ph7p{x6inBPQlAF>2K8`DPQ+XVvpY!94Wrf#O)P4*DNbWu*&=^ zpBLwnn)hoYb~A@#nt?aANE8}|PGThPU9 z;8`5aZ8rP<<$E$yEc15#Ti_cg(ZO%Yr1{m*?9SZx_p58mjxW>9e<(N4@5}2`8@`;f zT&2z06e74;STK3-{{H08zY@5VGI?^>&AZU;)X4LeX;$FBiTVE-6tn-<>mUDku;odE zSByZ}rTbdHvRS5UyvpPdHkzEkvg~O7-i~j!JM4QuK5DtKto@SImNdTHQ@kpVPnTxO z9qE?NS^V3G-MH0|f1^clu?Hn}ak&8on_f8a>Vx(GRafy=k=zudR+rDveb&;JYt;wy`j z1$NKB$3FW%!v_0#9AX$ck+;+mN@<=*`ITP$i}JWHmq_y2WW$6-@`Fldv|SIwVN^Mm~> zs+LX+l<6uKHVk8p_R31gSy0`3_U!uY`;@21tO}C3{`zdQ*}KHo(?VEv@{av7iK$CO^Wy&2EI8q~_(=7cGhL_pYj1ZqR5KQDFK#~Lw7q`)950t` zp%3`AHN(pyr9BmxzxVDCf0z5G;K*53o!MPqKS{m!P)c2L>29s^)0GJaUEXmhu(Q0_ z`hCOpm;2>olqNW6dGMHA{rPf>(o(h`^%X9Ca=BrjZp0l85AC>hJAQxhecqrsKX0B6 zuHVe7`03J#jKB#UOL{Mg&TVGfDV@hJ^U3_j&wWyo+UE--_r5=U>i87p5P{qaEeBG1 zJ2nc>2JNgZqGua-3k#;Zu&3j6PZxw zclhJC!$CnYODv@C?prdOO?-=%qvFv7PlK;2U9zrcJ8o<}X`X-h_daGrEsg&S(bu+Z z{G5D>XN&M7|F{X7oGUs7z5IUf?v8EUJ@MXyzpO_$Y^vc>EZe?U>dLzJ>Fp}1dKo6S zb`^DOJ|)R$*e!dswO?U={*OcJ@=_Z2pXm#;nYZrv6u*G)KYux=Ykl%KlgGK^c4MB! z(VVX=8}hSbJ~u5&bDYAr_i=2KrL1xc-nw?<+LY_}l)ks^lE1zBu&2DY zr_>_t+kT6Dw@DX%`BVPw;D#e-0~DiX2cM5uJSDFe-L5vbszkB8SZcF{_r6JY-Y0*4 z|NiIq!#7tP-IiR=TfLz!>X52wfywKI+&r(CGQNBed%*UqkN5mGW#6E%She;=@8c&l zvTWO1yLi_$UDa@}-YvJjJ>UPlQ?gOAod0*}r`A+q?xN%!35&e@zB51gJ%9aWZe34q zF11-fzs=t-QLxc}LjgnEss%4qXUEZg25(UgzCb9XCswvkjOI{P{ii+pjrZ?=zgX zZ@V9rxkRF2%B_u8N1N&WLr z-~M)5DO&NvbKSqmA_?~s52$ZHc7th(Q&vp$t!egGzqd=e1U+N9{HrrrLE%RC$K>rF zw|_jelvCl^^5x;*a#!;Som`N&Kf}v<$Fz_#{nd%t^PWAr`@5F^99QS;t9xy;^CPMz zFmhU#Ui%lYe98vvjoB~N3yaSk-*J0h*@5XADo=NR+HUsi#hu3|d7oZC$Gc{BY)E|d zBt746k3alp$l=S{y+w4zFXsGx(>6P+DxT#L%Ul#*z{#_3b`Hnw2J8O!HOxFqb*C?^ zx3~RzOF~71>j106wwAw}1vh?|>Hg19^T_9n0oR$jEyde)wU=Aem#$pU(kXg1IW4wa zdU7^*-Tv#$vvpQHb8XbU9yj4pn?A>9rY4cf=8e|3KmXbOr{KBb>1$i=*$D6|w*{H5 zeZT!>p6<^)gO~m@-`uLV4cq0&WgnDyYQn2+mE3|!p5otk{QG%eiqYiR7Y|_5!tbz@Wa{AFkJ^|u(Rx*ukm&%oouct zPFZzl-E8Tm{p_FYdpDG-G%cOYZhe3KEx7<0@$Ku^6y6NKa4*o6aUJUkx&I97UliCp zSunA-H&@O<@8PdYY?nmY->t9~@a+)${{3IsFT-f_>pZS(Z& z^KN|92t30f_4%{ZulW&G`LexJl(p}AO-*1>_jvsALEVf5Mh2F4`Lp|~-KXlF?%Vb6 zq0yN}ucnad`nSi9Z#X{Dpy7z6y#F38)ycsXKd$IConT;}E&cubewl~H6-|!oqntWJ z8IB*kvr;j(!$xqYOn?8k`+QyjQyC_mUw61(f4|1Jusuwk%65r6Cmm?Lydp;S#~+S; z0-A@p>_0}`o%?UXms1K~uP@PDrBreD)Q&qZ&A0BeoAaQksp+8S{`cR1nO)fQRQHwL zgwA=cE?9&c43;!fO>TE#1|fx&J$F%}vg-J&j_gL|xzB zI(_U<&F6DAYC(?;zsSswpDdv?$xgOvk6C4Nmj6xB=vJ#WjMo1djQ3w>o#s)dGMC+# z{hdcrX~Sj_huFhx$<=vokNz`=Gt`A07UKFRcqZ)O+N}Q!0xk{SefvHzMVS_?JLmSs z_2?Q7=I(^KpRO&pPpV2}Wf0Q1&_1zyL#DtahwW+Kq-*Pa%#ws+=JNgf5Te-J_@}-= zO)Y&Q>r+P0D;&FZcgWq{{824X( z{_<^;qSqg-SbD4MijzTo1F?<@CDyBARRrqMZW zSH;7GkF7~7!b^64`(ychj?x5UgN>KxzC1m**@-jpW$eSm!`iYpBW^ZIIcyaB@%G2V z$!?+MuRgOo@87B&@ZqsrH+ zs{DRu&v)mM#fhonVYe>c+-oj<8V4uKj!b zbK9Loky9qc)GF8XWc4&e`yG3m`~Lm?+uL)PVy^HDCPbgQmjC{g<))+Ab-VWHK2uIl zT9vGUw$1<)0CX#p7g?0es@B-`93vYZk1-o zXPVxPf88f}%}`?&nZ@;rEwpgs)~)r*cm6X}Ps@-Kd6s0e;?C@+xes5trEHzWuktU5 zb%OQWwW(Y8RsNFQ`>#Mn!6&?h{mQ3(U%u%x`>Z*7)auoTnR7iuwR(=dvNW@~>;C=6 z!%tI|@GOb=&yc!Jm!oCc!tIyW>uW@9Nj#CEF(1`Cp0u@|bnTw$tacznsHIx zzfXMIuiGhk{_FSsa}0wTZA-sryqsb^Vb-IvyR+lY=-0omJIv~J<)7B}rT4G(es0^y zct*wR=q!_>2fBN;x2{dn{Z!v4)zZbJlNp~?b!b%t)0S3~UJ1E`XvVx+ow|LuA1>fl zu~_%+>f-R{p9Rjn(U4jC>(}v8trIH*?|aIg(6ZnBGn!p>634Pbvo}2b*3V+0Wp&8s z-eRpR{py$ZCvUWmf88?8FGxhg_ow}p!!4@}UuP(Jtnr%o^xp@o_g|UkS3jJ5YMo!= zQjNQ{QUXm9_g6{C&wl0+Av)7x>p^Sz+P3>|%nvPjy2_#a`ZL@7)7NAtG)~+2Y5U6A zXZRD#Z0^rJAur#?CL%Iv@%ro2?E)4|l3ag1+C3~(S~KB0)7vEXhdX+()}t%wb0yY;SqpZ$G~PDee*w9EOkf4fif{N*6! zq|6nks&Gg4Z0Ga*hktuKf|~kLnV+8bs$I=B<;tvUcDfZe_EhENu)gE}bN|N!VX=G8 zcmF)tnRQ51FH;E=Q%zzu_@e`eBkZ&?a%7hNu=$bsy64|)$4N?{**c7a>Q=2 zqN`1Aa(?dm`X2|@MIPR-ANJ`}duo=6>e4{byNg{DS0`@zxOZ(k=MtgmD_*|MpRW-x zDdd2)lgla()`Ke3WCYBf-@g-4{I*TBX|tQ=>)6GorbXVm&_2uj3P(j3ljq+540;t? z-+ydhvSJqd2Hw+WXK~&Yx%Qq@=u8N2RLb6W0Rrr@=l(PFzigYzW|{lj=?TLvmdgU} zZ*6V3MdjOm&wcx&*5K2WrwQ}x*MH@jV(-4%;g(a>Cr_D844$u#;@iJt+@_~jNX@#rGD>)fh4RrEH+P9P zo>Y!|NCF-iB)NQvr6tfOugFICSCdPnoEFB z@5bj>ENlUwHV!!jF<^EgJp324x zGE3YJy}2Vjp?;p4A)mwglMT#IS3I+M@afWl4Mhs~s%Gjnmx?hx5q!Jt`tynVn5VS# zTw#6fD|dFiHb;7yQ+D3245^<-1!LB(-IyK}(FhumefhcHGay3M@z@rtuS+G*R7S=h zvYj{YLc|l*mroK+R^6U-Ajf>q_~*mTTI+r^ zK9UYjGjXXlZG1#lC+L}T=%^BhWznQ3V*D!Y+SeOtoJq$U%K@cM`u-TMOyE& zfL*u3cqEv17v}wWn^(Zt;hCJx8}~c<`EQq2rr;S_f2$8I{d6+^mfMkAXZK#awo~@+ ze+K#Ue8N|}HV1As`M$e2{Wds(FN5gU`uettpr0GCqIeW6NPLVAjN~=xMTPLxQdKr2J#w zem%bQQ8#4zncrr|F9gkYxHF&cLy_LQ9y#Y#Pnvv>n>~NdI^R<7q^QWvj(cmD_49CB z#$+<}x#>w6tWC~kVXj^K<;S0bM?yAQ->ss(ue}YsyZE%?jNgY_Cq^c-9(`l7shsUV zt^NLW5>u81x#qup_Wb@FUeDn9b4sIwekL6G9{XkOiOsU>s*ChLzU|k`3Rtq1rOl)| z;%(IHvWhtN}W~r<{zyxZ`YO6wm&82C4P02 zlU{WA2Kl$=4}3d3+0>JF#rF00rzlU5dN%Rsq+b&jUhusg{%-E>f8zVkpA&L!AB#lZUIp?KWTT&$nL>2lXrwOTNhZ_K8nb024Q7k4@t(Cr+)B zzS`Xd9(y-zjxZEA-MeqwJ+|$Mr|Z@;nerS^NqBWhk71)Z`^NpppEh?oZqCcxzH;Ng zO6Q(wdQ}-dW*73GORUjdX?{Dq<^#V)Xnl{&@x0v6*S0E6c-eSvZs)sg`DxTl2aXR8PCuGX0w5le~!qxnX4}DJw{@F0Sw_*3% zj>{|VU1z#oH~)>HflpAhF6+1DpZ^};G+mdAM|8&V(%XK=zTN)#K6k@|MKO%oF@JAe zxbW;>y@i)SayiR}q+N|W{g40rljw42;>_FY_Fag3yOK?)A>ve&lDV1L4)z~KxoxVB zK0&5S&7WS^kn;-Ke@0O4QcNee!tLJmpzUt)*LfvpPIkV0A$*mysP0i0b5W^vt;uP- zx|=us=H2}dJV zX_)kJb@$(T$#O#^c3qC$+tv+-xEfF9?ccb@;mH68jHd+D9$DO!t`BhVyYv0ue}>ry(gL%ErK`=f6Y8_TuJ#4oR&YFcryGike+^gI18|0Hq_EY;e- zw{HI);oY8VxV+jlN>1I1V9399_w4s)*KI#IIn`-bezAS{C_-zii|UsRdPmwW?_@jb zA@k4vM1o^6pSrM;g=gg!E?<>jbMDMNu(j_`^#1Rko0esIo^j>B6UH+u zfJ3I|2};{nMcdrxq5T zFWY0wJcl#IZlYqUIm^3h-hI<<^UPS#&3*mUT1IrMl{xY-@W9hb`4=hX|= zKWytAhx962*IzGJ^Lg>@w{V{+nSNbM zfn{#^jnBvTe*fzys3*XceO;r@>D+`wo;RT$rPsHvvv|AFzF+Rg$2kHIYRuREl!#dn zcfrtXD$iN9J`OjA?%cvVpBtAZh$+W!wLLe%)m!EBb9v7|MitlfXOulvZWxMf1Dyms zX_dx>{qJ9&+Hl1?I!tSYxjP8Ji+iytGA-kyf0suO=;@ni^*jC?31W> z$ob8J3A}%QfBz>@$wXHfZ{3h}7+i>6PQ^vz)-v($MNqquEojwp6|52{p-Pu#)*;a6IR#Uy4`DYvPq+2*VC&}TuZyP z7)yc{Y+YMem%rhGj8xEJ>kIq$ZCSbUTH}JOJFlMZJQzAv&{TBN_2S|Kj}zbTIHY}K z&9AR7UvA-bnVj}I;<2&ysuQUiwN;(_-`Cr@UwFv#pkLtf$=AWB*A+bRzSjSzc3;7n z+18B8ip-&Fa#(-ZuQ;?YOIeHg&)%}yZOOjMBsGe=ORZ7%ou3y%lU$>OwN@lqw8;CsIp0;m&adlyJ^T%@>8w93_fBI(iGXIv^ zvPiGo`02|-TYjEi*)8C)Y5R(p#@aQNHr2nsA1PV$j4k=ot>v-2rpqTgeNX8sz5bE+ zY)J14uV)YLXPVu*E%*ID!yM5{0j}&bf!~)kzqlgc_k`i`^FN2g+;!C&UObt!pu6s+ z?XkNXu4)))XPH~w{9d?RqjH+9{k&~$m(DOR32JC$YI3|&6H)y2$8EWk1yfsvpV!;@ z<>-FuG+HXeHF-^;%(lhPT5?_FZ?f;)Zyvj!hcD%*V)N{hKMy&i-S+!OJ>_5JDeJ(! zdgqU~Z-15@S`;;D@eG5-(dV5w?mlKU^We!mzxv>;rM}-LSSDD0Y0Q0NXJ@uiro3dS zuJd8H{Cc}tQ`We1F6FhKzyFl9rqOH-D`)Ac%2z+sP5bx#Nb6!v%gK*KK3%wc{XKsI z_vDHvK0OY>+8eB+-sXP4BUf(z?;Nke+vL4kcDFxU9bVRy$MAYtGrQ}KjKjAcUr*xw zw*7MMw)ZWZ!NQ5_)waE_&R*hvDz5z&x6Yb3tF>OSyx^)-DYIYx<)BL2(&D&XeQWn^ zIO)L<@FjVr(p-hz89@w;>$q#?zi#VXXDqVw%C@vh}q>>BfY?c=*QEw%mrPtmobQgr6qUAJz{xb^J*2kSYuDO=gLUXZnA zwLR?K-a-<4G$L-${IKjK<$+Ye23HSZ;^nZQZC#>Hga(VgX`_r$sD%$fd=nHpti|l&6 z)9l&(_Tw)=iePQCkf9cF8KAoC*JpA+P zi0>ut6Z>KoS%w)JW@o#4MNYJ^{%y<`JW-h(mydJSM#c0S+X3n{tJzF~B-tWA6yXr)*d5@MqKR>z3SybOiLFdhNwigRT z?{>1>?&kaRx1OicQDOSE1-0k(7dEkMHW9FDOc0G=@@IQ{?AU$(b!=gq76rXo9bJ`m zj4i12Md@4C$kQ(aKI(+st`B_t^ZSoOLJnu#=YL3CVsJwQ|9-7hT0FNI#S5`?!Wi$@H~&b*$%6A0+}?s|iibB!cZ=@h zViw!*G(LWA;z_493%PzfgeP2gV|f3jw(WPhMAq!H-%RZM7xYW2bUDn5<>Bd)deS}b z?zwBX?mTSlD2NGOpHUZ7C*>r1WmQXn^!te4Wzywr&wt+y-DuC~hZJ{qyPDrH{uCOBb+j_fi zee2h6oqbDYX1eD6wF>tVwfD8C|7VE4^`GJRb3UGXar3@?sn~J3M*&_uF#9VZl=(-sgqBjxaCtpKcMd zNo~`6~Ue~mVa^xoQ^ zjzvZddAoicE}tG?xbv%V)PcGE`%m7wfA3YA%-u)645ISR^>Y_YnsoF?L5N1ZZq;UT&}Eu2b(4o9_s+Kkz8!zw?)WC5 zvUCHh(%RxM(|xlXFUj5dyTEk{i_1pqSLQ0q%{D&apTCY}YI9WJo}K#_{xVH`sJ7&n z!|BN6*c$HX%0;^W8RX~5{1b9LyGrwE)PD1{iXeHvT@@2rjP|TpnjD>Z>4;eN!~@dr zzr1gmZeV?Pr(F5{d1pP^O_ntH{#_Ks;E-{Z?RkL&?>F|B_wVz}wDj`c%RX<{o7zQO z9j1XDCz~TqL~QJ7au)Ku_n+bTkHlknp{h##%hfC9tCoJgeS6j6%w&^4rcP%Lyk_Y2 zI(CQocyj*o2PZ4${Wj7%uzka?>!&`X$Q%&kT|UKFZ@o#v@)?qWTWcSlKVG}jx+d{6 zr=quSj4bcWRor0EOr)P-K)F(>e&Y?9aZe( zoFkY|uupsFywY0KdUF2d+aDh9v*{MuS-4~QC;ppjE__j$eVkcm;`#FFE$SKPj3ELQ0aSD2K)_xw4PhiNISCs*3-vOmSqsS%L4R5_%r+HGmh z!}Z@CRqAYbltMhU!_{kl$|YAE)|FG+zNJjE%Ps1>>W)hc3=@ynAAk9=y*x9>&nxzR z;rSnJdcl|1Ta=w&I3rf%jd%1F)@?txe|gUDby`$bGu~Z&wF3i-?Z5f=_gDyV<{Jd@ z9{noBUZ+=iH{G2Z`tXqIa4!5jT| z-+sEd@O7ZyuCLEIGfx&u?&)zozUJQT#^m?kxBtBVpy=8(mWBC_7msc{AlK z|M<7wr!qA&fXigTKi0ja3k|L=nHcO}|H{s8_Lfey^`S@KCT2J0<^I`lbnOL^Neq{N zzA$T!2v^x)pls7R`%1{f?5vG>{~6Z)XV_;@y6mX30dL&CZ5e^8I?uQ^StK+FDuit> zs=jqw$n1|nXRXKH+M6%0?=j6_wd`FfW7H6MZb8FTu{(Trb=40~zGw1spTBVVH?t+m z()rw<_ZporS)kBxHi7+jM3Tw$f@@24?%l3kkg9q!w)9i0-|14hMHVu5n6sNd>2AL~ zN7Q2S>3R3+;=SCLENWdZ8ynsm+Iyy9((K-LrMqkDy&p;lEXgu?F}L!N#RFHYc@a@z6w>(oMb-z59jpW;@?BdFN#mr14dg8}F z?^_ETSuVbO{rS%4>_bz881D(I^k+ONwR&3}J^4rR{k(4nS}urqdCy(F{wr6*bPWN9 zXYUTyr(L_`a6`KMhuw{wLt(OBd263PXVqMsd;OG{ik0u>G~Nwt{~6>j*O^NgO%AK# z-YFaR%qBo)W1p|`=AA)aZQGvwz5PCU>;7pBt;`R3WNL2vFSkh9V!+S*wO?Z=&#rG) z<=d-&*IFK&YO0d%%a~mmd~r)N+e#+R@+8S-<9Xjy_}^XMCdI+7JA3^rrUOyU>*fi4 zsylnrL90?ZTh3np{hi0B^rsZPDE?X8#F=p6{<;>`S6`aBmSpgK+p%-+Hre0uM-pV; z1$p(d{*pDZl*rb4W^sv=VY1uW_2=F&{N?@6pi`9*<=-B^e~uD!zzokMkKIfX5zYJ7 z>F(cNzf2}g;QYt@*Ly7cV@r~of~Aa2rcJv&!8|#4J3CXP3QNPgUq4@-5)3*t)zQQI zMuu|HqTAA+|JcpDQ8?$xS_i>D0f#4kZQXOwYfaz7?V5>2zI%Uj7v*ld_aoF>%P;%- ziI87aPD`HD?_5BZj3mzKnY_Tl*&k$@s?c#wavf^P*TsQRdmv4Xgxj{_X==g-C z8uu-#Ca&ASDKo+QRO*xN{Mxg}>+2u4m+$mAW%Mij+p8%}DuHWduAJI!sJramYR2sc z9``?d-|sW$i7fm6H5I*UuWe}Z$uZ4aQ1@}8OmyPE({c^9zgf)s>`# zSQwWuEbwTK`o6a#xb8ng=abx~G%cN7%<+b&e^Ix0gMpeIjqO|)#h!cax;?M_hjl#PY-_6;rH1~;bF4KLws1bNikxJxq1(9e zPJCVM?!Wr|UY2+Dr2g{XlbQI4FP|xCm22CP_~f@YZofRoJ>O_W&EC@AzXBgVy1J)0 zFjx1tsG^zDZN48uTtP2SuUj-{rC-fLr>N!%Zh2F^HLFXLx9*QKdp-|zK+?%j-*;b@ zx~>1zEOfMo%}r{__LZgFOm+1uq#vwSVBg7i|BNKV)7nI~fX2iL2DjfWs#+`^_08tn zuf)|e-$(5|_coF5UCc9i`{y1^W)r2%o=hzA{qe@_Kg0VUk1sEqvSOv^-dgaAx1$>f_UOjJX|a&g+WKjc}_~d3itYKST87 z)^kCdg3Ev3+_HR&(u9BI?OzfqFSst5J3X2^_v7cC|K4^?R+DtL)URL4AKLL_`YHb{ z5t55JrH||6IPRCque<*K{_E`$eLGw0gxAMC(u&wwD7xx;=+veiT%2OkucQAh&d%?= zbc#cAz5mvq_w)=FOyt?~v@J#FUZj2H1paL9?mP?o(>0Y|XQqXIVPAjmqPp_ZnGg5O z3!G%}r1Fwq%{CUM@~y{|B&#!BD zyg&DwM^t$CB4eI4caG-1%X@ljf}`rxN4sD0oax#ZS+4OXayOHi`9|>{KV=d+V{M#| z+*_yb6?}TClE>6A#^Z07)T&H8adt^-a+%_+U)9pLZ{1&aLtuu}rX_#pHvF8c`{d)x zOii(;mk%nfS{}S_`p#Xm8Dwhxv&(-R$#J>LAe6bb`_Hzee$S_HrO8%ZzBli}lk@KO z_d1;Tmxg`Ypz{0v4aR%lneSL?1*~c;$X>~QZJM3$0)Zx#1coe+HNfbY}O zic;NKg0J@1n%{nTyYEKLR&9-y-|FX|o6=N1+1_>Uzry;q#h1J`s@yts`{NGVx5;1k z*{br#`*!?uTe2fpS>DLI^lIXX*ombETXG_-YcKfzuIaPeF{N{>%I@Bv+f}mpykU>5 zMS7OpXgXcZ$1Py}{q5KLd|r1Mw3_dq_pN-R;5{u$cFCy_x0OqSezkquF7~*8p7q0+ z1r2YP$8WsWq`X2PzFyvll}XT&qk&~G)#2u&-fX&i4U0{ zre`0Qy<(B%+`L^}>+rlkMFokE%eMW>xi_ma-y_R$>;C+Eyn>gXHXJd!?|JXQe}-4N ziU0ooHcI*|nl;yd+4-lhxDWYU$*B5u=N{c>m-4JcF5BL9c%_+`fMQ z7R!MclbYD6DJt4}M+6Q%u3!G=`{7fu-P83K%Dn$9ukpb_#q`(fmyBJm`8T@_cQPlY zG4D5fcK1W4TGZwS_xP&+3<9nTSLIxIni85iHE?lak^L3(?XUNLI==MewCPGm;vWBc zaX@6M;MCAxPTt45(X`!!;Mre3*Vwz$^0 z&VB3p;}$t5pESi^_Ft*X9J1id!t3v4!(3G-c}ytCPde~^`}f<=d8fs8sCF*5wvB%+ zcPhtu#=}_$8?L#nb%}oWx4Js&PQ=3#5#{_Fa_gpiy(N2h=NbVIT`glH?)Qlai+9S~ zw~2Z)P5X7J=dhQ&(HwJM znVF51j+2ZS%v0?qa7y=Q|NhIzHp%F3<-`-!+b+D!RA8EreYNT-gI!Xz?^oHrMS70! zi{Do#c}(N=vrFCn?CUjl!+F~q zgkF>iF1?zc{O50*jGAh~GxO&we;wSkxA*Z=O}Y8IoK|e)J@M_^zPmpTge(npy7y=8 zg{;F+BekFO%S@5*EtM0qq+QrUiwXVLpV7A$+J&iqk;=aQ_3LB@y zl?TqW=+4};!9(ow?H6_R`+j^ZFpfX}bC3P*Yfs!y@6cKJ*oi}{g1IpFyL$28_Z^=d z^?cj4_G-WVyhL$fhS#Q|7G>t-?ehNb|DM`;nfvSWKYta&nt$~a7)`x-sQaDCo!-Ca zbJu?Ccs+yt@E_w^=i^NtX07`sUDIBwe<06pcioK287GUMy}kbC!j&C(6 zA-}*uetC|@j8)n%9b)w>QRf&v>osCwadiz$q{XOUHe&yT!=b7J6G1hvz z({kJHuI*byG&FmJohRu_RVK*kvA#X~ySQ;`-;^!(K%nf9@;Md0Spm+A2_{aNS zpA!ld;okAODmZ=$XYeW)Qx5iNOFSjCm)vdK@aXpUx9w{ard>aM<*SET!I2rkN4l15 z2$!CHKj8k(_X+uJ-m^lE{mS_QTH3xcs)zMZ8~d3JD({xRFR1T0KG9Gm)!w`Ad`4B6 zgW>Vbk`hKM#r7*bzg_$H`+*4yW@fU8-3VVS=g`(Fk@H&Oa>mJB?|*#%^`JvhQ<5_^ z@2~YQS<_D02ba&WDwMKmGge0($OR3T|9o(AZoK-FukxFJ9$IqKeeD9HrBkP@DGMsI z={xq#`jh8>hB>@iD^#xB+`lkZ-tj5>l;z3SmlP&Va#J$Xdvs)hbvg6P^XF_tIHU#6 zzmlK7_C429#*YEc3fCu04G`cH`EJAfv#@zxs?>A$t#bGMH70CN`r!S7b=%(Q-^-+{ z%WUfTZuF!reWg`Z==Et~%fjVzOeW7{op+7ZF~R-Pe}?$=$GBR~h^n8T_QvvcVDvQ} z5&8QbqRy5RC$Oce^hWhwd;a-(K|K#wB&S2vS%+`=lY3XQOgh82H~h`LjVB@$CeGE_ zl@oP+`}aEixBdQ>Gc_4Ds(en&x6i1$^mYxS_pTczZStw_tY_bz-~X~{>N87@^rwq+ zKW%;ZQ%FzZ>*WJy*7SXnx0teF@7lr}{MVU74*2Er*3Gx(o3TSdfYs{N2a7C+B*u-G zO72#_Kf7)3@0#xi)x1=vl;rZVPfXt{eU5LZYIDbwWve}IojYK4>tM;;b~C#_%wLW~ ze)`N7w!ZMQo$KtUqMtv!(+uQWGB2%A%57T$&lsW^rI&qLPiK`l{~_BCX{g ze|auE(a?og?#TUmUHyf+62%&=On2ko@~MVKHOlRtn$U0~vTfqq^H&~B`slLp%HF-D zR&!g|GkERMyJGjff5|)rrWXplDl2W|-u|)Ouw>FH)}Jp9|Fn7eo~tuUd41kIj!moc zJa+7Or<-4Y>~=kmhRTMB`uX2{zdq&B$OxIVl7pj&Q89nho&M}Qf;n1hLQ6#(_Z#Rg zy?0=~hg4wnzmH$~IE#)MG3;Mjnz`+F|AizkHJJs{m#cZrKC9VGxisZxweyaXsfGe2 zjM)v2S5w~?*WD>T_M`vaG@Y1>tN#6Gh}s*)wBYEkZ!Ow)RC?Aw`LJsn_rbTfbAK1U zZP#QFm}R+h{jv=YngbNp8YVRyIko-zPI2k)-+yesUSq1wQIzYO`FZ=j1z@|cooVDD6WPT2d)eO~`|D40KJZj!z8;@lANW*(ZPEx*&>_w748rz**TVYNfm--SU<8VV;Bl3tXa z;+nr~HFI@5!?d6vk==Z==I=2%y{3k1S}@=1#QcDyvir4rL3gmNT&-Pp->-^y%daq@ zld0|}%@<^Zt9z)t+P3)p`P}{7#ph&ZTXjcnUApTRhwJIpa{KqqvXVWwh^=ksMrpBc z+qc)B|5TPT&D>^<`R7mlB27nbIPxt`<$0$fmY6Lq=lAdX``b*5)7+bo>H{1icUZqqFcCjYUaNqT4!WrZQbF z;L9Zcjo*IpR6OFeWbV#B{cCTWOn~IpmaTz|rDn4yc_^2CtuCsy)M*=J=kRtlC`?`x23XD&W)`^$lF3zdpjW-Igen%@$%D46u*-+hf_ znQNS~>qKHKS=R6U`Qz=zgFGK%eWSXHmft_8Dyj45@w|jk4`-!qM{{#iK30FeU445S z2ZxHr7rVQeTt{~Gwtf57r?NmmI;79_oAtdr_wM}7Z&>IuspaN3-cK*jRC+ACzeOi> z>yfS_Z}+cs*!J&l-|znn$9P(;pZ%y=^{>n}FR^O^d&}QPVY3|id~cdCgnUzeo%{3G zpGR^UtvpMDv*Y&PyC_t}wTk(`>gQ5jN}C0Dy}Whz?U#SGA7T_7RaR{LdOv>pE%~;| z?pvH~eZrj9z0oU;TvPUI{`~8oCK@HpnHrJ(eA|D9v~H2a(_44_Q{F1PLgDB1o*S0T zvui82HhzE1>vI1Rvn*@m*Zg3)>ALsk1>D#J2%{rF!1j@UHbQc>fD?6+;Y=Q83cIW@UDGRDXx4cVP@!bjfHhCyKANH zn>Be)bi45NC1d4EPPPn{iEmH+YTdWv(`U<%PrrK9<>YQ(BFZu29=F@E{xqpsx^E61 z*SDzSwCxEUbme+X9}PvX}jkvQqDbNNz^dl=8J2iER-~^Dj34{G=G1F)q zEWK^Qy|?P8EVN`znw6$K6{!l(t7phP{`f|c*NHPP7GJvERm2}3;p)kD_bMO6UkiL_DeB+-(DJqlZ-)dppn!R^>n>ydCbsvnaC9X8ddDq^4x2ODVyCMTq zlSEai1Mm4|8=iQr7WCWo>tI-*z-3_pr%B>Xcfb8vU%cPKRa8Ut?An^cFSj23a%p-? z$fAXhu5nxqGM?1BBv)~JbgTHE-3g8i8%=z>za~3>YAkmMIOK9*&&HRZ^>eqG-QIux zx{$ELRIcLd6LaTB6i)Sec~!CMKuD<8vEQ=MbI-0X{*?b!UgCqI%Jh8qmHQ2|th2oi zT$wfbdUVyHs4!`R3%}Y9cCg$#!gpuhorm2F46(rMY_d_vicW{iwdMbiLa@%PO&L8&sC06M*Z1URvZy{@wqWkj$zJd7Jv%VTUco*_@)L7>{PMUD zt-7+mgI%AkEW2Z`= zcG@d_U$$^1w}I^9eYqDUSVQ;SmOX!do5aj11@HL&Y#vaj;#p znZ#gIYk8Q-k#*gF2FGu|irUK)S1B@uN3XmS_sZz(S}LDuACr3Yoi%ss zdAn~v4z~p@uWSE&O5`LrKTB2XHHo|PbKN)oF&Dq_py(Axb^i6Jxr&_=iZ*Pmc_^2A z<8{xC$nDaGe{J8sZCcPY{r-i2AD$}pDcWT5jiSpLy}f1fYpLY>0l^~=x6rOZ`+ zHn&M!*wA*$1be~T{|q^cxsJM;OBd_9^_QEbwRoGJTdwGOs#PKXcK)H?4_?fe_F-0$ zT~(e!=kx_z%#Exz1c{tjT)SiI{@C;TWLo$2@b3D3_>*m%rD*`yZH*u^hUOjL{=IFT zZm5~K+^#NQ(}rh)w%%+LlS7q$YsV+woaNm-bd$e?NW(=S#}iinYp_@~@mThCW;Xvtnz)_L1xW=uXMqbi>K^x6eO zWdqN}dXCY}-y7}4-`Oj+#$1z(-1n<8J~m;d^r7UgPYg;s6`DLa-2UCJuTB)%FQ`?x zdEV^PS%;onzw|-TFuWxApoVkJ$L)1@s%sLPDmcq>HJ#Y!ww+x+MKI`a(SA3pbDfKR z1u1@?_GIn3-!XZNe+sT>ta-QU{pYpv>G>9cGp_&m)F9p$(i89ZyQxC9?Kk7!dVw!3 z|J~N(Y0+qK6!d@)(gnH*T=aDoKRq7HrDA# zRq57We)-t#_xrzW_<5v3a`MW5^>g)~a;a`iTqBx(UCSgY;8)otr^Zj#@(-&MIA%ydZns5$IjyCky4SBDY`mm-<4K$tmF=U z_Ur3qC#~5Tb0(~EZJWn;o5}q9{)e3!M^0~>E5EtYeW}7;yLyX`$_0w$M$)riv%T@S z_x(YRy^oi1*Q;RnZTqHlo^blJ_PRyBX14yc$cPCGEX408RXo0W;(>f&+nkl5Ht#Z5 zeEs$N$jr?9Pd|PA-61fs<5P}OE{j&C+`_~J#(m#!J$`vkYRb|Ky;7NG+bp|j`5HwI zcGh2CE-{<9sE2KHWArzhzjf84Q|Pd44;U|nt2N2Qpp0x#2gH>!Qv zB=oDKv21rlaqWknNl5{W>7q-upRdmQUgd5UrQ>4hsivS9)@vFQ)A;h+-qxSr{(Mx~ z$#c$3gtzFr+}CSu%4eK1@~Sd?CJ9~tdTiS6X{vs!-{o06|7zjt&yZ?b)wlKfEuoNA zhHG@ZKA9|!W>c8PBKtRd@7~@28O}*gWYqK&xpe>PzC1aRB@Eo9zrQXKKBLjtyinmk z!|L3Qt>1smuRqTo6c##B^Xcgem0g!leYmsW!t3|)s-8O;erDCW&(kmb5r6wf;-|2u zmxE0AR$caRRt~P)cOg)D-@RlN_e-VBDbhE-KfaMDZs-xIQg1c!{F&<+c>(k7*A#~~ z6i7y{-^p|!>cRW_w|_n8oW;Pj_Vca&7avajv8~zlrN5_h$(FSx3I^iVpSR79Z~n94 zrm<5<(3P88|JAIP6FNKj`s(+5p&eQeh13}1o0Du0{B!trys7^Xr>=K+{H;Gfoh4Uf z*Y(t-2`;v3-#0;j_4o3fKW&(QB@|s-s@ZQB^{(XU_qI-#Y1aQPv?v{D6Ics)c4IU(m(c}VI89`i-xM&^WU#CTu&+L zUak7l-!tvg@L&W}VhrqhfYtb?l$B`#v1#^;MX7GOfpIqmx5{y`Sa# zpNtL5g`y2se!BePc5I8G$5LBGZY?};o_ znPncor&R7#1Y1CK_9wGv_wL&>GX-5*^!)Vcssm?sZG3(GEyHimtEGI~)V|wvzpYQM z&(V51iRDnt@Ba+BpPR~cBwk-tEIhMmO^WtzF|!RD<@;G8l23E7>`Y*>O4&3y{*l0^ zNoOV*X0$fzt-jrT=bJn;-vVXRdw*ko{ysiw0)yuMu;prxtELQ!p>7{wVk<4ee|0&ewx2f*lRwYonfBKtTexC)m z-*Zihn3Sm)IrZAFhSc4Bw;S*MexRl5$*9y^JLl)&fXNdy+P?mM+?L6dp{o>g?|Amy z9sAYuuOvKG)KXRb_2l3D3};on@2{trO-p#Os^^U7t>0|F%I-71KN0K9cy9mU_d=@5 zx`#{O%ex1N-gB50cKhYVJ8y4&`*C1X;FRSy^B149*{Rm_%6ni{A8DLgR4kQouWp&BMVjh=h7M&v zmz*=d996@PG#tBqd-nT3`mHHbjdHyw_=jGyQRO)_uQo{NYHIGj*J&MrAlUt z-8=T+pZq+Qrk19umG)nr?LVV%SVNXSW}(rUp7Tm478&F*JfEF^>$h;>_NVpugJpf*GEANklyF6Sdn9(g@AU%6L`G^%vn+NRLQp8WOkK8X;~u!T!&|FqB1 z*{RZ0Vq5&ZZIV~dEyX=mt?O4;f86`yPHo}K;-JYheV-N<2Jd8=kaAjSl~~)QkcL~= zZoPlC=|dgkAOGvz|Lj}+4u`LQAI-Knn{B$nEcx|sKQ)@XVsK*ExLKeye*ga0tudFf zc>Vq}Xy<<2BjRN*(9r2M_h#OL$;H*TubzMU6Aw63Ua{FwHn^4iwg`59JA#n!)-SReM|{)Jjw-BMP)q`SUz9TuNm za@y;}mL;-BF5hux-GAxq>iivra~|K_^Kg!7>@l^p%>6Igg7)T2*{S?U>000HoA(c` z)9rRj_i-=Q3=6s=TK}l++;{~0E`&t6@BEH$?_{bzl9oqXt5)q+bZ>B1fzfB$+f zd%X4SyV{M%Ebc$HGs@bOn7S;|tX}f+1jkDUHf}V$wPF7s^AnTx_dfcuQCFI0(Y*Cv z`=h_82bG!!m9+amw~f90cv)4`su`#4Vy163+?~lF-x_%`CN^GJp9v|-JfMff9|=Z+VrIV_WC7CUe^_8gNOANm_m(_QE+T5gd8+|f07p+{eRPD%$AUTUWIks_WeJ?n7JpNPu@leRl z+UtB%&dukV#{7oU?a$O#l8ayP9r(vQb5ih;icOPdit~j`nU`|WOa8}{XLma0ekeZL zFZ=88$_J;d?l1b}GkJ;9@@bBNLHEC34=j~S$oBHIZtY<#D%;v8yfMDR##biDXXoag zlf|J^9rN=3Gpq3B$T(XBkcd*6Q^zni!J-R{0cz6sv-4N(*s%3Y zx%lC+02iy6Y7tpDS6S z^5bUzH58*ATsSn)O3Z@(Ux zzo%x+{vUT|obzT*FyXIlmzMt6I%U(rl%Q!}HZIt!r>LfKC;GX(pYnA@jujmxQ=O|< z$S>Tw{zvlN$2Fzu(c#a2XWX(eb5K>D^Lq1j)1=&*^lDYg4)m95bzfU} z$@J#o&3in8oV+EjghjI6GR;l(7P_`=ed)I+4?0yg)%!IkZa-09*c`+ls=W5$CvPPs zlO;xPBx2&8+>tLTW3GSR^u5vN(A8hjX(Cs01*c8$I!oIPfdhndopr7Nq8 z*Y6Ne*5MbAEj`7z`9vv`iC*_4j*V~1wtW|S{xiv{LC)~$_w`Y4nT&#%zh1Z|EHZI% zUSQ{~2fYWs-Ts+-KF4B3%cNNoz4l)}b>ZivldspWkfo->`M1>Fm4OibV~e^CAX_TLT9wEUd3D(8D;zRcGs zoEm0c;jz+F=cDa|0KVUA`+mpI`@1_aXllb{=1B+gubrt~Xg$eJd){9A8Mb#O@!G$+ z64`q6$NnYdd%qnxbYX(l`*T;Hp8whSM5HwCZf)|TPo`HNB{R#u(eqc%zx?&vVa_DS zob%B?EkB7Y(PW$>)e$~heMRNEGl%_IWdAeV;XlnH%;*s}{nMpu^$S{r8g#C-R=!`_ z#j^EaarMvYBHQ=puRQ2k#1+P`cz)*kYwUM3*84wyp(vNUbA#gg0!F3R`;UG9%D?i! zk*gZd19N#_UH`JgMnxq``*r~P(j*>!38vQ~F( zY4!;cjhh8ORZ3Uix&L7w(*i94+0}o4zKr+y^puy?|MvPvc9}QYtRh(1uho71`~5~y zz43jHhHESBu59?*xACz~X7TdaDkqnQw$wz0B*wt4C+~>Kx7j@E_pNyu_IKZvc9Hkj zr|6uK*yVM*iFN&f??2w}W51X7QK|R(+t<84%W}3E7(I@gf6LH%!sAPN*5%*qEnNNd zjAngWxpw}wX}Vvk0zI--o&J7YdZF&yp~HE0(|$cnwVpD2_T81=cOQIuBG1k8*}fAJ zy0f0W@wblqcf0W8IVp9^GZXlI-Ikxc_uj;ZzxR9epPe=oHC=K2=@u=4P~U|5 z%uPc4(^}r?C!hV#u=!(u$cnc0AFusc`1fdL@P!7c8FQw|MP;doS@nM3`=8-CvsPFD z-`2&!@BcWmOl4*?EnKnN%34Uj$dUa{-=BZthtre%64+nw%%xKKrrU(*&tm`H_e(BKoE*H{_>+LCs>+-z zZJR@9oZES`EMSwS*u^B@4zt@^|IO%HT~!XetAyPJm|?pp{bGg zVue4IUUO-?QxNrh-oO70eXPbE9uAVrRD?z|06|h_NU--HYCLiM$Xl=GE^77mAa@U!=652t|K`OJ3#61HD&Grma2`qd!uGKrF@)x zxTCoK6WeQ^MKW*wwk(+%XBjhj2@Bui$ej=MJ38M!=QuktVoLMsXFGB^e#Jh%^q+yp z;?{@%46`IsCEaE=3+|1W_F4BmpH@xa3!iQC4d!iMsi~>BEc(jsg8?jJ@2>T~F5=%V z;rVy>!*b(;%oCGr8E^XsTsSG;{wJ>J!ptB}?XAJ_>fxbn7boEtLb%Fgt-FLz@@ zIyN1ZF->xt8M#-dYj5Xu8m%VE)XYlOMCx)eUem}p6Rc0)maa1L-?}$zp=dTs^%Ver^5B@&XniM2s z>n)_A6Md$r@!TKVM=vHAY8}?z6*`4MVe2bS=BtaW)*sX7&6pRu?sZOIgRcLL)X(0> z|1%W19*LUEa5eY4*|zWBbl%Zt(gEM~Q zyy4#b>r?ZUADx#(PU>v_$`_M)P0?~yQmSz2R>Y+Jr&hx&u3I1@eN#eEEuIEcNGkMn=uIo7#vG&+>@mQDq zW#6}bd-Cwp@ASi7PAy*yuN!>+#J^zo!3}pC9|wn|F6K6vyJ`Az?d@h64X^G?SOqVC zxvZ?}&a2fvK}$jmUpY&^wAyp|#kRTcI5*rnGCQfiqfYh19G(pujrZuhnSV9t;Q1E2 zhDlMnziodcA1R#7py2K6sAXF4<&<_+==Kf$ieIPqPQ6(jHiR`*dCO+s~ zeD8Fi=H}B|JDr#6l?QG-&hpm%(KUOya4o0ZON1S!pIKNuvwYo39!pdIi?uWV7#{wx zXpY0wnYkZVPKgg{{IEQLgEd-MF1ml+uibj*rnR*%%wBdnwP@l|oqU$4y!@wAr9{iE zSN@t)Y*X?qv+y&w-U!KLz{mLsI^jzdn#Vf7M*L7Exe_OlMTlTM(!~NQ} zZ^tjkH%n~glQ-tuIkO{bhF#8rjf}UyXB|I1zk8GFna7uxCq9)7x;mw0%2RueqH8N} z&D*&4S>;=?K)0X0rO~QFyN(x1PL650nH;cP=M8I9gXbUpzTP7ibLN;#^*^%0+e2La z?b;RPb~pHB52+_D<1`9*wXbQq%5Da>C&$eFAGHNvzbaciv7|g8ZKkZ5f5xiMCtlcG z`E2VtZF1O(o6~)}cZh90pmyZLyc_>4X9`blcxd$bjN)g32g3Q>c8T?(G9f`$o9g-1 zJ}gPxSmPBiW93z!HD|pHqm5?lZdvwgcE`tQ(V>s_$DO^h^M1?5^-f=Sb)FX2Z8~Sn zvOKDt>0WAHdccwCPp8e2En>)Fmw5Omdf%}}VM~QO=CA7HW812(#5%iHhox4&cj|#3 zNh%L_JpcJoOaAfU(~iOXU#F`wU1Ll7w7gh)$$y4N{s-#jG;6<<=lkVu{qgiFF%wat zr;+JDQ$>9p0^}Sb<(4flRGF3$_WgNrN6`{Vp-G05KkH;j>?p4}m45Mb&0%+Q`Igv4 zM|CUSY1&#y>N{-x-p#q1>8R0#!w#En2QgoWReTg5S6?R1e}?krh52F?4+9+&ur(iy1%)9rkACa^Ag($f42KG|5QJI_eNwJrx#}y z`yJD7D=#(KMax!L0QRbQA>Po@& zTX+_1OI{v--QvZ{lz!(E8;<#&c(UO^QJv)>P3@5DS^Tf|&A-t0UE!*!f!eea$(f!L z8n!FMwkzE4yWb~rCh&sUp~Yc#N52YuS$ewLx$+rf-|XW88+Wqz<+$56{;3!G8Pt&A zTE6t(e};Gun=Qg;zFzlb5S8|@*(=0pDm{sH=Z5Xq?)Uzdv5kM6+9bjJ^u*-oTvnqj z?j5h!m0e@gV#!wenRCkbAbaDE9n5vyObpxZZrIFm?);zr*zar?{;ZRa+tYS^4PWLg z&-5wGHb=L8SO3qDpM9tB==--FKbx0ubIafM-}?@_w6$>i+t5Qk{;Dmo%?$ zpCWM5Uw{6)x_f~uXQqUx);dOg&*Qvx_;GTLqgP)}mrkwwJM-EtJrdI#3?#JBY7E{fVc5f(p_T}aAsXc53a(gSkE|D@-ImU5i(eb@&${84M&;7acP-iQ{ z^kv%?)}G&7cY#%_eSL*Rd0CIegxQl{i78!wo4dcfI=7v(D?)>H%hRfeo$|2{yc~*x zudfm1mbkJwIw_>~(w#j0zaQIXE)e>&*U};}-|H|(YPf$yfr{4AdN|2W1j5E1b(^y4A%b{WVD_)Nv-;zlR@!yjVGH|Z`766zF&IHZQ{Rm1}>|$ikIwIzV_=*rF$QrcrJb5Cu8h# zZrARYEVr}o#C-qupP|N5YuAaSzx7GCofbN7HWA1Ql61J_zCt+r^ZBoGe?J^iJzw^Q zb>(}1>!Ljxp-0rz3!)uzeHd9{rpZ0e%l(o2<)Fxt#)tY<7r1KoJY40Ww72M<$fQ#n zkF}gRpUqu8&wgL!LB1f*u(@@=qHLD1n=Glh^eTDTy+G^L1*|)Mz0ZEQC@DzW{rZm= z94@CBid&g>IU6L_p2&T7C%Vu4{g=0mD<<_U`Ezgj&edN%k|u9=zP~;~W8z~Y;opw; zjvbNy&#*r`VMVBO_m}tAwU2i(&D!#g+3bCPn0DR|{B2?=XMP6!O*3Y3|49lbKW1o8#c#twp<;++XOopI18dM0$pvec9z}(uR_L!4;l!{ic)# zF+AX_*kM2a!}0DV0r%&w(!Q#Aa_Wly-jF4h9A~^kdNtUJ>#m)_}@Hgm0vyOrFqZ=&|Lhb8xZMOSwxD|i0!eJFJP$F-%x|5g@Ma@)Pn zzSd^;BmUq%qm?Ho8gfqaP(QZp6zi#(pBh)+JQAY)pW(i?srDbZGWbl86 zSB@%emXr8WZcpJh_;~)v<$2Fa4$ap;wJVY5;^okm<&kMqgM;0*c5iknOJZhzmbpQ} z^rNHo$Mv0>XSS*^20r+;)9mK_Q`}8|)E;CWR!lc~E~cutjbR##0b|UKU9mSdyM<1z z@30rBnm7N=PV>%4zsf6H4;-Env?gHVIcmU8)ZTPxqc!xQN9C-TuMN44_) z3;bs3wX0(PEUxYTBlqBjY5M#ojmdhe_0r64djFW@f8-^XcThjC)}Dx0GUw#xEm--{ zd8N_3MQOizw)Drpnzy0$$qL7WjgureFINi|7S=c&t5f)>x=`VKbeF)JH}{U8zNT{W zkL^LvkeeL`76$E8ShLOTrRqsDRkk^=c`~gIo1Wh_eMMHn>y~pxD;`bKIX81%R?)I> z#USYut8JCgW@h40g>mtg_Awn_-8+_JhNAIc1dbl`nz>w6EgtMa2*{u&bLciv&vf3uK*F8WbHa9y-XvdwW6E@Y>+?+js;VM$P87 zo~0#*3%WdO(|x{$J-DAe?cC#H;n=yi8mbw%yXE(Y#)x-#9q^yt$z^+?V9KV>tUX&b zo^dV>cv93Zsp}UiR(?g6)A!0e^^#}VD*akdzZRZ2>l!Gq>6}Vx+B~5Z`qJy~UUEIh zvhISRy+`4~F)OtlcxS5coD98mjw$4urg@=|No0J`Gr{z^ zhL7r^-LFo)Qn)u)G-I9N>6x0b)7ml~^DNNRN#4mKop~VqxY&ZHe!QLXXH8x{lG7G# znSM9vht-)SYbPyvTC!yCGgYsoXAK*aE7qM1y%Osf|M0uhb6evJJKx35IQ}g4>Xo0} zYjQdMp301U4t|8a7$V^SX z-)rsHMOI&7xpDNm-KkIeCiUsf`53)kyQ*Jv)$;38+-C%5naD+Mc2(&xJs85k7$JM$ zN}0m+`fF;O4(-ax%fqU#SDgK@C*@7$pZhF_I*WzsPXt99UlHJMEjqc9CD?6?v)9ek znpnxXKVq+~mk+pl+}1AKH8^Tw{Y+;rMpcuKlR~vl-En(%D4RP!yHzHIrGYO%gYSAm zeZ9xL&f?TLE`5gWm6=If8T_0UKaBENIqhqZ^OM?n>Yvt4I={#7#L7o!tfca$FQ2t) z>858T^;L>eOzPCab&W# zh}s0nDEHB6Ha|JUU zKVNMWI#9m#-18#E<9$i~kUYZ!*tX=NT9F#_)R2ILQ+}&-iyOTgD^qg-*9hwoP6k;LxWzDPY>v z)EOM7r(8TyvOBP0J{ik*9JiqgYHtC^2j>@)JJxE17-YH=?1lHf;Ho^{b*X0QcTE!|S&&YI9Ob)rf^A6sXA!ktF-Y5y4t-w6s$V03yhDKE#IrG4Ad zxvP}?*tbSR?pT^!ctSY!tf^NBr}tGA)uWdmiGA2rr*dSy^R*wjkNJH{`2w%S@|w9n z-saEh5-O=Rby3Zk<2xs@Oj%MG^6H}C5|u5p=DC_n8S=%PoTSd9rnELGKoc?WIBy)p=?mj--&KWaVSa*3|`xp9a^WXO5i}_86LlIxOI}~j-@$3I_rd;-9p`v8i#9kK zU0KA{7hc57rZT(IY;nb7*{zR_HeRl`K2~tbn1MmEJ1k&!KbKI0_pIrseogkc^vX)F z{ZPHcFXN;0Kl(qOa##GKTwu4>%R3wI#T+&YV4u;*HsfLcHSRNx*%KJperf;qzs09{ ztl&`6lrlF|ed*x%`P9Q7pMIQYI8tDDwBX!t0q%8KH@w{z>@3v`u?*ti z4J!XU{YUpl>D9Y#AKi1ds{Qbp!|VR2Pt%;fv`Slb-Mnt?(=TW6tM=azOqtuj_^T_OF0$B>8tdg;ZtOqY6{bGb@#IDIIH~+-O~(Y>>HOx9O``RfhYR0HmniRt&-6|A6BRR;lYo0(gbKlIn4?oPfr`=iKm9z75SmmB~ z?}CKATb9&3W?m5g;p3F&A(hwTD<;qS@$ixKbM{~0QSoOM2k8H(xcIv6Cf?P&W>mfUxBhTrAWm$xRS`fNDGEnar&_iwIw)qgTn zHSI+21WjEbpUlSN)4#~!GJj~Jcw+XuYT3ryzaG8Ib7`D)?)>Vxl^6f4ba=$Xs9>=? z?!#uQbt`>m=A`iO-<7UEZ~li(O+{o*m)WwZfA?&;#ClgOH~0G2&wa*CJn5&WUtMvx zLAJN|cb*^%O~~A5s{uc%-6jS$fZXzGn~`2_0rL?W3c|dcfa-nm8A?zeXg#ot@$oJ zD^qBqj=;B{hclTrzv405$i=z+>-XpVb3cU6S;DH?xOm#~nKy1tU&-RUbQ*8n-UY2H zz4QHg6+5DP&vrg3ym$Le**A-%3#}>)Pq*BjP#>M`kyhZa{Q7#^#K}z`cPnPR#!-v4n!=V~sc z%=*QZHS^tSqh!w3nm2a*x$AOWg2Srhm3DUS@jQc*lb>3v-Rbbufsk2*Gfq! z)|{KUuy?|X70sn$dplX)=vPbUe%xpL$9juq%=y!2B*UFb_r8rc-`W0nx1QMIpQY9r z`RBbZyMOIvn{3py%w>}1%2apNsh5sNY`FFK#@p@3vmg6xX*u_k{oZpAR=JV;#Dtv9<15EBf+~;wj%>FAko0yHm+E>}%GYo4F^yThH;B zvOHjIu-Aq!GW}oW-bKx|Q8J(I>|C(quA?~1`}_Bwe>gZvXz_zfYlBbquf4t8vr*-_ zK>QXD_nQY(m4h>!&wqV%?{IyNUDUDkaT!Z+rZ_{;=TmPSwhrjZZ)2%??VODdgPsbxF--8;9Lh zAvepTGZ}jO^ZtB4*mQXg^YiBy7Y7L%Id1*;VZQZgMxV1M6ISi+PyV`3c%PQ{(rx8( z>2K@;KB~UDTX(@RL}WD!-~C;}?>^_%ez$P8u)TTn?#q4CE-|fC*&kKQQgcZmSuw#j z$EkPA?7ZB!Uw%#QSF1~5WlPfg*I@SjOM9p4rk_r%E?F+AU7KaSa}UIoef!0qQ&sY5 z=FBY%F5bzDzCSfj`Ix6^mUCp*pB3{W_pC_jb$v2nmBLrKH#yet{A|09Dn}vv+t2gI;?aylJS*L+{vEt>_LMQt=|3*1zB8|-nut{9 zgc=+db~EO=B_S7Zj?vDkt%SK+c%wCcP4Pd)A5RsHPEOQ4p}JY?%MDAvuX|e`FG#Rw zRsXJe#4j?*`-xKrkLDv4reANg%DVo{GbpV~oL$@2y6O1R-&$8r{)_fWjmV2^>YCLS zQvT_3WS_yA-P1G{*QhCf+~0B5t7pN?vQ4Gg6}{1A%w7_EC;x~)oa*sDE$!@#CF#M| z21PAQ3yj5BU6>PR+&)!$&-iOvm2u4EHM^b~zge|ZrR~ZGky)&1R;4yP6D=8TZdt)u z5c12E($?Dlj(42-+A_ebGd~tyDZX%`c@bXDidgJvs+W=Gx3Wn;!I~ zq&EGMq*{L5RMD;Oh6^fLWUakT+E46BY+cfqY9+Z>!R+Wx(V~xD5vr~;b@M|S-rcrk z*fx<}pvPv9oWcWjO-@B-rG~Sno^nM70mCEuP~S)s-r<<^2R3FCNV~`EF+- zkJS23yzpe^_L$Oh)mLurtqF}%SkvdLe14f?-DwlP16c|!-ZvUQ*SJeOnC`JJ>DiWW zrP9^i*>W?FJWEP>xFb!6Y2J_T5y0Y0RFvTHD2U?mc$k;bx``?iYV%y}Zj#*ej)E^lsC#(?(ji z=FNK1x8RJ`a=kTCHmlZd+MsgQXq(g8fPdY_QOCn~_OAWDVYAmXul_?NRbS8bC7qT& zFx6UQ%A=1QKAoLfQN1}YFQW1WsGOm2awP&)WCMs9IYz@Ak6z&{! zUElA(TvzvXugapW-f>8XSDPhuC^f$Na%uL~_e%mKFaJ1aT2bjK#Q5QyN1%qLSnSNR z5?!;TkEUt)EH?kopkK4x_`H?*;;L&QE7ura`k8+@@{5aT?`+?r##^`l-gc>4AgYk7 zNZ`Qr*!rY{=e1lnJwG==Z01X!mp>GOrU!rDBwED$pWzPk6>+JK>zn)*dg+{Au_s{N zE4x{%yEQIe{->If?IFfsh<=K-%`QnO1j-oXdw2%or5p*+D zUm$OF(83!xnv~rCGpzWzEPu}Po%uc`Hd`vggMxdGAC{c8ful8R>(V~Yh?_C36C`^7 zW-hx{{m{Q*!P*;a>()fw{`*M&KZE9p!w;XHJ(lDcC-LNH!>lLyoSdQx7ODJ~;vO!t z*8Hf|w0vb%{=ZEhE_~VF9vrePcIF;avz-2we_3BMuzyq7)^>I4)pe}bnPyCyvTNRS zvD(_CFyl>=1>0u-XYf1vea8Z0#iMG8Tb%!`3f#`)!KU)*S3dj8UE0SZe@$NV>;9@u z6~)(1|C-fqbmqdAuc>!6jk$ss2V8!|+MaPoz^z9rV#dr>fuh&8tdIN1FWLQg)$tD( zTPEs-9Q8h{>EW0#fpMm0Y1ZA<1!p66rvCMw`6K^9Uhdo<{g*8xUitJ{ygVs7jG+E6#TNTd*uGQT>W3lPitYh_}hn5!BT~y{hq^`Yd z-t&h6A+23!oSJ&4Feb!gE_Hr*y)(A*(e}2v$?=gBLmjU?W7hSk>R7b=Y`WCGlhMq( z?w{?Pk-c4c+TM;{ts<9yFa2)aT2Ooc>3f#9;i(qstdBmaEXpv7ZDtB-xx8oE0&b(j zo7<~7KC2a9lCwo=9n zHu{R3O4?lYNB4p+*hVuynQN@@+s=8GuCKByTgoe*TN74t`UbCPYC0giZS8G_S(`dJ zqBsK<-)cP@b#1Yf*)Dgnhp|(p_B|-Jn8xsYjPcHC{#-M}TEw(OaBvMfwNXt9TaU}wb3OegP<@TlGc zIvJ1VD=05K^G7G-pXARa{^8Gr<{Z(sbK0ckaORf(8O zR=twj(QWPdX!-h8iz1Ge^grX~&a8-!2=l))L5M%cxWjL@$&@qRDh+uYcPcM+iQHYZ zUienrW&M`ZEdpYHio4S$TSl!fFpa6gBju=B(XZsqVH|P;BnZKQb2g1oi5l zU7ed*#y>$jVEV_@s%7h@S_jF0b?^SmZtr!u;`PIPfBAolf)`!7zIdv?YUFywHG-$t z<%&DAx}Dh;J$ZTJ3CAOylb&Bz-hB1W?O9rx1*Nz2+27a;yV-op@A3Q3@KWrRBVSB^ z`))07#aC(yJFFN!2EBa$G5q1x*s{p0vXu`*b85X=9xScp2#>h3cIwIL!N!KFOL~5( zJYn!WQhI8(NvhVFhU1G1pRZ+heJ=7x{L!YTt62|yJiIGlRjB(~XVnJgFRNZ|yu>@H z{&(BCU-z~j(P!FaZ~9UG@Skg$y`sE)J6FCqDOT}kt>eli(sPv#9k~(Be)R02Gc%^1 z(pj>(tLf35y}6$n3pMvnJzVhQ!@A3lgxijKF&j-e=(Uz3SkTbQ)+6C-YkK`d{*_f` zT7293kL1n&u=b|VqGj`{R=UhA_55n&s@1ZChkeQ1Xx@8=S&c9E2I~cjT3QFcSfuy; z9_!)#avN$Ky&mqL5~mZX!f@KRG01y!*WR6~kF`(la`-CimiZ{X<8ONSqoqEIFE@3{ zZM`*bm&f|#U`@7=Hsj<48hU5S@67Q$we+l?%>0ik8&}IE#LjJ=wPE9u+>LV={+Z#< z{?MYZg)7xZGDxIh^-~W^ucVR6j1Ma+Gv>FS5fy93&$7!A0;p63x3X9u;$gd zg)ij#U%Z>LK*8b4lx0?pK}T2wCqxK-@6J-(##*`KdYOHba_2|Oh*FE`AN96aW--6> z4m);iLru*&7#lHzBJ~CW2us*3daVq<)Z~OlK+?Xvu1gT82-H|Nd2USaCMmxhPpf zB_X+fPi}Exo_pc@kI7BGGq!BvygKjD+WNWMH(Xg+5K+6hEwp%PM5NQNJX__tYyaMv zocm4YrKiKGt0ww;TffMM<<$w@xF#*hoyn@=*l0ZZ@xd0l;M>g4Akk(0EK zYOmhR+quz+(~PU~)&l|azlJ;gR4>oGV|Zob>!jPqggXmXZsHbrQ1x>E6=~P0Ns~g9 zo@_gIlw-sF`SU$(>Sb(vrWt*2ZLRswFl$=6U7*L-C1S5*F}@VN;@Vnjnrrv&?V5W$uw?(~UK>m|kbLFJHqNGoH-RbSqmjOV{~=1BaT{ znxzda_hpJDe}o@RSuszx`@=^6p6PrhnvAP|t#WNuu5SC|er!?HQa`UWm7aY%2NTU1 zbhWZ~uQ*+#;VIJa^q;i5vtG=Uy}^OstgJE?kVRBS2so<#PBnC+pfyoXpKQ{TwUin@u82Hq-+Lx8pHb3H` z$Jbg%zRs7s&S~7^Qe!<6#QDgji*d;?iJsDkg&-LpR?KzZHFI3ciUgOvC)x|aQ zmN9F0_WulM)tFvy7WwbuQ&ZQ&wTH8Bmdx$bn*L+wQI?j~dpQGHZkkvb@6`Ls|3vPW z$wLd3cW-O#`KB)Qb)WMz-DSC2LjRxD#j}i$ysBGxX8NLO%|}*QwXNe0*0p-ow`hUp zj;M;mvCXQd@3b-Ge%~@b*m{b?JK#nwLA| zP58BDGZU}S!R=)gN3D(@(b?8)(rr9{?IydG9>&b#H(xKBw9Qh*^Ut%uwcT!SL=*L# z-KEwq%&$rMICs_2B&M{pK~o+c4Ag(PSyYPY!G!EI!^_pyH&xg97xfq|dFH(|0@XWmR zsot7Zwl3Kl#7(*n`_F8hYvL=Jbzo(0(Q@NYE3(-5yr*^9N#FdZY0kDpt7ux#niZv~ z?=CF;<28+|n6;(BQT_0T-52kL|5Ny5exk~Fv4+vU&bkffv^2xDS1(<3(Q1*i%7oSa z;o*`Ku3BvTx|nH``3h;CKwsfyXHW5RNM$(-a4y`MQzF1q|FB&AXrYYD%^mZeMR`VO zl}rj@>Q;(sa@q1pp^3SpBW}am6PY!7CyVvEPy8`h({FyZS1#V|%rc!>9=Db`YNVTa zrlsvj;Jm=FA(}($w8istc5Jn5K^~&3_eMvBsrx)waa3QrV{&JhrmE1~to+{#9T_{A zRys!>DDs~4@!7p4D_-~uu2}V}ds!7%T-T(2qpZ#Lb8LF9X32>6UT=6XyJ0)ma^H$x&CY3`%w@C!%& z%KLj7EU!ATYGvu#Q^$fmoNu)5y77i%yKHA`kXKvL(X+>vPkr>CVN>jtLx!h9Cpms* zJt-owBrQr3ciIhJ`b|8qU6nT3=B%;It)7>Y<^@(^2Qq}kJY?6r_E;yniT61 zD$%QTMXLLN5m(N(-3vG8ZTP4)ec5NOuw+wNFSjP2Q*-ZM=e)41Tl05EZl3F*#wI^5 zR*irkQL}rwKV7=g{?W(fWsRo?=(z!{jY!ZM|QiDyNnkv z_FM2`-Tc!lmrT>yx76nIw2+&ZBEH@@r=;%o{c^!>9_&u%P z##pU?cCj0u*u3WbQeLjW z>**#FIa6l|FL4Z3nOGI07;4qAk?DB#96p~flhS;D^zGYXw=eFZ|Bw2k1xJN<%vrh8 z<{T4Jtn7~Eq!&$XTDB^@sYNbyt%n73pi5Q z1H&{^>}KD)xS97u=4Z8vsTRA9r(axAzbZ(%EPtzoW>I=!<$+TnPbX_0JTbF#)#Y@_ z)zy|vdbZ{d?|0jpUb-uP>96hlrFr*0Kmv+CWxhVkFWmrhohsP1dYmi1|BR=LVeffF4Iw$6~2doFaoV)K;t+xL_6b$`0v z=lJ6(A^21ObnCj9h4%vfGfWE*iMZ1;DI##*S+PBzb|1;AdAzo`Z}#Gsd;T-1OxF0% zpm@&cR9W;>%N`G|P#u@qn=7ZQyiZrIRTg-%R#Dx8?a!W-B74l@Ib&u@He~dV1t<8V)j>qhFzAaLkMpv#nRyCA~v#xN- zjrG4ERPw#K^Rg|sTJ$5wrAxg2&0cv%q0jBCX4#~y1hX_H*(~NF(T2B)UeiK5j84tj z-1IiK-}=vy;(z?NHg`S#&oJkaf7=3BRB-7{J3L)qd5Rq{)U z_S`o;wab3(YTsO)B8{?_J#QH}pBkDwKT}%vgt^r0KSS{J_A`@m_)lG1_vJ!(#d48rXzI+I|MajA zx4K?oWXdZ>g^>NPo=R?5@!`Eryx6a%^@n{nZC_c$wY1GEnVea1JKI%Z znhc|G^+REeXYZa)=j59kW4wK7ZozH01zwLX?^kS_-mN*c!@E`V$C7iLdXwB=2C}v- zH%ba$_p4d+oWJjn>W6$WbN#2Mm`r?jGc7Q1TGp~xIon+S?v=Gj%DbpMIf3mfw`q8ET3SFH-F12oS@D}kcGYS0b?4TVis&i=dAJ-quesj&| z{V1!6dU?Z`wPw3zzy9u9>DRt2J1~X)3d7}u=2HJyu4)HV;Pm@aqBdljeO)+;H_gg0#OHK;1Q_#5fq3)$s&Eq)_?Zpf&)(IN3niVZgdO9iSsJmH~ z&y=+dqHf2hr0uh-Z<{y!qv8FvRp~2@>?XXr9kpoILQ#bkldj)6*6wYNjywqpCxtMmuxJ-9_7-<=cMRJ3jeT z@qO|aU;mgOjfOP8k1tla{AW1y^6mVwYH|?`BrZ0!nxk+nG32E&TLEROgEbB zqPZowtCibUcG2X^brz*v!(7 zbI>S3QQx2QjsA?BwN_uJB5G-IU(u zg|BV$KD<7B>&@%~Hr

yp_)$Dcvd%n)WPk4L92)A(NR?B4Q@*YTO}l@B$Tp5^^a`=8leA5+e~4S6|vnP*AU%v|RM!TW;0Uzl<&)WWRvd)>$HrJ-bZ*`BIEb(gUWAXj-nGCLo7q1L^{9{`E!}k)0mL}!&bJtB0 ziHK^L{(N(U#LK-6kLS)hP*}2j^J=evjnj3Gd)EJ0(Hy+du(2fJ%%?N+joO#)@o0*h zeoip+wF0k>P*&)s`OX5?4Zgko>=HF{I$kqouGCp_-z#ikyKzi`+?VX6QzYxdgZwA{=)8Ev)>)u;_VQgDQxCZd=Gh}+tNfAvqKCKlo!TIR3&Eh@pw-gx{?$7Z# zdRp7PVyDBRsovLTUyt4$bWYjw=DZ8vx`kV`A4zSRDRUHuj)N zfl^Q2gKq2UIFmiRG19W#HThhA@)BJ=Cp}p{&D1g6y{X{|XT|sMAB7)YFZt!axai#S z6UH{zSF9@T3YnL+Yi_`@*|U`{PZIT-aA4zZImQjgv;K&``6RCLG4s`b21Qk62L{E1 zOf8f2)0pgn{$#vuvzI?|z454`(PfnhLK5?hR!_({U?A1r@40D9q1Vx=r473ds$YHo z_0NxQ$1g6J_eDi2q%Y?F+HHLrQ=1n2a-MXGY42m_f;_!)_9i=>ii6cpc6#uguL`)) zc75XgbIMQ6)_>o^%e9b2{??@Y1U*LU=vUUWi;u~gWldS+G$U#oOLo?q=vN1n=y; zisy+RkI5B!Ix32rKfmhzDp72kUEQ1olZ92X0$!>J`7d3=ap&z$nLp8jDQBJBd$#3U z?Va)I_pif3bCxuJzx+gTHp9E=Y|cfFTYvYyDSK0|qyJnY)oJ~yhW;S^Po0XEjCFex zQ&%|>aE{7{WH9$-Fq#y!FHSNu^ZhTtX`$Yp9^aDXSvLK^IBAK(pOR2 z$dzqYQM;TDpK`jWGi|^Air#J3fB0(>Km5@u@|d<{&b2CI(|`Gii)}LMUsY&!oY*`y zYQCUcuh=ZVxl4L|CZCPSPGw!DR+ygqK7nl4%!#1`o2YkPn39>1qEgP(3P+2Ai)WZE4Za(VGV)9nl|w_o7xKXGKshpTDl zLS0Q?96i9+dT*;<{LARxM~5;v15RBF@?@#rzi!T=i_b2Y82o3rb>2}^#n?S4Mw&U_ zxjFG{>-WdoR1@3uf;Ek86k>L+?I>eE^WwW{mu2USr>DDObw9JzDrVcHpXqZ?msXmb zSk1We4dean-|xFus;KPnzuRaf=N>zA9z&|;UB&fZw@9inn652ecHo=Y{f}?kQut1- zw*PQz``oNd#&@gknn;A|Ja(`O^zFMQyKhO{_d}|hY*Rly{eJm5gGH%|@XFVx1TMGV zvzWN(_bm^0&wCHPsTAkg%_v&7)Y!&OII8_={^J1ig_AY7=fC1o?=m>zt#oqPsPn0{@( zem#q$!mUeGKErToh+D+c)4B)6_CL;FcK7?YBPAM5ANd6J@=o%G4FTp$BYd#_pvesXKlR0{r5{++xN&E$whW~Q(mq) z;B|Gb?c_6Si;kZNnQ%k?p0|+e-oM*#OZ;(9`4RGDir<8$oCl`2b8WVEZu;%sKZU*A z(sAb`yOO+F3ZM0wUjEu;wTdzL$`{F!&ZnlPcDeRbRZ^4Zm)~B84ij{j%avNhM#w&>~i4ct;E%LK>drr&HzBxiv^z3v)M{gD(2bH_`j_jV* zRw!O%FKDO~@=`6xDZQpLWJ}t$HLI_^eJ;R!NT;h=`;5^G8`}@Yrd?i>qf&EPS7!M- zY;+S+-nBw@iiX_cmv`nQY_xBTxYDwiyF*UCt;PI5!wGq{1a0xmKi)^q&g$$wniOenJ)ebK;+d-JEJZrN$;66-np$pnWjr6!eidvlnUYaXwx3 zi_@yBPowDmM*rz)C6|=0oP4@VvO(!|vhqu}edpZPXHEU!EOZB-WS*v*JiBwDsZCMZ?>hs|MgUZr;eV#wS(8#d0vxr zTl~GkCjY+R^Mai(WIrgZFi7Y9yqft-cb~AasIuhGo~V%U-BWEjOs?LKUz1R${^aMs zz$+7<%}t;F)z4*X$cu>~SDjY7M?9GOa8>Utb0@xQic?|)3!4)Rb_!2*TXpx~`k7xA zuGL9iU1lxWek5V{I=Ll2r#uye=kWhjbkGo7dvJ%Lm{^ayZRdcsezvTHTSAI@+J{hL9>6KN5i!SFCmJO#5_AnOdt}e>W zexNm>d8*Qb%SpViqa$i&N5v^N9MSJMF}Y>tbP=J`fqK7|I{n-tWwCehg{^Z_Fa1^a zeRx;D@vilR(rxwbjPra$&Z@fn^f`4zr|s3dm745-y+mxiz z?|reY4=mmkee`~-bJb(YrbBBcnyOfeGfepZn)Wgy#kq?w*a)yXo#x@-F$G!mjq!d#qkrNv*l9xwQ4wr(4q}tV?#);*SZ^ zl7{do$wNq^Ci&tOU z+VQX9V{Ng;o}}j0zp56iD^ypdzEncay`Q7Z7JBmAgX|Fi&%5T=n z$k;~<&I)Xv6}fY>kep%R_a?K|vd{b8sw)Ny9kIV7wtlI{t%;XIj|;ZU)E4j2{BunH zPw=FJS2Zr^S~ogHJ2?kUmbJdN*Y?6(=Gg0%VqrhSJ8xd~PKi7h8)@B@9LBiiOy9Qt zqBRnNyG3urB$>R-RC$wm_V3DA6LCZK?y`E(BR^(Uq-j=eC~~jV&15$7|5_09#&I)q z#fDP*ps#%C505un&hMV9I{jI#{={o@cCTKUr4*JmE1bbOtj);mf^>l+zij7M9=CTv z?mrIP`ZqtVmLqr3@}`14D(m!*sC}=>CD7qzzC&j9XPL!j_R>oqR+P!raqP3^ zFshidtY-4lUk$6JLu?HGGl>2ZyjW2!=X~u`X6-2p z?3>Isl)So_9l5sYVahq7GY2>{Wg3&+lefnEt`@du`CVz;IdhIl;-#(LTJzgj#JJ@} zHh7EuX9x~c{UZ1xs-w#Nk6wLS#N&_5D~kGsHH%&`|7Y;OdeG|i8BNd~0sd-dixQDr!Q~K^PFVsyMyn==_j+B4mQXw zZdUWJ)_E!?&K~5nruOy=mVj^9x5chIoc!^Qb;p{SXD1a%MrFA&CTJaKo@2<+dERgAeVCG(EfiGJJM_`!4T!nn{=n#HPKx<+1;ADx@BY^UQH zQ&THvy(J&bS6S&FSE*b0yDa&^zd}}9zU(;VmP=1ePi^a4C-InHrO<-;?A2|{R0M?O zj(K_oYi4{3Ot>@U>WaTnyxh@uv^PocsovaVux3upq@{}N1|c`oUIePBO#CIaMoZ>s z@jCk>{S7zOyLP%cP76$(>pqK<+55_sFB?zu#Rug+vAV(^v^Z~Rt)l0h=o-ZtEZJw`$y(XLik6>g6`6t1D#7zm1d5>3yx{_;kK^pXyil(2Cwm zH>bDV)4zP_kk*o@KkaXQO|8!?{?<2f{r$8c(J6EHeZ6vDOrl5Vva(jlDVCV#sEKd% zOf%lpxBZ=PFky$wPn-DmNku)Ai&|DNCxql2J030GuTt@M{@3L}>5{JI{@y`Red&q` zt1^}>PjGE%zP#xri;y1wYv~RzpPjKcf2?!9c02d9$GiKjGTQAAK3rByI`FYKh(qCR z^(#|B<@te6?s{JQX|qo?H2C?O7P0dA(b< zvhmlS=H8X>l=L}gb{_mF+1#wY+fg@Us!>S+qe8CF?iH><{p&u}Ojc-Wo4>&7v}Q*0 z!bK}09{K8+Pm+{AotE}BcIx{Ld*5&T@M^lolWCF;Z7fWlS+h1Q>y5YAGxuhXo4Q!q znWNsp+dC}vOadeun5Jm+}b!x~w>RNlecFrkEFK;sv zY4e)&K6>9Q!8adY)XiJ8blv$+HM`a=%98Thbwe?$!)^I;w}}Be*sg0y{qhvwcH`xq zWU-Y-;&u_G-h5X@8E;Kxn|{CINyW`oE>nGcy@E1I6@-_)-I8!O|DTkV`dZCjyA4hU zEnZ}Gi&=VYSv1!gV}lt>)&}}-I>CGW>dV4+`F**!_8ogr_<2c^(f71muFLCN1VVKD zH#a@Ax}I7uwPQZ7QOYA}!FIhDk)@1NFA1!gaJ!;iQO@9_@}amjFJm^3@8xRKnC|Xvhbw-0Z@k|j zW8ZO3_=n5+PtvCXIV10D7R~Hk7W6nk!Pa`_kBJY>of!kK>@@GVC$W>+6!qx0%RK#t2Y<>tpZQEp+_?C` zoXJ!DSA{r>%#zX&Svm9M64xh^dg1fF#zoJpV4an+tn#U-=v@8y86rz3io8k?T69k6 zK$GYag&g_nq^u>YMJ_#Ata9n32aBQZ#_hW+UDg>Md7fyXJ^faT<|GcECrht!E?A}U zTS#!HPu;Sq69ao1ldbYD9{nD`&8uDerSI-HiQoF0ZbVKpC{od7)M8X81_P}}jg_MB<)GI17xKXYe(O%C{#`NW~)fzZTR z_cfHy)okdNu-lrlwRPLp{|tWC_xUDIU=pc`+FP@yIdt{Q2uBsWq>?pe*FK5+>Ku9? zb~SgOhus6-#v^N-P9%suJrN{c7LiKE@mU^{3M~ zwSv4%R#yl9XUJX2A8l=0tNb)sf<^FY@79=G*IEAN=5ahpTsddynkEL-6W8yb_cl=q z`o91B@1M?IJC{XO`f&1G+WCdkyG6GyV#O79a|P-0XFs|-j;-+)PGvLODzV$T+Jd2f z%^8&`qCf61mZkrk_K8p4+ac0st#inPvnw`F;9%Wm{!2P>hve%ZwldWyuD)`q;@x=; zO*Nqjw<_xe4{P#-JqzXDVSej@T)p@GPlul+8P7R>c5hDIzEZ;{UW;eh6l~J^S?4`< zM|Hc(?A)a4`GJ%HS8eH+zP2S237$oZDvQ4KF|S(adLe87s^f*#b#i~c z{WSaZY|UwnK>Hs%zUunV->T&x$#XaUn#JcGt=_;;<-c9tMUQ&uDmNzN*~^*9-TwVR!$gw*kh_B8ovuGyyG~BZdhqr79@{YI z;Nn|6HV}E8g3G{F-)W{-^A(FMkOhHjtVjaK=>D#+B0WY&pfB&`!rcFLQ+xzG0`rQYV{kMNQbV@`0xbwD!D+^aj{bvxktZCWY z^`F5YP*Z>P8FyB82W5H0i^Y*91qLy3*RaX1$-3{bkp5(u_@k+(N3%~X}*lr%(p#LMU`o_b+PDVSy zJU@n~FP&@_pLW{B?4c~ZZqbs3tiC^=7gXP_e|UDo%am4D#$|u@?Y`5SXk=JeyD&32 zH#vZ@Bl=xLO+BCE--1uOBL3^@y^zzodzRe<^Rg{jMPw;#Eac}#V z10P-${&Rb}+wRou_c1CGpRiQ*P3+p2yK^g=dJOIM z%187RP3ld}^JcLMdouT=-2G`>#SE`%4;F^!PyA`@a>UB_*OJnEsv(|{DzVaR(M`6; zAK3>so|(33&h;eI^e6mIznp{k(go+ISpLnK8-Lx_bjB5_nVG7~*DU7Ux^hWto?VDX zkM-Y=`@S7ZZ!8qF{%lcs^r~h`z`vRo%$*X)2Q(%3AuTt@W@zjn(r;RHM2b^26z?*9 z`+Lnh!d*Q657)EJ^Ep|xDf^*{@XNq59oMLj^{sE8i#biIezZw<25ZqmQ@NHPBcA^Z ze{?FU=W%teFphb?WL54`Ull`d{sQOA+Zs;q_}w$7z|UY$@P}5h!@dFPt}2>#ex{%6 zlC136{?t6$5$5wVbI!a}$LNdB#?p7R&n}vMIgY86y>(_;uK(rI$ogpinHe>wV+<}c zOx&2#r&8vw3TOM+>I}@mvT%l9z0@MC$YU~R+oTlW{}&jpIepYvmV&6$5^Cua@w3O zkDxTwyWD?O9p@WgbUM2uBhLNduaaqnm-ohB_<8I2+(7;5JeI8;>lfVhm390cP_jCI z1JkhskB)!3tg+vLJ9Jnqg@{~2TsJz4za%>IgbZcl>NT{kRz zd1}t*Q|{sK*j3|Tp|o&D zZ|M`?p7b5-pKh#JV`k}PbVAeTVyvz~(E48uYh~UG|K+_pPp%{=b+5eR6g}@tl23}R zT|PQvamy)V5#}??bXQ~+EtsGe_OfuJMYDNp&tb))9R4$vujKAcjJ=h|d9IIevnw&c9; zm-T)prw3PbPtuyPc}o3e&d*ZHViULwzFJt%?PO)&r}p8Ws7bSl;-6Vplg{TqcTiSr zf78!!5;N9@Jh{p5SNhzowB7T^b$QR!oeF)+wU(XT ztZyV$t6Sb#vcvG$ftQy!Z`Z!jRdY`_WL4Hk`_GVSQ0U!$@f~w>#P;b&KSrFB?0ER& zom9GBb?ZuwRPjyS%a1r)Z~a!WrBwaLqSyX{yOtl3n|N}M--llsRekd`js7!i3Jp20 z7ah*URJvuF)RN7-&H=ktGB7uvKe*Jms8#jK0mC!?##!@qwMA{zKNK?_DzJ!C>^OK% zoyQ|CWdZBL)D_GZO_p*C@g^M>%f5Q^KSN+(seR|)_{r%%YvaP@{IC4AIw^HHTqxe< z!%2aAzV2tG);Nnx?KsTOIm>ws1B1c_xwWs3?2wjNHoN`6!-B4ZA7ZSQuH`mXIk$Sr z?4^$JOWUF>6uVv9kID-_K4YVK+D7N4&)M{oMP9G6W^ruJJR1^fa?#*F!{!y)yqQS? zhF4QVr6<-%By93}H{obn<5|sL{<`uRE8fJXOxN#S-q-6CMbF7 zt7J>?bIbj%KXO$n+Y*#ad)eZ@WkoIx@~k-0z;Q4s>h#Cj14qutvm8k}X_T_k%RFUT zRixYe-FrMAubg`K^x=;(*^8o|vQ|!Eo$=H((d#Fd+msJKF3;by+;Gi_qR^=(UYo9& z{5rVz>k@~idil%JJ^T-^i^cyGYB_EHDd5YZV&nR#W7?lNN zh7BoOt{UzS*~)9Zags;TE{&Iu%5_g0Pt(0qUAO&mf<4EnzRVj9ocls%%{Q%C`P=on z_@C%7rGy}rG^K{Mt}bjB7#O~H2mR2#jq0?8WjcHUy;3OyQjV)UsAA zdWzFA4b6v*N_`s5vR2AWrx?sH**|~7cRpchVnNZ8ht5WhlCiyV%UX6;{k?JgQ1Qpf z&(9cd@Z}%#zz9^UeH#@h8imq5;vsS58R4i}sEBKKrSxv$3#V?!?s6 z$p$X*R)Ibny!>WoUDZ|DzC%38bKj2*Z>FrARPf9#o@eP=#+B}NN4uw2yX<3F)N;mY zy>2$s7s<`)Cnqg9yMR|N^z0smlMHKDy|}f~%4*lLPa(n19#bQIi-q^kpLW7#@0$Z# z)YNPyRbMG<{&2f3`TEb!`iJwlpF1%ATw&q)nNj6+#akKekLDsqryphhIV9}h)upad zf2^n`S}Y>s(!_`t?-{L_RbT7uUX`^+Kd0Gl`QJ6BH`&9CQ@z9Ne@5%9UM?!A%=F;M zNlM`syan% zRpnm0^Jlu=vsl!=y!!ar)Yz*_uDr_gy78gnW}?}=Lj7>BJ{5O7o=3f?E=B=foBXleFz`DafGF(q99lFq3aPpb9 zHiNgAmj2oAkMI9X;GCm9`;gg6z5{{HpKedR8P>LN50j>l4g}c6*dvE`0T$q5Ip9gDy|CAFs`J zySevlRe`bMJKgK&>?SR|%FuIKfu;4*5yK7ELYC5Ve=_jbNadV4t$2_jJN-_`)xGyb zg@m|Qn8yYkgC1-*roy|S`*KXN`Lk;Jy$h>@d zvFYc*wvMYeOJj4UDNo!W@7%BH$*z>?b$zO?zrSumq{K5_Hc_oxP5TaC-t=;Z+V5{N z1=h6&pH&@A`JMNg@2yS9ny}zV-%cmxDLWNsyNj@0N+{m1wLD&n z!K9$_@KoJ>@^fT;7fOj3v2D=Z_~>DEZjHbru4!CnO_uHXp1*FF!|bEcpML#3e({9Q zz8MjlBc`cH_s`GsKf6DKYswUH9aq!6{MY_71Xp}H@*|(G*Gt}TVoFa;o`dd^`0R*= z@*VH&dF(RvrrkRDr`J2??rOow?E#7FPd;+n*0)`AwMprRHN2`yA-}kGon~g4W*#2o ztgRdqy;I@FN@lf+S!zj7&rb|kuw<_8V%b?!H$LY%yizzd&9Q3x#G*-jM+ENdY~~GG z>^aGeqW4s&U){_Eq%OY%EW0$|1<2b zf5#|t+N9_03;B|}9$dP$=f3^=YiR2|W8%5rlm0U(=3N!_{cI2G=WaZ!@~BUI%jeV$nU{{Q z^f3AN=dgR|!xs{Lr~M}#m>JB}uHbO%u zo!H`a(sHi*+_USW-~Bpnd@S)`B=hvS4qGf%F-^UcAuU?ADQK`yykBA%1 zX^*$QJ9H#zd)LRLso#aVMVaJU_Ij)T`1?5gK!H%0*f#E{AP-m1WlM6u3FvH7zC0y# z%2v^t>(c$Up7(cKD}3d=(KN1&OR`#TOuNQ(cb#5-@`fGZl78|mQ~jf-3j1HVZ~thP z^T+eO^QJ0ldYy^gUh|!cN-7PQM!B8rG{4xLempF63Yb+b8$mYwxU(j)_j}{U(+*T|;Dc!(*qn z`?bR3BbGmsC^Xd)J5_syQIr0zt#A8M)}1NfDO0Nx3Yiu% zXXhy!1yP;@Y&ssIik=IUisd}6d zHRhO?vxxn|-v{gm|1Aqk`Ool5Xlis!ZNtkO?>s*9%l-Ry!2O|_uFJ}Z2}fs6%6{=q zcl|Xkd!Yg!j)PbJGo0K1pFtus^1*+GkZVDwJ5WH`Lm6*IRW5OSbvjtsCXi zAL{8J`g~#ee}6me7%8H1kIk#W_Iha`^cW&h;v6or<7A#!) za_gkZm5)ySc@@57^#lb|E|IBeo1&c~J#@2Wk4V-x-)I%}em_C@$-*c>=Z}nGp4M9* zpZ?=_Xtn6iE1p`j9N6tbSyfdwc`6=Qd(<)e_}r@dfAoHIP7m~qob)`hyd&#jj?&6$ z(+&FbSd8WhaZju@5__EcasH$K4DB}IY^`TJ-Un&CD1LTQXMOsDEoFRX{#h2pxE$zl z;*;n-6sq%|;hjg}s|mJ?Pu*PZ7m`xBG%a}61u30Nl02qUu9ozuEUW%|Gbz_d^U0kU zx$4JEL6e@o`nvb*$~yOqcRTEKdVHpwpS0(M;-#Prx0lL?YSu6Dttgyu{mR$>3_ee9 zrpASPMyuACd=%PsMd;L?skzG4@!OAy3QbGnEV^VY#G0k$aO?K$Eur6bcY3*V&$TKF z2@qIRvO(QR{&Za7FX!n$Ow%%r?KYp&k`e0K;4=P~-|G0l-p3W-osMP)+OZaE99KEi8qDW`zo;~xG zPH9NLd)36^z3#Ty)|0pI{jN!lZg_K_@9@sUC!Kir34GbTqwI5%Pg>!ec=1mDj#*22 zJIz0BKe;ILo*QqaRFLZpvG>~!{rz1Ru{TJ3j@s|an*ANCJea3&oL112ofew9^{_xp z44>PbzXc}B8j>rXpW*jcKE*v%*Z+aj>~DGUeotr-yT|+5R*9k0)-p zB&)kU?&GJM9bXqsiir4lqdR(H{p*7V!zRtr&YQc-RNtfMyx!I(&ZBz{?3(S78}=aT zfZV|WL9C#DM;X2vi1di|Q@nTDxuDu!Y#-J-cCZp8h&fA8Wm)3c94 zTE))jSUnZ7+xS9n!t1Ki#jkpr`b`Aiv-!Sq|vzIz5d&ggqS$HY@`|eNs6o15axM>A_C=Obg=&;&)hlids!>OAq zUn$og|IZ+%n*5{c!{!f*V!nMiGi6iTwUt*gf-tB>&A+o z%ls6BPQ`_MxHD;k<{8myD~GF#<_k~dKDE9r)iN#1{-0CBy9s(O&d(p8=~pzG)o2tb zA+dL_tNK(2<_4x4F?~awd=0zj?UQV zRHHX-acT0m1r?92em%bQ$<5Jjs#V|b3CsIDk4{;7M*az7`{k#f>MdNF9^Cl4evW#C z!ZD>JNAA?(t4wpZ}e`@<4CXhSYt_HmXN%uYdfXL0$0a$}2Tz{iY~a+4iqJ?GUK9 zRkpN(bE0dPLBX})6_Yoxy{>*^@qW97`tyksRQ5GzU0NH}op&LpPkZ@2hSb;_DlKW6BB*zL=kFtv^nb?B zWB7JBJ*hKj&ZP5CuIp9WU0wRXYXXOYRH@(A{$96^86GJ?o($XNSMJ?u;i_$PTU)ll zI&hu#)}!|xiJU5axW;hC%G4bbr}TS&xVByP?cSpwR?Oi@ZF=-_b@uvKR*i{{LQk0( z?+Lw270|tZ(lJ{+cU>j-aqBwU51bFu&rjFt*unFk;rFaFPrtN!ZQ5y{pTEaEESKw< z`UQc=9Sjk_axAvKue(2wUt6e`_r)|udi0^VLp>7BhI&de_?{d&i#*a zn^$FV6)m~cll5KaZ1^b+*>ew?6ka=cEd7-~HF=`dCjB?fj2o0}qig*?-DW&;y7W(F zWm^8k?GqAD-V;iS*kEt5jVaHIZQ}ld>-K6O4)3uyk zLQ!qEF8})8u3CChCGTi#jPmN)ayfa`g+G};wtE~^*JQbSdV{Xlv}?O3p17pEk&{bq zZfPO2k;$ra?lYt(_`YKbI1>Qj=m`!m_r5yXb9k@%iujC4|xxbtmi)TbcIR zZuaGBr|%xNQC6EPx4UR9&b`6HQsjT&`KR}tUbBR>YhB~3a%x^a&v5lVb^iCkniGEx zs419siX}M3Kgo0dAmQAR7RjT-`1I4ZDRLVwby?MYZ9N+G=>((JlN@H*DIaTh$K3w9 zPmb#V>z4Fu>)lM*r#6d89Tso*;J+SqJSbt8r;_ol+cD+){?r;WoK4d^An0|u_@zVK z@pDou%df9hJiKSa^kp+8RYMsT+??@}hx6O-18?6qDP{k7vh?V&{viFpfam@iQmRXS zTu%P9ZvL5Q%~Z={p1d;ry0N}Y9v9opG`+PZTr@v);^GP2mw(r^6!)+T9ZfmzJtsqC z;eUo_ru!DPy!Kc-tM6H1)QYLw|1(T5mrK07m-%Rg-^Po<1{{$mr=D(#=-*|>@%z5@ zw{4~oK3>;5W}Bw}{ukQ2zbmPyX z;weoHQNOQEGq0+(beO6U!X{h$>z3FiV~rG-Yn~g;RpRf|M!)=eaAWX^;HC`68>ZQ( zul+JjTomh3x96|ohIz)TF7B3|svtX~`o@2Tn;eWIugz#WcMxSBPbmfnnc| z?wNM6v%m1=D1JT@IrWLT^tq-`KWw-Nnn* zg3S`05_>FlXNvs1eo05NJ;F9DbK;kKE{W5&ZAw|%`fcNsD-v6}=guzNFSokMFuvoh zd|~quuPGVL2Ese^(|4?pzNGbG?re_9#^POz0%ZCe_22$>)?B*keAW7upE*@UU#cYs zZEQIs9CmrWPv+`u-2=>%w#h_`#;8l0f7+zls_URCwIg{o*JslOtlzqomBOUiwLByv z73MSkXNX?$^X}dGvyYt@cP%|0m?F6?Uf%U$`jyvOVTZJ}@PjQe_DYjLKYU$yMyieFOck;OjCECL0deLZupE$Pjvt`i4K zW4Plv_9^NnvZWmATv;w&@Mc!*&9ld*^_)+7_V8nkL!peBQdGl~M#G2+lER)U--QmW zU|{fSYScM$%f@%^^Ph!Xt91J2`SqQ2xEwj<$;?*eD}E1};*F-w^IWakGci}^-ygv? z9TDZdk(|YAITwe9TF2hFc{KS&p6p@uG|wXwtx_-T?1=DSiSi1Y;>Eh^q4G78J2RD+ zocc7&*wx!rrsNch!C94)5?jA4KKVLv=kIGuk8jIvZq-cZ*U`AbSoCaKt>5<|lfL4k zVc{J!%?$(9o-mXZ2|pCOq!Th}!o?d?nXad?+xXA3h|Zxsq|~>x_oh`K+cUzYWgmpmRh#h`oxL`#$1DaC#Q8ie*G;JXOIgFda-oQ@i5b^ zTb9kYaB6T%VEB9c*$2t=&*D*&cHj2<{VwX|=JlgY5q^q?iV@`0VWb&u~_e_qS)zzXqmds}0!>`bjtc6IP3} zG~QS&Y{;oMK_e#Gc*BhiMUS>+ypY}WC+Yad9R4S%OFW&9|CHDn@Sh>ji?wd`v<3If zf4sTYqvfgkHi`G`oBG}TIb}18g${ZyUmZUCYX7-i?vk-GH=HLvn<;vvL7RKyzO(s# z$L|Y?y-He`U-i|aPg-+H;hz(ytd}({*pPhmxAgoUN0J)mGpyJw=lZtDW-UL@0*7yn z>=S1;^jK#pdu%-^{Wkym!2ltqDOcoXm7HDrwN=wQPSiBYTS;i`tDW1;?)MkozTLm} zu3B8+6!GWjucv6KXeu-^6>D>bZ0vQoqjf{R&eHn#_XE2>ggjf)9Q$upNZa$O!im9IW{n>mW^%b8kN$WLT;Jle)N^P4|o6f1J8U`sNDRvZ!WKkAxm2J*K^Vifm|oaeG{hx)NCil6M=D)d_*Uz7KQ7y@nCHhB`%h6 zXTiS9ju4QDDNb5m;CMjwBWHHdQiUHA zuIT9}NB1)F$sS1d;1c8Nl3Td1-u~&sw`_@ek&)if4wt)@B)vQ5*yp5)KaG-0ektnnpJDB+`Kw=Oyj;z2@=<8t8r@ST3-}h+XE*4c-EZ+H zyhAU@DInHuU9Gd&(mDSgZCv!wb{hA?Q!z*WGfdd&{o-Eq(WboZFRNUN?F#*ucBFCe zH2#|=e|I;JP?}L}@e0Q0A8tQ0-}`7*c9;bDFU4E6=^<(vH2 zt8ZpJ+IGB0s9bo85X;uSEyeW?+?$X1_@@^ZP4F+PW&U&g#F6*6uBAmfFW7pZy4&FW zzQ=byXD5Ypu5?rgHMLuOCI4%dYxvVm7ydH@UYU|3<|vTB5M2D~-m}Bq#UDFP%~+z* z!gJ)>vGoBpD}VD|^f2FDtC+1YF=llegGkY`hm6XF@9Nyu`Ae48v5W2ASbnj8iK&M| z`)8S3LUBea2}if)iJ6-#%sVpAL;34%7KxT6D!M9-zOPtg*By!2Dm_QTaly}tzw0iX zTvEigH8i_<#ca`SzZxIhxL>?qA@StVrU@e4(l7X~u+84^O(pRzv^x-|2YlZ1*8 zTjTz`x($EMs%)#c7ZSvF`|UP;b=FBQXSML9u&+O&nY^$g_Exa#J)K!+vPGA;rrt9O z`)0oJ#=HFN+xPE_8Knz5et2bTUvxfKSL7g{huQVFe-Emr_g;@#x>V&i+tnn8@B3fq zc4p2BnLPdB;-74n&q+)W`LcX{e8uC_8jdR$Mo$o(=*_xg>-S&f2N>P!zMAh8QCj;v z>UKrW7S%^)yVe|^erlqXtl?$@1?IvNa({&QUlw-BnIADa>(#m0G%HK{;5zN&6DTaI?<)Y(rx3`Z`mL7{vPy>Fcp zc9Et|`eH(#CEkbC@_@^?)x-MF2#Jlf}l99MaZiA$TFeOIwx`By^skts_yNA_wBw~ zl*!yJx~P|<60fCkWYX&-XV>3HCU9<$zCXLxu>I|pMKjd}buBkNRiB#wGMa6z;>iOu zPe~M)it+B7_4*~3S4&lGRAWHtkpuTm=dOQ${$+1H=ahw-Mh8E#TtCxWSHH(?rbg#X zk0(`MS_CCRRquNyaGTxYlzHcOhyQp@JWtfcpcO{xgo`TqWW(TYBWPUT}o&k|&BtakKEcYMn`W6P3Fr)_P0_de1~ zo9=BbDVDn9YIW1?-~W_vvkPAnn6pH_ihuU!vZq2q6Z{?=Gk+|@AAI>|O-GL6nn;^N zI!US#Z0z0l^Jo8I?$iHq(ELwC<;ju>t9tL)vM;|Q@p6A#@uZ%lLt8z0_r{-}ufg6v zvu0*r@};KpvZJ#%#`1mmQ^}GIz8dRR^jy1^`6=m>O04g) z?~F55@(uD=y1p~i-hB6&Qs6v0p#w~^pBnU?9z8m+lUJB=TEUIV%X4^ES_Zv;%-Z~k zg=bk$(#4CG>*C`>xq3(yMJh#`4lX&w5#F=xXeoo)$;UtTyv%%8$hb`R z6hrXNKtAVPxr=X9f3^{m7Hkyyuvl@jhKQ#2w7w4tvnF^shUHvUj9m4`bDi?s0QGQx zZcqRD$IWlg@>*#;XJ40lgg0-2d>zwGJ%Vt&KdIx^=#k9OS z5~A_>y4#+-f*FqwUvdbN)>iS3IaVvS(zsYeK>Co(-WgJnqFu6FEFOzJFXz7fW1YLb zzNU1|Nd~j?{!?}R%DxCr(&XaHs{9&J%^4}?bXg^@!Gk&X8ShqB^&RY?yBm%a6nb~Q z6*Bm7C1Fa_QXP4*KCOG_ZWUL2oZ!WEd~)`Qud8h`>?Ff3KCKP)zFIleR4kFt-QTQY z%Vjq?&1X%==1O`pPPk%gx8%vX^>b4HGYCHOo!#umJsXy|{27gKGspCnr! zpJ;HxNM+U;MV2?BAM1ay8s1&3GvDgm<~5&<)~`DAO6!7$t4|NJX>Rspl}o!P_$`Qu zVcu;md_ZFRgBz;KVu3nOC#!H~y0-OsKA9lI_jBu-?ET8^haU#2YNTo{_E{m~<#1!8 zO@s8|W|LWWR$i>Gz4z$bPL;QBd$Ts?6;EFt^(x=rtb=pPG=_r@SNEw*e74r0ZBy%p z-CH|DWe@D)uI1CJco{gUGvSXH(=7|LyL108zI#$@QS6*w5q}d6HyKG=PW|!i>fZ1F z8A6YLGfdgpz37ei@7ftJOrA0Yd0gD}(?tF3%q8i$(~donzJKHPr+*S3PdM9ZUT9L) zSXA};p51j7$B8DZqfT$!X;b!}p~rjbBXxtidh1Wwhf=wc5{yLqPxJK3*Y}LDjJHNK=PDr3*#V zw`ia7^eM8Nn)tnO|LWh*nF<&gVz-EVTbrYI-P*lF^FxP3*=M^Kb_-e?Zhv}X9shD4 z$I7|4x~@N5{BrsAlkM7vUVd)7KtVXSpLuJWp#78JHJ@L7d;XMdx@KP>C+m{* zuuP#d`YYlNec$?f_U!6^5iV1iqXl%|F1sa`t9*(_hX37#biR2pa~N~qE)h&F`+h(1 zecRePsjKsoPws4d%Htfm>nU^33f7Z`ze+wH?a#mRH@7{BYctQW{-t{7pXT4r%)MnnrO|4> z!1L1_xAX5^7bvdlK68SOo{;*|)e6dQ4q3WAn0!#kZSjAG*UB&1dmjn6KZ>Xc%IfTqc_)*` zJS(K~Lg-qhko}*_ zvg^FFi>8S*d9XAbyr+HXq+9wyESrs7TkHm$*|(l@vZxsCG||ZgEnSO@m4*sR%7<1DGLi1 zU)uP^pYM>WL}k&@O?~T*pN?o+b+*{@$;xT@w?fVwIC1p&irce4__p@`j`-ZQ^wFu# zS*{tD+ooOYwL6n^p7-z*MMcd8zWdWIPqz!?$zGKb9yT>OZ9kXT*=iwE`FH;rELct+ z)>b~8wbyKM<@AMLyX8+WRK5A@=^inupYg%djD$B_y1hVLhI!+^inf`nS-d@Vls!Lr z)?M-B@oC?Jvzr!8wKT0u_2jbb{Svbv*Zk6hUw*f{yNf$^)I^?{!g^IfgI#~K zah6`0UZ$wTJwc@k%_B>-YTunNT<=_Q%gX!6t(?NWC zhaL)Uv03u&)RMRd0(ytDWp{r+dY=3E%vVv*86WCAT3>$uh>lcqphRebnk=XEw&QLp z_V!aWS10sM)sD+bP?>zvlVugdM34P4Ik)5I{_T_NQ&$zf+J3`ArvFXep+0tvPX$`) z8ti4cR==J1^sl_FG_P*K;xmeUSz3{=7701?eB3fG(&Nf;Bbyv<4@RD)wP8<~6w3M{ zjEa~;1!My5zI%2{cAu7wx!@t5t7?w^$CfmQG9B5qibL zW!?Qv0TTT;?_Jm0z2e(Wrtf>--JzRvNaZmsg6s+OWO)j&Pp+z8|jsf5WfOj(@uTy@%%HnFrdP zDrRIRDhBB-&(^zkv-2JM%dZxnKH5oN3Hy3G`%}lUK+$dAe>pB(^_e48>B$@Wl8@Q{ zRPMii-}oqCrH`lL40)^Hz5BMd`AnYF?qBb}{Vl&S>umi~E3#jUrY>9M$&)6oej92pCxacS14RDjko^ZPV>Dx@4VCBr?bQ5s6N-Erz_-oqSn6n)8cB8_m=Y$@8+V{ zPkGg*fAiZlK~QdC!kX2IC*^K`JojVbk!NX#eWqPeXg6OsXU|UKD~Fe!`WPN`t8d># z<-*d6s})g71DnuxhD1L$?N~JF1()*lec31v^h80xIQdu-5PlL zq|&|=*VJdBY5^D7F29s2XV3o6z}$Uva)&CrZ!7=Ih2L1WbPH;}eE+R0KK#heR>lQg z606dZOLSJbsQ-2OU}nGa*W;=G)IU7zII#AT*V7-fwq#AWzWdcA3B=heKvv(q-KE?=^)NlC4Z;qir1ws`M5Sv#ZUc-tQ~JeuL-@=wgr+`;$$mlXoV z0io+GO>$I|H&}d;w3rm+%bC3+*ZfMrQOPRvRavi#mFHgkcg-_Pbw$X;fD`AU&q#^q zdFR;8*z@K)%iQ+Er$0>8<;a>C6=PU^_2y5vw*1Tu8eVR)VW(JSXEv<-(kp#fXVz+s zldsawJXp2h-0O6qGm{mApSCEOY3YQBZWeGonLW?VTz1Z6y)sz`A%Qr zICtkG&iS2QssgE}c_i08SYaE}?7QNS?$w!9{*g1>{R4x2r-dBp58G0zX0j$(@r#GZ zuhzz$4AOR5?F>`9A2IJ`mAw?z)VIC*_M!Ruf?bw2PAMxn0~yu`3D2`vprA|x|Kr-YR;>?HRakG&_w&5tb%4%944J;BpF`v&9d zqUWzJPCSmlp=8=p(oUHqtH zvQNEbuEmdlnVZ?popj3I5xOS+lI>4HtCA&40w&#Cz`yW!w`;IogR8SQgU7S^o)fS1 zE}Pg{p1l88=bi7z9=9g`XV6vf5))5)qH*P~zrR?;&6&?uhB6(Vz{BvZJ@33o-$Dj) zsm2@cH|{@qw5>8#a+TwYs{1c)9SN9op!Bj+@{BFZ!upbhrs=HSwn#tE@xlB53}4=F zGv1R{=J#Z;Q^Vp3p1%E7Z5bw8jJz0E{>px*m~W{fzqw(-FR!`QIr`^*Kl;dJX|!bH z=g+^|ug|RDFD~G}XSaOHT%LrCW8ahVm*=164?5tOzCJEDaE0B3$rp4!1S>?EY9>r$ zF5daZ{BY`7VfL2xpSAJVEAreI?)PUl71Cl+3r(v%)yMmL^4o0rby8|e8dfpa=3M{Y z>N92X41SLvyH=E|2RFastq)XA{CcC_Mo^e*#%3n-?|XkORX*uG!Q}7ahe|eGPuZo7 z-buXiyZ8LlVXmuF+SjhGtL1k3o}$vXOYTF}qLT%WFB}ed9>|j8`R&h!NLS0Q$%SXk zrq$N|ba=8v@abEg$FqDndNvtuU|hrckK7LUw*fBQ!hR`JDfZQuX-YGTlt>!p1QIaD2)=ULv~Ui+V+PiiXDk~1C)*88vP z(d3z`HM8UOx>-V-*{{jQtlV7vcEi^H42-`X21rht{-I6&`0bZ`MrE%)1ncB&V>}@( zyg2Z20sr>1dq2MHS=?rGz zp6+!?Ltxw9G+UP#-P1K2ZnHd|@#M_I&DZ;;>a6o>{}~{AVuiNMA!or~&1@>} z%RIcqUh@5C5Y?01-@L1)#q(=krvEJWIi*^9G0#m~!p|l{ui2w5K3p~o4tY&lmCv*KfyF#T?RS-N+d?2lu&H|+6?`6sNK!SceK@kD{;i~AP({ES77 z9&7i`sp&9(E+EP;#dL7WtH77$;@cD4zR9h9S9$DK&81cCEY-!^_RL`G)(pDpx6f@} z;4@1rrCqyM7i(oiMQpl}uB&qF*o$Ki9k#uFr4aY+zGR)brq|IaOaC|?jZiV3HQREd z;qBmCokcsU#Fo7mk>I)$c=t4a#>%g0v+|al`P3?S@t|bHg^gAL3_92Ce#`#cr}R`G&SI;nt}d63P4c#8$l(%LSN2mu&fnf* z)r_FjwU&E4^;a`T3+B(TlDK*LgNnw5$}6=ex>?PfXL@)WUs}TwbEkRv2bs7B@gKh* z5SVOwNQhyr#GR({-InFqyz}Oji%ilCe;pU?%yRk_YtF=wi|0i=?!OA}3*G;#QEcDu z=@0o%`qTwEN4mIZqv3yo<}L(JoYOcI7?&EhVRCe0i2X!QE!ryt&g& znpC}K-0%=`Tv_Y%$o0v@jW?2(a{2Ul%+&P>;o0QA(D&!vt@|dC{h* zH2zq$bg35?gYon$@c6};f! zW8AkcJ6h;iZ*Eebmq8+@*W~COSEm1-KP7&}t71(Xn=_i9wW=)k-1#?iveUaYTQ)J3 z#=VlTU@8Cd*xo>-aA&a^TX@;>1uL}J8ed#r_r1z3TUgJjVv^4!Z|~q=YciA0`bRXD z@lM*Sq;ob@*>IYQ+t*_v4IX=E)V}+^V}DI_VN>64r4>_}jy*em|YU zI2%@nJ1?6RvBlJQhA5AUgLFWUYv;GE+r+k>u={;~j{log*XD>!<=W)1Gf8`e`pMQC z{q|ZBk(RIS*8bX}Sv6_lSJ#=kDVLAbY{^&p&#?Ka+dAJyrsubB-FUut;Sv8oU+%rv zw)oMYwm>+`cvf!4jC~Rfj1$f{&%Ph=)hsiV>9nm5cgU`EeVaA*pH3dw@SnkJg_!JD zLz~;O)0-Zim^O2B#sdSkMOVzee(7Uwo3@|p%C!Yj{CDPO@hTa{J$tp{;BBu~>!{}w zkN*DiI85!d&IGTu0apU*qyEf!&lbgPY1Ve>^ec{kK`T|uz2PY)fzOXQ=F~`ue#;=9=Xz{A>Krkk(x%YbbQ8YlYF=m)m!KuGey%x-?Si*3J=y%me#a7@MJ95;D-Wp}Du=9}akO!RuX8YeHUIJ75=n(KKQ%p#`giyO*WXNT z9Wh_!37<9ZI4x=TcB3j!=kC$}4AVHfA8DRqwq{qj5U_E*RaHl1=Yl&g`D?CDo5s~~ zrjWhCP>SQTschqa22Zma{SWsU|B>%oZlvWIs1o?8G2iTVF|UzVSWt=e*J~f9UP|Ta zt~h&q34hp=JAC(LKi1CHC>2-8OnAT@w!L`U1*xYjGqzp`4F9SY8hY`|)brD>o>jh{ zx>h4{VzzQ;N9V+!o)>I%R)lP>bje%vr+E7e%^55`dAts)s?#Q?s!UvaEI1(ROo^J( za!GA&ON)~$&P^~{{bkOQ3r(ySY8)mwNX~0BjyY|8rpnu5`tqv#+LElr9cfF}HaQ2n z#`0`jv(aZ#=B_sPu#LBl-WypOL|s&vMgEBzLG^cd-2gpU8}fG9Qkm?QN^|Qp4zU4@|{Y*`;)Wd?@LGS zaTQ)Nr$O`Ps^@-bU#gB(i?__w*mTK##@}2288WtC^W_PantCB~zDCzVZ?k6~+*cj$ zzGun0`(5F=Cy#?VEmD(0gl2jNuR7S47`vh_X?xckomLIWs%Og2>Is%c>7)mY8suf`0y<;w5+j$TjRXS!pZ#zI|-rcFzImTbs*RkJ?bex=)4FNu0_ z_cEgft~WBVZarTUOs}e57T^_zVM%?aP!cQ+dE#P<*|2NQq;rrI#bD|4Q z%1@bix6LxkZ|NP4U$ghRnlLVUSfJ#xQ73er?@2vRn=APOlftw-jYKakx*qm)`~4Tz zHGDTVxIFiYyWK8&Y0Ap4Y&vW*8*EFzcC3t4o%L>;&c1EDs{`IA)iH`a`uy=2qYtMvEhsg+*Bo8BUCa4*!%z2UmpHGtwsDz3sUbpPKdZiUXS%oq)#}3&{nhNwDN2PmiuX*Hs>y;A6?qwJykzt|Ef;)Cl_lf^Cz5~etgO9usxe( zr3EIg(y%?%tUP!3K*gp1(janpbFo%c?j2 z3kqvn*WYh4|2N}e`74b;#>Vo+TYclccN@=m>09;NVaX&**~K#jj=gceYhThG6Fu(+ z&(Q@7mU>>;Z|mJAlKbyHpO=UhuZ!*ZSvHsTR;|cj4ZoGJY|;j!R!5bfW9#nSo_X=x z&OOs*-yT~oGh?Tay7q(fDt1RbVm5CL;;8?zq3OesRUsFIqBX-}eS=oj|C;zjL2yTG zZl-ef<2kj+u~G>lZ(#IJuUpXE+L!OizARFcwNBVlj+u9#mVbJ^`kM9gqsks_>r4_jx$yloAIXmQ7HV@$ zne=q$oz1VztsR0j9rc9kcI{CzpO`XnzVFuD?e}iH{c@-Jlc8Rk;-u4S|6R4tU-|QR z7b64r;h!BQZ%y)zt#TFAe${S!!@nZ`PEPqY?Z9rf@~8h9f`b+We3lm5_V3^{?%sa) z^*0|b@)9{Mz4dy-_AA%69lF0v)hALx_lD_a$mFpXRYv=521B@0Ya?MO7!CpB~a0yrJ>5UF+AZeG|C(yUVt={#z_J(_{^+ z^64PT##FRd++vt zH3*f((M_)|1%h<&77IKymNxy?C$w{ z7wjLH>-Fg4sgFl7Mol6#~-gxmKeu7zc*wWSPMhB9Y z)ND<3dfl`7@3)9+rw(t}_~`dPGr1p)c17K)?^i0`kFGf!B=YcN=->C(Z(mZeN;X`2 zCfMSos`23&j?RaY;0MJjO{tJF-Fi%D&;ezAAE*~7VuCRoIa7;UoYaoNhYkt<7O zE~Dk_v#rguJ0D!v@p`1TQd85M-6-TG%bS?Zz0qeX_vYN3mQ<|Op1EvAuWH%lY{R|p zL>M?V%9svI)gQJA3Ov*%<#l^!q{o$)j#1gQt*1Lg<|s_+duhU1pL^%!=Z=$Vcc(~u zT-W-|zx4R?{U0S>o}4>{$#ri@+Rv`5^DcN6eKnKYC~ndbo7D9^`{nMMcRD)OzyCAH zxH>AHT9tD@A*=Gw*NizWnsd6{@~?!*nzQEm70$YJFg(IK%1Wnml=w7z)(^JW1r4hx6ye1|lSGI7g zPEz%i`Q*9n^e6u0_g$WwRl{7CFwB*Soz?L6#Kk{;7dJoLk=Ul16{w~3{byFC%XIHA zOZD=k!gwS7W@|bHczm-i_cxsy_1q7CCVgu->_2_eRGy zX|v$1oi~M+2r#u~CTZm7KKjov;i-rSlgEpfN==%{6*Yevn?FJ|>scP3nTX*@-(!es5g*!br>DW{75dTIx`_|0`$sIYS9*I3_^PrkqX z+|5~$qG-uzCnmN!B0Kl&jlBw+ezaVW(7yjNtG>|OF5Ih1s9R_0=ao;VTUU1f+;xK6 ztwY>*9pg1m>-SqXPM_QQpW(gV;+e8Hr0RFB-I_IfT5(0Meuw_Q1?fI~mu{Uco%DXM zM0CX&EeFY+aESEza_CEypuvTLppCom*plO?3mOwapAojpQkJ-tgs@Yx_O1ANwzh&Ycx< zwBV%zf7kiM>CR8wwpPtP{88ej(4lExj=V9vyW6~Pde>pEm_F4lg36B%RB!WelmZ(Qy-JTb*LeT^^HuhlS%qY(+jpQ<)Y%6 zF6diqI-8J~aI(O4joZ<^?{oh%+~)~XS3Irr`pLtSrY2%5`1B@vR8ELVTl;t0_eO)O zrY1d)J<5JqZF$4+ZRgvw8zr@;8l`Oe5$Sl}IqQ?v*41vSC)8-k#a$>iy6v~bc?omz z-t$H7AHFI5dE^;2p?`!*b@*^eOgz>Hitd^m|7wk;rzmRkdY}b^gyV z)$fz$Vz(5%SH9f_>izBodYsX6KQ2Dn$Th#qPcC-yqhKk!POIr_WumV3`%T;Pt3i8# z)Q0cRa~0=A?l`Ue>CUmXmDdg0N>+4x|NCVyk@wr>SE1I`@i~ra%v4~q9)33^K{rTLcD`en#^iklfpu5I;USiYyE20(%NNhiJV6IDQ zMVL}OpZttjMXIS&Tv>KDv`m$qu#GQg>ZblyFXeNw^tM~nBdf;aIUGOM^<_c$zd)|Hr6>*~DO;YU<~@sBw#i_Vvb zPOZ#x7I~_ZCz(+epg+T?vD@aX@8rM<6D1N>I$c~nQPCrWJ#Y=Tw(yDgFK_Ql_nrJ` z+$UkV&cAHJd{dDhD;`uh^BYyV)eGttFJI~ZVZORr`(n}aW)?GNN?ADE zTT>T27yfuq`N>s7ex1EGtBt1J_*oPwBj0dZ@;}3`{+#5?9n70x=#yrimMO_;nk`O~S1#RvQSEzFZer!(Hs%n4Yk z^8R6{SvlLBFU?+ux24E(Zn!YLR_{HJ=zGT9RS}g{P4}Mse*g9M+>eWeFY2WKRqU(_ zU0jqu$I_wI;PdOJrCJ`)n0Uj;_iY*YkzT?#F)gKgjj$;nS%rPt6TA z{V04oocVHix1FUMh1y5&%@UnWbEv}&qCD|7zJPqdyb(TM%dIL-Dl1Lq@ag*t&nZ4HiRu)5#HR2hqFa8p+~Z$=l9StKEmhEU zTf?vR{K6%^nRznts}Ces`>kDOp5Q6|#(nSG#Qo|wa__Uw*wpFs=KEUvqVLkXGg*Te zJ!R_+Zn3RClcms`bHrJ%sS0U{p`z#^14G9hx-dFco2IuZrpJ`$8^>nxtt7DMJ z)Ry^II#aB*RO(>*g8 zJEkVAP2$+P&$f3R|2~YO;B!MgUP z^48YY^|z&LWY)b4nrdLwSsZjOrg80-C2Fdge?^#@FNZ&`D3tcNxPR#>%^NGF`<_4k z`!QoqQux#8JO3F>cV4{_#I%Ly&ci2>YxUy4O+WK^;-(M1-{$MCIjCdn#dmA^rEBkk z?#Lup`*y_1bN;-kcvWu!L+;uGi)|0AHqq%(TDC%wZOP^I=cm87Z~d8;Jkej|(pg`d zoLbS6%{m)*Gag7)_d6o1yMtZqk97Y-!xLN+oMz4uXIng5T>AGlCGV)TB}+S=II`c^ zzGG)w{!&ImlZJ?U2WD%U{Y!rS`RJ-N4b5j#4f1CuIyi6m*SJS3=3=abwP=#`nTd0l z-sU}t&wsdG`eE4C3rh^d3QAuX-~Drj?@C_ZhKpA{!izSpbSgE!n|$&o!~4JH2PfLH zb$Zrrz5V?8Yj*$WuiqNh&wP4fVq1D(abb4Z<4=eFW$JNt+gg75wDjMGRSqh`T)nO* z14Mgvg=Y&MSH2;fJ;&MQfSqvkz223+=O<_Lsjgh??mY8aX87sAe9xcTZr6I+cO02{ z+Ebz1-rVLtL*qFK!HE;9UQdy;tXa~tsB#U*4hQoc_xtYtXW(Hi+B|vg&;6%=3YhjB z?Myr;%Cy>1<;<&`Uu+>1)3ai-ro{JT@)m z&vO?ibH<%H@GHE|bzSb4JAV=;Hv~CNl3@;ZI3E2o+rwz-w2$od`}g0wuw1Zq;wzRS ziLY;y>!faf+;}JEPfgN9iwB*@wyC=%FsRtBtSh^5SYYb&(<|fyJ{-|uopU1n`ik95 z^;LJ`x9-?3U7g?j;pz_$Z&tl8QnP~-4%hZ7IbB;|qjTn7|L58V7hhd#a&>2E63t(7 zzrn6-=YNJ8|4BwFEBW33<-c7%hc83)RPOaTdY@Jquiie*JTbR%|MtS;$7~EmR&HOp ze*W*!j{1rOFRz)auYA?MMj({VHXZgoe~l8e*4~^ zZx6n2(Dh87sCadj%zRnrq+(cC3gctN zZI}Gl<{zEW$9c?rn~hE(r>F3|mhP~vTw2Y<^0is(aOC?SL-3i!*MpNNriwRQGY) z0p9n`k}D_7`{?=M8 zfej3L*Z)CIrO zUz>jWSNz+L+LCFh?(*zz#uk%U-f7p< ze;({|o_S$ep}nZ`yNzc_)ZkFE&JN)0Vln!-BeOTvR4N>8|@e^@@^5%9LLRH-}>apaiHv|atC z69YD|%y{v9LhmBppB)&?CR&)rf8ytS z=Zd0aYT>^YN1wCI*sAEQ zSvqME>sepfTT5HtzLnw$ipp9Uo&USEVvYOk^Zu-EJ;&yy8Tw2-^>OoNjWC4`VwX&& zDimi+zfYe2P-4pIW4?z^+uqr*>72~92TZ(-XXMtEW?Vk|aA9$f7w;vH0*MRqR=Y1x zkm}YgbxJc#4ZRtbJ1f*=NDgSa&4|@k?hRIr1MXuN)@MsIG0}k%xiPtQCNpo!z!l9C#~iDj?X_M zcuuV3SCgkHds$N5ng<*B3)zJ_5)yB1HJs;f;mLB;+vVcy7V*EY-{*(t9<=>&)L-~f z_u;cnmqVsZs$^gL;y**++69hW8i|2Pzdv1kxmJpQN$@oL&7bp31zv^J%+Af0p13ny>4EqU|Yc=U!A<|v2P3V_iumL zbxyOJbDEfh;Me*jhI=2anHGu3T)OwA+tvHPhZM=CMQcnYbBs3dCTcP)Hb~r{vfesA zIk9Ix!&{b6DcFSVp`lU%bjruAlZT0Hdy6SPUL+!pB{(chsk%fDmrkHGPd@AxQ>gI{CS!pwGWL!Ao!Bb}~#Cl%(V@}B)l_giR z{d?VwdgTLdvH1$wo}MiB`NIijEoa8-@#dX@Q=PP5rmQ^kV$-y;ZS6c>(-_((G?eHa zdzYJgc5O{Md&V+#ktGUg%t;#V*PWwph_@{l@R>A;tKr%-))~Bhhqiy|R(g6>bgtvY zy~4A%mif+hj28=ynY@Cn%WRe2tk!pnY>RTrHUx3*v)pp-vgh>nqcdG6O-&bJSTSXS zqx5Vk^w`<(q&Qpu|`6DMy=DE}6g)TWuRyxdO zEX;0>xbom5hsDZ>H=1XfM4H@AeN=hxx>&Qm-LT+e*K?+Ai>+6PFdEJI6Sy=+R7owX zYDVu?N3HlmnRN%a+XPST^0<^g^9{p1l?4eNp3H_^s>_%nr1k~5a`Fal?3>vhGv%S< z+<%^tn24^!*}s-mNqptK)!L^vXB`(6WLbWl+elb)V%wCjg*yw_9@qFhstJlYBXqSr z$meK)nB!KbeyLGd%q6_hntBr@8x;XYI=8|Lk~p zGFwZF>EfP+C9|TI<^OT=-Qr)X@i6pQyUx^_^o~asRjOwW_k9Z4rKxj7g};~A$zi2; zNZ7||Z|y~ndCva%WU-hX~l#;fuiH?t{T2*gqU+*KwqMV3(Z)(+Z+ly=4_O}!on+1jbirl)Aam5j_ z+w~sPb-7If{{Fe0xAVXxky%YO$6C+E3S~rfiV3w%?Y}Fmc-J}V)bk6!BRkEG=T3`z>6}RBI&p^5!YY&%6MwrOA3tDgEKeu-|0LwcbS08rdw4n5j~y zqb9%LUVfKVP4koIbDdrZ)tpe**5mfG>-Qi1#j}j(tBC&Zw`=!&?>1Smqz9bL1s$oGxM_Kh~P-+%d)^x`b*gz4v^w&x|3{40#s5SUiC{mF&K2#K6) zR$GN^cRZh+W4BzWTjZJNh1r+pRc%a6``&8dCb~YhDo)&{(4>f=F(lGyZo72j8?)!n z1vh5+JQ2~b3X|x*bEDs@I3s6f7guf78jm$w+aw-O%u-qCab-!r#MJ@hwwWGPAJSrUUQ%X6Ntie1H7) z+fJcpDr=TX%?f|!{nFjc=D}*`RgDXIB)872x$0e5oIUT}@9NuJhE{4<#TJNPuC)tz z(xx-}SLsWcrS2D_6E4-Ao!NLl=nfOdpWS6gqe>i;R(4vFe z*>l~tY*_okv?8+cpWye`uQ``aQ4G`b)kxiYqlxiGcP;y$B!0nJTUC8OimPARZ!Ng7 z!sN)#t=Yf#a4j-#R59`pX!~?-VfDn#@q2gLuYF_ixHITbNV`hAh|*Wq)%6pr*jpz) z4mN)6Ke_nJ&m*NGSCzgRsZaNb_^G*3k@I%w_Itm!{jj^S_>f}pwLPy>S}k~;4j+wW zR6Niv6BNMVl)v)%>6d(A3!i+Nw7Yu`#3rX-4CeTaP!7HlZuX& z3YI=rX(p#-rtjq17iA<^P-b?!?)l66OfJXG3ZJjuzirQ22Cw2t?0Zyp+Ltcu6DsA- zFRYgCp8NC9hnuMiKe8;%rapB{|7}vC!tf?e)>`7wt``{{9L{%Ittz;0%V?^*+D*9!a;FuF5C4DKfnEu5InZf`}D%ER(CdU+}68SDL~vdS7hH7_=BeOEtcNfC zirezc*0k+W_}$a*)k6D2y3a*VI5tr-*uWqphHU~{qQbM^g+Ff3`(d-k=d)uK1-j_s^;a2MpJkJ&8N? z!fVIXjTvtbS}c0&Ft^k3;ST}huBit3e`-!hdNTfO7W*8{+j3T8pJL+W{|x;98D0cU zP4c^#=qj04I3u-GAjmz~^XaOoy{@;ro6P0Q12*0$zb*Dd$7SY@5=9QlkXdo_w7T}a ziNE@&E6MsK=bWaFgY}?Bhwo2sS95%KkrmiLD&hhB9=(kOnb5*l! z+UmB%5bb{%Jn=_bGudZeunUc4iO*SlCUYrURMf0YZs$neqw~xXc-NUbF(11g7OHf- z`mn_(Lq$(5%cGKhT%iGuJ9IQeQ$rml3Z*=9NDy^G4%|3;kTJK|ZyrF`K@8 zOPFxt>B7~^SGc8Rzsz{%bX~fBN!<5a_r9I{Hm}Iwj+3A2x|y9iSFQwy?@SCj&o{A+ zS!j~OihysL3iWau=cG+`=T(W!_E!6OY}-y2u#ZZ+=s z+;{0KTV|}Uf9RB_TNd?R^!OdqWWIO%rU`uWlggFrz8zIh`?2e4)3k{iKb$T2xU)F- z9d~E5?RaRE>Kn83RA7MmLze!6Wwo~^zS`GwR zwru&tDS}?_f4g#(>~Zq;6xKTStHD!#e!^~FTZq2w57U$lW>4;a?`=K0yZiUWvV|2>eSCI$DaiTfGCa{p-b#guxz#NRciPMjlm%}& zZ4vi1Z>Q&?$xhC~k9ZDr?^u|m?a|sfZ%?4aq+->!#MKPUZKj$^``<{;=(?P}d6hw@ zU8<<9YOJgK6kZ!|!Fdk9yhQd*&0e{afqBWDa%-35BZq<&T^76u@jZOEQ#0v+T-4Dv z>9#Yg0z;PAW*wY+KQLN6e@f)%kS@JxFMT#Ht|;c&x%$PDsT15T9ewt5qx^pcrAarM z-!Z7?6!)Dgn`!j)@r31?s;sNsB}0CP1bixzeYBicXSJkNfnMj_TeFt-#jL7}oLnDe z*Lj9a04E0^E&y_!A8a!d@29ck?{xb+ztvNK0LnUgm&4w8_ zcDYGtZcv}#Xn(w}aQ5S6QL3Fk3MYCd@OG?_w%&Q|+a7uDwwZH81h~8ZtmF9pty|3b zYUNY+kS~ipWkYL3E?MimD_`DcG0WqcukJcWsUuDkOc_cVu8BXMyYlcVpR4{yo49UU zs!adLJfl}ecE*OSkMso|T7)E|E9y*Iwyb3;L*!v~LsOQGrUg$IZLU~l^s?BBtF7g# zlS}F(heJ-kyrvZBE}eSu>h6{732xSD!u_J2E-%?8YsmGScanGFP&|8U!;Z|%NUx5C zYdbbKJ-B+1Mc|7`i)qQ(;7MmqYoh}$UO5`1SGve$&C)5#Ir|!(9roD!bD#0uRS!C1 zS|6!gO|bNB)XDyue@o1ZDcI1Gd5z%4qBk*dYon%p>5$tqZ}+m5RtHtr^3Qm(RdY5k zZ`rElQQMoA>i%o%TieN;|0ku);%SU^mp)_9p-1-=YUBGM(ixc8C#id|{*R~W7Gni+1qVCEvz%D9A%-Fg_6)pF?c}M!S+(VnRJZ^y{Z2@Ibyg|{<+pjTCD3x{f zSwA)1pQZAw0lzl??nuF#S<1J&??%>_uCq`Uml5t)m(&fO{BQ=drm3W-RQ=I#Z!Ym` z^2aQMQkQ?W2=G}NT{3%$TvhXmXCKqgOkA>Xnxdww_FCW8Mi%bRxohuld~{>O46h*J zDU%|P&#hf~Zn3w_KbDrWTzUJax4%?fFlm{2<|(6fv+u+-@7%F>uX>J;M9WIkt);uO z*ss49xUM6b8Y=rs%B57V*2QSgZH=|1U0zYGrRh6$nT~}_%m2@C{mGpgGs{IEf|fmc z#uF9EDrdDpl{?|>Vu8ia^u>jDlw3B7Qwx7Bm9eP)KSO%HNB?yDX_IX}{OZz@(pqKz z*<+O)OOKn{vJOv^?WO-2p6qgCo>2E=hth-R73)(j>PxgHir!tfw6kQ1N}$uab62iD zI?ma0w(aPdpbkYArmWM!DgiHp9th^@OTUY}xKljJX39P3bbh^3EwPwpi%A-wL=`rqD+$3Typm>i+WW*kH=S(X7qv#DNH7nZs#ir%->D%>;Rz_%x++n@bs=zP6*>Z7hOt{`77XM@!u(?0DG z72JN;hoz}G|E9#*1f`VP^dN;3LdCTM0za@zaZr@>2`?!x= zT~goV&}EI&zL%UgiY>n$p|o_8O@iRYty;d;j~AXTZmLJlrVq(4!e=r+>_G`0LdF=(e**lj=;b6Pt|9C-?q2 zI>p4MqdTUse(sM#)d-KRDUuVGPBOFG=pM6H$>K-Utjufw8FpobPdJcld+1|uK}}Ve zT!Ny+lBEf=)7)FjmNxmDnRd-=@Hoye`9DLc%Kkq~H{?fVv2Hz)b8)`Etd7S;i6v9F zniuHYZ~y+s@RI7I)55>{l|Ox!s!e)otfm<`PvKfsrqrYbR#zN)RXcK>9tc$2nC~gi zF1Y+7%gUf-3mrSv3$BW{R}l6`Y^58QtF^`AshA*X|a!1T{|x^{eH zKBwHUn&Z_KcS9j1tDV*Iq3gE2alE}>CbvzpRV#h_e(g)MozE;)**3qTQe|ChbD&h} z1b3xPi!-eaHYy0$@vGlDbmRDksY?o5H@&{gbMF4KM435O`?U8>5m+_t(_zh<$8GtR zzp+iIlFC}C(BgM?X$PaX@J6!@;(Hta=v5~t6=qHm+T_fn?SC5l418-bK%)aF?-TqPFXLd6vu`tk?1eL@CtSkOAsQ*ilE8F2M85^?pz~4(2 z+s_@T+wUVNEUK5O!>V8Y<@2|8Ne9iHzrJ2*duiDD&}jecv<+N!ujW0FV?D4x|1{Gh z%b?f;oXXd$`Hi0`MWsITyuAJv>y^++{~4y*a$T|E4K^v%n;&$yL5$(^%e~v)_nb;o z@p>=p%EitQ8&VguV2?Zx%fAopoGhQ0)~#ike5$o(qf)Q3W0H4P=uhcSdVKfWbSkPu z_Ef;#JG2d?7}<*h#C zH}RL>ybn9o|1RWKn%UNn$9H&wqfIyOe}?Cx9x;!MLe-Yd+Q}1NzBs&iiNfTE#aUl} zyQe2BZ$7XvQdYF_NmKbRY2iNwck*mJomUv@KFTfsCne;vfi=hdLP^3UuUMWkL#Gu^ z8>DtKF8R$re}3;1ucHh_&#HFIz7GE#x$2xk=CrG(CbhP^{z-TSy}A_TUUEIx?a+PZ zPqy2S%$(6UMPv5`Zt2(0{>)Nc=8(924_l;=%LK7KOPd-j0~2y=d%JGGZv0o66&C!~ zOVC$8wjH2RUxMm(&b<%HRC70f>DZi%OZnB=+cRNOgU(jHo-@@Q{UIx8$ zY)!lPq7HueQS?;X)GT@Z+jkcyq)weVE%NHmst|>hpBB#3DlOnBnO^F&``a#qZ^`?9 z7s?+w=~&@%!dF!_DZ%u^^p#?KTG5vyUT#kDoZA>R(OpyhthxSYnTOv)gH8Q|Q{$7% z`ZZ^GeQBIx^5tQOyypAtUGJ9sVUsp9iSSc37FHEfSDn(zvU0^cjcxp{Y??PNTye2u zw>0_?EV65@&eK1tCnejJ>;E%Ecg#HH?C2RDYML^&(q$^Qzeq6S(-6kAotql;Ug_Qu ztp0vfPSsP!FUZ)!Ej`#a>eh~zEe?-ZRxAq&I?Z@eT*~yD-GZa)%Z;C(+pKfn&PwSN z`>n9nxa$!@K?}0Pc774A-SNk8#lb>jbD5Q#9`BaUS$~EhWT}No(V3EKQ};jKqbiYU z^r5rnpUm-puKH)!XI$2paMaz`)MWUhO+ z|6BQ7*rHBmRhGblmsPJGhyD7q;AT#~tGln?tH~!;P4Mv8w=6(^gT3=frR)2*#xdwx zl&s50{wUi$`@Ngunk)Uu*YEiTP4W;`KJn$w!*i)oQ!A&1&IqhKm%nkA)zT92>w;gT z=e9nYWFPq4xv(bSp~F;*htJsbLTq}ya|1lGxn9amY}GitD&|zr&BiASip zOZ+mo1cMAYD~%%0rGMUkDqDJgk+GT2vSqVZ_U6`kMhASoDzd>KGE;TZ0e}D1sRdmk zE5Do&_l~~w(q2?APhX^PhLWIxf#*d}krS&7`2$x^XLvp5t)8G-fklAO&IA?VRK}8( z>o`|jHTPR^T2<&`=#{6F!`wqBKMQ#hbls_EWyDG?gH$(hw_s_e-d&~#-$WbU^0U<{ z<1D_^vw6ewV@>BK#YkP45Z4mud*TSwW<#YSq0`f{CYS8;J+VV!heAc`R%Yd8T34rR zz4~ZrR$QS(MuUP7r%L?9Y~8loH946Tn*?RBLag zHi@u??6Q?>T=h<7vdA4>iKCBN))-A{nDo?l*2^O14NFh|bTu~pD12CMPPgikvwLjL zyvkNxyzH)C5x(=@IsKmF43B?I^~f=~&*aT?Xbo$f@YAJhlp`D8 z%ojbXm2$>R)zYb>Sl3j6-Cpd=XEyV`MQ1j9Y+kXM=lQbFeAAbT8eR?#4hyy^+>&rh zm*)W|x5n;?{~1oM?|zid_=t1zp;Ln9d*)1)d9zH_?d6qwE0+pMTyfVq@E}#sE3kc* zT1Cv13E8DW*Z4C3Gl+?1g?-9e^IlE#(#14U$GWNc_BjBtyE!Dbw zhr*3VueZj|o!k@cBn~si0}D)&lG5yAI=lT+S~;`0yk9H~SeRx~80Y%% zU9`KXx<|>g^vi!8=Z0Lm>>KFt@rw2F*eRzrF@N*ACOtjC+uiJ6iK|TH)T>3cQy+WJ zwT!3<3JNJ&ZD7aR#1S-k>rJPg1qU_?yi+xFmo+th$}*K@RZqYI_v1Q&ll+g&N{L=_ zv2&BRZmw9Yu3N^|&w^2zbI!B{oL(?JNXB$))=uSJzlD3{r4D*M8 z&nkAEJky>p`(@s2@<=OOES{LQ{lYt?dYzW^;4kw$!+CB_4!;s~#o6CSU~alklwjkI z-ajj+ZoPHe^nGEU?V%q_&M2v!^`4@zR!e2uQcsok^_?$w@?5Yj=8CYOh6SEw zi&vh$Y*eh$vt1>&sg>A28%lRdQB}gHE5A~Fd@{JC&O_gYl*=|#Fk3qE-hw?j8$Mmu<~H0P=;fMcq?%=Q_vjq)Wgm+?uhmUB zBV&AOOJu9Gk6@@v$Y-|J8Pn_<)=a;~DE-d8M$Te2@3CcxSC;u)(VEIIrQJ8`>WXsf z_p^MKs!L6omG1P;^P}Pv-Ey|Z05p9=HBu|M;z8jj_PC zcb_$y9_)(LO0K$?^PgeG)q9*bb)q&#c&?mRf3D|d-xHb5ym_xO8@J3prI=CF`Rd`i zm1ngyKIu%1Oper6N&D@v^`ysRo-@-os4#J|*XeJadFy2{%Sns!S)067l@9RiQ;JY8 zcruCG`J=={okcMV=cS(T4tC(wk$KQz()GpDB+pYL=q2qN~(WyM=cCD5OD5SkEqL+HbVt zj4M-Cozq#6Cu4l=$OMOFAFEprez>RH9#ydF-Kl1g6|b~xX6zO$U(wsI{A$ncmAA}J zXNh?{j!iQ&n>g!9M4t}FPK6uy^B&8Zv)|H`XqnTqHmdKtv{dGYou4Y6)kJTe(Zl6m zGy8Sn^KT5d)U~w3W~M9?zV|DjL3iB~2L0N%+3%0(PPN$N)o|(dwez3lGnP+of6rHT z?>~dZs!LaG;?&kfn=W2<)~M{0$lQ4uGiKL%{i=69W?3&YW8%@zm$e%zTYK8w%1@Qt zaGP1g^h0u1e^NPa@O(VY<4EG{`hNx&C%?;G>TgvaTvxdieF8sK*5giDzCmwtSkjK=PoM_vQZ#Y5{VEo4$N2 zoc3(8)8?yJf(|a^-qGUmaqE}vWlQhGx+{EtJ~vKTL``8?utqAouFgH}=GRrXwmrA; zZ}put>15Hf{@T2wUoS4cEb=#b;#anC|4C{0)Hyz{D0QFq{9mlEXpDN2(z+l?t%pxI zOMjj$ID94``8da;7``&8s*Oe;8cynjYX~e6XW13EFy!)~oQU?mxP9y$hkcDFw%Xpz z`E*yy@4W7+)~uUFTK9w>bsa0%vh%4}Zg}ve%Ch9EH#2&kn{3%!SseOvlbF{n&xx}v zlV&}>bn4V5P0oZ@J7(Y9XZ=%h{^Y49-?{!XZ1YfIEY|ff9mElagi6W)_{Q&^CUv*edH(m9na@?PRE%AON6m+A7X%W*L zkx5V0t}0l6>WqnQJMHqH;q<9cMjr2&*xRllK?j!XJS}zH<(Y}e%+Dp?l4GS(_WF7m zX+HJ%st|d~R5fA8wi&CEm&|+8Br1{PDRJ`K_TG~-Tc-CN(tYNom>kbh^We?Hdy6hk zd~_pnk=Jr>|1QUK=YywyUi3Z4YqI-FnZ;Tw&!jDxZ?x>p1If}@k3VZ)@!J3{+6u6DMYbL#bK z%_5$@Nk%_=9YZ;vwPr5SNYUV3yK?p={RB_xk6lk6Eq!1xN!+5P<*C;4-`3Nf_-1@n z-P$P=EUe=@wX!u!4g3>CZ5AHBs^s=wd6!%Fv+c`$+c(PnXQ=+T&GbcF=ngx% zP+QqIw_H?Cs~?&**>Beti{H+tQ+uy2){>ZDDbGB={xn~hMoX(K|F*u!*>4W9q&%&; zzD3L{ePx?v=2W4Il+O%1E*;=D{g>mv&G=8rqhRGd6Bf}1!ld2$>z1$ecwcYj zlA5dZ?dBAN?QG9}%arW1ZL_!|a>x9)O0TA~zyD_OC4nFL_%|)~f1nQ2N)} zzrLYVj8br+$34Y1(Bt064I|?RWzmB#Gn0G{lLcI@eePbqejza zO{}@kncID3`m$rv?UT&~o7e^XN>$P>A8{7DwN=`+Znat4x~P?zi%cfkY&x-V+xi<# z9$HJfH?0@{roD5Y`nTe)Ic}0#vkoPESW$mcZC~81qf5?7q^w|yG5GrV#{3lqhxoPD z`V`krS=Vbe?Wk;nLi6XH+b?gEJa%}`VSNJ?%e`__%6>{obJ%yq20YpoyFX)XYN@wl zXy~L6??A3Y`?Ho@VVyT+t>UzRLs+n0>r32xBtPMZtlA&ZRNJ?` zdeVIhi_;S8zXq)7`J&obR?gVHZ)!EONXEf6lcx5pGJSIF_~h%eo41&lmi2DY+LC(4 z>$LjP_`P>z_xA4Cc=Y}AparjZ7jE@!l@Px(H>0#)o5ykO&9f$-u2~l@T=-SCw|^r` zQ~mUF$C-~LF$EoIFt2|-^HTv!wQh0g_0NK$qH4>wYjovn9A@@6b~DYqS<=pa|C;%~ zX-74uJn;@Z`*_`(>B^@b?AX|CX|#K1u*m$YQ!-c9Z}Geoc6R2)%YK@VT?>NLTtZwF zUTxi>n_VsZxvik`q*G+d=E+fW)@A$KInI17xUs_JXwS*<5bSHE0qRl!woSR<(W{q>lIoih({P1`?RIQI7Y z%rLW!>^E+Ivis3j$mPXybBg-JRTh7Mv_j@Y)_8iyf)sheO=LnR4 zm_3_qy;8PV#*|4xOZNptY`ir8bmft>=quWDzI=yRzc)_a+~m$+pq+el zqj*c9{2327#gga=%PuV0C|y|lYMk$RktCrh3S9Z;}IJK#3)~+{)cn&Rm z*crRnZA?~bRv|lsc$ToI z7oThS#r|uSTT}X)v&M7QwN08Pb4h>m`B1Y?k^1Sk48oGGUl!CZRqWVdm0N4pZr-Vq zV6bDRw(T#j%vH?-j?W%UnZ&?8CG|<-Wo3TDVoui}=6|cM>XwFfBsxc@^?Gey_RDGe z0h{xNFHfHS`k%o|*K=Ln?xsX?wB` zPPP$U!vEJ)rD8&iZM1>>+nFma&YOB)N2YA;bejB7VgA>d?xKt(B5F2U zr*y5E*RH&vOLX#ohV4tI}#@G@7>$F%Homh(#Wevh4j)yz50#|qEswfH!xEM!z+ zdR-;(rpB5v>Qhs}oOq$Igo2e^O#Nny|1%ur_02ky8Ie6BHPU8gt!%nqsafb1!#!#K z3Ws%7;}%3GvYcWzySB-C>-(+8uGd&^VQDhVI2~55IzxJ5V6wA!c0`$Mam!U@L7meI zTMtcsK2zm#xI%DP&!^TYGB;PNPn)jY#=kApKR<@;npVU7KmBE{aeAz8b5~XS9`%#; zIKrkKDl|=0H0c)u^K{;|_e@t!o};W$Y8PN)e_*F|l-AAm_@xCa+l?27zRapx>app@ zUB^C#UH+4f2&M2Gy)k#@lr|RGjXUc2wt64&jjUWVRn%-3Vp#V6+;*G&=l8F)&hIg~ke!QA?*rAZubUK~z5 z`#p8Xo|UaeM^z^j*-Fkou=w9eJ=Qw{xgKk$9}+bdnK08wB+LG)QlDm<<)IlgX?4s_-jk~Z3U+R|dCg&?+*-#I zlj4h%tv_16Dwf-NZnLYv)#D+LDsF6?bi#ECb8SNa>+WRnn>|4jn_MO=e(>Q&W_@v7 z-mhc3H$Ii9lh<=pRW&iW9$f32e8ogI@oI^%ukYk%rh-qWrBnsEZ22sl7Haao;sz6= z((&2l>Qa4I3;r0Yr5xZ6+ArKSi6c2H_WP2;?^;6%+uQx3a_z1e+S*ziBYwn;AzXm6i(B*LV8cTgqz%F!icie=th)cmffa@e6>1VJiYCtBTA|t5)!tScsqRy1 z9)6)zmo53qU0c)go11lZEy@&%cs$G2YT}I7eHkfSr&kn7`Y*lpJ8Ui%L@u&S-}YMRreqbITLOyr{aB~>j~(|6XT2J$MErkI3s>IemL z95&wUmU1y$=k%)Bv;(qPxdnxdN0SAATK=dknY6UECX{uHwGcDYZx=qx!4 zJ|A&X(^Yi3RmXgUo#)89K<~t5d>?aT+M2B*YQ(gCBk!~z zEmIah)RHQ5Xna}dQSwbSCPaCzZ>1aP<~rl_uYV1@E^VJhn{nfkjfvvpE}A zez|@BY4wq}B3lFRZF4DixL46L@oDQNol_3mo)mFz{hgHYSYt-<-Z{*x_cCXF`|@10 zqEjQ-%QHEB^-uFDIkv{LJg;1-)c0}t^ei^g>FK8vha;Z_wVdYLdm=jaYL0_K|Gncj zZ`-R|kMLw=G($l}(+eB!)Zf)TzFwxi*K@XVXPU~onWx!` zY_ypR0+y^=dM-frNce*%bJPVtu%DS|c>759-E$rndb*}SXbGwcp$k%%ALqzwooZvsd7m(jc|VGS7kO$>)vfz1PJa7Jc}9N%fEU>(>?a z^8zoX&lSCDDf6*%ep*YK$BBhK*>W>pIXte{T{O8V-~XJE?Aim&Q=L8wPYyd<(q{7_ zw>F8N^N7#kLe7Ym`5jM_B2Ud({GZ{5$bW|SJ@vY$jO}@LJvM#Wbu(eb(yD2vpRU-* znCj$Glz$>MamO3SpGVUlyw4R{I!Efje+KnVrGEwz%dg!_TrDfynVj-*o>))J2B(H| z?8+axqy*~fE*78lTs>+#%2!l`RDy)HZm4AYyv+1@yP zYrbR*!@J`>vu=O?(k@x&KIIf&l19+HD0ky1O_$tn`3_f>@O$e_6W`9Kf9%IR<(r-A z!v7gAuQ|2!i}}K<9T(3!XS2>dn!eQbT>2TQg?~c77lz#l(ib^-X>WX})Y+JuVcBa` zf_*|bQ_aLv9MhB@oqcI@Ur;5<;u$CNXMR(Dw+o-HWG?)>B5k#B^YjlpuFh**!?~ht zt4rEEgQu3qT}tP-Mb69hW(m9;X5zNcbJD!{9TC~v9yHb}7cIH<{iwUdTRriPb0<}P zba7}dOPLUpBW>#WBD|OJ)i)uoyM!c}oc_vvd8jX5QIkHZ}Ls zjvrT+$xNHlaQxhYdHGwsQh%O}3k{x@ZqoDVlE2@Ix7-?h*LXJRU1MyzasRK|1DV_% z4IGwgY)Q`>Ca?- zhwfeU`Qpzf!Tt_At&f}LN{4uQ`_Eu_^Cd6(tz_{jrkR;BmYSRP6s>q1wa=h-$swke zWRXdy<&-pI4(l9{c^VaR;A&Uw!Hn~T=VYA3W?u37)D-l9wei6$#qV!GU26^kDc%1`pZ+pJj?3T zzbcn*nVBDz`uTWh@D-8!Pa}W3F!p}VQ0=YxT)k0LJoQIZfoGUl#_2orKJpjpdaCPc80oOg-o&$c^^BL< z&p1U>wK?t>e3}_|)I)V!>JIkOZQsRz$&^*@cv<}6$Qj8dzEHJy=Qu<+u8sC`QF(A6 z(CO<+-j}bGqSw57vUZWJtBAkE8KruAr(UHMqM!Y4b59pyu)mr=$MMZ_5u+5Jho8C6 zOgJDIbLsqn$_r1|%0wxAkTFU<`)AXChA$`5_eE{L*lFz|cqXN3p2f;I$+WF=Hb?Br zP0W3ByMMjhb7|Gb9n(Z6JX}*5vfV2Cr7#<>R$S^1sUqv2F4KIkn0#IT#3f2KCM;K7kaQ+<;PjKNB55=)y?r#`69gM zqMa|xEiHi)0z0agHvYZ6!TC6M?b^P~+xGva3-u|tGVdu^>?OE(uK$ORa$W&~M&Yjm zmd{v!@vnE7;^rH#!&dFuth;E|))k5AZF6-}=U8mA^nR81bO!TMzXyyp@gM%(IAwI% zu4tw45!O|Q59u8@s1NU!lU9oKm}>HrTk%}us*>djU(JdIFQk=PF<}l27@lIpK%pe1Vt)S2s7+%r?LK;6{&>yvZ)_D&DzYrFqNSu9{x) z{1>jn*BSc!R%pqtGjiYZv%6QA%P`a))0x=)XTDIJWa6QjnOEBXkNd$Q*Ux9r(Yix@Lo?>KW# zvnARFI}?`d`|{C(X_rabh94!%QeI}6oOw_bP-H9e#^jPltZQIkyf=@@&A6zY_u>Pe zUko$0$qQintohXZ!bcC0#uaQqjJ(;++*>9sKjTy}XR6#s1`RI5G}RRk-<*GP%ybX0 z(33TtUA9*aoD$K=I3?Ap`bEciM@9R|&mNmuUq!9rY?*X4*f&^MBU>jpxYxru+NbN| z)ZE>2hb7OQ{kC}Dqh*N+ohKKRr!s!tnwP=s+AjHU)ufq1&-gjLCNI6TO3J#4|JE}t zNwv~QomyA3P(Lk=MQ2}MKPeIrb+<2aHYRb|Dr+2cDECkCx2L_`#K3HEjX!g7RetR|{|0A#yUf zIb5#sMEa`2iI+E<$9yy{lO2e|`gM;h#9+zuCR@eK%KEbx`= z)G^r|+ju2%LGTBziK-4wS|--p9R_$Sl5!d7@D=`Hk+&zSJiE9%W-vnHOdshOKu zSM_av+tZP}q)mx)l56TcQ})p9S8`ZSZnJ!!X_&lUC}_G)P~+;kd0M4vw{FQFO*=hN zVYR?=Z;_{~SFAkDW^VSh=zvYG=+1J#XR-DZzPQB)^E;NZeBIvH7i)UR`1J!PEw zJztPcT&S$k(_ZiggoZmLRAzn~eLgbk{BGCc{|pyiMNfXs)so|9{&)6UmY*|R>Jnw|{LB$)&skM3v_UvJT2OCkxC0+W=WW458_n)S|#oFsnM%AsHB$OU0qJ7D&{L1q> zhbs%q3LM$4seR|Ri+Z9jYJ7CgYOPWo3E7$RwlM6;C@>D~n4`Wd+0%1zh{hY;C-D!> z*9a@k)HYhynb@~#lfCXM6|1dRjvdI3H2=!+gyWS1qqB!~Pg1(h0@ju3X0zGMa%IhT z&)R*^qD!Q6$xhDIyo~IJ-2XWqyz{hOX|+z%V*R#=SE*}e?e?4->r!B#{4KS?sxdmt z@qlu3ck9_%E=Qjyuw7+n4${%eJnfrgwDg>2Ad6AZ+i;s>S2teWD#-M8YRRYPQvwxD z?Vn9vV>m%Vd+{sIz3=$%GT(VPM_K&tiT8{jo_IV>@YJm}UlOsLUpI5A!5>SZe=f$q zvRAvT^-H%Asm$_LfBUfG)Fiz{vTJ-hH*VO@^6`rWo4M#*Rbw6h%@ZoU60{l}f$vSewZw1155$XqZNTG z9n5cGK7Z?1;5v!pI*)DK(qvtO8M7E>zs|n!=Fb_Y^q;EvzUiW|MpLJ3{bG7iFWX-u zGa0vz z(3jZdt2c>b{leC#H@Eye{p?EH+}W$cwLR`_T#{CEPW7hp#D3#rp)BdsLNE9pDPME^ zgwMQBC5!(VY+31?bl|z)N&6L%=2bKJ)R|`5-nz2>L%PniMVvc%W4d;=Hd?lFEl&UN zd+nw_k5!huyg6^?W?e6%TkT<;E0<0YkGS!^;>q7tY|>dFEP01TViz9J=Ism9=l`&3 z6(5Ik^s6IM84rSP#9jQrc_Suhqp8sOV|ot)Rzx3}&)xl0b&bMZ^Q$J7p|QLnS>0Bp zB9_6&JWb=Ak4Ku!in?}zq@8hE zI$JXK!GDIB1E)Q{W_1}0X?lDL@3^U(<~;k$G>3_2<0GbiH4X6OmUyBmv{604+*7pE zs_}_7b4|${alymqk8wU|^V#r5CL-K@`QFYgW&ch-ewvh$Way=!uvAmwiq*~lkrlxw z5B6?WTV)#?dAZK;?3@&XlYIHI@=>)*R85jHeddLRXmyE(tuqXY%vt?yM;7Z_EuMgn zrE`qb3T`HBx=_7HFM^+u@3~^m_7aVNr-tW#Y?J>qDZRktbZFo#XRA)mbgxcRqp1^^ zUfG^nI=fZk^dGkuyJqQplWAU)HQm11C}Cz^#imuKr(bhhCb2TMwRf6E!o^kB0+%)_ zB*~}knExzwYC?N)*MwKPaUaA~u7}s0c`~bQO-{A?Eft?bbK?2@%TjBKR-JO{OVwfC zHD%2iU+<=48dGMr2d6|n*{k_Tmo0h9S60K6iCPanJz4qOd)o$1m6eL^OLxEHvuaPT zj&DA;_%k2ZfeqUdogUscwG(zYxY)hx;SV1_>Cbmm9gC(MKNQ1zAn9J)Vy9!X*85%i zyRNtB7+axnZ^64+oW(2pK5jj6yLs>RoZQs&PEKkH4VqKt?tGD@oA2@bPdZ=fnfEz| zzF)fY_L;`9jk(L$NxEk5RV|xyi&3%5!6U0u=!n(rg6kWlDt{cSmp&@oIb+IM{~uRY z?UfO@sy6F-)buR@j|`Fpb<~pTk1Z=G<+||c(zYANF2yE!n4Pu<*lcRaT#%#f~!smTbJE!56o2M#XfKFQT=Y!L!e=*FU2u zDKj-+^-;g^;)n-*VflAG7ntznZ}>d5af8Lv{|pnA3TL)?`?6c4tq4?Ek{48LT5``^ zws%tJrsqmtkwNDx4n2!G6y}`Q`n|#Y*97OoTrX3iWY#6E7dawtu$cR2qlenvHwS(f zzwKXWaHrgGz^~{+8BML zy}HwnE0e>R;oa%!XC^$^Y~Ft8Tx_y28=LB{{^c)??wpEpo8)s`XRFPZ$mWHSTA8Xn zTZ*Phzw>%!)5c?G;KMH}9Ky+Ks3*dhI74u`)mGhrU(OF_yquiU+4*6`GcLc*0G*&a z4LYVg$tLG*yk7IQp4e;^@oe$AU3pn6UTSaZIX5#aV;OT`NW!(0gs_K?wNCGy-C%6H zV(Q^%=clAC&~&}R@nMZ|h=!%;!ri^fsRw^+Ha?P=vZkqXp{i#Tr`NG(Qxuv576g?E zt}f{h_%gr4w92?R{L&Y7oq19fQcLzsk(#UGx_n~e=a5&`%1oglanGJh%BY;2A$fUv zo8(c>0(tHE9f2nbAMrfCk^JL71NYGnN1jb!xjaWyEMb+%q=mnHnI~lVot0!uvc(1W z{wi++)P1kAMw$FH{YI#@4ljo|9T_Hi7Gt!nSr9=I?T3 zd-mXtO#U)YpCjrVE$zC6ZcP6fvg4*NiF~-DX3M$R{j)tY3+}&Jq%q-(j-A4>DVtpn z8=c?mIFt2rhIZVt(wsw=8NDe& zVaMj(HER=YUs5jkC$CviCzI~x^?cr|=yl1Nmntt*vTD)Lg*5wNZ{EiayRC~NC z-t06w-r9)n?aY6MBk@nyE?U{w#kFQv>FN&77-#iq(GhG0 zuNch|wqG%qo4b9Rv6jotO=&8Vj)-i$9woM8N3GkGo=*m+r-kgy`mn0;@U1tQ0dJ0d zZJW9`+i~`@zxJ$Zp79gRqEgl@H~D;0ICPESofqk`QH!rzvuWjpyt&;|ypFSby1K|p zi<$F8S56T*Gc_|Swe0MZciLhdZ=T$g$ehe|=*TtC0=}4|s~GKtRRkV=QPSc4&)`?l z9V@ceHtNgX*o9{TLW6aUpD`%vM3#!^2`KXE)_zrrI+Z7VWLZH^eWl6db}qdmb6VP1 zE@&)k{K=awk}9!JQenB+^oi~Y&p6jgY|wefVKdXK>iNyJd87K$=w+?hvCf-66-()f+DdkHN`_Wl>`?l4 z_4TQZn=h0s|LcBnXU$#9K-I8PnNJ=?zm|FI`7+V8{^rhH;mJ=oXRxMhnh-KUPx#iw zhDV=+dlI#hf*c#`*M?cS`E8ts=SitUA0Te)UyW@s0$;5CbvKKb zmf=xV&q=X9vX1s#Q`pupXvweCR9nJvdb8ZrGrP=JT6ImcRQ8&wowj1B(2CxkQ_I|z zPT+ny-Pzh$VZoqri21s%`aO=Tv2mCO6GX{zPMNtTBeHyl{69FV43u~B;BzQa;Kf2@?a zd0E_W|H`aET>^UMQV zPWnnrek-!(wALnuM{mFX`*cSzG55sb9rLp`dra?i$~g11d&0sbUX$gz3>`7XjS0`R zYW9TxR6V8Y@3XYa!(_qMZo%4~Dr<$8=Vse#PY*3w7By4XRxe)W=T?!=Hm@d5m}r*B z)As&b_*7ku-L-R{vHUyzF7#V=|2(OB)vk|C%g;In84FbYTTmasqosY~$8o`YA1y<*BA5!qKkhmU_uxtse)w$xNs%;6^wTf?&pU9M9$%o5gKQslC9 zjoo>@9o}w-XB=kYv|4X*&B{^xk&$Oo)8sR}Gh%g}csotkrEHf_vER59M5$H-Zh;C5WVB0Y0*`3E*3Uew3@-Va@(Bsi;teqDCib? za@ze^!{cBt*`536otI7vn5mm&?*Am#)&KI}cvC0O*?Q>)5vxyVgw5j25nv5f(PWM+ ze6{PbW!tKjW55cAR;cbBf!&XqEHKOzS08ub**`6xj4@dek-A_|(g{2NsVoJ98)4#3{{<6Kn z*7%yp7LiK+sIT=u*ZY+`%b)#>+a@$p)?}h zRrd1CM8zpZbNW6${kQg1vCd|dv%7>>7Il1jxBBI!PsjaKRsQx^Z=bY;Kf>v;BTJ`p zNoqFh-et_a>;Ek>3cZ{9rk+v#&+yXt`ROS(lhlK*Rem+{{_TCzlcz%@z9MwOqyxG6p=5RydNv2v0mVcM(vu|SW(ong#M0S;iqwkA%U)Q)jzf$Oz^=en< zrj2tJscgO+5UT5{d};EMkfYf~tqHdlBuH9b5|K67@W(YsrNysaAZ1pX`d98re#dsU z>I>~C`qZ*qgugt1Ym!sb`YC%k6&oT{G;iK*WU2M`51n}Z$KIMm8klW=%y;X2i8?y}Wbf;w- z?+O3ZCTUt0Ben7Q@@cNuHh7&xYCx-50W(vM-iu$I4;zn8y0SF! zu1cut>FZ7=3)!X?I{iNUVWrDWrvh;rVLGnAulXGCurt`L1I8e})|4C&9rN*F^s6^Af!L zdgjJ?lU8MYV%C4_Rhi-Z%Wsl+@B%aS+j}=gg&SsZDjrC$;=X+1Dc@~ACv6KOwYuz` zJP!VopP8(UYuK>#OUBAn%O@owpAKxka>nM+tp#O)J7k*JSKr^JQmB$Pb6#AS;gN*H9FvWnOO19@yKEqPRXucR#!2 zKfzlng)$}=4NR_NI2Jgro?Vomwx#Cyp^qAmf_g4+L~%dW<8O6YdC=;L=~0)o*O#x_ z`p)~R7Cpn+e{%W+7di2gl4V(6i_R=PclsG`&D&K+ZX~U87qm!G7H%tDc&GiNh+tmM zzuc%>rpr4QbGA*kiqxLsE!A_7C1A6C!}g_fjuxvmFZC-96w8dC{bgzJrAkkejhzdY zPF2x*MW%%EpBX%X+eO-E_Aec3mzkvF@SPw23CH zAxuY_7*y|?`4*>)xn{k&6y+ak9o(rfLqa>o@YB=ID>>hDb9zJ@cW&*w z$rEN5rxaht$kr#q-Fxstb2>|Z#Ns1KD*h8@7R6kVYI?~VvO-qlKs(!1T>;75=;yVG z6Vtw4vsKW4hhwCG!U+um4@tUZA*?>t}mXhqE~o|enTPF?e2gRYob8}t2M zRL6VoxX^AzrH~^@OIAG9($r9Su|S0RKf_N`z4J@No7V4susP+yiE}I0hp#GP?Vo6O zG5q9e#g}}W&uU~Ttvt8K@NClR(^@Lac)Di?xxMPw@h+@%Z`suD^Pv=u{)WP1kpCT6{9(*W#k-`(~^c2~xQ6vif~dq3g|;d%qb5Z#XxV z?L^+$z_OjH>JGP}jBPuPd(S?q?l(nRBW3f%2_bf`l7d!Ec(_>T+|qNq9&0XKbhfza z&3U8Kp)Ulkdc_=bjx8}Xk9B&|PBAqbs%0M21Uly_9WD&dUBjDq?2ytf=T)zE)&DuH_Nq$fP$ff%sK`07tk~oC+`gPz zp7-d`5ji(g3%-jBGdX9w@?UWXKY8$5;fW*fE);N9oSD%h)SQ!3)3M5bVX+hI0`v!-@Nmw%0Z-CsCQRYx*w)}$#b+7l!K@7}*T z`?XAUZ_@9T2Ygl84R%EYeO2ucf1-5w!PbzOvYuz3eERhD%xCp=ryi|vyQp$SsP5R@ z+wV0dX}nT?5`T98KI7PB9P?X+>r_>&Iurz^vZ?)NSbQjR`$~7f-Z)wqK zlHpvC79sXw&C)P??p&VDc9Yv=9r>N_&$lM7_V%7P`&Oy z!{*@Jjr^iV(&{3GH)?TeJ9t*7-Td;gD(pjzym-`-Q*--1h%Qle*&w;@utPB`n@N%L zQE~kn-zrsaeIz>dKf@7EwtqsgbNdf0GuaW?lh5<$^n;qHkedsh zy02cC#q^)yKSN$3pQ+FF&xdSO=S@HIX1V^$oI~r47Q9{?Y znMq$WF71*xUjO0Z<};gD8v9LLa`JPO+!TlQGFiwk{}Fgatas%dP)BD0jKMdzYt z(kX{h?yN(1nJzskdM+zC=gX1UKz)B@h2Nn*w^SN5lAl(lh{vUb?D&1A{B8lKOs@9K zpNEzzoKe)`WDv-S{mR3-P3F<0yd1C8#~(gdZ|u-_kkNSe?zmILY_2$)uaEvEJI?wt zub8LOSIlGOEY+Y@*`KbQObhU7?NHNX+^HfhEqg*_)r6(Ke;!QkvBVXbxYOW zCAy2ZH-4|Z=Uh%pZP74CBycW8y_XkR{a_U2-8|0AK4vwt3S3v6B3DbBf< zlQ%icypVU-eB=KNW>tI>Uftiy=oja+sB)>cO3R&$Yvl|ER^0r%a=asCd?gxIE;Vus zds^!%clayg<|{Mo*$>o3YCV!*u<=o2(2x$h|9AFRL#9V6nyXj8ShVbA&9k*HjdyV^ z7Gb=y`bzLFS-rMziqQdP>T_+|bc6eYrl|`Uv$I64UAL7vT|`i+pOM)|~KCsei{2*_rNl#IHyvo_7qna&{pX&(q@ud<}bNykF6~O9}qIOqiTq@pLR&4)g{i9Q`z>%3F!){PfG^bA%u zN-SBmN<{CU5L@_e0qyr`eJc+wIXPeZvx2CX#~J%)8BPuBkNEaYuR8W|Myjmuqc-1* zH~XE#pNjl^7O4|*RrpGXj-ahU(8e<{&HE;OG`f9dcbURxKg%AMH8p2+KHl@5^^`~E z-Lg-nwRVN)E9wOfeaQU$P?o zPMS4o-p40DyRPU=%1(+{U1@lt@!cuWjvc%mbNGb~i&>VgS-v|!@$^d1oUM834|THg zF80TCHj7$ig*b&2H~EG&O`32^a88pmlSi3~Yx*+dDw*?&uXGl#niR#eX>QcMNhxU& zFJ2`x?8#XW8RT*C@y=VXCI4RTWzQ}MHu%i+VXD{9;Ock0uJ2qTKGv|@w3ulzhriS1 zX~04)&yD3pL5nz^_yoGetGQ2ow%K>)tUXt=?u1shtn}6K$?3=u)3ux5RBruv$%Bff zQvzEnrYsP#zuMh2O-vzp_MI7**9-n!`NB)}sauq}VYuR5|9FiR3k?IGmAp6;HE-`> zm&fn5Hxyj|#^z-CM#uM~HtUhAKO;A>hCJz&n6^}V{mIi3d*(HyElmHa5K_eZ&Y}L6 z{$@wsu;hQ)dX>}DJLBzdJ!_k_MCJ1O{|txbWjJhC_b|V~!1gO|N1oogH=Z-v!lrQX zu4eGm(GHX>QCSvWGBZ{7#wkud#{Ue-Q~!Kf`?W)E-PtT-xdoq}oK1M?x#{QCmkGb6 zPksw}nI4vOT{&dJ&fs&)%3&x$G9+UVZs8A+<6!E=Y3gMwR639X}OoH=M3?cZCjI@wpj#C%eeYz%92S2Y}PMR z?bgm&oX2s*|JSQ!x*11yGfkPqSj6SAR`1x(nRmZ-#29x!+&%M$PNHVQl1Yv#O-_>@ zDt%`=vLPt%+ z+fTgZIb+8f5#V38WI9WYij<%ildG$C#EB0+DSNaoo2q;-<#A>I)1Xih>ab-i_qIiv zUw_UGI9<$h-MVYta_g|JoMW-vtJbhtJ)P*e{g%iE9S*4rY{~&?);#(me-16tH~%qZ zl7!R^i`<}h?pq>HA5sl&naQ9k$|=Pz!f;=6Lw?JT9OsCHk4n>57_Yc|q%zWKTH7Sn z_LYm+qJ9XjU6{qN=~^%gXVQ_g@+*yO)>Z){1%9C;L(XCp|xo>!Tzl&B}YG?>J>5;D3sih*k_P}E6@TiWE4?B~>lr>JM zvx*8Kk+S@@plm6iJx;&`vCAa?HaS?WdS$ zoY~{LG;Y<)IkQ(4S;%hc$X@v}%VpzFw|V6bhkDn(l8fL@x~6KOa_pA7cCV^ZKL4x* zAc()b1LKAN;hVuvQ{?gRfB-o)NK5PDd{=6YEA{jb~o zpVj(J>a=Pyx>;7RHa9eAQ`@rDN(Y3EVr6%HPD-x7_2_uh`9Cuy%t|hLnkITQBYamoEP@=SL3tjSh9QQh&BptHc^#j$HF;Zpgs}BB^uz zo_l$(^;>4`~Vy4f|PM&?JfqScWNLM%eC)3r(Zu1_Fmo{?{36{BXX^G*o zeOudN9p|U*btrnbuusb6+|0$1+D{(s3-7+9rMq!r1A}+sniYJWimiRZ-KP&f3EAk+ zdHHaV)ULcMBGpIk2_M?<$?3yn&J(he>^)fQ;(xTOYhSSRWtNr;zZz1R<~#Arzv@Yj z^S_k3wXQv#6?6HtXk4q>s#%LYO{OX^?K+TBuq(w=p!k)0oO^-epQCOI|l;_@hVdtEo@j8(lsv%!M1n|Y2e zGnPn+nYXq3z`K%OPb21xXFbwZY>5Su5v!TIW8?*=+A4CHPCLiQHkJ9p#HuJ0+xd@z zJ{t9GU7(i5ac&w5ZK?6x5(R+^d9z>s`E0j%wr1Pb*P9h3&qxU?7;0&{_GzjV zxSmM8y!D`BDfeTAHA(h{_s$edPyBau?)lTjUWY!$X|^Bs5uP&5Rp3qBI{xs#RcmsloPJr&fgZWpwPAq?}x>llIDS@3-6yA-X}&Rb7Ox_^#sXGpnEC z`S8+YofRG`r#&T0S+r9tCuuvGwa?YQ8hXWYS#T^ZmQ!;zGn{;R*DRSObMpB{Ptnb{ zZ2vR-Gu>qVvYyxe5S!~|Tf-|tU({S5{PUf0AYQb=oxPrrkv^_mZtm4yPO!teSht`9`15Gix#Xrb}fSQ`Hp$!vZGHZClt%I9q9#v7Qnz1fmnr8v`8CDiypR0V_ z0=&IE$st#pHnr>bwHq#7&Mgt;8GJ?EJy6B@@~I644e#{KpXW_I^zos#u)z1)3|{V> zBjSczjwD{y%Q|yp(tiel8%<7oXPf(qHa|P)DjJv0YZg2CtjXlddvg}4tOyo#cj90V z$k*xSeOICMW8)(kZQ^ke~%u}pG1Zq0U*WHp( z_x~C*S6bIM%=OC8T8BxYnTvHEpN;-^?%@Ttp8Sd%_jhcRzK|rWRrFcsN&vfvxK7-= zdj;<_54zbdeWWtsz{aItHx`uHXWN`BZagj_Hjy>s%-RJapZTy zE0&`ZCKmNs_jn&pe!4EHP}oD8rOWWgBo2iaoT4&24{WG0-{&yB&r@rPv;8X;m+nT^OpY|GqTg-6Fe{amS6@wX2`43=Rod;N|+I?B+Vx zGPc<+oqKG~IQ2|8a;*Z+5yUnE!=a z_?+&UM?yO@Op?zYYUR5U+&_i6ReSerLxl}`wIbZx4K~D|yO?rTap{?=`=-lJ-&4!V zdbxCxTEcqux7=p4->Pf>XV`x1`myTfwLv?If;Qcp{NhUb_0y*`3XGR|J=}EF^wk7* zErb6I$K#^*IDEZuMzbniZ&};4Wlg(G=B)_YoV_t=!E5$+lfvi#J>KrrH%nBl}yhyoqlPYxA8US=e$KPXQ;JtY74ck zicFk%M{ilegXq&5mqaxpru{u%?Wb;+WZ@+?GjEcUQ=^#rSN4Tvy8~Yxag*P&W9h>U z79}22HcfIWX5dJ2V4VNx40F@jJ%OKn`X*bgJG=Um#HW1=n%3#-EfdpvHsg*#X^}Hy zszXHHZaK!?7Jg=VVjeCZmZa*5*w&~!7ggSz^Q>1)So*G~%gQIGHTCouFLtcB@}qRa zYGb>`u1RtolaB<>cxL)@kF9I;^q{9xC37xccAj=zs^4nSLWb6RkvTM91v)oWTm_4n1XTR?gwy3p_mLIZmW!Q(E`m$%Y z>TaBK#Am+7_h8AQ8=Lx%NWR!`rZlzTRMvzQ-R+Mmrh400OU*f|sk5<2TGQvqO|7IF z=R?abT++I-^IHD3TKlUv_w1}O4VnM!tLYWDMFOSHI;+%NOSK{eZ<%eoU)g_5A*tY# zLD7?y>c;}7hVJHP>X5qV`T0(mUfPG7GWMcM4#C>Hy}X4>mLKv^*cjx1;Wmc=0-CC@@$_!>o8HBq271Oz{=z5N;W^u<+fvIU;!R36j zjPr!P-uSe}f2uL3p1(j?YI#Q#*OZL;g3AJbH96|#&T26A+c@h&x^48ODN}Z?dFyEx z_8~OnvQ7Akl`Y$vPPPAMPGbv@;0F_AS(6oV#L~ z+Q*gyeuon_dK(G-eGuST&!O|?o8*?u7VD2FpWo|yN%Fbx*Sv6#i>cjmIdx~6yk6Y3 zyRtaQ&|E5XYU_-bJJ{>otk}~(c3K=g;pNNR&snxQ(KltyHgVsO3HV%$lu*3s>5mc`K#fzVnPEZ@o{zm8D^hGsB*(W))te z<`_^qR8mudzoPZmGDGkL8?@$*@4^LQRD@tZMi z#>}9*p*dU1*|se=yRzHu{-wK>lhW&RPo*wia&FVCoL9>DL?vA)F&0_H8U+L*6~kbgF<(jwA944ED06o(+wrk=7|Js_4ZWNf9&ZM z^Yll^0@v8Sb@?Jo*Ve}B&N=wxaInZ@)iu|p*?W$fOz2ddW)Oarq3d_!3T_78jq%+l zdXzStR_W~2<~exMdjiuIECNLeaP@Ewy zTJS_{hhDCfPRyC0v_&~9Z@OGt&BV!)=2^}hBw@)Po?d%FPVrKz=}x1kccm}QwKNX7 z9MrYTWJQA&rGLYP-4Q#JL@3Qb4PQ;uf~9v;vQ zSFsLKC|kVg^^wyv)RV-G4=tYZR9R_~Q$tF#%a4Y7F_+ob49{&lv00zLCU5=OqFJfS z)=q2>3Ai-%y7Xml&7_{`yi0TLC3R=InVAQ;6>pkT8nhuH)YI|Z6`hAG;@vmczd0`} z-H`mWuj4)dHTIp`ohY8li>q ztLAyAEt@F1bxV4H%G!oCk1USr20uwUpr^W$i^1{?SJ4XlBU70J9D=);gB`n$`#e&Y zRF~J(;E3{Bs@9a0rXZe_u|-$gtK*c2M8lJ#&NiouV`eR0{i}cBGtrrR>C9;b{;C>P zJsXZoWF}9H4?3>3*?@;RCSs-7fekahoI0e=-sQnPZNY)7S}YABCd%a-_heR-skwYP zarkjt}Rc}uSj1y!JslObD0+B%})IS zk9ybh)&jasA9=EZCL7j?G-z)(W1Q{qW!c14k6#{qsA4rMX3L5U2EOZJE0$)Skve3| z=r~W?KiJ>f+v`eQOl!oZSyv`5t~F|WX*I?6RIB>s#Dap8k?MZ7cW+wyEj_;R8QVMV zg9R(&B)(^;tkAr5kBO0QZbnq=fwf7qJ!gDP^LKUndDT`gk(Vz)^U=IAp}jf>O-)TY ze9!RNIwx^5G}l-ZhV(du9GNP__t=2<&60`LeM$Py9vTEMTakD!XvFte{kuaK z+pIgswAt2s5nI!9_bkCLPsOYL%rRi2UgdjGkiUsY0FGt#x<_UlaD!g+;(`Nam_ zg9@vY_4Wj${kXJL(0$P>_v`m&9hDQ8R_0nD;$CjBMs@ySL(@}bXP5O!J_=t_ykya9 zpKWcsCVpOJv0AKIM~Y{W)JiTbodCvVDJrW(6kdlH@vrBdwP~lThwh|h%NE4mTEE~% z$C}4CSbZM8o~;-u5;N34-MR_7ps>$$=}}~+L#xJIpuG(Pa zF5SeTESkQBp0XhRW*3-%e(U)VwCT!NF|XBH!MhEW$OVmByi+(?TMebki92 ztqq81c1ycv_2(bEn#Xj#0NH~g5 z^rVv2GXHxVAVPPg5b%jL6eN8uALLZ zSi-ze?`7D5F5@F5rIW7MvV2L3$Xul-r}F{6 z-Jx@W?woy_9IG(M^9Ql zopj10DYALW#3K7ESN$LAoiupqeMR_EdZk$P)HKU2l`*=JIz?`qPCIokOzAVeEV*hQ`(Lq{}sNjHkKA5Z6rFYHQ4GYgd$2;3mO zG~R7-$C(EfQ{LEI@i=C1ZqmxE=qEEKi8yJ7tmG;sZwdu~3n z{G`9tye*ZdT{X;th3DT=7Tq8f{hp7*`t-x2!eT0#mT9aSX;;_Dt~|B%_L5 zEVb8nN(<>+?6zoqHd9qAkhR-TYImE``Kk-HNuLF8-ZXAF-|l%jm7ObiR;Z4n_1?pk z2h!zKJ!f1mDGR=8DcHRD;kxx_jAiz#&dfZq)n=l}2ew`AR_*+$9Ft!7dKRu1sJO=z z{?zA;q2`PTrWzNPIcH+KZF?ivM{GXu&DKaYNj>42ox)O1r)TVc9UZmO0zH$oye2%I zy6{F#)ypccdj1-l)5+_iKL3jiX9-Q1GWAUH%ib?BGaA{{G|MLRn6s8cJlk24G?u&u$t+S&HGtbQ}6ov zPM)S4Jo`%E(a<18O@T?PEVY6n=ZJ2eXWOUO|!Rh1>y)4ZCv9B!EZw)M7M#I>$%QS(ZT7pq<^U*NOHS*Y!h!@Epyzy zqUo`xxsBfI<>C@D&WjwwU+HhPo%?!A#KUJwWxQTf7>y-+Tlx01YO$IwWHYtu^s!Zq zj56B!B;Wk}$_P#0sgHtW8WS!~@4dN2c*(?PIcbF_idqk^>rt?jXl$K^kr11 zwx#i{Zj~z1S-_ScyUx_iZpKH;O|#~_;+){`IQQc6pOdcmFic^+&g<44xxDj4sR~om5&1!z?R$jo7b3IGysJqOYjw4D@mm-TqiVQZE1;xIY+NJg8{UVQf zE4!C&id*=qQd4eP-Hg=>ykbsGI%a5UESk1*$xe-n3qPs4@UDBHH*wXWs*S=Mg)SPJ zM6o5fwXK`ai}9&wLb9Pnq@thtLYIZ%sqB(sS4(+L2IefWEEjAlP5(JNt>ALl z)0<0KWi(e#J)G%ODlJ)&#&YL(M9pKzLmNA1>YSdiz@NJ9ZnpBoFMIu^XEn5pV`gpf)Ow*MtrM|0bmF2kbFW@ux1+vOr%t(D z>-E)7SR{T@^xrv?YhAhdZ!w#)~%-9 zM(dX>KXb|QvcbgkEBCh8Zwuq`R66keW{UK!;{xfAyq1?|slMuUC~y|e?(Ls^V8@B+ zbB$7E`na5P>Vzbln?H+HWKR1!v#HtkIc-r&nbyUtE~c*Gg63QgUU?V1 z`n2$7+sqmB-Z~l^JZV!q_VV_vrK?Pzz5K)fXVSC8tN&c4N$v9dsIc|eo(`{8*W8=( zkx%dOR73_p@()W5u{@T0rS$&OFEeWnw60jP{O{`1ON@TiJnzgo?78*rjSB^9Y}Vww zT(@av5VM8-$`a-O3@S@(Wu}XItO@yOch2YWpI94ZrcEsmFVFQ7P1Qd%;Z61wVXv3x zjL+@)$kx3+dU@40o#i?&&sDWHiZ9#ZW4`XmZkbUcp%X*eP5+Q?t%$ zJ^grLcZkQDh_%bF%Jk;8G@ngXxPQ5z@`z35jF#t9Y*Jk+yDS!OUHM!rCd}u;)G52C zvWdtq?T}gV;Jy3;`$wmCuQ2v&`_I7ixAf9qzapPU(+_FcbX9e%`gc4ySS@_UmB61H z1suvbl+1$itObvjGaNZR`)137rsA2x8#5GFFwR=Jl11#p6d#`QJ%#TMCvE6@Booiw zHASS^QY&ZYB;(T?7jJKmYF_tGX6>2&ps#H4Gt>SvB>JuB>6*Va;9^(mERQ>rHn}vY z&)h6;WUDeex40(Qd)m~87g|EK7Vz=@XPEZsl2xBPSHX@_?y0h%#a{iXm+BkHm$H>fyLJ_)a@QiuBreyJJn3QYqC-y}%NFiA zbn3`SJ7K*f$DpDsPr}y4{uKDP@$8RzTy7F4eI`xTDhlC>{Lk=qg{Ehc$I?xL{~5#* zFHIbIbPWE7zB*%IG#vGTZpb;^pPQ zJ7>JQraIi`Tcqi_EM^3$~<51_e!LnWnHSD+2JS+oB1 zu3OE9Ss(1*EjfK^@!tM_iPBXZ3e5@O&CA~>1x%T8hGWu6wgyx7z2)r1!fGFWPD)*t z$LOrr=D!oBw3=^?*NcX8ziqQmY-FEOCA_FAeDUXBnV)8U(z?@c(Z$SH z>looMNoirp^#E~(-h+_}Wq;o%t=V!|)XwqG!GP%L9vhTHrd{pO)QPh!mwRlnN2}$b zfzK50(?OO08D5@AujLA3-1d%F-7G7vFY7%2XMO%(+6I#*9LvaFJa7H!GcVUAM_!J; zYx0h30@tQp6HIQ4eM(V0rr;CjYAMDWuBq4@ddJ=%*|TNZo&p&!Kdq$VeJPt{xpI!G zw#NG&WQkM#HG4%B`?Afe-YiyJ*tPt~s^!;hb6s+EEVGU7ye-NrjShOZ_aIA8&<9Jc z+07Dbk~*eM=bXlEEySoEa3FZ@JuM4<-bWE~DWh!~&!po>*k=G)x zx_Qn#7(M%JXkhr0ms1#Df0nX4lDl->@}Re8x3eUDXP9zs($tEemI;;Sx}I6)dvbQj z^Z03bPFG->-p&wEj!~h zC3fx>v7c_;o&Gb_QoYwOoEO)!Ic)G^z3}HZ{FBkGhW7^7On<-Wv3w0dIS?@&7Ft#_AymzZWx%v2VK^w_UK`&Gq79Cyp zEp3Mwi^aA1*Gs>C5iz|obFEo)ybhaJkc+R#2QRnvrU!n1(mlmi-B`x(F(X}Ct!B#_ zo|O@PLEOuxn5;iqoXhR{JUzub=D3#&*;o#;FzSMLOO6W=7}+S?B|FZYZyO4%-X$nlhplOtoZMZp&ntrdG* zCmOlDiBlFfe$4tQU;;ysqe#I6sjvl8?kOIbSzUTAdiHbfsm?YgpRa_xPHSnsx=_)n zapruVq-!iZ%F=a{JTA5iB zB+vb4;9#DTwepJTM9=Bzwc>M2_5Hq@sv4eo(ep3n(W1gv($d<^*@l<68>N)k-rj9lCQu;Y!F;)gytsUai-9{jPn< zM|R~?8k^Q{JoVMG*QCt&>E_k^TMo)S+%vV@ScB=HEc^0k=43@-gNT=vQ@p%cG`d9o zOw5#q@gJGW74-8t{Mhb_8x#A&8R9`N$~5hi_L@v>9~p_rWk<_Sq&(@j>t+;wF6 zbp5DPnF^7iPba3ft&-o9+M{5#_7#hws)Ko8_T%V|xmsBwO~5ja(Nc!WCywT+ zGxenGIhYf>fi&I|wS+4PV&gF$qPrTNUn`!*xm0!Hgd+lJhODYj+ zldtTGO`M`_92w_(gZuHEZ!0V2$O%;*S)zGi&1#-Ex-&I(-p*9(&CQf~-k)43W6a;_ zr>br^#Vvv1%=sxRH5V3yOmg6J*r+R)`C-1bsBHc<;VV~pHtQ@otG(pxVYf^F8NOYc z=4c&J9;@sxo+%O|WVq?@8LuBU3l2t3n83V|n?c0z^S*Cu%KUB@eAaJ2;j(d7(uGFd z4*4gi<}5zxALnrF`Q=*wDd~5UW%WFHzHB@`>*x$;+5WeQJ?=VNCihO*ePB~-ZR0G1 z^3RONvbzdiPFj+7L__F^z%NUWAj>5y#QrnHTw^k@@o)A&{Nd9YN69X(_*9vL+MfFY zHcgjO`Ok3Vh+5uU%PAGjk4?S^r&qeVO0Jy0Y}GSfndzb(>zOqe)7`ov8RCOB%?-Qt zWAVX4$xjtNJ5NUW9G)sHB(c;pKw+Ahrh+(+W48?BQQwCVj25bil`a<~`hVw%@Lq6a z*vnzECi&9!)SG4Drfh|t!PBp-%3d-lvuka#?4Id8P2Suauih=0mz%qvcfP_yN2j~K zkE$<7?Y;RT@^!3FPH@v)qk_2#ttuCN?8N>$yghx!*zwo5-KMW^?y-JlH}lhfhSxef z<*ZIkD|dRcOH4Ja{?ERSvnQTxjt^H?))d+hoTgIobmhuZ>vZ(KuP#dDd2@`-pW9EL zz1#l5EJu%pi9xIQ+m_99oM9YMY}0Tw)b~oKE1$2whTGc3k?en0#s^2XzF9hB)3)cH z3Uk+`e~?rBS+vG1=hBs@Ds!Xy&oG=?Q6(6$_HZH3r#?Zib3s2t|DAXA3T>7vTaxy8 z_2#NQd$nG5>HA21Ti~H^%VJ?gPpSvg>D4|dhC7pVJZ~h2x6W6qxiWR}ku7^`4*JHt zlANFIwp#GO6SeYkz0FNdbMCCrJh*#W(Sv}Di{=xbZ|U`(5t4Rj-li_kS5X;BbGw;3 zpRyS?Z)r2|-QBo)!i{g%-1k-Qt0g)w3(EP;bY(+TkkX?aMVGcL^H6k9?PLxLIltm; zZfEWyz2+t2TCJbCDvdLZZq0mZsh8+=E7~R4#4XKcAx}$V!GZ4Eg>4_^X>3`Wuy)!6 zt`8DF1AJ}b3UOZJ}r zTy!dLT)$S`sbwpt{5)yeuwicA(cKn1@4Ss{jd=KEq2`%sIlEF+Z(66CGUvL;v!xXL zlh=z=RrPS-Tzbg;kn`lov^kfvxGw+NzU1>Mo5Q*%&PB(1&kDJ8kNH}F^ zA3OO%U}-5^@Ql>2n$yCb?)1x7DY)Em$}Pd)hBbAEw$YWf#}2*y5nk{os%Wjz)&C5+ zF3e^8UccJq)4HA{r>$p6I~8!I)yLJ-^i0u>S96vZi^ps>$}E1h>&>goU1j_BEk3ns z>f+CKr>F1km^Am^18pVN8U6RU#U3QpDLkKaNn-M{XYVuv_zjLV8OJ#Dun8J{IN;;N zyu{k-z?l$cYmFcA?qM727N0g^e>L%8U#{QVD|>ANd4Br56Ll-;Ua?m81cTQ_jwh{O z86re(Oto0Xmc99Nfc+ALSDlMVVF zrdVdg`pp!wWlBIODfV&DC07W^%L^JV=+j>D&1w(13N@jm3zoAl=CW?spIDizJg ztRERUUf#K2)ysq_vgX0QD!SjQ*Z6C_J8^J*u2X2Ob>?o>w&w16(j8%;o=2`G|6TpN zGJI;b^t=;3aeCq9eII=t*|wj`an3nn8`o8}C(}+U?YQ|%EzXypcYf;Ia$3>r&MHa! zjHvV zZE9`bVO|-xI+>LfPmFSBs_&UrIcdV<2|>=XiK^c&1u<=?N%K4`E~=szsP|>Ydm9f~FKv16shPUM0vnth zE?@d)*FKkpwI-b3u4>X$MbEf}>%1H4%FNeYxDo8@eQ9s>%rwzV-kg%vx>k%kW;8Kt zNUyvf+S<5Z?8zMGjrWJ)@PF1*xIm=`Vk?VicH=2s8> z5+)ljS+sIp)4U9gER~{DuaZjVOX&#~bguSviO)Q5W87sG z940v{c|2fv<3D4P)?Ay+>V^BZSS)eVQA~_qvLn67mwV#Bd*Mluk5jEhbtNWt&oVl_ zFT3eMX#1fc-*QnggN~|y{`~wNS5j{-EtT~S4cdM6(iK%-))-g2^Pw^qr*7HA@uWA> zoBvOhZ(p0(jWsNDT(+K?!#v@Q!`U!}y@#b`?+5b6rv@Kw`EuSNDAI>RGs^q9qvtNS z1P|TST^3gRLmkb1g6ij##HxzA`&Eigxw%zVTQaeUz44ajBR$3pn-lL^dc7o$JbJw^ zzR^Es=X3tvRo#YWq1piv8f+@f@mL5q5Gv+%69Q>i{hk;o-R?! za=!jhwQWklms|e|FYY*1(Xvy^kHh;^$YF!Pk}HOM_D|P3R(H%ez3Pwt>z>C3>zlP- z>ol8o&2n%Gan|wRYWh^EWS}yY>+T_=Y_ZbV8#*y%dod#_Ut&-#5Gx~ z*=fQlm3=|8n7A#<6>N?A=DxhXI9~W=!qOFIUT#vIwW~ApReiqSWQkQPU%5_u)AV`5 zwT))IO|Q)_J`xkR^9=ma>o+Y)-(7j`_ul)$kDAh-JQEU1-xTtwWx2`LpI7||ow#?ms z+d?+$?))|Py;;Ssm_}62m^7v5?n#v?6O$cS|ID~w{8r4yol|3*44c=iPUb`LX4e+C zuMYg9q&x9SP{@Zs#xdO?k{_`pNm05_{K^< zxnGxVZMfDi5~=2WgyoU&qo64=vYZQ_t>TS3Ch0#bEplJDX-(i7OYCLvv zYLUOIYqaZj^HPV~Ir7|HYFQ~7d~Yrk>p0Cmk$U}}ygIX%%ZDbmP;vE8ArHHpY5GfJ z7ySHJeeLY!$cwY~^z>YH){Rx^^O`kZK{F+AOY)UPw}l)Nbe0S6Fw+W{v8drlQ|uPD z2W{&Ad@|m!*tZtElbI8Q&V4#FKPbdL704$Qr5|!xL${qSt_jx6P>&`B77o#g{`XVKKF3J$}`GqU;a$%H(t6Y zaN3zn&rh*`wx$_cbS%v1mKMn?xpTGfj7QA9ZxO2f28@13`mZMY&eGm-Wd5%sE?%dT1f z_N-a{dcMHobDP&qIc*B*3X< zndLQO)&4iD%nnWKT`oFNyY;jMpHxui=X0qVU6Ynoy;wBAJ%K}TP3gCK|0|Y9O~XQ+ zFB_x^Ub*b(J)L3Wr41S0l?#vd&E%W-N?~PwkC21Q94*zQDuGw>mb0$Nej{b@ckaXI zyc(zHg@rkFO^rD?g;_A&*O!rX@5`+p0-uUTE)fx#_;jLZX|GnQ!XieQ1C|9|Rz@4% zzuIvpzw=>Y7AwQr&?}~n(!36$o9|7mZH;=qkS%F*)ZuW=4LcNGo;beh(Yoay&Hsr1 zunx1DX0ql5--cIl^8`(F&TL2v`SNqAj*)<0$Jc{#qOC_S^zKglV7u67pJMOBOizy) zJBx4naFomx)?DAoS*H7l&C*RT>W6=BVNlPmWEIcoy>ebxHB*?2PCF_#uD%|x>wV?? zmBUu&f6cdCw5n<5re9KBrTzb&zi~3_%v!W{&v}jGB7XYo&I|6m(=AyM-0D>DBP3AK z^-IC>q+1i>bCp*o{%|-rCE>%SjW zd4YpI2MZE=CT*DMrNte%2duJTk@`LpC;j1ApLGf>5k2e7nW)Gq-xqpS(Wg)x6d~9xVSpvb&XA3n!_x? z2d5u-tYB+sEKl39E8>;Xwq@H_#cNfGShj~Yg{@Lu#__f5)|Snya*nhx&dxhBw^7q5 z$>80}u7t@CP49L{wOy{*Geh;EapyisDe;2IUY1*5*lue|+i4&8m6^UdS8GG3du2V!T6E^2 zgxQXzLAg4q(Z1hzy$)mbI2xzTF{diD?TGNigy@2+?B{d0H58sZar!80;&Rm`XC(c? z(iaQVd$KT}*wn}+nEWbVOyiHsIpbtjUUs)He?u<$U;ZK-DmDL;rksHk$MVuho9%@O z5z>))(N~TMPS)ntis3)lvtkKz`n^tnAK}|jf7cPz?TX;_#A&mX$uY)AV1-$ZIsc;#{`2{mduWH0>cb;Xgv6Ew z-H7|Xl-r;%?NcXD+s64?i^En=*`zmnH5>1}@A5H5m70O9k(H+V0{$ttc+T<4;MZ^u z4t*~AbFJ0$HjT|IM1oH0u}g~&`?EY&M#(Vp>Jxod)-$fhSdH3M06xfY5h zaaeC|y53uSbY0V_7rRz%wm7{xEA_;(C0#4dd-{EiEAf|o$SI+4w=?7WhTxSEb30Fb z&?)NUOOqDUGINtrgA7 zu1{s!B~+HXe%p;R^_Ml2$)v5F&8d)X`(O_9P6yCha`W~^;^^jNU) zvrCW9&Qqm#H3K$qvP}Bb#$ejnpY-(Pf#fG=W^pak*`U;@sIaSZXQ2bfNN!2`gH*7HKUpwqDQ_bo7LG)-Oh2ubcj2-Rzeuv+Pb^DYcv%D}H0^rjsF8KJ7Kke7gTlW>QM_3WXCVZgp-m zv51K{e#YlYL%W@rW8ab;$S201QZs`i?`7Hux zrOX?)MDF@=`r%ccnR3xHgHyM@$zGj#|8&3_bwlxncaEuC9KV1a@Ai40dz{@t zT^B-{7))6m=O0~|^vgDCy+r;|&r9|{%SBC%y+RF@wLHpmW;O&|?Vjgf^zoTg&3l8F z@3wsOS6b8)8u+aAva5>Dqht14Oio2PCxu80Ex6es{y?z&Acu`r$foHLULIzvoaN%2 z{UhEATjcbF^B+uFcHxwlhs`IJY0{s~ShmL58eXnbNS`U^?0fl2=*2Hz>Jy??CN0Uh zc+X9N<6d^mtKQ>F-hOX;a-Zqwhm&cE^8&<{tO)6Py7s_an=TQ*8U(^h7PM`iERkz3c8hvGCuyCxLfwb$`k@cq&XNWy^-rXOc1PIm@+0R;nJb zYA-Gn^_MZa)3y8Xi#?8~vR-Cwwpuy;&!wzTZJzs^OZb8lx!HQ6OssOQ+cY#FJ`oBwgPYla|Kd9pRC^nJu?g``dHr zy4z**;)JpS1g0ul1_#(U>x%LJzz^B3v2&HNGfpMh2F;Z%fylQeGN$E2`t^wW)y~N0^_j+`U0j+Vl1$#yb)B%ZfYG zr~h+sy6nWu5&lVjZl=ffH_xTr1!mqqQ?h2YCX1mb)56|HmiC?NCiaIfwPo|ue!9$y zZ_T9g*)hQu;SBHAMDApeZQOa~`m-BXz31$l|LyeBwS_M<4*wS1b4b)@{@o>eCNQkv z(=0d~pXj?d{ljUSQ)w?>)y+P;EOMpR4Z{U9^%uF`@ppLr%IKHJq`7O?I*TS+;c)HCl{Ql7X$y z_nr~MjtQpkSPzOH{bt|laa32Lv;Og{D?xeu7j|6Sw?R+$hM`j8@elVu`Z9?7E!(2G zDC9rGt9$Fk-wWW~S1tt)EZrytizbylQe<#OZ#ORdY`n83f+Z-EnL=YnNtZ zL_&k+iAh>==Om;mv!(Be9)7x5V5<(JF0+Q-SDj+ZI|9EgD|SD#4Vrpa|KaiQG=b+I zj&1prvN>&&!ln?1(6u}(xD6tnx@juC?s__9+v1jqy6ocid`*)UKW1FVtHrCw8|=Jp z;>-gD!P=7D4?lJJcldC~zuGR!)_7$SqvxsJVj0Er(q@*2e|f4`e{Hf=_)($DVJuF^ zvJMLNzG1z}(onhTz|`ad>!kIpp#7oK6`oWzcxS!2uB!GpJHUNM9#bB>$Qm_<3ePjM z4ZNSpob*5BxMJT&kJsx~tUZ@_+WW%OD?e*>tq!gCk#sq`K)k%T=u$%t`@8PG+jkS6 zdk38_%P+ql>}&G#t@z!U4>o;x8mAPt=uHW~16${ncM(7Nml?0QuQOe3%}LE$KDT-n zaw=ZwTB0|*n@zDX#q5UQtsnKmQ~I7Qnc%?t`_uJ$i+YKNlhl?asZX4kpS?GJs^sx| z<~iqRl_ruSCdc1i_ev@DV}4c=6TfDiN9gFl?RK4u+tW4c>&J|1%v46d_VK*CtiLg z8ym@dK2+4y*DNu5Qi!v1_VP2Ck3aM3^x4m)LCNG`2c;C!W`GCja(;Ox?AND=M9Lh93XNg&EM)?(u%V7pS zOob9XK}$~0*%>UrDH$Zjmla~D%^1$oW}b5~`=hu`>!w{Vi-I z8P2>s)5PlTc2~`aRmk~_(UBuDMjKUnw9p-b-khoG4^!zN8- zU6NXU%0WxTc$Y)b)W}T5R}z)8ZG?iJ-_^?MNt0SqzPzF{-;7`TlGoqcYo7$&nahyh z-J_`OtbE9k)A~!zEvsb=%hJT^%d(mnSywDyTV|r{b<}_9D>={NGiOfP`9}tPtnFIF zWOF3n!f4mTUEG>h{~5mSxWnUflp%eIf(64JvCUD(PhZmgab&_ot%#2;`x0_LY?L`Y zi?6hIk=**}t7;Tyu2dFyHnTj~^_h;~Tv3tvsngHo>j;=kdgUR%du7|C-6_Y!cRZSV z%Ynn_^uC|_?ngX-`5=Ds;|HrYJ@Yi*5mvIbk|FYILX3pCYx)NFYZppQ^^$`pT?~D) zv{=)08rMgU3uomO%vLz7zk!e7zLUrmcH|7Z;jp6)y8xv6N$*Dsr$zMWo>{#i55ZS$rz zYh@=naCovr%~+|hBPsCbnwc3I%}I-xg-n*M%ZbmBmwS{kjh~C5^(2R;Lcd^Igc!>b z@qmJmNz3>cJI*ax7!|X~Z~3aVE6(2VHoEBVCb?*RM@CxzU&Hx2ni2m#d8~Y?ohHQ- zbF<4bh^eS4V#3+1ytlRfRxam+?M>dFR8-_+OIW?+YTxrCe#uGGlV=`@dbQ-F(YlEz zimgOWXFZ;^sb&(-v1u0%&k0&27OAadeLPgAh;c>ZGj}E7Vo6;G=k!Q*Ef3v-S1UNJ zg_nKVeALXxs`CHB3|#-lvj5>|FB+J<8o7r zDUmgjx6vdts;}7g#Qr&=85WT{DsQY=S>Tc2%hx_{%bxg8c`Yj!%v)7+e8n#F<*Qa{ zJzBG1#boPGN17CD3tqVYUA6bGjMZ+<4IA_)_RR5^GSl1AS>Pn^`d3Pg_q{HCoc~C6 zk;?&Ly^xcQ3QHZl%gPtc>F4B)zvQ4CEc)t&Po!&n*wdfOytIr?b)-z|)0%x!hJ{hF zOJV+xrc;*trn$XgQ+)jF`w>qKQA;*D){|tr}mWyq<|1;Emw2s(0Eq2O% zuGWLeWh}iNW^Wr3yi&Zir-shjyXWl76aPL2w+Sut53|ZF$Sn-einzfQcjl39azFc%`JwttsU2Tc)#Dj_haQu{aq|eRdxjJ6nf`SyrS@_$PUZ2 zG~1_4E@z(Ye{)XyX5TG|rC(F?lH+FumYj{bVCyTwrqy{m=EQ+bD^GvW{~ybWwXPSZ zPEEI4_@6=lRaT|_=`+o5L#E0_@`Sw9SQ#5IP3>EO*^cA9kDsXbd(7-eyr&p+HL4-f zb7>5#`qb0z=W2Ix&%L#VbtMd*HxQRc*=(Ps%YhrzrM3i&rN4q zta~oCa?!k{mqSe*>~7=Nf<0ppV(tAJKHki%9t4AAE zAGyVbKhpPo`cE}2RAfu!Syd~uIt`r({wq8C)+%4I^*MHlK}Xd(!n9@I*>gr=Yq(;k z#Io$(u*=b^ZE0+)tK!@VN0nQXMGq}uN?kGUwP}dtT$VTNhrXWD4wT4Co?kzqcAA!Y zSk@H-Z{^F2yiUu`Pz-+68}UNv_|+wow>E5_e)!P5%;1&k<;=!cvQNw_>FaHNcvvj- zp0(1#PQ}yhWvL=p?_HBVB3C;9q$>M#PZ@(*J5pD!bie1!#JEs@`?5D@boU*QW7FA@ zb!BR2Yu9EL{(xe+Xz}M6TMrgI>D*`J;m9B>bLVx~@|9Lc<<-w@dhz5$%G%TCvXa_e zHcY#u(j%M}DPCIm?mF9{KRJJbrYzCa42zm%x%a@HjKUqJPiG0NSunlU(Y!FtX6Ay* z{r=B(u2l=3eVAu+%2#2NfKX1xUWpR|Yu4C*+%$A-1 zTFb*9YTQCv*Lmt}Y4AvT)@RiGkt=ZPlAKzPsdF8tJdt&r?JRS%)!23NvrSGl2R^wS zGnp~BY5Ii&vt?c4_ZipdOBuYF-Qd0KmG_$kuXuN5oR|5}&?XsCc{wHVMXSvbW-q}R z%XZtcO*8AMw?3==N@e94Wk<(YAE{+(&MT*V;=LWf^iARIuQ}1*g|4(bJnOjlCojW` z%45$N+onts@Jm_Mo-JLpqj$CL*;!d!nh7u0HO`OGRQ~xvLRNB$>6S$=m9KCpZS z`k*qeA#1R_fr^;wis;|ZCHAEARj%TiZF*4o@FNl3;&{V7al78D^)1#~an`7G;eQ5$ z{?pTibl8u`O};tn%7x<8RxWtEKGpn*%4U`;dMjR&7DBQ;Fiso&E97vU;%(*E^Qll}Ak#*?BfJ z&ehj8=GWqd>y~BQ2{x9t(wT5-Nv66Z%RA1?(_>EFn5VGE_SzJ+WbUF>D`Y~@0_J5$1R`I_Y*nrjKgQFN zw841NCOegteB2BN1RQy$DTb;991U93y!7!ao6U+J?yoVHIkt-}qwCp(r7~M4t(xuP zVHdLMsM*vA<_%l>R1?*bJ!aaA>u9oAzB`w&WZ&^4lT6uu$tt~z`4f0ZU-0OMhn&Ik zoZ4J$ufzh-@N_Go|mMo)=fOWN<%7B^Hs?4CtfPOTQwPO*`8j#Zh_;i zXFry5PY4rg<(zbB!Li1};+|#eTBP>Pe%^IV(bHH&-HSWHHAY~4@uJ2g)1LXOKE0~B zyu2vq-YR{)s9P%)R~l|w*J&ETdhgM3-*@Hazh=&zd(>xg@60yGRg#`{iO0U*h%&Pg zmRV`Bb4Ni&14p^Z)t8&M-Of0zXzQbMnxSGu- z*=xhf)y7X51eRspp76x`Ms`HYK2Zz#Gv2(qsYi3{ul#2a+0ZL)@N(Up*Y~EKS^1(^ z{?or*eSMi3n`X`R{gCdNvXR;och9sYgI@<4&vFO{SBO4W@-|uIUVc#Ngb4H(Nzi{9L zt)P(OPFW9TNm=|9_~3u!lYi=t4o-~#FK?+SyC-G5lX^DAdPl!$MPs0hoyoHa2VG@^ zRGkA&N;dwqx@BH*wqL3eQPG$(i;N2i#L3WWUZYuEo3>XWn*BK z29vF}`xY<#h~U$+pItS)_~h%>bLWFyE*-k0EYg4a(kk92Zsm8T?14L3cw)Q{Ej2nY z<4eu~&3#Jm&#ikCxviu7fwoYgNnYOuNw zOWHJ{h_F{@;?5oYDRuaU;@MyOjn3JKo+-{K%A3%(Z21bcxepb?3WX&Warqu8I&;eR zeZs!U|JE0Be)>H##wn`U*Mafgg9#E7u6nS0FfB{`U^MX-r-#;@1v3-uUlz{v(_6A2 zv&i|=h972ME_`+ON{L*vf#;q_w$c<$tNTyaLA5-U6aNd}yd|=Wf-j_RWeY<9`Z{evGE30m=D5^QMYEj$FtS6qSlV+TH zw{YggrhgypOKyC)`|ZfRGapSQjiyavh;rN=_2}s0WgmQMju!kBe!BFeht8XG42rjA zw@bNQ3{6OI4do8v`!PZuZwq*jBvc=Jq#sg~dkUh5ZsUJAbsg zYO!{tKjD8DWq6~c&b?c!PDg{Y?*U`|H=Zw2{l;<`uXfp7n>KIu($!TJPrptGHm#Vp zPBLiG(l9rj3Ho;n>i+hx=-8|KdwHmGhsSJ%GZWSvedRPq^>9V&;jG-iC8c$|R!&n2 zCp}B^>hM-+n()9daL3KJQH>RiCyZmZtF89gZN9YKdSUOSHB;v(Y6;HhalT{j3st!+eJF-u*RVkGzxBZ4-rEcJ|!o^yHg$h4JLv z#swmm3@c(xcD6kD$RO#eJE7@7n-1s2xm&ZimTBrvJb1l7bjFvzpZ+abJ}qqRf`~8f zvyQ!A|BaFHqC&8e(R@d3*)1P@LJB$*olR!3i~G-1Tkm8l`doa|j;0-TkrC@fwlWB< z5^7?uxc6Y1rt;CC_k}ASB|--4sp-UH!1qQ>mfn;6byJ3ti36CG9lzLi)4{i-p#l`p+P>slh3z z{={_tRg*u4ESYUI&!BHn+j4{RS*ji@Du=4p|S)2M+ z%~~aMi0ej%lSiNyPnAl>V`;IZMX#(E^}fv!4SvF`my{d0c^5B0?87k=-B?pn?ay%g|tX0T0tY9HIe)uH(}+&P1Go!VfQanm`vI=1ad zJkRlEkyhH%wYmt`+if>VnVefx$vM^tn8au9q}^ z$jRsSI4^5)U(Ca4j-Jy}Bd^-jf0bA{(RB09*867@S~jG!-CL^=Q?^lJ!x0&$R1MRb ze~rCmm()I5PELDxs%2T(;)VlrcZOdx>zlUzUKw}HE} zo;^O-k-L9xZimI0kBLSx=l-hR>rzjx$Y0hn|Bk!uVH6oI}4v?%$h3j zf^mK>huV}-j(-VvGPNhH6zUB~S*o(J>KUunW^R$$OC5H8lg1ha(3=yzV-zlS4@z-)ey3Og5T3c_Htp`B2(1abyj9FOr4tQV7ee{?ZGsU zB)!$H?{z-TTbH5GHGf&+saB1w z|4E!&>P)_hzmbO&q&l0{HtVG_4 zw44=wI`3T)U&{PTzq;zC_IFozEiqrTN99E7Yn#REYDCwXWKSyia&aG*! zs~)Uy-mOwPG1ydP&bh!`P;q3R<3);mQX1r71?UuH@*3x*VOJKx?BZIDkgbH{D}54 z7RvdvXz~@2%}-5lwgpZPbC2eA33-0Lv%I2&X_|Uzn!R&!ongLOqKD7RlgVMLZ){X} zed?_E9@C5?-fl`+5ldFhIkU9O$Ct8ZCv&6-CuBy}bmN?p-9uA{fgwTUzA zOiMyUo~ZJ_8+^WwXS_o~O(h*o)%IIFzp?I&*UY|^aP{Xz_oB&&S>+s3*jF#4d?;7d~Ir z>AiXqN0F@4mP^zBzGOO{#I?-pa8lT1rm4EQZqaFe@jq5D?9B`gnQ@}%&%OByuR2~8 zd7YYBbgSl+=F6!ktsYD=Ryy#Y_9=_E*yB>`#v()YF2$@#E4_kQ4VU#q-a2O&xcBIT zshtN!9toWe^3eTvQpaEa(!!0M_hz5|T)DDp@==k-SDNugk`W@t(`RcH{aQS8(bj+G z-pCm$-3^`6XmRbkeh2ov9(Aa$(5=yK=>LA2o`2t??=^;Mb(@ zWHJP?J97H?!#0skO%hD9V&?fmehtkI&P^AKCRNp} zUY=FdGugI(MOE0URRZZI$L8hi?3NWa$vJZQjDz-OBijS~I~Y1#t}Na=WB#WlQtS#l z(~6gNu1jcw8v=HUH2he3_3^?gla#sUVZ0_El!8N@ zj{Bv`JzN!i*JIa={Xd?#g)!$?w=7XgoZp$M!~A)Z;|}RrLPrjjSvS;QDZD@9XxEbI zo`2O{_>vyRuVfMMNwt-Xo)B14mORhrYRO&qZ(F-&S#Z0lHZb{Y%5!AtRme?ZyzjZ! zsGxghSKuZGK98O15BX#&f96Zq+02RB_~OvQsoI(<6W%P}aJzi1S;d{I;uTiWD;6z1 zReWcO$r+zX?ncK_dQb2~iHju{H2q#FCcbkYW5;2?!^`(f%V4&3wY)kzG2`$@8!>&p zP%EdIN=K|s_c-hjwU*80-Q)k^oz3!vo98V*@wvh{XX)bp^Pu^Klvuu&oC6lW`hzAm zf4KQs%_n5h{7W8pXDwP1Q!VB2Tlto$&m*G^o0mR`p7tbShh6e_Hk(H~uS}hKDedB- zrK%RLy%$n1B>)`j*%vo`E zrEllV7)I%#hv1aKN^A*>u(q$eCThv@B zGzm1?teS8ysaQp%m$P^#!@q6JkMxc&w14$(;`w6tS$&H&&u1F#Ij=W~tK;A#tHr`~ z=RJ2+dot{hHrlvxCp**Qh{|9lRi=L?t0!CA-dZQME0^W1kwL^8EupEbr=y-~2X2;HEV0y`KnE`K1yDi zr+nq2+@i{&m8+IcYJIe(vullIHyhW-Gt1v?ZdMXvot@-*MJDPapV+Y|ubG|!N`0;x zHEVcvizB8V+pVvcILWEWAwtu%q)A4glyl+ku5MvT{*IX`nrvZ3Ry#lYwB8B3=&u~v zYrXN_9Wmz@*##$ybR3WMw&lhUf!=~^2s?`_QL9;#Y;|@N%?+fkzG@E`@`|2qB@$sUjG@aOt*D&246M( zy!_p*%l_}wGCo&ks4O{UbnEG9r751@X02D1ZMuKuf!u$FUG~xw3?-F!PFeZdBdU`( zh+X~Z%1__d&usF!u<~>8jJA}Wa?am>xo@gT`0!rg!}2vnIn$RJ{W;gZRI8|G)uMG) zt6$6r4{#3BQr3zQiJW?3b97;LOiz*BnU6DP{NXCR942mi?d`&8TfH^cz05MbWnrGM zJGA?;gyrSJQ(SXqPImH0mpyY-S#wdu6aikw`xPJcL!MrKY8v`Qa%#+`uwy(f8$!5G z+Wl(Wp>+G5?Z@i{U0PmY{S9xcF8?s=Usz$MDjYC#lC#S7+I_Q*t-bKYEr zuAdF|_x~1jkfouuOzFBcx54eczaJZq6|y863ae_o`gVt}dQa+ZM{7ZKS(C&mLX%>5 zPLO{tHgDQl<5OKS+a}GkIkwF!vgDLf&q!XpmM&+ z&Q4ihE`;CHw7AFT5Ov+4<(Wv8fNSaE!(LrE`%I@B^z|@%y_ITK(3v2gKMh#a8c`)>w>>ve_3W&_7lT{bpPrj8>$tavu&Q^IW~yQq*V4|^$Q4KK9LtPv zKVcJSljVQ>uB4ZTiDuAdl^t129ony|ycLb=`L=y^Raw@YdsdBee(Hzq%HcU#dq5`Z z@}hrF9iL3@*4z;pv}|e6X~o0uI9K2PmK|jxR8g`=PrY)J|_IB);$=!?g<#D6H6*>!5mrGKq2?p@sZ zL^k~D& zG?}%=|HB#nM_g%aEn@mI{~0<8Vw4QI|8!OFyrR;zH07o-+mg$gJVB8sSHIsYeZg{F z?!~{=7cXD%pY%0XEo^V#0swmmg*V5BbE7&8qDzuI`rXne4(+XibY`7#Vb=)mi(3am-Vuyd%1C! z%(A+P^L+Y`a7?qRniX?vmD%$1>e-V%T(z6Je9kNGr`|7;mz`U3qgf9|`r?sSE0M z6HL3_L2%5^Ev1*0cw~gyIPW#dLpW)h_ z%eM2H6;2lUOQ$U~4Vl@s>!AM?qdiA2rY7a6WIUNA9{h4D@0=UbVQ1g`=@V{L=AT(n z5G1PoNQ39Jru?f24MSF~6-z&S{%!i7;q<1ZotvkulM^|rek7$~0=L12sGudY7hF}^ zI_ZP{(q%99ixt%^YF_^EZ{S)PC|ase;LDRu5}$&N5Tu z`BSsUpDSeQ`aMn;HmC{Q+{3c)Fu!1G8jr-)RZ8NIj*I)*Z~nYt)y%oxlZ|x_7%j~@ z;l(SFc6!E?V6B4*r`Wo4weqpZ{)?0H#n?vz@Ph+n3MRv-^!$Tmgw zpgFq&0@ozokFK~qN2TCGimdANFRiMry5=`CC$5r;Wb5VD`o+xaxF)3b_P-bE-t7naquC*UvaqOVp>=^FOg#_9$KQ)Xq|wHCj)kHf}w8J7RH(TOzMo{QPyn zaVI0ErJlH1>YAg_th70M%I!<9ix?8??!UC?KE@c~^=yf*RsdJ-%-IE{{~o*Ek}+C# zW}3SCVN-VtPcx;at?WU{ZfAErOsd&zzhFtm6b6||9ob@%b#=~{=Yo2$E% zHa9*{{a$RaY*Ex4>F$eP*`{qt*^!>!-_LEZbHZ}(jOWgK83Hy-cl3O?zAo#9P2dZi z^Ty>%I<8mpS_eg@8!c9G51OmEmHWmAljdzV*t34b$yV^Vi83uz*^LJIP0eOsX`{SU~3b_L?thuIsH>`a3M~O39Q-og%#qOZrrFBvxJ#Z}PjoDl%7-Zry)g?GwP$!l*v<#rY$)3l~Xf`wR8WLjOQA!&+OMf`K#XY zRbBJzKl?oOjH=drk~-{YuDyXnxd*@b&yctK zj+UFIXMjD=k&7zRnJr8m)y_rC&J>*aE^W$%i$NEi{T`q8s$~*$pBVLIMfR;DuWVwB z*8PrVo7!fqGF8+tsB_`Br%p1JN2dw1&h!d8dG8&&*{8)>oC~}3gMFhN!y~U$>N>dw zi)vlla&dyUw@h;|1Mi!|&bjYyZ1Q@=)ZXgCq-Aq_cfVTqJ3+meE9#sbB4>hbGilgr zf9j7ec^`i9{H!m_JFZBUiimur3o8CTrNePTd$RLcu&gb zJASN+!UhVvLS*@aUfPM=J?S)C*LmqS!=n|`*`iX;KinMVHAN_O#kSO$UaV#cQx)12 z%u>StHXO5Bt@G+V%N~dGzx?f&o_5PPRHe0}H6*xm-n_+rE5+0mQ%myqzPvg0La#&b ze6FJ>A6bN)^WyND>3C3_^^_;SmqA&T{gxY3Y&kqDqv(%TeZV z)6@{>S5sD^g z&}ma_)B2^Jj1kkcBqruhQ@Q6fAx14$g!}0Q-V<^UxNCI^B`TL1U2Ns?YF53~GV9X9 z<_&TOOaeCf&7Zt-(qh|XvyIN}nwmJTYR>#O?}GYux-4$^n%vs!@T$y^L1)Dp-`+;U z8ArVWVy^PNYY_h^oNe9`wd)3B@$yGD;h%Eb0yri;;OvtP7V$VWWvf(!#;i#WBAsVu z>zyx-`8Ho+NzU>=&sHs2W7x6sQCmVzgBU}%q0Ry!kL>*BDW31Hnr7%+TbDm`*`G&z z=F;tx+bue0Zc+)^)hTsrTDr-Kib*GuR-a5>pP9C1(HYI`%vFhQ0dliedhKieH(S_8vbOlN6kzx|98K@>k~j;;&qpdnM-Io}2ORmfKcO z)!2t}O6)rngnw7sWJbDGU-=t-xpJMmanh_msxKaV+Mc%4#nyS+Q`_h%r3?|LYo~6Ox*BL}a*D%c zbpq>wTV~U4P2IE2JZQP(LyKjZla?^~I*73CVyM&eIeuxwdV@yEM@f}c!Suyyi^6D{)#IQwdU9Bj%lf10YI8RKwz()5LL z220m|dG~b5RH37ru1$Q*SuE8zZ|zI3Su3BmRxTXf&vDAMg~ z)bk9P+R}FMcG0_ApI#gO6OQjI++psyL0&j4Xw2jxobDFJ#I1(Nssum>9kx}Vv@|Pxh??@4JW51?tl48GF`#g zx@SR+!|c}V6&o)cJ3IetD_iXJXFJcj`Oo_Daz%#hqba&>ZJ*pGG_0Dij>|wLPclMU zCe0wmz~_j!(Da$grv7cm);V7ZzqnH7bMptz?%Dc$GaD{GnX^<(`J*QumNl$Q?Q-Z-h!nHF-Q z;A?laLq%1?eL|kxToWR&gvaa3NtGb0>Cu&5Q3j_tHr%=OZL7ZUVb0*CQ!XscRNG}7 zTzp1;Nod*Oh9~pek9({5`J8lh{mR-3Hrv0$ZqUDQ?ZO(i-t2tfse9@~NLM}~_vdaRG%@taGA}BOeWs&l)#jLL@^4P*_ zLTyyN)eV%s$va6fd5AMuyPMCy`O;#Y&gMRc_adjKE$}E{3qCVtL&+ppPhU}=>j(c$ zI$bsUux8QLv*-M;M9wV@4!)cw*yuf@!z;reWaS>yjI&xVC$LOa)n4hHBUXrq{=afrF}evzKo;nI7L^YRNWUp5O%(GXeLyfgRhJO2fpbF7LA zRlTMySn0*I;KJVcXJsa968E~!yqHnm zv{@1_O@gmUW@#OrkNtaWeXBPeJW_YI=ym6uF#zYC8a*?1z z&%C&dMZ8u`5SpE_$RxeU?<|94o0IYNHK#t?EnZZtFl)-bfXx34SpjW=zrL$IdA~zk zZgqa9E|-_EYNZU4!!IRa-jc`s~; zwZC*l{#BjUW~rmU7kplGe6f}K3ahfk>YhneJF`ou?iIVb+NK+;F`Yt%HF8<@ zClw=?Hpys3T271$b==#0&*q`;)w}85?w(igM$RZrtDkf>0kXHdAV`>_dH zj&`!#T|!a|-dv15_))mvQ5wIfrP;1#zZZ@l}x=+PlPdBtUe{MyEq@r)y zY1g9UQ^%*IeBD`o^Yx}Ko4smH{vn&3RT}G#{o>@1d2=9U_6I%owx_futyYOD~%!*87o}F2WQ9qn(XHLoptH=_&HN$8l z%Z>M?=CNx#X6)pOS<07O;5>^fI(!MUcLbBU8Zd9*vJG|1Zs^G_hC&uh4v+m6OS+i&}=TY@D zM;5m~zI00G8fR$v-HrmA1A=etKhCv@wSM&7wZduRGh=<W%rva>oiS$f3CdzEO6#@L*KOJEe6|mx%2+vpI~UCE`FSm z`Lo2GzzsFa(j1-?J1}j0cxO}9oqG-8;i)EHgE{t{mkKfb&+u3*sQSL=2KCv|+pHPi z1zKtpPl>r4b-C(qzP!uhR5hP-@4RkUU);L?Gt-N_)n|;}OkA<@)>I|Wdj}V>bl-Sa zFBA4LN2&Vr!(V^9UHZ%qF7=%wwm69G;v1dgi$6!b={A3*&;R-}-|`nTG%qdMC9|n4 z=I}BP^)sSZR=Ga-w$l6fvHiMza{}r=8!gdEG~EAi#!`Vv&Wm!{Vpi=4Fg=yMxxJ5H zeT$J`=k(@BnnD{@Cd_$IUcCRxsYrRTPp9=07DgO0T60a!@>txnm`h8iROZU8SSvM& z%h+nRIRkTYVe#kgIHg6KI#=-=TVQQbkhxUr+=Cm(U5+P;um|#{ME*G8&6p^&DUADZWik*ug;`QIj85OI_^ntn}AZzN~1!Hs!bgU z(T5g?=YPF4k+VW_j-8uanXH!vyCy?}k;X=~)=zcQvuu-$?3SJl+tsdqCUxl~FWZof zkq!atN=38tuB%Uen|D}BMoIO&%gF|Lf#RLYfl|nPq zYrGEY@CHZEdKP@?Kg0Bc0+U>(RL)B_{G!x$W0Hrt+}#bFny01(lv=ax?s~cOens1c z4T4G$mYoS#(suvK=h^gc*%{+I*LJOa`J`=P!bHI^r_)b2$1GZ)T6THo);F~q^#!MD zct)B_F{bCu&rSGoPh6i@Qd~ z@V0<+PgijNoV=p<_o2ASCMI)zugJPf1zvZYoK|X?SJnGWEk4#(`QpEO2HrO1tGfd> zmTGNc$Z{y$Ais+org}x8S1I6OlakjlGXlL`}mHn;I&Jo^S!S8 zxOmy%%U|=(662t!vYFF$G&Wpg*7`f!+<`+wB{%SdyKa)>HUpXLmgQ}af)9uBKY9Kv zWNFO8wAE8GjxauQ*c>#ie^#~g&5MT%_y5`2r)}Z7H2bjbU58Z5C*q=-Jc0ii#P8Yk z|I;_}$h1E)y>qR<_jLDY`@oa&GRxBRlrk1+u&vJQ5qOY$Zv8U-{XB;E|4!!;TdJ{C zGIaZqjKfoJd%KtXVLHCHw(p=?qNU4(*$gtGXn?Y*#VU`_a!m zwa3w8(SZ*uOe;0&7RF}jaWsGQJJ#b6r{s9ja?{0|PWKe_&P6Tgy)|)d#lab=hZN_B z1*CpHAL;J@&8n^YI?J@6R@-jY&BYrwib*`Z+c(Kx+nn#8ipE~w<{h7^>rJ1_w?|~I zDVQ|H!D;FYTaJy@yQKr-ruS5vR;~=2ZrIkXw`z%)(aV5T7ir5+?_3&=05O0t(V~1xbGP+VpLkwCM^|Mm^*6mWli!_M64|!eVBkGwNJsQ$1Zc-q7nD^&#ansOqnm)wxQ6#G(Qd8o6i=d9AXtWukf$W>_s zc-9uIy{V&T#;{)Q-h&5wEWN`7B@b`lKXLKblKdY!SI$(y3rw<_@$a?UtJ@|y46M~Rq6>6=%~@Jivl({pSnFEC94ImG zLctc_X)_y7uMiAVxO-5P?M>aePj_7}+tyzTD*4Y4<2vc=mB^S+&mTZKGC3PmN1b*?YVK7aFN_NMLACT6}~u`H}-Rm|=q2O3^T zI|Lm%?wMbc!KdbPQe~;5N0q>_Z9HEJo;GfYJb%#hsMp5{o2FWcNL)McChCFsYUj9R zU5nRlo^xIMMVHu_U3FH8^Fl5MEi`fzo02T#!naaPTgZv|rPGs0Pk{h?wMMO?8T(?6 z-kh^BFpDWKGw8O=BS<}~-<O?Kwd+`a=NX?fP8X(qxX{W}%)~yKN$%aFUT5j@PSN1>ooW7$dahh@ z48J0<w*3|9qvG!t zZ_^g(|F_hGi`n3ehDX7J{WFrE3L8!eD-;$u$vI=n`m@dlR&mQWT&{f(n>_7l@zeJ+ zeNA&#uFVRPYG<()`Q_nmCb*%f_uO3~aC zf3=>^sCiUcCETy2r8dSlZByHAbP5gj$D z8b-#t2VG-(IrRRlxxTgeh~Cu)N8T85TB>^|O|4%w>FHTsjZ?K+*L$BI_jwf0SLLkx zLHW>%X^S;aW+%Sdx9$~J(rWeabRqq}JD6@A73DB;<=UulDt*?ywHxP7pV0T?W1OOo z%ZGr*u<4p+UjDx|dMBlnje_N0C_hp5u`D}XPHO@5$P$9eJ_j(e}l={xht&z3fuNpIud5TP9`29w@D9?GST_xEo!|>XB=H zwLeMaQQA_iLyS=yEj@J?hHsEM7T*1ExLFuv5yFJ+0cg{gK4{Qzt#9q&6AdO5{$}SgATi)}hP7MCD4y=Q-*IOWL%4 z71W)5HfPf!l{+Uz+Jkl9L>OGIp1Ae6a!zjAxoHb7GjDaROTJ{~bYy1B>Db#A)*=^V zcU_W9mt6ex!nM?rW9wF{Wi46Vvgx>l*NeSU>zB@0;67D{t0^T+`2w4ox5GlM(kWN2 z-q_!AJvPts$QwRh;}EsAEyon@{16S>!RNttF6+rG``h*>9Y%%j&9 z2|$Bgqft?tsCr#?k;h3Zt5w5RjFiObGtRiEeiKDkrd>5`{duvDe} z{kKx<&Obc0^cmA+iMCt+4*r|vdT!pzO?`7;Yh`TqQd_-#sVT2VbNpnkiHXY0NfD-B z-i13^D0C;pXkJtGH9F13yi#e~&BK=0eUql|65ePO5)c)cbF8m4p+x%DQWf!z`#fFSZCOV4aL#q=|Ju1rAx)Q9Uk=Bb80+nExg{p=dgu4EZeQL#xGi5D(vnY6t* zb)ZdfQc2a`14pNEv&>Yt&e5!^PF+&%9NIiGrk2zD{@Pwy<_^>o4Y;moWe(em6uabc`{ykRW+&cTbaz` z_*+YTgr1(9a@c65|G%bH`9=qS9{YN6bKj1aR{Q2onh;jX za=v#(AvR~K$OZ<1{|x)*P3#K0x->lC_|&JnmMrcJe8s#$^={rL;f)M2%=;7c zHyml*|MXt^k>v{AVU7J$@(XSh&C#4|)unfKFkM4hl zToqTQpOf8YcLqJ^6fm;-`*jCj!b8<%5$uMFou^ja*~y<6`_EtEYQfLDFM>qp1?o@v zAPh^%s6CB>zZUt1n;j^Q&7C!iwGY z9=rOcPI5J<6y3NsK;_k!?sE?#WDhdD({KJK|9HwientJc8UGn%^Fo>epDws#%l1M3 zuuh^3SIo|K%fpj3?%s>kxV27*llM%VZk$wb>d*P@a|<6HjjOk1bw2Jn`)PjI8l|fe zYHH2}JWakbaj&&kZWKFWr+fe5#!Vj`0uz=x1btDtc!2575ZT^b##(Zf@GRc*jSP6H_Gp96p}$R6fJaTqfA{ zB`)7HI8@EwRQRc-_F1`AmFksE=F`=uxa66s6OZ~kObt5A5w@Z83FFb( z`zAe+`z~)`GV@_Y$x>%S%lLV7^H~ok2^DBewAgip!DJ=p@(UJ)8Ozf}BU@c7&&-mF zS{1P{e1=xH%aIN2fp?=X0{Hy2uP+9_<|$`4EC%IHp#n)u*xAx^CS2hM`^nXugR8scc$}Ld# z-1bNM!O7Yi=a{p1da4=;MSfa(Qj2S8?CI-a%Qhai|B>==vRWBCbK9IvLJL+l@N++( zv9hl;)I&d6QZ0GeQ7Z}gkyrRnA?98rc^Tku+G+(A>JaK){ zbUMkA?F_$lb75Q3jxu>+-H4nhVY8s4L0OZx&&{&@EI8|t?5snDI#wTc+)T;})5$XD z-B{J6rN^Nw*ZFeK^@H)^pI>g4IktTH9>I@}6O&i0XfqV||0>dx|2slJc)Fj%e}3r3D$G0g(ND1DKf?>IjS=B?tYJCZ{xkf_vi}~uN%S$}rxlysl^#drT`bU1 zm^5Ke8SiP!8kYr@^MAjP*62J?qZMtE7N=LjmVMDS{fbECyujHhmXlo8KM8SNI>~G^ zTM#GT64S#gwGK3K%A9>uF7Ic}GIP?kso}M*9187cPqm1%nayo|Yco&v@RKvs4UGlj zk8n4++8kUgGOOu*curea#I3AMZS_^l*G9%n2(X`-yt=3&`^KGrbH7)9(zeo_wjkq6 z?>S{YCk_u*^<0z1^@58-zVs!}e0sHXOGiqU#I;DxKj&|GOE22ABlaZs`r-rc?@N9u zOjKz!JQU)=?%g&TlDvbXLi>iY1zqPHXXxl`d^%-g0H@&7 zvfQXIjZ5CY^Hi8!pqtozCdpOY{K$m+*8DsEK6>7n@R@yHv4F$Hb^jR@1Lcqp_`W-UmM5%P(9#jt^6g~q@trsR-R=`o%+<2Q?FBLb+~Xyh_-fq zfu>UQO18JzH*QPsQ|ydWm)7z34B4udD7|E{hF2qhP{^+)lg_=HA70*g$H~3wp{1IS zCm)x`iI#|~pI2;L%-)q+%CYeE)YGe_I%k`oisMk?T)5|0UvYZun~+|GJNNU-kMKzu zC!}BaA@VLeX?mXfiJ*^_jYq!E;QJQ6Wo2L7mJ(~(-Y*Fs^gsN*W>j*&GgdQl^1k#~ z|9LJy{mMNpc&>_sYA$j~viop#>Ei6zl->%3p4hVOtzJh9PX4G|#iGKxpqkK+E-abd);<5n# zPLPUfO!&{hwRy@)n-0ag9=a?!oAOy?+Gkb4{AOpLNvC(a_sx}zy)^S_)lq-V#b-Yq^xLd6>p|46)T=s$ z{mJXpnG4IRrFl|*+aGb$iZ~Z@I(Retsa4GHI81I$^!!nNeCgv2lb-4PShDu^*ObVV z$xj3P72OS=YQ>z^jL2GWw&=xX&J-a>$#jK_CreW&M#y_Ja6ITX=FZ<9>c^X)Iwfeq z(U)~d)RXHB*ZWMGxjEP{PRLO-mC3FBV*lDj*Bic7B?t#?o~W3xq&aW~llyA( z&w@LbmCkw+s~YXMbBWL;Z>Mgbgx)=~j+SkkA9~8j(mb|n>g_aUwmGSO@x44t?w+0< z(6+<+^)sa~{&tU}$&-zhHMR8KyiBYzQ1rZZMCyv0mtTsern1I{qX8>t=xy0J{d8G> zlzgq~`|OwnGLKGL>=(@nuJhA=nLpd@p^`^b_fG$YZGDW%ug*UESoTs;KhS;liJ+`m zFBb1Dot|;Z_Q0Di{;1sX!`-0v zOZW7fT3K4!FJA<0HkffhN71e5^cq>`$;oHW@6%qkR%pu7b728*Uthe?RD0iI<(igt z$tw1W%eMV=d;YL3eqMS(`aiLBlhB{xanE)dKeM&0X~W<_1iQfH_@UWN2anS zrk>%LwR-ukc}q?QPY#=EygRy!UtpubMXN`PmM<(i^ZclVRsE_>oS`pYEc>0}J?Dt& zmYfT#el3v^3*WF|hxPmAM;;h_-sryDYsr~~+RT5_a}Lb)Fl9V^Z=z-ltLE|5{D(wV z1~ZHDEIDOz##rpbC#CgnU1#l`FO)oU^}HIXA9Ox`;;hH9)1I4@yxD4ES{Kz4$z_nS zRDti&F$LFkj-D1XKW)^S-r?bO>q*Zl-);*<&nw(lnsl!H=(}*{=p*sgx$0VWtr|lder#z|9nN&7O00Q{0o6g4Ri5>fZBPqb4jeTCs$=YRRT%K_{E7{Z3!b#^3s- zHGR28Rh-qDNm5no7M;l4Z2Yb3){jM*jC~VUt+*(edBfx5-7h%}vTpW*A@jc5Tz#f# z8L~i9cX8J6si9O{+2B<6N|mNx-AD2QXE?j+viZ)jHCTGa`=IvHufgZF{+*d? z{W8^h-Z{2EkLraEugnZO8ZztdQSR+`cvqfMNiJA5L)^~!*|a0PT#l=Y*$#)yXlT~X z@?5R)WbT7?oq_5svRS8Gr`%6m>9(L;`hE6WrI0&MRm97`U4LF()Scs}y~itrz2$aR zf0b0eh*bS{#%Z-X17H3*wnxz7GbhWo3415R#O@GRu8BzZ3W^Z;Qr)jsH7{`StW=pp z;YYn@=e%EOmHkxw%%s^SuQXnKvUnwxk*T>V>&44kWmyrOJ^#)Ie2VN$db?58{n~!P z4v$owfLU#}`!x@?VSoDbW^L z@?pa=?qKbBw&}||8u%7iT+_cNo|b#{uI2Eg4IbL#G zogj2fnc=cw{Id!2N6u}@;JTb-9>BjRJ^V=F9nFmC$0w*goa<rFF!{N&l2rdq0A)?wXkJwZA<>-;I%Yu9{?OwNifZsiMqGGT@1 zR23)D-UbFHl_1ak%l8)+bT)b#9oTe!xnA7`9yv+d{|sf$bAG)D%6=!gWlh8;ll2Qf z+-o;24?FYn!oC0z_KZd4u}+TXr>%=}$lW-z@y31r!)Js;++VUx`ZagY`3tV;LLocn zga!#os{A;+iZRW4g5=hOYd!0aX!&fLZ?I_LX=9G;po(kDmaTlIlQOeWb}zo`<+hr6o^^r zyyKg~?c3W-W=whR85BA9(2ANpJC7aiPMm1xwJAjF_{8U*S)xp3?yq|NE3kNFQQpF{ zD>j|q>>BFJVp_Cn<>dbiJ=}lqHeSll7QFGuXk+nn$pitf6`sl2cT967?x?d(?J>G+ z7vyzk$)pKiuK(ou?W(NvQQvsY(pmf0YZR^9IOW(y*XK@CzTOMF^(EA0VdK6Drfgf! z9m#Dxcj&}i>pj!@UZ|~`#cuXt(h>gLWZ#}QTej$UJUkV$fcwr$h8HhXdhd387Vq{; zj=yL*&DHzLmEbF>A|@L4$x%zQx|9VL&eWB8v;Fcj_cUp?9&TY~-sV|iDJSAeCI+X8 ztFF2iD(&hs$6R1)heha*E1L^H>`YJps3N^-Wm(`Fv&Bo!71>-{zr1+PY?G5(DekMY zxK`YfOwd^r5$$>Rn#T>R4N@GbS{~Vty1UL#w&qDs>Eh$S&8T42Ua2!HYe5E&~<31}sgB2Bq|>1bmK)s`VJzP26z**`q(r`xCq78QK~>Nz?b<{F>+SPt}K_v%N0Q54&Y{ zZdQl)wSTPPp`5X^R~hZHeEjK)^&ZoY)3n@G^rr4Nzwzx@^p@f)>v)f(p0@v0Y|Atu zI$CPJ#(~1fr8f2*r``WET%55Wu4koJ`jfApI`cn@M%?`VqwV8yyG7Hyvb4^vJ^M=K z^E10d=aBbl?^o1X-WJ+zv01-4;c4@1otc>h5;Y9bS;14n{|P+olipNg&Tn8fSt)== z<0nI*)c(H6V~!I~KAhhaxpGm~yy-7Hp08fDRM{(C^vJahP9-MY>$eW$?k)86LH-^VX#9yJP@*|3B6cg^Cf zawcE(UB0%5l)6l@G(5ffrRpSp%j69!Q=>N<-Jd->=Kap+lJ5i$hc_Qymb2An=~mv` zzPDyYU$a=BXb~=OAW@}rvazCr*bVWtFPCgCPI@MCW$NTBwG*YLcB?H@ne?AQ_Sl2u zjDTiMuYixn{mbuJ-81J}Iq4JAky^WtZ{()Gw*P#u@08KIRr<5Ptcl;q^JHsSyvV#| zCVlR2j$ZBTxGQn%@jlCA=FL+dUt43}R<5NoaZ&p#QPCf*_dOoUgg5l)S*85pIB6ND z_E%eT!8y6*seumJOH%G>ROHY6;_E6}!|OS#WNN9_)u)kDgaoD*NcZ}(GQ!|uShTb+P zSz+2?mk=eJvh&T&5)sLxI-JG_{T-qn@al*@-tnFN^v2)<*Z1dm?YSmR^=4~XHM@yv z_Qe+^_Y?~GMATPCq>2@7`&3Z(MX|&1%(7{VE!UjUQtX?mGE+vR*FF3^V-9nOIq# z1|2STOn>Ti%g*gT!-M-BYZ46qSSDWSak^>7GRbo*L;h5TEfxC9g)D0H&-~h~&o+0_ z`Gh5I8f)3MXR{@KP3}4SpW(gc z0soaDn--s(!r-fMKzy(N7Vf2+&m1Vt_bvUKQYsqjES8nlr#?Ysvd5mxvrQBHZ3Oq9 zonJIjFHQ6hwovujK&0-F#Ym=5Q`K)zk z--@(Pda+?GZWm6SQ3+MPk=+w_?RM|l&K(L%5>FX2Xrz2(4p?BLQnNqflBdho4^QhV z6+KTFH-|Hb%wL#z&UUU{vi*-mABtCfjX z9d=r!`JU*PncaDE@&Ut7>L((@grycS|C`FDG;3GHEaPQY`{xzRF!`pw#mJ!cSq;BH z(5V_;bz4y0o%&QY%xbs)`2Ve} zV4HCHS#B^}e7Mb&uSQFIW^prkY_AWR_~rY-qKTS88@>j)C72WjynnjvST?L|{m(G*bN|-QtWVVgf6MJ#WMVh% z9AEmyoh?Sak5&fF&)9v_(alEo?2dgJJKALaGh7K`Sk-8K zE6;w%yTXV}V-v%pOSNBKOHuhC_TUDegrh>pDSNw#i|(joK@Q3z;IbouyM(xJkFgvj;^_|)W_%L(ivt~ z6tvd7%ZvNvbmGX_BaeB$3J7sJFdk-{w!D;m$I|9F<9(~!7Bwp@TfAhF*@;)HT-I{) zhuXa03=h5?G_gQWZ}Ds%-^o4NGw=HGEU8g1SQrrPkY)3|;@XFTsgHb){%5%3kWlWq z!+IlY=Y^jKR;Ei%O?Q5>N3O7%bs`q#r zYX9|YE{m;$Ro-r0uT|Sucb~OOpTIpeR!mD)I()m9gw=BXnn$@h&t*WSIW5PlQF}5Fhl17CdbK7SuHe@yupLzDuPv^S|7E&GhJP$pN z7p(DP7TNbzMK9>kDh5rjP3)o!+K==?-eoB}tX;hH70z7PVGuJ|FmneTA#!X zy};l|jr3NAJ8qL417)`}Foiulx|#E+WDv`xSF@E5E%HCoV51kcw8Q#+)>GA_>n(@> zGhA`hnyn!j%$YBh%EZ+Frz>RA_j?QW#a^20JEJ0d#pKVYZ|y4QCal@9zE>qA z>zdX5_$AVt=eP@6o1BTr>{O9;nz`y+M%m4m;!!#0qTa1uS7~?G^V-k+{|rT$%*Wo& zyX?-iTfA05)uY>c^K4Fs0*?ur3Fi)<`ZK+v?cJ1vj{OajZcdVac-CiET7l$|^z;Xt z9!zN2wBd7&LI`Kdip}gQ3syDMHJ#y-T{A`Ux%-#rv3i;R>aUx8J@GGEXMbHVaLfEDa#Tay(2pmJAQZ+Jh<+VG++4e#vl8{Qf3;R@+kM3 z(#kaH=ZXi@`IojPD}5B#^PBRKeTnvehF4j7OG;Lngl0_nbmW89&+@LY#NYldZx%LY zGv+ZI-Y_|BUC`#!C#I_2Jy;z+efv4H*a^pa^10@;dKkT&Y3f;CJX5E})O#ay?E}l@ zPG3v4&(FM?`td)*^aGzRtabKlomJ7z5fhRHgbOQyXK^{^c;BR`NXrJrsL-&#m`(`Z#JteI`=wh$*R?J zre)2|U7GC1`D9VI#1}aZwm)~GD=R8H-t$VfA6{_v*!$IgzD!oiS-E3!>m(1al4sd7 z=Ei54&Qh&0J?rb2&6n(}bLDGiyiBOq?=>q-t6cXURFFAGX3iV)l+6W=sx~r+XjTT8p4Pn_^lRmx#q;aL%9`>8k36b3{vbND z==*7vrOutH@#o}y+~L?YWhdXU z`y2k%DzE!k$5D7AbB4=ifvcOZt~@p2^N}<2Cab>UIn}Nkt{odHvtZ+{os}#nPMKXY zIkD(=)RCm!ZB|PKO|CNZZ!X@#zVVM^MURrW`F{o<^9`pQ@7>$p?@*DjQ)G&4^rAhf z*_o`U*9{RT7+p*bV zOTX7X*&rkM^0fOu<>iH(+BT-H49vQo0Z)$IdEucR-Z|&I`q~pKi@HwDdTDg3!|twW zYz#|#_;24wDVOs2=JIgd%`0c;Y(6IWc^RkTsgCpwDed3X=WDr!2}|qMoSbxN&PhwA zpqv^x^}0L`^QA}fx0-DFactx9_<%3_OfFsux$Yb*5F+_kq;PJww$_TM9U<(I5{(K@ z5={i|A`e@mfyO zS>)`O$qBZR;epyEfLppyFF#T;fj94CnanitBW~jxT;+1S)C{O z=irL&`1|{Fe2zS-_i}EGe}0AI^^BiCLsZqjT)#NO_0p=o$X)V!I#brLD9y}W;UT+~ zLBCeD)M;+xo%bCQI$2E5c6D&-l`fridty?ub=$ltA2}OrPhOm1zCy?EUx1d)w?!UB zZg!=1vF=md-DOf*BrL;`zLB2%vdMavMPCc)b$xZ z=eOUexieA3^3lpyJ(tbS#CdPE@eeyyU!l4~rZ~y%w(Q;wIc16(0h7Gdu3X6e+{@Ft zm{V2gz)t2XU+O+CcAxybcjB|3Uxb`xL{4m4KV4JU!IW1|>hOPtgq-|M6Wbqde8DYI zvM;)5gQ@)eYevgE6YGt73{_Ulb>({;Z@Z;iul7-V;Pc8lXZ4t;S7NiLw?(Xc^+ZYM zSi`CncA*h}7krs&6|qYoLR30RXN{PkV)itity=5s!;X6X@XbztE?D88`r*=ZAAi2c zNxP@qi<)!Uh`;4};MAzS_r#Cb%+oopdusKksIHw=dCTT`X8yb8e#>S??3Be53!=<) zw!be-I*@Zm&g4JCZN)2Jl^XWO*VUhiEX%4|ayH74fpewe?T4k@jmr|_q8eg9-pV@p zbY9iTh_28XE)yp3_D_@kq)`2zLAYCUwe0n>mUgD?_ZQvzd2CS^Sj z&ZD~WQH@c|LbEi1RNcS?himg!T@}9~alG)|iPh})3i-s&8|v&V6J`)mxM}Rnvnoyb z&~oq8FS%vq*J?HY$?r*gnNd9Rm-otF_A86-{F@WBXiwc@z36!6ttTeR*ebWCrqpYf z2KV{iyy9WIaLUrfMukGl6rQatF}X6|W@+i(hP6As$+vxYlb&T@v2)Uqn-M#MQ^gB1 zr`0TFOXBi~C~)-4(&$Kkq*gqkLZpmY>P=n5UXm zI&IR@9L})Lx1|YZN;Yhraqr0NNUy_^#XX&klh$^!*sYWQ-Z%U3lgWC0LaU@UHP&)e zwPjULV7_QvGk?*p!zL$R{3@IIOMdm7`5P}SK4}&n^=nB#t6woK zdpK#q(lmzY?2q{LL^R6RCjDn$vY1cj`zObbTAedy8Ba@D!xgQyQ{s^En@!7= z&Kc`WTFMjI<8#rcr72nKiGYNV#qH#7A2UmDj~Q40nf}hrcrk;=Dc#HG${%x?J0*#w zhbGO}?@8Ud)%cRhBNt)!rTgAZTC+XzWkPsz@_F_&#ZBqLpmRRX;Md8#=5xuf@(fK0UW|YsweP ztYhzrwOm(EeLD3?w2s0HErGjztLNGzZQfM7#9|-Q%!&{J)ss#kiyRnTIXM|ZR?l1@ z^O*NX@~tOl-yG=?6J(ibk;1Aofk`A_!j#5RknphYc z>v;vZx^u~Ji9yunCuOB3m*^)Ylat%6{nU6@p5rq^YT)2iiKFEn7J64 JT@R)11ONnn*>V5? literal 0 HcmV?d00001 -- GitLab From 9d8779eebdf0e813748fa1b81b975f443f84f73a Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Mon, 26 Mar 2018 10:52:46 -0700 Subject: [PATCH 485/960] [tf.data] Usability improvements to `tf.contrib.data.make_csv_dataset`. PiperOrigin-RevId: 190489086 --- .../contrib/data/python/kernel_tests/BUILD | 2 - .../kernel_tests/reader_dataset_ops_test.py | 268 +++++++++++++++--- tensorflow/contrib/data/python/ops/BUILD | 4 + tensorflow/contrib/data/python/ops/readers.py | 250 +++++++++++++--- 4 files changed, 438 insertions(+), 86 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 8cfe4a727a..d7cc2f14a4 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -294,9 +294,7 @@ py_test( "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:lib", - "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", - "//tensorflow/python:string_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:iterator_ops", "//third_party/py/numpy", diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 699e8e7865..6ee1b572f1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -35,9 +35,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.lib.io import python_io from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import string_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -568,12 +566,20 @@ class MakeCsvDatasetTest(test.TestCase): dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64, dtypes.string ] COLUMNS = ["col%d" % i for i in range(len(COLUMN_TYPES))] + DEFAULT_VALS = [[], [], [], [], ["NULL"]] + DEFAULTS = [ + constant_op.constant([], dtype=dtypes.int32), + constant_op.constant([], dtype=dtypes.int64), + constant_op.constant([], dtype=dtypes.float32), + constant_op.constant([], dtype=dtypes.float64), + constant_op.constant(["NULL"], dtype=dtypes.string) + ] LABEL = COLUMNS[0] def setUp(self): super(MakeCsvDatasetTest, self).setUp() self._num_files = 2 - self._num_records = 7 + self._num_records = 11 self._test_filenames = self._create_files() def _csv_values(self, fileno, recordno): @@ -588,49 +594,63 @@ class MakeCsvDatasetTest(test.TestCase): def _csv_record(self, fileno, recordno): return ",".join(str(v) for v in self._csv_values(fileno, recordno)) + def _create_file(self, fileno, header=True, comment=True): + fn = os.path.join(self.get_temp_dir(), "csv_file%d.csv" % fileno) + f = open(fn, "w") + if header: + f.write(",".join(self.COLUMNS) + "\n") + for recno in range(self._num_records): + f.write(self._csv_record(fileno, recno) + "\n") + if comment: + f.write("# Some comment goes here. Should be ignored!\n") + f.close() + return fn + def _create_files(self): filenames = [] for i in range(self._num_files): - fn = os.path.join(self.get_temp_dir(), "csv_file%d.csv" % i) - filenames.append(fn) - f = open(fn, "w") - f.write(",".join(self.COLUMNS) + "\n") # header line - for j in range(self._num_records): - f.write(self._csv_record(i, j) + "\n") - f.write("# Some comment goes here. Should be ignored!\n") - f.close() + filenames.append(self._create_file(i)) return filenames - def _make_csv_dataset(self, - filenames, - defaults, - label_key=LABEL, - batch_size=1, - num_epochs=1, - shuffle=False, - shuffle_seed=None): + def _make_csv_dataset( + self, + filenames, + defaults, + column_names=COLUMNS, + label_name=LABEL, + batch_size=1, + num_epochs=1, + shuffle=False, + shuffle_seed=None, + header=True, + comment="#", + na_value="", + default_float_type=dtypes.float32, + ): return readers.make_csv_dataset( filenames, - column_keys=self.COLUMNS, - column_defaults=defaults, - label_key=label_key, batch_size=batch_size, + column_names=column_names, + column_defaults=defaults, + label_name=label_name, num_epochs=num_epochs, shuffle=shuffle, shuffle_seed=shuffle_seed, - skip=1, - filter_fn= - lambda line: math_ops.not_equal(string_ops.substr(line, 0, 1), "#"), + header=header, + comment=comment, + na_value=na_value, + default_float_type=default_float_type, ) - def _next_actual_batch(self, file_indices, batch_size, num_epochs): + def _next_actual_batch(self, file_indices, batch_size, num_epochs, defaults): features = {col: list() for col in self.COLUMNS} for _ in range(num_epochs): for i in file_indices: for j in range(self._num_records): values = self._csv_values(i, j) - if not values[-1]: - values[-1] = "NULL" # null values in csv are interpreted as default + for n, v in enumerate(values): + if v == "": # pylint: disable=g-explicit-bool-comparison + values[n] = defaults[n][0] values[-1] = values[-1].encode("utf-8") # Regroup lists by column instead of row @@ -651,7 +671,8 @@ class MakeCsvDatasetTest(test.TestCase): sess, dataset, file_indices, - label_key=LABEL, + defaults=tuple(DEFAULT_VALS), + label_name=LABEL, batch_size=1, num_epochs=1, ): @@ -659,11 +680,11 @@ class MakeCsvDatasetTest(test.TestCase): get_next = iterator.get_next() for expected_features in self._next_actual_batch(file_indices, batch_size, - num_epochs): + num_epochs, defaults): actual_features = sess.run(get_next) - if label_key is not None: - expected_labels = expected_features.pop(label_key) + if label_name is not None: + expected_labels = expected_features.pop(label_name) # Compare labels self.assertAllEqual(expected_labels, actual_features[1]) actual_features = actual_features[0] # Extract features dict from tuple @@ -676,10 +697,7 @@ class MakeCsvDatasetTest(test.TestCase): sess.run(get_next) def test_make_csv_dataset(self): - defaults = [ - constant_op.constant([], dtype=d) for d in self.COLUMN_TYPES[:-1] - ] - defaults.append(constant_op.constant(["NULL"], dtype=dtypes.string)) + defaults = self.DEFAULTS with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: @@ -705,11 +723,26 @@ class MakeCsvDatasetTest(test.TestCase): self._verify_records( sess, dataset, range(self._num_files), batch_size=2, num_epochs=10) + def test_make_csv_dataset_with_bad_columns(self): + """Tests that exception is raised when input is malformed. + """ + dupe_columns = self.COLUMNS[:-1] + self.COLUMNS[:1] + defaults = self.DEFAULTS + + # Duplicate column names + with self.assertRaises(ValueError): + self._make_csv_dataset( + self._test_filenames, defaults, column_names=dupe_columns) + + # Label key not one of column names + with self.assertRaises(ValueError): + self._make_csv_dataset( + self._test_filenames, defaults, label_name="not_a_real_label") + def test_make_csv_dataset_with_no_label(self): - defaults = [ - constant_op.constant([], dtype=d) for d in self.COLUMN_TYPES[:-1] - ] - defaults.append(constant_op.constant(["NULL"], dtype=dtypes.string)) + """Tests that CSV datasets can be created when no label is specified. + """ + defaults = self.DEFAULTS with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: # Read from both files. Make sure this works with no label key supplied. @@ -718,16 +751,64 @@ class MakeCsvDatasetTest(test.TestCase): defaults, batch_size=2, num_epochs=10, - label_key=None) + label_name=None) self._verify_records( sess, dataset, range(self._num_files), batch_size=2, num_epochs=10, - label_key=None) + label_name=None) + + def test_make_csv_dataset_with_no_comments(self): + """Tests that datasets can be created from CSV files with no header line. + """ + defaults = self.DEFAULTS + file_without_header = self._create_file( + len(self._test_filenames), comment=False) + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + file_without_header, + defaults, + batch_size=2, + num_epochs=10, + comment=None, + ) + self._verify_records( + sess, + dataset, + [len(self._test_filenames)], + batch_size=2, + num_epochs=10, + ) + + def test_make_csv_dataset_with_no_header(self): + """Tests that datasets can be created from CSV files with no header line. + """ + defaults = self.DEFAULTS + file_without_header = self._create_file( + len(self._test_filenames), header=False) + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + file_without_header, + defaults, + batch_size=2, + num_epochs=10, + header=False, + ) + self._verify_records( + sess, + dataset, + [len(self._test_filenames)], + batch_size=2, + num_epochs=10, + ) def test_make_csv_dataset_with_types(self): + """Tests that defaults can be a dtype instead of a Tensor for required vals. + """ defaults = [d for d in self.COLUMN_TYPES[:-1]] defaults.append(constant_op.constant(["NULL"], dtype=dtypes.string)) with ops.Graph().as_default() as g: @@ -735,10 +816,109 @@ class MakeCsvDatasetTest(test.TestCase): dataset = self._make_csv_dataset(self._test_filenames, defaults) self._verify_records(sess, dataset, range(self._num_files)) + def test_make_csv_dataset_with_no_col_names(self): + """Tests that datasets can be created when column names are not specified. + + In that case, we should infer the column names from the header lines. + """ + defaults = self.DEFAULTS + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + # Read from both files. Exercise the `batch` and `num_epochs` parameters + # of make_csv_dataset and make sure they work. + dataset = self._make_csv_dataset( + self._test_filenames, + defaults, + column_names=None, + batch_size=2, + num_epochs=10) + self._verify_records( + sess, dataset, range(self._num_files), batch_size=2, num_epochs=10) + + def test_make_csv_dataset_type_inference(self): + """Tests that datasets can be created when no defaults are specified. + + In that case, we should infer the types from the first N records. + """ + # Test that it works with standard test files (with comments, header, etc) + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + self._test_filenames, defaults=None, batch_size=2, num_epochs=10) + self._verify_records( + sess, + dataset, + range(self._num_files), + batch_size=2, + num_epochs=10, + defaults=[[], [], [], [], [""]]) + + # Test on a deliberately tricky file + fn = os.path.join(self.get_temp_dir(), "file.csv") + expected_dtypes = [ + dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float32, + dtypes.string, dtypes.string + ] + rows = [[0, 0, 0, "NAN", "", "a"], [1, 2**31 + 1, 2**64, 123, "NAN", ""], + ['"123"', 2, 2**64, 123.4, "NAN", '"cd,efg"']] + expected = [[0, 0, 0, 0, "", "a"], [1, 2**31 + 1, 2**64, 123, "", ""], + [123, 2, 2**64, 123.4, "", "cd,efg"]] + for row in expected: + row[-1] = row[-1].encode("utf-8") # py3 expects byte strings + row[-2] = row[-2].encode("utf-8") # py3 expects byte strings + col_names = ["col%d" % i for i in range(len(expected_dtypes))] + with open(fn, "w") as f: + f.write(",".join(col_names)) + f.write("\n") + for row in rows: + f.write(",".join([str(v) if v else "" for v in row]) + "\n") + + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=None, + column_names=None, + batch_size=1, + num_epochs=1, + label_name=None, + na_value="NAN", + default_float_type=dtypes.float32, + ) + features = dataset.make_one_shot_iterator().get_next() + # Check that types match + for i in range(len(expected_dtypes)): + assert features["col%d" % i].dtype == expected_dtypes[i] + for i in range(len(rows)): + assert sess.run(features) == dict(zip(col_names, expected[i])) + + # With float64 as default type for floats + expected_dtypes = [ + dtypes.int32, dtypes.int64, dtypes.float64, dtypes.float64, + dtypes.string, dtypes.string + ] + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + dataset = self._make_csv_dataset( + fn, + defaults=None, + column_names=None, + batch_size=1, + num_epochs=1, + label_name=None, + na_value="NAN", + default_float_type=dtypes.float64, + ) + features = dataset.make_one_shot_iterator().get_next() + # Check that types match + for i in range(len(expected_dtypes)): + assert features["col%d" % i].dtype == expected_dtypes[i] + for i in range(len(rows)): + assert sess.run(features) == dict(zip(col_names, expected[i])) + def test_make_csv_dataset_with_shuffle(self): total_records = self._num_files * self._num_records - defaults = [d for d in self.COLUMN_TYPES[:-1]] - defaults.append(constant_op.constant(["NULL"], dtype=dtypes.string)) + defaults = self.DEFAULTS for batch_size in [1, 2]: with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 4ecf02825f..647620eb84 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -72,14 +72,18 @@ py_library( "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:lib", + "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:platform", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:string_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:readers", "//tensorflow/python/data/util:nest", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index f70f9c881d..95edca6cdd 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -17,6 +17,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import csv + +import numpy as np + from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.python.data.ops import dataset_ops @@ -26,8 +30,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.lib.io import file_io from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import string_ops from tensorflow.python.platform import gfile from tensorflow.python.util import deprecation @@ -35,21 +42,142 @@ _ACCEPTABLE_CSV_TYPES = (dtypes.float32, dtypes.float64, dtypes.int32, dtypes.int64, dtypes.string) +def _is_valid_int32(str_val): + try: + # Checks equality to prevent int32 overflow + return dtypes.int32.as_numpy_dtype(str_val) == dtypes.int64.as_numpy_dtype( + str_val) + except (ValueError, OverflowError): + return False + + +def _is_valid_int64(str_val): + try: + dtypes.int64.as_numpy_dtype(str_val) + return True + except (ValueError, OverflowError): + return False + + +def _is_valid_float(str_val, float_dtype): + try: + return float_dtype.as_numpy_dtype(str_val) < np.inf + except ValueError: + return False + + +def _infer_type(str_val, na_value, prev_type, float_dtype): + """Given a string, infers its tensor type. + + Infers the type of a value by picking the least 'permissive' type possible, + while still allowing the previous type inference for this column to be valid. + + Args: + str_val: String value to infer the type of. + na_value: Additional string to recognize as a NA/NaN CSV value. + prev_type: Type previously inferred based on values of this column that + we've seen up till now. + float_dtype: Either `tf.float32` or `tf.float64`. Denotes what float type + to parse float strings as. + Returns: + Inferred dtype. + """ + if str_val in ("", na_value): + return prev_type + + if _is_valid_int32(str_val) and prev_type in (None, dtypes.int32): + return dtypes.int32 + + if _is_valid_int64(str_val) and prev_type in (None, dtypes.int32, + dtypes.int64): + return dtypes.int64 + + if _is_valid_float(str_val, float_dtype) and prev_type != dtypes.string: + return float_dtype + + return dtypes.string + + +def _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header, + comment): + for fn in filenames: + with file_io.FileIO(fn, "r") as f: + rdr = csv.reader( + f, + delimiter=field_delim, + quoting=csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE) + if header: + next(rdr) # Skip header lines + + for csv_row in rdr: + if comment is not None and csv_row[0].startswith(comment): + continue # Skip comment lines + + if len(csv_row) != num_cols: + raise ValueError( + "Problem inferring types: CSV row has different number of fields " + "than expected.") + yield csv_row + + +def _infer_column_defaults(filenames, num_cols, field_delim, use_quote_delim, + na_value, header, comment, float_dtype, + rows_for_inference): + """Infers column types from the first N valid CSV records of files.""" + inferred_types = [None] * num_cols + + for rows_read, csv_row in enumerate( + _next_csv_row(filenames, num_cols, field_delim, use_quote_delim, header, + comment)): + if rows_for_inference is not None and rows_read >= rows_for_inference: + break + for i, str_val in enumerate(csv_row): + inferred_types[i] = _infer_type(str_val, na_value, inferred_types[i], + float_dtype) + + # Replace None's with a default type + inferred_types = [t or dtypes.string for t in inferred_types] + # Default to 0 or '' for null values + return [ + constant_op.constant([0 if t is not dtypes.string else ""], dtype=t) + for t in inferred_types + ] + + +def _infer_column_names(filenames, field_delim, use_quote_delim): + """Infers column names from first rows of files.""" + csv_kwargs = { + "delimiter": field_delim, + "quoting": csv.QUOTE_MINIMAL if use_quote_delim else csv.QUOTE_NONE + } + with file_io.FileIO(filenames[0], "r") as f: + column_names = next(csv.reader(f, **csv_kwargs)) + + for name in filenames[1:]: + with file_io.FileIO(name, "r") as f: + if next(csv.reader(f, **csv_kwargs)) != column_names: + raise ValueError("Files have different column names in the header row.") + return column_names + + def make_csv_dataset( file_pattern, batch_size, - column_keys, - column_defaults, - label_key=None, + column_names=None, + column_defaults=None, + label_name=None, field_delim=",", use_quote_delim=True, - skip=0, - filter_fn=None, + na_value="", + header=True, + comment=None, num_epochs=None, shuffle=True, shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=1, + default_float_type=dtypes.float32, + num_rows_for_inference=100, ): """Reads CSV files into a dataset. @@ -63,27 +191,36 @@ def make_csv_dataset( records. See @{tf.gfile.Glob} for pattern rules. batch_size: An int representing the number of consecutive elements of this dataset to combine in a single batch. - column_keys: A list of strings that corresponds to the CSV columns, in - order. One per column of the input record. - column_defaults: A list of default values for the CSV fields. One item per - column of the input record. Each item in the list is either one of the - following dtypes: float32, float64, int32, int64, or string, or a - `Tensor` with one of the aforementioned types. One item per column of - the input record, with either scalar default value for that column if it - is required, or, if the column is required, an empty `Tensor` or a dtype. - label_key: A optional string corresponding to the label column. If provided, - the data for this column is returned as a separate `Tensor` from the - features dictionary, so that the dataset complies with the format expected - by a `tf.Estimator.train` or `tf.Estimator.evaluate` input function. + column_names: An optional list of strings that corresponds to the CSV + columns, in order. One per column of the input record. If this is not + provided, infers the column names from the first row of the records. + These names will be the keys of the features dict of each dataset element. + column_defaults: A optional list of default values for the CSV fields. One + item per column of the input record. Each item in the list is either a + valid CSV dtype (float32, float64, int32, int64, or string), or a + `Tensor` with one of the aforementioned types. The tensor can either be + a scalar default value (if the column is optional), or an empty tensor (if + the column is required). If a dtype is provided instead of a tensor, the + column is also treated as required. If this list is not provided, tries + to infer types based on reading the first num_rows_for_inference rows of + files specified, and assumes all columns are optional, defaulting to `0` + for numeric values and `""` for string values. + label_name: A optional string corresponding to the label column. If + provided, the data for this column is returned as a separate `Tensor` from + the features dictionary, so that the dataset complies with the format + expected by a `tf.Estimator.train` or `tf.Estimator.evaluate` input + function. field_delim: An optional `string`. Defaults to `","`. Char delimiter to separate fields in a record. use_quote_delim: An optional bool. Defaults to `True`. If false, treats double quotation marks as regular characters inside of the string fields. - skip: An integer that corresponds to the number of lines to skip at the - head of each CSV file. Defaults to 0. - filter_fn: A callable function that takes in a CSV string and returns a - boolean that corresponds to whether the record should be included. If - None, does not filter records. + na_value: Additional string to recognize as NA/NaN. + header: A bool that indicates whether the first rows of provided CSV files + correspond to header lines with column names, and should not be included + in the data. + comment: An optional character string that marks lines that should not be + parsed as csv records. If this is provided, all lines that start with + this character will not be parsed. num_epochs: An int specifying the number of times this dataset is repeated. If None, cycles through the dataset forever. shuffle: A bool that indicates whether the input should be shuffled. @@ -94,50 +231,83 @@ def make_csv_dataset( prefetch_buffer_size: An int specifying the number of feature batches to prefetch for performance improvement. Recommended value is the number of batches consumed per training step. + default_float_type: Either `tf.float32` or `tf.float64`. If defaults are + not provided, float-like strings are interpreted to be this type. + num_rows_for_inference: Number of rows of a file to use for type inference + if record_defaults is not provided. If None, reads all the rows of all + the files. Defaults to 100. Returns: A dataset, where each element is a (features, labels) tuple that corresponds to a batch of `batch_size` CSV rows. The features dictionary maps feature column names to `Tensor`s containing the corresponding column data, and labels is a `Tensor` containing the column data for the label column - specified by `label_key`. + specified by `label_name`. + + Raises: + ValueError: If any of the arguments is malformed. """ - filenames = _get_file_names(file_pattern, False) - column_defaults = [ - constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x - for x in column_defaults - ] + filenames = _get_file_names(file_pattern, shuffle) + if comment is not None and len(comment) != 1: + raise ValueError("`comment` arg must be a single-character string or None") + + # Clean arguments; figure out column names and defaults + if column_names is None: + if not header: + raise ValueError("Cannot infer column names without a header line.") + # If column names are not provided, infer from the header lines + column_names = _infer_column_names(filenames, field_delim, use_quote_delim) + if len(column_names) != len(set(column_names)): + raise ValueError("Cannot have duplicate column names.") + + if column_defaults is not None: + column_defaults = [ + constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x + for x in column_defaults + ] + else: + # If column defaults are not provided, infer from records at graph + # construction time + column_defaults = _infer_column_defaults( + filenames, len(column_names), field_delim, use_quote_delim, na_value, + header, comment, default_float_type, num_rows_for_inference) dataset = dataset_ops.Dataset.from_tensor_slices(filenames) - if label_key is not None: - assert label_key in column_keys + if label_name is not None and label_name not in column_names: + raise ValueError("`label_name` provided must be one of the columns.") + + # Define map and filter functions + def filter_fn(line): + return math_ops.not_equal(string_ops.substr(line, 0, 1), comment) def filename_to_dataset(filename): ds = core_readers.TextLineDataset(filename) - if skip > 0: - ds = ds.skip(skip) - if filter_fn is not None: + if header: + ds = ds.skip(1) + if comment is not None: ds = ds.filter(filter_fn) return ds def decode_csv(line): - """Decodes csv line into features. + """Decodes CSV line into features. Args: line: String tensor corresponding to one csv record. Returns: A dictionary of feature names to values for that particular record. If - label_key is provided, extracts the label feature to be returned as the + label_name is provided, extracts the label feature to be returned as the second element of the tuple. """ columns = parsing_ops.decode_csv( line, column_defaults, field_delim=field_delim, - use_quote_delim=use_quote_delim) - features = dict(zip(column_keys, columns)) - if label_key is not None: - label = features.pop(label_key) + use_quote_delim=use_quote_delim, + na_value=na_value, + ) + features = dict(zip(column_names, columns)) + if label_name is not None: + label = features.pop(label_name) return features, label return features @@ -287,7 +457,7 @@ def make_batched_features_dataset(file_pattern, lambda x: parsing_ops.parse_example(x, features), num_parallel_calls=parser_num_threads) - # TODO(rachelim): Add an optional label_key argument for extracting the label + # TODO(rachelim): Add an optional label_name argument for extracting the label # from the features dictionary, to comply with the type expected by the # input_fn to a `tf.Estimator.train` or `tf.Estimator.evaluate` function. dataset = dataset.prefetch(prefetch_buffer_size) -- GitLab From 005b8aa42c273a0152642279d0c57aa9e08ccbe0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 11:05:37 -0700 Subject: [PATCH 486/960] Fixes an issue with calling tf.contrib.seq2seq.dynamic_decode with an extended BasicDecoder which for example returns a tf.contrib.seq2seq.AttentionWrapperState. In this case the internal while-loop fails when trying to store an instance tf.contrib.seq2seq.AttentionWrapperState in the internal TensorArray. PiperOrigin-RevId: 190491787 --- tensorflow/contrib/seq2seq/python/ops/decoder.py | 15 +++++---------- tensorflow/python/ops/rnn.py | 15 ++++++--------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/ops/decoder.py b/tensorflow/contrib/seq2seq/python/ops/decoder.py index f14974b9d5..898493662d 100644 --- a/tensorflow/contrib/seq2seq/python/ops/decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/decoder.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn +from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope from tensorflow.python.util import nest @@ -39,6 +40,7 @@ __all__ = ["Decoder", "dynamic_decode"] _transpose_batch_time = rnn._transpose_batch_time # pylint: disable=protected-access +_zero_state_tensors = rnn_cell_impl._zero_state_tensors # pylint: disable=protected-access @six.add_metaclass(abc.ABCMeta) @@ -133,16 +135,8 @@ class Decoder(object): def _create_zero_outputs(size, dtype, batch_size): """Create a zero outputs Tensor structure.""" - def _t(s): - return (s if isinstance(s, ops.Tensor) else constant_op.constant( - tensor_shape.TensorShape(s).as_list(), - dtype=dtypes.int32, - name="zero_suffix_shape")) - def _create(s, d): - return array_ops.zeros( - array_ops.concat( - ([batch_size], _t(s)), axis=0), dtype=d) + return _zero_state_tensors(s, batch_size, d) return nest.map_structure(_create, size, dtype) @@ -212,7 +206,8 @@ def dynamic_decode(decoder, initial_time = constant_op.constant(0, dtype=dtypes.int32) def _shape(batch_size, from_shape): - if not isinstance(from_shape, tensor_shape.TensorShape): + if (not isinstance(from_shape, tensor_shape.TensorShape) or + from_shape.ndims == 0): return tensor_shape.TensorShape(None) else: batch_size = tensor_util.constant_value( diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 42af7f8b27..1dd464d51d 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -49,24 +49,21 @@ _concat = rnn_cell_impl._concat def _transpose_batch_time(x): - """Transpose the batch and time dimensions of a Tensor. + """Transposes the batch and time dimensions of a Tensor. - Retains as much of the static shape information as possible. + If the input tensor has rank < 2 it returns the original tensor. Retains as + much of the static shape information as possible. Args: - x: A tensor of rank 2 or higher. + x: A Tensor. Returns: x transposed along the first two dimensions. - - Raises: - ValueError: if `x` is rank 1 or lower. """ x_static_shape = x.get_shape() if x_static_shape.ndims is not None and x_static_shape.ndims < 2: - raise ValueError( - "Expected input tensor %s to have rank at least 2, but saw shape: %s" % - (x, x_static_shape)) + return x + x_rank = array_ops.rank(x) x_t = array_ops.transpose( x, array_ops.concat( -- GitLab From 7e4432ca4da28621deb20b8f3ce7cec6aa0e8e67 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 26 Mar 2018 11:15:37 -0700 Subject: [PATCH 487/960] BUGFIX: Fix failure-to-broadcast in Wishart.sample. PiperOrigin-RevId: 190493969 --- .../python/kernel_tests/wishart_test.py | 20 +++++++++++++++++++ .../distributions/python/ops/wishart.py | 5 ++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py index 9044aa2850..dcecce981f 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py @@ -390,6 +390,26 @@ class WishartCholeskyTest(test.TestCase): chol_scale, dtype=np.int32), validate_args=False) + def testSampleBroadcasts(self): + dims = 2 + batch_shape = [2, 3] + sample_shape = [2, 1] + scale = np.float32([ + [[1., 0.5], + [0.5, 1.]], + [[0.5, 0.25], + [0.25, 0.75]], + ]) + scale = np.reshape(np.concatenate([scale, scale, scale], axis=0), + batch_shape + [dims, dims]) + wishart = distributions.WishartFull(df=5, scale=scale) + x = wishart.sample(sample_shape, seed=42) + with self.test_session() as sess: + x_ = sess.run(x) + expected_shape = sample_shape + batch_shape + [dims, dims] + self.assertAllEqual(expected_shape, x.shape) + self.assertAllEqual(expected_shape, x_.shape) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index e4ac65012b..5a8c94dabf 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -228,9 +228,12 @@ class _WishartLinearOperator(distribution.Distribution): # Complexity: O(nbk) # This parametrization is equivalent to Chi2, i.e., # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2) + expanded_df = self.df * array_ops.ones( + self.scale_operator.batch_shape_tensor(), + dtype=self.df.dtype.base_dtype) g = random_ops.random_gamma(shape=[n], alpha=self._multi_gamma_sequence( - 0.5 * self.df, self.dimension), + 0.5 * expanded_df, self.dimension), beta=0.5, dtype=self.dtype, seed=distribution_util.gen_new_seed( -- GitLab From a7588a70a5de8ece6920f4eb8b877104ede898f7 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 26 Mar 2018 11:18:26 -0700 Subject: [PATCH 488/960] tf.GradientTape: Clearly say that tf.while_loop and tf.cond are not supported by tf.GradientTape.gradient() at this time. PiperOrigin-RevId: 190494436 --- tensorflow/python/eager/BUILD | 1 + tensorflow/python/eager/backprop.py | 8 ++++ tensorflow/python/eager/backprop_test.py | 44 ++++++++++++++++++++++ tensorflow/python/ops/control_flow_grad.py | 5 +-- 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 5bedf9c6fd..0e089a26eb 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -105,6 +105,7 @@ cuda_py_test( ":test", "//tensorflow/python:embedding_ops", "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:math_ops", "//tensorflow/python:nn_ops", "//tensorflow/python:resource_variable_ops", diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 06e11f6ef9..cdcce65c52 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -86,6 +86,14 @@ class _MockOp(object): return make_attr(typ, self.attrs[i + 1]) raise KeyError(attr) + def _get_control_flow_context(self): + raise NotImplementedError( + "tf.GradientTape.gradients() does not support graph control flow " + "operations like tf.cond or tf.while at this time. Use tf.gradients() " + "instead. If you need this feature, please file a feature request at " + "https://github.com/tensorflow/tensorflow/issues/new" + ) + def _magic_gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, out_grads): diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index bca2928708..f04d89a6d9 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gradients @@ -384,6 +385,49 @@ class BackpropTest(test.TestCase): grad = g.gradient(y, [x])[0] self.assertEqual(self.evaluate(grad), 6.0) + @test_util.run_in_graph_and_eager_modes() + def testGradientTapeWithCond(self): + x = constant_op.constant(3.0) + + def true_fn(): + return x + + def false_fn(): + return x * x + + with backprop.GradientTape() as g: + g.watch(x) + y = control_flow_ops.cond(x < x, true_fn, false_fn) + + if not context.executing_eagerly(): + with self.assertRaisesRegexp(NotImplementedError, 'tf.gradients'): + dy = g.gradient(y, [x])[0] + else: + dy = g.gradient(y, [x])[0] + self.assertEqual(self.evaluate(dy), 6.0) + + @test_util.run_in_graph_and_eager_modes() + def testGradientTapeWithWhileLoop(self): + i = constant_op.constant(1) + x = constant_op.constant(2.) + + def cond(i, _): + return i < 3 + + def body(i, x): + return i + 1, x * 2 + + with backprop.GradientTape() as g: + g.watch([x]) + _, y = control_flow_ops.while_loop(cond, body, [i, x]) + + if not context.executing_eagerly(): + with self.assertRaisesRegexp(NotImplementedError, 'tf.gradients'): + dy = g.gradient(y, [x])[0] + else: + dy = g.gradient(y, [x])[0] + self.assertEqual(self.evaluate(dy), 4.0) + @test_util.assert_no_new_tensors def testGradientTapeGradientCalledMultipleTimes(self): with backprop.GradientTape() as g: diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py index 21354b5ae8..45955554ca 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -142,6 +142,7 @@ def _ExitGrad(op, grad): """Gradients for an exit op are calculated using an Enter op.""" graph = ops.get_default_graph() # pylint: disable=protected-access + op_ctxt = op._get_control_flow_context() grad_ctxt = graph._get_control_flow_context() # pylint: enable=protected-access if not grad_ctxt.back_prop: @@ -150,10 +151,8 @@ def _ExitGrad(op, grad): # no gradient computation. return None - # pylint: disable=protected-access - if op._get_control_flow_context().grad_state: + if op_ctxt.grad_state: raise TypeError("Second-order gradient for while loops not supported.") - # pylint: enable=protected-access if isinstance(grad, ops.Tensor): grad_ctxt.AddName(grad.name) -- GitLab From 73937a7096908a9ae01dd7da2d76932a7fed194b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Mar 2018 11:38:15 -0700 Subject: [PATCH 489/960] Made the NumElements function more accurate PiperOrigin-RevId: 190497916 --- tensorflow/core/framework/shape_inference.cc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 641681973a..54ecaa5dd4 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -298,13 +298,23 @@ bool InferenceContext::FullyDefined(ShapeHandle s) { DimensionHandle InferenceContext::NumElements(ShapeHandle s) { const auto rank = Rank(s); if (rank == kUnknownRank) return UnknownDim(); + bool found_unknown = false; int64 size = 1; for (int i = 0; i < rank; ++i) { int64 dim_val = Value(Dim(s, i)); - if (dim_val == kUnknownDim) return UnknownDim(); - size *= dim_val; + if (dim_val == kUnknownDim) { + found_unknown = true; + } else if (dim_val == 0) { + return MakeDim(0); + } else { + size *= dim_val; + } + } + if (found_unknown) { + return UnknownDim(); + } else { + return MakeDim(size); } - return MakeDim(size); } string InferenceContext::DebugString(ShapeHandle s) { -- GitLab From 6d46c21e9f300d07e30a2185671f07d34fac3999 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 11:44:19 -0700 Subject: [PATCH 490/960] Make the CSE ("node deduping") pass in ArithmeticOptimizer more robust in the presence of ops that modify their inputs in-place: Do not dedup nodes if the underlying buffers for their outputs may be passed to an in-place op. PiperOrigin-RevId: 190499037 --- tensorflow/core/grappler/op_types.cc | 13 ++++-- tensorflow/core/grappler/op_types.h | 4 ++ .../optimizers/arithmetic_optimizer.cc | 42 ++++++++++++++++--- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 259168bb33..1a6751befc 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -396,12 +396,17 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } } + return !ModifiesInputsInPlace(node); +} + +bool ModifiesInputsInPlace(const NodeDef& node) { // Some nodes do in-place updates on regular tensor inputs. - if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace") || - StringPiece(op_name).starts_with("Inplace")) { - return false; + string op_name = node.op(); + std::transform(op_name.begin(), op_name.end(), op_name.begin(), ::tolower); + if (StringPiece(op_name).contains("inplace")) { + return true; } - return true; + return GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace"); } bool ModifiesFrameInfo(const NodeDef& node) { diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 49e01f68e3..1ec1cd46e3 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -154,8 +154,12 @@ bool IsCommutative(const NodeDef& node); bool IsPersistent(const NodeDef& node); bool IsFreeOfSideEffect(const NodeDef& node); + bool ModifiesFrameInfo(const NodeDef& node); +// Returns true if the op is known to write to one or more of its inputs. +bool ModifiesInputsInPlace(const NodeDef& node); + // Returns true if the op is an element-wise involution, i.e. if it is its // own inverse such that f(f(x)) == x. bool IsInvolution(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index bc004df608..23e21855c8 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1085,6 +1085,24 @@ bool ArithmeticOptimizer::OptimizedNodeExists(const NodeDef& node, return node_map_->NodeExists(OptimizedNodeName(node, suffix)); } +namespace { + +bool FeedsInPlaceOp(const SimpleGraphView& graph_view, const NodeDef& node) { + const std::unordered_set op_types_to_traverse = { + node.op(), "Identity", "IdentityN", "Reshape"}; + int node_idx = graph_view.index(node.name()); + std::set node_fanout; + graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &node_fanout); + for (int fanout : node_fanout) { + if (ModifiesInputsInPlace(graph_view.graph()->node(fanout))) { + return true; + } + } + return false; +} + +} // namespace + bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; @@ -1104,6 +1122,11 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { void ArithmeticOptimizer::DedupComputations() { bool stop = true; + SimpleGraphView graph_view; + if (!graph_view.Initialize(*optimized_graph_).ok()) { + LOG(WARNING) << "Failed to build SimpleGraphView."; + return; + } std::set duplicates; do { stop = true; @@ -1120,19 +1143,28 @@ void ArithmeticOptimizer::DedupComputations() { if (rep == node) { continue; } + // If either node feeds an inplace op, deduping them may cause data races. + // For example: If we dedup nodes initializing two independent inplace + // accumulations, they will write to the same buffer, clobbering each + // other's results. + if (FeedsInPlaceOp(graph_view, *rep) || + FeedsInPlaceOp(graph_view, *node)) { + continue; + } const std::set& fanouts = node_map_->GetOutputs(node->name()); for (NodeDef* fanout : fanouts) { - for (string& name : *fanout->mutable_input()) { + for (int i = 0; i < fanout->input_size(); ++i) { + string* name = fanout->mutable_input(i); int position; - const string nodename = ParseNodeName(name, &position); + const string nodename = ParseNodeName(*name, &position); if (nodename == node->name()) { // Update name in-place. if (position > 0) { - name = StrCat(rep->name(), ":", position); + *name = StrCat(rep->name(), ":", position); } else if (position == 0) { - name = rep->name(); + *name = rep->name(); } else { - name = StrCat("^", rep->name()); + *name = StrCat("^", rep->name()); } node_map_->AddOutput(rep->name(), fanout->name()); } -- GitLab From 3fbdba0c84941f34782a5e074b691916bca61a93 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 26 Mar 2018 11:49:03 -0700 Subject: [PATCH 491/960] update GPU installation instructions --- tensorflow/docs_src/install/install_linux.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 378946b459..3c5db9bced 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -33,7 +33,7 @@ must be installed on your system: * CUDA® Toolkit 9.0. For details, see [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A). - Ensure that you append the relevant Cuda pathnames to the + Ensure that you append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment variable as described in the NVIDIA documentation. * The NVIDIA drivers associated with CUDA Toolkit 9.0. @@ -56,7 +56,7 @@ must be installed on your system: and add its path to your `LD_LIBRARY_PATH` environment variable:

-    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
+    $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64
     
For CUDA Toolkit <= 7.5 do: @@ -64,6 +64,16 @@ must be installed on your system:
     $ sudo apt-get install libcupti-dev
     
+ * **[OPTIONAL]** For optimized inferencing performance, you can also install + NVIDIA TensorRT 3.0. For details, see + [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing-tar). + Only steps 1-4 in the TensorRT Tar File installation instructions are + required for compatibility with TensorFlow; the Python package installation + in steps 5 and 6 can be omitted. Detailed installation instructions can be found at [package documentataion](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#installing-tensorrt-304) + + **IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu` + package, please use the Ubuntu **14.04** tar file package of TensorRT + even when installing onto an Ubuntu 16.04 system. If you have an earlier version of the preceding packages, please upgrade to the specified versions. If upgrading is not possible, then you may still run -- GitLab From d2604f8dcb8a63ca063f712c24ce5aa63403b0aa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 11:47:50 -0700 Subject: [PATCH 492/960] Revert to initializing number of threads when SetNumThreads is called. Requiring it to happen before OpInit() is way too confusing for users. PiperOrigin-RevId: 190499644 --- tensorflow/contrib/lite/BUILD | 2 +- tensorflow/contrib/lite/interpreter.cc | 6 ++++++ tensorflow/contrib/lite/kernels/conv.cc | 5 ++--- tensorflow/contrib/lite/kernels/eigen_support.cc | 7 +++++++ tensorflow/contrib/lite/kernels/eigen_support.h | 3 +++ tensorflow/contrib/lite/kernels/gemm_support.cc | 6 ++++++ tensorflow/contrib/lite/kernels/gemm_support.h | 3 +++ 7 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index dafe6f136e..18efa64507 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -133,10 +133,10 @@ cc_library( ":schema_fbs_version", ":simple_memory_arena", ":util", + "//tensorflow/contrib/lite/kernels:eigen_support", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/schema:schema_fbs", - "//tensorflow/core:lib_platform", ], ) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 937c185b0a..4575fe884d 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/graph_info.h" +#include "tensorflow/contrib/lite/kernels/eigen_support.h" #include "tensorflow/contrib/lite/kernels/gemm_support.h" #include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" @@ -762,6 +763,11 @@ void Interpreter::UseNNAPI(bool enable) { void Interpreter::SetNumThreads(int num_threads) { context_.recommended_num_threads = num_threads; + + // TODO(ahentz): find a way to avoid this. It causes gemmlowp and eigen to + // be required in order to compile the framework. + gemm_support::SetNumThreads(&context_, num_threads); + eigen_support::SetNumThreads(&context_, num_threads); } TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate, diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index e0cd12f1b4..18ff33bf9f 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -89,9 +89,6 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { auto* data = new OpData; gemm_support::IncrementUsageCounter(context); eigen_support::IncrementUsageCounter(context); - - data->run_multithreaded_kernel = context->recommended_num_threads != 1; - return data; } @@ -176,6 +173,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); OpData* data = reinterpret_cast(node->user_data); + data->run_multithreaded_kernel = context->recommended_num_threads != 1; + TF_LITE_ENSURE_STATUS(AllocateTemporaryTensorsIfRequired(context, node)); bool hasBias = node->inputs->size == 3; diff --git a/tensorflow/contrib/lite/kernels/eigen_support.cc b/tensorflow/contrib/lite/kernels/eigen_support.cc index 213e465552..f1fdb42624 100644 --- a/tensorflow/contrib/lite/kernels/eigen_support.cc +++ b/tensorflow/contrib/lite/kernels/eigen_support.cc @@ -46,8 +46,15 @@ void DecrementUsageCounter(TfLiteContext* context) { } if (--ptr->num_references == 0) { delete ptr; + context->eigen_context = nullptr; } } +void SetNumThreads(TfLiteContext* context, int num_threads) { + IncrementUsageCounter(context); + Eigen::setNbThreads(num_threads); + DecrementUsageCounter(context); +} + } // namespace eigen_support } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/eigen_support.h b/tensorflow/contrib/lite/kernels/eigen_support.h index d47e691123..aa8c351fd8 100644 --- a/tensorflow/contrib/lite/kernels/eigen_support.h +++ b/tensorflow/contrib/lite/kernels/eigen_support.h @@ -28,6 +28,9 @@ void IncrementUsageCounter(TfLiteContext* context); // usages all temporary Eigen objects will be deleted. void DecrementUsageCounter(TfLiteContext* context); +// Set the number of threads that can be used by Eigen. +void SetNumThreads(TfLiteContext* context, int num_threads); + } // namespace eigen_support } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/gemm_support.cc b/tensorflow/contrib/lite/kernels/gemm_support.cc index 76a5165d14..95f45ea768 100644 --- a/tensorflow/contrib/lite/kernels/gemm_support.cc +++ b/tensorflow/contrib/lite/kernels/gemm_support.cc @@ -61,5 +61,11 @@ gemmlowp::GemmContext* GetFromContext(TfLiteContext* context) { return ptr->gemm_context_; } +void SetNumThreads(TfLiteContext* context, int num_threads) { + IncrementUsageCounter(context); + GetFromContext(context)->set_max_num_threads(num_threads); + DecrementUsageCounter(context); +} + } // namespace gemm_support } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/gemm_support.h b/tensorflow/contrib/lite/kernels/gemm_support.h index 37af772c68..f033501cb6 100644 --- a/tensorflow/contrib/lite/kernels/gemm_support.h +++ b/tensorflow/contrib/lite/kernels/gemm_support.h @@ -45,6 +45,9 @@ void IncrementUsageCounter(TfLiteContext* context); // 'context'. If there are no more usages the GemmContext will be deleted. void DecrementUsageCounter(TfLiteContext* context); +// Set the number of threads that can be used by gemmlowp. +void SetNumThreads(TfLiteContext* context, int num_threads); + } // namespace gemm_support } // namespace tflite -- GitLab From f9cfb9e917c8937152b248c300b095798d79501a Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Mar 2018 11:50:19 -0700 Subject: [PATCH 493/960] Extended experimental C API with MNIST dataset/iterators support. PiperOrigin-RevId: 190500020 --- tensorflow/c/c_api_experimental.cc | 1151 ++++++++++++++++++++++- tensorflow/c/c_api_experimental.h | 11 +- tensorflow/c/c_api_experimental_test.cc | 4 +- 3 files changed, 1149 insertions(+), 17 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 1c809cb21e..f411efc941 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -138,7 +138,7 @@ static std::vector CreateFunctionsFromTextProto( return {}; } std::vector ret; - for (const auto& fdef : fdef_lib.function()) { + for (const FunctionDef& fdef : fdef_lib.function()) { // Make a copy so that we can mutate it. FunctionDef fdef_to_load = fdef; if (mutate_proto_func) { @@ -148,8 +148,8 @@ static std::vector CreateFunctionsFromTextProto( std::vector binary_proto_buf(fdef_to_load.ByteSizeLong()); fdef_to_load.SerializeToArray(binary_proto_buf.data(), binary_proto_buf.size()); - auto func = TF_FunctionImportFunctionDef(binary_proto_buf.data(), - binary_proto_buf.size(), status); + TF_Function* func = TF_FunctionImportFunctionDef( + binary_proto_buf.data(), binary_proto_buf.size(), status); if (!status->status.ok()) return {}; ret.push_back(UniqueFuncPtr(func, TF_DeleteFunction)); } @@ -7120,6 +7120,1130 @@ library { return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); } +// On success, returns a set of TF_Function instances encoding a dataset +// node stack that reads an MNIST file dataset from `file_path`, and +// sets `dataset_name` to the created dataset name. The returned functions must +// be deleted by calling TF_DeleteFunction. +static std::vector CreateMNISTDatasetFunctions( + const char* file_path, std::string* dataset_name, TF_Status* status) { + const char* func_def = R"PREFIX( +library { + function { + signature { + name: "tf_map_func_521bfd08" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "truediv" + type: DT_FLOAT + } + description: "A wrapper for Defun that facilitates shape inference." + } + node_def { + name: "DecodeRaw" + op: "DecodeRaw" + input: "arg0" + attr { + key: "little_endian" + value { + b: true + } + } + attr { + key: "out_type" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "Cast" + op: "Cast" + input: "DecodeRaw:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "Reshape/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 784 + } + } + } + } + node_def { + name: "Reshape" + op: "Reshape" + input: "Cast:y:0" + input: "Reshape/shape:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "truediv/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 255.0 + } + } + } + } + node_def { + name: "truediv" + op: "RealDiv" + input: "Reshape:output:0" + input: "truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "truediv" + value: "truediv:z:0" + } + } + function { + signature { + name: "tf_map_func_9a08860d" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "ToInt32" + type: DT_INT32 + } + description: "A wrapper for Defun that facilitates shape inference." + } + node_def { + name: "DecodeRaw" + op: "DecodeRaw" + input: "arg0" + attr { + key: "little_endian" + value { + b: true + } + } + attr { + key: "out_type" + value { + type: DT_UINT8 + } + } + } + node_def { + name: "Reshape/shape" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "Reshape" + op: "Reshape" + input: "DecodeRaw:output:0" + input: "Reshape/shape:output:0" + attr { + key: "T" + value { + type: DT_UINT8 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "ToInt32" + op: "Cast" + input: "Reshape:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_UINT8 + } + } + } + ret { + key: "ToInt32" + value: "ToInt32:y:0" + } + } + function { + signature { + name: "tf_predicate_7089b845" + input_arg { + name: "arg0" + type: DT_FLOAT + } + input_arg { + name: "arg1" + type: DT_INT32 + } + input_arg { + name: "Equal/Placeholder" + type: DT_INT64 + } + output_arg { + name: "Equal" + type: DT_BOOL + } + description: "A wrapper for Defun that facilitates shape inference." + } + node_def { + name: "Shape" + op: "Shape" + input: "arg0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "out_type" + value { + type: DT_INT64 + } + } + } + node_def { + name: "strided_slice/stack" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + } + node_def { + name: "strided_slice/stack_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "strided_slice/stack_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + } + node_def { + name: "strided_slice" + op: "StridedSlice" + input: "Shape:output:0" + input: "strided_slice/stack:output:0" + input: "strided_slice/stack_1:output:0" + input: "strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } + } + node_def { + name: "Equal" + op: "Equal" + input: "strided_slice:output:0" + input: "Equal/Placeholder" + attr { + key: "T" + value { + type: DT_INT64 + } + } + } + ret { + key: "Equal" + value: "Equal:z:0" + } + } + function { + signature { + name: "_make_dataset_2451e43a" + output_arg { + name: "FilterDataset" + type: DT_VARIANT + } + is_stateful: true + } + node_def { + name: "FixedLengthRecordDataset/filenames" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "$(DATA_DIR)/train-images-idx3-ubyte" + } + } + } + } + node_def { + name: "FixedLengthRecordDataset/header_bytes" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 16 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset/record_bytes" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 784 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset/footer_bytes" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset/buffer_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 262144 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset" + op: "FixedLengthRecordDataset" + input: "FixedLengthRecordDataset/filenames:output:0" + input: "FixedLengthRecordDataset/header_bytes:output:0" + input: "FixedLengthRecordDataset/record_bytes:output:0" + input: "FixedLengthRecordDataset/footer_bytes:output:0" + input: "FixedLengthRecordDataset/buffer_size:output:0" + } + node_def { + name: "MapDataset" + op: "MapDataset" + input: "FixedLengthRecordDataset:handle:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_map_func_521bfd08" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 784 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1/filenames_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "$(DATA_DIR)/train-labels-idx1-ubyte" + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1/header_bytes_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 8 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1/record_bytes_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1/footer_bytes_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1/buffer_size_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 262144 + } + } + } + } + node_def { + name: "FixedLengthRecordDataset_1" + op: "FixedLengthRecordDataset" + input: "FixedLengthRecordDataset_1/filenames_1:output:0" + input: "FixedLengthRecordDataset_1/header_bytes_1:output:0" + input: "FixedLengthRecordDataset_1/record_bytes_1:output:0" + input: "FixedLengthRecordDataset_1/footer_bytes_1:output:0" + input: "FixedLengthRecordDataset_1/buffer_size_1:output:0" + } + node_def { + name: "MapDataset_1" + op: "MapDataset" + input: "FixedLengthRecordDataset_1:handle:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_map_func_9a08860d" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + } + } + } + } + node_def { + name: "ZipDataset" + op: "ZipDataset" + input: "MapDataset:handle:0" + input: "MapDataset_1:handle:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 784 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "CacheDataset/filename" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "CacheDataset" + op: "CacheDataset" + input: "ZipDataset:handle:0" + input: "CacheDataset/filename:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 784 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "RepeatDataset/count" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } + } + node_def { + name: "RepeatDataset" + op: "RepeatDataset" + input: "CacheDataset:handle:0" + input: "RepeatDataset/count:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 784 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "ShuffleDataset/buffer_size_2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 50000 + } + } + } + } + node_def { + name: "ShuffleDataset/seed" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset/seed2" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + } + node_def { + name: "ShuffleDataset" + op: "ShuffleDataset" + input: "RepeatDataset:handle:0" + input: "ShuffleDataset/buffer_size_2:output:0" + input: "ShuffleDataset/seed:output:0" + input: "ShuffleDataset/seed2:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 784 + } + } + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } + } + node_def { + name: "BatchDataset/batch_size" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 128 + } + } + } + } + node_def { + name: "BatchDataset" + op: "BatchDataset" + input: "ShuffleDataset:handle:0" + input: "BatchDataset/batch_size:output:0" + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 784 + } + } + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + } + node_def { + name: "FilterDataset/batch_size_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 128 + } + } + } + } + node_def { + name: "FilterDataset" + op: "FilterDataset" + input: "BatchDataset:handle:0" + input: "FilterDataset/batch_size_1:output:0" + attr { + key: "Targuments" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 784 + } + } + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_FLOAT + type: DT_INT32 + } + } + } + attr { + key: "predicate" + value { + func { + name: "tf_predicate_7089b845" + } + } + } + } + ret { + key: "FilterDataset" + value: "FilterDataset:handle:0" + } + } +} +)PREFIX"; + + *dataset_name = "_make_dataset_2451e43a"; + std::function mutate_proto_func = + [dataset_name, file_path](FunctionDef* fdef) { + VLOG(1) << "Processsing function " << fdef->DebugString(); + if (std::string(fdef->signature().name()) != *dataset_name) return; + // Change the input file pattern to `file_path`. + bool found = false; + // `node_def` may be mutated. + for (auto& node_def : *fdef->mutable_node_def()) { + if (node_def.name() != "FixedLengthRecordDataset/filenames" && + node_def.name() != "FixedLengthRecordDataset_1/filenames_1") + continue; + DCHECK_EQ(node_def.op(), "Const"); + DCHECK_GT(node_def.attr().count("value"), 0); + found = true; + // Replace $(DATA_DIR)/foo with /foo + // TODO(hongm): Use StringPiece manipulation for better efficiency. + const std::string cur_value = + node_def.attr().at("value").tensor().string_val(0); + const std::string pattern = "$(DATA_DIR)"; + DCHECK_EQ(cur_value.compare(0, pattern.length(), pattern), 0); + const std::string new_value = + file_path + cur_value.substr(pattern.length()); + VLOG(1) << "Setting the value of node_def " << node_def.name() + << " to " << new_value; + auto* tensor = (*node_def.mutable_attr())["value"].mutable_tensor(); + tensor->clear_string_val(); + tensor->add_string_val(new_value); + } + VLOG(1) << "Rewrote function to " << fdef->DebugString(); + DCHECK(found); + }; + return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); +} + // Adds the input functions to `graph`. On success, returns the created // IteratorGetNext node. static TF_Operation* AddDatasetFunctionAndIteratorNodesToGraph( @@ -7209,15 +8333,16 @@ TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets(TF_Graph* graph, return getnext_node; } -TF_Operation* TF_MakeImagenetIteratorGetNextWithDatasets(TF_Graph* graph, - const char* file_path, - int batch_size, - TF_Status* status) { +TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( + TF_Graph* graph, const char* file_path, int batch_size, + unsigned char is_mnist, TF_Status* status) { tensorflow::Status s; std::string dataset_name; const auto& funcs = - CreateImagenetDatasetFunctions(file_path, &dataset_name, status); + is_mnist + ? CreateMNISTDatasetFunctions(file_path, &dataset_name, status) + : CreateImagenetDatasetFunctions(file_path, &dataset_name, status); if (!status->status.ok()) { return nullptr; } @@ -7226,9 +8351,13 @@ TF_Operation* TF_MakeImagenetIteratorGetNextWithDatasets(TF_Graph* graph, // batch_size X 224 X 224 X 3 auto image_shape = tensorflow::TensorShapeProto(); image_shape.add_dim()->set_size(batch_size); - image_shape.add_dim()->set_size(224); - image_shape.add_dim()->set_size(224); - image_shape.add_dim()->set_size(3); + if (is_mnist) { + image_shape.add_dim()->set_size(784); + } else { + image_shape.add_dim()->set_size(224); + image_shape.add_dim()->set_size(224); + image_shape.add_dim()->set_size(3); + } output_shape_list.push_back(image_shape); // batch_size diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index a9c551d73e..ebcec8176b 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -96,13 +96,16 @@ TF_CAPI_EXPORT extern TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets( TF_Graph* graph, TF_Status* status); // Similar to the above API, except that the returned iterator reads the -// TFRecord files from `file_path`. +// file based dataset from `file_path`. +// If `is_mnist` is 0, the dataset corresponds to ImageNet. // The iterators outputs 2 tensors: -// - A float tensor of shape `batch_size` X 224 X 224 X 3 +// - A float tensor of shape `batch_size` X 784 when `is_mnist` is non-zero, or +// `batch_size` X 224 X 224 X 3 otherwise. // - An int32 tensor of shape `batch_size` // TODO(hongm): Extend the API to allow customization of the nodes created. -TF_CAPI_EXPORT extern TF_Operation* TF_MakeImagenetIteratorGetNextWithDatasets( - TF_Graph* graph, const char* file_path, int batch_size, TF_Status* status); +TF_CAPI_EXPORT extern TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( + TF_Graph* graph, const char* file_path, int batch_size, + unsigned char is_mnist, TF_Status* status); #ifdef __cplusplus } /* end extern "C" */ diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc index 49d64d18bf..30fcfd401d 100644 --- a/tensorflow/c/c_api_experimental_test.cc +++ b/tensorflow/c/c_api_experimental_test.cc @@ -68,8 +68,8 @@ TEST(CAPI_EXPERIMENTAL, ImagenetIteratorGetNext) { tensorflow::testing::TensorFlowSrcRoot(), "c/testdata/tf_record"); VLOG(1) << "data file path is " << file_path; const int batch_size = 64; - TF_Operation* get_next = TF_MakeImagenetIteratorGetNextWithDatasets( - graph, file_path.c_str(), batch_size, s); + TF_Operation* get_next = TF_MakeFileBasedIteratorGetNextWithDatasets( + graph, file_path.c_str(), batch_size, /*is_mnist*/ false, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); CSession csession(graph, s); -- GitLab From af0fe569f48f3d5e8405eab76e14abde3c4e3d36 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 12:14:05 -0700 Subject: [PATCH 494/960] LSTM support: Support fused activation functions in int16 Add ops. PiperOrigin-RevId: 190503823 --- .../kernels/internal/optimized/optimized_ops.h | 17 +++++++++++++++-- .../kernels/internal/reference/reference_ops.h | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f7840258ec..d7a0005f27 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1660,11 +1660,21 @@ template inline void Add(const int16* input1_data, const Dims<4>& input1_dims, int input1_shift, const int16* input2_data, const Dims<4>& input2_dims, int input2_shift, + int16 output_activation_min, int16 output_activation_max, int16* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Add/Int16"); // This is a copy of the reference implementation. We do not currently have a // properly optimized version. - static_assert(Ac == FusedActivationFunctionType::kNone, ""); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, -32768); + TFLITE_DCHECK_EQ(output_activation_max, 32767); + } const int flat_size = RequiredBufferSizeForDims(output_dims); TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); @@ -1685,7 +1695,10 @@ inline void Add(const int16* input1_data, const Dims<4>& input1_dims, F0 scaled_input = F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_shift)); F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); - output_data[i] = result.raw(); + const int16 raw_output = result.raw(); + const int16 clamped_output = std::min( + output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; } } diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 472ddc60df..ce12fad95d 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -956,8 +956,18 @@ template inline void Add(const int16* input1_data, const Dims<4>& input1_dims, int input1_shift, const int16* input2_data, const Dims<4>& input2_dims, int input2_shift, + int16 output_activation_min, int16 output_activation_max, int16* output_data, const Dims<4>& output_dims) { - static_assert(Ac == FusedActivationFunctionType::kNone, ""); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, -32768); + TFLITE_DCHECK_EQ(output_activation_max, 32767); + } const int flat_size = RequiredBufferSizeForDims(output_dims); TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); @@ -978,7 +988,10 @@ inline void Add(const int16* input1_data, const Dims<4>& input1_dims, F0 scaled_input = F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_shift)); F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); - output_data[i] = result.raw(); + const int16 raw_output = result.raw(); + const int16 clamped_output = std::min( + output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; } } -- GitLab From 04b1e736897505ccf5b483379289d02a274ea586 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Mon, 26 Mar 2018 12:16:34 -0700 Subject: [PATCH 495/960] tfdbg CLI: Allow node exclusion with tensor filters Fixes: #16619 See the referred GitHub issue for details, but users want to be able to skip certain nodes when searching for inf/nans, because some nodes generate inf/nans even in nominal conditions. This CL adds a new optional flag `--filter_exclude_node_names` (or `-fenn` for short), which allows users to do exactly that, by using a regex for node names. RELNOTES: tfdbg CLI: Allow exclusion of nodes by regular expressions during tensor filter-enabled Session runs: see the new flags `--filter_exclude_node_names` (or `-fenn` for short). PiperOrigin-RevId: 190504225 --- .../docs_src/programmers_guide/debugger.md | 16 ++++++ tensorflow/python/debug/cli/analyzer_cli.py | 22 ++++++++- .../python/debug/cli/analyzer_cli_test.py | 26 ++++++++++ tensorflow/python/debug/lib/debug_data.py | 14 +++++- .../python/debug/lib/session_debug_testlib.py | 49 +++++++++++++++++++ .../debug/wrappers/local_cli_wrapper.py | 39 +++++++++++++-- .../debug/wrappers/local_cli_wrapper_test.py | 36 +++++++++++++- 7 files changed, 196 insertions(+), 6 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index d1399814ee..d1cd7e7c06 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -155,6 +155,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-n ` | List dumped tensors with names matching given regular-expression pattern. | `lt -n Softmax.*` | | | `-t ` | List dumped tensors with op types matching given regular-expression pattern. | `lt -t MatMul` | | | `-f ` | List only the tensors that pass a registered tensor filter. | `lt -f has_inf_or_nan` | +| | `-f -fenn ` | List only the tensors that pass a registered tensor filter, excluding nodes with names matching the regular expression. | `lt -f has_inf_or_nan` `-fenn .*Sqrt.*` | | | `-s ` | Sort the output by given `sort_key`, whose possible values are `timestamp` (default), `dump_size`, `op_type` and `tensor_name`. | `lt -s dump_size` | | | `-r` | Sort in reverse order. | `lt -r -s dump_size` | | **`pt`** | | **Print value of a dumped tensor.** | | @@ -200,6 +201,7 @@ Try the following commands at the `tfdbg>` prompt (referencing the code at | | `-n` | Execute through the next `Session.run` without debugging, and drop to CLI right before the run after that. | `run -n` | | | `-t ` | Execute `Session.run` `T - 1` times without debugging, followed by a run with debugging. Then drop to CLI right after the debugged run. | `run -t 10` | | | `-f ` | Continue executing `Session.run` until any intermediate tensor triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan` | +| | `-f -fenn ` | Continue executing `Session.run` until any intermediate tensor whose node names doesn't match the regular expression triggers the specified Tensor filter (causes the filter to return `True`). | `run -f has_inf_or_nan -fenn .*Sqrt.*` | | | `--node_name_filter ` | Execute the next `Session.run`, watching only nodes with names matching the given regular-expression pattern. | `run --node_name_filter Softmax.*` | | | `--op_type_filter ` | Execute the next `Session.run`, watching only nodes with op types matching the given regular-expression pattern. | `run --op_type_filter Variable.*` | | | `--tensor_dtype_filter ` | Execute the next `Session.run`, dumping only Tensors with data types (`dtype`s) matching the given regular-expression pattern. | `run --tensor_dtype_filter int.*` | @@ -813,6 +815,20 @@ sess.run(b) the constant-folding would not occur and `tfdbg` should show the intermediate tensor dumps. + +**Q**: I am debugging a model that generates unwanted infinities or NaNs. But + there are some nodes in my model that are known to generate infinities + or NaNs in their output tensors even under completely normal conditions. + How can I skip those nodes during my `run -f has_inf_or_nan` actions? + +**A**: Use the `--filter_exclude_node_names` (`-fenn` for short) flag. For + example, if you known you have a node with name matching the regular + expression `.*Sqrt.*` that generates infinities or NaNs regardless + of whether the model is behaving correctly, you can exclude the nodes + from the infinity/NaN-finding runs with the command + `run -f has_inf_or_nan -fenn .*Sqrt.*`. + + **Q**: Is there a GUI for tfdbg? **A**: Yes, the **TensorBoard Debugger Plugin** is the GUI of tfdbg. diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py index 156afdfd4c..9a47cd12b4 100644 --- a/tensorflow/python/debug/cli/analyzer_cli.py +++ b/tensorflow/python/debug/cli/analyzer_cli.py @@ -185,6 +185,15 @@ class DebugAnalyzer(object): type=str, default="", help="List only Tensors passing the filter of the specified name") + ap.add_argument( + "-fenn", + "--filter_exclude_node_names", + dest="filter_exclude_node_names", + type=str, + default="", + help="When applying the tensor filter, exclude node with names " + "matching the regular expression. Applicable only if --tensor_filter " + "or -f is used.") ap.add_argument( "-n", "--node_name_filter", @@ -484,6 +493,10 @@ class DebugAnalyzer(object): Returns: Output text lines as a RichTextLines object. + + Raises: + ValueError: If `--filter_exclude_node_names` is used without `-f` or + `--tensor_filter` being used. """ # TODO(cais): Add annotations of substrings for dumped tensor names, to @@ -520,8 +533,15 @@ class DebugAnalyzer(object): _add_main_menu(output, node_name=None, enable_list_tensors=False) return output - data_to_show = self._debug_dump.find(filter_callable) + data_to_show = self._debug_dump.find( + filter_callable, + exclude_node_names=parsed.filter_exclude_node_names) else: + if parsed.filter_exclude_node_names: + raise ValueError( + "The flag --filter_exclude_node_names is valid only when " + "the flag -f or --tensor_filter is used.") + data_to_show = self._debug_dump.dumped_tensor_data # TODO(cais): Implement filter by lambda on tensor value. diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py index 6b110fda9e..55231954d1 100644 --- a/tensorflow/python/debug/cli/analyzer_cli_test.py +++ b/tensorflow/python/debug/cli/analyzer_cli_test.py @@ -820,6 +820,32 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): op_type_regex="(Add|MatMul)") check_main_menu(self, out, list_tensors_enabled=False) + def testListTensorWithFilterAndNodeNameExclusionWorks(self): + # First, create and register the filter. + def is_2x1_vector(datum, tensor): + del datum # Unused. + return list(tensor.shape) == [2, 1] + self._analyzer.add_tensor_filter("is_2x1_vector", is_2x1_vector) + + # Use shorthand alias for the command prefix. + out = self._registry.dispatch_command( + "lt", ["-f", "is_2x1_vector", "--filter_exclude_node_names", ".*v.*"]) + + # If the --filter_exclude_node_names were not used, then the matching + # tensors would be: + # - simple_mul_add/v:0 + # - simple_mul_add/v/read:0 + # - simple_mul_add/matmul:0 + # - simple_mul_add/add:0 + # + # With the --filter_exclude_node_names option, only the last two should + # show up in the result. + assert_listed_tensors( + self, + out, ["simple_mul_add/matmul:0", "simple_mul_add/add:0"], + ["MatMul", "Add"], tensor_filter_name="is_2x1_vector") + check_main_menu(self, out, list_tensors_enabled=False) + def testListTensorsFilterNanOrInf(self): """Test register and invoke a tensor filter.""" diff --git a/tensorflow/python/debug/lib/debug_data.py b/tensorflow/python/debug/lib/debug_data.py index 8d355aa27f..8a65ad087b 100644 --- a/tensorflow/python/debug/lib/debug_data.py +++ b/tensorflow/python/debug/lib/debug_data.py @@ -23,6 +23,7 @@ import glob import json import os import platform +import re import numpy as np import six @@ -1411,7 +1412,11 @@ class DebugDumpDir(object): return self._watch_key_to_datum[device_name].get(debug_watch_key, []) - def find(self, predicate, first_n=0, device_name=None): + def find(self, + predicate, + first_n=0, + device_name=None, + exclude_node_names=None): """Find dumped tensor data by a certain predicate. Args: @@ -1430,17 +1435,24 @@ class DebugDumpDir(object): time order) for which the predicate returns True. To return all the `DebugTensotDatum` instances, let first_n be <= 0. device_name: optional device name. + exclude_node_names: Optional regular expression to exclude nodes with + names matching the regular expression. Returns: A list of all `DebugTensorDatum` objects in this `DebugDumpDir` object for which predicate returns True, sorted in ascending order of the timestamp. """ + if exclude_node_names: + exclude_node_names = re.compile(exclude_node_names) matched_data = [] for device in (self._dump_tensor_data if device_name is None else (self._dump_tensor_data[device_name],)): for datum in self._dump_tensor_data[device]: + if exclude_node_names and exclude_node_names.match(datum.node_name): + continue + if predicate(datum, datum.get_tensor()): matched_data.append(datum) diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py index f4fac14019..070d9c4cd7 100644 --- a/tensorflow/python/debug/lib/session_debug_testlib.py +++ b/tensorflow/python/debug/lib/session_debug_testlib.py @@ -669,6 +669,55 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): self.assertEqual(1, len(first_bad_datum)) self.assertEqual(x_name, first_bad_datum[0].node_name) + def testFindInfOrNanWithOpNameExclusion(self): + with session.Session() as sess: + u_name = "testFindInfOrNanWithOpNameExclusion/u" + v_name = "testFindInfOrNanWithOpNameExclusion/v" + w_name = "testFindInfOrNanWithOpNameExclusion/w" + x_name = "testFindInfOrNanWithOpNameExclusion/x" + y_name = "testFindInfOrNanWithOpNameExclusion/y" + z_name = "testFindInfOrNanWithOpNameExclusion/z" + + u_init = constant_op.constant([2.0, 4.0]) + u = variables.Variable(u_init, name=u_name) + v_init = constant_op.constant([2.0, 1.0]) + v = variables.Variable(v_init, name=v_name) + + # Expected output: [0.0, 3.0] + w = math_ops.subtract(u, v, name=w_name) + + # Expected output: [inf, 1.3333] + x = math_ops.div(u, w, name=x_name) + + # Expected output: [nan, 4.0] + y = math_ops.multiply(w, x, name=y_name) + + z = math_ops.multiply(y, y, name=z_name) + + u.initializer.run() + v.initializer.run() + + _, dump = self._debug_run_and_get_dump( + sess, z, + expected_partition_graph_count=self._expected_partition_graph_count) + + # Find all "offending tensors". + bad_data = dump.find(debug_data.has_inf_or_nan, + exclude_node_names=".*/x$") + + # Verify that the nodes with bad values are caught through running find + # on the debug dump. + self.assertEqual(2, len(bad_data)) + # Assert that the node `x` should have been excluded. + self.assertEqual(y_name, bad_data[0].node_name) + self.assertEqual(z_name, bad_data[1].node_name) + + first_bad_datum = dump.find( + debug_data.has_inf_or_nan, first_n=1, exclude_node_names=".*/x$") + + self.assertEqual(1, len(first_bad_datum)) + self.assertEqual(y_name, first_bad_datum[0].node_name) + def _session_run_for_graph_structure_lookup(self): with session.Session(config=no_rewrite_session_config()) as sess: u_name = "testDumpGraphStructureLookup/u" diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper.py b/tensorflow/python/debug/wrappers/local_cli_wrapper.py index 1465cb7295..c8625655e5 100644 --- a/tensorflow/python/debug/wrappers/local_cli_wrapper.py +++ b/tensorflow/python/debug/wrappers/local_cli_wrapper.py @@ -115,6 +115,7 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): # unavailable (i.e., is None), the run-start CLI will be launched to ask # the user. This is the case, e.g., right before the first run starts. self._active_tensor_filter = None + self._active_filter_exclude_node_names = None self._active_tensor_filter_run_start_response = None self._run_through_times = 1 self._skip_debug = False @@ -148,6 +149,15 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): type=str, default="", help="Run until a tensor in the graph passes the specified filter.") + ap.add_argument( + "-fenn", + "--filter_exclude_node_names", + dest="filter_exclude_node_names", + type=str, + default="", + help="When applying the tensor filter, exclude node with names " + "matching the regular expression. Applicable only if --tensor_filter " + "or -f is used.") ap.add_argument( "--node_name_filter", dest="node_name_filter", @@ -324,9 +334,11 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): debug_dump.set_python_graph(self._sess.graph) passed_filter = None + passed_filter_exclude_node_names = None if self._active_tensor_filter: if not debug_dump.find( - self._tensor_filters[self._active_tensor_filter], first_n=1): + self._tensor_filters[self._active_tensor_filter], first_n=1, + exclude_node_names=self._active_filter_exclude_node_names): # No dumped tensor passes the filter in this run. Clean up the dump # directory and move on. self._remove_dump_root() @@ -334,10 +346,14 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): else: # Some dumped tensor(s) from this run passed the filter. passed_filter = self._active_tensor_filter + passed_filter_exclude_node_names = ( + self._active_filter_exclude_node_names) self._active_tensor_filter = None + self._active_filter_exclude_node_names = None self._prep_debug_cli_for_run_end( - debug_dump, request.tf_error, passed_filter) + debug_dump, request.tf_error, passed_filter, + passed_filter_exclude_node_names) self._run_start_response = self._launch_cli() @@ -358,7 +374,11 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): if os.path.isdir(self._dump_root): shutil.rmtree(self._dump_root) - def _prep_debug_cli_for_run_end(self, debug_dump, tf_error, passed_filter): + def _prep_debug_cli_for_run_end(self, + debug_dump, + tf_error, + passed_filter, + passed_filter_exclude_node_names): """Prepare (but not launch) CLI for run-end, with debug dump from the run. Args: @@ -368,6 +388,9 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): (if any). passed_filter: (None or str) Name of the tensor filter that just passed and caused the preparation of this run-end CLI (if any). + passed_filter_exclude_node_names: (None or str) Regular expression used + with the tensor filter to exclude ops with names matching the regular + expresssion. """ if tf_error: @@ -383,6 +406,9 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): if passed_filter is not None: # Some dumped tensor(s) from this run passed the filter. self._init_command = "lt -f %s" % passed_filter + if passed_filter_exclude_node_names: + self._init_command += (" --filter_exclude_node_names %s" % + passed_filter_exclude_node_names) self._title_color = "red_on_white" self._run_cli = analyzer_cli.create_analyzer_ui( @@ -496,6 +522,11 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): parsed.op_type_filter = parsed.op_type_filter or None parsed.tensor_dtype_filter = parsed.tensor_dtype_filter or None + if parsed.filter_exclude_node_names and not parsed.till_filter_pass: + raise ValueError( + "The --filter_exclude_node_names (or -feon) flag is valid only if " + "the --till_filter_pass (or -f) flag is used.") + if parsed.profile: raise debugger_cli_common.CommandLineExit( exit_token=framework.OnRunStartResponse( @@ -525,6 +556,8 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): if parsed.till_filter_pass in self._tensor_filters: action = framework.OnRunStartAction.DEBUG_RUN self._active_tensor_filter = parsed.till_filter_pass + self._active_filter_exclude_node_names = ( + parsed.filter_exclude_node_names) self._active_tensor_filter_run_start_response = run_start_response else: # Handle invalid filter name. diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py index 490812c96d..b06fa26a93 100644 --- a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py @@ -87,7 +87,11 @@ class LocalCLIDebuggerWrapperSessionForTest( def _prep_cli_for_run_start(self): pass - def _prep_debug_cli_for_run_end(self, debug_dump, tf_error, passed_filter): + def _prep_debug_cli_for_run_end(self, + debug_dump, + tf_error, + passed_filter, + passed_filter_exclude_op_names): self.observers["debug_dumps"].append(debug_dump) self.observers["tf_errors"].append(tf_error) @@ -451,6 +455,36 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertEqual(2, len(wrapped_sess.observers["debug_dumps"])) self.assertEqual([None, None], wrapped_sess.observers["tf_errors"]) + def testRunTillFilterPassesWithExcludeOpNames(self): + wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( + [["run", "-f", "greater_than_twelve", + "--filter_exclude_node_names", "inc_v.*"], + ["run"], ["run"]], + self.sess, + dump_root=self._tmp_dir) + + def greater_than_twelve(datum, tensor): + del datum # Unused. + return tensor > 12.0 + + # Verify that adding the same tensor filter more than once is tolerated + # (i.e., as if it were added only once). + wrapped_sess.add_tensor_filter("greater_than_twelve", greater_than_twelve) + + # run five times. + wrapped_sess.run(self.inc_v) + wrapped_sess.run(self.inc_v) + wrapped_sess.run(self.inc_v) + wrapped_sess.run(self.inc_v) + + self.assertAllClose(14.0, self.sess.run(self.v)) + + self.assertEqual([1], wrapped_sess.observers["run_start_cli_run_numbers"]) + + # Due to the --filter_exclude_op_names flag, the run-end CLI should show up + # not after run 3, but after run 4. + self.assertEqual([4], wrapped_sess.observers["run_end_cli_run_numbers"]) + def testRunTillFilterPassesWorksInConjunctionWithOtherNodeNameFilter(self): """Test that --.*_filter flags work in conjunction with -f. -- GitLab From 8158adc21db1612c42607dff41c083dd3a435e58 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Mar 2018 12:20:32 -0700 Subject: [PATCH 496/960] Internal change. PiperOrigin-RevId: 190504933 --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ece1da0332..dbe1bd437e 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1089,6 +1089,7 @@ cuda_py_test( tags = [ "no_windows", "noasan", + "noguitar", "notap", ], ) -- GitLab From b704d1488d5c15d9e8497843e0bbc667117383ae Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Mar 2018 12:40:56 -0700 Subject: [PATCH 497/960] Internal change. PiperOrigin-RevId: 190507631 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index d7cc2f14a4..0b3bf63f79 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -480,6 +480,7 @@ py_test( tags = [ "manual", "no_oss", + "notap", ], deps = [ "//tensorflow/contrib/data/python/ops:prefetching_ops", -- GitLab From 5890401336c149f49892579bb1a7f4e7c6a52fea Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 26 Mar 2018 12:47:47 -0700 Subject: [PATCH 498/960] Clarify doc strings on gradient methods PiperOrigin-RevId: 190508614 --- tensorflow/python/eager/backprop.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index cdcce65c52..a7837b8a7f 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -172,7 +172,7 @@ def implicit_val_and_grad(f): The wrapped function returns the value and the gradient of f when called with the same arguments. The gradient is with respect to all TFE variables which - have `variable.watch()` called on them by f. + are either trainable or have `variable.watch()` called on them by f. This function is useful when the exact set of variables to differentiate with is not known ahead of time. @@ -249,8 +249,8 @@ def implicit_grad(f): """Returns a function which differentiates f with respect to variables. The wrapped function returns the gradient of f when called with the same - arguments. The gradient is with respect to all TFE variables which have - `variable.watch()` called on them by f. + arguments. The gradient is with respect to all TFE variables which are + either trainable or have `variable.watch()` called on them by f. This function is useful when the exact set of variables to differentiate with is not known ahead of time. @@ -653,10 +653,10 @@ class GradientTape(object): Operations are recorded if they are executed within this context manager and at least one of their inputs is being "watched". - Variables (created by `tf.contrib.eager.Variable` or @{tf.get_variable}) - are automatically watched. Tensors can be manually watched by invoking the - `watch` - method on this context manager. + Trainable variables (created by `tf.contrib.eager.Variable` or + @{tf.get_variable}, trainable=True is default in both cases) are automatically + watched. Tensors can be manually watched by invoking the `watch` method on + this context manager. For example, consider the function `y = x * x`. The gradient at `x = 3.0` can be computed as: -- GitLab From ea644ac0783537a6ac8a2c8a2432829b3db69aeb Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 26 Mar 2018 13:05:52 -0700 Subject: [PATCH 499/960] Disabling the state_management_test. For non-pip builds also. --- tensorflow/contrib/timeseries/python/timeseries/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 64f5cd8357..d72cc1b8a2 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -233,6 +233,7 @@ py_test( ], srcs_version = "PY2AND3", tags = [ + "manual", "no_pip", # b/64527635 "no_pip_gpu", # b/63391119 ], -- GitLab From 383ce820e5221511cb57904ebd9c32d42d797ac9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 13:08:54 -0700 Subject: [PATCH 500/960] Optimized ops, move code to early, common, section so that it can be shared. PiperOrigin-RevId: 190511964 --- .../internal/optimized/optimized_ops.h | 384 +++++++++--------- 1 file changed, 192 insertions(+), 192 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index d7a0005f27..f08d9d6d57 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -324,6 +324,198 @@ void Gemm(const Eigen::MatrixBase& lhs, const Eigen::MatrixBase& rhs, } } +#ifdef GEMMLOWP_NEON +// In the common case of batch size 1, a fully-connected node degenerates +// to a matrix*vector product. LSTM cells contain a fully-connected node; +// when quantized, this becomes a special type of GEMV operation where +// the output is 16bit-quantized, thus needs its own special path. +inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims, + const uint8* weights_data, + const Dims<4>& weights_dims, + uint8 weights_zero_point, const int32* bias_data, + const Dims<4>& bias_dims, int32 accum_multiplier, + int accum_shift, int16* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("GEMVForLstmCell"); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); + TFLITE_DCHECK_EQ(ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3), + 1); + const int input_size = input_dims.strides[3]; + const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0); + // This special fast path for quantized LSTM cells does not try to support + // odd sizes that we haven't encountered in any LSTM cell, that would + // require special code (that would go untested until any LSTM cell + // exercises it). We just guard our assumptions about size evenness with + // the following assertions. + TFLITE_DCHECK(!(output_size % 4)); + TFLITE_DCHECK(!(input_size % 8)); + const int32* bias_ptr = bias_data; + int16* output_ptr = output_data; + for (int out = 0; out < output_size; out += 4) { + int32x4_t acc_0 = vdupq_n_s32(0); + int32x4_t acc_1 = vdupq_n_s32(0); + int32x4_t acc_2 = vdupq_n_s32(0); + int32x4_t acc_3 = vdupq_n_s32(0); + const int16x8_t input_offset_vec = vdupq_n_s16(-128); + const int16x8_t weights_offset_vec = vdupq_n_s16(-weights_zero_point); + int in = 0; + // Handle 16 levels of depth at a time. + for (; in <= input_size - 16; in += 16) { + const uint8x16_t input_val_u8 = vld1q_u8(input_data + in); + const uint8* weights_ptr = weights_data + in + out * input_size; + uint8x16_t weights_val_u8_0 = vld1q_u8(weights_ptr + 0 * input_size); + uint8x16_t weights_val_u8_1 = vld1q_u8(weights_ptr + 1 * input_size); + uint8x16_t weights_val_u8_2 = vld1q_u8(weights_ptr + 2 * input_size); + uint8x16_t weights_val_u8_3 = vld1q_u8(weights_ptr + 3 * input_size); + int16x8_t input_val_0, input_val_1; + const uint8x8_t low = vget_low_u8(input_val_u8); + const uint8x8_t high = vget_high_u8(input_val_u8); + input_val_0 = vreinterpretq_s16_u16(vmovl_u8(low)); + input_val_1 = vreinterpretq_s16_u16(vmovl_u8(high)); + input_val_0 = vaddq_s16(input_val_0, input_offset_vec); + input_val_1 = vaddq_s16(input_val_1, input_offset_vec); + int16x8_t weights_val_0_0, weights_val_1_0, weights_val_2_0, + weights_val_3_0; + int16x8_t weights_val_0_1, weights_val_1_1, weights_val_2_1, + weights_val_3_1; + weights_val_0_0 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_0))), + weights_offset_vec); + weights_val_0_1 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_0))), + weights_offset_vec); + weights_val_1_0 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_1))), + weights_offset_vec); + weights_val_1_1 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_1))), + weights_offset_vec); + weights_val_2_0 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_2))), + weights_offset_vec); + weights_val_2_1 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_2))), + weights_offset_vec); + weights_val_3_0 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_3))), + weights_offset_vec); + weights_val_3_1 = vaddq_s16( + vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_3))), + weights_offset_vec); + acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0_0), + vget_low_s16(input_val_0)); + acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1_0), + vget_low_s16(input_val_0)); + acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2_0), + vget_low_s16(input_val_0)); + acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3_0), + vget_low_s16(input_val_0)); + acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0_0), + vget_high_s16(input_val_0)); + acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1_0), + vget_high_s16(input_val_0)); + acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2_0), + vget_high_s16(input_val_0)); + acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3_0), + vget_high_s16(input_val_0)); + acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0_1), + vget_low_s16(input_val_1)); + acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1_1), + vget_low_s16(input_val_1)); + acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2_1), + vget_low_s16(input_val_1)); + acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3_1), + vget_low_s16(input_val_1)); + acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0_1), + vget_high_s16(input_val_1)); + acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1_1), + vget_high_s16(input_val_1)); + acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2_1), + vget_high_s16(input_val_1)); + acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3_1), + vget_high_s16(input_val_1)); + } + // Handle 8 levels of depth at a time. + for (; in < input_size; in += 8) { + const uint8x8_t input_val_u8 = vld1_u8(input_data + in); + const uint8* weights_ptr = weights_data + in + out * input_size; + uint8x8_t weights_val_u8_0 = vld1_u8(weights_ptr + 0 * input_size); + uint8x8_t weights_val_u8_1 = vld1_u8(weights_ptr + 1 * input_size); + uint8x8_t weights_val_u8_2 = vld1_u8(weights_ptr + 2 * input_size); + uint8x8_t weights_val_u8_3 = vld1_u8(weights_ptr + 3 * input_size); + int16x8_t input_val; + input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8)); + input_val = vaddq_s16(input_val, input_offset_vec); + int16x8_t weights_val_0, weights_val_1, weights_val_2, weights_val_3; + weights_val_0 = + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_0)), + weights_offset_vec); + weights_val_1 = + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_1)), + weights_offset_vec); + weights_val_2 = + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_2)), + weights_offset_vec); + weights_val_3 = + vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_3)), + weights_offset_vec); + acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0), + vget_low_s16(input_val)); + acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1), + vget_low_s16(input_val)); + acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2), + vget_low_s16(input_val)); + acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3), + vget_low_s16(input_val)); + acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0), + vget_high_s16(input_val)); + acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1), + vget_high_s16(input_val)); + acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2), + vget_high_s16(input_val)); + acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3), + vget_high_s16(input_val)); + } + // Horizontally reduce accumulators + int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, + pairwise_reduced_acc_2, pairwise_reduced_acc_3; + pairwise_reduced_acc_0 = + vpadd_s32(vget_low_s32(acc_0), vget_high_s32(acc_0)); + pairwise_reduced_acc_1 = + vpadd_s32(vget_low_s32(acc_1), vget_high_s32(acc_1)); + pairwise_reduced_acc_2 = + vpadd_s32(vget_low_s32(acc_2), vget_high_s32(acc_2)); + pairwise_reduced_acc_3 = + vpadd_s32(vget_low_s32(acc_3), vget_high_s32(acc_3)); + const int32x2_t reduced_lo = + vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); + const int32x2_t reduced_hi = + vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); + int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); + // Add bias values. + int32x4_t bias_vec = vld1q_s32(bias_ptr); + bias_ptr += 4; + reduced = vaddq_s32(reduced, bias_vec); + int left_shift = accum_shift > 0 ? accum_shift : 0; + int right_shift = accum_shift > 0 ? 0 : -accum_shift; + reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); + // Multiply by the fixed-point multiplier. + reduced = vqrdmulhq_n_s32(reduced, accum_multiplier); + // Rounding-shift-right. + using gemmlowp::RoundingDivideByPOT; + reduced = RoundingDivideByPOT(reduced, right_shift); + // Narrow values down to 16 bit signed. + const int16x4_t res16 = vqmovn_s32(reduced); + vst1_s16(output_ptr, res16); + output_ptr += 4; + } +} +#endif + inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, const float* weights_data, const Dims<4>& weights_dims, const float* bias_data, @@ -2478,198 +2670,6 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, output_state_map.tanh(); } -#ifdef GEMMLOWP_NEON -// In the common case of batch size 1, a fully-connected node degenerates -// to a matrix*vector product. LSTM cells contain a fully-connected node; -// when quantized, this becomes a special type of GEMV operation where -// the output is 16bit-quantized, thus needs its own special path. -inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims, - const uint8* weights_data, - const Dims<4>& weights_dims, - uint8 weights_zero_point, const int32* bias_data, - const Dims<4>& bias_dims, int32 accum_multiplier, - int accum_shift, int16* output_data, - const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("GEMVForLstmCell"); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(output_dims)); - TFLITE_DCHECK_EQ(ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3), - 1); - const int input_size = input_dims.strides[3]; - const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0); - // This special fast path for quantized LSTM cells does not try to support - // odd sizes that we haven't encountered in any LSTM cell, that would - // require special code (that would go untested until any LSTM cell - // exercises it). We just guard our assumptions about size evenness with - // the following assertions. - TFLITE_DCHECK(!(output_size % 4)); - TFLITE_DCHECK(!(input_size % 8)); - const int32* bias_ptr = bias_data; - int16* output_ptr = output_data; - for (int out = 0; out < output_size; out += 4) { - int32x4_t acc_0 = vdupq_n_s32(0); - int32x4_t acc_1 = vdupq_n_s32(0); - int32x4_t acc_2 = vdupq_n_s32(0); - int32x4_t acc_3 = vdupq_n_s32(0); - const int16x8_t input_offset_vec = vdupq_n_s16(-128); - const int16x8_t weights_offset_vec = vdupq_n_s16(-weights_zero_point); - int in = 0; - // Handle 16 levels of depth at a time. - for (; in <= input_size - 16; in += 16) { - const uint8x16_t input_val_u8 = vld1q_u8(input_data + in); - const uint8* weights_ptr = weights_data + in + out * input_size; - uint8x16_t weights_val_u8_0 = vld1q_u8(weights_ptr + 0 * input_size); - uint8x16_t weights_val_u8_1 = vld1q_u8(weights_ptr + 1 * input_size); - uint8x16_t weights_val_u8_2 = vld1q_u8(weights_ptr + 2 * input_size); - uint8x16_t weights_val_u8_3 = vld1q_u8(weights_ptr + 3 * input_size); - int16x8_t input_val_0, input_val_1; - const uint8x8_t low = vget_low_u8(input_val_u8); - const uint8x8_t high = vget_high_u8(input_val_u8); - input_val_0 = vreinterpretq_s16_u16(vmovl_u8(low)); - input_val_1 = vreinterpretq_s16_u16(vmovl_u8(high)); - input_val_0 = vaddq_s16(input_val_0, input_offset_vec); - input_val_1 = vaddq_s16(input_val_1, input_offset_vec); - int16x8_t weights_val_0_0, weights_val_1_0, weights_val_2_0, - weights_val_3_0; - int16x8_t weights_val_0_1, weights_val_1_1, weights_val_2_1, - weights_val_3_1; - weights_val_0_0 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_0))), - weights_offset_vec); - weights_val_0_1 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_0))), - weights_offset_vec); - weights_val_1_0 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_1))), - weights_offset_vec); - weights_val_1_1 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_1))), - weights_offset_vec); - weights_val_2_0 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_2))), - weights_offset_vec); - weights_val_2_1 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_2))), - weights_offset_vec); - weights_val_3_0 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(weights_val_u8_3))), - weights_offset_vec); - weights_val_3_1 = vaddq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(weights_val_u8_3))), - weights_offset_vec); - acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0_0), - vget_low_s16(input_val_0)); - acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1_0), - vget_low_s16(input_val_0)); - acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2_0), - vget_low_s16(input_val_0)); - acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3_0), - vget_low_s16(input_val_0)); - acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0_0), - vget_high_s16(input_val_0)); - acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1_0), - vget_high_s16(input_val_0)); - acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2_0), - vget_high_s16(input_val_0)); - acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3_0), - vget_high_s16(input_val_0)); - acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0_1), - vget_low_s16(input_val_1)); - acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1_1), - vget_low_s16(input_val_1)); - acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2_1), - vget_low_s16(input_val_1)); - acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3_1), - vget_low_s16(input_val_1)); - acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0_1), - vget_high_s16(input_val_1)); - acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1_1), - vget_high_s16(input_val_1)); - acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2_1), - vget_high_s16(input_val_1)); - acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3_1), - vget_high_s16(input_val_1)); - } - // Handle 8 levels of depth at a time. - for (; in < input_size; in += 8) { - const uint8x8_t input_val_u8 = vld1_u8(input_data + in); - const uint8* weights_ptr = weights_data + in + out * input_size; - uint8x8_t weights_val_u8_0 = vld1_u8(weights_ptr + 0 * input_size); - uint8x8_t weights_val_u8_1 = vld1_u8(weights_ptr + 1 * input_size); - uint8x8_t weights_val_u8_2 = vld1_u8(weights_ptr + 2 * input_size); - uint8x8_t weights_val_u8_3 = vld1_u8(weights_ptr + 3 * input_size); - int16x8_t input_val; - input_val = vreinterpretq_s16_u16(vmovl_u8(input_val_u8)); - input_val = vaddq_s16(input_val, input_offset_vec); - int16x8_t weights_val_0, weights_val_1, weights_val_2, weights_val_3; - weights_val_0 = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_0)), - weights_offset_vec); - weights_val_1 = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_1)), - weights_offset_vec); - weights_val_2 = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_2)), - weights_offset_vec); - weights_val_3 = - vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(weights_val_u8_3)), - weights_offset_vec); - acc_0 = vmlal_s16(acc_0, vget_low_s16(weights_val_0), - vget_low_s16(input_val)); - acc_1 = vmlal_s16(acc_1, vget_low_s16(weights_val_1), - vget_low_s16(input_val)); - acc_2 = vmlal_s16(acc_2, vget_low_s16(weights_val_2), - vget_low_s16(input_val)); - acc_3 = vmlal_s16(acc_3, vget_low_s16(weights_val_3), - vget_low_s16(input_val)); - acc_0 = vmlal_s16(acc_0, vget_high_s16(weights_val_0), - vget_high_s16(input_val)); - acc_1 = vmlal_s16(acc_1, vget_high_s16(weights_val_1), - vget_high_s16(input_val)); - acc_2 = vmlal_s16(acc_2, vget_high_s16(weights_val_2), - vget_high_s16(input_val)); - acc_3 = vmlal_s16(acc_3, vget_high_s16(weights_val_3), - vget_high_s16(input_val)); - } - // Horizontally reduce accumulators - int32x2_t pairwise_reduced_acc_0, pairwise_reduced_acc_1, - pairwise_reduced_acc_2, pairwise_reduced_acc_3; - pairwise_reduced_acc_0 = - vpadd_s32(vget_low_s32(acc_0), vget_high_s32(acc_0)); - pairwise_reduced_acc_1 = - vpadd_s32(vget_low_s32(acc_1), vget_high_s32(acc_1)); - pairwise_reduced_acc_2 = - vpadd_s32(vget_low_s32(acc_2), vget_high_s32(acc_2)); - pairwise_reduced_acc_3 = - vpadd_s32(vget_low_s32(acc_3), vget_high_s32(acc_3)); - const int32x2_t reduced_lo = - vpadd_s32(pairwise_reduced_acc_0, pairwise_reduced_acc_1); - const int32x2_t reduced_hi = - vpadd_s32(pairwise_reduced_acc_2, pairwise_reduced_acc_3); - int32x4_t reduced = vcombine_s32(reduced_lo, reduced_hi); - // Add bias values. - int32x4_t bias_vec = vld1q_s32(bias_ptr); - bias_ptr += 4; - reduced = vaddq_s32(reduced, bias_vec); - int left_shift = accum_shift > 0 ? accum_shift : 0; - int right_shift = accum_shift > 0 ? 0 : -accum_shift; - reduced = vshlq_s32(reduced, vdupq_n_s32(left_shift)); - // Multiply by the fixed-point multiplier. - reduced = vqrdmulhq_n_s32(reduced, accum_multiplier); - // Rounding-shift-right. - using gemmlowp::RoundingDivideByPOT; - reduced = RoundingDivideByPOT(reduced, right_shift); - // Narrow values down to 16 bit signed. - const int16x4_t res16 = vqmovn_s32(reduced); - vst1_s16(output_ptr, res16); - output_ptr += 4; - } -} -#endif - // Quantized LSTM cell. Currently just a copy of the reference impl in // reference_ops.h. See the big function comment there, not replicating it // here. -- GitLab From 4c909d283d7efab3e0dde68eb27d31d68407e207 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 13:11:18 -0700 Subject: [PATCH 501/960] Add header guard to lstm_utils. PiperOrigin-RevId: 190512302 --- .../contrib/lite/toco/graph_transformations/lstm_utils.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/lstm_utils.h b/tensorflow/contrib/lite/toco/graph_transformations/lstm_utils.h index 881c2d4dc8..4a9974ed4e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/lstm_utils.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/lstm_utils.h @@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_LSTM_UTILS_H_ +#define TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_LSTM_UTILS_H_ + #include #include #include @@ -100,3 +103,5 @@ bool GetMatchingRnnArray(Model* model, const string& back_edge_source_array, string* rnn_array); } // namespace toco + +#endif // TENSORFLOW_CONTRIB_LITE_TOCO_GRAPH_TRANSFORMATIONS_LSTM_UTILS_H_ -- GitLab From 3f708534f7fa5d548c2ccd0a77a229a815868e8f Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Mar 2018 13:15:53 -0700 Subject: [PATCH 502/960] Internal change. PiperOrigin-RevId: 190512928 --- tensorflow/contrib/distributions/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index e9c827a618..1c381cc354 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -490,6 +490,7 @@ cuda_py_test( "manual", "noasan", "noguitar", + "optonly", ], ) -- GitLab From 5427f60f69c3f22bc5e40b3c51a484dd3af504fb Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 26 Mar 2018 13:25:58 -0700 Subject: [PATCH 503/960] Add additional protobuf imports. PiperOrigin-RevId: 190514839 --- tensorflow/core/platform/default/protobuf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/default/protobuf.h b/tensorflow/core/platform/default/protobuf.h index 03d8b6c238..c732c76ff7 100644 --- a/tensorflow/core/platform/default/protobuf.h +++ b/tensorflow/core/platform/default/protobuf.h @@ -22,6 +22,7 @@ limitations under the License. #include "google/protobuf/arena.h" #include "google/protobuf/compiler/importer.h" #include "google/protobuf/descriptor.h" +#include "google/protobuf/dynamic_message.h" #include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/zero_copy_stream.h" #include "google/protobuf/io/zero_copy_stream_impl_lite.h" -- GitLab From 54cc8b35f1062f385f0e97c397e1ae96c91c9f62 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 13:30:17 -0700 Subject: [PATCH 504/960] Global rename of py2tf to autograph PiperOrigin-RevId: 190515509 --- tensorflow/BUILD | 12 ++-- tensorflow/contrib/BUILD | 2 +- tensorflow/contrib/{py2tf => autograph}/BUILD | 8 +-- .../contrib/{py2tf => autograph}/README.md | 2 +- .../contrib/{py2tf => autograph}/__init__.py | 20 +++--- .../{py2tf => autograph}/converters/BUILD | 15 +++-- .../converters/__init__.py | 2 +- .../converters/asserts.py | 4 +- .../converters/asserts_test.py | 4 +- .../converters/break_statements.py | 8 +-- .../converters/break_statements_test.py | 4 +- .../converters/builtin_functions.py | 8 +-- .../converters/builtin_functions_test.py | 4 +- .../converters/call_trees.py | 21 +++--- .../converters/call_trees_test.py | 4 +- .../converters/continue_statements.py | 8 +-- .../converters/continue_statements_test.py | 4 +- .../converters/control_flow.py | 16 ++--- .../converters/control_flow_test.py | 4 +- .../converters/converter_test_base.py | 22 +++---- .../converters/decorators.py | 4 +- .../converters/decorators_test.py | 6 +- .../converters/for_loops.py | 20 +++--- .../converters/for_loops_test.py | 4 +- .../{py2tf => autograph}/converters/ifexp.py | 6 +- .../converters/ifexp_test.py | 18 ++--- .../converters/list_comprehension.py | 6 +- .../converters/list_comprehension_test.py | 4 +- .../{py2tf => autograph}/converters/lists.py | 8 +-- .../converters/lists_test.py | 6 +- .../converters/logical_expressions.py | 12 ++-- .../converters/logical_expressions_test.py | 4 +- .../converters/name_scopes.py | 4 +- .../converters/name_scopes_test.py | 4 +- .../converters/side_effect_guards.py | 18 ++--- .../converters/side_effect_guards_test.py | 4 +- .../converters/single_return.py | 12 ++-- .../converters/single_return_test.py | 4 +- .../contrib/{py2tf => autograph}/impl/BUILD | 10 +-- .../contrib/{py2tf => autograph}/impl/api.py | 16 ++--- .../{py2tf => autograph}/impl/api_test.py | 18 +++-- .../{py2tf => autograph}/impl/config.py | 16 ++--- .../{py2tf => autograph}/impl/conversion.py | 66 +++++++++---------- .../impl/conversion_test.py | 4 +- .../{py2tf => autograph}/impl/naming.py | 2 +- .../{py2tf => autograph}/impl/naming_test.py | 2 +- .../contrib/{py2tf => autograph}/pyct/BUILD | 0 .../{py2tf => autograph}/pyct/__init__.py | 0 .../contrib/{py2tf => autograph}/pyct/anno.py | 0 .../{py2tf => autograph}/pyct/anno_test.py | 2 +- .../{py2tf => autograph}/pyct/ast_util.py | 2 +- .../pyct/ast_util_test.py | 8 +-- .../{py2tf => autograph}/pyct/compiler.py | 0 .../pyct/compiler_test.py | 4 +- .../{py2tf => autograph}/pyct/context.py | 0 .../pyct/inspect_utils.py | 0 .../pyct/inspect_utils_test.py | 2 +- .../{py2tf => autograph}/pyct/parser.py | 0 .../{py2tf => autograph}/pyct/parser_test.py | 2 +- .../pyct/pretty_printer.py | 0 .../pyct/pretty_printer_test.py | 2 +- .../{py2tf => autograph}/pyct/qual_names.py | 2 +- .../pyct/qual_names_test.py | 10 +-- .../pyct/static_analysis/BUILD | 10 +-- .../pyct/static_analysis/__init__.py | 0 .../pyct/static_analysis/activity.py | 8 +-- .../pyct/static_analysis/activity_test.py | 14 ++-- .../pyct/static_analysis/annos.py | 0 .../pyct/static_analysis/live_values.py | 6 +- .../pyct/static_analysis/live_values_test.py | 14 ++-- .../pyct/static_analysis/type_info.py | 4 +- .../pyct/static_analysis/type_info_test.py | 16 ++--- .../{py2tf => autograph}/pyct/templates.py | 6 +- .../pyct/templates_test.py | 6 +- .../{py2tf => autograph}/pyct/transformer.py | 12 ++-- .../contrib/{py2tf => autograph}/utils/BUILD | 0 .../contrib/autograph/utils/__init__.py | 36 ++++++++++ .../{py2tf => autograph}/utils/builtins.py | 4 +- .../utils/builtins_test.py | 2 +- .../utils/context_managers.py | 0 .../utils/context_managers_test.py | 2 +- .../{py2tf => autograph}/utils/misc.py | 0 .../{py2tf => autograph}/utils/misc_test.py | 2 +- .../utils/multiple_dispatch.py | 4 +- .../utils/multiple_dispatch_test.py | 2 +- .../{py2tf => autograph}/utils/py_func.py | 5 +- .../utils/py_func_test.py | 2 +- .../{py2tf => autograph}/utils/tensor_list.py | 0 .../utils/tensor_list_test.py | 4 +- .../{py2tf => autograph}/utils/testing.py | 0 .../{py2tf => autograph}/utils/type_check.py | 2 +- .../utils/type_check_test.py | 2 +- .../{py2tf => autograph}/utils/type_hints.py | 0 tensorflow/contrib/py2tf/utils/__init__.py | 36 ---------- tensorflow/tools/pip_package/BUILD | 12 ++-- 95 files changed, 346 insertions(+), 349 deletions(-) rename tensorflow/contrib/{py2tf => autograph}/BUILD (75%) rename tensorflow/contrib/{py2tf => autograph}/README.md (87%) rename tensorflow/contrib/{py2tf => autograph}/__init__.py (64%) rename tensorflow/contrib/{py2tf => autograph}/converters/BUILD (92%) rename tensorflow/contrib/{py2tf => autograph}/converters/__init__.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/asserts.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/asserts_test.py (90%) rename tensorflow/contrib/{py2tf => autograph}/converters/break_statements.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/break_statements_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/builtin_functions.py (92%) rename tensorflow/contrib/{py2tf => autograph}/converters/builtin_functions_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/converters/call_trees.py (94%) rename tensorflow/contrib/{py2tf => autograph}/converters/call_trees_test.py (97%) rename tensorflow/contrib/{py2tf => autograph}/converters/continue_statements.py (94%) rename tensorflow/contrib/{py2tf => autograph}/converters/continue_statements_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/control_flow.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/control_flow_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/converter_test_base.py (85%) rename tensorflow/contrib/{py2tf => autograph}/converters/decorators.py (96%) rename tensorflow/contrib/{py2tf => autograph}/converters/decorators_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/for_loops.py (80%) rename tensorflow/contrib/{py2tf => autograph}/converters/for_loops_test.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/ifexp.py (88%) rename tensorflow/contrib/{py2tf => autograph}/converters/ifexp_test.py (86%) rename tensorflow/contrib/{py2tf => autograph}/converters/list_comprehension.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/list_comprehension_test.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/lists.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/lists_test.py (90%) rename tensorflow/contrib/{py2tf => autograph}/converters/logical_expressions.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/logical_expressions_test.py (92%) rename tensorflow/contrib/{py2tf => autograph}/converters/name_scopes.py (93%) rename tensorflow/contrib/{py2tf => autograph}/converters/name_scopes_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/converters/side_effect_guards.py (91%) rename tensorflow/contrib/{py2tf => autograph}/converters/side_effect_guards_test.py (97%) rename tensorflow/contrib/{py2tf => autograph}/converters/single_return.py (96%) rename tensorflow/contrib/{py2tf => autograph}/converters/single_return_test.py (97%) rename tensorflow/contrib/{py2tf => autograph}/impl/BUILD (82%) rename tensorflow/contrib/{py2tf => autograph}/impl/api.py (95%) rename tensorflow/contrib/{py2tf => autograph}/impl/api_test.py (92%) rename tensorflow/contrib/{py2tf => autograph}/impl/config.py (79%) rename tensorflow/contrib/{py2tf => autograph}/impl/conversion.py (84%) rename tensorflow/contrib/{py2tf => autograph}/impl/conversion_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/impl/naming.py (98%) rename tensorflow/contrib/{py2tf => autograph}/impl/naming_test.py (98%) rename tensorflow/contrib/{py2tf => autograph}/pyct/BUILD (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/__init__.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/anno.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/anno_test.py (97%) rename tensorflow/contrib/{py2tf => autograph}/pyct/ast_util.py (98%) rename tensorflow/contrib/{py2tf => autograph}/pyct/ast_util_test.py (93%) rename tensorflow/contrib/{py2tf => autograph}/pyct/compiler.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/compiler_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/context.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/inspect_utils.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/inspect_utils_test.py (98%) rename tensorflow/contrib/{py2tf => autograph}/pyct/parser.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/parser_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/pretty_printer.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/pretty_printer_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/qual_names.py (99%) rename tensorflow/contrib/{py2tf => autograph}/pyct/qual_names_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/BUILD (83%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/__init__.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/activity.py (97%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/activity_test.py (95%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/annos.py (100%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/live_values.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/live_values_test.py (89%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/type_info.py (98%) rename tensorflow/contrib/{py2tf => autograph}/pyct/static_analysis/type_info_test.py (93%) rename tensorflow/contrib/{py2tf => autograph}/pyct/templates.py (98%) rename tensorflow/contrib/{py2tf => autograph}/pyct/templates_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/pyct/transformer.py (89%) rename tensorflow/contrib/{py2tf => autograph}/utils/BUILD (100%) create mode 100644 tensorflow/contrib/autograph/utils/__init__.py rename tensorflow/contrib/{py2tf => autograph}/utils/builtins.py (98%) rename tensorflow/contrib/{py2tf => autograph}/utils/builtins_test.py (98%) rename tensorflow/contrib/{py2tf => autograph}/utils/context_managers.py (100%) rename tensorflow/contrib/{py2tf => autograph}/utils/context_managers_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/utils/misc.py (100%) rename tensorflow/contrib/{py2tf => autograph}/utils/misc_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/utils/multiple_dispatch.py (95%) rename tensorflow/contrib/{py2tf => autograph}/utils/multiple_dispatch_test.py (98%) rename tensorflow/contrib/{py2tf => autograph}/utils/py_func.py (97%) rename tensorflow/contrib/{py2tf => autograph}/utils/py_func_test.py (98%) rename tensorflow/contrib/{py2tf => autograph}/utils/tensor_list.py (100%) rename tensorflow/contrib/{py2tf => autograph}/utils/tensor_list_test.py (97%) rename tensorflow/contrib/{py2tf => autograph}/utils/testing.py (100%) rename tensorflow/contrib/{py2tf => autograph}/utils/type_check.py (95%) rename tensorflow/contrib/{py2tf => autograph}/utils/type_check_test.py (96%) rename tensorflow/contrib/{py2tf => autograph}/utils/type_hints.py (100%) delete mode 100644 tensorflow/contrib/py2tf/utils/__init__.py diff --git a/tensorflow/BUILD b/tensorflow/BUILD index c75bf8abab..b073adfee9 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -448,6 +448,12 @@ filegroup( "//tensorflow/contrib:all_files", "//tensorflow/contrib/all_reduce:all_files", "//tensorflow/contrib/android:all_files", + "//tensorflow/contrib/autograph:all_files", + "//tensorflow/contrib/autograph/converters:all_files", + "//tensorflow/contrib/autograph/impl:all_files", + "//tensorflow/contrib/autograph/pyct:all_files", + "//tensorflow/contrib/autograph/pyct/static_analysis:all_files", + "//tensorflow/contrib/autograph/utils:all_files", "//tensorflow/contrib/batching:all_files", "//tensorflow/contrib/bayesflow:all_files", "//tensorflow/contrib/boosted_trees:all_files", @@ -541,12 +547,6 @@ filegroup( "//tensorflow/contrib/opt:all_files", "//tensorflow/contrib/periodic_resample:all_files", "//tensorflow/contrib/predictor:all_files", - "//tensorflow/contrib/py2tf:all_files", - "//tensorflow/contrib/py2tf/converters:all_files", - "//tensorflow/contrib/py2tf/impl:all_files", - "//tensorflow/contrib/py2tf/pyct:all_files", - "//tensorflow/contrib/py2tf/pyct/static_analysis:all_files", - "//tensorflow/contrib/py2tf/utils:all_files", "//tensorflow/contrib/quantize:all_files", "//tensorflow/contrib/receptive_field:all_files", "//tensorflow/contrib/reduce_slice_ops:all_files", diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 2d7bbc016f..bdbd738906 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -79,7 +79,7 @@ py_library( "//tensorflow/contrib/predictor", "//tensorflow/contrib/quantization:quantization_py", "//tensorflow/contrib/quantize:quantize_graph", - "//tensorflow/contrib/py2tf", + "//tensorflow/contrib/autograph", "//tensorflow/contrib/receptive_field:receptive_field_py", "//tensorflow/contrib/reduce_slice_ops:reduce_slice_ops_py", "//tensorflow/contrib/remote_fused_graph/pylib:remote_fused_graph_ops_py", diff --git a/tensorflow/contrib/py2tf/BUILD b/tensorflow/contrib/autograph/BUILD similarity index 75% rename from tensorflow/contrib/py2tf/BUILD rename to tensorflow/contrib/autograph/BUILD index d91220f6dd..30dd846893 100644 --- a/tensorflow/contrib/py2tf/BUILD +++ b/tensorflow/contrib/autograph/BUILD @@ -15,16 +15,16 @@ filegroup( ) py_library( - name = "py2tf", + name = "autograph", srcs = [ "__init__.py", ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - "//tensorflow/contrib/py2tf/impl", - "//tensorflow/contrib/py2tf/pyct", - "//tensorflow/contrib/py2tf/utils", + "//tensorflow/contrib/autograph/impl", + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/contrib/autograph/utils", "@gast_archive//:gast", "@six_archive//:six", ], diff --git a/tensorflow/contrib/py2tf/README.md b/tensorflow/contrib/autograph/README.md similarity index 87% rename from tensorflow/contrib/py2tf/README.md rename to tensorflow/contrib/autograph/README.md index cd50675ad5..7e84f237dc 100644 --- a/tensorflow/contrib/py2tf/README.md +++ b/tensorflow/contrib/autograph/README.md @@ -1,4 +1,4 @@ -# Py2TF +# Autograph A compiler for generating TensorFlow numeric and control flow ops from Python code. diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/autograph/__init__.py similarity index 64% rename from tensorflow/contrib/py2tf/__init__.py rename to tensorflow/contrib/autograph/__init__.py index a4b62a0976..a39f44b21a 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/autograph/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Py2TF compiles Python code into equivalent TensorFlow code. +"""Autograph compiles Python code into equivalent TensorFlow code. Equivalent here means that they have the same effect when executed. """ @@ -21,19 +21,19 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.impl.api import convert -from tensorflow.contrib.py2tf.impl.api import converted_call -from tensorflow.contrib.py2tf.impl.api import do_not_convert -from tensorflow.contrib.py2tf.impl.api import RunMode -from tensorflow.contrib.py2tf.impl.api import to_code -from tensorflow.contrib.py2tf.impl.api import to_graph -from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.impl.api import convert +from tensorflow.contrib.autograph.impl.api import converted_call +from tensorflow.contrib.autograph.impl.api import do_not_convert +from tensorflow.contrib.autograph.impl.api import RunMode +from tensorflow.contrib.autograph.impl.api import to_code +from tensorflow.contrib.autograph.impl.api import to_graph +from tensorflow.contrib.autograph.pyct.transformer import AutographParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'utils', 'convert', 'converted_call', 'do_not_convert', 'RunMode', - 'to_code', 'to_graph', 'PyFlowParseError' + 'to_code', 'to_graph', 'AutographParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/autograph/converters/BUILD similarity index 92% rename from tensorflow/contrib/py2tf/converters/BUILD rename to tensorflow/contrib/autograph/converters/BUILD index f624c42686..608bd82722 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/autograph/converters/BUILD @@ -49,9 +49,9 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", - "//tensorflow/contrib/py2tf/pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis", - "//tensorflow/contrib/py2tf/utils", + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/contrib/autograph/pyct/static_analysis", + "//tensorflow/contrib/autograph/utils", "@gast_archive//:gast", "@six_archive//:six", ], @@ -89,11 +89,12 @@ py_test( py_test( name = "call_trees_test", + size = "large", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/impl", + "//tensorflow/contrib/autograph/impl", "//tensorflow/python:client_testlib", ], ) @@ -143,7 +144,7 @@ py_test( srcs = ["name_scopes_test.py"], deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "//tensorflow/python:client_testlib", ], ) @@ -199,7 +200,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "//tensorflow/python:client_testlib", ], ) @@ -210,7 +211,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/converters/__init__.py b/tensorflow/contrib/autograph/converters/__init__.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/__init__.py rename to tensorflow/contrib/autograph/converters/__init__.py index ca10896ee5..e4e8eda42f 100644 --- a/tensorflow/contrib/py2tf/converters/__init__.py +++ b/tensorflow/contrib/autograph/converters/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Code converters used by Py2TF.""" +"""Code converters used by Autograph.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/py2tf/converters/asserts.py b/tensorflow/contrib/autograph/converters/asserts.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/asserts.py rename to tensorflow/contrib/autograph/converters/asserts.py index 5b9b8e772b..f011a97ade 100644 --- a/tensorflow/contrib/py2tf/converters/asserts.py +++ b/tensorflow/contrib/autograph/converters/asserts.py @@ -20,8 +20,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer class AssertsTransformer(transformer.Base): diff --git a/tensorflow/contrib/py2tf/converters/asserts_test.py b/tensorflow/contrib/autograph/converters/asserts_test.py similarity index 90% rename from tensorflow/contrib/py2tf/converters/asserts_test.py rename to tensorflow/contrib/autograph/converters/asserts_test.py index 6611f2777a..cc913febe8 100644 --- a/tensorflow/contrib/py2tf/converters/asserts_test.py +++ b/tensorflow/contrib/autograph/converters/asserts_test.py @@ -20,8 +20,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.converters import asserts -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import asserts +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/break_statements.py b/tensorflow/contrib/autograph/converters/break_statements.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/break_statements.py rename to tensorflow/contrib/autograph/converters/break_statements.py index bfb709c5e3..721bc0ccd0 100644 --- a/tensorflow/contrib/py2tf/converters/break_statements.py +++ b/tensorflow/contrib/autograph/converters/break_statements.py @@ -20,10 +20,10 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class BreakCanonicalizationTransformer(transformer.Base): diff --git a/tensorflow/contrib/py2tf/converters/break_statements_test.py b/tensorflow/contrib/autograph/converters/break_statements_test.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/break_statements_test.py rename to tensorflow/contrib/autograph/converters/break_statements_test.py index 095fcdff07..dd4914a022 100644 --- a/tensorflow/contrib/py2tf/converters/break_statements_test.py +++ b/tensorflow/contrib/autograph/converters/break_statements_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import break_statements -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import break_statements +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/autograph/converters/builtin_functions.py similarity index 92% rename from tensorflow/contrib/py2tf/converters/builtin_functions.py rename to tensorflow/contrib/autograph/converters/builtin_functions.py index f1129ef153..0349ce29ce 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions.py @@ -20,8 +20,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer class BuiltinFunctionTransformer(transformer.Base): @@ -38,13 +38,13 @@ class BuiltinFunctionTransformer(transformer.Base): def _convert_builtin(self, node): template = """ - py2tf_utils.dynamic_builtin(func, args) + autograph_utils.dynamic_builtin(func, args) """ return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - py2tf_utils.dynamic_print(args) + autograph_utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions_test.py b/tensorflow/contrib/autograph/converters/builtin_functions_test.py similarity index 96% rename from tensorflow/contrib/py2tf/converters/builtin_functions_test.py rename to tensorflow/contrib/autograph/converters/builtin_functions_test.py index eb60a1d8ae..ac7e756c47 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions_test.py +++ b/tensorflow/contrib/autograph/converters/builtin_functions_test.py @@ -22,8 +22,8 @@ import sys import six -from tensorflow.contrib.py2tf.converters import builtin_functions -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import builtin_functions +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/autograph/converters/call_trees.py similarity index 94% rename from tensorflow/contrib/py2tf/converters/call_trees.py rename to tensorflow/contrib/autograph/converters/call_trees.py index f498b814bf..61f6bfd7e7 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/autograph/converters/call_trees.py @@ -27,12 +27,12 @@ import types import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import inspect_utils -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import inspect_utils +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer from tensorflow.python.util import tf_inspect @@ -199,7 +199,7 @@ class CallTreeTransformer(transformer.Base): def _wrap_to_py_func_no_return(self, node): # TODO(mdan): Properly handle varargs, etc. template = """ - py2tf_utils.wrap_py_func(func, None, (args,), kwargs, True) + autograph_utils.wrap_py_func(func, None, (args,), kwargs, True) """ return templates.replace( template, @@ -210,7 +210,7 @@ class CallTreeTransformer(transformer.Base): def _wrap_to_py_func_single_return(self, node, dtype): # TODO(mdan): Properly handle varargs, etc. template = """ - py2tf_utils.wrap_py_func(func, dtype, (args,), kwargs, False) + autograph_utils.wrap_py_func(func, dtype, (args,), kwargs, False) """ return templates.replace_as_expression( template, @@ -238,10 +238,9 @@ class CallTreeTransformer(transformer.Base): # Before we could convert all the time though, we'd need a reasonable # caching mechanism. template = """ - py2tf_api.converted_call(func, True, False, {}, args) + autograph_api.converted_call(func, True, False, {}, args) """ - call_expr = templates.replace( - template, func=node.func, args=node.args) + call_expr = templates.replace(template, func=node.func, args=node.args) new_call = call_expr[0].value # TODO(mdan): Improve the template mechanism to better support this. new_call.keywords = node.keywords diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/autograph/converters/call_trees_test.py similarity index 97% rename from tensorflow/contrib/py2tf/converters/call_trees_test.py rename to tensorflow/contrib/autograph/converters/call_trees_test.py index 1106432da6..c666dcb73b 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/autograph/converters/call_trees_test.py @@ -20,8 +20,8 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.py2tf.converters import call_trees -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import call_trees +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops diff --git a/tensorflow/contrib/py2tf/converters/continue_statements.py b/tensorflow/contrib/autograph/converters/continue_statements.py similarity index 94% rename from tensorflow/contrib/py2tf/converters/continue_statements.py rename to tensorflow/contrib/autograph/converters/continue_statements.py index 4069a678b1..4299a8a9d5 100644 --- a/tensorflow/contrib/py2tf/converters/continue_statements.py +++ b/tensorflow/contrib/autograph/converters/continue_statements.py @@ -18,10 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class ContinueCanonicalizationTransformer(transformer.Base): diff --git a/tensorflow/contrib/py2tf/converters/continue_statements_test.py b/tensorflow/contrib/autograph/converters/continue_statements_test.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/continue_statements_test.py rename to tensorflow/contrib/autograph/converters/continue_statements_test.py index a598dcd1ae..bcbb316d74 100644 --- a/tensorflow/contrib/py2tf/converters/continue_statements_test.py +++ b/tensorflow/contrib/autograph/converters/continue_statements_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import continue_statements -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import continue_statements +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/control_flow.py rename to tensorflow/contrib/autograph/converters/control_flow.py index 762c26f0c7..49d932026f 100644 --- a/tensorflow/contrib/py2tf/converters/control_flow.py +++ b/tensorflow/contrib/autograph/converters/control_flow.py @@ -20,11 +20,11 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class SymbolNamer(object): @@ -82,7 +82,7 @@ class ControlFlowTransformer(transformer.Base): def _create_cond_expr(self, results, test, body_name, orelse_name): if results is not None: template = """ - results = py2tf_utils.run_cond(test, body_name, orelse_name) + results = autograph_utils.run_cond(test, body_name, orelse_name) """ return templates.replace( template, @@ -92,7 +92,7 @@ class ControlFlowTransformer(transformer.Base): orelse_name=orelse_name) else: template = """ - py2tf_utils.run_cond(test, body_name, orelse_name) + autograph_utils.run_cond(test, body_name, orelse_name) """ return templates.replace( template, test=test, body_name=body_name, orelse_name=orelse_name) @@ -204,7 +204,7 @@ class ControlFlowTransformer(transformer.Base): def body_name(state_ssf): body return state_ssf, - state_ast_tuple = py2tf_utils.run_while(test_name, body_name, [state]) + state_ast_tuple = autograph_utils.run_while(test_name, body_name, [state]) """ node = templates.replace( template, diff --git a/tensorflow/contrib/py2tf/converters/control_flow_test.py b/tensorflow/contrib/autograph/converters/control_flow_test.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/control_flow_test.py rename to tensorflow/contrib/autograph/converters/control_flow_test.py index b785b284a7..86fed51f27 100644 --- a/tensorflow/contrib/py2tf/converters/control_flow_test.py +++ b/tensorflow/contrib/autograph/converters/control_flow_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import control_flow -from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.autograph.converters import control_flow +from tensorflow.contrib.autograph.converters import converter_test_base from tensorflow.python.framework import constant_op from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py b/tensorflow/contrib/autograph/converters/converter_test_base.py similarity index 85% rename from tensorflow/contrib/py2tf/converters/converter_test_base.py rename to tensorflow/contrib/autograph/converters/converter_test_base.py index 8c08c5492a..3ea2cfd668 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/autograph/converters/converter_test_base.py @@ -21,15 +21,15 @@ from __future__ import print_function import contextlib import imp -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import context -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import pretty_printer -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.static_analysis import activity -from tensorflow.contrib.py2tf.pyct.static_analysis import live_values -from tensorflow.contrib.py2tf.pyct.static_analysis import type_info +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import pretty_printer +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.static_analysis import activity +from tensorflow.contrib.autograph.pyct.static_analysis import live_values +from tensorflow.contrib.autograph.pyct.static_analysis import type_info from tensorflow.python.platform import test @@ -75,8 +75,8 @@ class TestCase(test.TestCase): try: result, source = compiler.ast_to_object(node) result.tf = self.make_fake_mod('fake_tf', *symbols) - result.py2tf_utils = utils - result.py2tf_api = self.make_fake_mod('fake_api', converted_call) + result.autograph_utils = utils + result.autograph_api = self.make_fake_mod('fake_api', converted_call) yield result except Exception: # pylint:disable=broad-except if source is None: diff --git a/tensorflow/contrib/py2tf/converters/decorators.py b/tensorflow/contrib/autograph/converters/decorators.py similarity index 96% rename from tensorflow/contrib/py2tf/converters/decorators.py rename to tensorflow/contrib/autograph/converters/decorators.py index 68bf241ef3..92445f3174 100644 --- a/tensorflow/contrib/py2tf/converters/decorators.py +++ b/tensorflow/contrib/autograph/converters/decorators.py @@ -24,8 +24,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import pretty_printer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import pretty_printer class DecoratorsTransformer(gast.NodeTransformer): diff --git a/tensorflow/contrib/py2tf/converters/decorators_test.py b/tensorflow/contrib/autograph/converters/decorators_test.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/decorators_test.py rename to tensorflow/contrib/autograph/converters/decorators_test.py index c75e546174..e67ab1cd6a 100644 --- a/tensorflow/contrib/py2tf/converters/decorators_test.py +++ b/tensorflow/contrib/autograph/converters/decorators_test.py @@ -20,9 +20,9 @@ from __future__ import print_function from functools import wraps -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import decorators -from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import decorators +from tensorflow.contrib.autograph.pyct import compiler from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/for_loops.py b/tensorflow/contrib/autograph/converters/for_loops.py similarity index 80% rename from tensorflow/contrib/py2tf/converters/for_loops.py rename to tensorflow/contrib/autograph/converters/for_loops.py index 8d28b149a8..4999c47bdc 100644 --- a/tensorflow/contrib/py2tf/converters/for_loops.py +++ b/tensorflow/contrib/autograph/converters/for_loops.py @@ -22,10 +22,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class ForLoopCanonicalizationTransformer(transformer.Base): @@ -45,12 +45,12 @@ class ForLoopCanonicalizationTransformer(transformer.Base): if anno.hasanno(node, 'extra_cond'): template = """ i = 0 - smart_loop_iter = py2tf_utils.dynamic_dataset(loop_iter) - cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + smart_loop_iter = autograph_utils.dynamic_dataset(loop_iter) + cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) while cont and extra_cond: body i += 1 - cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) """ return templates.replace( template, @@ -64,12 +64,12 @@ class ForLoopCanonicalizationTransformer(transformer.Base): else: template = """ i = 0 - smart_loop_iter = py2tf_utils.dynamic_dataset(loop_iter) - cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + smart_loop_iter = autograph_utils.dynamic_dataset(loop_iter) + cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) while cont: body i += 1 - cont, target = py2tf_utils.dynamic_for_cond(i, smart_loop_iter) + cont, target = autograph_utils.dynamic_for_cond(i, smart_loop_iter) """ repl = templates.replace( template, diff --git a/tensorflow/contrib/py2tf/converters/for_loops_test.py b/tensorflow/contrib/autograph/converters/for_loops_test.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/for_loops_test.py rename to tensorflow/contrib/autograph/converters/for_loops_test.py index b6e3e8c8d8..943f52de55 100644 --- a/tensorflow/contrib/py2tf/converters/for_loops_test.py +++ b/tensorflow/contrib/autograph/converters/for_loops_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import for_loops +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import for_loops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/ifexp.py b/tensorflow/contrib/autograph/converters/ifexp.py similarity index 88% rename from tensorflow/contrib/py2tf/converters/ifexp.py rename to tensorflow/contrib/autograph/converters/ifexp.py index 5fd6f348af..aff94d2b79 100644 --- a/tensorflow/contrib/py2tf/converters/ifexp.py +++ b/tensorflow/contrib/autograph/converters/ifexp.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer class IfExp(transformer.Base): @@ -27,7 +27,7 @@ class IfExp(transformer.Base): def visit_IfExp(self, node): template = """ - py2tf_utils.run_cond(test, lambda: body, lambda: orelse) + autograph_utils.run_cond(test, lambda: body, lambda: orelse) """ desugared_ifexp = templates.replace_as_expression( template, test=node.test, body=node.body, orelse=node.orelse) diff --git a/tensorflow/contrib/py2tf/converters/ifexp_test.py b/tensorflow/contrib/autograph/converters/ifexp_test.py similarity index 86% rename from tensorflow/contrib/py2tf/converters/ifexp_test.py rename to tensorflow/contrib/autograph/converters/ifexp_test.py index 9c357ef35b..ac6849dcb4 100644 --- a/tensorflow/contrib/py2tf/converters/ifexp_test.py +++ b/tensorflow/contrib/autograph/converters/ifexp_test.py @@ -18,9 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import ifexp +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import ifexp from tensorflow.python.platform import test @@ -38,7 +38,7 @@ class IfExpTest(converter_test_base.TestCase): return 1 if x else 0 with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils for x in [0, 1]: self.assertEqual(test_fn(x), result.test_fn(x)) @@ -52,7 +52,7 @@ class IfExpTest(converter_test_base.TestCase): return y with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils result.f = f for x in [-2, 2]: self.assertEqual(test_fn(x), result.test_fn(x)) @@ -63,7 +63,7 @@ class IfExpTest(converter_test_base.TestCase): return x * x if x > 0 else x with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils for x in [-2, 2]: self.assertEqual(test_fn(x), result.test_fn(x)) @@ -73,7 +73,7 @@ class IfExpTest(converter_test_base.TestCase): return x * x if x > 0 else x if x else 1 with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils for x in [-2, 0, 2]: self.assertEqual(test_fn(x), result.test_fn(x)) @@ -85,7 +85,7 @@ class IfExpTest(converter_test_base.TestCase): return -x with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils for x in [-2, 2, 5]: self.assertEqual(test_fn(x), result.test_fn(x)) @@ -97,7 +97,7 @@ class IfExpTest(converter_test_base.TestCase): return x with self.compiled_fn(test_fn) as result: - result.py2tf_util = utils + result.autograph_util = utils for x in [-2, 2, 5]: self.assertEqual(test_fn(x), result.test_fn(x)) diff --git a/tensorflow/contrib/py2tf/converters/list_comprehension.py b/tensorflow/contrib/autograph/converters/list_comprehension.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/list_comprehension.py rename to tensorflow/contrib/autograph/converters/list_comprehension.py index e874483110..d7f2920151 100644 --- a/tensorflow/contrib/py2tf/converters/list_comprehension.py +++ b/tensorflow/contrib/autograph/converters/list_comprehension.py @@ -31,9 +31,9 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer class ListCompCanonicalizationTransformer(transformer.Base): diff --git a/tensorflow/contrib/py2tf/converters/list_comprehension_test.py b/tensorflow/contrib/autograph/converters/list_comprehension_test.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/list_comprehension_test.py rename to tensorflow/contrib/autograph/converters/list_comprehension_test.py index 025fac11e4..4758671f5e 100644 --- a/tensorflow/contrib/py2tf/converters/list_comprehension_test.py +++ b/tensorflow/contrib/autograph/converters/list_comprehension_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import list_comprehension +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import list_comprehension from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/lists.py b/tensorflow/contrib/autograph/converters/lists.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/lists.py rename to tensorflow/contrib/autograph/converters/lists.py index 3e62037a50..234a0a7487 100644 --- a/tensorflow/contrib/py2tf/converters/lists.py +++ b/tensorflow/contrib/autograph/converters/lists.py @@ -32,9 +32,9 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer from tensorflow.python.framework import dtypes @@ -74,7 +74,7 @@ class ListTransformer(transformer.Base): if qn.qn[-1] == 'append' and (len(call_node.args) == 1): template = """ - target = py2tf_utils.dynamic_list_append(target, element) + target = autograph_utils.dynamic_list_append(target, element) """ node = templates.replace( template, diff --git a/tensorflow/contrib/py2tf/converters/lists_test.py b/tensorflow/contrib/autograph/converters/lists_test.py similarity index 90% rename from tensorflow/contrib/py2tf/converters/lists_test.py rename to tensorflow/contrib/autograph/converters/lists_test.py index 671a1cc7b1..749ba14347 100644 --- a/tensorflow/contrib/py2tf/converters/lists_test.py +++ b/tensorflow/contrib/autograph/converters/lists_test.py @@ -18,9 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import lists +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import lists from tensorflow.python.framework import dtypes from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions.py b/tensorflow/contrib/autograph/converters/logical_expressions.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/logical_expressions.py rename to tensorflow/contrib/autograph/converters/logical_expressions.py index e0abf74ebc..3a795a315a 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions.py +++ b/tensorflow/contrib/autograph/converters/logical_expressions.py @@ -23,10 +23,10 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer # TODO(mdan): Properly extrack boolean ops according to lazy eval rules. @@ -57,8 +57,8 @@ class LogicalExpressionTransformer(transformer.Base): gast.NotEq: 'tf.not_equal', gast.Or: 'tf.logical_or', gast.USub: 'tf.negative', - gast.Is: 'py2tf_utils.dynamic_is', - gast.IsNot: 'py2tf_utils.dynamic_is_not' + gast.Is: 'autograph_utils.dynamic_is', + gast.IsNot: 'autograph_utils.dynamic_is_not' } def _expect_simple_symbol(self, operand): diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py b/tensorflow/contrib/autograph/converters/logical_expressions_test.py similarity index 92% rename from tensorflow/contrib/py2tf/converters/logical_expressions_test.py rename to tensorflow/contrib/autograph/converters/logical_expressions_test.py index eb28c309a4..2814060c4d 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py +++ b/tensorflow/contrib/autograph/converters/logical_expressions_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import logical_expressions +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import logical_expressions from tensorflow.python.ops import math_ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/name_scopes.py b/tensorflow/contrib/autograph/converters/name_scopes.py similarity index 93% rename from tensorflow/contrib/py2tf/converters/name_scopes.py rename to tensorflow/contrib/autograph/converters/name_scopes.py index c702823fcf..2a3f474360 100644 --- a/tensorflow/contrib/py2tf/converters/name_scopes.py +++ b/tensorflow/contrib/autograph/converters/name_scopes.py @@ -21,8 +21,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer class FunctionNameScopeTransformer(transformer.Base): diff --git a/tensorflow/contrib/py2tf/converters/name_scopes_test.py b/tensorflow/contrib/autograph/converters/name_scopes_test.py similarity index 95% rename from tensorflow/contrib/py2tf/converters/name_scopes_test.py rename to tensorflow/contrib/autograph/converters/name_scopes_test.py index a8ca341602..61e5db2af8 100644 --- a/tensorflow/contrib/py2tf/converters/name_scopes_test.py +++ b/tensorflow/contrib/autograph/converters/name_scopes_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import name_scopes +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import name_scopes from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/converters/side_effect_guards.py b/tensorflow/contrib/autograph/converters/side_effect_guards.py similarity index 91% rename from tensorflow/contrib/py2tf/converters/side_effect_guards.py rename to tensorflow/contrib/autograph/converters/side_effect_guards.py index 30976b3ec6..1c1293d2c4 100644 --- a/tensorflow/contrib/py2tf/converters/side_effect_guards.py +++ b/tensorflow/contrib/autograph/converters/side_effect_guards.py @@ -36,12 +36,12 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class SymbolNamer(object): @@ -160,8 +160,8 @@ class SideEffectGuardTransformer(transformer.Base): [alias_map.get(s, s).ast() for s in guarded_args], None) template = """ - with py2tf_utils.control_dependency_on_returns(call): - aliased_guarded_args = py2tf_utils.alias_tensors(guarded_args) + with autograph_utils.control_dependency_on_returns(call): + aliased_guarded_args = autograph_utils.alias_tensors(guarded_args) """ control_deps_guard = templates.replace( template, @@ -172,7 +172,7 @@ class SideEffectGuardTransformer(transformer.Base): alias_map = {} template = """ - with py2tf_utils.control_dependency_on_returns(call): + with autograph_utils.control_dependency_on_returns(call): pass """ control_deps_guard = templates.replace(template, call=node.value)[-1] diff --git a/tensorflow/contrib/py2tf/converters/side_effect_guards_test.py b/tensorflow/contrib/autograph/converters/side_effect_guards_test.py similarity index 97% rename from tensorflow/contrib/py2tf/converters/side_effect_guards_test.py rename to tensorflow/contrib/autograph/converters/side_effect_guards_test.py index 463db2e770..ce0ce33243 100644 --- a/tensorflow/contrib/py2tf/converters/side_effect_guards_test.py +++ b/tensorflow/contrib/autograph/converters/side_effect_guards_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import side_effect_guards +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import side_effect_guards from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/autograph/converters/single_return.py similarity index 96% rename from tensorflow/contrib/py2tf/converters/single_return.py rename to tensorflow/contrib/autograph/converters/single_return.py index 1194b98f5e..bcc9ca9dfe 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/autograph/converters/single_return.py @@ -20,11 +20,11 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import templates -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import templates +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno # TODO(mdan): Move this logic into transformer_base. @@ -232,7 +232,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def visit_Return(self, node): if self.cant_return: raise ValueError( - 'Pyflow currently does not support `return` statements in loops. ' + '`return` statements are not supported in loops. ' 'Try assigning to a variable in the while loop, and returning ' 'outside of the loop') diff --git a/tensorflow/contrib/py2tf/converters/single_return_test.py b/tensorflow/contrib/autograph/converters/single_return_test.py similarity index 97% rename from tensorflow/contrib/py2tf/converters/single_return_test.py rename to tensorflow/contrib/autograph/converters/single_return_test.py index 2ea7a9d6d3..d483005a09 100644 --- a/tensorflow/contrib/py2tf/converters/single_return_test.py +++ b/tensorflow/contrib/autograph/converters/single_return_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import single_return +from tensorflow.contrib.autograph.converters import converter_test_base +from tensorflow.contrib.autograph.converters import single_return from tensorflow.python.framework.ops import name_scope from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/impl/BUILD b/tensorflow/contrib/autograph/impl/BUILD similarity index 82% rename from tensorflow/contrib/py2tf/impl/BUILD rename to tensorflow/contrib/autograph/impl/BUILD index cc49d71b78..e468176da1 100644 --- a/tensorflow/contrib/py2tf/impl/BUILD +++ b/tensorflow/contrib/autograph/impl/BUILD @@ -25,10 +25,10 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], deps = [ - "//tensorflow/contrib/py2tf/converters", - "//tensorflow/contrib/py2tf/pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis", - "//tensorflow/contrib/py2tf/utils", + "//tensorflow/contrib/autograph/converters", + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/contrib/autograph/pyct/static_analysis", + "//tensorflow/contrib/autograph/utils", "@gast_archive//:gast", "@six_archive//:six", ], @@ -40,7 +40,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":impl", - "//tensorflow/contrib/py2tf/utils", + "//tensorflow/contrib/autograph/utils", "//tensorflow/python:client_testlib", "//third_party/py/numpy", ], diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/autograph/impl/api.py similarity index 95% rename from tensorflow/contrib/py2tf/impl/api.py rename to tensorflow/contrib/autograph/impl/api.py index a9e8ea2043..1c4fcaa622 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/autograph/impl/api.py @@ -27,13 +27,13 @@ import gast import six # pylint:enable=g-bad-import-order -from tensorflow.contrib.py2tf.impl import config -from tensorflow.contrib.py2tf.impl import conversion -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import inspect_utils -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.utils import builtins -from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.contrib.autograph.impl import config +from tensorflow.contrib.autograph.impl import conversion +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import inspect_utils +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.utils import builtins +from tensorflow.contrib.autograph.utils import py_func from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -89,7 +89,7 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None): Args: run_as: RunMode value. Whether to run the function as-is, or wrap it into a py_func. - return_dtypes: See py2tf.utils.py_func.wrap_py_func. Setting to None or + return_dtypes: See autograph.utils.py_func.wrap_py_func. Setting to None or empty list or tuple will create a dummy return value that can be used to set control dependencies. diff --git a/tensorflow/contrib/py2tf/impl/api_test.py b/tensorflow/contrib/autograph/impl/api_test.py similarity index 92% rename from tensorflow/contrib/py2tf/impl/api_test.py rename to tensorflow/contrib/autograph/impl/api_test.py index a7b1aba852..ee2d301d75 100644 --- a/tensorflow/contrib/py2tf/impl/api_test.py +++ b/tensorflow/contrib/autograph/impl/api_test.py @@ -20,11 +20,11 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.impl import api -from tensorflow.contrib.py2tf.impl import config -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.impl import api +from tensorflow.contrib.autograph.impl import config +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.utils import py_func from tensorflow.python.framework import constant_op from tensorflow.python.platform import test @@ -37,10 +37,8 @@ class ApiTest(test.TestCase): def setUp(self): config.COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', - 'from tensorflow.contrib.py2tf import utils as ' - 'py2tf_utils', - 'tf = py2tf_utils.fake_tf()' - ) + 'from tensorflow.contrib.autograph import utils as ' + 'autograph_utils', 'tf = autograph_utils.fake_tf()') def test_decorator_recurses(self): @@ -200,7 +198,7 @@ class ApiTest(test.TestCase): compiled_code = api.to_code(test_fn) # Just check for some key words and that it is parseable Python code. - self.assertRegexpMatches(compiled_code, 'py2tf_utils\\.run_while') + self.assertRegexpMatches(compiled_code, 'autograph_utils\\.run_while') self.assertIsNotNone(parser.parse_str(compiled_code)) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/autograph/impl/config.py similarity index 79% rename from tensorflow/contrib/py2tf/impl/config.py rename to tensorflow/contrib/autograph/impl/config.py index bdbc6663dd..543c1486e6 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/autograph/impl/config.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf import utils +from tensorflow.contrib.autograph import utils PYTHON_LITERALS = { @@ -35,16 +35,16 @@ DEFAULT_UNCOMPILED_MODULES = set(( # All of tensorflow's subpackages. Unlike the root tf module, they don't # have well-known names. Not refering to the module directly to avoid # circular imports. - (utils.__name__[:-len('.contrib.py2tf.utils')],), + ( + utils.__name__[:-len('.contrib.autograph.utils')],), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Also allow controlling the generated names (for testability). COMPILED_IMPORT_STATEMENTS = ( - 'from __future__ import print_function', - 'import tensorflow as tf', - 'from tensorflow.contrib.py2tf.impl import api as ' - 'py2tf_api', - 'from tensorflow.contrib.py2tf import utils as ' - 'py2tf_utils') + 'from __future__ import print_function', 'import tensorflow as tf', + 'from tensorflow.contrib.autograph.impl import api as ' + 'autograph_api', + 'from tensorflow.contrib.autograph import utils as ' + 'autograph_utils') diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/autograph/impl/conversion.py similarity index 84% rename from tensorflow/contrib/py2tf/impl/conversion.py rename to tensorflow/contrib/autograph/impl/conversion.py index 37b24ab55f..62a49cd92d 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/autograph/impl/conversion.py @@ -20,31 +20,31 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.converters import asserts -from tensorflow.contrib.py2tf.converters import break_statements -from tensorflow.contrib.py2tf.converters import builtin_functions -from tensorflow.contrib.py2tf.converters import call_trees -from tensorflow.contrib.py2tf.converters import continue_statements -from tensorflow.contrib.py2tf.converters import control_flow -from tensorflow.contrib.py2tf.converters import decorators -from tensorflow.contrib.py2tf.converters import for_loops -from tensorflow.contrib.py2tf.converters import ifexp -from tensorflow.contrib.py2tf.converters import lists -from tensorflow.contrib.py2tf.converters import logical_expressions -from tensorflow.contrib.py2tf.converters import name_scopes -from tensorflow.contrib.py2tf.converters import side_effect_guards -from tensorflow.contrib.py2tf.converters import single_return -from tensorflow.contrib.py2tf.impl import config -from tensorflow.contrib.py2tf.impl import naming -from tensorflow.contrib.py2tf.pyct import context -from tensorflow.contrib.py2tf.pyct import inspect_utils -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.static_analysis import activity -from tensorflow.contrib.py2tf.pyct.static_analysis import live_values -from tensorflow.contrib.py2tf.pyct.static_analysis import type_info -from tensorflow.contrib.py2tf.utils import type_hints +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.converters import asserts +from tensorflow.contrib.autograph.converters import break_statements +from tensorflow.contrib.autograph.converters import builtin_functions +from tensorflow.contrib.autograph.converters import call_trees +from tensorflow.contrib.autograph.converters import continue_statements +from tensorflow.contrib.autograph.converters import control_flow +from tensorflow.contrib.autograph.converters import decorators +from tensorflow.contrib.autograph.converters import for_loops +from tensorflow.contrib.autograph.converters import ifexp +from tensorflow.contrib.autograph.converters import lists +from tensorflow.contrib.autograph.converters import logical_expressions +from tensorflow.contrib.autograph.converters import name_scopes +from tensorflow.contrib.autograph.converters import side_effect_guards +from tensorflow.contrib.autograph.converters import single_return +from tensorflow.contrib.autograph.impl import config +from tensorflow.contrib.autograph.impl import naming +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import inspect_utils +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.static_analysis import activity +from tensorflow.contrib.autograph.pyct.static_analysis import live_values +from tensorflow.contrib.autograph.pyct.static_analysis import type_info +from tensorflow.contrib.autograph.utils import type_hints from tensorflow.python.util import tf_inspect @@ -213,19 +213,19 @@ def class_to_graph(c, conversion_map): def _add_self_references(namespace, api_module): """Self refs are only required for analysis and are not used directly.""" # Manually add the utils namespace which may be used from generated code. - if 'py2tf_util' not in namespace: - namespace['py2tf_utils'] = utils - elif namespace['py2tf_utils'] != utils: + if 'autograph_util' not in namespace: + namespace['autograph_utils'] = utils + elif namespace['autograph_utils'] != utils: raise ValueError( - 'The module name "py2tf_utils" is reserved and may not be used.') + 'The module name "autograph_utils" is reserved and may not be used.') # We also make reference to the api module for dynamic conversion, but # to avoid circular references we don't import it here. - if 'py2tf_api' not in namespace: - namespace['py2tf_api'] = api_module - elif namespace['py2tf_api'] != api_module: + if 'autograph_api' not in namespace: + namespace['autograph_api'] = api_module + elif namespace['autograph_api'] != api_module: raise ValueError( - 'The module name "py2tf_api" is reserved and may not be used.') + 'The module name "autograph_api" is reserved and may not be used.') def function_to_graph(f, conversion_map, arg_values, arg_types, diff --git a/tensorflow/contrib/py2tf/impl/conversion_test.py b/tensorflow/contrib/autograph/impl/conversion_test.py similarity index 96% rename from tensorflow/contrib/py2tf/impl/conversion_test.py rename to tensorflow/contrib/autograph/impl/conversion_test.py index 9ff256aace..7066739eb8 100644 --- a/tensorflow/contrib/py2tf/impl/conversion_test.py +++ b/tensorflow/contrib/autograph/impl/conversion_test.py @@ -20,8 +20,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.impl import conversion +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.impl import conversion from tensorflow.python.framework import constant_op from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/impl/naming.py b/tensorflow/contrib/autograph/impl/naming.py similarity index 98% rename from tensorflow/contrib/py2tf/impl/naming.py rename to tensorflow/contrib/autograph/impl/naming.py index 51326091de..1facaa0ca0 100644 --- a/tensorflow/contrib/py2tf/impl/naming.py +++ b/tensorflow/contrib/autograph/impl/naming.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.pyct import qual_names +from tensorflow.contrib.autograph.pyct import qual_names class Namer(object): diff --git a/tensorflow/contrib/py2tf/impl/naming_test.py b/tensorflow/contrib/autograph/impl/naming_test.py similarity index 98% rename from tensorflow/contrib/py2tf/impl/naming_test.py rename to tensorflow/contrib/autograph/impl/naming_test.py index beb4e54937..73fc089465 100644 --- a/tensorflow/contrib/py2tf/impl/naming_test.py +++ b/tensorflow/contrib/autograph/impl/naming_test.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.impl import naming +from tensorflow.contrib.autograph.impl import naming from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/BUILD b/tensorflow/contrib/autograph/pyct/BUILD similarity index 100% rename from tensorflow/contrib/py2tf/pyct/BUILD rename to tensorflow/contrib/autograph/pyct/BUILD diff --git a/tensorflow/contrib/py2tf/pyct/__init__.py b/tensorflow/contrib/autograph/pyct/__init__.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/__init__.py rename to tensorflow/contrib/autograph/pyct/__init__.py diff --git a/tensorflow/contrib/py2tf/pyct/anno.py b/tensorflow/contrib/autograph/pyct/anno.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/anno.py rename to tensorflow/contrib/autograph/pyct/anno.py diff --git a/tensorflow/contrib/py2tf/pyct/anno_test.py b/tensorflow/contrib/autograph/pyct/anno_test.py similarity index 97% rename from tensorflow/contrib/py2tf/pyct/anno_test.py rename to tensorflow/contrib/autograph/pyct/anno_test.py index 6c29918fdf..1d4d9d119e 100644 --- a/tensorflow/contrib/py2tf/pyct/anno_test.py +++ b/tensorflow/contrib/autograph/pyct/anno_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import ast -from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.autograph.pyct import anno from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/ast_util.py b/tensorflow/contrib/autograph/pyct/ast_util.py similarity index 98% rename from tensorflow/contrib/py2tf/pyct/ast_util.py rename to tensorflow/contrib/autograph/pyct/ast_util.py index 6f7e656c26..5a41b5e4a9 100644 --- a/tensorflow/contrib/py2tf/pyct/ast_util.py +++ b/tensorflow/contrib/autograph/pyct/ast_util.py @@ -22,7 +22,7 @@ import ast import gast -from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.autograph.pyct import anno class CleanCopier(gast.NodeVisitor): diff --git a/tensorflow/contrib/py2tf/pyct/ast_util_test.py b/tensorflow/contrib/autograph/pyct/ast_util_test.py similarity index 93% rename from tensorflow/contrib/py2tf/pyct/ast_util_test.py rename to tensorflow/contrib/autograph/pyct/ast_util_test.py index 8d123679e3..8faf92c705 100644 --- a/tensorflow/contrib/py2tf/pyct/ast_util_test.py +++ b/tensorflow/contrib/autograph/pyct/ast_util_test.py @@ -20,10 +20,10 @@ from __future__ import print_function import ast -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/compiler.py b/tensorflow/contrib/autograph/pyct/compiler.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/compiler.py rename to tensorflow/contrib/autograph/pyct/compiler.py diff --git a/tensorflow/contrib/py2tf/pyct/compiler_test.py b/tensorflow/contrib/autograph/pyct/compiler_test.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/compiler_test.py rename to tensorflow/contrib/autograph/pyct/compiler_test.py index 243f4c8153..98cdc1506b 100644 --- a/tensorflow/contrib/py2tf/pyct/compiler_test.py +++ b/tensorflow/contrib/autograph/pyct/compiler_test.py @@ -22,8 +22,8 @@ import textwrap import gast -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import parser from tensorflow.python.platform import test from tensorflow.python.util import tf_inspect diff --git a/tensorflow/contrib/py2tf/pyct/context.py b/tensorflow/contrib/autograph/pyct/context.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/context.py rename to tensorflow/contrib/autograph/pyct/context.py diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils.py b/tensorflow/contrib/autograph/pyct/inspect_utils.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/inspect_utils.py rename to tensorflow/contrib/autograph/pyct/inspect_utils.py diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py b/tensorflow/contrib/autograph/pyct/inspect_utils_test.py similarity index 98% rename from tensorflow/contrib/py2tf/pyct/inspect_utils_test.py rename to tensorflow/contrib/autograph/pyct/inspect_utils_test.py index 5528ac851f..ddca6f963b 100644 --- a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py +++ b/tensorflow/contrib/autograph/pyct/inspect_utils_test.py @@ -22,7 +22,7 @@ from functools import wraps import six -from tensorflow.contrib.py2tf.pyct import inspect_utils +from tensorflow.contrib.autograph.pyct import inspect_utils from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/parser.py b/tensorflow/contrib/autograph/pyct/parser.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/parser.py rename to tensorflow/contrib/autograph/pyct/parser.py diff --git a/tensorflow/contrib/py2tf/pyct/parser_test.py b/tensorflow/contrib/autograph/pyct/parser_test.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/parser_test.py rename to tensorflow/contrib/autograph/pyct/parser_test.py index c58ffc7e0c..007a4c6fb0 100644 --- a/tensorflow/contrib/py2tf/pyct/parser_test.py +++ b/tensorflow/contrib/autograph/pyct/parser_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import textwrap -from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.autograph.pyct import parser from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/pretty_printer.py b/tensorflow/contrib/autograph/pyct/pretty_printer.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/pretty_printer.py rename to tensorflow/contrib/autograph/pyct/pretty_printer.py diff --git a/tensorflow/contrib/py2tf/pyct/pretty_printer_test.py b/tensorflow/contrib/autograph/pyct/pretty_printer_test.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/pretty_printer_test.py rename to tensorflow/contrib/autograph/pyct/pretty_printer_test.py index 81e3f47b80..0cb48f3576 100644 --- a/tensorflow/contrib/py2tf/pyct/pretty_printer_test.py +++ b/tensorflow/contrib/autograph/pyct/pretty_printer_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import ast -from tensorflow.contrib.py2tf.pyct import pretty_printer +from tensorflow.contrib.autograph.pyct import pretty_printer from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/qual_names.py b/tensorflow/contrib/autograph/pyct/qual_names.py similarity index 99% rename from tensorflow/contrib/py2tf/pyct/qual_names.py rename to tensorflow/contrib/autograph/pyct/qual_names.py index 7dec13db92..4d5764a974 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names.py +++ b/tensorflow/contrib/autograph/pyct/qual_names.py @@ -29,7 +29,7 @@ import collections import gast -from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.autograph.pyct import anno class Symbol(collections.namedtuple('Symbol', ['name'])): diff --git a/tensorflow/contrib/py2tf/pyct/qual_names_test.py b/tensorflow/contrib/autograph/pyct/qual_names_test.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/qual_names_test.py rename to tensorflow/contrib/autograph/pyct/qual_names_test.py index 6583fa243b..103bd25aa3 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names_test.py +++ b/tensorflow/contrib/autograph/pyct/qual_names_test.py @@ -20,11 +20,11 @@ from __future__ import print_function import textwrap -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.qual_names import QN -from tensorflow.contrib.py2tf.pyct.qual_names import resolve +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.qual_names import QN +from tensorflow.contrib.autograph.pyct.qual_names import resolve from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD similarity index 83% rename from tensorflow/contrib/py2tf/pyct/static_analysis/BUILD rename to tensorflow/contrib/autograph/pyct/static_analysis/BUILD index 2799b56a00..d192bc7aab 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD +++ b/tensorflow/contrib/autograph/pyct/static_analysis/BUILD @@ -25,7 +25,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "@gast_archive//:gast", ], ) @@ -36,7 +36,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":static_analysis", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "//tensorflow/python:client_testlib", "@gast_archive//:gast", ], @@ -48,7 +48,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":static_analysis", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/autograph/pyct", "//tensorflow/python:client_testlib", ], ) @@ -59,8 +59,8 @@ py_test( srcs_version = "PY2AND3", deps = [ ":static_analysis", - "//tensorflow/contrib/py2tf/pyct", - "//tensorflow/contrib/py2tf/utils", + "//tensorflow/contrib/autograph/pyct", + "//tensorflow/contrib/autograph/utils", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/__init__.py b/tensorflow/contrib/autograph/pyct/static_analysis/__init__.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/static_analysis/__init__.py rename to tensorflow/contrib/autograph/pyct/static_analysis/__init__.py diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py similarity index 97% rename from tensorflow/contrib/py2tf/pyct/static_analysis/activity.py rename to tensorflow/contrib/autograph/pyct/static_analysis/activity.py index 716672a53b..da6a2f6f05 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity.py @@ -22,10 +22,10 @@ import copy import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.qual_names import QN -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.qual_names import QN +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno # TODO(mdan): Add support for PY3 (e.g. Param vs arg). diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py similarity index 95% rename from tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py rename to tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py index b16d15b39d..37c28872bb 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/activity_test.py @@ -20,13 +20,13 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import context -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.qual_names import QN -from tensorflow.contrib.py2tf.pyct.static_analysis import activity -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.qual_names import QN +from tensorflow.contrib.autograph.pyct.static_analysis import activity +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/annos.py b/tensorflow/contrib/autograph/pyct/static_analysis/annos.py similarity index 100% rename from tensorflow/contrib/py2tf/pyct/static_analysis/annos.py rename to tensorflow/contrib/autograph/pyct/static_analysis/annos.py diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py b/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py rename to tensorflow/contrib/autograph/pyct/static_analysis/live_values.py index ac5697900a..5f813355e6 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py @@ -25,9 +25,9 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import transformer +from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class LiveValueResolver(transformer.Base): diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py similarity index 89% rename from tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py rename to tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py index a56dff824e..b66439624e 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py @@ -18,13 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import context -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.static_analysis import activity -from tensorflow.contrib.py2tf.pyct.static_analysis import live_values -from tensorflow.contrib.py2tf.pyct.static_analysis import type_info +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.static_analysis import activity +from tensorflow.contrib.autograph.pyct.static_analysis import live_values +from tensorflow.contrib.autograph.pyct.static_analysis import type_info from tensorflow.python.framework import constant_op from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py similarity index 98% rename from tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py rename to tensorflow/contrib/autograph/pyct/static_analysis/type_info.py index a969adbeca..203aa3c3d1 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info.py @@ -43,8 +43,8 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import transformer from tensorflow.python.util import tf_inspect diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py similarity index 93% rename from tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py rename to tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py index 8a8956197d..c0de4a6043 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/type_info_test.py @@ -18,14 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf import utils -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import context -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names -from tensorflow.contrib.py2tf.pyct.static_analysis import activity -from tensorflow.contrib.py2tf.pyct.static_analysis import live_values -from tensorflow.contrib.py2tf.pyct.static_analysis import type_info +from tensorflow.contrib.autograph import utils +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import context +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names +from tensorflow.contrib.autograph.pyct.static_analysis import activity +from tensorflow.contrib.autograph.pyct.static_analysis import live_values +from tensorflow.contrib.autograph.pyct.static_analysis import type_info from tensorflow.python.client import session from tensorflow.python.platform import test from tensorflow.python.training import training diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/autograph/pyct/templates.py similarity index 98% rename from tensorflow/contrib/py2tf/pyct/templates.py rename to tensorflow/contrib/autograph/pyct/templates.py index 590be68234..fb99e0d4e5 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/autograph/pyct/templates.py @@ -26,9 +26,9 @@ import textwrap import gast -from tensorflow.contrib.py2tf.pyct import ast_util -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names +from tensorflow.contrib.autograph.pyct import ast_util +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import qual_names class ReplaceTransformer(gast.NodeTransformer): diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/autograph/pyct/templates_test.py similarity index 96% rename from tensorflow/contrib/py2tf/pyct/templates_test.py rename to tensorflow/contrib/autograph/pyct/templates_test.py index af939caf32..a01f8bf04c 100644 --- a/tensorflow/contrib/py2tf/pyct/templates_test.py +++ b/tensorflow/contrib/autograph/pyct/templates_test.py @@ -22,9 +22,9 @@ import imp import gast -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import parser +from tensorflow.contrib.autograph.pyct import templates from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/pyct/transformer.py b/tensorflow/contrib/autograph/pyct/transformer.py similarity index 89% rename from tensorflow/contrib/py2tf/pyct/transformer.py rename to tensorflow/contrib/autograph/pyct/transformer.py index 31ef7e1c05..35f114b6e1 100644 --- a/tensorflow/contrib/py2tf/pyct/transformer.py +++ b/tensorflow/contrib/autograph/pyct/transformer.py @@ -23,12 +23,12 @@ import sys import gast import six -from tensorflow.contrib.py2tf.pyct import anno -from tensorflow.contrib.py2tf.pyct import compiler -from tensorflow.contrib.py2tf.pyct import pretty_printer +from tensorflow.contrib.autograph.pyct import anno +from tensorflow.contrib.autograph.pyct import compiler +from tensorflow.contrib.autograph.pyct import pretty_printer -class PyFlowParseError(SyntaxError): +class AutographParseError(SyntaxError): pass @@ -77,8 +77,8 @@ class Base(gast.NodeTransformer): line = source_code.splitlines()[self._lineno - 1] else: line = '' - six.reraise(PyFlowParseError, - PyFlowParseError( + six.reraise(AutographParseError, + AutographParseError( msg, (source_file, self._lineno, self._col_offset + 1, line)), sys.exc_info()[2]) diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/autograph/utils/BUILD similarity index 100% rename from tensorflow/contrib/py2tf/utils/BUILD rename to tensorflow/contrib/autograph/utils/BUILD diff --git a/tensorflow/contrib/autograph/utils/__init__.py b/tensorflow/contrib/autograph/utils/__init__.py new file mode 100644 index 0000000000..22898b17e9 --- /dev/null +++ b/tensorflow/contrib/autograph/utils/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utility module that contains APIs usable in the generated code.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.autograph.utils.builtins import dynamic_builtin +from tensorflow.contrib.autograph.utils.builtins import dynamic_dataset +from tensorflow.contrib.autograph.utils.builtins import dynamic_for_cond +from tensorflow.contrib.autograph.utils.builtins import dynamic_print +from tensorflow.contrib.autograph.utils.builtins import dynamic_range +from tensorflow.contrib.autograph.utils.context_managers import control_dependency_on_returns +from tensorflow.contrib.autograph.utils.misc import alias_tensors +from tensorflow.contrib.autograph.utils.multiple_dispatch import dynamic_is +from tensorflow.contrib.autograph.utils.multiple_dispatch import dynamic_is_not +from tensorflow.contrib.autograph.utils.multiple_dispatch import run_cond +from tensorflow.contrib.autograph.utils.multiple_dispatch import run_while +from tensorflow.contrib.autograph.utils.py_func import wrap_py_func +from tensorflow.contrib.autograph.utils.tensor_list import dynamic_list_append +from tensorflow.contrib.autograph.utils.testing import fake_tf +from tensorflow.contrib.autograph.utils.type_check import is_tensor +from tensorflow.contrib.autograph.utils.type_hints import set_element_type diff --git a/tensorflow/contrib/py2tf/utils/builtins.py b/tensorflow/contrib/autograph/utils/builtins.py similarity index 98% rename from tensorflow/contrib/py2tf/utils/builtins.py rename to tensorflow/contrib/autograph/utils/builtins.py index 251b4ed8ee..4ab32ee47d 100644 --- a/tensorflow/contrib/py2tf/utils/builtins.py +++ b/tensorflow/contrib/autograph/utils/builtins.py @@ -20,8 +20,8 @@ from __future__ import print_function import six -from tensorflow.contrib.py2tf.utils import py_func -from tensorflow.contrib.py2tf.utils import type_check +from tensorflow.contrib.autograph.utils import py_func +from tensorflow.contrib.autograph.utils import type_check from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops diff --git a/tensorflow/contrib/py2tf/utils/builtins_test.py b/tensorflow/contrib/autograph/utils/builtins_test.py similarity index 98% rename from tensorflow/contrib/py2tf/utils/builtins_test.py rename to tensorflow/contrib/autograph/utils/builtins_test.py index 59b3573d38..d9f7913d89 100644 --- a/tensorflow/contrib/py2tf/utils/builtins_test.py +++ b/tensorflow/contrib/autograph/utils/builtins_test.py @@ -22,7 +22,7 @@ import sys import six -from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.contrib.autograph.utils import builtins from tensorflow.python.framework import constant_op from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/utils/context_managers.py b/tensorflow/contrib/autograph/utils/context_managers.py similarity index 100% rename from tensorflow/contrib/py2tf/utils/context_managers.py rename to tensorflow/contrib/autograph/utils/context_managers.py diff --git a/tensorflow/contrib/py2tf/utils/context_managers_test.py b/tensorflow/contrib/autograph/utils/context_managers_test.py similarity index 96% rename from tensorflow/contrib/py2tf/utils/context_managers_test.py rename to tensorflow/contrib/autograph/utils/context_managers_test.py index 404f6e44e5..42e27724b9 100644 --- a/tensorflow/contrib/py2tf/utils/context_managers_test.py +++ b/tensorflow/contrib/autograph/utils/context_managers_test.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.utils import context_managers +from tensorflow.contrib.autograph.utils import context_managers from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import tensor_array_ops diff --git a/tensorflow/contrib/py2tf/utils/misc.py b/tensorflow/contrib/autograph/utils/misc.py similarity index 100% rename from tensorflow/contrib/py2tf/utils/misc.py rename to tensorflow/contrib/autograph/utils/misc.py diff --git a/tensorflow/contrib/py2tf/utils/misc_test.py b/tensorflow/contrib/autograph/utils/misc_test.py similarity index 96% rename from tensorflow/contrib/py2tf/utils/misc_test.py rename to tensorflow/contrib/autograph/utils/misc_test.py index 8aedd4cd64..71e358c33e 100644 --- a/tensorflow/contrib/py2tf/utils/misc_test.py +++ b/tensorflow/contrib/autograph/utils/misc_test.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.utils.misc import alias_tensors +from tensorflow.contrib.autograph.utils.misc import alias_tensors from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/utils/multiple_dispatch.py b/tensorflow/contrib/autograph/utils/multiple_dispatch.py similarity index 95% rename from tensorflow/contrib/py2tf/utils/multiple_dispatch.py rename to tensorflow/contrib/autograph/utils/multiple_dispatch.py index 427a936c35..b756ccfaee 100644 --- a/tensorflow/contrib/py2tf/utils/multiple_dispatch.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities for type-dependent behavior used in py2tf-generated code.""" +"""Utilities for type-dependent behavior used in autograph-generated code.""" from __future__ import absolute_import from __future__ import division @@ -20,7 +20,7 @@ from __future__ import print_function import six -from tensorflow.contrib.py2tf.utils.type_check import is_tensor +from tensorflow.contrib.autograph.utils.type_check import is_tensor from tensorflow.python.ops import control_flow_ops diff --git a/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py similarity index 98% rename from tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py rename to tensorflow/contrib/autograph/utils/multiple_dispatch_test.py index 75e8fdd5ed..8c7daa6ded 100644 --- a/tensorflow/contrib/py2tf/utils/multiple_dispatch_test.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.py2tf.utils import multiple_dispatch +from tensorflow.contrib.autograph.utils import multiple_dispatch from tensorflow.python.client.session import Session from tensorflow.python.framework.constant_op import constant from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/utils/py_func.py b/tensorflow/contrib/autograph/utils/py_func.py similarity index 97% rename from tensorflow/contrib/py2tf/utils/py_func.py rename to tensorflow/contrib/autograph/utils/py_func.py index 34f2a8b70b..11ebfb2e49 100644 --- a/tensorflow/contrib/py2tf/utils/py_func.py +++ b/tensorflow/contrib/autograph/utils/py_func.py @@ -118,9 +118,8 @@ def wrap_py_func(f, return_dtypes, args, kwargs=None, use_dummy_return=False): assert isinstance(return_dtypes, dtypes.DType) def f_wrapper(*tensor_args): - f_args = tuple( - tensor_args[tensor_args_idx[i]] if arg_is_tensor[i] else a - for i, a in enumerate(args)) + f_args = tuple(tensor_args[tensor_args_idx[i]] if arg_is_tensor[i] else a + for i, a in enumerate(args)) f_kwargs = { k: tensor_args[tensor_args_idx[k]] if kwarg_is_tensor[k] else kwargs[k] for i, k in enumerate(kwarg_keys) diff --git a/tensorflow/contrib/py2tf/utils/py_func_test.py b/tensorflow/contrib/autograph/utils/py_func_test.py similarity index 98% rename from tensorflow/contrib/py2tf/utils/py_func_test.py rename to tensorflow/contrib/autograph/utils/py_func_test.py index 3b7a35365a..2468263142 100644 --- a/tensorflow/contrib/py2tf/utils/py_func_test.py +++ b/tensorflow/contrib/autograph/utils/py_func_test.py @@ -18,7 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.contrib.autograph.utils import py_func from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/utils/tensor_list.py b/tensorflow/contrib/autograph/utils/tensor_list.py similarity index 100% rename from tensorflow/contrib/py2tf/utils/tensor_list.py rename to tensorflow/contrib/autograph/utils/tensor_list.py diff --git a/tensorflow/contrib/py2tf/utils/tensor_list_test.py b/tensorflow/contrib/autograph/utils/tensor_list_test.py similarity index 97% rename from tensorflow/contrib/py2tf/utils/tensor_list_test.py rename to tensorflow/contrib/autograph/utils/tensor_list_test.py index 110e4d105e..d58489eb68 100644 --- a/tensorflow/contrib/py2tf/utils/tensor_list_test.py +++ b/tensorflow/contrib/autograph/utils/tensor_list_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for PyFlow list.""" +"""Tests for Autograph lists.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.utils import tensor_list as tl +from tensorflow.contrib.autograph.utils import tensor_list as tl from tensorflow.python.client.session import Session from tensorflow.python.eager import context from tensorflow.python.framework import dtypes diff --git a/tensorflow/contrib/py2tf/utils/testing.py b/tensorflow/contrib/autograph/utils/testing.py similarity index 100% rename from tensorflow/contrib/py2tf/utils/testing.py rename to tensorflow/contrib/autograph/utils/testing.py diff --git a/tensorflow/contrib/py2tf/utils/type_check.py b/tensorflow/contrib/autograph/utils/type_check.py similarity index 95% rename from tensorflow/contrib/py2tf/utils/type_check.py rename to tensorflow/contrib/autograph/utils/type_check.py index b9b2b451a4..8748abc47b 100644 --- a/tensorflow/contrib/py2tf/utils/type_check.py +++ b/tensorflow/contrib/autograph/utils/type_check.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities used in py2tf-generated code.""" +"""Utilities used in autograph-generated code.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/py2tf/utils/type_check_test.py b/tensorflow/contrib/autograph/utils/type_check_test.py similarity index 96% rename from tensorflow/contrib/py2tf/utils/type_check_test.py rename to tensorflow/contrib/autograph/utils/type_check_test.py index 7d0428e9cc..3b67b7194c 100644 --- a/tensorflow/contrib/py2tf/utils/type_check_test.py +++ b/tensorflow/contrib/autograph/utils/type_check_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import numpy -from tensorflow.contrib.py2tf.utils import type_check +from tensorflow.contrib.autograph.utils import type_check from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util from tensorflow.python.platform import test diff --git a/tensorflow/contrib/py2tf/utils/type_hints.py b/tensorflow/contrib/autograph/utils/type_hints.py similarity index 100% rename from tensorflow/contrib/py2tf/utils/type_hints.py rename to tensorflow/contrib/autograph/utils/type_hints.py diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py deleted file mode 100644 index 4e6003c852..0000000000 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility module that contains APIs usable in the generated code.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin -from tensorflow.contrib.py2tf.utils.builtins import dynamic_dataset -from tensorflow.contrib.py2tf.utils.builtins import dynamic_for_cond -from tensorflow.contrib.py2tf.utils.builtins import dynamic_print -from tensorflow.contrib.py2tf.utils.builtins import dynamic_range -from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns -from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.multiple_dispatch import dynamic_is -from tensorflow.contrib.py2tf.utils.multiple_dispatch import dynamic_is_not -from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond -from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while -from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func -from tensorflow.contrib.py2tf.utils.tensor_list import dynamic_list_append -from tensorflow.contrib.py2tf.utils.testing import fake_tf -from tensorflow.contrib.py2tf.utils.type_check import is_tensor -from tensorflow.contrib.py2tf.utils.type_hints import set_element_type diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 8a80d6443b..e01306f953 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -164,12 +164,12 @@ sh_binary( "//tensorflow/contrib/lite/toco/python:toco_from_protos", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/py2tf:py2tf", - "//tensorflow/contrib/py2tf/converters:converters", - "//tensorflow/contrib/py2tf/converters:test_lib", - "//tensorflow/contrib/py2tf/impl:impl", - "//tensorflow/contrib/py2tf/pyct:pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/autograph:autograph", + "//tensorflow/contrib/autograph/converters:converters", + "//tensorflow/contrib/autograph/converters:test_lib", + "//tensorflow/contrib/autograph/impl:impl", + "//tensorflow/contrib/autograph/pyct:pyct", + "//tensorflow/contrib/autograph/pyct/static_analysis:static_analysis", "//tensorflow/contrib/receptive_field:receptive_field_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", -- GitLab From 710ba88846c9aca71ad1f83000255db4d3bb17e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 13:41:35 -0700 Subject: [PATCH 505/960] Quick fix to assign_moving_average documentation formatting. PiperOrigin-RevId: 190517622 --- tensorflow/python/training/moving_averages.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py index b9ecb27df1..61fc828a84 100644 --- a/tensorflow/python/training/moving_averages.py +++ b/tensorflow/python/training/moving_averages.py @@ -52,16 +52,19 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None): they were created in and the scope of the variables they debias. They are also given a uniqifying-suffix. - Ex: + E.g.: + + ``` with tf.variable_scope('scope1'): with tf.variable_scope('scope2'): var = tf.get_variable('foo') - assign_moving_average(var, 0.0, 1.0) - assign_moving_average(var, 0.0, 0.9) + tf.assign_moving_average(var, 0.0, 1.0) + tf.assign_moving_average(var, 0.0, 0.9) - var.name: 'scope1/scope2/foo' - shadow var names: 'scope1/scope2/scope1/scope2/foo/biased' - 'scope1/scope2/scope1/scope2/foo/biased_1' + # var.name: 'scope1/scope2/foo' + # shadow var names: 'scope1/scope2/scope1/scope2/foo/biased' + # 'scope1/scope2/scope1/scope2/foo/biased_1' + ``` Args: variable: A Variable. -- GitLab From 1fcef75aaa1989376324ff8dfc25033b443a69df Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 26 Mar 2018 13:48:00 -0700 Subject: [PATCH 506/960] Update BUILD --- tensorflow/contrib/timeseries/python/timeseries/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index d72cc1b8a2..67ee644d3b 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -233,7 +233,7 @@ py_test( ], srcs_version = "PY2AND3", tags = [ - "manual", + "no_oss", "no_pip", # b/64527635 "no_pip_gpu", # b/63391119 ], -- GitLab From 72ed3c3b743e5feef99e37058dbd2f4344bcc5e3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 14:04:35 -0700 Subject: [PATCH 507/960] Add description of shapes and a pointer to external tutorial notebook in `tf.distributions.Distribution`. PiperOrigin-RevId: 190521666 --- .../python/ops/distributions/distribution.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index 0866fa8b0b..7c43bf54fc 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -338,6 +338,27 @@ class Distribution(_BaseDistribution): cum_prob_invalid = u.cdf([4.0, 5.0, 6.0]) ``` + #### Shapes + + There are three important concepts associated with TensorFlow Distributions + shapes: + - Event shape describes the shape of a single draw from the distribution; + it may be dependent across dimensions. For scalar distributions, the event + shape is `[]`. For a 5-dimensional MultivariateNormal, the event shape is + `[5]`. + - Batch shape describes independent, not identically distributed draws, aka a + "collection" or "bunch" of distributions. + - Sample shape describes independent, identically distributed draws of batches + from the distribution family. + + The event shape and the batch shape are properties of a Distribution object, + whereas the sample shape is associated with a specific call to `sample` or + `log_prob`. + + For detailed usage examples of TensorFlow Distributions shapes, see + [this tutorial]( + https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Understanding%20TensorFlow%20Distributions%20Shapes.ipynb) + #### Parameter values leading to undefined statistics or distributions. Some distributions do not have well-defined statistics for all initialization -- GitLab From 2ff8e913ad000d379405c284857e7fc81eef9fed Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 26 Mar 2018 14:33:10 -0700 Subject: [PATCH 508/960] Clarify eager gradient doc strings PiperOrigin-RevId: 190526387 --- tensorflow/python/eager/backprop.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index a7837b8a7f..c54a5a1445 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -171,8 +171,8 @@ def implicit_val_and_grad(f): """Returns a function which differentiates f with respect to variables. The wrapped function returns the value and the gradient of f when called with - the same arguments. The gradient is with respect to all TFE variables which - are either trainable or have `variable.watch()` called on them by f. + the same arguments. The gradient is with respect to all trainable TFE + variables accessed by `f`. This function is useful when the exact set of variables to differentiate with is not known ahead of time. @@ -249,8 +249,8 @@ def implicit_grad(f): """Returns a function which differentiates f with respect to variables. The wrapped function returns the gradient of f when called with the same - arguments. The gradient is with respect to all TFE variables which are - either trainable or have `variable.watch()` called on them by f. + arguments. The gradient is with respect to all trainable TFE variables + accessed by `f`. This function is useful when the exact set of variables to differentiate with is not known ahead of time. -- GitLab From 0a86c23860968c66e95b9b6e930d14fac2699889 Mon Sep 17 00:00:00 2001 From: Jayaram Bobba Date: Mon, 26 Mar 2018 14:52:32 -0700 Subject: [PATCH 509/960] reverting mkl allocator inline modifier from #17396. causes build issues on linux systems (#18006) --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 73abf18d97..55c8411ad0 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -50,7 +50,7 @@ class MklCPUAllocator : public VisitableAllocator { // Constructor and other standard functions /// Environment variable that user can set to upper bound on memory allocation - static inline constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES"; + static constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES"; /// Default upper limit on allocator size - 64GB static constexpr size_t kDefaultMaxLimit = 64LL << 30; -- GitLab From 2c548819707bdafc8057cdd9c997f2a7b420d577 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 15:07:10 -0700 Subject: [PATCH 510/960] Fix some compiler warnings in MKL-DNN build. PiperOrigin-RevId: 190532168 --- tensorflow/core/graph/mkl_layout_pass.cc | 13 ++++++------- tensorflow/core/kernels/BUILD | 5 +++++ tensorflow/core/kernels/mkl_concat_op.cc | 6 ++++-- tensorflow/core/kernels/mkl_conv_ops.h | 9 ++++++--- tensorflow/core/kernels/mkl_fused_batch_norm_op.cc | 12 ++++++------ tensorflow/core/kernels/mkl_lrn_op.cc | 7 ++++--- tensorflow/core/kernels/mkl_reshape_op.cc | 3 ++- tensorflow/core/kernels/mkl_softmax_op.cc | 1 - tensorflow/core/util/mkl_util.h | 4 ++-- 9 files changed, 35 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 1507b6eae2..5368774f2d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -3103,8 +3103,7 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr* g, TensorProto proto; proto.set_dtype(dt); uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 8); + proto.set_tensor_content(string(reinterpret_cast(&zero), 8)); TensorShape dummy_shape({8}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -3219,7 +3218,8 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // For that let's first find filter node that is 2nd input (slot 1) // of BackpropInput. Node* filter_node = nullptr; - old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node); + TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, + &filter_node)); CHECK_NOTNULL(filter_node); // Now check which nodes receive from filter_node. Filter feeds as @@ -3399,8 +3399,7 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode( TensorProto proto; proto.set_dtype(dt); float zero[1] = {0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 4); + proto.set_tensor_content(string(reinterpret_cast(&zero), 4)); TensorShape dummy_shape({1}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -3876,7 +3875,7 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, // Create node. Node* new_node; - nb.Finalize(&**g, &new_node); + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); CHECK_NOTNULL(new_node); // Incoming data edges from 'pred' node and 'succ' node to new 'new_node' @@ -3987,7 +3986,7 @@ Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad( // Create node. Node* new_node; - nb.Finalize(&**g, &new_node); + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); CHECK_NOTNULL(new_node); // Incoming data edges from BiasAddGrad node and Conv2DBackpropFilter node to diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 8d235e79c0..9bb80eb892 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5959,6 +5959,7 @@ tf_mkl_kernel_library( "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ) @@ -5979,6 +5980,7 @@ tf_mkl_kernel_library( "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ) @@ -6010,6 +6012,7 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", + "//third_party/eigen3", "//third_party/mkl:intel_binary_blob", "@mkl_dnn", ], @@ -6029,6 +6032,7 @@ tf_mkl_kernel_library( prefix = "mkl_aggregate_ops", deps = MATH_DEPS + [ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ) @@ -6046,6 +6050,7 @@ tf_mkl_kernel_library( prefix = "mkl_reshape_op", deps = ARRAY_DEPS + [ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ) diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index aa3ea890b0..9ab95d765c 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -803,8 +803,10 @@ class MklConcatOp : public OpKernel { Tensor* output_tensor = nullptr; TensorShape tf_shape_output; tf_shape_output.AddDim(dnn_shape_output.GetSerializeBufferSize()); - context->allocate_output(GetTensorMetaDataIndex(0, context->num_outputs()), - tf_shape_output, &output_tensor); + OP_REQUIRES_OK(context, + context->allocate_output( + GetTensorMetaDataIndex(0, context->num_outputs()), + tf_shape_output, &output_tensor)); dnn_shape_output.SerializeMklDnnShape( output_tensor->flat().data(), output_tensor->flat().size() * sizeof(uint8)); diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 7ca10db895..8333a09316 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -65,9 +65,12 @@ class MklDnnConvUtil { public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, Padding pad, TensorFormat fm, - const std::vector& dilations) : - context_(context), strides_(strides), padding_(pad), - data_format_(fm), dilations_(dilations) {} + const std::vector& dilations) + : context_(context), + strides_(strides), + dilations_(dilations), + padding_(pad), + data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 9e564b016f..333a6570dc 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -817,8 +817,8 @@ class MklFusedBatchNormOp : public OpKernel { // set weights primitive // MKL-DNN packs scale & shift as "weights": // ...... - auto weights_desc = - memory::desc({2, depth_}, MklDnnType(), memory::format::nc); + auto weights_desc = memory::desc({2, static_cast(depth_)}, + MklDnnType(), memory::format::nc); auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine); auto weights_m = memory(weights_pd); T* weights_data = reinterpret_cast(weights_m.get_data_handle()); @@ -833,8 +833,8 @@ class MklFusedBatchNormOp : public OpKernel { } // set mean primitive - auto mean_desc = - memory::desc({1, depth_}, MklDnnType(), memory::format::nc); + auto mean_desc = memory::desc({1, static_cast(depth_)}, + MklDnnType(), memory::format::nc); auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine); char* saved_mean_data_tf = reinterpret_cast(saved_mean_tensor->flat().data()); @@ -844,8 +844,8 @@ class MklFusedBatchNormOp : public OpKernel { memory(mean_pd, reinterpret_cast(saved_mean_data_tf)); // set variance primitive - auto variance_desc = - memory::desc({1, depth_}, MklDnnType(), memory::format::nc); + auto variance_desc = memory::desc({1, static_cast(depth_)}, + MklDnnType(), memory::format::nc); auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine); char* saved_variance_data_tf = reinterpret_cast(saved_variance_tensor->flat().data()); diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 282012c719..eef254cdad 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -752,7 +752,8 @@ class MklLRNOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -1001,7 +1002,8 @@ class MklLRNGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -1043,7 +1045,6 @@ class MklLRNGradOp : public OpKernel { // Naming: diff_dst is input_gradient_tensor; src is orig_input_tensor. const Tensor& input_grad_tensor = MklGetInput(context, kIdxGradient); const Tensor& orig_input_tensor = MklGetInput(context, kIdxOrigInput); - const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput); // Get input sizes in MKL-DNN required NCHW format. // LRN does not have data_format attribute. But by default it has diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc index 5dbc4a2709..e12f6f437a 100644 --- a/tensorflow/core/kernels/mkl_reshape_op.cc +++ b/tensorflow/core/kernels/mkl_reshape_op.cc @@ -266,7 +266,8 @@ class MklReshapeOp : public OpKernel { &net)) { stream(stream::kind::eager).submit(net).wait(); } else { - output_tensor->CopyFrom(input_tensor, shape_to); + OP_REQUIRES(context, + output_tensor->CopyFrom(input_tensor, shape_to)); } return; } else { diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index aceef1e234..170523b5b4 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -27,7 +27,6 @@ limitations under the License. #include "mkldnn.h" #include "mkldnn_types.h" -#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/util/mkl_util.h" #include "mkldnn.hpp" diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 34db96075d..9f58e40d94 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1579,10 +1579,10 @@ class MklDnnData { } /// Set function for data buffer of user memory primitive. - inline void* SetUsrMemDataHandle(void* data_buffer) { + inline void SetUsrMemDataHandle(void* data_buffer) { CHECK_NOTNULL(user_memory_); CHECK_NOTNULL(data_buffer); - return user_memory_->set_data_handle(data_buffer); + user_memory_->set_data_handle(data_buffer); } /// Set function for data buffer of user memory primitive. -- GitLab From 3a00d79b16348f0a53379e81b8e98bdd93d4833e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 15:19:29 -0700 Subject: [PATCH 511/960] [XLA] Redesign: implement and test unary and binary ops. Also, - Templatized ComputeAndCompareRX and CreateRXParameter so that they accept XlaBuilder and XlaOp. - Clear data held by an XlaBuilder when Build() is called, otherwise errors will occur when the builder is reused. PiperOrigin-RevId: 190534245 --- .../xla/client/xla_client/xla_builder.cc | 136 +++-- .../xla/client/xla_client/xla_builder.h | 3 + .../xla/client/xla_client/xla_builder_test.cc | 14 + .../compiler/xla/service/shape_inference.cc | 9 +- .../compiler/xla/service/shape_inference.h | 2 + tensorflow/compiler/xla/tests/BUILD | 1 + .../xla/tests/array_elementwise_ops_test.cc | 496 +++++++++--------- .../xla/tests/client_library_test_base.h | 123 ++--- 8 files changed, 430 insertions(+), 354 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 596f39b4fd..bf91efcfd6 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -164,6 +164,11 @@ StatusOr XlaBuilder::Build() { } module->add_computations()->Swap(&entry); + // Clear data held by this builder. + this->instructions_.clear(); + this->embedded_.clear(); + this->parameter_numbers_.clear(); + return std::move(computation); } @@ -216,6 +221,16 @@ StatusOr XlaBuilder::AddBroadcastSequence(const Shape& output_shape, broadcast_dimensions); } +XlaOp XlaBuilder::UnaryOp(HloOpcode unop, const XlaOp& operand) { + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& operand_shape, operand.GetShape()); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferUnaryOpShape(unop, operand_shape)); + return AddInstruction(std::move(instr), unop, {operand}); + }()); +} + XlaOp XlaBuilder::BinaryOp( HloOpcode binop, const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { @@ -447,32 +462,32 @@ XlaOp XlaBuilder::GetTupleElement(const XlaOp& tuple_data, int64 index) { XlaOp XlaBuilder::Eq(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kEq, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Ne(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kNe, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Ge(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kGe, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Gt(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kGt, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Le(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kLe, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Lt(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kLt, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Dot(const XlaOp& lhs, const XlaOp& rhs) { @@ -551,102 +566,134 @@ XlaOp XlaBuilder::HostCompute(tensorflow::gtl::ArraySlice operands, XlaOp XlaBuilder::Complex( const XlaOp& real, const XlaOp& imag, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kComplex, real, imag, broadcast_dimensions); } XlaOp XlaBuilder::Conj(const XlaOp& operand) { return UnimplementedOp(); } XlaOp XlaBuilder::Sub(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kSubtract, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Div(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kDivide, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Rem(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kRemainder, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Max(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kMaximum, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Min(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kMinimum, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::And(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kAnd, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::Or(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kOr, lhs, rhs, broadcast_dimensions); } +// TODO(b/65209188): Create a dedicated lowering for Xor. XlaOp XlaBuilder::Xor(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return Or(And(Not(lhs), rhs, broadcast_dimensions), + And(lhs, Not(rhs), broadcast_dimensions)); } -XlaOp XlaBuilder::Not(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Not(const XlaOp& operand) { + return UnaryOp(HloOpcode::kNot, operand); +} XlaOp XlaBuilder::ShiftLeft( const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kShiftLeft, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::ShiftRightArithmetic( const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kShiftRightArithmetic, lhs, rhs, + broadcast_dimensions); } XlaOp XlaBuilder::ShiftRightLogical( const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kShiftRightLogical, lhs, rhs, + broadcast_dimensions); } -XlaOp XlaBuilder::Abs(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Abs(const XlaOp& operand) { + return UnaryOp(HloOpcode::kAbs, operand); +} XlaOp XlaBuilder::Atan2( const XlaOp& y, const XlaOp& x, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kAtan2, y, x, broadcast_dimensions); } -XlaOp XlaBuilder::Exp(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Exp(const XlaOp& operand) { + return UnaryOp(HloOpcode::kExp, operand); +} -XlaOp XlaBuilder::Floor(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Floor(const XlaOp& operand) { + return UnaryOp(HloOpcode::kFloor, operand); +} -XlaOp XlaBuilder::Ceil(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Ceil(const XlaOp& operand) { + return UnaryOp(HloOpcode::kCeil, operand); +} -XlaOp XlaBuilder::Round(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Round(const XlaOp& operand) { + return UnaryOp(HloOpcode::kRoundNearestAfz, operand); +} -XlaOp XlaBuilder::Log(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Log(const XlaOp& operand) { + return UnaryOp(HloOpcode::kLog, operand); +} -XlaOp XlaBuilder::Sign(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Sign(const XlaOp& operand) { + return UnaryOp(HloOpcode::kSign, operand); +} -XlaOp XlaBuilder::Cos(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Cos(const XlaOp& operand) { + return UnaryOp(HloOpcode::kCos, operand); +} -XlaOp XlaBuilder::Sin(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Sin(const XlaOp& operand) { + return UnaryOp(HloOpcode::kSin, operand); +} -XlaOp XlaBuilder::Tanh(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Tanh(const XlaOp& operand) { + return UnaryOp(HloOpcode::kTanh, operand); +} -XlaOp XlaBuilder::Real(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Real(const XlaOp& operand) { + return UnaryOp(HloOpcode::kReal, operand); +} -XlaOp XlaBuilder::Imag(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Imag(const XlaOp& operand) { + return UnaryOp(HloOpcode::kImag, operand); +} -XlaOp XlaBuilder::IsFinite(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::IsFinite(const XlaOp& operand) { + return UnaryOp(HloOpcode::kIsFinite, operand); +} XlaOp XlaBuilder::Transpose(const XlaOp& operand, tensorflow::gtl::ArraySlice permutation) { @@ -668,13 +715,18 @@ XlaOp XlaBuilder::Rev(const XlaOp& operand, return UnimplementedOp(); } -XlaOp XlaBuilder::Sort(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Sort(const XlaOp& operand) { + return UnaryOp(HloOpcode::kSort, operand); +} -XlaOp XlaBuilder::SqrtF32(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::SqrtF32(const XlaOp& operand) { + return BinaryOp(HloOpcode::kPower, operand, ConstantR0(0.5), + /*broadcast_dimensions=*/{}); +} XlaOp XlaBuilder::Pow(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kPower, lhs, rhs, broadcast_dimensions); } XlaOp XlaBuilder::ConvertElementType(const XlaOp& operand, @@ -687,13 +739,19 @@ XlaOp XlaBuilder::BitcastConvertType(const XlaOp& operand, return UnimplementedOp(); } -XlaOp XlaBuilder::SquareF32(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::SquareF32(const XlaOp& operand) { + return BinaryOp(HloOpcode::kPower, operand, ConstantR0(2.0), + /*broadcast_dimensions=*/{}); +} XlaOp XlaBuilder::ReciprocalF32(const XlaOp& operand) { - return UnimplementedOp(); + return BinaryOp(HloOpcode::kPower, operand, ConstantR0(-1.0), + /*broadcast_dimensions=*/{}); } -XlaOp XlaBuilder::Neg(const XlaOp& operand) { return UnimplementedOp(); } +XlaOp XlaBuilder::Neg(const XlaOp& operand) { + return UnaryOp(HloOpcode::kNegate, operand); +} XlaOp XlaBuilder::Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) { diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index c19eb47165..22cf094512 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -730,6 +730,9 @@ class XlaBuilder { StatusOr LookUpInstruction(const XlaOp& op) const; + // Internal helper method that does the building for an arbitrary unary op. + XlaOp UnaryOp(HloOpcode unop, const XlaOp& operand); + // Internal helper method that does the building for an arbitrary binary op. // broadcast_dimensions specifies which dimensions to use for broadcasting // when the operation is between tensors of different ranks. diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc index 529287a57a..85d4227ba4 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc @@ -217,5 +217,19 @@ TEST_F(XlaBuilderTest, Transpose) { EXPECT_THAT(root, op::Transpose(op::Parameter())); } +// TODO(b/65209188): Create a dedicated lowering for Xor. +TEST_F(XlaBuilderTest, Xor) { + XlaBuilder b(TestName()); + auto x = b.Parameter(0, ShapeUtil::MakeShape(PRED, {}), "x"); + auto y = b.Parameter(1, ShapeUtil::MakeShape(PRED, {}), "y"); + b.Xor(x, y); + TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b)); + auto root = module->entry_computation()->root_instruction(); + LOG(ERROR) << module->ToString(); + EXPECT_THAT(root, + op::Or(op::And(op::Not(op::Parameter(0)), op::Parameter(1)), + op::And(op::Parameter(0), op::Not(op::Parameter(1))))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 8c8bd6d73a..2a70ea0354 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -304,12 +304,17 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, /* static */ StatusOr ShapeInference::InferUnaryOpShape( HloOpcode opcode, const HloInstruction* operand) { + return InferUnaryOpShape(opcode, operand->shape()); +} + +/* static */ StatusOr ShapeInference::InferUnaryOpShape( + HloOpcode opcode, const Shape& shape) { // There is no copy operation at the proto level, so handle copy explicitly. if (opcode == HloOpcode::kCopy) { - return operand->shape(); + return shape; } - return InferUnaryOpShape(OpcodeToUnaryOperation(opcode), operand->shape()); + return InferUnaryOpShape(OpcodeToUnaryOperation(opcode), shape); } /* static */ StatusOr ShapeInference::InferUnaryOpShape( diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 085fdac60c..b6552a34ae 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -48,6 +48,8 @@ class ShapeInference { // given input shape. static StatusOr InferUnaryOpShape(UnaryOperation operation, const Shape& arg); + static StatusOr InferUnaryOpShape(HloOpcode opcode, + const Shape& shape); static StatusOr InferUnaryOpShape(HloOpcode opcode, const HloInstruction* operand); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 26022278e5..3705d6c271 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -598,6 +598,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:global_data", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 6e21dda25d..fa7ac3ca9b 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/global_data.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -50,28 +51,28 @@ class ArrayElementwiseOpTestParamCount public ::testing::WithParamInterface {}; XLA_TEST_F(ArrayElementwiseOpTest, NegConstantZeroElementF32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto result = builder.Neg(a); + builder.Neg(a); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantF32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f}); - auto result = builder.Neg(a); + builder.Neg(a); ComputeAndCompareR1(&builder, {2.5f, -3.14f, -2.25f, 10.0f, -6.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-1, 0, 1, 324, std::numeric_limits::min(), std::numeric_limits::max()}); - auto result = builder.Neg(a); + builder.Neg(a); // -min == min for int32 due to an overflow. In C++ it is undefined behavior // to do this calculation. For XLA we have not specified that, so it @@ -83,18 +84,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS32) { } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantZeroElementC64) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto result = builder.Neg(a); + builder.Neg(a); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantC64) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {{-2.5f, 1.0f}, {0.0f, 3.14f}, {2.25f, -1.0f}, {-10.0f, 0.0f}}); - auto result = builder.Neg(a); + builder.Neg(a); ComputeAndCompareR1( &builder, {{2.5f, -1.0f}, {0.0f, -3.14f}, {-2.25f, 1.0f}, {10.0f, 0.0f}}, @@ -102,7 +103,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantC64) { } XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS64) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({ -1, 1, @@ -112,7 +113,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS64) { static_cast(0x8000000000000000LL), static_cast(0x8000000000000001LL), }); - auto result = builder.Neg(a); + builder.Neg(a); LOG(INFO) << -static_cast(0x7FFFFFFFFFFFFFFFLL); ComputeAndCompareR1(&builder, @@ -129,9 +130,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS64) { } XLA_TEST_F(ArrayElementwiseOpTest, IsFiniteZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto result = builder.IsFinite(a); + builder.IsFinite(a); ComputeAndCompareR1(&builder, {}, {}); } @@ -140,64 +141,63 @@ XLA_TEST_F(ArrayElementwiseOpTest, IsFiniteZeroElementF32s) { static const float kNonCanonicalNaN = tensorflow::bit_cast(0x7FD01234); XLA_TEST_F(ArrayElementwiseOpTest, IsFiniteScalarF32) { - ComputationBuilder builder(client_, TestName()); - auto result = builder.IsFinite(builder.ConstantR0(NAN)); + XlaBuilder builder(TestName()); + builder.IsFinite(builder.ConstantR0(NAN)); ComputeAndCompareR0(&builder, false, {}); EXPECT_TRUE(std::isnan(kNonCanonicalNaN)); - auto result_non_canonical = - builder.IsFinite(builder.ConstantR0(kNonCanonicalNaN)); + builder.IsFinite(builder.ConstantR0(kNonCanonicalNaN)); ComputeAndCompareR0(&builder, false, {}); const float inf = std::numeric_limits::infinity(); - auto result_inf = builder.IsFinite(builder.ConstantR0(inf)); + builder.IsFinite(builder.ConstantR0(inf)); ComputeAndCompareR0(&builder, false, {}); - auto result_neg_inf = builder.IsFinite(builder.ConstantR0(-inf)); + builder.IsFinite(builder.ConstantR0(-inf)); ComputeAndCompareR0(&builder, false, {}); - auto result_zero = builder.IsFinite(builder.ConstantR0(0.0f)); + builder.IsFinite(builder.ConstantR0(0.0f)); ComputeAndCompareR0(&builder, true, {}); } XLA_TEST_F(ArrayElementwiseOpTest, IsFiniteR1F32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const float inf = std::numeric_limits::infinity(); EXPECT_TRUE(std::isnan(kNonCanonicalNaN)); auto a = builder.ConstantR1( {{NAN, 7.0f, kNonCanonicalNaN, -1.0f, inf, -inf}}); - auto result = builder.IsFinite(a); + builder.IsFinite(a); ComputeAndCompareR1(&builder, {false, true, false, true, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f}); auto b = builder.ConstantR1({100.0f, 3.13f, 2.75f, 10.5f, -999.0f}); - auto add = builder.Add(a, b); + builder.Add(a, b); ComputeAndCompareR1(&builder, {97.5f, 6.27f, 5.0f, 0.5f, -993.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Add(a, b); + builder.Add(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {{-2.5f, 0.0f}, {0.0f, 3.14f}, {2.25f, 0.0f}, {1.0f, -10.0f}}); auto b = builder.ConstantR1( {{100.0f, 0.0f}, {3.13f, 0.0f}, {2.75f, 1.0f}, {-2.0f, 10.5f}}); - auto add = builder.Add(a, b); + builder.Add(a, b); ComputeAndCompareR1( &builder, {97.5f, {3.13f, 3.14f}, {5.0f, 1.0f}, {-1.0f, 0.5f}}, {}, @@ -205,10 +205,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantC64s) { } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantZeroElementC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Add(a, b); + builder.Add(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } @@ -295,7 +295,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantS64s) { TEST_P(ArrayElementwiseOpTestParamCount, AddManyValues) { const int count = GetParam(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector a_values; std::vector b_values; for (int i = 0; i < count; ++i) { @@ -334,49 +334,49 @@ TEST_P(ArrayElementwiseOpTestParamCount, AddManyValues) { } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 3.14f, 2.25f, -10.0f, 6.0f}); auto b = builder.ConstantR1({100.0f, 3.13f, 2.75f, 10.5f, -999.0f}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1(&builder, {-102.5f, 0.01f, -0.5f, -20.5f, 1005.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-1, 0, 2, 1000000000}); auto b = builder.ConstantR1({-1, 2, 1, -1}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1(&builder, {0, -2, 1, 1000000001}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {{-2.5f, 0.0f}, {0.0f, 3.14f}, {3.0f, 2.25f}}); auto b = builder.ConstantR1( {{0.0f, 10.0f}, {3.13f, 0.0f}, {2.75f, -0.25f}}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1( &builder, {{-2.5f, -10.0f}, {-3.13f, 3.14f}, {0.25f, 2.5f}}, {}, @@ -384,29 +384,29 @@ XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantC64s) { } XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantZeroElementC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Sub(a, b); + builder.Sub(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); auto b = builder.ConstantR1({10.0f, 5.1f, 1.0f, 10.0f, -6.0f}); - auto add = builder.Div(a, b); + builder.Div(a, b); ComputeAndCompareR1(&builder, {-0.25f, 5.0f, 2.25f, -1.0f, -1.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Div(a, b); + builder.Div(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } @@ -436,9 +436,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivS32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; - ComputationDataHandle divisor; + XlaBuilder builder(TestName()); + XlaOp dividend; + XlaOp divisor; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); auto divisor_data = @@ -451,8 +451,8 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivS32s) { // Test with a compile-time constant divisor. { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; + XlaBuilder builder(TestName()); + XlaOp dividend; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); builder.Div(dividend, builder.ConstantR1(divisors)); @@ -461,9 +461,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivS32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; - ComputationDataHandle divisor; + XlaBuilder builder(TestName()); + XlaOp dividend; + XlaOp divisor; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); auto divisor_data = @@ -476,8 +476,8 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivS32s) { // Test with a compile-time constant divisor. { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; + XlaBuilder builder(TestName()); + XlaOp dividend; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); builder.Rem(dividend, builder.ConstantR1(divisors)); @@ -507,9 +507,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; - ComputationDataHandle divisor; + XlaBuilder builder(TestName()); + XlaOp dividend; + XlaOp divisor; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); auto divisor_data = @@ -521,8 +521,8 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; + XlaBuilder builder(TestName()); + XlaOp dividend; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); builder.Div(dividend, builder.ConstantR1(divisors)); @@ -531,9 +531,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; - ComputationDataHandle divisor; + XlaBuilder builder(TestName()); + XlaOp dividend; + XlaOp divisor; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); auto divisor_data = @@ -545,8 +545,8 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } { - ComputationBuilder builder(client_, TestName()); - ComputationDataHandle dividend; + XlaBuilder builder(TestName()); + XlaOp dividend; auto dividend_data = CreateR1Parameter(dividends, 0, "dividend", &builder, ÷nd); builder.Rem(dividend, builder.ConstantR1(divisors)); @@ -556,33 +556,33 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivU32s) { } XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {{-2.5f, 1.0f}, {-25.5f, 0.0f}, {2.0f, -1.0f}}); auto b = builder.ConstantR1( {{10.0f, 0.0f}, {0.0f, 1.0f}, {2.0f, -1.0f}}); - auto div = builder.Div(a, b); + builder.Div(a, b); ComputeAndCompareR1( &builder, {{-0.25f, 0.1f}, {0.0f, 25.5f}, {1.0f, 0.0f}}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantZeroElementC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto div = builder.Div(a, b); + builder.Div(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, RemF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {-2.5f, 25.5f, 2.25f, -10.0f, 6.0f, 3.0f, 3.0f, -1.0f, -8.0f}); auto b = builder.ConstantR1( {10.0f, 5.1f, 1.0f, 10.0f, -6.0f, 2.0f, -2.0f, 7.0f, -4.0f}); - auto add = builder.Rem(a, b); + builder.Rem(a, b); ComputeAndCompareR1( &builder, {-2.5f, 0.0f, 0.25f, 0.0f, -0.0f, 1.0f, 1.0f, -1.0f, -0.0f}, {}, @@ -590,21 +590,21 @@ XLA_TEST_F(ArrayElementwiseOpTest, RemF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, RemZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Rem(a, b); + builder.Rem(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, RemF64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {-2.5, 25.5, 2.25, -10.0, 6.0, 3.0, 3.0, -1.0, -8.0}); auto b = builder.ConstantR1( {10.0, 5.1, 1.0, 10.0, -6.0, 2.0, -2.0, 7.0, -4.0}); - auto add = builder.Rem(a, b); + builder.Rem(a, b); ComputeAndCompareR1( &builder, {-2.5, 0.0, 0.25, 0.0, -0.0, 1.0, 1.0, -1.0, -0.0}, {}, @@ -612,20 +612,20 @@ XLA_TEST_F(ArrayElementwiseOpTest, RemF64s) { } XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 25.5f, 2.25f, -10.0f, 6.0f}); auto b = builder.ConstantR1({10.0f, 5.0f, 1.0f, 10.0f, -6.0f}); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, {-25.0f, 127.5f, 2.25f, -100.0f, -36.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } @@ -648,19 +648,19 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantS32s) { } } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1(a_data); auto b = builder.ConstantR1(b_data); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, expected, {}); } XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, {}, {}); } @@ -679,21 +679,21 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantU32s) { } } - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1(a_data); auto b = builder.ConstantR1(b_data); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, expected, {}); } XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {{-2.5f, 0.0f}, {0.0f, 25.5f}, {2.0f, -10.0f}}); auto b = builder.ConstantR1( {{0.0f, 10.0f}, {5.0f, 1.0f}, {10.0f, -6.0f}}); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1( &builder, {{0.0f, -25.0f}, {-25.5f, 127.5f}, {-40.0f, -112.0}}, {}, @@ -701,264 +701,264 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantC64s) { } XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantZeroElementC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto add = builder.Mul(a, b); + builder.Mul(a, b); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, AndPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({false, false, true, true}); auto b = builder.ConstantR1({false, true, false, true}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {false, false, false, true}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndPredR2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{false, false}, {true, true}}); auto b = builder.ConstantR2({{false, true}, {false, true}}); - auto out = builder.And(a, b); + builder.And(a, b); Array2D expected_array({{false, false}, {false, true}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndZeroElementPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, -1, -8}); auto b = builder.ConstantR1({5, -7, 12}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {0, -7, 8}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndS32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{0, -5}, {-1, 5}}); auto b = builder.ConstantR2({{1, -6}, {4, 5}}); - auto out = builder.And(a, b); + builder.And(a, b); Array2D expected_array({{0, -6}, {4, 5}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndZeroElementS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, 1, 8}); auto b = builder.ConstantR1({5, 7, 12}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {0, 1, 8}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndU32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{0, 1}, {3, 8}}); auto b = builder.ConstantR2({{1, 0}, {7, 6}}); - auto out = builder.And(a, b); + builder.And(a, b); Array2D expected_array({{0, 0}, {3, 0}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AndZeroElementU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.And(a, b); + builder.And(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({false, false, true, true}); auto b = builder.ConstantR1({false, true, false, true}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {false, true, true, true}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrPredR2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{false, false}, {true, true}}); auto b = builder.ConstantR2({{false, true}, {false, true}}); - auto out = builder.Or(a, b); + builder.Or(a, b); Array2D expected_array({{false, true}, {true, true}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrZeroElementPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, -1, 8}); auto b = builder.ConstantR1({5, -7, 4}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {5, -1, 12}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrS32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{0, -1}, {8, 8}}); auto b = builder.ConstantR2({{5, -7}, {4, 1}}); - auto out = builder.Or(a, b); + builder.Or(a, b); Array2D expected_array({{5, -1}, {12, 9}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrZeroElementS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, 1, 8}); auto b = builder.ConstantR1({5, 7, 4}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {5, 7, 12}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrU32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{0, 1}, {8, 8}}); auto b = builder.ConstantR2({{5, 7}, {4, 1}}); - auto out = builder.Or(a, b); + builder.Or(a, b); Array2D expected_array({{5, 7}, {12, 9}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, OrZeroElementU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.Or(a, b); + builder.Or(a, b); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({false, true, true, false}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {true, false, false, true}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotPredR2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{false, true}, {true, false}}); - auto out = builder.Not(a); + builder.Not(a); Array2D expected_array({{true, false}, {false, true}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotZeroElementPredR1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-1, 0, 1}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {0, -1, -2}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotS32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-1, 0}, {1, 8}}); - auto out = builder.Not(a); + builder.Not(a); Array2D expected_array({{0, -1}, {-2, -9}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotZeroElementS32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0, 4294967295}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {4294967295, 0}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotU32R2) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{0, 4294967295}, {1, 4294967294}}); - auto out = builder.Not(a); + builder.Not(a); Array2D expected_array({{4294967295, 0}, {4294967294, 1}}); ComputeAndCompareR2(&builder, expected_array, {}); } XLA_TEST_F(ArrayElementwiseOpTest, NotZeroElementU32R1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({}); - auto out = builder.Not(a); + builder.Not(a); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ShiftLeftS32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({static_cast(0x12345678), static_cast(0xF0001000), 1, 3, 77, 1, -3, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 15, 32, 100, -1}); - auto out = builder.ShiftLeft(a, b); + builder.ShiftLeft(a, b); ComputeAndCompareR1(&builder, {static_cast(0x23456780), 0x00100000, 0x4, @@ -967,12 +967,12 @@ XLA_TEST_F(ArrayElementwiseOpTest, ShiftLeftS32) { } XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightArithmeticS32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({static_cast(0x92345678), static_cast(0x10001000), 1, 3, 77, 1, -3, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 2, 32, 100, -1}); - auto out = builder.ShiftRightArithmetic(a, b); + builder.ShiftRightArithmetic(a, b); ComputeAndCompareR1( &builder, @@ -982,45 +982,45 @@ XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightArithmeticS32) { } XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightLogicalS32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({static_cast(0x92345678), static_cast(0x10001000), 1, 3, 77, 1, -3, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 5, 32, 100, -1}); - auto out = builder.ShiftRightLogical(a, b); + builder.ShiftRightLogical(a, b); ComputeAndCompareR1(&builder, {0x09234567, 0x00100010, 0, 0, 2, 0, 0, 0}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ShiftLeftU32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {0x12345678, 0xF0001000, 1, 3, 77, 1, ~3u, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 15, 32, 100, ~0u}); - auto out = builder.ShiftLeft(a, b); + builder.ShiftLeft(a, b); ComputeAndCompareR1( &builder, {0x23456780, 0x00100000, 0x4, 0x180, 2523136, 0, 0, 0}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightArithmeticU32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {0x92345678, 0x10001000, 1, 3, 77, 1, ~3u, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 2, 32, 100, ~0u}); - auto out = builder.ShiftRightArithmetic(a, b); + builder.ShiftRightArithmetic(a, b); ComputeAndCompareR1( &builder, {0xF9234567, 0x00100010, 0, 0, 19, 0, ~0u, 0}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightLogicalU32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1( {0x92345678, 0x10001000, 1, 3, 77, 1, ~3u, 77}); auto b = builder.ConstantR1({4, 8, 2, 7, 5, 32, 100, ~0u}); - auto out = builder.ShiftRightLogical(a, b); + builder.ShiftRightLogical(a, b); ComputeAndCompareR1(&builder, {0x09234567, 0x00100010, 0, 0, 2, 0, 0, 0}, {}); @@ -1028,59 +1028,59 @@ XLA_TEST_F(ArrayElementwiseOpTest, ShiftRightLogicalU32) { XLA_TEST_F(ArrayElementwiseOpTest, CompareEqF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 25.5f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 5.0f, 2.25f, 10.0f, NAN}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1(&builder, {false, false, true, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareGeF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 25.5f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 5.0f, 1.0f, 10.0f, NAN}); - auto compare = builder.Ge(lhs, rhs); + builder.Ge(lhs, rhs); ComputeAndCompareR1(&builder, {false, true, true, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareGtF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 25.5f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 5.0f, 1.0f, 10.0f, NAN}); - auto compare = builder.Gt(lhs, rhs); + builder.Gt(lhs, rhs); ComputeAndCompareR1(&builder, {false, true, true, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareLeF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 5.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 5.0f, 1.0f, 10.0f, NAN}); - auto compare = builder.Le(lhs, rhs); + builder.Le(lhs, rhs); ComputeAndCompareR1(&builder, {true, true, false, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareLtF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 25.5f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 5.0f, 1.0f, 10.0f, NAN}); - auto compare = builder.Lt(lhs, rhs); + builder.Lt(lhs, rhs); ComputeAndCompareR1(&builder, {true, false, false, false, false}, {}); } @@ -1088,10 +1088,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareLtF32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareEqS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1( &builder, {true, false, false, false, true, false, false, false, true}, @@ -1099,17 +1099,17 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareEqC64s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({{-2.5f, 10.0f}, {1.0f, 25.5f}, {2.25f, -3.0f}, @@ -1120,16 +1120,16 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqC64s) { {2.25f, -3.0f}, {10.0f, 0.0f}, {1.0f, NAN}}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1(&builder, {false, false, true, false, false}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementC64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}); } @@ -1138,7 +1138,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeC64s) { // Disable fast-math because we're operating on NaNs. SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({{-2.5f, 10.0f}, {1.0f, 25.5f}, {2.25f, -3.0f}, @@ -1149,7 +1149,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeC64s) { {2.25f, -3.0f}, {10.0f, 0.0f}, {1.0f, NAN}}); - auto compare = builder.Ne(lhs, rhs); + builder.Ne(lhs, rhs); ComputeAndCompareR1(&builder, {true, true, false, true, true}, {}); } @@ -1158,10 +1158,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeF32s) { // Disable fast-math because we're operating on NaNs. SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.5f, 25.5f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({10.0f, 25.5f, 1.0f, 10.0f, NAN}); - auto compare = builder.Ne(lhs, rhs); + builder.Ne(lhs, rhs); ComputeAndCompareR1(&builder, {true, false, true, true, true}, {}); } @@ -1169,10 +1169,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeF32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareNeS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Ne(lhs, rhs); + builder.Ne(lhs, rhs); ComputeAndCompareR1( &builder, {false, true, true, true, false, true, true, true, false}, {}); @@ -1181,10 +1181,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeS32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareGeS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Ge(lhs, rhs); + builder.Ge(lhs, rhs); ComputeAndCompareR1( &builder, {true, false, false, true, true, false, true, true, true}, {}); @@ -1193,10 +1193,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGeS32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareGtS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Gt(lhs, rhs); + builder.Gt(lhs, rhs); ComputeAndCompareR1( &builder, {false, false, false, true, false, false, true, true, false}, @@ -1206,10 +1206,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGtS32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareLeS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Le(lhs, rhs); + builder.Le(lhs, rhs); ComputeAndCompareR1( &builder, {true, true, true, false, true, true, false, false, true}, {}); @@ -1218,10 +1218,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareLeS32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareLtS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({min, min, min, 0, 0, 0, max, max, max}); auto rhs = builder.ConstantR1({min, 0, max, -1, 0, 1, min, 0, max}); - auto compare = builder.Lt(lhs, rhs); + builder.Lt(lhs, rhs); ComputeAndCompareR1( &builder, {false, true, true, false, false, true, false, false, false}, @@ -1230,10 +1230,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareLtS32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareEqU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Eq(lhs, rhs); + builder.Eq(lhs, rhs); ComputeAndCompareR1( &builder, {true, false, false, false, true, false, false, false, true}, @@ -1242,10 +1242,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqU32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareNeU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Ne(lhs, rhs); + builder.Ne(lhs, rhs); ComputeAndCompareR1( &builder, {false, true, true, true, false, true, true, true, false}, {}); @@ -1253,10 +1253,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareNeU32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareGeU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Ge(lhs, rhs); + builder.Ge(lhs, rhs); ComputeAndCompareR1( &builder, {true, false, false, true, true, false, true, true, true}, {}); @@ -1264,10 +1264,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGeU32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareGtU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Gt(lhs, rhs); + builder.Gt(lhs, rhs); ComputeAndCompareR1( &builder, {false, false, false, true, false, false, true, true, false}, @@ -1276,10 +1276,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGtU32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareLeU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Le(lhs, rhs); + builder.Le(lhs, rhs); ComputeAndCompareR1( &builder, {true, true, true, false, true, true, false, false, true}, {}); @@ -1287,10 +1287,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareLeU32s) { XLA_TEST_F(ArrayElementwiseOpTest, CompareLtU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({0, 0, 0, 5, 5, 5, max, max, max}); auto rhs = builder.ConstantR1({0, 1, max, 4, 5, 6, 0, 1, max}); - auto compare = builder.Lt(lhs, rhs); + builder.Lt(lhs, rhs); ComputeAndCompareR1( &builder, {false, true, true, false, false, true, false, false, false}, @@ -1299,12 +1299,12 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareLtU32s) { XLA_TEST_F(ArrayElementwiseOpTest, PowF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({4.0f, 2.0f, 2.0f, NAN, 6.0f, -2.0f, -2.0f}); auto rhs = builder.ConstantR1({2.0f, -2.0f, 3.0f, 10.0f, NAN, 3.0f, 4.0f}); - auto minimum = builder.Pow(lhs, rhs); + builder.Pow(lhs, rhs); ComputeAndCompareR1( &builder, {16.0f, 0.25f, 8.0f, NAN, NAN, -8.0f, 16.0f}, {}, error_spec_); @@ -1312,20 +1312,20 @@ XLA_TEST_F(ArrayElementwiseOpTest, PowF32s) { XLA_TEST_F(ArrayElementwiseOpTest, PowNonIntegerF32s) { SetFastMathDisabled(true); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({-2.0f, -0.6f, -0.6f, 0.0f}); auto rhs = builder.ConstantR1({0.5f, 0.6f, -0.6f, -0.6f}); - auto minimum = builder.Pow(lhs, rhs); + builder.Pow(lhs, rhs); ComputeAndCompareR1(&builder, {NAN, NAN, NAN, INFINITY}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, PowZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto minimum = builder.Pow(lhs, rhs); + builder.Pow(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } @@ -1599,14 +1599,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, Div4F32) { TEST_P(ArrayElementwiseOpTestParamCount, SquareManyValues) { const int count = GetParam(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::vector values; values.reserve(count); for (int i = 0; i < count; ++i) { values.push_back(i / static_cast(count)); } auto x = builder.ConstantR1(values); - auto exp = builder.Pow(x, builder.ConstantR0(2.0f)); + builder.Pow(x, builder.ConstantR0(2.0f)); std::vector expected; expected.reserve(values.size()); @@ -1618,7 +1618,7 @@ TEST_P(ArrayElementwiseOpTestParamCount, SquareManyValues) { } XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4D) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D values(2, 2, 2, 2); std::vector values_vector; @@ -1632,77 +1632,77 @@ XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4D) { Array4D expected(2, 2, 2, 2, expected_vector); auto x = builder.ConstantR4FromArray4D(values); - auto exp = builder.Pow(x, builder.ConstantR0(2.0f)); + builder.Pow(x, builder.ConstantR0(2.0f)); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array4D values(2, 2, 0, 2); Array4D expected(2, 2, 0, 2); auto x = builder.ConstantR4FromArray4D(values); - auto exp = builder.Pow(x, builder.ConstantR0(2.0f)); + builder.Pow(x, builder.ConstantR0(2.0f)); ComputeAndCompareR4(&builder, expected, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); - auto minimum = builder.Min(lhs, rhs); + builder.Min(lhs, rhs); ComputeAndCompareR1(&builder, {1.0f, -5.0f, 1.0f, NAN, NAN}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto minimum = builder.Min(lhs, rhs); + builder.Min(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); - auto minimum = builder.Min(lhs, rhs); + builder.Min(lhs, rhs); ComputeAndCompareR1(&builder, {1.0, -5.0, 1.0, NAN, NAN}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); - auto maximum = builder.Max(lhs, rhs); + builder.Max(lhs, rhs); ComputeAndCompareR1(&builder, {2.0f, 1.0f, 2.25f, NAN, NAN}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto lhs = builder.ConstantR1({}); auto rhs = builder.ConstantR1({}); - auto minimum = builder.Max(lhs, rhs); + builder.Max(lhs, rhs); ComputeAndCompareR1(&builder, {}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); - auto maximum = builder.Max(lhs, rhs); + builder.Max(lhs, rhs); ComputeAndCompareR1(&builder, {2.0, 1.0, 2.25, NAN, NAN}, {}, error_spec_); @@ -1711,7 +1711,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) { XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1( {min, min, min, -1, -1, 0, 0, 0, 1, 1, max, max, max}); auto y = builder.ConstantR1( @@ -1726,7 +1726,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) { XLA_TEST_F(ArrayElementwiseOpTest, MinS32s) { const int32 min = std::numeric_limits::min(); const int32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1( {min, min, min, -1, -1, 0, 0, 0, 1, 1, max, max, max}); auto y = builder.ConstantR1( @@ -1740,7 +1740,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinS32s) { XLA_TEST_F(ArrayElementwiseOpTest, MaxU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1({0, 0, 1, 1, 1, max, max, max}); auto y = builder.ConstantR1({0, 1, 0, 1, 10, 0, 234234, max}); builder.Max(x, y); @@ -1751,7 +1751,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxU32s) { XLA_TEST_F(ArrayElementwiseOpTest, MinU32s) { const uint32 max = std::numeric_limits::max(); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1({0, 0, 1, 1, 1, max, max, max}); auto y = builder.ConstantR1({0, 1, 0, 1, 10, 0, 234234, max}); builder.Min(x, y); @@ -1761,7 +1761,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinU32s) { } XLA_TEST_F(ArrayElementwiseOpTest, MaxTenF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1( {-0.0, 1.0, 2.0, -3.0, -4.0, 5.0, 6.0, -7.0, -8.0, 9.0}); auto y = builder.ConstantR1( @@ -1774,7 +1774,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxTenF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S1AndR1S0F32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto u = builder.ConstantR1({3.5}); auto v = builder.ConstantR1({}); builder.Max(u, v); @@ -1784,7 +1784,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S1AndR1S0F32s) { XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S0AndR2S0x2F32s) { for (int broadcast_dim : {0, 1}) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto u = builder.ConstantR1({3.5}); auto v = builder.ConstantR2FromArray2D(Array2D(0, 2)); builder.Max(u, v, /*broadcast_dimensions=*/{broadcast_dim}); @@ -1794,7 +1794,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxR1S0AndR2S0x2F32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Max1DAnd2DF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({2.0f, 3.0f, 4.0f}); auto m = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); @@ -1805,7 +1805,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Max1DAnd2DF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Max1DAnd2DZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({}); auto m = builder.ConstantR2({{}, {}}); builder.Max(v, m, /*broadcast_dimensions=*/{1}); @@ -1815,7 +1815,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Max1DAnd2DZeroElementF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto scalar = builder.ConstantR0(2); Array3D a_3d({{{3, 9, -1}, {2, -10, 3}}, {{-2, 2, 8}, {12, 10, 4}}}); auto array = builder.ConstantR3FromArray3D(a_3d); @@ -1826,7 +1826,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarZeroElementS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto scalar = builder.ConstantR0(2); Array3D a_3d(2, 0, 3); auto array = builder.ConstantR3FromArray3D(a_3d); @@ -1837,7 +1837,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Max3DAndScalarZeroElementS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo1DF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto m = builder.ConstantR2({{-10.4f, 64.0f, 6.0f}, {0.1f, 32.0f, 16.1f}}); auto v = builder.ConstantR1({-10.2f, 16.4f}); @@ -1848,7 +1848,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo1DF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo1DZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto m = builder.ConstantR2({{}, {}}); auto v = builder.ConstantR1({-10.2f, 16.4f}); builder.Min(m, v, /*broadcast_dimensions=*/{0}); @@ -1858,7 +1858,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo1DZeroElementF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto array2d = builder.ConstantR2({{-12.2f, 64.3f, 6.1f}, {0.0f, 32.2f, 2.5f}}); auto array4d = builder.ConstantR4FromArray4D( @@ -1873,7 +1873,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto array2d = builder.ConstantR2({{-12.2f, 64.3f, 6.1f}, {0.0f, 32.2f, 2.5f}}); Array4D arg(2, 2, 0, 3); @@ -1885,7 +1885,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Min2DTo4DZeroElementF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, MinTenS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto y = builder.ConstantR1({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); builder.Min(x, y); @@ -1895,7 +1895,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinTenS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, MaxTenS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto x = builder.ConstantR1({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto y = builder.ConstantR1({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); builder.Max(x, y); @@ -1905,10 +1905,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxTenS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, RemTwoConstantS32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-3, 26, 2, -1, 1}); auto b = builder.ConstantR1({10, 5, 1, 10, -10}); - auto add = builder.Rem(a, b); + builder.Rem(a, b); ComputeAndCompareR1(&builder, {-3, 1, 0, -1, 1}, {}); } @@ -2635,7 +2635,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGtR3F32sWithDegenerateDim2) { Array3D b_3d({{{7.0f, 1.0f}, {3.0f, 10.0f}, {15.0f, 6.0f}}}); auto b = builder.ConstantR3FromArray3D(b_3d); - auto compare = builder.Gt(a, b); + builder.Gt(a, b); Array3D expected_3d( {{{0, 1}, {0, 0}, {0, 0}}, {{0, 1}, {1, 0}, {0, 1}}}); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 01aa6c756f..c39597c4e1 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -96,6 +96,9 @@ class ClientLibraryTestBase : public ::testing::Test { ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments); + // TODO(b/74197823): Remove the template type 'BuilderT' in all methods once + // the migration to XlaBuilder is complete. + template StatusOr> ExecuteAndTransfer( BuilderT* builder, tensorflow::gtl::ArraySlice arguments, @@ -127,14 +130,14 @@ class ClientLibraryTestBase : public ::testing::Test { // Convenience methods for building and running a computation, transferring // the result, and comparing it to the expected value(s). Methods are // templated on the native host type which maps to specific XLA types (See - // ComputationBuilder for details). For each rank, two forms are provided: one - // for floating point types with an ErrorSpec parameter, and one for integral - // types without the ErrorSpec parameter. - template - void ComputeAndCompareR0(ComputationBuilder* builder, NativeT expected, + // ComputationBuilder/XlaBuilder for details). For each rank, two forms are + // provided: one for floating point types with an ErrorSpec parameter, and one + // for integral types without the ErrorSpec parameter. + template + void ComputeAndCompareR0(BuilderT* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR0(ComputationBuilder* builder, NativeT expected, + template + void ComputeAndCompareR0(BuilderT* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); @@ -154,33 +157,27 @@ class ClientLibraryTestBase : public ::testing::Test { const tensorflow::core::Bitmap& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR2(ComputationBuilder* builder, - const Array2D& expected, + template + void ComputeAndCompareR2(BuilderT* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR2(ComputationBuilder* builder, - const Array2D& expected, + template + void ComputeAndCompareR2(BuilderT* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR3(ComputationBuilder* builder, - const Array3D& expected, + template + void ComputeAndCompareR3(BuilderT* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR3(ComputationBuilder* builder, - const Array3D& expected, + template + void ComputeAndCompareR3(BuilderT* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); - template - void ComputeAndCompareR4(ComputationBuilder* builder, - const Array4D& expected, + template + void ComputeAndCompareR4(BuilderT* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments); - template - void ComputeAndCompareR4(ComputationBuilder* builder, - const Array4D& expected, + template + void ComputeAndCompareR4(BuilderT* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); @@ -337,10 +334,12 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template - std::unique_ptr CreateR0Parameter( - NativeT value, int64 parameter_number, const string& name, - ComputationBuilder* builder, ComputationDataHandle* data_handle); + template + std::unique_ptr CreateR0Parameter(NativeT value, + int64 parameter_number, + const string& name, + BuilderT* builder, + HandleT* data_handle); // Creates a parameter instruction that wraps the given values and then stores // into "data_handle" the global handle for that parameter. @@ -350,11 +349,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR1Parameter( tensorflow::gtl::ArraySlice values, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle); + const string& name, BuilderT* builder, HandleT* data_handle); // Creates a parameter instruction that wraps the given constant array // "array_2d" and then stores to "data_handle" the global handle for that @@ -365,11 +363,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR2Parameter( const Array2D& array_2d, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle); + const string& name, BuilderT* builder, HandleT* data_handle); // Creates a parameter instruction that wraps the given constant array // "array_3d" and then stores to "data_handle" the global handle for that @@ -380,11 +377,10 @@ class ClientLibraryTestBase : public ::testing::Test { // // When the use_bfloat16 flag is set but NativeT is float, the data will be // converted to bfloat16. - template + template std::unique_ptr CreateR3Parameter( const Array3D& array_3d, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle); + const string& name, BuilderT* builder, HandleT* data_handle); // Getter and setter for the use_bfloat16 flag, which indicates whether to run // tests with all float-type input/output converted to bfloat16. @@ -440,9 +436,9 @@ class ClientLibraryTestBase : public ::testing::Test { std::vector> arguments_; }; -template +template void ClientLibraryTestBase::ComputeAndCompareR0( - ComputationBuilder* builder, NativeT expected, + BuilderT* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR0(expected); @@ -450,9 +446,9 @@ void ClientLibraryTestBase::ComputeAndCompareR0( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR0( - ComputationBuilder* builder, NativeT expected, + BuilderT* builder, NativeT expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -492,9 +488,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR2( - ComputationBuilder* builder, const Array2D& expected, + BuilderT* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR2FromArray2D(expected); @@ -502,9 +498,9 @@ void ClientLibraryTestBase::ComputeAndCompareR2( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR2( - ComputationBuilder* builder, const Array2D& expected, + BuilderT* builder, const Array2D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -518,9 +514,9 @@ void ClientLibraryTestBase::ComputeAndCompareR2( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR3( - ComputationBuilder* builder, const Array3D& expected, + BuilderT* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR3FromArray3D(expected); @@ -528,9 +524,9 @@ void ClientLibraryTestBase::ComputeAndCompareR3( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR3( - ComputationBuilder* builder, const Array3D& expected, + BuilderT* builder, const Array3D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -544,9 +540,9 @@ void ClientLibraryTestBase::ComputeAndCompareR3( arguments, error); } -template +template void ClientLibraryTestBase::ComputeAndCompareR4( - ComputationBuilder* builder, const Array4D& expected, + BuilderT* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments) { std::unique_ptr expected_literal = Literal::CreateR4FromArray4D(expected); @@ -554,9 +550,9 @@ void ClientLibraryTestBase::ComputeAndCompareR4( arguments); } -template +template void ClientLibraryTestBase::ComputeAndCompareR4( - ComputationBuilder* builder, const Array4D& expected, + BuilderT* builder, const Array4D& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { static_assert(std::is_same::value || std::is_same::value || @@ -570,10 +566,10 @@ void ClientLibraryTestBase::ComputeAndCompareR4( arguments, error); } -template +template std::unique_ptr ClientLibraryTestBase::CreateR0Parameter( NativeT value, int64 parameter_number, const string& name, - ComputationBuilder* builder, ComputationDataHandle* data_handle) { + BuilderT* builder, HandleT* data_handle) { std::unique_ptr literal = Literal::CreateR0(value); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -584,11 +580,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR0Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR1Parameter( tensorflow::gtl::ArraySlice values, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle) { + const string& name, BuilderT* builder, HandleT* data_handle) { std::unique_ptr literal = Literal::CreateR1(values); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -599,11 +594,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR1Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( const Array2D& array_2d, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle) { + const string& name, BuilderT* builder, HandleT* data_handle) { std::unique_ptr literal = Literal::CreateR2FromArray2D(array_2d); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); @@ -614,11 +608,10 @@ std::unique_ptr ClientLibraryTestBase::CreateR2Parameter( return data; } -template +template std::unique_ptr ClientLibraryTestBase::CreateR3Parameter( const Array3D& array_3d, int64 parameter_number, - const string& name, ComputationBuilder* builder, - ComputationDataHandle* data_handle) { + const string& name, BuilderT* builder, HandleT* data_handle) { std::unique_ptr literal = Literal::CreateR3FromArray3D(array_3d); if (use_bfloat16_ && literal->shape().element_type() == F32) { literal = LiteralTestUtil::ConvertF32ToBF16(*literal); -- GitLab From eee15c1f8ea56dbb516fa9e35392e0a224e99966 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 15:34:21 -0700 Subject: [PATCH 512/960] Update recompute_grad for TPU PiperOrigin-RevId: 190536468 --- .../layers/python/layers/rev_block_lib.py | 105 +++++++++++++++++- .../python/layers/rev_block_lib_test.py | 61 +++++++--- 2 files changed, 146 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 123275e1fd..0b38c0c3fd 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -29,6 +29,7 @@ from __future__ import print_function import functools import re +import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.framework.python import ops as contrib_framework_ops @@ -37,6 +38,7 @@ from tensorflow.python.framework import ops as framework_ops from tensorflow.python.layers import base from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope @@ -46,6 +48,7 @@ from tensorflow.python.util import nest __all__ = ["rev_block", "RevBlock", "recompute_grad"] LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") +_USE_DEFAULT = "__rev_block_lib_default" def _acc_grads(*lists_of_grads): @@ -219,7 +222,13 @@ class RevBlock(base.Layer): def _efficient_grad_fn(self, inputs, variables, ys, grad_ys): """Custom gradient fn for a block of reversible residual layers.""" + # Inputs have passed through an Identity. Recover the original Tensors to + # be able to match up side inputs. + assert [u"Identity"] == list(set([x.op.type for x in inputs])) + inputs = [x.op.inputs[0] for x in inputs] side_inputs = inputs[2:] + del inputs + f_side_idxs = [None] * len(self.f_side_input) g_side_idxs = [None] * len(self.g_side_input) assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input) @@ -405,12 +414,36 @@ def rev_block(x1, return block.forward(x1, x2) -def recompute_grad(fn): +def enable_with_args(dec): + """A decorator for decorators to enable their usage with or without args.""" + + @functools.wraps(dec) + def new_dec(*args, **kwargs): + if len(args) == 1 and not kwargs and callable(args[0]): + # Used as decorator without args + fn = args[0] + return dec(fn) + else: + return lambda fn: dec(fn, *args, **kwargs) + + return new_dec + + +@enable_with_args +def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """Decorator that recomputes the function on the backwards pass. Args: fn: a function that takes Tensors (all as positional arguments) and returns a tuple of Tensors. + use_data_dep: `bool`, if `True` will use a dummy data dependency to force + the recompute to happen. If `False` will use a control dependency. By + default will be `True` if in an XLA context and `False` otherwise. XLA + ignores control dependencies and so this data dependency is necessary. + tupleize_grads: `bool`, if `True` will use control dependencies to ensure + that all gradients are produced before any are consumed by downstream ops. + If `use_data_dep` is also `True`, will use a data dependency instead of + a control dependency. Returns: A wrapped fn that is identical to fn when called, but its activations will @@ -420,13 +453,25 @@ def recompute_grad(fn): @functools.wraps(fn) def wrapped(*args): - return _recompute_grad(fn, args) + return _recompute_grad( + fn, args, use_data_dep=use_data_dep, tupleize_grads=tupleize_grads) return wrapped -def _recompute_grad(fn, args): +def _is_on_tpu(): + ctxt = framework_ops.get_default_graph()._get_control_flow_context() # pylint: disable=protected-access + return control_flow_util.GetContainingXLAContext(ctxt) is not None + + +def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" + for arg in args: + if not isinstance(arg, framework_ops.Tensor): + raise ValueError("All inputs to function must be Tensors") + use_data_dep_ = use_data_dep + if use_data_dep_ == _USE_DEFAULT: + use_data_dep_ = _is_on_tpu() cached_vs = [] cached_arg_scope = [] @@ -436,6 +481,8 @@ def _recompute_grad(fn, args): del outputs # Recompute outputs with framework_ops.control_dependencies(output_grads): + if use_data_dep_: + inputs = _force_data_dependency(output_grads, inputs) with contrib_framework_ops.arg_scope(cached_arg_scope[0]): with variable_scope.variable_scope(cached_vs[0], reuse=True): outputs = fn(*inputs) @@ -444,6 +491,13 @@ def _recompute_grad(fn, args): outputs = [outputs] outputs = list(outputs) grads = gradients_impl.gradients(outputs, inputs + variables, output_grads) + + if tupleize_grads: + if use_data_dep_: + grads = _tuple_with_data_dep(grads) + else: + grads = control_flow_ops.tuple(grads) + grad_inputs = grads[:len(inputs)] grad_vars = grads[len(inputs):] return grad_inputs, grad_vars @@ -532,7 +586,7 @@ def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False): get_vars_fn = ( vs.global_variables if use_global_vars else vs.trainable_variables) len_before_vars = len(get_vars_fn()) - inputs = list(inputs) + inputs = [array_ops.identity(x) for x in inputs] outputs = fn(*inputs) train_vars = get_vars_fn()[len_before_vars:] @@ -581,3 +635,46 @@ def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False): flat_inputs = nest.flatten(defun_inputs) id_out = identity(*flat_inputs) return id_out + + +def _force_data_dependency(first_compute, then_compute): + """Force all of `then_compute` to depend on all of `first_compute`. + + Uses a dummy data dependency, which is useful when running on TPUs because + XLA ignores control dependencies. Only supports float arguments. + + Args: + first_compute: `list`. These will be made to run before the + `Tensor`s `then_compute`. + then_compute: `list`. These will run after all the `Tensor`s in + `first_compute`. + + Returns: + `list`, same length as `then_compute`. + + Raises: + ValueError: if ranks are unknown or types are not floating. + """ + + def _first_element(x): + if x.get_shape().ndims is None: + raise ValueError("Rank of Tensor %s must be known" % x) + ndims = x.get_shape().ndims + return array_ops.reshape(array_ops.slice(x, [0] * ndims, [1] * ndims), []) + + first_compute_sum = math_ops.add_n( + [_first_element(x) for x in first_compute if x is not None]) + dtype = first_compute_sum.dtype + if not dtype.is_floating: + raise ValueError("_force_data_dependency only supports floating dtypes.") + epsilon = np.finfo(dtype.as_numpy_dtype).tiny + zero = array_ops.stop_gradient(epsilon * first_compute_sum) + + return [ + array_ops.identity(x) + zero if x is not None else None + for x in then_compute + ] + + +def _tuple_with_data_dep(tensors): + return _force_data_dependency(tensors, tensors) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index cbcbcd7511..d1ad4e8c98 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -154,7 +154,7 @@ class RevBlockTest(test.TestCase): y_val, yd_val, gd_val, g_val = sess.run([y, y_rev, grads_rev, grads]) self.assertAllClose(y_val, yd_val) for g1, g2 in zip(gd_val, g_val): - self.assertAllClose(g1, g2) + self.assertAllClose(g1, g2, rtol=1e-5) def testRevBlock(self): self._testRevBlock() @@ -255,25 +255,54 @@ class RecomputeTest(test.TestCase): def fn_recompute(x): return fn(x) + @rev_block_lib.recompute_grad(use_data_dep=True) + def fn_use_data_dep(x): + return fn(x) + + @rev_block_lib.recompute_grad(tupleize_grads=True) + def fn_tupleize(x): + return fn(x) + + @rev_block_lib.recompute_grad(use_data_dep=True, tupleize_grads=True) + def fn_both(x): + return fn(x) + x = random_ops.random_uniform((3, 1, 3)) - recompute_vars = None - with variable_scope.variable_scope("recompute") as vs: - out1 = math_ops.reduce_sum(fn_recompute(x)) - recompute_vars = vs.trainable_variables() - reg_vars = None - with variable_scope.variable_scope("regular") as vs: - out2 = math_ops.reduce_sum(fn(x)) - reg_vars = vs.trainable_variables() - - grad1 = gradients_impl.gradients(out1, recompute_vars) - grad2 = gradients_impl.gradients(out2, reg_vars) + + names_and_fns = [ + ("recompute", fn_recompute), + ("regular", fn), + ("use_data_dep", fn_use_data_dep), + ("tupleize", fn_tupleize), + ("tuple_and_data_dep", fn_both), + ] + outputs_and_vars = [] + for name, wrapped_fn in names_and_fns: + with variable_scope.variable_scope(name) as vs: + out = math_ops.reduce_sum(wrapped_fn(x)) + outputs_and_vars.append((out, vs.trainable_variables())) + + all_grads = [] + for out, scope_vars in outputs_and_vars: + all_grads.append(gradients_impl.gradients(out, scope_vars)) with self.test_session() as sess: sess.run(variables.global_variables_initializer()) - outs = sess.run([out1, out2, grad1, grad2]) - self.assertAllClose(outs[0], outs[1]) - for g1, g2 in zip(outs[2], outs[3]): - self.assertAllClose(g1, g2) + outputs = list(zip(*outputs_and_vars))[0] + outs, all_grads_val = sess.run([outputs, all_grads]) + + # All outputs are the same + current = outs[0] + for out in outs[1:]: + self.assertAllClose(current, out) + current = out + + # All gradients are the same + for grads in zip(all_grads_val): + current = grads[0] + for g in grads[1:]: + self.assertAllClose(current, g) + current = g class FnWithCustomGradTest(test.TestCase): -- GitLab From 290632966fae0619db30c1ba777634db9a43b757 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Mar 2018 15:37:40 -0700 Subject: [PATCH 513/960] In the experimental C API, parametrized batch_size for the generate dataset / iterator stack. PiperOrigin-RevId: 190536945 --- tensorflow/c/c_api_experimental.cc | 67 ++++++++++++++++++------------ 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index f411efc941..bea9378571 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -7125,7 +7125,8 @@ library { // sets `dataset_name` to the created dataset name. The returned functions must // be deleted by calling TF_DeleteFunction. static std::vector CreateMNISTDatasetFunctions( - const char* file_path, std::string* dataset_name, TF_Status* status) { + const char* file_path, int batch_size, std::string* dataset_name, + TF_Status* status) { const char* func_def = R"PREFIX( library { function { @@ -8089,7 +8090,7 @@ library { dtype: DT_INT64 tensor_shape { } - int64_val: 128 + int64_val: -123 } } } @@ -8145,7 +8146,7 @@ library { dtype: DT_INT64 tensor_shape { } - int64_val: 128 + int64_val: -123 } } } @@ -8211,35 +8212,48 @@ library { *dataset_name = "_make_dataset_2451e43a"; std::function mutate_proto_func = - [dataset_name, file_path](FunctionDef* fdef) { + [dataset_name, file_path, batch_size](FunctionDef* fdef) { VLOG(1) << "Processsing function " << fdef->DebugString(); if (std::string(fdef->signature().name()) != *dataset_name) return; // Change the input file pattern to `file_path`. - bool found = false; + bool found_file_path = false, found_batch_size = false; // `node_def` may be mutated. for (auto& node_def : *fdef->mutable_node_def()) { - if (node_def.name() != "FixedLengthRecordDataset/filenames" && - node_def.name() != "FixedLengthRecordDataset_1/filenames_1") - continue; - DCHECK_EQ(node_def.op(), "Const"); - DCHECK_GT(node_def.attr().count("value"), 0); - found = true; - // Replace $(DATA_DIR)/foo with /foo - // TODO(hongm): Use StringPiece manipulation for better efficiency. - const std::string cur_value = - node_def.attr().at("value").tensor().string_val(0); - const std::string pattern = "$(DATA_DIR)"; - DCHECK_EQ(cur_value.compare(0, pattern.length(), pattern), 0); - const std::string new_value = - file_path + cur_value.substr(pattern.length()); - VLOG(1) << "Setting the value of node_def " << node_def.name() - << " to " << new_value; - auto* tensor = (*node_def.mutable_attr())["value"].mutable_tensor(); - tensor->clear_string_val(); - tensor->add_string_val(new_value); + if (node_def.name() == "FixedLengthRecordDataset/filenames" || + node_def.name() == "FixedLengthRecordDataset_1/filenames_1") { + DCHECK_EQ(node_def.op(), "Const"); + DCHECK_GT(node_def.attr().count("value"), 0); + found_file_path = true; + // Replace $(DATA_DIR)/foo with /foo + // TODO(hongm): Use StringPiece manipulation for better efficiency. + const std::string cur_value = + node_def.attr().at("value").tensor().string_val(0); + const std::string pattern = "$(DATA_DIR)"; + DCHECK_EQ(cur_value.compare(0, pattern.length(), pattern), 0); + const std::string new_value = + file_path + cur_value.substr(pattern.length()); + VLOG(1) << "Setting the value of node_def " << node_def.name() + << " to " << new_value; + auto* tensor = (*node_def.mutable_attr())["value"].mutable_tensor(); + tensor->clear_string_val(); + tensor->add_string_val(new_value); + } else if (node_def.name() == "BatchDataset/batch_size" || + node_def.name() == "FilterDataset/batch_size_1") { + DCHECK_EQ(node_def.op(), "Const"); + DCHECK_GT(node_def.attr().count("value"), 0); + found_batch_size = true; + // Replace $(BATCH_SIZE) with `batch_size` + DCHECK_EQ(node_def.attr().at("value").tensor().int64_val(0), -123); + VLOG(1) << "Setting the batch size attr value of node_def " + << node_def.name() << " to " << batch_size; + auto* tensor = (*node_def.mutable_attr())["value"].mutable_tensor(); + tensor->clear_int64_val(); + tensor->add_int64_val(batch_size); + } } VLOG(1) << "Rewrote function to " << fdef->DebugString(); - DCHECK(found); + DCHECK(found_file_path); + DCHECK(found_batch_size); }; return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status); } @@ -8341,7 +8355,8 @@ TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets( std::string dataset_name; const auto& funcs = is_mnist - ? CreateMNISTDatasetFunctions(file_path, &dataset_name, status) + ? CreateMNISTDatasetFunctions(file_path, batch_size, &dataset_name, + status) : CreateImagenetDatasetFunctions(file_path, &dataset_name, status); if (!status->status.ok()) { return nullptr; -- GitLab From c83a54adcface7d4bb666d7c4fd3968ba980a50d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Mar 2018 15:39:54 -0700 Subject: [PATCH 514/960] Makes tf.gather not silently snapshot resource variables. PiperOrigin-RevId: 190537320 --- .../kernel_tests/attention_wrapper_test.py | 29 +++++++++++-------- tensorflow/python/ops/array_ops.py | 17 +++++++---- tensorflow/python/ops/embedding_ops.py | 29 ++++--------------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index c4139dde49..07b3ad71d4 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -785,26 +785,31 @@ class AttentionWrapperTest(test.TestCase): wrapper.BahdanauAttention, wrapper.LuongAttention) expected_final_output = BasicDecoderOutput( - rnn_output=ResultSummary( - shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11798714846372604), - sample_id=ResultSummary( - shape=(5, 3), dtype=dtype('int32'), mean=7.933333333333334)) + rnn_output=ResultSummary(shape=(5, 3, 20), + dtype=dtype('float32'), + mean=0.11723966), + sample_id=ResultSummary(shape=(5, 3), + dtype=dtype('int32'), + mean=9.2666666666666675)) expected_final_state = AttentionWrapperState( cell_state=LSTMStateTuple( - c=ResultSummary( - shape=(5, 9), dtype=dtype('float32'), mean=-0.0036486709), - h=ResultSummary( - shape=(5, 9), dtype=dtype('float32'), mean=-0.0018835809)), - attention=ResultSummary( - shape=(5, 20), dtype=dtype('float32'), mean=0.11798714846372604), + c=ResultSummary(shape=(5, 9), + dtype=dtype('float32'), + mean=-0.003545674), + h=ResultSummary(shape=(5, 9), + dtype=dtype('float32'), + mean=-0.0018327223)), + attention=ResultSummary(shape=(5, 20), + dtype=dtype('float32'), + mean=0.11728073), time=3, alignments=( ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + alignment_history=(), attention_state=( ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), - ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), - alignment_history=()) + ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125))) expected_final_alignment_history = ( ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125), ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125)) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index ec7c14f7d8..9106461c60 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2691,12 +2691,17 @@ reverse_sequence.__doc__ = deprecation.rewrite_argument_docstring( @tf_export("gather") def gather(params, indices, validate_indices=None, name=None, axis=0): - # TODO(rjryan): Remove "Gather" creation in favor of GatherV2 once the forward - # compatibility 3 week period has passed. - if axis == 0: - return gen_array_ops.gather( - params, indices, validate_indices=validate_indices, name=name) - else: + del validate_indices + if axis != 0: + # Note that we do a sparse_read here to avoid snapshotting the entire + # resource variable and doing a gather, which can be inefficient and lead to + # subtle race conditions. TODO(apassos) implement axis != 0 on sparse_read + return gen_array_ops.gather_v2(params, indices, axis, name=name) + try: + # TODO(apassos) find a less bad way of detecting resource variables without + # introducing a circular dependency. + return params.sparse_read(indices, name=name) + except AttributeError: return gen_array_ops.gather_v2(params, indices, axis, name=name) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 20e4a28b9c..f0120f2957 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -35,34 +35,14 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export -def _gather(params, ids, name=None): - """Helper function for _embedding_lookup_and_transform. - - This function gathers embeddings from a single tensor. The gather deals with - resource variables specially. - - Args: - params: A `Tensor` of embeddings. - ids: A `Tensor` indexing the embeddings to be retrieved from `params`. - name: A name for the operation (optional). - - Returns: - A `Tensor` with the same type as `params`. - """ - if isinstance(params, resource_variable_ops.ResourceVariable): - return params.sparse_read(ids, name=name) - else: - return array_ops.gather(params, ids, name=name) - - def _clip(params, ids, max_norm): """Helper function for _embedding_lookup_and_transform. This function optionally clips embeddings to an l2-norm of max_norm. Args: - params: A `Tensor` of embeddings retrieved by `_gather`. - ids: The `ids` argument that was passed to `_gather`. + params: A `Tensor` of embeddings retrieved by `gather`. + ids: The `ids` argument that was passed to `gather`. max_norm: If provided, the embeddings are l2-normalized to the value of max_norm. @@ -148,7 +128,8 @@ def _embedding_lookup_and_transform(params, ids = ops.convert_to_tensor(ids, name="ids") if np == 1 and (not transform_fn or ids.get_shape().ndims == 1): with ops.colocate_with(params[0]): - result = _clip(_gather(params[0], ids, name=name), ids, max_norm) + result = _clip(array_ops.gather(params[0], ids, name=name), + ids, max_norm) if transform_fn: result = transform_fn(result) return result @@ -212,7 +193,7 @@ def _embedding_lookup_and_transform(params, for p in xrange(np): pids = gather_ids[p] with ops.colocate_with(params[p]): - result = _gather(params[p], pids) + result = array_ops.gather(params[p], pids) if transform_fn: # If transform_fn is provided, the clip_by_norm precedes # the transform and hence must be co-located. See below -- GitLab From db076ca01f12368c9476fa4db9d87756f22f9670 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 26 Mar 2018 15:52:12 -0700 Subject: [PATCH 515/960] Rename convert_savedmodel to convert_saved_model to be consistent with export_saved_model PiperOrigin-RevId: 190539064 --- tensorflow/contrib/lite/python/BUILD | 12 ++++++------ ...vert_savedmodel.py => convert_saved_model.py} | 15 ++++++++------- ...model_test.py => convert_saved_model_test.py} | 16 ++++++++-------- 3 files changed, 22 insertions(+), 21 deletions(-) rename tensorflow/contrib/lite/python/{convert_savedmodel.py => convert_saved_model.py} (96%) rename tensorflow/contrib/lite/python/{convert_savedmodel_test.py => convert_saved_model_test.py} (96%) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index ce1a81d06b..411d5c0d27 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -85,8 +85,8 @@ py_test( ) py_binary( - name = "convert_savedmodel", - srcs = ["convert_savedmodel.py"], + name = "convert_saved_model", + srcs = ["convert_saved_model.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ @@ -98,12 +98,12 @@ py_binary( ) py_test( - name = "convert_savedmodel_test", - srcs = ["convert_savedmodel_test.py"], + name = "convert_saved_model_test", + srcs = ["convert_saved_model_test.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ - ":convert_savedmodel", + ":convert_saved_model", "//tensorflow/python:client_testlib", "//tensorflow/python:platform_test", "//tensorflow/python:session", @@ -115,7 +115,7 @@ py_test( py_library( name = "tf_lite_py_pip", deps = [ - ":convert_savedmodel", + ":convert_saved_model", ], ) diff --git a/tensorflow/contrib/lite/python/convert_savedmodel.py b/tensorflow/contrib/lite/python/convert_saved_model.py similarity index 96% rename from tensorflow/contrib/lite/python/convert_savedmodel.py rename to tensorflow/contrib/lite/python/convert_saved_model.py index d39e1a1d98..a2b5ef488e 100644 --- a/tensorflow/contrib/lite/python/convert_savedmodel.py +++ b/tensorflow/contrib/lite/python/convert_saved_model.py @@ -16,7 +16,7 @@ r"""TensorFlow Lite flatbuffer generation from saved_models. Example: -bazel run third_party/tensorflow/contrib/lite/python:convert_savedmodel -- \ +bazel run third_party/tensorflow/contrib/lite/python:convert_saved_model -- \ --saved_model_dir=/tmp/test_saved_model/1519865537 \ --output_tflite=/tmp/test.lite @@ -68,16 +68,16 @@ def log_tensor_details(tensor_info): dims = [str(dim.size) for dim in val.tensor_shape.dim] shape = "({})".format(", ".join(dims)) - logging.info("Tensor's key in savedmodel's tensor_map: %s", key) + logging.info("Tensor's key in saved_model's tensor_map: %s", key) logging.info(" tensor name: %s, shape: %s, type: %s", val.name, shape, dtype) def get_meta_graph_def(saved_model_dir, tag_set): - """Validate savedmodel and extract MetaGraphDef. + """Validate saved_model and extract MetaGraphDef. Args: - saved_model_dir: Savedmodel path to convert. + saved_model_dir: saved_model path to convert. tag_set: Set of tag(s) of the MetaGraphDef to load. Returns: @@ -94,7 +94,8 @@ def get_meta_graph_def(saved_model_dir, tag_set): tag_sets.append(meta_graph_tag_set) if meta_graph_tag_set == tag_set: result_meta_graph_def = meta_graph_def - logging.info("The given SavedModel contains the following tags: %s", tag_sets) + logging.info("The given saved_model contains the following tags: %s", + tag_sets) if result_meta_graph_def is not None: return result_meta_graph_def else: @@ -118,7 +119,7 @@ def get_signature_def(meta_graph, signature_key): signature_def_map = meta_graph.signature_def signature_def_keys = set(signature_def_map.keys()) logging.info( - "The given SavedModel MetaGraphDef contains SignatureDefs with the " + "The given saved_model MetaGraphDef contains SignatureDefs with the " "following keys: %s", signature_def_keys) if signature_key not in signature_def_keys: raise ValueError("No '{}' in the saved_model\'s SignatureDefs. Possible " @@ -159,7 +160,7 @@ def convert(saved_model_dir, tag_set=None, signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, batch_size=1): - """Convert a savedmodel to tflite flatbuffer. + """Convert a saved_model to tflite flatbuffer. Args: saved_model_dir: Saved model directory to convert. diff --git a/tensorflow/contrib/lite/python/convert_savedmodel_test.py b/tensorflow/contrib/lite/python/convert_saved_model_test.py similarity index 96% rename from tensorflow/contrib/lite/python/convert_savedmodel_test.py rename to tensorflow/contrib/lite/python/convert_saved_model_test.py index 70cff9ef7f..d87fbeb91c 100644 --- a/tensorflow/contrib/lite/python/convert_savedmodel_test.py +++ b/tensorflow/contrib/lite/python/convert_saved_model_test.py @@ -24,7 +24,7 @@ from __future__ import division from __future__ import print_function import os -from tensorflow.contrib.lite.python import convert_savedmodel +from tensorflow.contrib.lite.python import convert_saved_model from tensorflow.python import estimator from tensorflow.python import keras from tensorflow.python import layers @@ -60,13 +60,13 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): # Create a simple savedmodel saved_model_dir = self._createSimpleSavedModel(shape=[1, 16, 16, 3]) # Convert to tflite - result = convert_savedmodel.convert(saved_model_dir=saved_model_dir) + result = convert_saved_model.convert(saved_model_dir=saved_model_dir) self.assertTrue(result) def testSimpleSavedModelWithNoneBatchSizeInShape(self): """Test a simple savedmodel, with None in input tensor's shape.""" saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, 16, 3]) - result = convert_savedmodel.convert(saved_model_dir=saved_model_dir) + result = convert_saved_model.convert(saved_model_dir=saved_model_dir) self.assertTrue(result) def testSimpleSavedModelWithMoreNoneInShape(self): @@ -74,7 +74,7 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): saved_model_dir = self._createSimpleSavedModel(shape=[None, 16, None, 3]) # Convert to tflite: this should raise ValueError, as 3rd dim is None. with self.assertRaises(ValueError): - convert_savedmodel.convert(saved_model_dir=saved_model_dir) + convert_saved_model.convert(saved_model_dir=saved_model_dir) def testSimpleSavedModelWithWrongSignatureKey(self): """Test a simple savedmodel, fail as given signature is invalid.""" @@ -82,7 +82,7 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): # Convert to tflite: this should raise ValueError, as # signature_key does not exit in the saved_model. with self.assertRaises(ValueError): - convert_savedmodel.convert( + convert_saved_model.convert( saved_model_dir=saved_model_dir, signature_key="wrong-key") def testSimpleSavedModelWithWrongOutputArray(self): @@ -92,7 +92,7 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): # Convert to tflite: this should raise ValueError, as # output_arrays is not valid for the saved_model. with self.assertRaises(ValueError): - convert_savedmodel.convert( + convert_saved_model.convert( saved_model_dir=saved_model_dir, output_arrays="wrong-output") def testMultipleMetaGraphDef(self): @@ -124,7 +124,7 @@ class ConvertSavedModelTestBasicGraph(test_util.TensorFlowTestCase): builder.save(True) # Convert to tflite - convert_savedmodel.convert( + convert_saved_model.convert( saved_model_dir=saved_model_dir, tag_set=set([saved_model.tag_constants.SERVING, "additional_test_tag"])) @@ -264,7 +264,7 @@ class ConvertSavedModelTestTrainGraph(test_util.TensorFlowTestCase): saved_model_final_dir + ".lite") # TODO(zhixianyan): no need to limit output_arrays to `Softmax' # once b/74205001 fixed and argmax implemented in tflite. - result = convert_savedmodel.convert( + result = convert_saved_model.convert( saved_model_dir=saved_model_final_dir, output_arrays="Softmax", output_tflite=output_tflite) -- GitLab From 73f40467bde137e2e2b31297b73944cc2830bdb7 Mon Sep 17 00:00:00 2001 From: Ou Changkun Date: Tue, 27 Mar 2018 00:57:52 +0200 Subject: [PATCH 516/960] Fix missing interpretation of document (#17990) * Fix missing interpretation of document * Rephrase the sentence of missing interpretation --- tensorflow/docs_src/mobile/optimizing.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index ca9cb043e9..778e4d3a62 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -233,6 +233,8 @@ order by how long they took. From left to right, the columns are: - The cumulative total time of this and the previous ops in the table. This is handy for understanding what the distribution of work is across the layers, to see if just a few of the nodes are taking up most of the time. + +- The amount of memory consumed by outputs of this type of op. - Name of the node. -- GitLab From e5dcaf921cf9feefd42b2ab176590c696b3b0285 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Tue, 27 Mar 2018 07:21:54 +0800 Subject: [PATCH 517/960] Fix #15900 (#16154) - Added `save_checkpoint_steps` attribute to `MonitoredTrainingSession`. If both `save_checkpoint_steps` and `save_checkpoint_secs` are both `None` then default saver is disabled. Default is `save_checkpoint_secs=600` - Added `test_save_checkpoint_steps` - Updated golden file --- .../python/training/monitored_session.py | 33 +++++++++++++---- .../python/training/monitored_session_test.py | 36 +++++++++++++++++++ .../tools/api/golden/tensorflow.train.pbtxt | 2 +- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 6c5c9e01a7..2d4f09a60a 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -281,13 +281,14 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold=None, hooks=None, chief_only_hooks=None, - save_checkpoint_secs=600, + save_checkpoint_secs=USE_DEFAULT, save_summaries_steps=USE_DEFAULT, save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, log_step_count_steps=100, - max_wait_secs=7200): + max_wait_secs=7200, + save_checkpoint_steps=USE_DEFAULT): """Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -310,8 +311,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if `is_chief==True`, ignore otherwise. save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. + using a default checkpoint saver. If both `save_checkpoint_steps` and + `save_checkpoint_secs` are set to `None`, then the default checkpoint + saver isn't used. If both are provided, then only `save_checkpoint_secs` + is used. Default 600. save_summaries_steps: The frequency, in number of global steps, that the summaries are written to disk using a default summary saver. If both `save_summaries_steps` and `save_summaries_secs` are set to `None`, then @@ -330,6 +333,11 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name become available. This should be kept relatively short to help detect incorrect code, but sometimes may need to be increased if the chief takes a while to start up. + save_checkpoint_steps: The frequency, in number of global steps, that a + checkpoint is saved using a default checkpoint saver. If both + `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then + the default checkpoint saver isn't used. If both are provided, then only + `save_checkpoint_secs` is used. Default not enabled. Returns: A `MonitoredSession` object. @@ -342,6 +350,15 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name elif save_summaries_steps == USE_DEFAULT: save_summaries_steps = None + if save_checkpoint_steps == USE_DEFAULT and \ + save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_steps = None + save_checkpoint_secs = 600 + elif save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_secs = None + elif save_checkpoint_steps == USE_DEFAULT: + save_checkpoint_steps = None + scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( @@ -374,9 +391,13 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_steps=save_summaries_steps, save_secs=save_summaries_secs, output_dir=checkpoint_dir)) - if save_checkpoint_secs and save_checkpoint_secs > 0: + if (save_checkpoint_secs and save_checkpoint_secs > 0) or ( + save_checkpoint_steps and save_checkpoint_steps > 0): all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( - checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold)) + checkpoint_dir, + save_steps=save_checkpoint_steps, + save_secs=save_checkpoint_secs, + scaffold=scaffold)) if hooks: all_hooks.extend(hooks) diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 159b2d5c16..3806056f01 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -282,6 +282,42 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) + def test_save_checkpoint_steps(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_steps=100, + log_step_count_steps=10) as session: + for _ in range(100): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(100, session.run(gstep)) + + def test_save_checkpoint_secs(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_secs=0.1, + log_step_count_steps=10) as session: + session.run(new_gstep) + time.sleep(0.2) + for _ in range(10): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(11, session.run(gstep)) + def test_summaries_steps(self): logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index c75ee474aa..bec72e1e60 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -238,7 +238,7 @@ tf_module { } member_method { name: "MonitoredTrainingSession" - argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\'], " + argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\', \'save_checkpoint_steps\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\', \'\'], " } member_method { name: "NewCheckpointReader" -- GitLab From 307cfe7ab7e2c475b2741fc2a2f7663b46223e6d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 16:19:50 -0700 Subject: [PATCH 518/960] Save the last loss reduction method (for future use). PiperOrigin-RevId: 190543066 --- tensorflow/python/framework/ops.py | 3 +++ tensorflow/python/ops/losses/losses_impl.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index e579289a8d..25a951a2de 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2788,6 +2788,9 @@ class Graph(object): # being called inside function definitions behave as if they were seeing the # actual outside graph). self._graph_key = "grap-key-%d/" % (uid(),) + # A string with the last reduction method passed to + # losses.compute_weighted_loss(), or None. + self._last_loss_reduction = None self._container = "" self._registered_ops = op_def_registry.get_registered_ops() diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 0840760810..34ca1adc3e 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -194,6 +194,11 @@ def compute_weighted_loss( """ Reduction.validate(reduction) with ops.name_scope(scope, "weighted_loss", (losses, weights)): + # Save the `reduction` argument for loss normalization when distributing + # to multiple towers. + # TODO(josh11b): Associate it with the returned op for more precision. + ops.get_default_graph()._last_loss_reduction = reduction # pylint: disable=protected-access + with ops.control_dependencies(( weights_broadcast_ops.assert_broadcastable(weights, losses),)): losses = ops.convert_to_tensor(losses) -- GitLab From eda7aa3f7e763734f5f3550bed8b044a384b2ce8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 17:02:55 -0700 Subject: [PATCH 519/960] Add missing parameter to OP_REQUIRES call. PiperOrigin-RevId: 190548854 --- tensorflow/core/kernels/mkl_reshape_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc index e12f6f437a..2cfde1f6fd 100644 --- a/tensorflow/core/kernels/mkl_reshape_op.cc +++ b/tensorflow/core/kernels/mkl_reshape_op.cc @@ -266,8 +266,9 @@ class MklReshapeOp : public OpKernel { &net)) { stream(stream::kind::eager).submit(net).wait(); } else { - OP_REQUIRES(context, - output_tensor->CopyFrom(input_tensor, shape_to)); + OP_REQUIRES( + context, output_tensor->CopyFrom(input_tensor, shape_to), + errors::InvalidArgument("invalid input tensor shape")); } return; } else { -- GitLab From 931f6d553172ddfc9ec4a7a94ea2c6233bf33cb0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 17:39:51 -0700 Subject: [PATCH 520/960] [XLA] Redesign: handle metadata and sharding. - Add a xla.OpSharding field to the HloInstructionProto. - Metatdata handling is tested. PiperOrigin-RevId: 190553731 --- .../xla/client/xla_client/xla_builder.cc | 7 +++- .../xla/client/xla_client/xla_builder.h | 32 +++++++++++++++++++ tensorflow/compiler/xla/service/hlo.proto | 2 ++ tensorflow/compiler/xla/tests/BUILD | 3 +- .../compiler/xla/tests/hlo_metadata_test.cc | 9 +++--- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index bf91efcfd6..1b90b45bfb 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -896,8 +896,13 @@ StatusOr XlaBuilder::AddInstruction( << "Do not add XlaOp from builder " << operand.builder_->name() << " to builder " << this->name(); instr.add_operand_ids(operand.handle()); - // TODO(b/74197823): Set metadata and sharding. } + + *instr.mutable_metadata() = metadata_; + if (sharding_) { + *instr.mutable_sharding() = *sharding_; + } + instructions_.push_back(instr); XlaOp op(handle, this); diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index 22cf094512..cc33356cc1 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -85,6 +85,29 @@ class XlaBuilder { // Returns the computation name. const string& name() const { return name_; } + // Sets OpMetadata that will be added to all instructions until cleared. + // + // OpMetadata is often applied to a series of XLA HLO instructions. As a + // result, OpMetadata is set on the Computation Builder. All subsequent + // instructions generated via this Computation Builder will have the same + // OpMetadata attached until a call to ClearOpMetadata. + void SetOpMetadata(const OpMetadata& metadata) { metadata_ = metadata; } + + // Clears the HloMetadata state. + void ClearOpMetadata() { metadata_.Clear(); } + + // Sets an OpSharding that will be attached to all instructions until cleared. + void SetSharding(const OpSharding& sharding) { sharding_ = sharding; } + + // Clears the sharding. Ops will be sharded according to the default placement + // policy. + void ClearSharding() { sharding_ = tensorflow::gtl::nullopt; } + + // Returns the OpSharding that will be attached to all instructions. + const tensorflow::gtl::optional& sharding() const { + return sharding_; + } + // Sets the builder to a mode where it will die immediately when an error is // encountered, rather than producing it in a deferred fashion when Build() is // called (which is the default). @@ -776,6 +799,15 @@ class XlaBuilder { // The unique parameter numbers. tensorflow::gtl::FlatSet parameter_numbers_; + // The metadata to attach to each op. This is structured as a "modal"-like + // operation, in order to simplify client code (and not sprinkle this metadata + // throughout the TensorFlow op kernel implementations). + OpMetadata metadata_; + + // Sharding for this operator. This is structured as a "model"-like operation, + // in order to simplify client code, similar to metadata_. + tensorflow::gtl::optional sharding_; + // Mode bit that indicates whether to die when a first error is encountered. bool die_immediately_on_error_ = false; }; diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 406feadfd4..0b446c6547 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -141,6 +141,8 @@ message HloInstructionProto { repeated int64 operand_ids = 36; repeated int64 control_predecessor_ids = 37; repeated int64 called_computation_ids = 38; + + xla.OpSharding sharding = 40; } // Serialization of HloComputation. diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 3705d6c271..5ab25f2264 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1810,9 +1810,8 @@ tf_cc_test( deps = [ ":local_client_test_base", "//tensorflow/compiler/xla:test_helpers", - "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", - "//tensorflow/compiler/xla/service:computation_tracker", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", "//tensorflow/compiler/xla/service:cpu_plugin", "//tensorflow/compiler/xla/service:local_service", "//tensorflow/core:test_main", diff --git a/tensorflow/compiler/xla/tests/hlo_metadata_test.cc b/tensorflow/compiler/xla/tests/hlo_metadata_test.cc index eded2077fc..cf971dd61b 100644 --- a/tensorflow/compiler/xla/tests/hlo_metadata_test.cc +++ b/tensorflow/compiler/xla/tests/hlo_metadata_test.cc @@ -13,9 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" -#include "tensorflow/compiler/xla/service/computation_tracker.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" #include "tensorflow/compiler/xla/service/local_service.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/local_client_test_base.h" @@ -30,7 +29,7 @@ class HloMetadataTest : public LocalClientTestBase { metadata_.set_op_name("my_sum_op"); } - void BuildAddComputation(ComputationBuilder* builder) { + void BuildAddComputation(XlaBuilder* builder) { auto x = builder->Parameter(0, ShapeUtil::MakeShape(F32, {}), "x"); auto y = builder->Parameter(1, ShapeUtil::MakeShape(F32, {}), "y"); builder->Add(x, y); @@ -40,7 +39,7 @@ class HloMetadataTest : public LocalClientTestBase { }; TEST_F(HloMetadataTest, MetadataPropagation) { - ComputationBuilder builder(local_client_, "add"); + XlaBuilder builder("add"); builder.SetOpMetadata(metadata_); BuildAddComputation(&builder); builder.ClearOpMetadata(); @@ -61,7 +60,7 @@ TEST_F(HloMetadataTest, MetadataPropagation) { } TEST_F(HloMetadataTest, MetadataClearing) { - ComputationBuilder builder(local_client_, "add"); + XlaBuilder builder("add"); builder.SetOpMetadata(metadata_); // Some other pretend computation here. builder.ClearOpMetadata(); -- GitLab From 0be974c423f6e5c363db2d95ed335dde4cb4e69b Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 26 Mar 2018 18:50:27 -0700 Subject: [PATCH 521/960] Finish deprecation of tf.contrib.bayesflow.{HMC,MetropolisHastings}. New home: https://github.com/tensorflow/probability/tree/master/tensorflow_probability/python/mcmc PiperOrigin-RevId: 190560180 --- tensorflow/contrib/bayesflow/BUILD | 41 - tensorflow/contrib/bayesflow/README.md | 17 + tensorflow/contrib/bayesflow/__init__.py | 8 - .../bayesflow/python/kernel_tests/hmc_test.py | 737 -------------- .../kernel_tests/metropolis_hastings_test.py | 340 ------- .../contrib/bayesflow/python/ops/hmc.py | 30 - .../contrib/bayesflow/python/ops/hmc_impl.py | 961 ------------------ .../python/ops/metropolis_hastings.py | 34 - .../python/ops/metropolis_hastings_impl.py | 527 ---------- 9 files changed, 17 insertions(+), 2678 deletions(-) create mode 100644 tensorflow/contrib/bayesflow/README.md delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/hmc.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/hmc_impl.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index c6feec68e0..a55029b314 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -37,25 +37,6 @@ py_library( ], ) -cuda_py_test( - name = "metropolis_hastings_test", - size = "large", - srcs = ["python/kernel_tests/metropolis_hastings_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - ], -) - cuda_py_test( name = "monte_carlo_test", size = "small", @@ -77,28 +58,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "hmc_test", - size = "large", - srcs = ["python/kernel_tests/hmc_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/contrib/layers:layers_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_seed", - ], - tags = ["nomsan"], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/bayesflow/README.md b/tensorflow/contrib/bayesflow/README.md new file mode 100644 index 0000000000..10323dc6d5 --- /dev/null +++ b/tensorflow/contrib/bayesflow/README.md @@ -0,0 +1,17 @@ +# Notice + +`tf.contrib.bayesflow` has moved! + +See new code at [github.com/tensorflow/probability]( +https://github.com/tensorflow/probability). + +Switch imports with: + +```python +# old +import tensorflow as tf +tfp = tf.contrib.bayesflow + +# new +import tensorflow_probability as tfp +``` diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index f868203826..41a8c920fc 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -21,8 +21,6 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long -from tensorflow.contrib.bayesflow.python.ops import hmc -from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo # pylint: enable=unused-import,line-too-long @@ -30,13 +28,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'entropy', - 'hmc', - 'metropolis_hastings', 'monte_carlo', - 'special_math', - 'stochastic_variables', - 'variational_inference', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py deleted file mode 100644 index dabadfc7b6..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py +++ /dev/null @@ -1,737 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Hamiltonian Monte Carlo.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -import numpy as np -from scipy import stats - -from tensorflow.contrib.bayesflow.python.ops import hmc -from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _compute_energy_change -from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _leapfrog_integrator - -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_linalg_ops -from tensorflow.python.ops import gradients_impl as gradients_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import gamma as gamma_lib -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging_ops - - -def _reduce_variance(x, axis=None, keepdims=False): - sample_mean = math_ops.reduce_mean(x, axis, keepdims=True) - return math_ops.reduce_mean( - math_ops.squared_difference(x, sample_mean), axis, keepdims) - - -class HMCTest(test.TestCase): - - def setUp(self): - self._shape_param = 5. - self._rate_param = 10. - - random_seed.set_random_seed(10003) - np.random.seed(10003) - - def assertAllFinite(self, x): - self.assertAllEqual(np.ones_like(x).astype(bool), np.isfinite(x)) - - def _log_gamma_log_prob(self, x, event_dims=()): - """Computes log-pdf of a log-gamma random variable. - - Args: - x: Value of the random variable. - event_dims: Dimensions not to treat as independent. - - Returns: - log_prob: The log-pdf up to a normalizing constant. - """ - return math_ops.reduce_sum(self._shape_param * x - - self._rate_param * math_ops.exp(x), - event_dims) - - def _integrator_conserves_energy(self, x, independent_chain_ndims, sess, - feed_dict=None): - step_size = array_ops.placeholder(np.float32, [], name="step_size") - hmc_lf_steps = array_ops.placeholder(np.int32, [], name="hmc_lf_steps") - - if feed_dict is None: - feed_dict = {} - feed_dict[hmc_lf_steps] = 1000 - - event_dims = math_ops.range(independent_chain_ndims, - array_ops.rank(x)) - - m = random_ops.random_normal(array_ops.shape(x)) - log_prob_0 = self._log_gamma_log_prob(x, event_dims) - grad_0 = gradients_ops.gradients(log_prob_0, x) - old_energy = -log_prob_0 + 0.5 * math_ops.reduce_sum(m**2., event_dims) - - new_m, _, log_prob_1, _ = _leapfrog_integrator( - current_momentums=[m], - target_log_prob_fn=lambda x: self._log_gamma_log_prob(x, event_dims), - current_state_parts=[x], - step_sizes=[step_size], - num_leapfrog_steps=hmc_lf_steps, - current_target_log_prob=log_prob_0, - current_grads_target_log_prob=grad_0) - new_m = new_m[0] - - new_energy = -log_prob_1 + 0.5 * math_ops.reduce_sum(new_m * new_m, - event_dims) - - x_shape = sess.run(x, feed_dict).shape - event_size = np.prod(x_shape[independent_chain_ndims:]) - feed_dict[step_size] = 0.1 / event_size - old_energy_, new_energy_ = sess.run([old_energy, new_energy], - feed_dict) - logging_ops.vlog(1, "average energy relative change: {}".format( - (1. - new_energy_ / old_energy_).mean())) - self.assertAllClose(old_energy_, new_energy_, atol=0., rtol=0.02) - - def _integrator_conserves_energy_wrapper(self, independent_chain_ndims): - """Tests the long-term energy conservation of the leapfrog integrator. - - The leapfrog integrator is symplectic, so for sufficiently small step - sizes it should be possible to run it more or less indefinitely without - the energy of the system blowing up or collapsing. - - Args: - independent_chain_ndims: Python `int` scalar representing the number of - dims associated with independent chains. - """ - with self.test_session(graph=ops.Graph()) as sess: - x_ph = array_ops.placeholder(np.float32, name="x_ph") - feed_dict = {x_ph: np.random.rand(50, 10, 2)} - self._integrator_conserves_energy(x_ph, independent_chain_ndims, - sess, feed_dict) - - def testIntegratorEnergyConservationNullShape(self): - self._integrator_conserves_energy_wrapper(0) - - def testIntegratorEnergyConservation1(self): - self._integrator_conserves_energy_wrapper(1) - - def testIntegratorEnergyConservation2(self): - self._integrator_conserves_energy_wrapper(2) - - def testIntegratorEnergyConservation3(self): - self._integrator_conserves_energy_wrapper(3) - - def testSampleChainSeedReproducibleWorksCorrectly(self): - with self.test_session(graph=ops.Graph()) as sess: - num_results = 10 - independent_chain_ndims = 1 - - def log_gamma_log_prob(x): - event_dims = math_ops.range(independent_chain_ndims, - array_ops.rank(x)) - return self._log_gamma_log_prob(x, event_dims) - - kwargs = dict( - target_log_prob_fn=log_gamma_log_prob, - current_state=np.random.rand(4, 3, 2), - step_size=0.1, - num_leapfrog_steps=2, - num_burnin_steps=150, - seed=52, - ) - - samples0, kernel_results0 = hmc.sample_chain( - **dict(list(kwargs.items()) + list(dict( - num_results=2 * num_results, - num_steps_between_results=0).items()))) - - samples1, kernel_results1 = hmc.sample_chain( - **dict(list(kwargs.items()) + list(dict( - num_results=num_results, - num_steps_between_results=1).items()))) - - [ - samples0_, - samples1_, - target_log_prob0_, - target_log_prob1_, - ] = sess.run([ - samples0, - samples1, - kernel_results0.current_target_log_prob, - kernel_results1.current_target_log_prob, - ]) - self.assertAllClose(samples0_[::2], samples1_, - atol=1e-5, rtol=1e-5) - self.assertAllClose(target_log_prob0_[::2], target_log_prob1_, - atol=1e-5, rtol=1e-5) - - def _chain_gets_correct_expectations(self, x, independent_chain_ndims, - sess, feed_dict=None): - counter = collections.Counter() - def log_gamma_log_prob(x): - counter["target_calls"] += 1 - event_dims = math_ops.range(independent_chain_ndims, - array_ops.rank(x)) - return self._log_gamma_log_prob(x, event_dims) - - num_results = array_ops.placeholder( - np.int32, [], name="num_results") - step_size = array_ops.placeholder( - np.float32, [], name="step_size") - num_leapfrog_steps = array_ops.placeholder( - np.int32, [], name="num_leapfrog_steps") - - if feed_dict is None: - feed_dict = {} - feed_dict.update({num_results: 150, - step_size: 0.05, - num_leapfrog_steps: 2}) - - samples, kernel_results = hmc.sample_chain( - num_results=num_results, - target_log_prob_fn=log_gamma_log_prob, - current_state=x, - step_size=step_size, - num_leapfrog_steps=num_leapfrog_steps, - num_burnin_steps=150, - seed=42) - - self.assertAllEqual(dict(target_calls=2), counter) - - expected_x = (math_ops.digamma(self._shape_param) - - np.log(self._rate_param)) - - expected_exp_x = self._shape_param / self._rate_param - - log_accept_ratio_, samples_, expected_x_ = sess.run( - [kernel_results.log_accept_ratio, samples, expected_x], - feed_dict) - - actual_x = samples_.mean() - actual_exp_x = np.exp(samples_).mean() - acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.)) - - logging_ops.vlog(1, "True E[x, exp(x)]: {}\t{}".format( - expected_x_, expected_exp_x)) - logging_ops.vlog(1, "Estimated E[x, exp(x)]: {}\t{}".format( - actual_x, actual_exp_x)) - self.assertNear(actual_x, expected_x_, 2e-2) - self.assertNear(actual_exp_x, expected_exp_x, 2e-2) - self.assertAllEqual(np.ones_like(acceptance_probs, np.bool), - acceptance_probs > 0.5) - self.assertAllEqual(np.ones_like(acceptance_probs, np.bool), - acceptance_probs <= 1.) - - def _chain_gets_correct_expectations_wrapper(self, independent_chain_ndims): - with self.test_session(graph=ops.Graph()) as sess: - x_ph = array_ops.placeholder(np.float32, name="x_ph") - feed_dict = {x_ph: np.random.rand(50, 10, 2)} - self._chain_gets_correct_expectations(x_ph, independent_chain_ndims, - sess, feed_dict) - - def testHMCChainExpectationsNullShape(self): - self._chain_gets_correct_expectations_wrapper(0) - - def testHMCChainExpectations1(self): - self._chain_gets_correct_expectations_wrapper(1) - - def testHMCChainExpectations2(self): - self._chain_gets_correct_expectations_wrapper(2) - - def testKernelResultsUsingTruncatedDistribution(self): - def log_prob(x): - return array_ops.where( - x >= 0., - -x - x**2, # Non-constant gradient. - array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype))) - # This log_prob has the property that it is likely to attract - # the flow toward, and below, zero...but for x <=0, - # log_prob(x) = -inf, which should result in rejection, as well - # as a non-finite log_prob. Thus, this distribution gives us an opportunity - # to test out the kernel results ability to correctly capture rejections due - # to finite AND non-finite reasons. - # Why use a non-constant gradient? This ensures the leapfrog integrator - # will not be exact. - - num_results = 1000 - # Large step size, will give rejections due to integration error in addition - # to rejection due to going into a region of log_prob = -inf. - step_size = 0.1 - num_leapfrog_steps = 5 - num_chains = 2 - - with self.test_session(graph=ops.Graph()) as sess: - - # Start multiple independent chains. - initial_state = ops.convert_to_tensor([0.1] * num_chains) - - states, kernel_results = hmc.sample_chain( - num_results=num_results, - target_log_prob_fn=log_prob, - current_state=initial_state, - step_size=step_size, - num_leapfrog_steps=num_leapfrog_steps, - seed=42) - - states_, kernel_results_ = sess.run([states, kernel_results]) - pstates_ = kernel_results_.proposed_state - - neg_inf_mask = np.isneginf(kernel_results_.proposed_target_log_prob) - - # First: Test that the mathematical properties of the above log prob - # function in conjunction with HMC show up as expected in kernel_results_. - - # We better have log_prob = -inf some of the time. - self.assertLess(0, neg_inf_mask.sum()) - # We better have some rejections due to something other than -inf. - self.assertLess(neg_inf_mask.sum(), (~kernel_results_.is_accepted).sum()) - # We better have accepted a decent amount, even near end of the chain. - self.assertLess( - 0.1, kernel_results_.is_accepted[int(0.9 * num_results):].mean()) - # We better not have any NaNs in states or log_prob. - # We may have some NaN in grads, which involve multiplication/addition due - # to gradient rules. This is the known "NaN grad issue with tf.where." - self.assertAllEqual(np.zeros_like(states_), - np.isnan(kernel_results_.proposed_target_log_prob)) - self.assertAllEqual(np.zeros_like(states_), - np.isnan(states_)) - # We better not have any +inf in states, grads, or log_prob. - self.assertAllEqual(np.zeros_like(states_), - np.isposinf(kernel_results_.proposed_target_log_prob)) - self.assertAllEqual( - np.zeros_like(states_), - np.isposinf(kernel_results_.proposed_grads_target_log_prob[0])) - self.assertAllEqual(np.zeros_like(states_), - np.isposinf(states_)) - - # Second: Test that kernel_results is congruent with itself and - # acceptance/rejection of states. - - # Proposed state is negative iff proposed target log prob is -inf. - np.testing.assert_array_less(pstates_[neg_inf_mask], 0.) - np.testing.assert_array_less(0., pstates_[~neg_inf_mask]) - - # Acceptance probs are zero whenever proposed state is negative. - acceptance_probs = np.exp(np.minimum( - kernel_results_.log_accept_ratio, 0.)) - self.assertAllEqual( - np.zeros_like(pstates_[neg_inf_mask]), - acceptance_probs[neg_inf_mask]) - - # The move is accepted ==> state = proposed state. - self.assertAllEqual( - states_[kernel_results_.is_accepted], - pstates_[kernel_results_.is_accepted], - ) - # The move was rejected <==> state[t] == state[t - 1]. - for t in range(1, num_results): - for i in range(num_chains): - if kernel_results_.is_accepted[t, i]: - self.assertNotEqual(states_[t, i], states_[t - 1, i]) - else: - self.assertEqual(states_[t, i], states_[t - 1, i]) - - def _kernel_leaves_target_invariant(self, initial_draws, - independent_chain_ndims, - sess, feed_dict=None): - def log_gamma_log_prob(x): - event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x)) - return self._log_gamma_log_prob(x, event_dims) - - def fake_log_prob(x): - """Cooled version of the target distribution.""" - return 1.1 * log_gamma_log_prob(x) - - step_size = array_ops.placeholder(np.float32, [], name="step_size") - - if feed_dict is None: - feed_dict = {} - - feed_dict[step_size] = 0.4 - - sample, kernel_results = hmc.kernel( - target_log_prob_fn=log_gamma_log_prob, - current_state=initial_draws, - step_size=step_size, - num_leapfrog_steps=5, - seed=43) - - bad_sample, bad_kernel_results = hmc.kernel( - target_log_prob_fn=fake_log_prob, - current_state=initial_draws, - step_size=step_size, - num_leapfrog_steps=5, - seed=44) - - [ - log_accept_ratio_, - bad_log_accept_ratio_, - initial_draws_, - updated_draws_, - fake_draws_, - ] = sess.run([ - kernel_results.log_accept_ratio, - bad_kernel_results.log_accept_ratio, - initial_draws, - sample, - bad_sample, - ], feed_dict) - - # Confirm step size is small enough that we usually accept. - acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.)) - bad_acceptance_probs = np.exp(np.minimum(bad_log_accept_ratio_, 0.)) - self.assertGreater(acceptance_probs.mean(), 0.5) - self.assertGreater(bad_acceptance_probs.mean(), 0.5) - - # Confirm step size is large enough that we sometimes reject. - self.assertLess(acceptance_probs.mean(), 0.99) - self.assertLess(bad_acceptance_probs.mean(), 0.99) - - _, ks_p_value_true = stats.ks_2samp(initial_draws_.flatten(), - updated_draws_.flatten()) - _, ks_p_value_fake = stats.ks_2samp(initial_draws_.flatten(), - fake_draws_.flatten()) - - logging_ops.vlog(1, "acceptance rate for true target: {}".format( - acceptance_probs.mean())) - logging_ops.vlog(1, "acceptance rate for fake target: {}".format( - bad_acceptance_probs.mean())) - logging_ops.vlog(1, "K-S p-value for true target: {}".format( - ks_p_value_true)) - logging_ops.vlog(1, "K-S p-value for fake target: {}".format( - ks_p_value_fake)) - # Make sure that the MCMC update hasn't changed the empirical CDF much. - self.assertGreater(ks_p_value_true, 1e-3) - # Confirm that targeting the wrong distribution does - # significantly change the empirical CDF. - self.assertLess(ks_p_value_fake, 1e-6) - - def _kernel_leaves_target_invariant_wrapper(self, independent_chain_ndims): - """Tests that the kernel leaves the target distribution invariant. - - Draws some independent samples from the target distribution, - applies an iteration of the MCMC kernel, then runs a - Kolmogorov-Smirnov test to determine if the distribution of the - MCMC-updated samples has changed. - - We also confirm that running the kernel with a different log-pdf - does change the target distribution. (And that we can detect that.) - - Args: - independent_chain_ndims: Python `int` scalar representing the number of - dims associated with independent chains. - """ - with self.test_session(graph=ops.Graph()) as sess: - initial_draws = np.log(np.random.gamma(self._shape_param, - size=[50000, 2, 2])) - initial_draws -= np.log(self._rate_param) - x_ph = array_ops.placeholder(np.float32, name="x_ph") - - feed_dict = {x_ph: initial_draws} - - self._kernel_leaves_target_invariant(x_ph, independent_chain_ndims, - sess, feed_dict) - - def testKernelLeavesTargetInvariant1(self): - self._kernel_leaves_target_invariant_wrapper(1) - - def testKernelLeavesTargetInvariant2(self): - self._kernel_leaves_target_invariant_wrapper(2) - - def testKernelLeavesTargetInvariant3(self): - self._kernel_leaves_target_invariant_wrapper(3) - - def testNanRejection(self): - """Tests that an update that yields NaN potentials gets rejected. - - We run HMC with a target distribution that returns NaN - log-likelihoods if any element of x < 0, and unit-scale - exponential log-likelihoods otherwise. The exponential potential - pushes x towards 0, ensuring that any reasonably large update will - push us over the edge into NaN territory. - """ - def _unbounded_exponential_log_prob(x): - """An exponential distribution with log-likelihood NaN for x < 0.""" - per_element_potentials = array_ops.where( - x < 0., - array_ops.fill(array_ops.shape(x), x.dtype.as_numpy_dtype(np.nan)), - -x) - return math_ops.reduce_sum(per_element_potentials) - - with self.test_session(graph=ops.Graph()) as sess: - initial_x = math_ops.linspace(0.01, 5, 10) - updated_x, kernel_results = hmc.kernel( - target_log_prob_fn=_unbounded_exponential_log_prob, - current_state=initial_x, - step_size=2., - num_leapfrog_steps=5, - seed=46) - initial_x_, updated_x_, log_accept_ratio_ = sess.run( - [initial_x, updated_x, kernel_results.log_accept_ratio]) - acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.)) - - logging_ops.vlog(1, "initial_x = {}".format(initial_x_)) - logging_ops.vlog(1, "updated_x = {}".format(updated_x_)) - logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_)) - - self.assertAllEqual(initial_x_, updated_x_) - self.assertEqual(acceptance_probs, 0.) - - def testNanFromGradsDontPropagate(self): - """Test that update with NaN gradients does not cause NaN in results.""" - def _nan_log_prob_with_nan_gradient(x): - return np.nan * math_ops.reduce_sum(x) - - with self.test_session(graph=ops.Graph()) as sess: - initial_x = math_ops.linspace(0.01, 5, 10) - updated_x, kernel_results = hmc.kernel( - target_log_prob_fn=_nan_log_prob_with_nan_gradient, - current_state=initial_x, - step_size=2., - num_leapfrog_steps=5, - seed=47) - initial_x_, updated_x_, log_accept_ratio_ = sess.run( - [initial_x, updated_x, kernel_results.log_accept_ratio]) - acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.)) - - logging_ops.vlog(1, "initial_x = {}".format(initial_x_)) - logging_ops.vlog(1, "updated_x = {}".format(updated_x_)) - logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_)) - - self.assertAllEqual(initial_x_, updated_x_) - self.assertEqual(acceptance_probs, 0.) - - self.assertAllFinite( - gradients_ops.gradients(updated_x, initial_x)[0].eval()) - self.assertAllEqual([True], [g is None for g in gradients_ops.gradients( - kernel_results.proposed_grads_target_log_prob, initial_x)]) - self.assertAllEqual([False], [g is None for g in gradients_ops.gradients( - kernel_results.proposed_grads_target_log_prob, - kernel_results.proposed_state)]) - - # Gradients of the acceptance probs and new log prob are not finite. - # self.assertAllFinite( - # gradients_ops.gradients(acceptance_probs, initial_x)[0].eval()) - # self.assertAllFinite( - # gradients_ops.gradients(new_log_prob, initial_x)[0].eval()) - - def _testChainWorksDtype(self, dtype): - with self.test_session(graph=ops.Graph()) as sess: - states, kernel_results = hmc.sample_chain( - num_results=10, - target_log_prob_fn=lambda x: -math_ops.reduce_sum(x**2., axis=-1), - current_state=np.zeros(5).astype(dtype), - step_size=0.01, - num_leapfrog_steps=10, - seed=48) - states_, log_accept_ratio_ = sess.run( - [states, kernel_results.log_accept_ratio]) - self.assertEqual(dtype, states_.dtype) - self.assertEqual(dtype, log_accept_ratio_.dtype) - - def testChainWorksIn64Bit(self): - self._testChainWorksDtype(np.float64) - - def testChainWorksIn16Bit(self): - self._testChainWorksDtype(np.float16) - - def testChainWorksCorrelatedMultivariate(self): - dtype = np.float32 - true_mean = dtype([0, 0]) - true_cov = dtype([[1, 0.5], - [0.5, 1]]) - num_results = 2000 - counter = collections.Counter() - with self.test_session(graph=ops.Graph()) as sess: - def target_log_prob(x, y): - counter["target_calls"] += 1 - # Corresponds to unnormalized MVN. - # z = matmul(inv(chol(true_cov)), [x, y] - true_mean) - z = array_ops.stack([x, y], axis=-1) - true_mean - z = array_ops.squeeze( - gen_linalg_ops.matrix_triangular_solve( - np.linalg.cholesky(true_cov), - z[..., array_ops.newaxis]), - axis=-1) - return -0.5 * math_ops.reduce_sum(z**2., axis=-1) - states, _ = hmc.sample_chain( - num_results=num_results, - target_log_prob_fn=target_log_prob, - current_state=[dtype(-2), dtype(2)], - step_size=[0.5, 0.5], - num_leapfrog_steps=2, - num_burnin_steps=200, - num_steps_between_results=1, - seed=54) - self.assertAllEqual(dict(target_calls=2), counter) - states = array_ops.stack(states, axis=-1) - self.assertEqual(num_results, states.shape[0].value) - sample_mean = math_ops.reduce_mean(states, axis=0) - x = states - sample_mean - sample_cov = math_ops.matmul(x, x, transpose_a=True) / dtype(num_results) - [sample_mean_, sample_cov_] = sess.run([ - sample_mean, sample_cov]) - self.assertAllClose(true_mean, sample_mean_, - atol=0.05, rtol=0.) - self.assertAllClose(true_cov, sample_cov_, - atol=0., rtol=0.1) - - -class _EnergyComputationTest(object): - - def testHandlesNanFromPotential(self): - with self.test_session(graph=ops.Graph()) as sess: - x = [1, np.inf, -np.inf, np.nan] - target_log_prob, proposed_target_log_prob = [ - self.dtype(x.flatten()) for x in np.meshgrid(x, x)] - num_chains = len(target_log_prob) - dummy_momentums = [-1, 1] - momentums = [self.dtype([dummy_momentums] * num_chains)] - proposed_momentums = [self.dtype([dummy_momentums] * num_chains)] - - target_log_prob = ops.convert_to_tensor(target_log_prob) - momentums = [ops.convert_to_tensor(momentums[0])] - proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob) - proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])] - - energy = _compute_energy_change( - target_log_prob, - momentums, - proposed_target_log_prob, - proposed_momentums, - independent_chain_ndims=1) - grads = gradients_ops.gradients(energy, momentums) - - [actual_energy, grads_] = sess.run([energy, grads]) - - # Ensure energy is `inf` (note: that's positive inf) in weird cases and - # finite otherwise. - expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1)) - self.assertAllEqual(expected_energy, actual_energy) - - # Ensure gradient is finite. - self.assertAllEqual(np.ones_like(grads_).astype(np.bool), - np.isfinite(grads_)) - - def testHandlesNanFromKinetic(self): - with self.test_session(graph=ops.Graph()) as sess: - x = [1, np.inf, -np.inf, np.nan] - momentums, proposed_momentums = [ - [np.reshape(self.dtype(x), [-1, 1])] - for x in np.meshgrid(x, x)] - num_chains = len(momentums[0]) - target_log_prob = np.ones(num_chains, self.dtype) - proposed_target_log_prob = np.ones(num_chains, self.dtype) - - target_log_prob = ops.convert_to_tensor(target_log_prob) - momentums = [ops.convert_to_tensor(momentums[0])] - proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob) - proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])] - - energy = _compute_energy_change( - target_log_prob, - momentums, - proposed_target_log_prob, - proposed_momentums, - independent_chain_ndims=1) - grads = gradients_ops.gradients(energy, momentums) - - [actual_energy, grads_] = sess.run([energy, grads]) - - # Ensure energy is `inf` (note: that's positive inf) in weird cases and - # finite otherwise. - expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1)) - self.assertAllEqual(expected_energy, actual_energy) - - # Ensure gradient is finite. - g = grads_[0].reshape([len(x), len(x)])[:, 0] - self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isfinite(g)) - - # The remaining gradients are nan because the momentum was itself nan or - # inf. - g = grads_[0].reshape([len(x), len(x)])[:, 1:] - self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isnan(g)) - - -class EnergyComputationTest16(test.TestCase, _EnergyComputationTest): - dtype = np.float16 - - -class EnergyComputationTest32(test.TestCase, _EnergyComputationTest): - dtype = np.float32 - - -class EnergyComputationTest64(test.TestCase, _EnergyComputationTest): - dtype = np.float64 - - -class _HMCHandlesLists(object): - - def testStateParts(self): - with self.test_session(graph=ops.Graph()) as sess: - dist_x = normal_lib.Normal(loc=self.dtype(0), scale=self.dtype(1)) - dist_y = independent_lib.Independent( - gamma_lib.Gamma(concentration=self.dtype([1, 2]), - rate=self.dtype([0.5, 0.75])), - reinterpreted_batch_ndims=1) - def target_log_prob(x, y): - return dist_x.log_prob(x) + dist_y.log_prob(y) - x0 = [dist_x.sample(seed=1), dist_y.sample(seed=2)] - samples, _ = hmc.sample_chain( - num_results=int(2e3), - target_log_prob_fn=target_log_prob, - current_state=x0, - step_size=0.85, - num_leapfrog_steps=3, - num_burnin_steps=int(250), - seed=49) - actual_means = [math_ops.reduce_mean(s, axis=0) for s in samples] - actual_vars = [_reduce_variance(s, axis=0) for s in samples] - expected_means = [dist_x.mean(), dist_y.mean()] - expected_vars = [dist_x.variance(), dist_y.variance()] - [ - actual_means_, - actual_vars_, - expected_means_, - expected_vars_, - ] = sess.run([ - actual_means, - actual_vars, - expected_means, - expected_vars, - ]) - self.assertAllClose(expected_means_, actual_means_, atol=0.05, rtol=0.16) - self.assertAllClose(expected_vars_, actual_vars_, atol=0., rtol=0.25) - - -class HMCHandlesLists32(_HMCHandlesLists, test.TestCase): - dtype = np.float32 - - -class HMCHandlesLists64(_HMCHandlesLists, test.TestCase): - dtype = np.float64 - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py deleted file mode 100644 index f508e5b114..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Metropolis-Hastings.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings_impl as mh -from tensorflow.contrib.distributions.python.ops import mvn_tril as mvn_tril_lib -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.platform import test - - -class MetropolisHastingsTest(test.TestCase): - - def testKernelStateTensor(self): - """Test that transition kernel works with tensor input to `state`.""" - loc = variable_scope.get_variable("loc", initializer=0.) - - def target_log_prob_fn(loc): - return normal_lib.Normal(loc=0.0, scale=0.1).log_prob(loc) - - new_state, _ = mh.kernel( - target_log_prob_fn=target_log_prob_fn, - proposal_fn=mh.proposal_normal(scale=0.05), - current_state=loc, - seed=231251) - loc_update = loc.assign(new_state) - - init = variables.initialize_all_variables() - with self.test_session() as sess: - sess.run(init) - loc_samples = [] - for _ in range(2500): - loc_sample = sess.run(loc_update) - loc_samples.append(loc_sample) - loc_samples = loc_samples[500:] # drop samples for burn-in - - self.assertAllClose(np.mean(loc_samples), 0.0, rtol=1e-5, atol=1e-1) - self.assertAllClose(np.std(loc_samples), 0.1, rtol=1e-5, atol=1e-1) - - def testKernelStateList(self): - """Test that transition kernel works with list input to `state`.""" - num_chains = 2 - loc_one = variable_scope.get_variable( - "loc_one", [num_chains], - initializer=init_ops.zeros_initializer()) - loc_two = variable_scope.get_variable( - "loc_two", [num_chains], initializer=init_ops.zeros_initializer()) - - def target_log_prob_fn(loc_one, loc_two): - loc = array_ops.stack([loc_one, loc_two]) - log_prob = mvn_tril_lib.MultivariateNormalTriL( - loc=constant_op.constant([0., 0.]), - scale_tril=constant_op.constant([[0.1, 0.1], [0.0, 0.1]])).log_prob( - loc) - return math_ops.reduce_sum(log_prob, 0) - - def proposal_fn(loc_one, loc_two): - loc_one_proposal = mh.proposal_normal(scale=0.05) - loc_two_proposal = mh.proposal_normal(scale=0.05) - loc_one_sample, _ = loc_one_proposal(loc_one) - loc_two_sample, _ = loc_two_proposal(loc_two) - return [loc_one_sample, loc_two_sample], None - - new_state, _ = mh.kernel( - target_log_prob_fn=target_log_prob_fn, - proposal_fn=proposal_fn, - current_state=[loc_one, loc_two], - seed=12415) - loc_one_update = loc_one.assign(new_state[0]) - loc_two_update = loc_two.assign(new_state[1]) - - init = variables.initialize_all_variables() - with self.test_session() as sess: - sess.run(init) - loc_one_samples = [] - loc_two_samples = [] - for _ in range(10000): - loc_one_sample, loc_two_sample = sess.run( - [loc_one_update, loc_two_update]) - loc_one_samples.append(loc_one_sample) - loc_two_samples.append(loc_two_sample) - - loc_one_samples = np.array(loc_one_samples) - loc_two_samples = np.array(loc_two_samples) - loc_one_samples = loc_one_samples[1000:] # drop samples for burn-in - loc_two_samples = loc_two_samples[1000:] # drop samples for burn-in - - self.assertAllClose(np.mean(loc_one_samples, 0), - np.array([0.] * num_chains), - rtol=1e-5, atol=1e-1) - self.assertAllClose(np.mean(loc_two_samples, 0), - np.array([0.] * num_chains), - rtol=1e-5, atol=1e-1) - self.assertAllClose(np.std(loc_one_samples, 0), - np.array([0.1] * num_chains), - rtol=1e-5, atol=1e-1) - self.assertAllClose(np.std(loc_two_samples, 0), - np.array([0.1] * num_chains), - rtol=1e-5, atol=1e-1) - - def testKernelResultsUsingTruncatedDistribution(self): - def log_prob(x): - return array_ops.where( - x >= 0., - -x - x**2, - array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype))) - # The truncated distribution has the property that it is likely to attract - # the flow toward, and below, zero...but for x <=0, - # log_prob(x) = -inf, which should result in rejection, as well - # as a non-finite log_prob. Thus, this distribution gives us an opportunity - # to test out the kernel results ability to correctly capture rejections due - # to finite AND non-finite reasons. - - num_results = 1000 - # Large step size, will give rejections due to going into a region of - # log_prob = -inf. - step_size = 0.3 - num_chains = 2 - - with self.test_session(graph=ops.Graph()) as sess: - - # Start multiple independent chains. - initial_state = ops.convert_to_tensor([0.1] * num_chains) - - states = [] - is_accepted = [] - proposed_states = [] - current_state = initial_state - for _ in range(num_results): - current_state, kernel_results = mh.kernel( - target_log_prob_fn=log_prob, - proposal_fn=mh.proposal_uniform(step_size=step_size), - current_state=current_state, - seed=42) - states.append(current_state) - proposed_states.append(kernel_results.proposed_state) - is_accepted.append(kernel_results.is_accepted) - - states = array_ops.stack(states) - proposed_states = array_ops.stack(proposed_states) - is_accepted = array_ops.stack(is_accepted) - states_, pstates_, is_accepted_ = sess.run( - [states, proposed_states, is_accepted]) - - # We better have accepted a decent amount, even near end of the chain. - self.assertLess( - 0.1, is_accepted_[int(0.9 * num_results):].mean()) - # We better not have any NaNs in states. - self.assertAllEqual(np.zeros_like(states_), - np.isnan(states_)) - # We better not have any +inf in states. - self.assertAllEqual(np.zeros_like(states_), - np.isposinf(states_)) - - # The move is accepted ==> state = proposed state. - self.assertAllEqual( - states_[is_accepted_], - pstates_[is_accepted_], - ) - - # The move was rejected <==> state[t] == state[t - 1]. - for t in range(1, num_results): - for i in range(num_chains): - if is_accepted_[t, i]: - self.assertNotEqual(states_[t, i], states_[t - 1, i]) - else: - self.assertEqual(states_[t, i], states_[t - 1, i]) - - def testDensityIncreasingStepAccepted(self): - """Tests that if a transition increases density, it is always accepted.""" - target_log_density = lambda x: - x * x - state = variable_scope.get_variable("state", initializer=10.) - state_log_density = variable_scope.get_variable( - "state_log_density", - initializer=target_log_density(state.initialized_value())) - log_accept_ratio = variable_scope.get_variable( - "log_accept_ratio", initializer=0.) - - get_next_proposal = lambda x: (x - 1., None) - step = mh.evolve(state, state_log_density, log_accept_ratio, - target_log_density, get_next_proposal, seed=1234) - init = variables.initialize_all_variables() - with self.test_session() as sess: - sess.run(init) - for j in range(9): - sess.run(step) - sample = sess.run(state) - sample_log_density = sess.run(state_log_density) - self.assertAlmostEqual(sample, 9 - j) - self.assertAlmostEqual(sample_log_density, - (9 - j) * (9 - j)) - - def testSampleProperties(self): - """Tests that the samples converge to the target distribution.""" - - def target_log_density(x): - """Log-density corresponding to a normal distribution with mean = 4.""" - return - (x - 2.0) * (x - 2.0) * 0.5 - - # Use the uniform random walker to generate proposals. - proposal_fn = mh.proposal_uniform( - step_size=1.0, seed=1234) - - state = variable_scope.get_variable("state", initializer=0.0) - state_log_density = variable_scope.get_variable( - "state_log_density", - initializer=target_log_density(state.initialized_value())) - log_accept_ratio = variable_scope.get_variable( - "log_accept_ratio", initializer=0.) - - # Random walk MCMC converges slowly so need to put in enough iterations. - num_iterations = 5000 - step = mh.evolve(state, state_log_density, log_accept_ratio, - target_log_density, proposal_fn, seed=4321) - - init = variables.global_variables_initializer() - - sample_sum, sample_sq_sum = 0.0, 0.0 - with self.test_session() as sess: - sess.run(init) - for _ in np.arange(num_iterations): - # Allow for the mixing of the chain and discard these samples. - sess.run(step) - for _ in np.arange(num_iterations): - sess.run(step) - sample = sess.run(state) - sample_sum += sample - sample_sq_sum += sample * sample - - sample_mean = sample_sum / num_iterations - sample_variance = sample_sq_sum / num_iterations - sample_mean * sample_mean - # The samples have large autocorrelation which reduces the effective sample - # size. - self.assertAlmostEqual(sample_mean, 2.0, delta=0.1) - self.assertAlmostEqual(sample_variance, 1.0, delta=0.1) - - def testProposalNormal(self): - """Tests that the normal proposals are correctly distributed.""" - - initial_points = array_ops.ones([10000], dtype=dtypes.float32) - proposal_fn = mh.proposal_normal( - scale=2.0, seed=1234) - proposal_points, _ = proposal_fn(initial_points) - - with self.test_session() as sess: - sample = sess.run(proposal_points) - - # It is expected that the elements in proposal_points have the same mean as - # initial_points and have the standard deviation that was supplied to the - # proposal scheme. - self.assertAlmostEqual(np.mean(sample), 1.0, delta=0.1) - self.assertAlmostEqual(np.std(sample), 2.0, delta=0.1) - - def testDocstringExample(self): - """Tests the simplified docstring example with multiple chains.""" - - n = 2 # dimension of the problem - - # Generate 300 initial values randomly. Each of these would be an - # independent starting point for a Markov chain. - state = variable_scope.get_variable( - "state", initializer=random_ops.random_normal( - [300, n], mean=3.0, dtype=dtypes.float32, seed=42)) - - # Computes the log(p(x)) for the unit normal density and ignores the - # normalization constant. - def log_density(x): - return - math_ops.reduce_sum(x * x, reduction_indices=-1) / 2.0 - - # Initial log-density value - state_log_density = variable_scope.get_variable( - "state_log_density", - initializer=log_density(state.initialized_value())) - - # A variable to store the log_acceptance_ratio: - log_acceptance_ratio = variable_scope.get_variable( - "log_acceptance_ratio", - initializer=array_ops.zeros([300], dtype=dtypes.float32)) - - # Generates random proposals by moving each coordinate uniformly and - # independently in a box of size 2 centered around the current value. - # Returns the new point and also the log of the Hastings ratio (the - # ratio of the probability of going from the proposal to origin and the - # probability of the reverse transition). When this ratio is 1, the value - # may be omitted and replaced by None. - def random_proposal(x): - return (x + random_ops.random_uniform( - array_ops.shape(x), minval=-1, maxval=1, - dtype=x.dtype, seed=12)), None - - # Create the op to propagate the chain for 100 steps. - stepper = mh.evolve( - state, state_log_density, log_acceptance_ratio, - log_density, random_proposal, n_steps=100, seed=123) - init = variables.initialize_all_variables() - with self.test_session() as sess: - sess.run(init) - # Run the chains for a total of 1000 steps. - for _ in range(10): - sess.run(stepper) - samples = sess.run(state) - covariance = np.eye(n) - # Verify that the estimated mean and covariance are close to the true - # values. - self.assertAlmostEqual( - np.max(np.abs(np.mean(samples, 0) - - np.zeros(n))), 0, - delta=0.1) - self.assertAlmostEqual( - np.max(np.abs(np.reshape(np.cov(samples, rowvar=False), [n**2]) - - np.reshape(covariance, [n**2]))), 0, - delta=0.2) - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc.py b/tensorflow/contrib/bayesflow/python/ops/hmc.py deleted file mode 100644 index c8a5a195d3..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/hmc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -from tensorflow.contrib.bayesflow.python.ops.hmc_impl import * # pylint: disable=wildcard-import,unused-wildcard-import,g-importing-member -from tensorflow.python.util import all_util - -_allowed_symbols = [ - "sample_chain", - "kernel", -] - -all_util.remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py deleted file mode 100644 index 66afcc7497..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py +++ /dev/null @@ -1,961 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm. - -@@sample_chain -@@kernel -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gradients_impl as gradients_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import util as distributions_util - -__all__ = [ - "sample_chain", - "kernel", -] - - -KernelResults = collections.namedtuple( - "KernelResults", - [ - "log_accept_ratio", - "current_grads_target_log_prob", # "Current result" means "accepted". - "current_target_log_prob", # "Current result" means "accepted". - "is_accepted", - "proposed_grads_target_log_prob", - "proposed_state", - "proposed_target_log_prob", - ]) - - -def _make_dummy_kernel_results( - dummy_state, - dummy_target_log_prob, - dummy_grads_target_log_prob): - return KernelResults( - log_accept_ratio=dummy_target_log_prob, - current_grads_target_log_prob=dummy_grads_target_log_prob, - current_target_log_prob=dummy_target_log_prob, - is_accepted=array_ops.ones_like(dummy_target_log_prob, dtypes.bool), - proposed_grads_target_log_prob=dummy_grads_target_log_prob, - proposed_state=dummy_state, - proposed_target_log_prob=dummy_target_log_prob, - ) - - -def sample_chain( - num_results, - target_log_prob_fn, - current_state, - step_size, - num_leapfrog_steps, - num_burnin_steps=0, - num_steps_between_results=0, - seed=None, - current_target_log_prob=None, - current_grads_target_log_prob=None, - name=None): - """Runs multiple iterations of one or more Hamiltonian Monte Carlo chains. - - Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm - that takes a series of gradient-informed steps to produce a Metropolis - proposal. This function samples from an HMC Markov chain at `current_state` - and whose stationary distribution has log-unnormalized-density - `target_log_prob_fn()`. - - This function samples from multiple chains in parallel. It assumes that the - the leftmost dimensions of (each) `current_state` (part) index an independent - chain. The function `target_log_prob_fn()` sums log-probabilities across - event dimensions (i.e., current state (part) rightmost dimensions). Each - element of the output of `target_log_prob_fn()` represents the (possibly - unnormalized) log-probability of the joint distribution over (all) the current - state (parts). - - The `current_state` can be represented as a single `Tensor` or a `list` of - `Tensors` which collectively represent the current state. When specifying a - `list`, one must also specify a list of `step_size`s. - - Note: `target_log_prob_fn` is called exactly twice. - - Since HMC states are correlated, it is sometimes desirable to produce - additional intermediate states, and then discard them, ending up with a set of - states with decreased autocorrelation. See [1]. Such "thinning" is made - possible by setting `num_steps_between_results > 0`. The chain then takes - `num_steps_between_results` extra steps between the steps that make it into - the results. The extra steps are never materialized (in calls to `sess.run`), - and thus do not increase memory requirements. - - [1]: "Statistically efficient thinning of a Markov chain sampler." - Art B. Owen. April 2017. - http://statweb.stanford.edu/~owen/reports/bestthinning.pdf - - #### Examples: - - ##### Sample from a diagonal-variance Gaussian. - - ```python - tfd = tf.contrib.distributions - - def make_likelihood(true_variances): - return tfd.MultivariateNormalDiag( - scale_diag=tf.sqrt(true_variances)) - - dims = 10 - dtype = np.float32 - true_variances = tf.linspace(dtype(1), dtype(3), dims) - likelihood = make_likelihood(true_variances) - - states, kernel_results = hmc.sample_chain( - num_results=1000, - target_log_prob_fn=likelihood.log_prob, - current_state=tf.zeros(dims), - step_size=0.5, - num_leapfrog_steps=2, - num_burnin_steps=500) - - # Compute sample stats. - sample_mean = tf.reduce_mean(states, axis=0) - sample_var = tf.reduce_mean( - tf.squared_difference(states, sample_mean), - axis=0) - ``` - - ##### Sampling from factor-analysis posteriors with known factors. - - I.e., - - ```none - for i=1..n: - w[i] ~ Normal(0, eye(d)) # prior - x[i] ~ Normal(loc=matmul(w[i], F)) # likelihood - ``` - - where `F` denotes factors. - - ```python - tfd = tf.contrib.distributions - - def make_prior(dims, dtype): - return tfd.MultivariateNormalDiag( - loc=tf.zeros(dims, dtype)) - - def make_likelihood(weights, factors): - return tfd.MultivariateNormalDiag( - loc=tf.tensordot(weights, factors, axes=[[0], [-1]])) - - # Setup data. - num_weights = 10 - num_factors = 4 - num_chains = 100 - dtype = np.float32 - - prior = make_prior(num_weights, dtype) - weights = prior.sample(num_chains) - factors = np.random.randn(num_factors, num_weights).astype(dtype) - x = make_likelihood(weights, factors).sample(num_chains) - - def target_log_prob(w): - # Target joint is: `f(w) = p(w, x | factors)`. - return prior.log_prob(w) + make_likelihood(w, factors).log_prob(x) - - # Get `num_results` samples from `num_chains` independent chains. - chains_states, kernels_results = hmc.sample_chain( - num_results=1000, - target_log_prob_fn=target_log_prob, - current_state=tf.zeros([num_chains, dims], dtype), - step_size=0.1, - num_leapfrog_steps=2, - num_burnin_steps=500) - - # Compute sample stats. - sample_mean = tf.reduce_mean(chains_states, axis=[0, 1]) - sample_var = tf.reduce_mean( - tf.squared_difference(chains_states, sample_mean), - axis=[0, 1]) - ``` - - Args: - num_results: Integer number of Markov chain draws. - target_log_prob_fn: Python callable which takes an argument like - `current_state` (or `*current_state` if it's a list) and returns its - (possibly unnormalized) log-density under the target distribution. - current_state: `Tensor` or Python `list` of `Tensor`s representing the - current state(s) of the Markov chain(s). The first `r` dimensions index - independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`. - step_size: `Tensor` or Python `list` of `Tensor`s representing the step size - for the leapfrog integrator. Must broadcast with the shape of - `current_state`. Larger step sizes lead to faster progress, but too-large - step sizes make rejection exponentially more likely. When possible, it's - often helpful to match per-variable step sizes to the standard deviations - of the target distribution in each variable. - num_leapfrog_steps: Integer number of steps to run the leapfrog integrator - for. Total progress per HMC step is roughly proportional to `step_size * - num_leapfrog_steps`. - num_burnin_steps: Integer number of chain steps to take before starting to - collect results. - Default value: 0 (i.e., no burn-in). - num_steps_between_results: Integer number of chain steps between collecting - a result. Only one out of every `num_steps_between_samples + 1` steps is - included in the returned results. The number of returned chain states is - still equal to `num_results`. Default value: 0 (i.e., no thinning). - seed: Python integer to seed the random number generator. - current_target_log_prob: (Optional) `Tensor` representing the value of - `target_log_prob_fn` at the `current_state`. The only reason to specify - this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - current_grads_target_log_prob: (Optional) Python list of `Tensor`s - representing gradient of `target_log_prob` at the `current_state` and wrt - the `current_state`. Must have same shape as `current_state`. The only - reason to specify this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "hmc_sample_chain"). - - Returns: - next_states: Tensor or Python list of `Tensor`s representing the - state(s) of the Markov chain(s) at each result step. Has same shape as - input `current_state` but with a prepended `num_results`-size dimension. - kernel_results: `collections.namedtuple` of internal calculations used to - advance the chain. - """ - with ops.name_scope( - name, "hmc_sample_chain", - [num_results, current_state, step_size, num_leapfrog_steps, - num_burnin_steps, num_steps_between_results, seed, - current_target_log_prob, current_grads_target_log_prob]): - with ops.name_scope("initialize"): - [ - current_state, - step_size, - current_target_log_prob, - current_grads_target_log_prob, - ] = _prepare_args( - target_log_prob_fn, - current_state, - step_size, - current_target_log_prob, - current_grads_target_log_prob) - num_results = ops.convert_to_tensor( - num_results, - dtype=dtypes.int32, - name="num_results") - num_leapfrog_steps = ops.convert_to_tensor( - num_leapfrog_steps, - dtype=dtypes.int32, - name="num_leapfrog_steps") - num_burnin_steps = ops.convert_to_tensor( - num_burnin_steps, - dtype=dtypes.int32, - name="num_burnin_steps") - num_steps_between_results = ops.convert_to_tensor( - num_steps_between_results, - dtype=dtypes.int32, - name="num_steps_between_results") - - def _run_chain(num_steps, current_state, kernel_results): - """Runs the chain(s) for `num_steps`.""" - def _loop_body(iter_, current_state, kernel_results): - return [iter_ + 1] + list(kernel( - target_log_prob_fn, - current_state, - step_size, - num_leapfrog_steps, - seed, - kernel_results.current_target_log_prob, - kernel_results.current_grads_target_log_prob)) - while_loop_kwargs = dict( - cond=lambda iter_, *args: iter_ < num_steps, - body=_loop_body, - loop_vars=[ - np.int32(0), - current_state, - kernel_results, - ], - ) - if seed is not None: - while_loop_kwargs["parallel_iterations"] = 1 - return control_flow_ops.while_loop( - **while_loop_kwargs)[1:] # Lop-off "iter_". - - def _scan_body(args_list, iter_): - """Closure which implements `tf.scan` body.""" - current_state, kernel_results = args_list - return _run_chain( - 1 + array_ops.where(math_ops.equal(iter_, 0), - num_burnin_steps, - num_steps_between_results), - current_state, - kernel_results) - - scan_kwargs = dict( - fn=_scan_body, - elems=math_ops.range(num_results), # iter_: used to choose burnin. - initializer=[ - current_state, - _make_dummy_kernel_results( - current_state, - current_target_log_prob, - current_grads_target_log_prob), - ]) - if seed is not None: - scan_kwargs["parallel_iterations"] = 1 - return functional_ops.scan(**scan_kwargs) - - -def kernel(target_log_prob_fn, - current_state, - step_size, - num_leapfrog_steps, - seed=None, - current_target_log_prob=None, - current_grads_target_log_prob=None, - name=None): - """Runs one iteration of Hamiltonian Monte Carlo. - - Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) - algorithm that takes a series of gradient-informed steps to produce - a Metropolis proposal. This function applies one step of HMC to - randomly update the variable `x`. - - This function can update multiple chains in parallel. It assumes that all - leftmost dimensions of `current_state` index independent chain states (and are - therefore updated independently). The output of `target_log_prob_fn()` should - sum log-probabilities across all event dimensions. Slices along the rightmost - dimensions may have different target distributions; for example, - `current_state[0, :]` could have a different target distribution from - `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of - independent chains is `tf.size(target_log_prob_fn(*current_state))`.) - - #### Examples: - - ##### Simple chain with warm-up. - - ```python - tfd = tf.contrib.distributions - - # Tuning acceptance rates: - dtype = np.float32 - target_accept_rate = 0.631 - num_warmup_iter = 500 - num_chain_iter = 500 - - x = tf.get_variable(name="x", initializer=dtype(1)) - step_size = tf.get_variable(name="step_size", initializer=dtype(1)) - - target = tfd.Normal(loc=dtype(0), scale=dtype(1)) - - next_x, other_results = hmc.kernel( - target_log_prob_fn=target.log_prob, - current_state=x, - step_size=step_size, - num_leapfrog_steps=3)[:4] - - x_update = x.assign(next_x) - - step_size_update = step_size.assign_add( - step_size * tf.where( - tf.exp(tf.minimum(other_results.log_accept_ratio), 0.) > - target_accept_rate, - 0.01, -0.01)) - - warmup = tf.group([x_update, step_size_update]) - - tf.global_variables_initializer().run() - - sess.graph.finalize() # No more graph building. - - # Warm up the sampler and adapt the step size - for _ in xrange(num_warmup_iter): - sess.run(warmup) - - # Collect samples without adapting step size - samples = np.zeros([num_chain_iter]) - for i in xrange(num_chain_iter): - _, x_, target_log_prob_, grad_ = sess.run([ - x_update, - x, - other_results.target_log_prob, - other_results.grads_target_log_prob]) - samples[i] = x_ - - print(samples.mean(), samples.std()) - ``` - - ##### Sample from more complicated posterior. - - I.e., - - ```none - W ~ MVN(loc=0, scale=sigma * eye(dims)) - for i=1...num_samples: - X[i] ~ MVN(loc=0, scale=eye(dims)) - eps[i] ~ Normal(loc=0, scale=1) - Y[i] = X[i].T * W + eps[i] - ``` - - ```python - tfd = tf.contrib.distributions - - def make_training_data(num_samples, dims, sigma): - dt = np.asarray(sigma).dtype - zeros = tf.zeros(dims, dtype=dt) - x = tfd.MultivariateNormalDiag( - loc=zeros).sample(num_samples, seed=1) - w = tfd.MultivariateNormalDiag( - loc=zeros, - scale_identity_multiplier=sigma).sample(seed=2) - noise = tfd.Normal( - loc=dt(0), - scale=dt(1)).sample(num_samples, seed=3) - y = tf.tensordot(x, w, axes=[[1], [0]]) + noise - return y, x, w - - def make_prior(sigma, dims): - # p(w | sigma) - return tfd.MultivariateNormalDiag( - loc=tf.zeros([dims], dtype=sigma.dtype), - scale_identity_multiplier=sigma) - - def make_likelihood(x, w): - # p(y | x, w) - return tfd.MultivariateNormalDiag( - loc=tf.tensordot(x, w, axes=[[1], [0]])) - - # Setup assumptions. - dtype = np.float32 - num_samples = 150 - dims = 10 - num_iters = int(5e3) - - true_sigma = dtype(0.5) - y, x, true_weights = make_training_data(num_samples, dims, true_sigma) - - # Estimate of `log(true_sigma)`. - log_sigma = tf.get_variable(name="log_sigma", initializer=dtype(0)) - sigma = tf.exp(log_sigma) - - # State of the Markov chain. - weights = tf.get_variable( - name="weights", - initializer=np.random.randn(dims).astype(dtype)) - - prior = make_prior(sigma, dims) - - def joint_log_prob_fn(w): - # f(w) = log p(w, y | x) - return prior.log_prob(w) + make_likelihood(x, w).log_prob(y) - - weights_update = weights.assign( - hmc.kernel(target_log_prob_fn=joint_log_prob, - current_state=weights, - step_size=0.1, - num_leapfrog_steps=5)[0]) - - with tf.control_dependencies([weights_update]): - loss = -prior.log_prob(weights) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) - log_sigma_update = optimizer.minimize(loss, var_list=[log_sigma]) - - sess.graph.finalize() # No more graph building. - - tf.global_variables_initializer().run() - - sigma_history = np.zeros(num_iters, dtype) - weights_history = np.zeros([num_iters, dims], dtype) - - for i in xrange(num_iters): - _, sigma_, weights_, _ = sess.run([log_sigma_update, sigma, weights]) - weights_history[i, :] = weights_ - sigma_history[i] = sigma_ - - true_weights_ = sess.run(true_weights) - - # Should converge to something close to true_sigma. - plt.plot(sigma_history); - plt.ylabel("sigma"); - plt.xlabel("iteration"); - ``` - - Args: - target_log_prob_fn: Python callable which takes an argument like - `current_state` (or `*current_state` if it's a list) and returns its - (possibly unnormalized) log-density under the target distribution. - current_state: `Tensor` or Python `list` of `Tensor`s representing the - current state(s) of the Markov chain(s). The first `r` dimensions index - independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`. - step_size: `Tensor` or Python `list` of `Tensor`s representing the step size - for the leapfrog integrator. Must broadcast with the shape of - `current_state`. Larger step sizes lead to faster progress, but too-large - step sizes make rejection exponentially more likely. When possible, it's - often helpful to match per-variable step sizes to the standard deviations - of the target distribution in each variable. - num_leapfrog_steps: Integer number of steps to run the leapfrog integrator - for. Total progress per HMC step is roughly proportional to `step_size * - num_leapfrog_steps`. - seed: Python integer to seed the random number generator. - current_target_log_prob: (Optional) `Tensor` representing the value of - `target_log_prob_fn` at the `current_state`. The only reason to - specify this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - current_grads_target_log_prob: (Optional) Python list of `Tensor`s - representing gradient of `current_target_log_prob` at the `current_state` - and wrt the `current_state`. Must have same shape as `current_state`. The - only reason to specify this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "hmc_kernel"). - - Returns: - next_state: Tensor or Python list of `Tensor`s representing the state(s) - of the Markov chain(s) at each result step. Has same shape as - `current_state`. - kernel_results: `collections.namedtuple` of internal calculations used to - advance the chain. - - Raises: - ValueError: if there isn't one `step_size` or a list with same length as - `current_state`. - """ - with ops.name_scope( - name, "hmc_kernel", - [current_state, step_size, num_leapfrog_steps, seed, - current_target_log_prob, current_grads_target_log_prob]): - with ops.name_scope("initialize"): - [current_state_parts, step_sizes, current_target_log_prob, - current_grads_target_log_prob] = _prepare_args( - target_log_prob_fn, current_state, step_size, - current_target_log_prob, current_grads_target_log_prob, - maybe_expand=True) - independent_chain_ndims = distributions_util.prefer_static_rank( - current_target_log_prob) - current_momentums = [] - for s in current_state_parts: - current_momentums.append(random_ops.random_normal( - shape=array_ops.shape(s), - dtype=s.dtype.base_dtype, - seed=seed)) - seed = distributions_util.gen_new_seed( - seed, salt="hmc_kernel_momentums") - - num_leapfrog_steps = ops.convert_to_tensor( - num_leapfrog_steps, - dtype=dtypes.int32, - name="num_leapfrog_steps") - [ - proposed_momentums, - proposed_state_parts, - proposed_target_log_prob, - proposed_grads_target_log_prob, - ] = _leapfrog_integrator(current_momentums, - target_log_prob_fn, - current_state_parts, - step_sizes, - num_leapfrog_steps, - current_target_log_prob, - current_grads_target_log_prob) - - energy_change = _compute_energy_change(current_target_log_prob, - current_momentums, - proposed_target_log_prob, - proposed_momentums, - independent_chain_ndims) - log_accept_ratio = -energy_change - - # u < exp(log_accept_ratio), where u~Uniform[0,1) - # ==> log(u) < log_accept_ratio - random_value = random_ops.random_uniform( - shape=array_ops.shape(energy_change), - dtype=energy_change.dtype, - seed=seed) - random_negative = math_ops.log(random_value) - is_accepted = random_negative < log_accept_ratio - - accepted_target_log_prob = array_ops.where(is_accepted, - proposed_target_log_prob, - current_target_log_prob) - - next_state_parts = [_choose(is_accepted, - proposed_state_part, - current_state_part, - independent_chain_ndims) - for current_state_part, proposed_state_part - in zip(current_state_parts, proposed_state_parts)] - - accepted_grads_target_log_prob = [ - _choose(is_accepted, - proposed_grad, - grad, - independent_chain_ndims) - for proposed_grad, grad - in zip(proposed_grads_target_log_prob, current_grads_target_log_prob)] - - maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0] - return [ - maybe_flatten(next_state_parts), - KernelResults( - log_accept_ratio=log_accept_ratio, - current_grads_target_log_prob=accepted_grads_target_log_prob, - current_target_log_prob=accepted_target_log_prob, - is_accepted=is_accepted, - proposed_grads_target_log_prob=proposed_grads_target_log_prob, - proposed_state=maybe_flatten(proposed_state_parts), - proposed_target_log_prob=proposed_target_log_prob, - ), - ] - - -def _leapfrog_integrator(current_momentums, - target_log_prob_fn, - current_state_parts, - step_sizes, - num_leapfrog_steps, - current_target_log_prob=None, - current_grads_target_log_prob=None, - name=None): - """Applies `num_leapfrog_steps` of the leapfrog integrator. - - Assumes a simple quadratic kinetic energy function: `0.5 ||momentum||**2`. - - #### Examples: - - ##### Simple quadratic potential. - - ```python - tfd = tf.contrib.distributions - - dims = 10 - num_iter = int(1e3) - dtype = np.float32 - - position = tf.placeholder(np.float32) - momentum = tf.placeholder(np.float32) - - [ - next_momentums, - next_positions, - ] = hmc._leapfrog_integrator( - current_momentums=[momentum], - target_log_prob_fn=tfd.MultivariateNormalDiag( - loc=tf.zeros(dims, dtype)).log_prob, - current_state_parts=[position], - step_sizes=0.1, - num_leapfrog_steps=3)[:2] - - sess.graph.finalize() # No more graph building. - - momentum_ = np.random.randn(dims).astype(dtype) - position_ = np.random.randn(dims).astype(dtype) - - positions = np.zeros([num_iter, dims], dtype) - for i in xrange(num_iter): - position_, momentum_ = sess.run( - [next_momentums[0], next_position[0]], - feed_dict={position: position_, momentum: momentum_}) - positions[i] = position_ - - plt.plot(positions[:, 0]); # Sinusoidal. - ``` - - Args: - current_momentums: Tensor containing the value(s) of the momentum - variable(s) to update. - target_log_prob_fn: Python callable which takes an argument like - `*current_state_parts` and returns its (possibly unnormalized) log-density - under the target distribution. - current_state_parts: Python `list` of `Tensor`s representing the current - state(s) of the Markov chain(s). The first `independent_chain_ndims` of - the `Tensor`(s) index different chains. - step_sizes: Python `list` of `Tensor`s representing the step size for the - leapfrog integrator. Must broadcast with the shape of - `current_state_parts`. Larger step sizes lead to faster progress, but - too-large step sizes make rejection exponentially more likely. When - possible, it's often helpful to match per-variable step sizes to the - standard deviations of the target distribution in each variable. - num_leapfrog_steps: Integer number of steps to run the leapfrog integrator - for. Total progress per HMC step is roughly proportional to `step_size * - num_leapfrog_steps`. - current_target_log_prob: (Optional) `Tensor` representing the value of - `target_log_prob_fn(*current_state_parts)`. The only reason to specify - this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - current_grads_target_log_prob: (Optional) Python list of `Tensor`s - representing gradient of `target_log_prob_fn(*current_state_parts`) wrt - `current_state_parts`. Must have same shape as `current_state_parts`. The - only reason to specify this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "hmc_leapfrog_integrator"). - - Returns: - proposed_momentums: Updated value of the momentum. - proposed_state_parts: Tensor or Python list of `Tensor`s representing the - state(s) of the Markov chain(s) at each result step. Has same shape as - input `current_state_parts`. - proposed_target_log_prob: `Tensor` representing the value of - `target_log_prob_fn` at `next_state`. - proposed_grads_target_log_prob: Gradient of `proposed_target_log_prob` wrt - `next_state`. - - Raises: - ValueError: if `len(momentums) != len(state_parts)`. - ValueError: if `len(state_parts) != len(step_sizes)`. - ValueError: if `len(state_parts) != len(grads_target_log_prob)`. - TypeError: if `not target_log_prob.dtype.is_floating`. - """ - def _loop_body(step, - current_momentums, - current_state_parts, - ignore_current_target_log_prob, # pylint: disable=unused-argument - current_grads_target_log_prob): - return [step + 1] + list(_leapfrog_step(current_momentums, - target_log_prob_fn, - current_state_parts, - step_sizes, - current_grads_target_log_prob)) - - with ops.name_scope( - name, "hmc_leapfrog_integrator", - [current_momentums, current_state_parts, step_sizes, num_leapfrog_steps, - current_target_log_prob, current_grads_target_log_prob]): - if len(current_momentums) != len(current_state_parts): - raise ValueError("`momentums` must be in one-to-one correspondence " - "with `state_parts`") - num_leapfrog_steps = ops.convert_to_tensor(num_leapfrog_steps, - name="num_leapfrog_steps") - current_target_log_prob, current_grads_target_log_prob = ( - _maybe_call_fn_and_grads( - target_log_prob_fn, - current_state_parts, - current_target_log_prob, - current_grads_target_log_prob)) - return control_flow_ops.while_loop( - cond=lambda iter_, *args: iter_ < num_leapfrog_steps, - body=_loop_body, - loop_vars=[ - np.int32(0), # iter_ - current_momentums, - current_state_parts, - current_target_log_prob, - current_grads_target_log_prob, - ], - back_prop=False)[1:] # Lop-off "iter_". - - -def _leapfrog_step(current_momentums, - target_log_prob_fn, - current_state_parts, - step_sizes, - current_grads_target_log_prob, - name=None): - """Applies one step of the leapfrog integrator.""" - with ops.name_scope( - name, "_leapfrog_step", - [current_momentums, current_state_parts, step_sizes, - current_grads_target_log_prob]): - proposed_momentums = [m + 0.5 * ss * g for m, ss, g - in zip(current_momentums, - step_sizes, - current_grads_target_log_prob)] - proposed_state_parts = [x + ss * m for x, ss, m - in zip(current_state_parts, - step_sizes, - proposed_momentums)] - proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts) - if not proposed_target_log_prob.dtype.is_floating: - raise TypeError("`target_log_prob_fn` must produce a `Tensor` " - "with `float` `dtype`.") - proposed_grads_target_log_prob = gradients_ops.gradients( - proposed_target_log_prob, proposed_state_parts) - if any(g is None for g in proposed_grads_target_log_prob): - raise ValueError( - "Encountered `None` gradient. Does your target `target_log_prob_fn` " - "access all `tf.Variable`s via `tf.get_variable`?\n" - " current_state_parts: {}\n" - " proposed_state_parts: {}\n" - " proposed_grads_target_log_prob: {}".format( - current_state_parts, - proposed_state_parts, - proposed_grads_target_log_prob)) - proposed_momentums = [m + 0.5 * ss * g for m, ss, g - in zip(proposed_momentums, - step_sizes, - proposed_grads_target_log_prob)] - return [ - proposed_momentums, - proposed_state_parts, - proposed_target_log_prob, - proposed_grads_target_log_prob, - ] - - -def _compute_energy_change(current_target_log_prob, - current_momentums, - proposed_target_log_prob, - proposed_momentums, - independent_chain_ndims, - name=None): - """Helper to `kernel` which computes the energy change.""" - with ops.name_scope( - name, "compute_energy_change", - ([current_target_log_prob, proposed_target_log_prob, - independent_chain_ndims] + - current_momentums + proposed_momentums)): - # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy - # since they're a mouthful and lets us inline more. - lk0, lk1 = [], [] - for current_momentum, proposed_momentum in zip(current_momentums, - proposed_momentums): - axis = math_ops.range(independent_chain_ndims, - array_ops.rank(current_momentum)) - lk0.append(_log_sum_sq(current_momentum, axis)) - lk1.append(_log_sum_sq(proposed_momentum, axis)) - - lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1), - axis=-1) - lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1), - axis=-1) - lp0 = -current_target_log_prob # potential - lp1 = -proposed_target_log_prob # proposed_potential - x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)], - axis=-1) - - # The sum is NaN if any element is NaN or we see both +Inf and -Inf. - # Thus we will replace such rows with infinite energy change which implies - # rejection. Recall that float-comparisons with NaN are always False. - is_sum_determinate = ( - math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) & - math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1)) - is_sum_determinate = array_ops.tile( - is_sum_determinate[..., array_ops.newaxis], - multiples=array_ops.concat([ - array_ops.ones(array_ops.rank(is_sum_determinate), - dtype=dtypes.int32), - [4], - ], axis=0)) - x = array_ops.where(is_sum_determinate, - x, - array_ops.fill(array_ops.shape(x), - value=x.dtype.as_numpy_dtype(np.inf))) - - return math_ops.reduce_sum(x, axis=-1) - - -def _choose(is_accepted, - accepted, - rejected, - independent_chain_ndims, - name=None): - """Helper to `kernel` which expand_dims `is_accepted` to apply tf.where.""" - def _expand_is_accepted_like(x): - with ops.name_scope("_choose"): - expand_shape = array_ops.concat([ - array_ops.shape(is_accepted), - array_ops.ones([array_ops.rank(x) - array_ops.rank(is_accepted)], - dtype=dtypes.int32), - ], axis=0) - multiples = array_ops.concat([ - array_ops.ones([array_ops.rank(is_accepted)], dtype=dtypes.int32), - array_ops.shape(x)[independent_chain_ndims:], - ], axis=0) - m = array_ops.tile(array_ops.reshape(is_accepted, expand_shape), - multiples) - m.set_shape(x.shape) - return m - with ops.name_scope(name, "_choose", values=[ - is_accepted, accepted, rejected, independent_chain_ndims]): - return array_ops.where(_expand_is_accepted_like(accepted), - accepted, - rejected) - - -def _maybe_call_fn_and_grads(fn, - fn_arg_list, - fn_result=None, - grads_fn_result=None, - description="target_log_prob"): - """Helper which computes `fn_result` and `grads` if needed.""" - fn_arg_list = (list(fn_arg_list) if _is_list_like(fn_arg_list) - else [fn_arg_list]) - if fn_result is None: - fn_result = fn(*fn_arg_list) - if not fn_result.dtype.is_floating: - raise TypeError("`{}` must be a `Tensor` with `float` `dtype`.".format( - description)) - if grads_fn_result is None: - grads_fn_result = gradients_ops.gradients( - fn_result, fn_arg_list) - if len(fn_arg_list) != len(grads_fn_result): - raise ValueError("`{}` must be in one-to-one correspondence with " - "`grads_{}`".format(*[description]*2)) - if any(g is None for g in grads_fn_result): - raise ValueError("Encountered `None` gradient.") - return fn_result, grads_fn_result - - -def _prepare_args(target_log_prob_fn, state, step_size, - target_log_prob=None, grads_target_log_prob=None, - maybe_expand=False, description="target_log_prob"): - """Helper which processes input args to meet list-like assumptions.""" - state_parts = list(state) if _is_list_like(state) else [state] - state_parts = [ops.convert_to_tensor(s, name="state") - for s in state_parts] - target_log_prob, grads_target_log_prob = _maybe_call_fn_and_grads( - target_log_prob_fn, - state_parts, - target_log_prob, - grads_target_log_prob, - description) - step_sizes = list(step_size) if _is_list_like(step_size) else [step_size] - step_sizes = [ - ops.convert_to_tensor( - s, name="step_size", dtype=target_log_prob.dtype) - for s in step_sizes] - if len(step_sizes) == 1: - step_sizes *= len(state_parts) - if len(state_parts) != len(step_sizes): - raise ValueError("There should be exactly one `step_size` or it should " - "have same length as `current_state`.") - maybe_flatten = lambda x: x if maybe_expand or _is_list_like(state) else x[0] - return [ - maybe_flatten(state_parts), - maybe_flatten(step_sizes), - target_log_prob, - grads_target_log_prob, - ] - - -def _is_list_like(x): - """Helper which returns `True` if input is `list`-like.""" - return isinstance(x, (tuple, list)) - - -def _log_sum_sq(x, axis=None): - """Computes log(sum(x**2)).""" - return math_ops.reduce_logsumexp(2. * math_ops.log(math_ops.abs(x)), axis) diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py deleted file mode 100644 index e7fcbc65ef..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functions to create a Markov Chain Monte Carlo Metropolis step.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.metropolis_hastings_impl import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'kernel', - 'evolve', - 'proposal_uniform', - 'proposal_normal', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py deleted file mode 100644 index 05aa134ed5..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py +++ /dev/null @@ -1,527 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Metropolis-Hastings and proposal distributions. - -@@kernel -@@evolve -@@proposal_uniform -@@proposal_normal -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import state_ops - -__all__ = [ - "kernel", - "evolve", - "proposal_uniform", - "proposal_normal", -] - - -KernelResults = collections.namedtuple( - "KernelResults", - [ - "log_accept_ratio", - "current_target_log_prob", # "Current result" means "accepted". - "is_accepted", - "proposed_state", - ]) - - -def kernel(target_log_prob_fn, - proposal_fn, - current_state, - seed=None, - current_target_log_prob=None, - name=None): - """Runs the Metropolis-Hastings transition kernel. - - This function can update multiple chains in parallel. It assumes that all - leftmost dimensions of `current_state` index independent chain states (and are - therefore updated independently). The output of `target_log_prob_fn()` should - sum log-probabilities across all event dimensions. Slices along the rightmost - dimensions may have different target distributions; for example, - `current_state[0, :]` could have a different target distribution from - `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of - independent chains is `tf.size(target_log_prob_fn(*current_state))`.) - - Args: - target_log_prob_fn: Python callable which takes an argument like - `current_state` (or `*current_state` if it's a list) and returns its - (possibly unnormalized) log-density under the target distribution. - proposal_fn: Python callable which takes an argument like `current_state` - (or `*current_state` if it's a list) and returns a tuple of proposed - states of same shape as `state`, and a log ratio `Tensor` of same shape - as `current_target_log_prob`. The log ratio is the log-probability of - `state` given proposed states minus the log-probability of proposed - states given `state`. If the proposal is symmetric, set the second value - to `None`: this enables more efficient computation than explicitly - supplying a tensor of zeros. - current_state: `Tensor` or Python `list` of `Tensor`s representing the - current state(s) of the Markov chain(s). The first `r` dimensions index - independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`. - seed: Python integer to seed the random number generator. - current_target_log_prob: (Optional) `Tensor` representing the value of - `target_log_prob_fn` at the `current_state`. The only reason to - specify this argument is to reduce TF graph size. - Default value: `None` (i.e., compute as needed). - name: A name of the operation (optional). - - Returns: - next_state: Tensor or Python list of `Tensor`s representing the state(s) - of the Markov chain(s) at each result step. Has same shape as - `current_state`. - kernel_results: `collections.namedtuple` of internal calculations used to - advance the chain. - - #### Examples - - We illustrate Metropolis-Hastings on a Normal likelihood with - unknown mean. - - ```python - tfd = tf.contrib.distributions - tfp = tf.contrib.bayesflow - - loc = tf.get_variable("loc", initializer=1.) - x = tf.constant([0.0] * 50) - - def make_target_log_prob_fn(x): - def target_log_prob_fn(loc): - prior = tfd.Normal(loc=0., scale=1.) - likelihood = tfd.Independent( - tfd.Normal(loc=loc, scale=0.1), - reinterpreted_batch_ndims=1) - return prior.log_prob(loc) + likelihood.log_prob(x) - return target_log_prob_fn - - next_state, kernel_results = tfp.metropolis_hastings.kernel( - target_log_prob_fn=make_target_log_prob_fn(x), - proposal_fn=tfp.metropolis_hastings.proposal_normal(), - current_state=loc) - loc_update = loc.assign(next_state) - ``` - - We illustrate Metropolis-Hastings on a Normal likelihood with - unknown mean and variance. We apply 4 chains. - - ```python - tfd = tf.contrib.distributions - tfp = tf.contrib.bayesflow - - num_chains = 4 - loc = tf.get_variable("loc", shape=[num_chains], - initializer=tf.random_normal_initializer()) - scale = tf.get_variable("scale", shape=[num_chains], - initializer=tf.ones_initializer()) - x = tf.constant([0.0] * 50) - - def make_target_log_prob_fn(x): - data = tf.reshape(x, shape=[-1, 1]) - def target_log_prob_fn(loc, scale): - prior_loc = tfd.Normal(loc=0., scale=1.) - prior_scale = tfd.InverseGamma(concentration=1., rate=1.) - likelihood = tfd.Independent( - tfd.Normal(loc=loc, scale=scale), - reinterpreted_batch_ndims=1) - return (prior_loc.log_prob(loc) + - prior_scale.log_prob(scale) + - likelihood.log_prob(data)) - return target_log_prob_fn - - def proposal_fn(loc, scale): - loc_proposal = tfp.metropolis_hastings.proposal_normal() - scale_proposal = tfp.metropolis_hastings.proposal_uniform(minval=-1.) - proposed_loc, _ = loc_proposal(loc) - proposed_scale, _ = scale_proposal(scale) - proposed_scale = tf.maximum(proposed_scale, 0.01) - return [proposed_loc, proposed_scale], None - - next_state, kernel_results = tfp.metropolis_hastings.kernel( - target_log_prob_fn=make_target_log_prob_fn(x), - proposal_fn=proposal_fn, - current_state=[loc, scale]) - train_op = tf.group(loc.assign(next_state[0]), - scale.assign(next_state[1])) - ``` - - """ - with ops.name_scope( - name, "metropolis_hastings_kernel", - [current_state, seed, current_target_log_prob]): - with ops.name_scope("initialize"): - maybe_expand = lambda x: list(x) if _is_list_like(x) else [x] - current_state_parts = maybe_expand(current_state) - if current_target_log_prob is None: - current_target_log_prob = target_log_prob_fn(*current_state_parts) - - proposed_state, log_transit_ratio = proposal_fn(*current_state_parts) - proposed_state_parts = maybe_expand(proposed_state) - - proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts) - - with ops.name_scope( - "accept_reject", - [current_state_parts, proposed_state_parts, - current_target_log_prob, proposed_target_log_prob]): - log_accept_ratio = proposed_target_log_prob - current_target_log_prob - if log_transit_ratio is not None: - # If the log_transit_ratio is None, then assume the proposal is - # symmetric, i.e., - # log p(old | new) - log p(new | old) = 0. - log_accept_ratio += log_transit_ratio - - # u < exp(log_accept_ratio), where u~Uniform[0,1) - # ==> log(u) < log_accept_ratio - random_value = random_ops.random_uniform( - array_ops.shape(log_accept_ratio), - dtype=log_accept_ratio.dtype, - seed=seed) - random_negative = math_ops.log(random_value) - is_accepted = random_negative < log_accept_ratio - next_state_parts = [array_ops.where(is_accepted, - proposed_state_part, - current_state_part) - for proposed_state_part, current_state_part in - zip(proposed_state_parts, current_state_parts)] - accepted_log_prob = array_ops.where(is_accepted, - proposed_target_log_prob, - current_target_log_prob) - maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0] - return [ - maybe_flatten(next_state_parts), - KernelResults( - log_accept_ratio=log_accept_ratio, - current_target_log_prob=accepted_log_prob, - is_accepted=is_accepted, - proposed_state=maybe_flatten(proposed_state_parts), - ), - ] - - -def evolve(initial_sample, - initial_log_density, - initial_log_accept_ratio, - target_log_prob_fn, - proposal_fn, - n_steps=1, - seed=None, - name=None): - """Performs `n_steps` of the Metropolis-Hastings update. - - Given a probability density function, `f(x)` and a proposal scheme which - generates new points from old, this `Op` returns a tensor - which may be used to generate approximate samples from the target distribution - using the Metropolis-Hastings algorithm. These samples are from a Markov chain - whose equilibrium distribution matches the target distribution. - - The probability distribution may have an unknown normalization constan. - We parameterize the probability density as follows: - - ```none - f(x) = exp(L(x) + constant) - ``` - - Here `L(x)` is any continuous function with an (possibly unknown but finite) - upper bound, i.e. there exists a number beta such that - `L(x)< beta < infinity` for all x. The constant is the normalization needed - to make `f(x)` a probability density (as opposed to just a finite measure). - - Although `initial_sample` can be arbitrary, a poor choice may result in a - slow-to-mix chain. In many cases the best choice is the one that maximizes - the target density, i.e., choose `initial_sample` such that - `f(initial_sample) >= f(x)` for all `x`. - - - If the support of the distribution is a strict subset of R^n (but of non zero - measure), then the unnormalized log-density `L(x)` should return `-infinity` - outside the support domain. This effectively forces the sampler to only - explore points in the regions of finite support. - - Usage: - This function is meant to be wrapped up with some of the common proposal - schemes (e.g. random walk, Langevin diffusion etc) to produce a more user - friendly interface. However, it may also be used to create bespoke samplers. - - The following example, demonstrates the use to generate a 1000 uniform random - walk Metropolis samplers run in parallel for the normal target distribution. - - ```python - n = 3 # dimension of the problem - - # Generate 1000 initial values randomly. Each of these would be an - # independent starting point for a Markov chain. - state = tf.get_variable( - "state", - initializer=tf.random_normal([1000, n], - mean=3.0, - dtype=tf.float64, - seed=42)) - - # Computes the log(p(x)) for the unit normal density and ignores the - # normalization constant. - def log_density(x): - return -tf.reduce_sum(x * x, reduction_indices=-1) / 2.0 - - # Initial log-density value - state_log_density = tf.get_variable( - "state_log_density", - initializer=log_density(state.initialized_value())) - - # A variable to store the log_acceptance_ratio: - log_acceptance_ratio = tf.get_variable( - "log_acceptance_ratio", - initializer=tf.zeros([1000], dtype=tf.float64)) - - # Generates random proposals by moving each coordinate uniformly and - # independently in a box of size 2 centered around the current value. - # Returns the new point and also the log of the Hastings ratio (the - # ratio of the probability of going from the proposal to origin and the - # probability of the reverse transition). When this ratio is 1, the value - # may be omitted and replaced by None. - def random_proposal(x): - return (x + tf.random_uniform(tf.shape(x), minval=-1, maxval=1, - dtype=x.dtype, seed=12)), None - - # Create the op to propagate the chain for 100 steps. - stepper = mh.evolve( - state, state_log_density, log_acceptance_ratio, - log_density, random_proposal, n_steps=100, seed=123) - init = tf.initialize_all_variables() - with tf.Session() as sess: - sess.run(init) - # Run the chains for a total of 1000 steps and print out the mean across - # the chains every 100 iterations. - for n_iter in range(10): - # Executing the stepper advances the chain to the next state. - sess.run(stepper) - # Print out the current value of the mean(sample) for every dimension. - print(np.mean(sess.run(state), 0)) - # Estimated covariance matrix - samples = sess.run(state) - print(np.cov(samples, rowvar=False)) - ``` - - Args: - initial_sample: A float-like `tf.Variable` of any shape that can - be consumed by the `target_log_prob_fn` and `proposal_fn` - callables. - initial_log_density: Float-like `tf.Variable` with `dtype` and shape - equivalent to `target_log_prob_fn(initial_sample)`, i.e., matching - the result of `target_log_prob_fn` invoked at `current_state`. - initial_log_accept_ratio: A `tf.Variable` with `dtype` and shape matching - `initial_log_density`. Stands for the log of Metropolis-Hastings - acceptance ratio after propagating the chain for `n_steps`. - target_log_prob_fn: A Python callable evaluated at - `current_state` and returning a float-like `Tensor` of log target-density - up to a normalizing constant. In other words, - `target_log_prob_fn(x) = log(g(x))`, where - `target_density = g(x)/Z` for some constant `A`. The shape of the input - tensor is the same as the shape of the `current_state`. The shape of the - output tensor is either - (a). Same as the input shape if the density being sampled is one - dimensional, or - (b). If the density is defined for `events` of shape - `event_shape = [E1, E2, ... Ee]`, then the input tensor should be of - shape `batch_shape + event_shape`, here `batch_shape = [B1, ..., Bb]` - and the result must be of shape [B1, ..., Bb]. For example, if the - distribution that is being sampled is a 10 dimensional normal, - then the input tensor may be of shape [100, 10] or [30, 20, 10]. The - last dimension will then be 'consumed' by `target_log_prob_fn` - and it should return tensors of shape [100] and [30, 20] respectively. - proposal_fn: A callable accepting a real valued `Tensor` of current sample - points and returning a tuple of two `Tensors`. The first element of the - pair should be a `Tensor` containing the proposal state and should have - the same shape as the input `Tensor`. The second element of the pair gives - the log of the ratio of the probability of transitioning from the - proposal points to the input points and the probability of transitioning - from the input points to the proposal points. If the proposal is - symmetric, i.e. - Probability(Proposal -> Current) = Probability(Current -> Proposal) - the second value should be set to None instead of explicitly supplying a - tensor of zeros. In addition to being convenient, this also leads to a - more efficient graph. - n_steps: A positive `int` or a scalar `int32` tensor. Sets the number of - iterations of the chain. - seed: `int` or None. The random seed for this `Op`. If `None`, no seed is - applied. - name: A string that sets the name for this `Op`. - - Returns: - forward_step: an `Op` to step the Markov chain forward for `n_steps`. - """ - - with ops.name_scope(name, "metropolis_hastings", [initial_sample]): - current_state = initial_sample - current_target_log_prob = initial_log_density - log_accept_ratio = initial_log_accept_ratio - - def step(i, current_state, current_target_log_prob, log_accept_ratio): - """Wrap single Markov chain iteration in `while_loop`.""" - next_state, kernel_results = kernel( - target_log_prob_fn=target_log_prob_fn, - proposal_fn=proposal_fn, - current_state=current_state, - current_target_log_prob=current_target_log_prob, - seed=seed) - accepted_log_prob = kernel_results.current_target_log_prob - log_accept_ratio = kernel_results.log_accept_ratio - return i + 1, next_state, accepted_log_prob, log_accept_ratio - - (_, accepted_state, accepted_target_log_prob, accepted_log_accept_ratio) = ( - control_flow_ops.while_loop( - cond=lambda i, *ignored_args: i < n_steps, - body=step, - loop_vars=[ - 0, # i - current_state, - current_target_log_prob, - log_accept_ratio, - ], - parallel_iterations=1 if seed is not None else 10, - # TODO(b/73775595): Confirm optimal setting of swap_memory. - swap_memory=1)) - - forward_step = control_flow_ops.group( - state_ops.assign(current_target_log_prob, accepted_target_log_prob), - state_ops.assign(current_state, accepted_state), - state_ops.assign(log_accept_ratio, accepted_log_accept_ratio)) - - return forward_step - - -def proposal_uniform(step_size=1., - seed=None, - name=None): - """Returns a callable that adds a random uniform tensor to the input. - - This function returns a callable that accepts one `Tensor` argument of any - shape and a real data type (i.e. `tf.float32` or `tf.float64`). It adds a - sample from a random uniform distribution drawn from [-stepsize, stepsize] - to its input. It also returns the log of the ratio of the probability of - moving from the input point to the proposed point, but since this log ratio is - identically equal to 0 (because the probability of drawing a value `x` from - the symmetric uniform distribution is the same as the probability of drawing - `-x`), it simply returns None for the second element of the returned tuple. - - Args: - step_size: A positive `float` or a scalar tensor of real dtype - controlling the scale of the uniform distribution. - If step_size = a, then draws are made uniformly from [-a, a]. - seed: `int` or None. The random seed for this `Op`. If `None`, no seed is - applied. - name: A string that sets the name for this `Op`. - - Returns: - proposal_fn: A callable accepting one float-like `Tensor` and returning a - 2-tuple. The first value in the tuple is a `Tensor` of the same shape and - dtype as the input argument and the second element of the tuple is None. - """ - - with ops.name_scope(name, "proposal_uniform", [step_size]): - step_size = ops.convert_to_tensor(step_size, name="step_size") - - def proposal_fn(input_state, name=None): - """Adds a uniform perturbation to the input state. - - Args: - input_state: A `Tensor` of any shape and real dtype. - name: A string that sets the name for this `Op`. - - Returns: - proposal_state: A float-like `Tensor` with `dtype` and shape matching - `input_state`. - log_transit_ratio: `None`. Proposal is symmetric. - """ - with ops.name_scope(name, "proposer", [input_state]): - input_state = ops.convert_to_tensor(input_state, name="input_state") - return input_state + random_ops.random_uniform( - array_ops.shape(input_state), - minval=-step_size, - maxval=step_size, - seed=seed), None - return proposal_fn - - -def proposal_normal(scale=1., - seed=None, - name=None): - """Returns a callable that adds a random normal tensor to the input. - - This function returns a callable that accepts one `Tensor` argument of any - shape and a real data type (i.e. `tf.float32` or `tf.float64`). The callable - adds a sample from a normal distribution with the supplied standard deviation - and zero mean to its input argument (called the proposal point). - The callable returns a tuple with the proposal point as the first element. - The second element is identically `None`. It is included so the callable is - compatible with the expected signature of the proposal scheme argument in the - `metropolis_hastings` function. A value of `None` indicates that the - probability of going from the input point to the proposal point is equal to - the probability of going from the proposal point to the input point. - - Args: - scale: A positive `float` or a scalar tensor of any real dtype controlling - the scale of the normal distribution. - seed: `int` or None. The random seed for this `Op`. If `None`, no seed is - applied. - name: A string that sets the name for this `Op`. - - Returns: - proposal_fn: A callable accepting one float-like `Tensor` and returning a - 2-tuple. The first value in the tuple is a `Tensor` of the same shape and - dtype as the input argument and the second element of the tuple is None. - """ - - with ops.name_scope(name, "proposal_normal", [scale]): - scale = ops.convert_to_tensor(scale, name="scale") - - def proposal_fn(input_state, name=None): - """Adds a normal perturbation to the input state. - - Args: - input_state: A `Tensor` of any shape and real dtype. - name: A string that sets the name for this `Op`. - - Returns: - proposal_state: A float-like `Tensor` with `dtype` and shape matching - `input_state`. - log_transit_ratio: `None`. Proposal is symmetric. - """ - - with ops.name_scope(name, "proposer", [input_state]): - input_state = ops.convert_to_tensor(input_state, name="input_state") - return input_state + random_ops.random_normal( - array_ops.shape(input_state), - mean=0., - stddev=scale, - dtype=scale.dtype, - seed=seed), None - return proposal_fn - - -def _is_list_like(x): - """Helper which returns `True` if input is `list`-like.""" - return isinstance(x, (tuple, list)) -- GitLab From 56d1cfde15c04ebe27fe31409a724a56e7051b15 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 19:08:15 -0700 Subject: [PATCH 522/960] [XLA] Redesign: implement and test ternary ops. PiperOrigin-RevId: 190561679 --- .../xla/client/xla_client/xla_builder.cc | 42 +++- .../xla/client/xla_client/xla_builder.h | 4 + .../compiler/xla/service/shape_inference.cc | 8 +- .../compiler/xla/service/shape_inference.h | 3 + .../xla/tests/array_elementwise_ops_test.cc | 205 +++++++++--------- .../xla/tests/client_library_test_base.cc | 21 +- .../xla/tests/client_library_test_base.h | 2 + 7 files changed, 175 insertions(+), 110 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 1b90b45bfb..fcaf393b6b 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -288,6 +288,44 @@ XlaOp XlaBuilder::BinaryOp( }()); } +XlaOp XlaBuilder::TernaryOp(HloOpcode triop, const XlaOp& lhs, const XlaOp& rhs, + const XlaOp& ehs) { + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, lhs.GetShape()); + TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, rhs.GetShape()); + TF_ASSIGN_OR_RETURN(const Shape& ehs_shape, ehs.GetShape()); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferTernaryOpShape( + triop, lhs_shape, rhs_shape, ehs_shape)); + XlaOp updated_lhs = lhs; + XlaOp updated_rhs = rhs; + XlaOp updated_ehs = ehs; + if (!ShapeUtil::IsTuple(instr.shape())) { + if (!ShapeUtil::IsTuple(lhs_shape) && + !ShapeUtil::SameDimensions(instr.shape(), lhs_shape)) { + // lhs is being implicitly broadcasted. Change to explicit. + TF_ASSIGN_OR_RETURN(updated_lhs, + AddBroadcastSequence(instr.shape(), lhs)); + } + if (!ShapeUtil::IsTuple(rhs_shape) && + !ShapeUtil::SameDimensions(instr.shape(), rhs_shape)) { + // rhs is being implicitly broadcasted. Change to explicit. + TF_ASSIGN_OR_RETURN(updated_rhs, + AddBroadcastSequence(instr.shape(), rhs)); + } + if (!ShapeUtil::IsTuple(ehs_shape) && + !ShapeUtil::SameDimensions(instr.shape(), ehs_shape)) { + // ehs is being implicitly broadcasted. Change to explicit. + TF_ASSIGN_OR_RETURN(updated_ehs, + AddBroadcastSequence(instr.shape(), ehs)); + } + } + return AddInstruction(std::move(instr), triop, + {updated_lhs, updated_rhs, updated_ehs}); + }()); +} + XlaOp XlaBuilder::Add(const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { return BinaryOp(HloOpcode::kAdd, lhs, rhs, broadcast_dimensions); @@ -449,7 +487,7 @@ void XlaBuilder::Trace(const string& tag, const XlaOp& operand) { XlaOp XlaBuilder::Select(const XlaOp& pred, const XlaOp& on_true, const XlaOp& on_false) { - return UnimplementedOp(); + return TernaryOp(HloOpcode::kSelect, pred, on_true, on_false); } XlaOp XlaBuilder::Tuple(tensorflow::gtl::ArraySlice elements) { @@ -755,7 +793,7 @@ XlaOp XlaBuilder::Neg(const XlaOp& operand) { XlaOp XlaBuilder::Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) { - return UnimplementedOp(); + return TernaryOp(HloOpcode::kClamp, min, operand, max); } XlaOp XlaBuilder::Map(tensorflow::gtl::ArraySlice operands, diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index cc33356cc1..c5c35159e0 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -762,6 +762,10 @@ class XlaBuilder { XlaOp BinaryOp(HloOpcode binop, const XlaOp& lhs, const XlaOp& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions); + // Internal helper method that does the building for an arbitrary ternary op. + XlaOp TernaryOp(HloOpcode triop, const XlaOp& lhs, const XlaOp& rhs, + const XlaOp& ehs); + StatusOr InDimBroadcast( const Shape& shape, const XlaOp& operand, tensorflow::gtl::ArraySlice broadcast_dimensions); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 2a70ea0354..36456d552d 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1038,8 +1038,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( /* static */ StatusOr ShapeInference::InferTernaryOpShape( HloOpcode opcode, const HloInstruction* lhs, const HloInstruction* rhs, const HloInstruction* ehs) { - return InferTernaryOpShape(OpcodeToTernaryOperation(opcode), lhs->shape(), - rhs->shape(), ehs->shape()); + return InferTernaryOpShape(opcode, lhs->shape(), rhs->shape(), ehs->shape()); +} + +/* static */ StatusOr ShapeInference::InferTernaryOpShape( + HloOpcode opcode, const Shape& lhs, const Shape& rhs, const Shape& ehs) { + return InferTernaryOpShape(OpcodeToTernaryOperation(opcode), lhs, rhs, ehs); } /* static */ StatusOr ShapeInference::InferTernaryOpShape( diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index b6552a34ae..88830e6d25 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -70,6 +70,9 @@ class ShapeInference { static StatusOr InferTernaryOpShape(TernaryOperation operation, const Shape& lhs, const Shape& rhs, const Shape& ehs); + static StatusOr InferTernaryOpShape(HloOpcode opcode, const Shape& lhs, + const Shape& rhs, + const Shape& ehs); static StatusOr InferTernaryOpShape(HloOpcode opcode, const HloInstruction* lhs, const HloInstruction* rhs, diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index fa7ac3ca9b..03c91745b9 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -244,7 +244,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoConstantU64s) { std::unique_ptr rhs_data = client_->TransferToServer(*rhs_literal).ConsumeValueOrDie(); - auto add = b.Add(lhs_param, rhs_param); + b.Add(lhs_param, rhs_param); std::vector expected(lhs.size()); for (int64 i = 0; i < lhs.size(); ++i) { @@ -1914,101 +1914,98 @@ XLA_TEST_F(ArrayElementwiseOpTest, RemTwoConstantS32s) { } XLA_TEST_F(ArrayElementwiseOpTest, NonNanClampF32) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto minimum = builder.ConstantR1({1.0f, -6.5f, 1.0f, 2.25f, 0.0f}); auto argument = builder.ConstantR1({2.0f, 10.0f, -5.0f, 1.0f, 10.0f}); auto maximum = builder.ConstantR1({3.0f, 0.5f, 25.5f, 5.0f, 123.0}); - auto clamp = builder.Clamp(minimum, argument, maximum); + builder.Clamp(minimum, argument, maximum); ComputeAndCompareR1(&builder, {2.0f, 0.5f, 1.0f, 2.25f, 10.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, ClampF32Scalar) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto minimum = builder.ConstantR0(0.0f); auto argument = builder.ConstantR1({2.0f, 10.0f, -5.0f, 1.0f, 4.0f}); auto maximum = builder.ConstantR0(5.0f); - auto clamp = builder.Clamp(minimum, argument, maximum); + builder.Clamp(minimum, argument, maximum); ComputeAndCompareR1(&builder, {2.0f, 5.0f, 0.0f, 1.0f, 4.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, ClampF32ScalarVector) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto min_scalar = builder.ConstantR0(0.0f); auto min_vector = builder.ConstantR1({1.0f, -6.5f, 1.0f, 2.25f, 0.0f}); auto arg_vector = builder.ConstantR1({2.0f, 10.0f, -5.0f, 1.0f, 4.0f}); auto max_scalar = builder.ConstantR0(3.0f); auto max_vector = builder.ConstantR1({3.0f, 0.5f, 25.5f, 5.0f, 123.0}); // Perform clamp with broadcasted scalar and vector. - auto clamp = builder.Add( - builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), - builder.Clamp(min_scalar, arg_vector, max_vector)), - builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), - builder.Clamp(min_scalar, arg_vector, max_scalar))); + builder.Add(builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), + builder.Clamp(min_scalar, arg_vector, max_vector)), + builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), + builder.Clamp(min_scalar, arg_vector, max_scalar))); ComputeAndCompareR1(&builder, {8.0f, 7.0f, 2.0f, 6.5f, 14.0f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, ClampS32Vector) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto min_vector = builder.ConstantR1({1, -6, 1, 2, 0, -5}); auto arg_vector = builder.ConstantR1({2, 10, -5, 1, 4, 10}); auto max_vector = builder.ConstantR1({3, 0, 25, 5, 123, -1}); - auto clamp = builder.Clamp(min_vector, arg_vector, max_vector); + builder.Clamp(min_vector, arg_vector, max_vector); ComputeAndCompareR1(&builder, {2, 0, 1, 2, 4, -1}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ClampS32ScalarVector) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto min_scalar = builder.ConstantR0(0); auto min_vector = builder.ConstantR1({1, -6, 1, 2, 0}); auto arg_vector = builder.ConstantR1({2, 10, -5, 1, 4}); auto max_scalar = builder.ConstantR0(3); auto max_vector = builder.ConstantR1({3, 1, 25, 5, 123}); // Perform clamp with broadcasted scalar and vector. - auto clamp = builder.Add( - builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), - builder.Clamp(min_scalar, arg_vector, max_vector)), - builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), - builder.Clamp(min_scalar, arg_vector, max_scalar))); + builder.Add(builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), + builder.Clamp(min_scalar, arg_vector, max_vector)), + builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), + builder.Clamp(min_scalar, arg_vector, max_scalar))); ComputeAndCompareR1(&builder, {8, 8, 2, 6, 14}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ClampU32Vector) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto min_vector = builder.ConstantR1({1, 2, 1, 2, 0, ~0u - 4}); auto arg_vector = builder.ConstantR1({2, 10, 5, 1, 4, 10}); auto max_vector = builder.ConstantR1({3, 5, 25, 5, 123, ~0u}); - auto clamp = builder.Clamp(min_vector, arg_vector, max_vector); + builder.Clamp(min_vector, arg_vector, max_vector); ComputeAndCompareR1(&builder, {2, 5, 5, 2, 4, ~0u - 4}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, ClampU32ScalarVector) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto min_scalar = builder.ConstantR0(0); auto min_vector = builder.ConstantR1({1, 0, 1, 2, 0}); auto arg_vector = builder.ConstantR1({2, 10, 0, 1, 4}); auto max_scalar = builder.ConstantR0(3); auto max_vector = builder.ConstantR1({3, 1, 25, 5, 123}); // Perform clamp with broadcasted scalar and vector. - auto clamp = builder.Add( - builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), - builder.Clamp(min_scalar, arg_vector, max_vector)), - builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), - builder.Clamp(min_scalar, arg_vector, max_scalar))); + builder.Add(builder.Add(builder.Clamp(min_vector, arg_vector, max_scalar), + builder.Clamp(min_scalar, arg_vector, max_vector)), + builder.Add(builder.Clamp(min_vector, arg_vector, max_vector), + builder.Clamp(min_scalar, arg_vector, max_scalar))); ComputeAndCompareR1(&builder, {8, 8, 2, 6, 14}, {}); } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({1.1f, 2.2f, 3.3f, 5.5f}); @@ -2022,7 +2019,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersF32s) { auto p0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto p1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto add = builder.Add(p0, p1); + builder.Add(p0, p1); ComputeAndCompareR1(&builder, {8.3f, 4.5f, 6.7f, 11.1f}, {param0_data.get(), param1_data.get()}, @@ -2030,7 +2027,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersZeroElementF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR3FromArray3D(Array3D(0, 7, 0)); @@ -2044,7 +2041,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersZeroElementF32s) { auto p0 = builder.Parameter(0, param0_literal->shape(), "param0"); auto p1 = builder.Parameter(1, param1_literal->shape(), "param1"); - auto add = builder.Add(p0, p1); + builder.Add(p0, p1); Array3D expected(0, 7, 0); ComputeAndCompareR3( @@ -2052,7 +2049,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddTwoParametersZeroElementF32s) { } XLA_TEST_F(ArrayElementwiseOpTest, AddParameterToConstantF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr param0_literal = Literal::CreateR1({1.1f, 2.2f, 3.3f, 5.5f}); @@ -2061,35 +2058,35 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddParameterToConstantF32s) { auto a = builder.ConstantR1({1.1f, 2.2f, 3.3f, 4.4f}); auto p = builder.Parameter(0, param0_literal->shape(), "param0"); - auto add = builder.Add(a, p); + builder.Add(a, p); ComputeAndCompareR1(&builder, {2.2f, 4.4f, 6.6f, 9.9f}, {param0_data.get()}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, CosF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({3.14159f, 0.0f, 1.570796f, -0.78539f}); - auto result = builder.Cos(a); + builder.Cos(a); ComputeAndCompareR1(&builder, {-1.0f, 1.0f, 0.0f, 0.707107f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, SinF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({3.14159f, 0.0f, 1.570796f, -0.78539f}); - auto result = builder.Sin(a); + builder.Sin(a); ComputeAndCompareR1(&builder, {0.0f, 0.0f, 1.0f, -0.707107f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, Atan2F32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({0.0f, 5.0f, 0.0f, -3.0f, 2.0f, -8.0f}); auto b = builder.ConstantR1({6.0f, 0.0f, -4.0f, 0.0f, 2.0f, 8.0f}); - auto atan = builder.Atan2(a, b); + builder.Atan2(a, b); ComputeAndCompareR1( &builder, @@ -2098,9 +2095,9 @@ XLA_TEST_F(ArrayElementwiseOpTest, Atan2F32s) { } XLA_TEST_F(ArrayElementwiseOpTest, TanhF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({-2.5f, 3.14f, 2.25f}); - auto result = builder.Tanh(a); + builder.Tanh(a); ComputeAndCompareR1(&builder, {-0.986614f, 0.996260f, 0.978026}, {}, error_spec_); @@ -2110,7 +2107,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, TanhF32sVector) { // This is like the test ArrayElementwiseOpTest.TanhF32s above, except that // the input tensor is large enough to exercise the vectorized tanh // implementation on XLA CPU. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto input_literal = Literal::CreateR1( {1.02, -0.32, 0.85, 0.90, 1.23, -0.91, -0.49, 0.80, -0.67, 0.16, -0.07, 0.39, -0.41, 0.04, 1.36, 1.25, 0.41, 0.65, -1.08, 0.32, @@ -2149,7 +2146,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, TanhF32sVector) { XLA_TEST_F(ArrayElementwiseOpTest, ExpF32sVector) { // The input tensor is large enough to exercise the vectorized exp // implementation on XLA CPU. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // Just to help make sense of the scales here -- exp(89) saturates float32 and // exp(-10) is smaller than our error spec. @@ -2185,7 +2182,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, ExpF32sVector) { XLA_TEST_F(ArrayElementwiseOpTest, LogF32sVector) { // The input tensor is large enough to exercise the vectorized exp // implementation on XLA CPU. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr input_literal = Literal::CreateR1( {-1.29, -1.41, -1.25, -13.5, -11.7, -17.9, -198, @@ -2225,14 +2222,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) { // / / // b -----/ / // c---------------------/ - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({1.1f, 2.2f, 3.3f, 4.4f}); auto b = builder.ConstantR1({2.1f, 3.2f, 4.3f, 5.4f}); auto c = builder.ConstantR1({-3.3f, -15.5f, -7.7f, -29.9f}); auto add = builder.Add(a, b); - auto add2 = builder.Add(add, c); + builder.Add(add, c); ComputeAndCompareR1(&builder, {-0.1f, -10.1f, -0.1f, -20.1f}, {}, error_spec_); @@ -2243,14 +2240,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldRight) { // / / // c -----/ / // a---------------------/ - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({91.1f, 2.2f, 3.3f, 4.4f}); auto b = builder.ConstantR1({2.1f, 3.2f, 4.3f, 5.4f}); auto c = builder.ConstantR1({-3.3f, -15.5f, -7.7f, -29.9f}); auto add = builder.Add(b, c); - auto add2 = builder.Add(a, add); + builder.Add(a, add); ComputeAndCompareR1(&builder, {89.9f, -10.1f, -0.1f, -20.1f}, {}, error_spec_); @@ -2260,14 +2257,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddWithNeg) { // a ----- (neg) ----- (add) // / // b ----- (neg) ----/ - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({91.1f, 2.2f, 3.3f, 4.4f}); auto b = builder.ConstantR1({2.1f, 3.2f, 4.3f, 5.4f}); auto neg_a = builder.Neg(a); auto neg_b = builder.Neg(b); - auto result = builder.Add(neg_a, neg_b); + builder.Add(neg_a, neg_b); ComputeAndCompareR1(&builder, {-93.2f, -5.4f, -7.6f, -9.8f}, {}, error_spec_); @@ -2281,7 +2278,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddChainTwoSide) { // c ------ (add) ------------/ // / // d -----/ - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR1({91.1f, 2.2f, 3.3f, 4.4f}); auto b = builder.ConstantR1({2.1f, 3.2f, 4.3f, 5.4f}); @@ -2290,19 +2287,19 @@ XLA_TEST_F(ArrayElementwiseOpTest, AddChainTwoSide) { auto add_ab = builder.Add(a, b); auto add_cd = builder.Add(c, d); - auto add_all = builder.Add(add_ab, add_cd); + builder.Add(add_ab, add_cd); ComputeAndCompareR1(&builder, {70.9f, -0.1f, -40.1f, 0.1f}, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, 2DBinaryOpF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto b = builder.ConstantR2({{-1.5f, 8.14f, 42.0}, {-1.0f, -4.0f, 5.55f}}); - auto add = builder.Add(a, b); + builder.Add(a, b); Array2D expected_array( {{-4.0f, 11.28f, 43.0f}, {1.25f, -14.0f, 8.88f}}); @@ -2311,11 +2308,11 @@ XLA_TEST_F(ArrayElementwiseOpTest, 2DBinaryOpF32s) { XLA_TEST_F(ArrayElementwiseOpTest, ScalarPlus2DF32) { // Add a scalar + matrix. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto scalar = builder.ConstantR0(3.0f); - auto add = builder.Add(scalar, a); + builder.Add(scalar, a); Array2D expected_array({{0.5f, 6.14f, 4.0f}, {5.25f, -7.0f, 6.33f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); @@ -2323,11 +2320,11 @@ XLA_TEST_F(ArrayElementwiseOpTest, ScalarPlus2DF32) { XLA_TEST_F(ArrayElementwiseOpTest, 2DPlusScalarF32) { // Add a matrix + scalar. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto scalar = builder.ConstantR0(3.0f); - auto add = builder.Add(a, scalar); + builder.Add(a, scalar); Array2D expected_array({{0.5f, 6.14f, 4.0f}, {5.25f, -7.0f, 6.33f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); @@ -2336,14 +2333,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, 2DPlusScalarF32) { XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32) { // Test simple broadcasting of a R1F32 over R2F32. The vector's size matches // only dim 0 of the matrix. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({20.0f, 40.0f, 60.0f}); // clang-format off auto m = builder.ConstantR2({ {-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); // clang-format on - auto add = builder.Add(v, m, /*broadcast_dimensions=*/{1}); + builder.Add(v, m, /*broadcast_dimensions=*/{1}); Array2D expected_array( {{17.5f, 43.14f, 61.0f}, {22.25f, 30.0f, 63.33f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); @@ -2369,10 +2366,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Eq) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ne) { // Test broadcasting in Ne comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({42, 73}); auto m = builder.ConstantR2({{42, 73}, {42, 52}}); - auto cmp = builder.Ne(v, m, /*broadcast_dimensions=*/{1}); + builder.Ne(v, m, /*broadcast_dimensions=*/{1}); const string expected = R"(pred[2,2] { { 00 }, @@ -2383,10 +2380,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ne) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ge) { // Test broadcasting in Ge comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({1, 2, 3, 4}); auto m = builder.ConstantR2({{1, 0, 5, 6}, {42, 52, 10, 4}}); - auto cmp = builder.Ge(v, m, /*broadcast_dimensions=*/{1}); + builder.Ge(v, m, /*broadcast_dimensions=*/{1}); const string expected = R"(pred[2,4] { { 1100 }, @@ -2397,10 +2394,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Ge) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Gt) { // Test broadcasting in Gt comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({1, 2, 3, 4}); auto m = builder.ConstantR2({{1, 0, 5, 6}, {42, 52, 10, 4}}); - auto cmp = builder.Gt(v, m, /*broadcast_dimensions=*/{1}); + builder.Gt(v, m, /*broadcast_dimensions=*/{1}); const string expected = R"(pred[2,4] { { 0100 }, @@ -2411,10 +2408,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Gt) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Le) { // Test broadcasting in Le comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({1, 2, 3, 4}); auto m = builder.ConstantR2({{1, 0, 5, 6}, {42, 52, 10, 4}}); - auto cmp = builder.Le(v, m, /*broadcast_dimensions=*/{1}); + builder.Le(v, m, /*broadcast_dimensions=*/{1}); const string expected = R"(pred[2,4] { { 1011 }, @@ -2425,10 +2422,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Le) { XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Lt) { // Test broadcasting in Lt comparison. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({1, 2, 3, 4}); auto m = builder.ConstantR2({{1, 0, 5, 6}, {42, 52, 10, 4}}); - auto cmp = builder.Lt(v, m, /*broadcast_dimensions=*/{1}); + builder.Lt(v, m, /*broadcast_dimensions=*/{1}); const string expected = R"(pred[2,4] { { 0011 }, @@ -2440,24 +2437,24 @@ XLA_TEST_F(ArrayElementwiseOpTest, Compare1DTo2DS32Lt) { XLA_TEST_F(ArrayElementwiseOpTest, Mul2Dby1DF32) { // Test simple broadcasting of a R1F32 over R2F32 when the order of binary op // arguments is reversed. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto m = builder.ConstantR2({{1.5f, 2.5f, 3.5f}, {4.5f, 5.5f, 6.5f}}); auto v = builder.ConstantR1({2.0f, 4.0f, 6.0f}); - auto add = builder.Mul(m, v, /*broadcast_dimensions=*/{1}); + builder.Mul(m, v, /*broadcast_dimensions=*/{1}); Array2D expected_array({{3.0f, 10.0f, 21.0f}, {9.0f, 22.0f, 39.0f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo2DWithDegenerateDim1) { // Tests broadcasting for arrays with degenerate (size == 1) dimensions. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // m's shape in XLA notation is {3, 2} // md's shape in XLA notation is {3, 1} // The result has shape {3, 2}, where md is broadcast over m auto m = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto md = builder.ConstantR2({{10.0f, 20.0f, 30.0f}}); - auto add = builder.Add(m, md); + builder.Add(m, md); Array2D expected_array( {{7.5f, 23.14f, 31.0f}, {12.25f, 10.0f, 33.33f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); @@ -2465,14 +2462,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo2DWithDegenerateDim1) { XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo2DWithDegenerateDim0) { // Tests broadcasting for arrays with degenerate (size == 1) dimensions. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // m's shape in XLA notation is {3, 2} // md's shape in XLA notation is {1, 2} // The result has shape {3, 2}, where md is broadcast over m auto m = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto md = builder.ConstantR2({{10.0f}, {20.0f}}); - auto add = builder.Add(m, md); + builder.Add(m, md); Array2D expected_array( {{7.5f, 13.14f, 11.0f}, {22.25f, 10.0f, 23.33f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); @@ -2483,13 +2480,13 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add2DsWithDegenerateDimsOuterProduct) { // effectively creates an "outer product" operation. // This is taken from the Numpy docs example at: // http://docs.scipy.org/doc/numpy-1.10.1/user/basics.broadcasting.html - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // a's shape in XLA notation is {1, 4} // b's shape in XLA notation is {3, 1} // The result has shape {3, 4}. auto a = builder.ConstantR2({{0.0f}, {10.0f}, {20.0f}, {30.0f}}); auto b = builder.ConstantR2({{1.0f, 2.0f, 3.0f}}); - auto add = builder.Add(a, b); + builder.Add(a, b); Array2D expected_array({{1.0f, 2.0f, 3.0f}, {11.0f, 12.0f, 13.0f}, {21.0f, 22.0f, 23.0f}, @@ -2500,10 +2497,10 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add2DsWithDegenerateDimsOuterProduct) { XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32TwoWaysOver1) { // Add together a (2,2) array and a (2) array, using dimension 0 for // broadcasting (though there are two ways to broadcast these shapes). - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({20.0f, 40.0f}); auto m = builder.ConstantR2({{10.0f, 50.0f}, {77.0f, 88.0f}}); - auto add = builder.Add(v, m, /*broadcast_dimensions=*/{1}); + builder.Add(v, m, /*broadcast_dimensions=*/{1}); Array2D expected_array({{30.0f, 90.0f}, {97.0f, 128.0f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); } @@ -2511,17 +2508,17 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32TwoWaysOver1) { XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo2DF32TwoWaysOver0) { // Add together a (2,2) array and a (2) array, using dimension 1 for // broadcasting (though there are two ways to broadcast these shapes). - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto v = builder.ConstantR1({20.0f, 40.0f}); auto m = builder.ConstantR2({{10.0f, 50.0f}, {77.0f, 88.0f}}); - auto add = builder.Add(v, m, /*broadcast_dimensions=*/{0}); + builder.Add(v, m, /*broadcast_dimensions=*/{0}); Array2D expected_array({{30.0f, 70.0f}, {117.0f, 128.0f}}); ComputeAndCompareR2(&builder, expected_array, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, 3DBinaryOpF32s) { // Binary add of two R3s together - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array3D a_3d({{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}); auto a = builder.ConstantR3FromArray3D(a_3d); @@ -2529,7 +2526,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, 3DBinaryOpF32s) { Array3D b_3d({{{2.0f, 4.0f}, {6.0f, 8.0f}, {10.0f, 12.0f}}, {{14.0f, 16.0f}, {18.0f, 20.0f}, {22.0f, 24.0f}}}); auto b = builder.ConstantR3FromArray3D(b_3d); - auto add = builder.Add(a, b); + builder.Add(a, b); Array3D expected_3d( {{{3.0f, 6.0f}, {9.0f, 12.0f}, {15.0f, 18.0f}}, @@ -2540,7 +2537,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, 3DBinaryOpF32s) { XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver2) { // Add together a (2, 3, 2) array with a (2) array, using dimension 0 for // broadcasting (though there are two ways to broadcast these shapes). - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // clang-format off Array3D a_3d({ {{1.0f, 2.0f}, @@ -2553,7 +2550,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver2) { // clang-format on auto a = builder.ConstantR3FromArray3D(a_3d); auto v = builder.ConstantR1({10.0f, 20.0f}); - auto add = builder.Add(a, v, /*broadcast_dimensions=*/{2}); + builder.Add(a, v, /*broadcast_dimensions=*/{2}); Array3D expected_3d( {{{11.0f, 22.0f}, {13.0f, 24.0f}, {15.0f, 26.0f}}, @@ -2564,7 +2561,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver2) { XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver0) { // Add together a (2, 3, 2) array with a (2) array, using dimension 2 for // broadcasting (though there are two ways to broadcast these shapes). - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // clang-format off Array3D a_3d({ {{1.0f, 2.0f}, @@ -2577,7 +2574,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver0) { // clang-format on auto a = builder.ConstantR3FromArray3D(a_3d); auto v = builder.ConstantR1({10.0f, 20.0f}); - auto add = builder.Add(a, v, /*broadcast_dimensions=*/{0}); + builder.Add(a, v, /*broadcast_dimensions=*/{0}); // clang-format off Array3D expected_3d({ @@ -2595,7 +2592,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add1DTo3DTwoWaysOver0) { XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo3D) { // Add together a (2, 3, 2) array with a (3, 2) array, using dimensions {1,2} // for broadcasting. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); // clang-format off Array3D a_3d({ {{1.0f, 2.0f}, @@ -2610,7 +2607,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo3D) { {10.0f, 20.0f, 30.0f}, {40.0f, 50.0f, 60.0f}, }); - auto add = builder.Add(a, m, /*broadcast_dimensions=*/{0, 1}); + builder.Add(a, m, /*broadcast_dimensions=*/{0, 1}); Array3D expected_3d({ {{11.0f, 12.0f}, @@ -2627,7 +2624,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, Add2DTo3D) { XLA_TEST_F(ArrayElementwiseOpTest, CompareGtR3F32sWithDegenerateDim2) { // Comparison between two 3D arrays of compatible shapes: // (2, 3, 2) and (2, 3, 1): expected to produce a (2, 3, 2) shape of PREDs. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Array3D a_3d({{{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}}, {{7.0f, 8.0f}, {9.0f, 10.0f}, {11.0f, 12.0f}}}); auto a = builder.ConstantR3FromArray3D(a_3d); @@ -2651,7 +2648,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGtR3F32sWithDegenerateDim2) { } XLA_TEST_F(ArrayElementwiseOpTest, 4DBinaryOpF32s) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr> operand_a_4d(new Array4D(2, 3, 4, 5)); std::unique_ptr> operand_b_4d(new Array4D(2, 3, 4, 5)); @@ -2672,13 +2669,13 @@ XLA_TEST_F(ArrayElementwiseOpTest, 4DBinaryOpF32s) { auto a = builder.ConstantR4FromArray4D(*operand_a_4d); auto b = builder.ConstantR4FromArray4D(*operand_b_4d); - auto add = builder.Add(a, b); + builder.Add(a, b); ComputeAndCompareR4(&builder, *expected_4d, {}, error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, R4PlusR1InDim1) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr> operand_a_4d(new Array4D(2, 3, 4, 5)); std::unique_ptr> expected_4d(new Array4D(2, 3, 4, 5)); @@ -2700,7 +2697,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, R4PlusR1InDim1) { auto a = builder.ConstantR4FromArray4D(*operand_a_4d); auto b = builder.ConstantR1(operand_b_1d); - auto add = builder.Add(a, b, {1}); + builder.Add(a, b, {1}); ComputeAndCompareR4(&builder, *expected_4d, {}, error_spec_); } @@ -2715,7 +2712,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, R4_16x16x2x2_Plus_R1_16) { std::vector r1(d1); std::iota(r1.begin(), r1.end(), 1.0); - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::unique_ptr a_literal = Literal::CreateR4FromArray4DWithLayout( r4, LayoutUtil::MakeLayout({0, 1, 2, 3})); auto a = builder.ConstantLiteral(*a_literal); @@ -2736,11 +2733,11 @@ XLA_TEST_F(ArrayElementwiseOpTest, R4_16x16x2x2_Plus_R1_16) { // Show that we can't add two opaques. XLA_TEST_F(ArrayElementwiseOpTest, CannotAddOpaques) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto shape = ShapeUtil::MakeOpaqueShape(); auto x = builder.Parameter(0, shape, "x"); - auto concatenated = builder.Add(x, x); - StatusOr computation_status = builder.Build(); + builder.Add(x, x); + auto computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().ToString(), ::testing::ContainsRegex( @@ -2748,12 +2745,12 @@ XLA_TEST_F(ArrayElementwiseOpTest, CannotAddOpaques) { } XLA_TEST_F(ArrayElementwiseOpTest, IdentityBroadcastOfSameRankIsAllowed) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto b = builder.ConstantR2({{-1.5f, 8.14f, 42.0}, {-1.0f, -4.0f, 5.55f}}); - auto add = builder.Add(a, b, /*broadcast_dimensions=*/{0, 1}); + builder.Add(a, b, /*broadcast_dimensions=*/{0, 1}); Array2D expected_array( {{-4.0f, 11.28f, 43.0f}, {1.25f, -14.0f, 8.88f}}); @@ -2761,14 +2758,14 @@ XLA_TEST_F(ArrayElementwiseOpTest, IdentityBroadcastOfSameRankIsAllowed) { } XLA_TEST_F(ArrayElementwiseOpTest, NonIdentityBroadcastOfSameRankIsDisallowed) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto a = builder.ConstantR2({{-2.5f, 3.14f, 1.0f}, {2.25f, -10.0f, 3.33f}}); auto b = builder.ConstantR2({{-1.5f, 8.14f, 42.0}, {-1.0f, -4.0f, 5.55f}}); - auto add = builder.Add(a, b, /*broadcast_dimensions=*/{1, 0}); + builder.Add(a, b, /*broadcast_dimensions=*/{1, 0}); - StatusOr computation_status = builder.Build(); + auto computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().error_message(), ::testing::ContainsRegex("must.*be the identity")); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index d9bd1ce6eb..ec95a68ead 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -139,14 +139,31 @@ std::unique_ptr ClientLibraryTestBase::ExecuteAndTransferOrDie( return ExecuteAndTransfer(builder, arguments).ConsumeValueOrDie(); } +string ClientLibraryTestBase::ExecuteToString( + XlaBuilder* builder, tensorflow::gtl::ArraySlice arguments) { + auto computation_status = builder->Build(); + if (!computation_status.ok()) { + return computation_status.status().ToString(); + } + auto computation = computation_status.ConsumeValueOrDie(); + + auto result = + client_->ExecuteAndTransfer(computation, arguments, &execution_options_); + if (!result.ok()) { + return result.status().ToString(); + } else { + return result.ValueOrDie()->ToString(); + } +} + string ClientLibraryTestBase::ExecuteToString( ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments) { - StatusOr computation_status = builder->Build(); + auto computation_status = builder->Build(); if (!computation_status.ok()) { return computation_status.status().ToString(); } - Computation computation = computation_status.ConsumeValueOrDie(); + auto computation = computation_status.ConsumeValueOrDie(); auto result = client_->ExecuteAndTransfer(computation, arguments, &execution_options_); diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index c39597c4e1..5ff200be03 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -124,6 +124,8 @@ class ClientLibraryTestBase : public ::testing::Test { // Run a computation and return its value as a string. If an error // occurs, then instead return the error as a string. + string ExecuteToString(XlaBuilder* builder, + tensorflow::gtl::ArraySlice arguments); string ExecuteToString(ComputationBuilder* builder, tensorflow::gtl::ArraySlice arguments); -- GitLab From 071e32d7334b0ff6452111c83ae0b139f28b36ff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 19:29:48 -0700 Subject: [PATCH 523/960] Add "distribute" argument to tf.estimator.RunConfig() in anticipation of upcoming DistributionStrategy support in Estimator. PiperOrigin-RevId: 190563074 --- tensorflow/python/estimator/run_config.py | 20 +++++++++++++++---- .../tensorflow.estimator.-run-config.pbtxt | 6 +++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 820fda7765..141eaeff64 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -43,7 +43,8 @@ _DEFAULT_REPLACEABLE_LIST = [ 'session_config', 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', - 'log_step_count_steps' + 'log_step_count_steps', + 'distribute' ] _SAVE_CKPT_ERR = ( @@ -300,7 +301,8 @@ class RunConfig(object): session_config=None, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, - log_step_count_steps=100): + log_step_count_steps=100, + distribute=None): """Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -424,7 +426,10 @@ class RunConfig(object): the feature. log_step_count_steps: The frequency, in number of global steps, that the global step/sec and the loss will be logged during training. - + distribute: an optional instance of + `tf.contrib.distribute.DistributionStrategy`. If specified, + then Estimator will distribute the user's model according to the policy + specified by that strategy. Raises: ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs` @@ -460,7 +465,8 @@ class RunConfig(object): session_config=session_config, keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, - log_step_count_steps=log_step_count_steps) + log_step_count_steps=log_step_count_steps, + distribute=distribute) self._init_distributed_setting_from_environment_var(tf_config) @@ -671,6 +677,12 @@ class RunConfig(object): """Returns the platform defined (in TF_CONFIG) service dict.""" return self._service + @property + def distribute(self): + """Returns the optional `tf.contrib.distribute.DistributionStrategy` object. + """ + return self._distribute + def replace(self, **kwargs): """Returns a new instance of `RunConfig` replacing specified properties. diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt index 091b1be0c8..759ff752b0 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt @@ -6,6 +6,10 @@ tf_class { name: "cluster_spec" mtype: "" } + member { + name: "distribute" + mtype: "" + } member { name: "evaluation_master" mtype: "" @@ -80,7 +84,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\'], " + argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\'], " } member_method { name: "replace" -- GitLab From 41982886efaa2ab9cc75d0d5ab6c27368468d061 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 19:30:26 -0700 Subject: [PATCH 524/960] Fix inconsistency in run_cond. PiperOrigin-RevId: 190563114 --- .../contrib/autograph/converters/ifexp.py | 2 +- .../autograph/utils/multiple_dispatch.py | 11 +++++++++-- .../autograph/utils/multiple_dispatch_test.py | 17 ++++++++--------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/autograph/converters/ifexp.py b/tensorflow/contrib/autograph/converters/ifexp.py index aff94d2b79..bb0c0a36a7 100644 --- a/tensorflow/contrib/autograph/converters/ifexp.py +++ b/tensorflow/contrib/autograph/converters/ifexp.py @@ -27,7 +27,7 @@ class IfExp(transformer.Base): def visit_IfExp(self, node): template = """ - autograph_utils.run_cond(test, lambda: body, lambda: orelse) + autograph_utils.run_cond(test, lambda: (body,), lambda: (orelse,)) """ desugared_ifexp = templates.replace_as_expression( template, test=node.test, body=node.body, orelse=node.orelse) diff --git a/tensorflow/contrib/autograph/utils/multiple_dispatch.py b/tensorflow/contrib/autograph/utils/multiple_dispatch.py index b756ccfaee..47049255f3 100644 --- a/tensorflow/contrib/autograph/utils/multiple_dispatch.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch.py @@ -55,10 +55,17 @@ def run_cond(condition, true_fn, false_fn): def py_cond(condition, true_fn, false_fn): + """Functional version of Python's conditional.""" if condition: - return true_fn() + results = true_fn() else: - return false_fn() + results = false_fn() + + # The contract for the branch functions is to return tuples, but they should + # be collapsed to a single element when there is only one output. + if len(results) == 1: + return results[0] + return results def run_while(cond_fn, body_fn, init_args): diff --git a/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py index 8c7daa6ded..e6a41bb416 100644 --- a/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py +++ b/tensorflow/contrib/autograph/utils/multiple_dispatch_test.py @@ -56,20 +56,19 @@ class MultipleDispatchTest(test.TestCase): self.assertFalse(should_be_false2) def test_run_cond_python(self): - true_fn = lambda: 2.0 - false_fn = lambda: 3.0 - self.assertEqual(multiple_dispatch.run_cond(True, true_fn, false_fn), 2.0) - self.assertEqual(multiple_dispatch.run_cond(False, true_fn, false_fn), 3.0) + true_fn = lambda: (2,) + false_fn = lambda: (3,) + self.assertEqual(multiple_dispatch.run_cond(True, true_fn, false_fn), 2) + self.assertEqual(multiple_dispatch.run_cond(False, true_fn, false_fn), 3) def test_run_cond_tf(self): - - true_fn = lambda: constant([2.0]) - false_fn = lambda: constant([3.0]) + true_fn = lambda: (constant(2),) + false_fn = lambda: (constant(3),) with Session() as sess: out = multiple_dispatch.run_cond(constant(True), true_fn, false_fn) - self.assertEqual(sess.run(out), 2.0) + self.assertEqual(sess.run(out), 2) out = multiple_dispatch.run_cond(constant(False), true_fn, false_fn) - self.assertEqual(sess.run(out), 3.0) + self.assertEqual(sess.run(out), 3) def test_run_while_python(self): cond_fn = lambda x, t, s: x > t -- GitLab From 591b6a7709fa05d490b0c718253492dfad35557f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 19:46:51 -0700 Subject: [PATCH 525/960] Include additional cases for evaluating the fqn annotation. PiperOrigin-RevId: 190564036 --- .../contrib/autograph/pyct/static_analysis/live_values.py | 8 +++++++- .../autograph/pyct/static_analysis/live_values_test.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py b/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py index 5f813355e6..53ae154590 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/live_values.py @@ -59,9 +59,15 @@ class LiveValueResolver(transformer.Base): obj = self.context.namespace[node.id] anno.setanno(node, 'live_val', obj) if hasattr(obj, '__name__'): + anno.setanno(node, 'fqn', (obj.__name__,)) + elif hasattr(obj, '__class__'): + obj_class = obj.__class__ + anno.setanno(node, 'fqn', + (obj_class.__module__, obj_class.__name__)) + else: # If the symbol value is for example a primitive, then it will not # have a name. - anno.setanno(node, 'fqn', (obj.__name__,)) + pass else: pass # TODO(mdan): Should we raise an error here? diff --git a/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py b/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py index b66439624e..69e428bde1 100644 --- a/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py +++ b/tensorflow/contrib/autograph/pyct/static_analysis/live_values_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six + from tensorflow.contrib.autograph.pyct import anno from tensorflow.contrib.autograph.pyct import context from tensorflow.contrib.autograph.pyct import parser @@ -75,7 +77,11 @@ class LiveValuesResolverTest(test.TestCase): node = self._parse_and_analyze(test_fn, {'a': True}) retval_node = node.body[0].body[0].value - self.assertFalse(anno.hasanno(retval_node, 'fqn')) + if six.PY2: + self.assertEqual( + anno.getanno(retval_node, 'fqn'), ('__builtin__', 'bool')) + else: + self.assertEqual(anno.getanno(retval_node, 'fqn'), ('builtins', 'bool')) def test_namespace(self): -- GitLab From c6bc514ffcc601abb7018721c2518cf91a39eeb1 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Mon, 26 Mar 2018 19:53:09 -0700 Subject: [PATCH 526/960] Updating documentation of supported ops. PiperOrigin-RevId: 190564365 --- .../lite/g3doc/tf_ops_compatibility.md | 183 +++++++++++++++++- 1 file changed, 175 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index b1bbb7c670..61ea5231e3 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -30,13 +30,18 @@ quantized training is necessary before conversion. ## Data Format and Broadcasting At the moment TensorFlow Lite supports only TensorFlow's "NHWC" format, and -broadcasting in operations like tf.add and tf.mul is generally not supported. +broadcasting is only support in a limited number of ops (tf.add, tf.mul, tf.sub, +and tf.div). ## Compatible Operations The following TensorFlow operations are usually mapped to their TensorFlow Lite counterparts: +* [tf.batch_to_space_nd](https://www.tensorflow.org/api_docs/python/tf/batch_to_space_nd) - + *as long as the input tensor is 4D (1 batch + 2 spatial + 1 other) and the + crops attribute is not used* +* [tf.exp](https://www.tensorflow.org/api_docs/python/tf/exp) * [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul) - *as long as the second argument is constant and transposition is not used* * [tf.nn.avg_pool](https://www.tensorflow.org/api_docs/python/tf/nn/avg_pool) @@ -47,12 +52,30 @@ counterparts: * [tf.nn.l2_normalize](https://www.tensorflow.org/api_docs/python/tf/nn/l2_normalize) - *as long as normalization is done along the last dimension* * [tf.nn.local_response_normalization](https://www.tensorflow.org/api_docs/python/tf/nn/local_response_normalization) +* [tf.nn.log_softmax](https://www.tensorflow.org/api_docs/python/tf/nn/log_softmax) - + *as long as axis is not provided* * [tf.nn.max_pool](https://www.tensorflow.org/api_docs/python/tf/nn/max_pool) * [tf.nn.softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax) - *as long as tensors are 2D and axis is the last dimension* +* [tf.nn.top_k](https://www.tensorflow.org/api_docs/python/tf/nn/top_k) +* [tf.pad](https://www.tensorflow.org/api_docs/python/tf/pad) - *as long as + mode and constant_values are not used* +* [tf.reduce_mean](https://www.tensorflow.org/api_docs/python/tf/reduce_mean) - + *as long as the reduction_indices attribute is not used* * [tf.reshape](https://www.tensorflow.org/api_docs/python/tf/reshape) * [tf.sigmoid](https://www.tensorflow.org/api_docs/python/tf/sigmoid) +* [tf.space_to_batch_nd](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd) - + *as long as the input tensor is 4D (1 batch + 2 spatial + 1 other)* * [tf.space_to_depth](https://www.tensorflow.org/api_docs/python/tf/space_to_depth) +* [tf.split](https://www.tensorflow.org/api_docs/python/tf/split) - *as long + as num is not provided and num_or_size_split contains number of splits as a + 0D tensor* +* [tf.squeeze](https://www.tensorflow.org/api_docs/python/tf/squeeze) - *as + long as axis is not provided* +* [tf.strided_slice](https://www.tensorflow.org/api_docs/python/tf/strided_slice) - + *as long as ellipsis_mask and new_axis_mask are not used* +* [tf.transpose](https://www.tensorflow.org/versions/master/api_docs/python/tf/transpose) - + *as long as conjugate is not used* ## Straightforward Conversions, Constant-Folding and Fusing @@ -91,7 +114,6 @@ Here is a list of TensorFlow operations that are usually removed from the graph: * [tf.shape](https://www.tensorflow.org/api_docs/python/tf/shape) * [tf.sqrt](https://www.tensorflow.org/api_docs/python/tf/sqrt) * [tf.square](https://www.tensorflow.org/api_docs/python/tf/square) -* [tf.squeeze](https://www.tensorflow.org/api_docs/python/tf/squeeze) * [tf.subtract](https://www.tensorflow.org/api_docs/python/tf/subtract) * [tf.tile](https://www.tensorflow.org/api_docs/python/tf/tile) * [tf.nn.batch_norm_with_global_normalization](https://www.tensorflow.org/api_docs/python/tf/nn/batch_norm_with_global_normalization) @@ -109,17 +131,11 @@ fused. TensorFlow operation not listed above are likely unsupported. Notably, the following common ops are not supported at the moment: -* [tf.batch_to_space_nd](https://www.tensorflow.org/api_docs/python/tf/batch_to_space_nd) * [tf.depth_to_space](https://www.tensorflow.org/api_docs/python/tf/depth_to_space) * [tf.floor](https://www.tensorflow.org/api_docs/python/tf/floor) * [tf.gather](https://www.tensorflow.org/api_docs/python/tf/gather) * [tf.image.resize_bilinear](https://www.tensorflow.org/api_docs/python/tf/image/resize_bilinear) -* [tf.pad](https://www.tensorflow.org/api_docs/python/tf/pad) -* [tf.reduce_mean](https://www.tensorflow.org/api_docs/python/tf/reduce_mean) * [tf.slice](https://www.tensorflow.org/api_docs/python/tf/slice) -* [tf.space_to_batch_nd](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd) -* [tf.split](https://www.tensorflow.org/api_docs/python/tf/split) -* [tf.strided_slice](https://www.tensorflow.org/api_docs/python/tf/strided_slice) * [tf.tanh](https://www.tensorflow.org/api_docs/python/tf/tanh) ## TensorFlow Lite Operations @@ -160,6 +176,20 @@ Options { } ``` +**BATCH_TO_SPACE_ND** + +``` +Inputs { + 0: 4D tensor + 1: 1D tensor + 2: 2D tensor +} +Outputs { + 0: tensor rearranged using block_shape. See tf.batch_to_space_nd for + details. +} +``` + **CONCATENATION** ``` @@ -213,6 +243,17 @@ Options { } ``` +**EXP** + +``` +Inputs { + 0: tensor +} +Outputs { + 0: result of computing element-wise exponential of the input tensor +} +``` + **FULLY_CONNECTED** ``` @@ -289,6 +330,17 @@ Outputs { } ``` +**LOG_SOFTMAX** + +``` +Inputs { + 0: tensor +} +Outputs { + 0: tensor equivalent to logits - log(reduce_sum(exp(logits), -1)) +} +``` + **MAX_POOL_2D** ``` @@ -322,6 +374,34 @@ Options { } ``` +**PAD** + +``` +Inputs { + 0: tensor + 1: tensor +} +Outputs { + 0: tensor where additional values are added before and after the contents of + each dimension +} +``` + +**MEAN (tf.reduce_mean)** + +``` +Inputs { + 0: tensor + 1: tensor +} +Outputs { + 0: tensor containing the mean of the elements +} +Options { + keep_dims: whether to retain reduced dimensions +} +``` + **RELU** ``` @@ -399,6 +479,93 @@ Options { } ``` +**SPACE_TO_BATCH_ND** + +``` +Inputs { + 0: 4D tensor + 1: 1D tensor + 2: 2D tensor +} +Outputs { + 0: a tensor rearranged using block_shape. See tf.space_to_batch_nd for + details. +} +``` + +**SPLIT** + +``` +Inputs { + 0: 0D tensor (axis) + 1: tensor (input) +} +Outputs { + 0-N: subtensors built from the input tensors +} +Options { + num_splits: Specifies number of outputs +} +``` + +**SQUEEZE** + +``` +Inputs { + 0: tensor +} +Outputs { + 0: tensor without any dimensions of size 1 +} +Options { + squeeze_dims +} +``` + +**STRIDED_SLICE** + +``` +Inputs { + 0: tensor + 1: 1D tensor + 2: 1D tensor + 3: 1D tensor +} +Outputs { + 0: slice of the input tensor of the given size +} +Options { + begin_mask: mask for begin indicies + end_mask: mask for end indices + shrink_axis_mask: mask that indicates which dimensions to remove +} +``` + +**TOP_K** + +``` +Inputs { + 0: tensor + 1: OD tensor +} +Outputs { + 0: k largest element along each last dimensional slice + 1: indicies of values within the last dimension of the input ensor +} +``` + +**TRANSPOSE** + +``` +Inputs { + 0: tensor + 1: tensor +} +Outputs { + 0: tensor permuted according to perm +} +``` + And these are TensorFlow Lite operations that are present but not ready for custom models yet: -- GitLab From 63cfd006fa1e848daeaf9ac74e2c9f8c42e401b1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 20:01:35 -0700 Subject: [PATCH 527/960] Do not assume Attribute nodes always have a QN - it may be missing for attributes of dynamic objects, like function calls. PiperOrigin-RevId: 190564784 --- tensorflow/contrib/autograph/pyct/ast_util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/pyct/ast_util.py b/tensorflow/contrib/autograph/pyct/ast_util.py index 5a41b5e4a9..4f76a69522 100644 --- a/tensorflow/contrib/autograph/pyct/ast_util.py +++ b/tensorflow/contrib/autograph/pyct/ast_util.py @@ -84,7 +84,10 @@ class SymbolRenamer(gast.NodeTransformer): return self._process(node) def visit_Attribute(self, node): - return self._process(node) + if anno.hasanno(node, anno.Basic.QN): + return self._process(node) + # Attributes of dynamic objects will not have a QN. + return self.generic_visit(node) def rename_symbols(node, name_map): -- GitLab From 8bcc574711b8770e8341f77d1a9b8370d72d7477 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 20:02:24 -0700 Subject: [PATCH 528/960] Include subscripts in the list of nodes accepted for replacement. PiperOrigin-RevId: 190564824 --- tensorflow/contrib/autograph/pyct/templates.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/contrib/autograph/pyct/templates.py b/tensorflow/contrib/autograph/pyct/templates.py index fb99e0d4e5..baf7923fff 100644 --- a/tensorflow/contrib/autograph/pyct/templates.py +++ b/tensorflow/contrib/autograph/pyct/templates.py @@ -95,6 +95,15 @@ class ReplaceTransformer(gast.NodeTransformer): self._check_inner_children_have_context(e) for e in node.values: self._check_inner_children_have_context(e) + elif isinstance(node, gast.Subscript): + self._check_inner_children_have_context(node.value) + self._check_inner_children_have_context(node.slice) + elif isinstance(node, gast.Slice): + self._check_inner_children_have_context(node.lower) + if node.upper: + self._check_inner_children_have_context(node.upper) + if node.step: + self._check_inner_children_have_context(node.step) elif isinstance(node, gast.Name): self._check_has_context(node) elif isinstance(node, (gast.Str, gast.Num)): @@ -127,6 +136,9 @@ class ReplaceTransformer(gast.NodeTransformer): self._check_inner_children_have_context(e) for e in node.values: self._check_inner_children_have_context(e) + elif isinstance(node, gast.Subscript): + self._set_inner_child_context(node.value, ctx) + self._check_inner_children_have_context(node.slice) elif isinstance(node, (gast.Str, gast.Num)): pass else: -- GitLab From 2f208bb73054109390ef9565e8038c14329f73ad Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Mar 2018 20:18:03 -0700 Subject: [PATCH 529/960] [XLA] Add tests for R1 PRED Slices. PiperOrigin-RevId: 190566036 --- tensorflow/compiler/xla/tests/slice_test.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index fe36df160d..a14a365bd0 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ b/tensorflow/compiler/xla/tests/slice_test.cc @@ -193,7 +193,9 @@ class SliceR1Test : public ClientLibraryTestBase, protected: template void Run(const R1Spec& spec) { - std::vector input(spec.input_dim0); + // This can't be an std::vector, since you can't grab an ArraySlice of a + // vector. + tensorflow::gtl::InlinedVector input(spec.input_dim0); std::iota(input.begin(), input.end(), NativeT()); ComputationBuilder builder(client_, TestName()); @@ -201,7 +203,8 @@ class SliceR1Test : public ClientLibraryTestBase, builder.Slice(original, {spec.slice_start}, {spec.slice_limit}, {spec.slice_stride}); - std::vector expected; + // Ditto. + tensorflow::gtl::InlinedVector expected; for (int i = spec.slice_start; i < spec.slice_limit; i += spec.slice_stride) { expected.push_back(i); @@ -230,6 +233,8 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } +XLA_TEST_P(SliceR1Test, DoIt_PRED) { Run(GetParam()); } + // Tests for R1 slice ops. // The format for each testcase is {input size, start, limit, stride}. // clang-format off -- GitLab From 574303015eb2b6fc4e002f5d2400c3e7f512ae82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 21:30:56 -0700 Subject: [PATCH 530/960] Add class DistributionStrategy to python/training/, though not part of the exposed TF API. PiperOrigin-RevId: 190570489 --- tensorflow/python/BUILD | 13 + tensorflow/python/training/distribute.py | 1118 +++++++++++++++++ tensorflow/python/training/distribute_test.py | 104 ++ 3 files changed, 1235 insertions(+) create mode 100644 tensorflow/python/training/distribute.py create mode 100644 tensorflow/python/training/distribute_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 30ecc477f2..20d7e81045 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2914,6 +2914,7 @@ py_library( ":variables", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", + "//tensorflow/python/ops/losses", "//third_party/py/numpy", "@six_archive//:six", ], @@ -2943,6 +2944,18 @@ py_test( ], ) +py_test( + name = "distribute_test", + size = "small", + srcs = ["training/distribute_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":training", + ":variable_scope", + ], +) + py_test( name = "evaluation_test", size = "small", diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py new file mode 100644 index 0000000000..9261e13230 --- /dev/null +++ b/tensorflow/python/training/distribute.py @@ -0,0 +1,1118 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class DistributionStrategy, TowerContext, and supporting APIs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import threading + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops.losses import losses_impl +from tensorflow.python.training import device_util +from tensorflow.python.util import nest + + +# ------------------------------------------------------------------------------ +# Internal API for setting the current thread mode as being either in a +# tower or cross-tower context for a particular distribution strategy. + + +class _ThreadMode(object): + + def __init__(self, dist, cross, tower): + self.distribution_strategy = dist + self.cross_tower_context = cross + self.tower_context = tower + + +class _CrossTowerThreadMode(_ThreadMode): + + def __init__(self, distribution_strategy): + _ThreadMode.__init__( + self, distribution_strategy, distribution_strategy, None) + + +class _InTowerThreadMode(_ThreadMode): + + def __init__(self, tower_ctx): + _ThreadMode.__init__( + self, tower_ctx.distribution_strategy, None, tower_ctx) + + +_per_thread_mode = threading.local() + + +def _push_per_thread_mode(context): + if not hasattr(_per_thread_mode, "stack"): + _per_thread_mode.stack = [] + _per_thread_mode.stack.append(context) + + +def _pop_per_thread_mode(): + _per_thread_mode.stack.pop(-1) + + +class _DefaultTowerThreadMode(_ThreadMode): + """Type of default value returned by `_get_per_thread_mode()`. + + Used when the thread-local stack is empty. + """ + + def __init__(self): + # _default_distribution_strategy and _default_tower_context are + # defined at the bottom of this file. + _ThreadMode.__init__( + self, _default_distribution_strategy, None, _default_tower_context) + + +def _get_per_thread_mode(): + try: + return _per_thread_mode.stack[-1] + except (AttributeError, IndexError): + # _default_tower_mode is defined at the bottom of this file. + return _default_tower_mode + + +# ------------------------------------------------------------------------------ +# Context tracking whether in a distribution.update() or .update_non_slot() +# call. + + +_update_device = threading.local() + + +def get_update_device(): + try: + return _update_device.current + except AttributeError: + return None + + +class UpdateContext(object): + """Context manager when you are in `update()` or `update_non_slot()`.""" + + def __init__(self, device): + self._device = device + self._old_device = None + + def __enter__(self): + self._old_device = get_update_device() + _update_device.current = self._device + + def __exit__(self, exception_type, exception_value, traceback): + del exception_type, exception_value, traceback + _update_device.current = self._old_device + + +# ------------------------------------------------------------------------------ +# Public API for accessing the current thread mode + + +def get_tower_context(): + """Returns the current TowerContext or None. + + Note that execution: + 1. starts in the default (single-tower) tower context; + 2. switches to cross-tower context when entering a + `with DistributionStrategy.scope():` block; + 3. switches to a (non-default) tower context inside + `call_for_each_tower(fn, ...)`; + 4. if `fn` calls `get_tower_context()->merge_call(merge_fn, ...)`, then + inside `merge_fn` you are back in the cross-tower context. + + Note that you can also go directly from step 1 to 4 to switch to a + cross-tower context for the default `DistributionStrategy`. You may + also switch from the cross-tower context of 4 to a tower context by + calling `call_for_each_tower()`, jumping back to step 3. + + Most `DistributionStrategy` methods may only be executed in + a cross-tower context, in a tower context you should use the + `TowerContext` API instead. + + Returns: + The current `TowerContext` object when in a tower context scope, else None. + + Exactly one of `get_tower_context()` and `get_cross_tower_context()` + will return None in a particular block. + """ + return _get_per_thread_mode().tower_context + + +def get_cross_tower_context(): + """Returns the current DistributionStrategy if in a cross-tower context. + + Note that execution: + 1. starts in the default (single-tower) tower context; + 2. switches to cross-tower context when entering a + `with DistributionStrategy.scope():` block; + 3. switches to a (non-default) tower context inside + `call_for_each_tower(fn, ...)`; + 4. if `fn` calls `get_tower_context()->merge_call(merge_fn, ...)`, then + inside `merge_fn` you are back in the cross-tower context. + + Note that you can also go directly from step 1 to 4 to switch to a + cross-tower context for the default `DistributionStrategy`. You may + also switch from the cross-tower context of 4 to a tower context by + calling `call_for_each_tower()`, jumping back to step 3. + + Most `DistributionStrategy` methods may only be executed in + a cross-tower context. + + Returns: + Returns the current `DistributionStrategy` object in a cross-tower + context, or None. + + Exactly one of `get_tower_context()` and `get_cross_tower_context()` + will return None in a particular block. + """ + return _get_per_thread_mode().cross_tower_context + + +def get_distribution_strategy(): + """Returns the current `DistributionStrategy` object. + + Returns: + A `DistributionStrategy` object. Inside a + `with distribution_strategy.scope()` block, it returns + `distribution_strategy`, otherwise it returns the default + (single-tower) `DistributionStrategy` object. + """ + return _get_per_thread_mode().distribution_strategy + + +def has_distribution_strategy(): + """Return if there is a current non-default `DistributionStrategy`. + + Returns: + True if inside a `with distribution_strategy.scope():`. + """ + return get_distribution_strategy() is not _default_distribution_strategy + + +# ------------------------------------------------------------------------------ +# Public utility functions. + + +def get_loss_reduction(): + """Reduce `method_string` corresponding to the last loss reduction.""" + loss_reduction = ops.get_default_graph()._last_loss_reduction # pylint: disable=protected-access + if loss_reduction == losses_impl.Reduction.SUM: + return "sum" + return "mean" + + +# ------------------------------------------------------------------------------ +# Internal API for validating the current thread mode + + +def _require_cross_tower_context(distribution_strategy): + """Verify in cross-tower context for `distribution_strategy`.""" + context = _get_per_thread_mode() + if context.cross_tower_context is distribution_strategy: return + # We have an error to report, figure out the right message. + if context.distribution_strategy is not distribution_strategy: + if context.distribution_strategy is _default_distribution_strategy: + raise RuntimeError( + 'Need to be inside "with distribution_strategy.scope()" for %s' % + (distribution_strategy,)) + else: + raise RuntimeError( + "Mixing different DistributionStrategy objects: %s is not %s" % + (context.distribution_strategy, distribution_strategy)) + assert context.cross_tower_context is None + raise RuntimeError("Method requires being in cross-tower context, use " + "get_tower_context().merge_call()") + + +def require_tower_context(tower_ctx): + """Verify in `tower_ctx` tower context.""" + context = _get_per_thread_mode() + if context.tower_context is tower_ctx: return + # We have an error to report, figure out the right message. + if context.tower_context is None: + raise RuntimeError("Need to be inside `call_for_each_tower()`") + if context.distribution_strategy is tower_ctx.distribution_strategy: + # Two different TowerContexts with the same DistributionStrategy. + raise RuntimeError("Mismatching tower context.") + raise RuntimeError( + "Mismatching DistributionStrategy objects: %s is not %s." % + (context.distribution_strategy, tower_ctx.distribution_strategy)) + + +def _require_distribution_strategy_scope(distribution_strategy): + """Verify in a `distribution_strategy.scope()` in this thread.""" + context = _get_per_thread_mode() + if context.distribution_strategy is distribution_strategy: return + # We have an error to report, figure out the right message. + if context.distribution_strategy is _default_distribution_strategy: + raise RuntimeError( + 'Need to be inside "with distribution_strategy.scope()" for %s' % + (distribution_strategy,)) + else: + raise RuntimeError( + "Mixing different DistributionStrategy objects: %s is not %s" % + (context.distribution_strategy, distribution_strategy)) + + +# ------------------------------------------------------------------------------ +# Internal context managers used to implement the DistributionStrategy +# base class + + +class _CurrentDistributionContext(object): + """Context manager for setting the `DistributionStrategy` and var creator.""" + + def __init__(self, distribution_strategy, var_creator_scope): + self._context = _CrossTowerThreadMode(distribution_strategy) + self._var_creator_scope = var_creator_scope + + def __enter__(self): + _push_per_thread_mode(self._context) + self._var_creator_scope.__enter__() + return self._context.distribution_strategy + + def __exit__(self, exception_type, exception_value, traceback): + self._var_creator_scope.__exit__(exception_type, exception_value, traceback) + _pop_per_thread_mode() + + +class _SameScopeAgainContext(object): + """Trivial context manager when you are already in `scope()`.""" + + def __init__(self, distribution_strategy): + self._distribution_strategy = distribution_strategy + + def __enter__(self): + return self._distribution_strategy + + def __exit__(self, exception_type, exception_value, traceback): + del exception_type, exception_value, traceback + + +# ------------------------------------------------------------------------------ +# Base classes for all distribution strategies. + + +class DistributionStrategy(object): + """A list of devices with a state & compute distribution policy. + + The intent is that you can write an algorithm in a stylized way and + it will be usable with a variety of different `DistributionStrategy` + implementations. Each descendant will implement a different strategy + for distributing the algorithm across multiple devices/machines. + Furthermore, these changes can be hidden inside the specific layers + and other library classes that need special treatment to run in a + distributed setting, so that most users' model definition code can + run unchanged. The `DistributionStrategy` API works the same way + with eager and graph execution. + + First let's introduce a few high-level concepts: + + * _Data parallelism_ is where we run multiple copies of the model + on different slices of the input data. This is in contrast to + _model parallelism_ where we divide up a single copy of a model + across multiple devices. + Note: for now we only support data parallelism at this time, but + hope to add support for model parallelism in the future. + * A _tower_ is one copy of the model, running on one slice of the + input data. + * _Synchronous_, or more commonly _sync_, training is when the + updates from each tower are aggregated together before updating + the model variables. This is in contrast to _asynchronous_, or + _async_ training where each tower updates the model variables + independently. + * Furthermore you might run your computation on multiple devices + on one machine (or "host"), or on multiple machines/hosts. + If you are running on multiple machines, you might have a + single master host that drives computation across all of them, + or you might have multiple clients driving the computation + asynchronously. + + To distribute an algorithm, we might use some of these ingredients: + + * Parameter servers: These are hosts that hold a single copy of + parameters/variables. All towers that want to operate on a variable + retrieve it at the beginning of a step and send an update to be + applied at the end of the step. Can support either sync or async + training. + * Mirrored variables: These are variables that are copied to multiple + devices, where we keep the copies in sync by applying the same + updates to every copy. Normally would only be used with sync training. + * Reductions and Allreduce: A _reduction_ is some method of + aggregating multiple values into one value, like "sum" or + "mean". If doing sync training, we will perform a reduction on the + gradients to a parameter from each tower before applying the + update. Allreduce is an algorithm for performing a reduction on + values from multiple devices and making the result available on + all of those devices. + * TODO(josh11b): Future: partitioned variables + + We have then a few approaches we want to support: + * Code written (as if) with no knowledge of class `DistributionStrategy`. + This code should work as before, even if some of the layers, etc. + used by that code are written to be distribution-aware. This is done + by having a default `DistributionStrategy` that gives ordinary behavior, + and by default being in a single tower context. + * Ordinary model code that you want to run using a specific + `DistributionStrategy`. This can be as simple as: + + ``` + with my_distribution.scope(): + iterator = my_distribution.distribute_dataset(dataset) + # TODO(josh11b): iterator = dataset.make_one_shot_iterator() + tower_train_ops = my_distribution.call_for_each_tower( + tower_fn, iterator.get_next()) + train_op = tf.group(my_distribution.unwrap(tower_train_ops)) + ``` + + This takes an ordinary `dataset` and `tower_fn` and runs it + distributed using a particular `DistributionStrategy` in + `my_distribution`. Any variables created in `tower_fn` are created + using `my_distribution`'s policy, and library functions called by + `tower_fn` can use the `get_tower_context()` API to get enhanced + behavior in this case. + * If you want to write a distributed algorithm, you may use any of + the `DistributionStrategy` APIs inside a + `with my_distribution.scope():` block of code. + + Lower-level concepts: + + * Wrapped values: In order to represent values parallel across devices + (either towers or the devices associated with a particular value), we + wrap them in a "PerDevice" or "Mirrored" object that contains a map + from device to values. "PerDevice" is used when the value may be + different across devices, and "Mirrored" when the value are the same. + * Unwrapping and merging: Consider calling a function `fn` on + multiple devices, like `call_for_each_tower(fn, w)` with an + argument `w that is a wrapped value. This means `w` will have a + map taking tower device `d0` to `w0`, tower device `d1` to `w1`, + etc. `call_for_each_tower()` unwraps `w` before calling `fn`, so + it calls `fn(w0)` on `d0`, `fn(w1)` on `d1`, etc. It then merges + the return values from `fn()`, which can possibly result in + wrapped values. For example, let's say `fn()` returns a tuple with + three components: (x, a, v0) from tower 0, (x, b, v1) on tower 1, + etc. If the first component is the same object `x` from every + tower, then the first component of the merged result will also be + `x`. If the second component is different (`a`, `b`, ...) from + each tower, then the merged value will have a wrapped map from + tower device to the different values. If the third component is + the members of a mirrored variable (`v` maps `d0` to `v0, `d1` to + `v1`, etc.), then the merged result will be that mirrored variable + (`v`). + * Tower context vs. Cross-tower context: _tower context_ is when we + are in some function that is being called once for each tower. + Otherwise we are in cross-tower context, which is useful for + calling `DistributionStrategy` methods which operate across the + towers (like `reduce()`). By default you start in a tower context + (the default "single tower context") and then some methods can + switch you back and forth, as described below. + * Worker devices vs. parameter devices: Most tower computations will + happen on worker devices. Since we don't yet support model + parallelism, there will be one worker device per tower. When using + parameter servers (see above), the set of devices holding + variables may be different, otherwise the parameter devices might + match the worker devices. + * Non-slot devices are some subset of the parameter devices where we + put all the non-slot variables. We need to ensure that all + non-slot variables are allocated on the same device, or mirrored + across the same set of devices. If you have some variable you want + to colocate all the non-slot variables with, you can use + `colocate_vars_with()` to get the remaining non-slot variables on + the same device. Otherwise you can use `non_slot_devices()` to + pick a consistent set of devices to pass to both + `colocate_vars_with()` and `update_non_slot()`. + + When using a `DistributionStrategy`, we have a new type dimension + called _locality_ that says what values are compatible with which + APIs: + + * T: different value for each tower (e.g. a PerDevice-wrapped value). + * M: value is "mirrored" across towers, i.e. there are copies with the + same value on each tower (e.g. a Mirrored-wrapped value). + * V(`v`): value is "mirrored" across all the devices which have a + copy of variable `v` (also a Mirrored-wrapped value, but over + parameter devices instead of worker devices). + * N: value is "mirrored" across all the "non-slot" devices + + Rules for methods with respect to locality and single-tower vs. + cross-tower context: + + * `with d.scope()`: default single-tower context -> cross-tower context for + `d` + * `with d.colocate_vars_with(v)`: in tower/cross-tower context, variables + will be created with locality V(`v`). That is, if we write + `with d.colocate_vars_with(v1): v2 = tf.get_variable(...)`, then + `v2` will have locality V(`v1`), i.e. locality V(`v2`) will equal + V(`v1`). + * `with d.colocate_vars_with(d.non_slot_devices(...))`: in + tower/cross-tower context, variables will be created with locality N + * `v = tf.get_variable(...)`: in tower/cross-tower context, creates + a variable (which by definition will have locality V(`v`), though + will match another locality if inside a `colocate_vars_with` + scope). + * `d.distribute_dataset(dataset)`: in cross-tower context, produces an + iterator with locality T + * `d.broadcast(t)`: in cross-tower context, produces a value with locality M + * `d.broadcast(t, v)`: in cross-tower context, produces a value with + locality V(`v`) + * `d.call_for_each_tower(fn, ...)`: in cross-tower context, runs + `fn()` in a tower context (and so may call `get_tower_context()` and + use its API, including `merge_call()` to get back to cross-tower + context), once for each tower. May use values with locality T or + M, and any variable. + * `d.reduce(m, t)`: in cross-tower context, accepts t with locality T + and produces a value with locality M. + * `d.reduce(m, t, v)`: in cross-tower context, accepts t with + locality T and produces a value with locality V(`v`). + * `d.batch_reduce(m, [(t, v)]): see `d.reduce()` + * `d.update(v, fn, ...)`: in cross-tower context, runs `fn()` once + for each device `v` is copied to, all inputs should have locality + V(`v`), output will have locality V(`v`) as well. + * `d.update_non_slot(d.non_slot_devices(), fn)`: in cross-tower + context, like `d.update()` except with locality N. + * `d.fetch(t)`: Copy `t` with any locality to the client's CPU device. + + The standard pattern for updating variables is to: + + 1. Wrap your input dataset in `d.distribute_dataset()`. + 2. Define each tower `d.call_for_each_tower()` up to the point of + getting a list of gradient, variable pairs. + 3. Call `d.reduce("sum", t, v)` or `d.batch_reduce()` to sum the + gradients (with locality T) into values with locality V(`v`). + 4. Call `d.update(v)` for each variable to update its value. + + Steps 3 and 4 are done automatically by class `Optimizer` if you call + its `apply_gradients` method in a tower context. Otherwise you can + manually call its `distributed_apply` method in a cross-tower context. + + Another thing you might want to do in the middle of your tower function + is an all-reduce of some intermediate value, using `d.reduce()` or + `d.batch_reduce()` without supplying a variable as the destination. + + Layers should expect to be called in a tower context, and can use + the `get_tower_context()` function to get a `TowerContext` object. The + `TowerContext` object has a `merge_call()` method for entering + cross-tower context where you can use `reduce()` (or + `batch_reduce()`) and then optionally `update()` to update state. + + You may use this API whether or not a `DistributionStrategy` is + being used, since there is a default implementation of + `TowerContext` and `DistributionStrategy`. Or you can use the + `get_tower_context().is_single_tower` property to run different code + in the distributed vs. single tower cases. + """ + + # TODO(josh11b): Raise an exception if variable paritioning requested before + # we add support. + # TODO(josh11b): Also `parameter_device_index` property? + # TODO(josh11b): `map()` + # TODO(josh11b): ClusterSpec/ClusterResolver + # TODO(josh11b): Partitioned computations, state; sharding + # TODO(josh11b): Model parallelism: "towers" with multiple devices; shuffling + # TODO(josh11b): Tower-local variables + # TODO(josh11b): List of towers with their worker and parameter devices + # (where the parameter devices may overlap in the ps case). + + def scope(self): + """Returns a context manager selecting this DistributionStrategy as current. + + Inside a `with distribution_strategy.scope():` code block, this thread + will use a variable creator set by `distribution_strategy`, and will + enter its "cross-tower context". + + Returns: + A context manager. + """ + if has_distribution_strategy(): + _require_cross_tower_context(self) + return _SameScopeAgainContext(self) + + def creator_with_resource_vars(*args, **kwargs): + _require_distribution_strategy_scope(self) + kwargs["use_resource"] = True + return self._create_variable(*args, **kwargs) + + return _CurrentDistributionContext( + self, variable_scope.variable_creator_scope(creator_with_resource_vars)) + + def _create_variable(self, next_creator, *args, **kwargs): + # Note: should support "colocate_with" argument. + raise NotImplementedError("must be implemented in descendants") + + def colocate_vars_with(self, colocate_with_variable): + """Controls which devices variables will be created on. + + Note this may only be used inside `self.scope()`. + + Example usage: + + ``` + with distribution_strategy.scope(): + var1 = tf.get_variable(...) + with distribution_strategy.colocate_vars_with(v1): + # var2 and var3 will be created on the same device(s) as var1 + var2 = tf.get_variable(...) + var3 = tf.get_variable(...) + + def fn(v1, v2, v3): + # operates on v1 from var1, v2 from var2, and v3 from var3 + + # `fn` runs on every device `v1` is on, `v2` and `v3` will be there too. + distribution_strategy.update(v1, fn, v2, v3) + ``` + + Args: + colocate_with_variable: A created in `self.scope()`. Variables created + while in the returned context manager will be on the same set of + devices as `colocate_with_variable`. + + Returns: + A context manager. + """ + def create_colocated_variable(next_creator, *args, **kwargs): + _require_distribution_strategy_scope(self) + kwargs["use_resource"] = True + kwargs["colocate_with"] = colocate_with_variable + return next_creator(*args, **kwargs) + + _require_distribution_strategy_scope(self) + return variable_scope.variable_creator_scope(create_colocated_variable) + + # TODO(josh11b): Currently this returns an iterator, but should return + # something implementing (a subset of) the Dataset API. + def distribute_dataset(self, dataset): + """Return an iterator into `dataset` split across all towers. + + Suitable for providing input to for `call_for_each_tower()`, as in: + + ``` + with distribution_strategy.scope(): + iterator = distribution_strategy.distribute_dataset(dataset) + tower_results = distribution_strategy.call_for_each_tower( + tower_fn, iterator.get_next()) + ``` + + Args: + dataset: A `tf.data.Dataset`. + + Returns: + A Dataset iterator that will produce separate splits for each tower. + """ + raise NotImplementedError("must be implemented in descendants") + + def broadcast(self, tensor, destinations=None): + """Mirror a tensor on one device to all worker devices. + + Args: + tensor: A Tensor value to broadcast. + destinations: An optional mirrored variable, device string, or + list of device strings, specifying the destination devices + to copy `tensor` to. Defaults to `self.worker_devices`. + + Returns: + A value mirrored to `destinations` devices. + """ + # TODO(josh11b): More docstring + _require_cross_tower_context(self) + return self._broadcast(tensor, destinations) + + def _broadcast(self, tensor, destinations): + raise NotImplementedError("must be implemented in descendants") + + def call_for_each_tower(self, fn, *args, **kwargs): + """Run `fn` once per tower. + + `fn` may call `tf.get_tower_context()` to access methods such as + `tower_id()` and `merge_call()`. + + `merge_call()` is used to communicate betwen the towers and + re-enter the cross-tower context. All towers pause their execution + having encountered a `merge_call()` call. After that the + `merge_fn`-function is executed. Its results are then unwrapped and + given back to each tower call. After that execution resumes until + `fn` is complete or encounters another `merge_call()`. Example: + + ```python + # Called once in "cross-tower" context. + def merge_fn(distribution, three_plus_tower_id): + # sum the values across towers + return sum(distribution.unwrap(three_plus_tower_id)) + + # Called once per tower in `distribution`, in a "tower" context. + def fn(three): + tower_ctx = tf.get_tower_context() + v = three + tower_ctx.tower_id + # Computes the sum of the `v` values across all towers. + s = tower_ctx.merge_call(merge_fn, v) + return s + v + + with distribution.scope(): + # in "cross-tower" context + ... + merged_results = distribution.call_for_each_tower(fn, 3) + # merged_results has the values from every tower execution of `fn`. + print(distribution.unwrap(merged_results)) # Prints a list + ``` + + Args: + fn: function to run (will be run once per tower). + *args: positional arguments for `fn` + **kwargs: keyword arguments for `fn`. + `"run_concurrently"`: Boolean indicating whether executions of `fn` + can be run concurrently (under eager execution only), defaults to + `True`. + + Returns: + Merged return value of `fn` across all towers. + """ + _require_cross_tower_context(self) + return self._call_for_each_tower(fn, *args, **kwargs) + + def _call_for_each_tower(self, fn, *args, **kwargs): + raise NotImplementedError("must be implemented in descendants") + + def reduce(self, method_string, value, destinations=None): + """Combine (via e.g. sum or mean) values across towers. + + Args: + method_string: A string indicating how to combine values, either + "sum" or "mean". + value: A per-device value with one value per tower. + destinations: An optional mirrored variable, a device string, + list of device strings. The return value will be copied to all + destination devices (or all the devices where the mirrored + variable resides). If `None` or unspecified, the destinations + will match the devices `value` resides on. + + Returns: + A value mirrored to `destinations`. + """ + # TODO(josh11b): More docstring + # TODO(josh11b): Return an unwrapped value if colocate_with is a + # single device. + _require_cross_tower_context(self) + return self._reduce(method_string, value, destinations) + + def _reduce(self, method_string, value, destinations): + raise NotImplementedError("must be implemented in descendants") + + def batch_reduce(self, method_string, value_destination_pairs): + """Combine multiple `reduce` calls into one for faster execution. + + Args: + method_string: A string indicating how to combine values, either + "sum" or "mean". + value_destination_pairs: A sequence of (value, destinations) + pairs. See `reduce()` for a description. + + Returns: + A list of mirrored values, one per pair in `value_destination_pairs`. + """ + # TODO(josh11b): More docstring + _require_cross_tower_context(self) + assert method_string in ("sum", "mean") + return self._batch_reduce(method_string, value_destination_pairs) + + def _batch_reduce(self, method_string, value_destination_pairs): + return [self.reduce(method_string, t, destinations=v) + for t, v in value_destination_pairs] + + def update(self, var, fn, *args, **kwargs): + """Run `fn` to update `var` using inputs mirrored to the same devices. + + If `var` is mirrored across multiple devices, then this implements + logic like: + + ``` + results = {} + for device, v in var: + with tf.device(device): + # *args and **kwargs will be unwrapped if they are mirrored. + results[device] = fn(v, *args, **kwargs) + return merged(results) + ``` + + Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.' + + Neither *args nor **kwargs may contain per-device values. + If they contain mirrored values, they will be unwrapped before + calling `fn`. + + Args: + var: Variable, possibly mirrored to multiple devices, to operate on. + fn: Function to call. Should take the variable as the first argument. + *args: Additional positional arguments to pass to `fn()`. + **kwargs: Keyword arguments to pass to `fn()`. + + Returns: + Merged return value of `fn` across all towers. + """ + _require_cross_tower_context(self) + return self._update(var, fn, *args, **kwargs) + + def _update(self, var, fn, *args, **kwargs): + raise NotImplementedError("must be implemented in descendants") + + def update_non_slot(self, colocate_with, fn, *args, **kwargs): + """Runs `fn(*args, **kwargs)` on `colocate_with` devices. + + Args: + colocate_with: The return value of `non_slot_devices()`. + fn: Function to execute. + *args: Positional arguments to pass to `fn()`. + **kwargs: Keyword arguments to pass to `fn()`. + + Returns: + Return value of `fn`, possibly merged across devices. + """ + _require_cross_tower_context(self) + return self._update_non_slot(colocate_with, fn, *args, **kwargs) + + def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + raise NotImplementedError("must be implemented in descendants") + + def fetch(self, val, destination="/device:CPU:0", fn=lambda x: x): + """Return a copy of `val` or `fn(val)` on `destination`. + + This is useful for getting a mirrored value onto a device. It + will attempt to avoid a copy by checking if the value is already + on the destination device. + + Args: + val: Value (which may be mirrored) to copy. + destination: A device string to copy the value to. + fn: An optional function to apply to the value on the source + device, before copying. + + Returns: + A `Tensor` on `destination`. + """ + _require_cross_tower_context(self) + return self._fetch(val, destination, fn) + + def _fetch(self, val, destination, fn): + raise NotImplementedError("must be implemented in descendants") + + def unwrap(self, value): + """Returns the list of all per-device values contained in `value`. + + Args: + value: A value returned by `call_for_each_tower()` or a variable + created in `scope()`. + + Returns: + A list of values contained in `value`. If `value` represents a single + value, this returns `[value].` + """ + _require_cross_tower_context(self) + return self._unwrap(value) + + def _unwrap(self, distributed_value): + raise NotImplementedError("must be implemented in descendants") + + def group(self, value, name=None): + """Shortcut for `tf.group(distribution.unwrap(value))`.""" + value = nest.flatten(self.unwrap(value)) + + if len(value) != 1 or name is not None: + return control_flow_ops.group(value, name=name) + # Special handling for the common case of one op. + v, = value + if isinstance(v, ops.Tensor): + v = v.op + return v + + @property + def is_single_tower(self): + """Returns whether there is a single tower or multiple. + + Returns: + A boolean. If `True`, `call_for_each_tower(fn)` will only call `fn` once. + If `False`, `call_for_each_tower(fn)` may call `fn` multiple times. + """ + raise NotImplementedError("must be implemented in descendants") + + @property + def num_towers(self): + """Returns number of towers, for purposes of averaging across towers.""" + raise NotImplementedError("must be implemented in descendants") + + @property + def worker_devices(self): + """Returns the list of devices used to run `call_for_each_tower()` calls.""" + # TODO(josh11b): More docstring + raise NotImplementedError("must be implemented in descendants") + + @property + def parameter_devices(self): + """Returns the list of devices used for variable and `update` placement.""" + # TODO(josh11b): More docstring + raise NotImplementedError("must be implemented in descendants") + + def non_slot_devices(self, var_list): + """Device(s) for non-slot variables. + + Create variables on these devices in a + `with colocate_vars_with(non_slot_devices(...)):` block. + Update those using `update_non_slot()`. + + Args: + var_list: The list of variables being optimized, needed with the + default `DistributionStrategy`. + """ + raise NotImplementedError("must be implemented in descendants") + + @property + def worker_device_index(self): + """An object mapping worker device to an id. + + This might be passed as an argument to `call_for_each_tower()`, as in: + + ``` + with distribution_strategy.scope(): + + def fn(device_id): + # device_id is an integer. `fn` is being executed on device: + # distribution_strategy.worker_devices[device_id]. + + distribution_strategy.call_for_each_tower( + fn, distribution_strategy.worker_device_index) + ``` + + Returns: + An index object, or the integer 0 if there is only a single tower. + """ + _require_cross_tower_context(self) + return self._worker_device_index() + + def _worker_device_index(self): + raise NotImplementedError("must be implemented in descendants") + + +# A note about the difference between the context managers +# `TowerContext` (defined here) and `_CurrentDistributionContext` +# (defined above) used by `DistributionStrategy.scope()`: +# +# * a TowerContext is only present during a `call_for_each_tower()` +# call (except during a `merge_run` call) and in such a scope it +# will be returned by calls to `get_tower_context()`. Implementers of new +# DistributionStrategy descendants will frequently also need to +# define a descendant of TowerContext, and are responsible for +# entering and exiting this context. +# +# * DistributionStrategy.scope() sets up a variable_creator scope that +# changes variable creation calls (e.g. to make mirrored +# variables). This is intended as an outer scope that users enter once +# around their model creation and graph definition. There is no +# anticipated need to define descendants of _CurrentDistributionContext. +# It sets the current DistributionStrategy for purposes of +# `get_distribution_strategy()` and `has_distribution_strategy()` +# and switches the thread mode to a "cross-tower context". +class TowerContext(object): + """DistributionStrategy API inside a `call_for_each_tower()` call.""" + + def __init__(self, distribution_strategy, tower_id): + self._distribution_strategy = distribution_strategy + self._thread_context = _InTowerThreadMode(self) + self._tower_id = tower_id + + def __enter__(self): + _push_per_thread_mode(self._thread_context) + + def __exit__(self, exception_type, exception_value, traceback): + _pop_per_thread_mode() + + def merge_call(self, merge_fn, *args, **kwargs): + """Merge args across towers and run `merge_fn` in a cross-tower context. + + This allows communication and coordination when there are multiple calls + to a model function triggered by a call to + `distribution.call_for_each_tower(model_fn, ...)`. + + See `MirroredDistribution.call_for_each_tower()` for an explanation. + + Otherwise, this is equivalent to: + + ``` + distribution = get_distribution_strategy() + with cross-tower-context(distribution): + return merge_fn(distribution, *args, **kwargs) + ``` + + Args: + merge_fn: function that joins arguments from threads that are given as + PerDevice. It accepts `DistributionStrategy` object as the first + argument. + *args: positional per-thread arguments for `merge_fn` + **kwargs: keyword per-thread arguments for `merge_fn`. + + Returns: + The return value of `merge_fn`, except for `PerDevice` values which are + unpacked. + """ + require_tower_context(self) + return self._merge_call(merge_fn, *args, **kwargs) + + def _merge_call(self, merge_fn, *args, **kwargs): + """Default implementation for single tower.""" + _push_per_thread_mode( # thread-local, so not needed with multiple threads + _CrossTowerThreadMode(self._distribution_strategy)) + try: + return merge_fn(self._distribution_strategy, *args, **kwargs) + finally: + _pop_per_thread_mode() + + @property + def is_single_tower(self): + """Returns whether there is a single tower or multiple.""" + require_tower_context(self) + return self._distribution_strategy.is_single_tower + + @property + def num_towers(self): + """Returns number of towers, for purposes of averaging across towers.""" + return self._distribution_strategy.num_towers + + @property + def tower_id(self): + """Which tower is being defined, a number from 0 to `num_towers - 1`.""" + require_tower_context(self) + return self._tower_id + + @property + def distribution_strategy(self): + """The current `DistributionStrategy` object.""" + return self._distribution_strategy + + @property + def device(self): + """The device this tower is to be executed on, as a string.""" + require_tower_context(self) + return device_util.current() + + # TODO(josh11b): Implement `start_all_reduce(method, t)` that returns + # a function returning the result of reducing `t` across all + # towers. Most likely can be implemented in terms of `merge_call()` + # and `batch_reduce()`. + +# ------------------------------------------------------------------------------ + + +class _DefaultDistributionStrategy(DistributionStrategy): + """Default `DistributionStrategy` if none is explicitly selected.""" + + def scope(self): + """Context manager setting a variable creator and `self` as current.""" + if has_distribution_strategy(): + raise RuntimeError("Must not nest DistributionStrategy scopes.") + + def creator(next_creator, *args, **kwargs): + _require_distribution_strategy_scope(self) + return next_creator(*args, **kwargs) + + return _CurrentDistributionContext( + self, variable_scope.variable_creator_scope(creator)) + + def colocate_vars_with(self, colocate_with_variable): + """Does not require `self.scope`.""" + def create_colocated_variable(next_creator, *args, **kwargs): + _require_distribution_strategy_scope(self) + with ops.colocate_with(colocate_with_variable): + return next_creator(*args, **kwargs) + + _require_distribution_strategy_scope(self) + return variable_scope.variable_creator_scope(create_colocated_variable) + + def distribute_dataset(self, dataset): + # TODO(josh11b): Support for this when executing eagerly is currently only + # in contrib. + return dataset.make_one_shot_iterator() + + def _broadcast(self, tensor, destinations): + if destinations is None: + return tensor + else: + raise NotImplementedError("TODO") + + def _call_for_each_tower(self, fn, *args, **kwargs): + # We don't run `fn` in multiple threads in _DefaultDistributionStrategy. + kwargs.pop("run_concurrently", None) + with TowerContext(self, tower_id=0): + return fn(*args, **kwargs) + + def _reduce(self, method_string, value, destinations): + # TODO(josh11b): Use destinations? + del method_string, destinations + return value + + def _update(self, var, fn, *args, **kwargs): + # TODO(josh11b): Figure out what we should be passing to UpdateContext() + # once that value is used for something. + with ops.colocate_with(var), UpdateContext(var): + return fn(var, *args, **kwargs) + + def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + # TODO(josh11b): Figure out what we should be passing to UpdateContext() + # once that value is used for something. + with ops.colocate_with(colocate_with), UpdateContext(colocate_with): + return fn(*args, **kwargs) + + def _fetch(self, var, destination, fn): + with ops.colocate_with(var): + var = fn(var) + with ops.device(destination): + return array_ops.identity(var) + + def _unwrap(self, distributed_value): + return [distributed_value] + + @property + def is_single_tower(self): + return True + + @property + def num_towers(self): + return 1 + + @property + def worker_devices(self): + raise RuntimeError( + "worker_devices() method unsupported by _DefaultDistributionStrategy.") + + @property + def parameter_devices(self): + raise RuntimeError("parameter_devices() method unsupported by " + "_DefaultDistributionStrategy.") + + def non_slot_devices(self, var_list): + return min(var_list, key=lambda x: x.name) + + def _worker_device_index(self): + raise RuntimeError("worker_device_index() method unsupported by " + "_DefaultDistributionStrategy.") + + +# ------------------------------------------------------------------------------ +# Singletons + +_default_distribution_strategy = _DefaultDistributionStrategy() +_default_tower_context = TowerContext( + _default_distribution_strategy, tower_id=0) +_default_tower_mode = _DefaultTowerThreadMode() diff --git a/tensorflow/python/training/distribute_test.py b/tensorflow/python/training/distribute_test.py new file mode 100644 index 0000000000..0a4f19c31f --- /dev/null +++ b/tensorflow/python/training/distribute_test.py @@ -0,0 +1,104 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test DistributionStrategy, TowerContext, and supporting APIs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import variable_scope +from tensorflow.python.platform import test +from tensorflow.python.training import distribute + + +class _TestTowerContext(distribute.TowerContext): + + def merge_call(self, fn, *args, **kwargs): + return kwargs["test_arg"] + + +class _TestStrategy(distribute.DistributionStrategy): + + def _call_for_each_tower(self, fn, *args, **kwargs): + with _TestTowerContext(self, tower_id=0): + return fn(*args, **kwargs) + + def _create_variable(self, next_creator, *args, **kwargs): + return kwargs["name"] + + +def _assert_in_default_state(t): + t.assertIs(distribute._default_tower_context, + distribute.get_tower_context()) + t.assertIs(None, distribute.get_cross_tower_context()) + t.assertIs(distribute._default_distribution_strategy, + distribute.get_distribution_strategy()) + t.assertFalse(distribute.has_distribution_strategy()) + + +class TestStrategyTest(test.TestCase): + + def testCallForEachTower(self): + _assert_in_default_state(self) + dist = _TestStrategy() + + def run_fn(): + tower_context = distribute.get_tower_context() + self.assertTrue(tower_context is not None) + self.assertIs(None, distribute.get_cross_tower_context()) + self.assertTrue(distribute.has_distribution_strategy()) + self.assertIs(dist, distribute.get_distribution_strategy()) + self.assertEqual("foo", tower_context.merge_call(None, test_arg="foo")) + self.assertEqual("bar", variable_scope.variable(1.0, name="bar")) + + with self.assertRaises(RuntimeError): + dist.call_for_each_tower(run_fn) + with dist.scope(): + dist.call_for_each_tower(run_fn) + _assert_in_default_state(self) + + def testScope(self): + _assert_in_default_state(self) + dist = _TestStrategy() + with dist.scope(): + self.assertIs(None, distribute.get_tower_context()) + self.assertIs(dist, distribute.get_cross_tower_context()) + self.assertTrue(distribute.has_distribution_strategy()) + self.assertIs(dist, distribute.get_distribution_strategy()) + self.assertEqual("baz", variable_scope.variable(1.0, name="baz")) + _assert_in_default_state(self) + + +class DefaultDistributionStrategyTest(test.TestCase): + + def testMergeCall(self): + _assert_in_default_state(self) + + def merge_fn(dist, s): + self.assertIs(distribute._default_distribution_strategy, dist) + self.assertIs(None, distribute.get_tower_context()) + self.assertIs(dist, distribute.get_cross_tower_context()) + self.assertIs(dist, distribute.get_distribution_strategy()) + self.assertFalse(distribute.has_distribution_strategy()) + return "foo_" + s + + tower_ctx = distribute.get_tower_context() + self.assertIs(distribute._default_tower_context, tower_ctx) + self.assertEqual("foo_bar", tower_ctx.merge_call(merge_fn, "bar")) + _assert_in_default_state(self) + + +if __name__ == "__main__": + test.main() -- GitLab From b16ec315e7e9d41645634398da202629c3baa5af Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Mar 2018 22:30:15 -0700 Subject: [PATCH 531/960] Use is_resource_variable() in train.assert_gloabl_step. PiperOrigin-RevId: 190573872 --- tensorflow/python/training/training_util.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py index 4f1abccc96..d05e1d2c83 100644 --- a/tensorflow/python/training/training_util.py +++ b/tensorflow/python/training/training_util.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_io @@ -31,7 +30,6 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export - # Picked a long key value to minimize the chance of collision with user defined # collection keys. GLOBAL_STEP_READ_KEY = 'global_step_read_op_cache' @@ -170,8 +168,7 @@ def assert_global_step(global_step_tensor): """ if not (isinstance(global_step_tensor, variables.Variable) or isinstance(global_step_tensor, ops.Tensor) or - isinstance(global_step_tensor, - resource_variable_ops.ResourceVariable)): + resource_variable_ops.is_resource_variable(global_step_tensor)): raise TypeError( 'Existing "global_step" must be a Variable or Tensor: %s.' % global_step_tensor) -- GitLab From 307794e156bc21b2f122bf5e7d907299392023c5 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 26 Mar 2018 22:44:27 -0700 Subject: [PATCH 532/960] [XLA:CPU] Allow the shape partition algorithm to partition the most minor dimension. The current shape paritition algorithm does not partition the most minor dimension, because doing so causes dynamic loop bounds for the inner loop and used to prohibit LLVM vectorization. This constraint has been removed with revision 328478 and LLVM can now vectorize loops with dynamic bounds. Allow partitioning the most minor dimension is also necessary to support the parallelization of matrix-vector multiplication. Adjust shape_partition_test to reflect this change in the shape partition algorithm. PiperOrigin-RevId: 190574615 --- .../xla/service/cpu/shape_partition.cc | 5 +- .../xla/service/cpu/shape_partition_test.cc | 116 ++++++------------ .../exhaustive_f32_elementwise_op_test.cc | 4 +- 3 files changed, 43 insertions(+), 82 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition.cc b/tensorflow/compiler/xla/service/cpu/shape_partition.cc index 61b408b8c2..42fe955f19 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition.cc +++ b/tensorflow/compiler/xla/service/cpu/shape_partition.cc @@ -20,12 +20,13 @@ namespace cpu { std::vector ShapePartitionAssigner::Run(int64 target_partition_count) { // Gather outer-most dims where dim_size >= 'target_partition_count'. - // Note: always leave inner-dim static for vectorization/optimizations. + // This may include the inner-dim as LLVM can vectorize loops with dynamic + // bounds. std::vector outer_dims; int64 outer_dim_size = 1; // TODO(b/27458679) Consider reserving enough minor dimensions (based on // target vector register width) to enable vector instructions. - for (int i = shape_.layout().minor_to_major_size() - 1; i >= 1; --i) { + for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) { const int64 dimension = shape_.layout().minor_to_major(i); outer_dims.push_back(dimension); outer_dim_size *= shape_.dimensions(dimension); diff --git a/tensorflow/compiler/xla/service/cpu/shape_partition_test.cc b/tensorflow/compiler/xla/service/cpu/shape_partition_test.cc index ee0c53fa6d..ae80a6f497 100644 --- a/tensorflow/compiler/xla/service/cpu/shape_partition_test.cc +++ b/tensorflow/compiler/xla/service/cpu/shape_partition_test.cc @@ -30,105 +30,65 @@ class ShapePartitionAssignerTest : public HloTestBase { protected: typedef std::vector Vec; - void RunR2Test(const Shape& shape, const int64 expected_max_partition_count) { + void RunR2Test(const Shape& shape, int64 max_target_partition_count, + const std::vector* expected_partitions) { ShapePartitionAssigner assigner(shape); - // Check all partitions of outer dimension. - for (int64 i = 1; i <= expected_max_partition_count; ++i) { - EXPECT_TRUE(ContainersEqual(Vec({i}), - assigner.Run(/*target_partition_count=*/i))); + // Iterate through 1..max_target_partition_count. + for (int64 i = 1; i <= max_target_partition_count; ++i) { + std::vector actual_partitions = + assigner.Run(/*target_partition_count=*/i); + EXPECT_THAT(actual_partitions, expected_partitions[i - 1]); } - // Check target_partition_count > outer dimension size. - EXPECT_TRUE(ContainersEqual( - Vec({expected_max_partition_count}), - assigner.Run( - /*target_partition_count=*/expected_max_partition_count + 1))); } }; TEST_F(ShapePartitionAssignerTest, Shape13WithLayout10) { - RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {1, 3}, {1, 0}), 1); + std::vector expected_partitions[] = {{1} /* 1 */, {1, 2} /* 2 */}; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {1, 3}, {1, 0}), 2, + expected_partitions); } TEST_F(ShapePartitionAssignerTest, Shape31WithLayout01) { - RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {3, 1}, {0, 1}), 1); + std::vector expected_partitions[] = { + {1} /* 1 */, {1, 2} /* 2 */ + }; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {3, 1}, {0, 1}), 2, + expected_partitions); } TEST_F(ShapePartitionAssignerTest, Shape53WithLayout10) { - RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {1, 0}), 5); + std::vector expected_partitions[] = {{1} /* 1 */, {2} /* 2 */, + {3} /* 3 */, {4} /* 4 */, + {5} /* 5 */, {3, 2} /* 6 */}; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {1, 0}), 6, + expected_partitions); } TEST_F(ShapePartitionAssignerTest, Shape53WithLayout01) { - RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {0, 1}), 3); + std::vector expected_partitions[] = { + {1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {2, 2} /* 4 */}; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {0, 1}), 4, + expected_partitions); } TEST_F(ShapePartitionAssignerTest, Shape532WithLayout210) { - Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 1, 0}); - ShapePartitionAssigner assigner(shape); - - for (int64 i = 1; i <= 5; ++i) { - EXPECT_TRUE(ContainersEqual(Vec({i}), assigner.Run( - /*target_partition_count=*/i))); - } - - EXPECT_TRUE( - ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/6))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/7))); - EXPECT_TRUE( - ContainersEqual(Vec({4, 2}), assigner.Run(/*target_partition_count=*/8))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 3}), assigner.Run(/*target_partition_count=*/9))); - EXPECT_TRUE(ContainersEqual(Vec({3, 3}), - assigner.Run(/*target_partition_count=*/10))); - EXPECT_TRUE(ContainersEqual(Vec({3, 3}), - assigner.Run(/*target_partition_count=*/11))); - EXPECT_TRUE(ContainersEqual(Vec({4, 3}), - assigner.Run(/*target_partition_count=*/12))); - EXPECT_TRUE(ContainersEqual(Vec({4, 3}), - assigner.Run(/*target_partition_count=*/13))); - EXPECT_TRUE(ContainersEqual(Vec({4, 3}), - assigner.Run(/*target_partition_count=*/14))); - EXPECT_TRUE(ContainersEqual(Vec({5, 3}), - assigner.Run(/*target_partition_count=*/15))); - EXPECT_TRUE(ContainersEqual(Vec({5, 3}), - assigner.Run(/*target_partition_count=*/16))); + std::vector expected_partitions[] = { + {1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {4} /* 4 */, + {5} /* 5 */, {3, 2} /* 6 */, {3, 2} /* 7 */, {4, 2} /* 8 */, + {3, 3} /* 9 */, {3, 3} /* 10 */, {3, 3} /* 11 */, {4, 3} /* 12 */, + {4, 3} /* 13 */, {4, 3} /* 14 */, {5, 3} /* 15 */, {4, 2, 2} /* 16 */}; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 1, 0}), 16, + expected_partitions); } TEST_F(ShapePartitionAssignerTest, Shape532WithLayout201) { - Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 0, 1}); - ShapePartitionAssigner assigner(shape); - - for (int64 i = 1; i <= 3; ++i) { - EXPECT_TRUE(ContainersEqual(Vec({i}), assigner.Run( - /*target_partition_count=*/i))); - } - - EXPECT_TRUE( - ContainersEqual(Vec({2, 2}), assigner.Run(/*target_partition_count=*/4))); - EXPECT_TRUE( - ContainersEqual(Vec({2, 2}), assigner.Run(/*target_partition_count=*/5))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/6))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/7))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/8))); - EXPECT_TRUE( - ContainersEqual(Vec({3, 3}), assigner.Run(/*target_partition_count=*/9))); - EXPECT_TRUE(ContainersEqual(Vec({3, 3}), - assigner.Run(/*target_partition_count=*/10))); - EXPECT_TRUE(ContainersEqual(Vec({3, 3}), - assigner.Run(/*target_partition_count=*/11))); - EXPECT_TRUE(ContainersEqual(Vec({3, 4}), - assigner.Run(/*target_partition_count=*/12))); - EXPECT_TRUE(ContainersEqual(Vec({3, 4}), - assigner.Run(/*target_partition_count=*/13))); - EXPECT_TRUE(ContainersEqual(Vec({3, 4}), - assigner.Run(/*target_partition_count=*/14))); - EXPECT_TRUE(ContainersEqual(Vec({3, 5}), - assigner.Run(/*target_partition_count=*/15))); - EXPECT_TRUE(ContainersEqual(Vec({3, 5}), - assigner.Run(/*target_partition_count=*/16))); + std::vector expected_partitions[] = { + {1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {2, 2} /* 4 */, + {2, 2} /* 5 */, {3, 2} /* 6 */, {3, 2} /* 7 */, {3, 2} /* 8 */, + {3, 3} /* 9 */, {3, 3} /* 10 */, {3, 3} /* 11 */, {3, 4} /* 12 */, + {3, 4} /* 13 */, {3, 4} /* 14 */, {3, 5} /* 15 */, {3, 2, 2} /* 16 */}; + RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 0, 1}), 16, + expected_partitions); } class ShapePartitionIteratorTest : public HloTestBase { diff --git a/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc b/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc index 6fe7737de7..b28fe0c15a 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc @@ -71,8 +71,8 @@ XLA_TEST_P(ExhaustiveF32ElementwiseOpTest, LogF32) { #ifdef XLA_TEST_BACKEND_CPU // TODO(b/73141998): The vectorized Log implementation gives results outside // our error spec in this range (these numbers are bitwise representations of - // floats expressed as a zero extended int64): - std::pair known_incorrect_range = {1, 8315654}; + // floats expressed as a zero extended int64). + std::pair known_incorrect_range = {1, 8388608}; #else std::pair known_incorrect_range = {0, 0}; #endif -- GitLab From 1c38584cb9793642928bf888be1a98698d3b8c44 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Mon, 26 Mar 2018 23:34:05 -0700 Subject: [PATCH 533/960] Fix acknowledgment to say "Blade Team of Tencent" in security.md file. Team is incorrectly referred to as "TenCent Blade Team" PiperOrigin-RevId: 190577449 --- SECURITY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 5ca304404d..a5ce3a62ee 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -244,7 +244,7 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= ### Known vulnerabilities -| Type | Versions affected | Reported by | Additional Information | -|--------------------|:-----------------:|--------------------|-----------------------------| -| Out Of Bounds Read | <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| Type | Versions affected | Reported by | Additional Information | +|--------------------|:-----------------:|-----------------------|-----------------------------| +| Out Of Bounds Read | <=1.4 | Blade Team of Tencent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From c66b2ed3c23240be3d6a4a609e5b87c109fb0cea Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 27 Mar 2018 00:02:42 -0700 Subject: [PATCH 534/960] Remove broken ibiblio url I suspect ibiblio selectively mirrors or perhaps only mirrors highly popular artifacts. PiperOrigin-RevId: 190578860 --- tensorflow/workspace.bzl | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ebb9e9412f..206a5a3d99 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -576,7 +576,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): jar_urls = [ "http://mirror.bazel.build/repo1.maven.org/maven2/com/google/testing/compile/compile-testing/0.11/compile-testing-0.11.jar", "http://repo1.maven.org/maven2/com/google/testing/compile/compile-testing/0.11/compile-testing-0.11.jar", - "http://maven.ibiblio.org/maven2/com/google/testing/compile/compile-testing/0.11/compile-testing-0.11.jar", ], licenses = ["notice"], # New BSD License testonly_ = True, -- GitLab From 7555534be3c6138cbcca138556fe4dbf4cc6b8ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 00:30:02 -0700 Subject: [PATCH 535/960] Handle out of range values when casting from floating point to integer in quantize. PiperOrigin-RevId: 190580805 --- .../lite/kernels/internal/quantization_util.h | 69 +++++++++- .../internal/quantization_util_test.cc | 126 ++++++++++++++++++ .../toco/graph_transformations/quantize.cc | 7 +- 3 files changed, 195 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h index f7706c7938..9a04b76e56 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -97,6 +97,71 @@ QuantizationParams ChooseQuantizationParams(double rmin, double rmax) { return quantization_params; } +// Converts a floating-point number to an integer. For all inputs x where +// static_cast(x) is legal according to the C++ standard, the result +// is identical to that cast (i.e. the result is x with its fractional part +// truncated whenever that is representable as IntOut). +// +// static_cast would cause undefined behavior for the following cases, which +// have well-defined behavior for this function: +// +// 1. If x is NaN, the result is zero. +// +// 2. If the truncated form of x is above the representable range of IntOut, +// the result is std::numeric_limits::max(). +// +// 3. If the truncated form of x is below the representable range of IntOut, +// the result is std::numeric_limits::min(). +// +// Note that cases #2 and #3 cover infinities as well as finite numbers. +// +// The range of FloatIn must include the range of IntOut, otherwise +// the results are undefined. +// TODO(sfeuz): Replace by absl::SafeCast once available. +template +IntOut SafeCast(FloatIn x) { + static_assert(!std::numeric_limits::is_integer, + "FloatIn is integer"); + static_assert(std::numeric_limits::is_integer, + "IntOut is not integer"); + static_assert(std::numeric_limits::radix == 2, "IntOut is base 2"); + + // Special case NaN, for which the logic below doesn't work. + if (std::isnan(x)) { + return 0; + } + + // Negative values all clip to zero for unsigned results. + if (!std::numeric_limits::is_signed && x < 0) { + return 0; + } + + // Handle infinities. + if (std::isinf(x)) { + return x < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); + } + + // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0), + // unless x is zero in which case exp == 0. Note that this implies that the + // magnitude of x is strictly less than 2^exp. + int exp = 0; + std::frexp(x, &exp); + + // Let N be the number of non-sign bits in the representation of IntOut. If + // the magnitude of x is strictly less than 2^N, the truncated version of x + // is representable as IntOut. The only representable integer for which this + // is not the case is kMin for signed types (i.e. -2^N), but that is covered + // by the fall-through below. + if (exp <= std::numeric_limits::digits) { + return x; + } + + // Handle numbers with magnitude >= 2^N. + return x < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); +} + // Decompose a double multiplier into a Q0.31 int32 representation of its // significand, and shift representation of NEGATIVE its exponent --- // this is intended as a RIGHT-shift. @@ -135,8 +200,8 @@ void PreprocessSoftmaxScaling(double beta, double input_scale, // Calculate the largest input that will result in a within-bounds intermediate // result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, // it must not overflow before we reduce the value by multiplication by the -// input multiplier. The negative radius is used as the minimum difference -// in Softmax. +// input multiplier. The negative radius is used as the minimum difference in +// Softmax. int CalculateInputRadius(int input_integer_bits, int input_left_shift); } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc index 4ae2085c30..3e9a3c29ee 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc @@ -22,6 +22,132 @@ namespace { using ::testing::Pair; +template +void RunSafeCastTests() { + const IntOut imax = std::numeric_limits::max(); + EXPECT_GT(imax, 0); + const IntOut imin = std::numeric_limits::min(); + const bool s = std::numeric_limits::is_signed; + if (s) { + EXPECT_LT(imin, 0); + } else { + EXPECT_EQ(0, imin); + } + + // Some basic tests. + EXPECT_EQ(SafeCast(static_cast(0.0)), 0); + EXPECT_EQ(SafeCast(static_cast(-0.0)), 0); + EXPECT_EQ(SafeCast(static_cast(0.99)), 0); + EXPECT_EQ(SafeCast(static_cast(1.0)), 1); + EXPECT_EQ(SafeCast(static_cast(1.01)), 1); + EXPECT_EQ(SafeCast(static_cast(1.99)), 1); + EXPECT_EQ(SafeCast(static_cast(2.0)), 2); + EXPECT_EQ(SafeCast(static_cast(2.01)), 2); + EXPECT_EQ(SafeCast(static_cast(-0.99)), 0); + EXPECT_EQ(SafeCast(static_cast(-1.0)), s ? -1 : 0); + EXPECT_EQ(SafeCast(static_cast(-1.01)), s ? -1 : 0); + EXPECT_EQ(SafeCast(static_cast(-1.99)), s ? -1 : 0); + EXPECT_EQ(SafeCast(static_cast(-2.0)), s ? -2 : 0); + EXPECT_EQ(SafeCast(static_cast(-2.01)), s ? -2 : 0); + EXPECT_EQ(SafeCast(static_cast(117.9)), 117); + EXPECT_EQ(SafeCast(static_cast(118.0)), 118); + EXPECT_EQ(SafeCast(static_cast(118.1)), 118); + EXPECT_EQ(SafeCast(static_cast(-117.9)), s ? -117 : 0); + EXPECT_EQ(SafeCast(static_cast(-118.0)), s ? -118 : 0); + EXPECT_EQ(SafeCast(static_cast(-118.1)), s ? -118 : 0); + + // Some edge cases. + EXPECT_EQ(SafeCast(std::numeric_limits::max()), imax); + EXPECT_EQ(SafeCast(std::numeric_limits::lowest()), imin); + EXPECT_EQ(SafeCast(std::numeric_limits::infinity()), imax); + EXPECT_EQ(SafeCast(-std::numeric_limits::infinity()), imin); + EXPECT_EQ(SafeCast(std::numeric_limits::quiet_NaN()), 0); + + // Some larger numbers. + if (sizeof(IntOut) >= 4 && sizeof(FloatIn) > 4) { + EXPECT_EQ(SafeCast(static_cast(0x76543210)), 0x76543210); + } + + if (sizeof(FloatIn) > sizeof(IntOut)) { + // Check values near imax. + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) + 0.1)), + imax); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) + 0.99)), + imax); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) + 1.0)), + imax); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) + 1.99)), + imax); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) + 2.0)), + imax); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 0.1)), + imax - 1); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 0.99)), + imax - 1); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 1.0)), + imax - 1); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 1.01)), + imax - 2); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 1.99)), + imax - 2); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 2.0)), + imax - 2); + EXPECT_EQ(SafeCast( + static_cast(static_cast(imax) - 2.01)), + imax - 3); + } + + // Check values considerably larger in magnitude than imin and imax + EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) * 2)), + imax); + EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) * 20)), + imax); + EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) * 100)), + imax); + EXPECT_EQ( + SafeCast(static_cast(static_cast(imin) * 2)), + imin); + EXPECT_EQ( + SafeCast(static_cast(static_cast(imin) * 20)), + imin); + EXPECT_EQ( + SafeCast(static_cast(static_cast(imin) * 100)), + imin); +} + +TEST(QuantizationUtilTest, SafeCast) { + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); + RunSafeCastTests(); +} + // Example taken from http://www.tensorflow.org/performance/quantization // // Quantized | Float diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index ad3f05274b..9679ea0a77 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -65,8 +65,6 @@ std::unique_ptr QuantizeBuffer( static_cast&>(buffer); auto* quantized_buffer = new Buffer; quantized_buffer->data.resize(float_buffer.data.size()); - const auto qmin = static_cast(std::numeric_limits>::min()); - const auto qmax = static_cast(std::numeric_limits>::max()); for (std::size_t i = 0; i < float_buffer.data.size(); i++) { const float src_val = float_buffer.data[i]; double scaled_val; // Astonishingly, using 'float' degrades accuracy just @@ -78,9 +76,8 @@ std::unique_ptr QuantizeBuffer( } else { scaled_val = quantization_params.zero_point + inverse_scale * src_val; } - const auto rounded_val = static_cast(std::round(scaled_val)); - const auto clamped_val = std::min(qmax, std::max(qmin, rounded_val)); - quantized_buffer->data[i] = static_cast>(clamped_val); + quantized_buffer->data[i] = + tflite::SafeCast>(std::round(scaled_val)); } return std::unique_ptr(quantized_buffer); } -- GitLab From 1c055f0679ea6cdae28b3c78c3bf98cb40f00e13 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 03:23:58 -0700 Subject: [PATCH 536/960] Avoid reading the input file twice for InitializableLookupTable in combination with HashTable. Before this cl, TextFileLineIterator::total_size() was called for HashTable::DoPrepare, even though HashTable::DoPrepare ignores the size parameter. In order to have a result ready for TextFileLineIterator::total_size(), Init() called GetNumLinesInTextFile(), which read the whole file. Just to throw away the result :-/ This cl: - adds a DoLazyPrepare, that gets a functor to get the size, only if needed. - add HashTable::DoLazyPrepare which does not call this functor. - modify TextFileLineIterator::Init() to not call GetNumLinesInTextFile() anymore, when vocab_size was given as -1. - modify TextFileLineIterator::total_size() to call GetNumLinesInTextFile() lazily on the first call, if vocab_size_ was passed as -1. PiperOrigin-RevId: 190593744 --- .../kernels/initializable_lookup_table.cc | 2 +- .../core/kernels/initializable_lookup_table.h | 12 ++++++++++ tensorflow/core/kernels/lookup_table_op.h | 5 ++++ tensorflow/core/kernels/lookup_util.cc | 24 +++++++++++++------ 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/initializable_lookup_table.cc b/tensorflow/core/kernels/initializable_lookup_table.cc index 9c428cdedc..06d53eba30 100644 --- a/tensorflow/core/kernels/initializable_lookup_table.cc +++ b/tensorflow/core/kernels/initializable_lookup_table.cc @@ -44,7 +44,7 @@ Status InitializableLookupTable::Initialize(InitTableIterator& iter) { return errors::FailedPrecondition("Table already initialized."); } - TF_RETURN_IF_ERROR(DoPrepare(iter.total_size())); + TF_RETURN_IF_ERROR(DoLazyPrepare([&iter]() { return iter.total_size(); })); while (iter.Valid()) { TF_RETURN_IF_ERROR(DoInsert(iter.keys(), iter.values())); iter.Next(); diff --git a/tensorflow/core/kernels/initializable_lookup_table.h b/tensorflow/core/kernels/initializable_lookup_table.h index e9eae9f863..b16c76dc7f 100644 --- a/tensorflow/core/kernels/initializable_lookup_table.h +++ b/tensorflow/core/kernels/initializable_lookup_table.h @@ -114,6 +114,7 @@ class InitializableLookupTable : public LookupInterface { virtual Status status() const = 0; // Returns the total number of elements that the iterator will produce. + // It might return -1 in case of error. virtual int64 total_size() const = 0; private: @@ -129,6 +130,17 @@ class InitializableLookupTable : public LookupInterface { // number of expected elements. virtual Status DoPrepare(size_t expected_num_elements) = 0; + // Same as DoPrepare() but derived implementations might choose to skip + // calling get_expected_num_elements if size is not needed for DoPrepare. + virtual Status DoLazyPrepare( + std::function get_expected_num_elements) { + int64 expected_num_elements = get_expected_num_elements(); + if (expected_num_elements < 0) { + return errors::FailedPrecondition("Got negative expected_num_elements."); + } + return DoPrepare(expected_num_elements); + } + // Populates the table in batches given keys and values as tensors into the // underlying data structure. virtual Status DoInsert(const Tensor& keys, const Tensor& values) = 0; diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h index 5ba9b936e4..3657fd5b6a 100644 --- a/tensorflow/core/kernels/lookup_table_op.h +++ b/tensorflow/core/kernels/lookup_table_op.h @@ -191,6 +191,11 @@ class HashTable : public InitializableLookupTable { return Status::OK(); }; + Status DoLazyPrepare(std::function unused) override { + constexpr size_t kUnusedSize = 0; + return DoPrepare(kUnusedSize); + } + Status DoInsert(const Tensor& keys, const Tensor& values) override { if (!table_) { return errors::FailedPrecondition("HashTable is not prepared."); diff --git a/tensorflow/core/kernels/lookup_util.cc b/tensorflow/core/kernels/lookup_util.cc index c7ce1c3747..27031d9216 100644 --- a/tensorflow/core/kernels/lookup_util.cc +++ b/tensorflow/core/kernels/lookup_util.cc @@ -75,9 +75,6 @@ class TextFileLineIterator Status Init(const string& filename, int64 vocab_size, char delimiter, DataType key_dtype, int64 key_index, DataType value_dtype, int64 value_index, Env* env) { - if (vocab_size == -1) { - TF_RETURN_IF_ERROR(GetNumLinesInTextFile(env, filename, &vocab_size)); - } filename_ = filename; vocab_size_ = vocab_size; delimiter_ = delimiter; @@ -85,6 +82,7 @@ class TextFileLineIterator value_ = Tensor(value_dtype, TensorShape({})); key_index_ = key_index; value_index_ = value_index; + env_ = env; status_ = env->NewRandomAccessFile(filename_, &file_); if (!status_.ok()) return status_; @@ -103,15 +101,15 @@ class TextFileLineIterator string line; status_ = input_buffer_->ReadLine(&line); if (!status_.ok()) { - if (errors::IsOutOfRange(status_) && next_id_ != vocab_size_) { + if (errors::IsOutOfRange(status_) && next_id_ != total_size()) { status_ = errors::InvalidArgument("Invalid vocab_size in ", filename_, - ": expected ", vocab_size_, + ": expected ", total_size(), " but got ", next_id_); } valid_ = false; return; } - if (next_id_ >= vocab_size_) { + if (vocab_size_ != -1 && next_id_ >= vocab_size_) { LOG(WARNING) << "Truncated " << filename_ << " before its end at " << vocab_size_ << " records."; LOG(WARNING) << "next_id_ : " << next_id_; @@ -162,7 +160,18 @@ class TextFileLineIterator Status status() const override { return status_; } - int64 total_size() const override { return vocab_size_; } + int64 total_size() const override { + if (vocab_size_ == -1) { + int64 new_size; + Status status = GetNumLinesInTextFile(env_, filename_, &new_size); + if (!status.ok()) { + LOG(WARNING) << "Unable to get line count: " << status; + new_size = -1; + } + *const_cast(&vocab_size_) = new_size; + } + return vocab_size_; + } private: Tensor key_; @@ -170,6 +179,7 @@ class TextFileLineIterator bool valid_; // true if the iterator points to an existing range. int64 key_index_; int64 value_index_; + Env* env_; int64 next_id_; int64 vocab_size_; string filename_; -- GitLab From 3d9f820ff2b4c7e79f9e3239b2a09472e99448e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 03:48:57 -0700 Subject: [PATCH 537/960] Don't flush denormals when calling Eigen::SelfAdjointEigenSolver. PiperOrigin-RevId: 190595222 --- tensorflow/BUILD | 1 + tensorflow/contrib/cmake/python_modules.txt | 1 + tensorflow/core/kernels/BUILD | 4 +-- .../core/kernels/self_adjoint_eig_op.cc | 4 +++ .../kernels/self_adjoint_eig_v2_op_impl.h | 4 +++ tensorflow/python/kernel_tests/BUILD | 1 + .../kernel_tests/self_adjoint_eig_op_test.py | 17 ++++++++++ tensorflow/python/kernel_tests/testdata/BUILD | 24 ++++++++++++++ ...lf_adjoint_eig_fail_if_denorms_flushed.txt | 32 +++++++++++++++++++ tensorflow/tools/pip_package/BUILD | 1 + 10 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tensorflow/python/kernel_tests/testdata/BUILD create mode 100644 tensorflow/python/kernel_tests/testdata/self_adjoint_eig_fail_if_denorms_flushed.txt diff --git a/tensorflow/BUILD b/tensorflow/BUILD index b073adfee9..6ab43638ba 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -654,6 +654,7 @@ filegroup( "//tensorflow/python/kernel_tests/distributions:all_files", "//tensorflow/python/kernel_tests/linalg:all_files", "//tensorflow/python/kernel_tests/random:all_files", + "//tensorflow/python/kernel_tests/testdata:all_files", "//tensorflow/python/ops/distributions:all_files", "//tensorflow/python/ops/linalg:all_files", "//tensorflow/python/ops/losses:all_files", diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index f7d3c73b2c..112b690511 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -82,6 +82,7 @@ tensorflow/python/kernel_tests tensorflow/python/kernel_tests/distributions tensorflow/python/kernel_tests/linalg tensorflow/python/kernel_tests/random +tensorflow/python/kernel_tests/testdata tensorflow/python/layers tensorflow/python/lib tensorflow/python/lib/core diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 9bb80eb892..b469c01881 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2528,13 +2528,13 @@ tf_kernel_library( tf_kernel_library( name = "self_adjoint_eig_op", prefix = "self_adjoint_eig_op", - deps = LINALG_DEPS, + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], ) tf_kernel_library( name = "self_adjoint_eig_v2_op", prefix = "self_adjoint_eig_v2_op", - deps = LINALG_DEPS + if_cuda([ + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ ":cast_op", ":cwise_op", ]), diff --git a/tensorflow/core/kernels/self_adjoint_eig_op.cc b/tensorflow/core/kernels/self_adjoint_eig_op.cc index bcd8877390..cea5883db7 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_op.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_op.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -55,6 +56,9 @@ class SelfAdjointEigOp : public LinearAlgebraOp { return; } + // This algorithm relies on denormals, so switch them back on locally. + port::ScopedDontFlushDenormal dont_flush_denormals; + Eigen::SelfAdjointEigenSolver< Eigen::Matrix> es(inputs[0]); diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h index 8c0633f422..271dd2c485 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -61,6 +62,9 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp { return; } + // This algorithm relies on denormals, so switch them back on locally. + port::ScopedDontFlushDenormal dont_flush_denormals; + Eigen::SelfAdjointEigenSolver eig( inputs[0], compute_v_ ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly); diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index dbe1bd437e..228d1c2452 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2724,6 +2724,7 @@ cuda_py_test( "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", ], + data = ["//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files"], shard_count = 20, ) diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py index 4de5f4e4db..d2647088c5 100644 --- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py +++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py @@ -71,6 +71,23 @@ class SelfAdjointEigTest(test.TestCase): self.assertAllEqual(val[4], val[5]) self.assertAllEqual(val[1], val[3]) + def testMatrixThatFailsWhenFlushingDenormsToZero(self): + # Test a 32x32 matrix which is known to fail if denorm floats are flushed to + # zero. + matrix = np.genfromtxt( + test.test_src_dir_path( + "python/kernel_tests/testdata/" + "self_adjoint_eig_fail_if_denorms_flushed.txt")).astype(np.float32) + self.assertEqual(matrix.shape, (32, 32)) + matrix_tensor = constant_op.constant(matrix) + with self.test_session(use_gpu=True) as sess: + (e, v) = sess.run(linalg_ops.self_adjoint_eig(matrix_tensor)) + self.assertEqual(e.size, 32) + self.assertAllClose( + np.matmul(v, v.transpose()), np.eye(32, dtype=np.float32), atol=2e-3) + self.assertAllClose(matrix, + np.matmul(np.matmul(v, np.diag(e)), v.transpose())) + def SortEigenDecomposition(e, v): if v.ndim < 2: diff --git a/tensorflow/python/kernel_tests/testdata/BUILD b/tensorflow/python/kernel_tests/testdata/BUILD new file mode 100644 index 0000000000..a4a0dfc139 --- /dev/null +++ b/tensorflow/python/kernel_tests/testdata/BUILD @@ -0,0 +1,24 @@ +# Data files for kernel tests. + +package( + default_visibility = ["//tensorflow:internal"], +) + +licenses(["notice"]) # Apache 2.0 + +filegroup( + name = "self_adjoint_eig_op_test_files", + srcs = ["self_adjoint_eig_fail_if_denorms_flushed.txt"], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/python/kernel_tests/testdata/self_adjoint_eig_fail_if_denorms_flushed.txt b/tensorflow/python/kernel_tests/testdata/self_adjoint_eig_fail_if_denorms_flushed.txt new file mode 100644 index 0000000000..d56a690a79 --- /dev/null +++ b/tensorflow/python/kernel_tests/testdata/self_adjoint_eig_fail_if_denorms_flushed.txt @@ -0,0 +1,32 @@ +2.60986303e-17 -9.66826148e-21 -1.68610775e-24 -9.16104778e-17 -1.1039539e-18 -1.66460338e-25 -2.12362492e-23 1.90946688e-21 -3.34190535e-22 1.2000634e-18 -7.31782583e-20 2.57851762e-20 -2.55509e-20 -9.54284927e-20 -1.04248315e-17 -5.32450516e-22 -1.81712853e-17 6.0044594e-18 3.96602716e-11 2.89077487e-25 -2.47461475e-25 1.77941757e-24 -7.30388687e-21 -3.84350041e-16 -3.88532388e-21 -4.29928618e-21 4.13551131e-16 -2.63408791e-25 -2.84830375e-21 -1.6450072e-16 -2.8585296e-21 -3.65413296e-21 +-9.66826148e-21 5.03939189e-22 9.17361108e-26 5.17304053e-20 1.99338895e-20 1.25259775e-28 -8.70441942e-26 9.91474109e-25 -5.80960164e-24 -1.19022314e-21 3.90467165e-22 -1.38179098e-22 1.79253406e-22 2.23977705e-22 1.1864143e-19 7.16291934e-24 4.10159639e-20 -2.16798529e-20 -4.95460504e-14 -2.6881406e-27 5.32861213e-27 -4.54567085e-28 1.99794328e-23 1.26854541e-17 -1.92916739e-23 8.60632417e-24 -1.04721097e-18 -7.00607669e-28 6.86771954e-23 8.65173173e-19 1.24469175e-22 6.03883081e-24 +-1.68610775e-24 9.17361108e-26 1.34889529e-26 2.65059e-22 2.39713735e-23 -2.00915344e-30 -1.135692e-27 -6.46049964e-26 -1.03607712e-26 -1.57623654e-23 -1.63805162e-24 -5.95741642e-25 3.24984759e-25 6.49561204e-24 2.28504969e-21 2.8319611e-25 3.96494845e-22 -2.1988623e-22 6.26027228e-16 1.2418479e-30 2.1016041e-30 6.22813846e-30 -1.0708067e-25 6.90778045e-21 1.86361622e-25 7.08789674e-26 -9.23628499e-21 1.65335067e-30 -1.12173032e-26 8.2257321e-22 -4.72686764e-27 -2.58501275e-26 +-9.16104778e-17 5.17304053e-20 2.65059e-22 2.69965968e-14 7.06005733e-17 1.69851446e-22 -2.75994304e-21 -6.61589523e-20 3.8682048e-20 -1.69253147e-17 -2.68580354e-18 -7.74994098e-19 -9.75466696e-19 2.13537585e-18 2.13185342e-16 6.89417478e-21 1.35805044e-16 -3.48309239e-16 1.0448622e-09 -2.17287918e-23 7.41749185e-24 -7.36683057e-23 -1.31083094e-20 1.574e-14 5.72646592e-19 -9.85673749e-21 -1.0654985e-14 2.70679318e-23 4.0943479e-20 -3.42938568e-15 8.57373804e-20 -2.18094505e-20 +-1.1039539e-18 1.99338895e-20 2.39713735e-23 7.06005733e-17 1.83801666e-17 1.09735975e-24 -5.73058223e-24 7.2227645e-22 -8.94843118e-22 -2.30558605e-19 -7.84892038e-20 -1.88692532e-20 -1.02217713e-20 2.95458834e-20 2.42873413e-17 8.89161401e-22 1.21669872e-17 -6.85317731e-18 -7.345906e-12 -3.1158751e-25 1.36359449e-24 -1.57981417e-24 3.89633371e-21 9.94580899e-16 1.45732115e-20 6.92065325e-22 -1.86114433e-16 6.00601346e-26 3.26844e-21 4.38573742e-17 1.06803444e-20 4.60203933e-22 +-1.66460338e-25 1.25259775e-28 -2.00915344e-30 1.69851446e-22 1.09735975e-24 5.75549306e-30 4.74050864e-29 -5.99239043e-28 -1.5784658e-27 -1.74631273e-25 -1.22702975e-25 -1.03371979e-26 -1.96967552e-26 -1.56446725e-26 -3.06462576e-25 -6.33857393e-28 -6.08829397e-24 -7.07478859e-24 -4.82614847e-18 -2.7324345e-31 1.23830207e-31 -7.96172e-31 -1.9034503e-27 -3.82709848e-22 -2.69257733e-26 -3.84934809e-27 -1.48572725e-22 4.14585761e-31 2.5611404e-28 -2.77402858e-24 3.10373361e-28 -5.09669241e-28 +-2.12362492e-23 -8.70441942e-26 -1.135692e-27 -2.75994304e-21 -5.73058223e-24 4.74050864e-29 6.28162e-26 -3.30076462e-25 -3.30065418e-25 -1.1370873e-23 -8.97722764e-24 -1.03190629e-24 -9.52908672e-25 -3.27285413e-24 1.36216664e-22 -8.0549564e-26 -1.94826821e-22 -3.64999226e-22 -2.92500975e-15 -3.00986528e-29 2.39712646e-29 -1.02470704e-28 -4.99034099e-25 -1.32277916e-19 -5.05595e-24 -3.04012473e-25 -1.44724215e-20 5.04614184e-30 -4.12370105e-26 4.20735765e-21 -1.02818953e-25 3.41267575e-26 +1.90946688e-21 9.91474109e-25 -6.46049964e-26 -6.61589523e-20 7.2227645e-22 -5.99239043e-28 -3.30076462e-25 1.8948059e-22 1.83367373e-23 1.06616038e-21 -2.81616502e-22 1.18347412e-22 8.3458038e-23 9.67703245e-24 -1.37445558e-20 2.11412652e-24 2.64820742e-21 8.02510339e-20 4.39926334e-13 9.58727772e-27 2.9838033e-28 1.29183353e-26 1.78626483e-22 3.03531056e-19 9.62612316e-23 1.33722715e-23 2.92905627e-18 -9.42286262e-28 3.23170971e-24 4.10885529e-19 -8.38673724e-25 -8.63732285e-25 +-3.34190535e-22 -5.80960164e-24 -1.03607712e-26 3.8682048e-20 -8.94843118e-22 -1.5784658e-27 -3.30065418e-25 1.83367373e-23 9.30693173e-23 1.48929558e-21 1.83278606e-21 1.08468362e-22 2.61703785e-22 4.42441537e-23 1.23906316e-20 2.55235433e-24 8.36323349e-20 1.2152038e-19 9.83332204e-14 5.14523933e-27 -3.28220159e-28 8.22099066e-27 3.34939233e-23 4.3309476e-19 5.82711129e-22 1.14299394e-22 3.25240717e-18 5.84184241e-28 -1.76991199e-24 5.5568966e-20 -2.80294941e-24 4.59071175e-24 +1.2000634e-18 -1.19022314e-21 -1.57623654e-23 -1.69253147e-17 -2.30558605e-19 -1.74631273e-25 -1.1370873e-23 1.06616038e-21 1.48929558e-21 2.05547703e-18 2.01471341e-20 2.65473229e-20 1.36331708e-20 -2.19777252e-20 -3.09825792e-18 -1.93365673e-22 -2.25608735e-18 7.98997246e-18 1.45582661e-11 6.29004356e-25 -1.14866332e-25 -5.51419319e-26 2.97082139e-21 -2.39052259e-16 1.48920411e-20 1.28589326e-21 4.27717466e-16 -4.44694851e-26 -1.80270052e-22 3.29932795e-18 -5.11645591e-22 5.53091711e-23 +-7.31782583e-20 3.90467165e-22 -1.63805162e-24 -2.68580354e-18 -7.84892038e-20 -1.22702975e-25 -8.97722764e-24 -2.81616502e-22 1.83278606e-21 2.01471341e-20 4.38037939e-19 -4.46678177e-21 3.48516266e-20 7.32592348e-21 1.11928135e-18 8.58541052e-23 8.80645183e-18 4.80109643e-21 -1.7163557e-11 1.92262335e-26 -2.78003951e-26 5.48322572e-25 8.95330117e-23 -1.11570766e-17 3.13666242e-20 4.47195205e-21 -1.09014604e-17 7.69340111e-26 1.64649306e-22 1.71054085e-17 1.33471053e-23 6.40747815e-22 +2.57851762e-20 -1.38179098e-22 -5.95741642e-25 -7.74994098e-19 -1.88692532e-20 -1.03371979e-26 -1.03190629e-24 1.18347412e-22 1.08468362e-22 2.65473229e-20 -4.46678177e-21 5.22731861e-21 1.06412616e-21 -8.0508039e-22 -1.68829721e-19 -2.7699538e-23 -2.15173717e-19 7.46895651e-19 1.71858101e-12 5.41956e-26 -6.15013064e-27 1.54884457e-26 2.54028029e-22 -1.50009535e-18 1.11920465e-21 1.05890428e-22 3.6487132e-17 -2.06798384e-27 -5.5143889e-23 -1.71529414e-18 -7.38099094e-23 -6.5250472e-24 +-2.55509e-20 1.79253406e-22 3.24984759e-25 -9.75466696e-19 -1.02217713e-20 -1.96967552e-26 -9.52908672e-25 8.3458038e-23 2.61703785e-22 1.36331708e-20 3.48516266e-20 1.06412616e-21 4.08927657e-20 -2.76503659e-21 -6.81059804e-20 5.13487959e-23 1.80612902e-18 5.32462054e-19 -3.89327199e-12 3.60012729e-26 -2.5575456e-26 3.14316426e-25 4.56614351e-22 -1.24545392e-17 9.14707146e-21 7.97421952e-22 2.84371096e-17 2.98359736e-26 1.33439467e-23 1.00242743e-17 -4.94476664e-23 3.28816461e-22 +-9.54284927e-20 2.23977705e-22 6.49561204e-24 2.13537585e-18 2.95458834e-20 -1.56446725e-26 -3.27285413e-24 9.67703245e-24 4.42441537e-23 -2.19777252e-20 7.32592348e-21 -8.0508039e-22 -2.76503659e-21 5.02409342e-20 1.57549297e-18 2.63027228e-22 6.11241908e-19 -2.71906856e-19 1.41003203e-12 2.66730019e-26 2.25679315e-26 1.00596535e-25 3.02875382e-22 3.85539387e-17 6.79708607e-22 1.60452617e-22 -2.08440846e-17 -5.40071056e-28 4.56236979e-23 -1.00868521e-17 1.22265047e-22 -1.81997389e-23 +-1.04248315e-17 1.1864143e-19 2.28504969e-21 2.13185342e-16 2.42873413e-17 -3.06462576e-25 1.36216664e-22 -1.37445558e-20 1.23906316e-20 -3.09825792e-18 1.11928135e-18 -1.68829721e-19 -6.81059804e-20 1.57549297e-18 2.5311263e-15 9.97996576e-20 2.26115975e-16 -3.86907114e-17 3.68487445e-12 8.23669787e-24 1.00324064e-23 3.38722042e-24 8.64234911e-21 2.46521189e-15 1.72823337e-19 9.24995431e-20 -3.16903295e-15 5.94130048e-25 1.73965082e-20 1.17371651e-15 2.26718703e-20 4.16709318e-21 +-5.32450516e-22 7.16291934e-24 2.8319611e-25 6.89417478e-21 8.89161401e-22 -6.33857393e-28 -8.0549564e-26 2.11412652e-24 2.55235433e-24 -1.93365673e-22 8.58541052e-23 -2.7699538e-23 5.13487959e-23 2.63027228e-22 9.97996576e-20 2.88326168e-23 1.35358898e-20 5.43364968e-21 4.24011412e-14 1.88486064e-27 8.93106076e-29 4.5748278e-27 2.48573168e-24 5.81165621e-19 1.96505062e-23 5.84813631e-24 -2.46866108e-20 1.912471e-29 2.0243857e-24 -2.88983463e-20 1.35761502e-24 1.40424791e-27 +-1.81712853e-17 4.10159639e-20 3.96494845e-22 1.35805044e-16 1.21669872e-17 -6.08829397e-24 -1.94826821e-22 2.64820742e-21 8.36323349e-20 -2.25608735e-18 8.80645183e-18 -2.15173717e-19 1.80612902e-18 6.11241908e-19 2.26115975e-16 1.35358898e-20 3.66013906e-15 1.35652384e-17 -1.97764849e-09 4.16586597e-24 1.28936031e-24 6.96597122e-23 2.43147439e-21 -1.25627342e-15 1.52711738e-18 2.61025243e-19 -2.00782109e-15 9.75835691e-24 4.0203e-21 1.40790259e-15 -7.8869e-21 8.51983e-20 +6.0044594e-18 -2.16798529e-20 -2.1988623e-22 -3.48309239e-16 -6.85317731e-18 -7.07478859e-24 -3.64999226e-22 8.02510339e-20 1.2152038e-19 7.98997246e-18 4.80109643e-21 7.46895651e-19 5.32462054e-19 -2.71906856e-19 -3.86907114e-17 5.43364968e-21 1.35652384e-17 1.19795414e-15 1.18472676e-09 2.74214961e-23 -7.6305178e-26 1.25969175e-23 1.68466447e-19 1.33873166e-15 1.0739288e-18 1.02533716e-19 2.73480291e-14 -1.87024011e-24 -9.73944425e-21 2.74769918e-16 -1.48632788e-20 1.69142815e-21 +3.96602716e-11 -4.95460504e-14 6.26027228e-16 1.0448622e-09 -7.345906e-12 -4.82614847e-18 -2.92500975e-15 4.39926334e-13 9.83332204e-14 1.45582661e-11 -1.7163557e-11 1.71858101e-12 -3.89327199e-12 1.41003203e-12 3.68487445e-12 4.24011412e-14 -1.97764849e-09 1.18472676e-09 0.0257282555 5.64106473e-17 5.83845666e-18 -1.72409096e-16 1.02886027e-12 1.42563525e-08 -1.57067415e-12 -4.61972799e-13 3.30651737e-08 -5.20615037e-17 -1.71347193e-14 2.87764201e-10 5.03749196e-14 -1.97989316e-13 +2.89077487e-25 -2.6881406e-27 1.2418479e-30 -2.17287918e-23 -3.1158751e-25 -2.7324345e-31 -3.00986528e-29 9.58727772e-27 5.14523933e-27 6.29004356e-25 1.92262335e-26 5.41956e-26 3.60012729e-26 2.66730019e-26 8.23669787e-24 1.88486064e-27 4.16586597e-24 2.74214961e-23 5.64106473e-17 1.2555855e-29 -1.30304595e-31 8.42884087e-31 1.75222077e-26 -2.89058862e-23 3.0225144e-26 6.67962117e-27 8.54181718e-22 -1.2385176e-32 -5.78078369e-28 3.34704626e-23 -2.00599605e-27 2.05674681e-28 +-2.47461475e-25 5.32861213e-27 2.1016041e-30 7.41749185e-24 1.36359449e-24 1.23830207e-31 2.39712646e-29 2.9838033e-28 -3.28220159e-28 -1.14866332e-25 -2.78003951e-26 -6.15013064e-27 -2.5575456e-26 2.25679315e-26 1.00324064e-23 8.93106076e-29 1.28936031e-24 -7.6305178e-26 5.83845666e-18 -1.30304595e-31 2.26490979e-30 -4.25637053e-31 1.40697e-27 5.91197152e-22 -2.08475892e-26 -5.64982671e-28 -3.97199197e-23 -5.06794406e-32 1.11993943e-27 -2.94280711e-23 2.65858181e-27 -2.23093754e-28 +1.77941757e-24 -4.54567085e-28 6.22813846e-30 -7.36683057e-23 -1.57981417e-24 -7.96172e-31 -1.02470704e-28 1.29183353e-26 8.22099066e-27 -5.51419319e-26 5.48322572e-25 1.54884457e-26 3.14316426e-25 1.00596535e-25 3.38722042e-24 4.5748278e-27 6.96597122e-23 1.25969175e-23 -1.72409096e-16 8.42884087e-31 -4.25637053e-31 1.40764294e-28 1.38735442e-26 -1.93810515e-22 1.93660175e-25 1.97417449e-26 1.62145272e-22 2.52533191e-31 -3.42833345e-28 6.34130774e-22 -2.01859e-27 6.1781768e-27 +-7.30388687e-21 1.99794328e-23 -1.0708067e-25 -1.31083094e-20 3.89633371e-21 -1.9034503e-27 -4.99034099e-25 1.78626483e-22 3.34939233e-23 2.97082139e-21 8.95330117e-23 2.54028029e-22 4.56614351e-22 3.02875382e-22 8.64234911e-21 2.48573168e-24 2.43147439e-21 1.68466447e-19 1.02886027e-12 1.75222077e-26 1.40697e-27 1.38735442e-26 1.18400807e-21 1.40670976e-18 2.40320429e-22 3.69528133e-23 4.81603371e-18 -1.49322683e-27 -2.70670724e-25 1.59463723e-19 6.40406749e-24 1.17170599e-23 +-3.84350041e-16 1.26854541e-17 6.90778045e-21 1.574e-14 9.94580899e-16 -3.82709848e-22 -1.32277916e-19 3.03531056e-19 4.3309476e-19 -2.39052259e-16 -1.11570766e-17 -1.50009535e-18 -1.24545392e-17 3.85539387e-17 2.46521189e-15 5.81165621e-19 -1.25627342e-15 1.33873166e-15 1.42563525e-08 -2.89058862e-23 5.91197152e-22 -1.93810515e-22 1.40670976e-18 4.40677789e-12 7.86017934e-19 7.73466606e-19 1.96690791e-15 -1.65941347e-22 2.63659933e-18 -3.0624544e-14 5.87194631e-18 -3.46291098e-19 +-3.88532388e-21 -1.92916739e-23 1.86361622e-25 5.72646592e-19 1.45732115e-20 -2.69257733e-26 -5.05595e-24 9.62612316e-23 5.82711129e-22 1.48920411e-20 3.13666242e-20 1.11920465e-21 9.14707146e-21 6.79708607e-22 1.72823337e-19 1.96505062e-23 1.52711738e-18 1.0739288e-18 -1.57067415e-12 3.0225144e-26 -2.08475892e-26 1.93660175e-25 2.40320429e-22 7.86017934e-19 1.80741048e-20 9.85491491e-22 5.08456938e-17 1.08072265e-26 -1.75036654e-23 4.36436952e-18 -1.77728563e-23 1.01268548e-22 +-4.29928618e-21 8.60632417e-24 7.08789674e-26 -9.85673749e-21 6.92065325e-22 -3.84934809e-27 -3.04012473e-25 1.33722715e-23 1.14299394e-22 1.28589326e-21 4.47195205e-21 1.05890428e-22 7.97421952e-22 1.60452617e-22 9.24995431e-20 5.84813631e-24 2.61025243e-19 1.02533716e-19 -4.61972799e-13 6.67962117e-27 -5.64982671e-28 1.97417449e-26 3.69528133e-23 7.73466606e-19 9.85491491e-22 3.68332283e-22 1.76753773e-18 2.6167718e-27 3.55918682e-25 1.95786374e-19 -2.60077304e-24 1.84790635e-23 +4.13551131e-16 -1.04721097e-18 -9.23628499e-21 -1.0654985e-14 -1.86114433e-16 -1.48572725e-22 -1.44724215e-20 2.92905627e-18 3.25240717e-18 4.27717466e-16 -1.09014604e-17 3.6487132e-17 2.84371096e-17 -2.08440846e-17 -3.16903295e-15 -2.46866108e-20 -2.00782109e-15 2.73480291e-14 3.30651737e-08 8.54181718e-22 -3.97199197e-23 1.62145272e-22 4.81603371e-18 1.96690791e-15 5.08456938e-17 1.76753773e-18 1.57092991e-12 -4.31425852e-23 -3.78241e-19 -1.15899865e-14 -7.61890782e-19 -1.15344546e-19 +-2.63408791e-25 -7.00607669e-28 1.65335067e-30 2.70679318e-23 6.00601346e-26 4.14585761e-31 5.04614184e-30 -9.42286262e-28 5.84184241e-28 -4.44694851e-26 7.69340111e-26 -2.06798384e-27 2.98359736e-26 -5.40071056e-28 5.94130048e-25 1.912471e-29 9.75835691e-24 -1.87024011e-24 -5.20615037e-17 -1.2385176e-32 -5.06794406e-32 2.52533191e-31 -1.49322683e-27 -1.65941347e-22 1.08072265e-26 2.6167718e-27 -4.31425852e-23 1.5576233e-30 -6.14697676e-29 -5.39097603e-24 -8.01112167e-29 1.81063126e-27 +-2.84830375e-21 6.86771954e-23 -1.12173032e-26 4.0943479e-20 3.26844e-21 2.5611404e-28 -4.12370105e-26 3.23170971e-24 -1.76991199e-24 -1.80270052e-22 1.64649306e-22 -5.5143889e-23 1.33439467e-23 4.56236979e-23 1.73965082e-20 2.0243857e-24 4.0203e-21 -9.73944425e-21 -1.71347193e-14 -5.78078369e-28 1.11993943e-27 -3.42833345e-28 -2.70670724e-25 2.63659933e-18 -1.75036654e-23 3.55918682e-25 -3.78241e-19 -6.14697676e-29 2.71732416e-23 2.4136621e-19 2.38938648e-23 1.21468477e-24 +-1.6450072e-16 8.65173173e-19 8.2257321e-22 -3.42938568e-15 4.38573742e-17 -2.77402858e-24 4.20735765e-21 4.10885529e-19 5.5568966e-20 3.29932795e-18 1.71054085e-17 -1.71529414e-18 1.00242743e-17 -1.00868521e-17 1.17371651e-15 -2.88983463e-20 1.40790259e-15 2.74769918e-16 2.87764201e-10 3.34704626e-23 -2.94280711e-23 6.34130774e-22 1.59463723e-19 -3.0624544e-14 4.36436952e-18 1.95786374e-19 -1.15899865e-14 -5.39097603e-24 2.4136621e-19 2.10373291e-13 4.84257897e-20 2.71571227e-19 +-2.8585296e-21 1.24469175e-22 -4.72686764e-27 8.57373804e-20 1.06803444e-20 3.10373361e-28 -1.02818953e-25 -8.38673724e-25 -2.80294941e-24 -5.11645591e-22 1.33471053e-23 -7.38099094e-23 -4.94476664e-23 1.22265047e-22 2.26718703e-20 1.35761502e-24 -7.8869e-21 -1.48632788e-20 5.03749196e-14 -2.00599605e-27 2.65858181e-27 -2.01859e-27 6.40406749e-24 5.87194631e-18 -1.77728563e-23 -2.60077304e-24 -7.61890782e-19 -8.01112167e-29 2.38938648e-23 4.84257897e-20 7.77486414e-23 -7.38542574e-25 +-3.65413296e-21 6.03883081e-24 -2.58501275e-26 -2.18094505e-20 4.60203933e-22 -5.09669241e-28 3.41267575e-26 -8.63732285e-25 4.59071175e-24 5.53091711e-23 6.40747815e-22 -6.5250472e-24 3.28816461e-22 -1.81997389e-23 4.16709318e-21 1.40424791e-27 8.51983e-20 1.69142815e-21 -1.97989316e-13 2.05674681e-28 -2.23093754e-28 6.1781768e-27 1.17170599e-23 -3.46291098e-19 1.01268548e-22 1.84790635e-23 -1.15344546e-19 1.81063126e-27 1.21468477e-24 2.71571227e-19 -7.38542574e-25 3.49516247e-23 diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index e01306f953..16c47f7555 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -190,6 +190,7 @@ sh_binary( "//tensorflow/python:util_example_parser_configuration", "//tensorflow/python/debug:debug_pip", "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files", "//tensorflow/python/saved_model:saved_model", "//tensorflow/python/tools:tools_pip", "//tensorflow/python:test_ops", -- GitLab From cd98c3ac0e4ab094f00dcb2dfc1188c0c5ee08e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 08:01:25 -0700 Subject: [PATCH 538/960] - Added support a different strategy for cov computations in the multi-tower scenario. In this strategy we do the cov computations locally on each tower and then sum the results, as opposed to concatenating everything onto a single device. This other strategy can be enabled by setting the global variable TOWER_STRATEGY to "separate" (default value is "concat", which implements the old strategy). We might change this to use "separate" by default if this turns out to be the best default. - The code and documentation now no longer refer to the towers as computing different "mini-batches", since this was a confusing use of terminology. The best way to think about things is that the combine data over all the towers forms the mini-batch. Note however when factors process multiple towers using the "separate" strategy their batch_size variable will still refer to the amount of data in a single tower. - Fixed a bug in how the "option 1" and "option 2" RNN Fisher approximations were computed in the multi-tower scenario. - The "time-folded-into-batch" feature recently added has now changed in terms of what format it uses. Time is now the first dimension before the reshape, not the second, which is consistent with the convention used in other codebases. PiperOrigin-RevId: 190615398 --- .../python/kernel_tests/fisher_blocks_test.py | 72 ++-- .../kernel_tests/fisher_factors_test.py | 77 +++-- .../kernel_tests/layer_collection_test.py | 8 +- .../contrib/kfac/python/ops/fisher_blocks.py | 269 ++++++++++----- .../contrib/kfac/python/ops/fisher_factors.py | 317 ++++++++++++------ .../kfac/python/ops/layer_collection.py | 46 +-- tensorflow/contrib/kfac/python/ops/utils.py | 12 +- 7 files changed, 525 insertions(+), 276 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index b70c700f09..6eda6c31e3 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -63,7 +63,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) self.assertAllEqual(params, block.tensors_to_compute_grads()) @@ -72,7 +72,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) self.assertAllEqual(params, block.tensors_to_compute_grads()) @@ -81,7 +81,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors(grads, 0.5) @@ -91,7 +91,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors((grads,), 0.5) block._factor.instantiate_cov_variables() @@ -112,7 +112,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = array_ops.constant([[1.], [2.]]) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = params**2 block.instantiate_factors((grads,), 0.5) block._factor.instantiate_cov_variables() @@ -133,7 +133,7 @@ class FullFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (array_ops.constant([2., 3.]), array_ops.constant(4.)) damping = 0.5 block.instantiate_factors((grads,), damping) @@ -163,7 +163,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) self.assertAllEqual(params, block.tensors_to_compute_grads()) @@ -172,7 +172,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) self.assertAllEqual(params, block.tensors_to_compute_grads()) @@ -181,7 +181,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors(grads, 0.5) @@ -191,7 +191,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors((grads,), 0.5) block._factor.instantiate_cov_variables() @@ -210,7 +210,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = array_ops.constant([[1.], [2.]]) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = params**2 block.instantiate_factors((grads,), 0.5) block._factor.instantiate_cov_variables() @@ -228,7 +228,7 @@ class NaiveDiagonalFBTest(test.TestCase): random_seed.set_random_seed(200) params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_minibatch(32) + block.register_additional_tower(32) grads = (params[0]**2, math_ops.sqrt(params[1])) damping = 0.5 block.instantiate_factors((grads,), damping) @@ -324,8 +324,8 @@ class FullyConnectedDiagonalFBTest(test.TestCase): self.assertAllClose(expected_result, result) - def testRegisterAdditionalMinibatch(self): - """Ensure 1 big minibatch and 2 small minibatches are equivalent.""" + def testRegisterAdditionalTower(self): + """Ensure 1 big tower and 2 small towers are equivalent.""" multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( self.w, [self.inputs], [self.outputs], [self.output_grads]) multiply_result_small, multiply_inverse_result_small = ( @@ -376,7 +376,7 @@ class FullyConnectedDiagonalFBTest(test.TestCase): block = fb.FullyConnectedDiagonalFB( lc.LayerCollection(), has_bias=isinstance(params, (tuple, list))) for (i, o) in zip(inputs, outputs): - block.register_additional_minibatch(i, o) + block.register_additional_tower(i, o) block.instantiate_factors((output_grads,), damping=0.0) block._factor.instantiate_cov_variables() @@ -402,7 +402,7 @@ class EmbeddingKFACFBTest(test.TestCase): # Add some examples. inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) outputs = array_ops.constant([[0.], [1.], [2.]]) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) # Instantiate factor's variables. Ensure it doesn't fail. grads = outputs**2. @@ -420,7 +420,7 @@ class EmbeddingKFACFBTest(test.TestCase): # Add some examples. inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) outputs = array_ops.constant([[0.], [1.], [2.]]) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) # Instantiate factor's variables. Ensure it doesn't fail. grads = outputs**2. @@ -461,7 +461,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): inputs = array_ops.constant([1., 2.]) outputs = array_ops.constant([3., 4.]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection()) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self.assertAllEqual([outputs], block.tensors_to_compute_grads()) @@ -471,7 +471,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): inputs = array_ops.constant([[1., 2.], [3., 4.]]) outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=True) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -482,7 +482,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): inputs = array_ops.constant([[1., 2.], [3., 4.]]) outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -493,7 +493,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]]) outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -525,7 +525,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): inputs = array_ops.constant([[1., 2.], [3., 4.]]) outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) block._input_factor.instantiate_cov_variables() @@ -553,7 +553,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): outputs = array_ops.zeros([32, output_dim]) params = array_ops.zeros([input_dim, output_dim]) block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 damping = 0. # This test is only valid without damping. block.instantiate_factors(((grads,),), damping) @@ -689,8 +689,8 @@ class ConvDiagonalFBTest(test.TestCase): self.assertAllClose(expected_result, result, atol=1e-3) - def testRegisterAdditionalMinibatch(self): - """Ensure 1 big minibatch and 2 small minibatches are equivalent.""" + def testRegisterAdditionalTower(self): + """Ensure 1 big tower and 2 small towers are equivalent.""" multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( self.w, [self.inputs], [self.outputs], [self.output_grads]) multiply_result_small, multiply_inverse_result_small = ( @@ -751,7 +751,7 @@ class ConvDiagonalFBTest(test.TestCase): block = fb.ConvDiagonalFB( lc.LayerCollection(), params, strides=[1, 1, 1, 1], padding='SAME') for (i, o) in zip(inputs, outputs): - block.register_additional_minibatch(i, o) + block.register_additional_tower(i, o) block.instantiate_factors((output_grads,), damping=0.0) block._factor.instantiate_cov_variables() @@ -775,7 +775,7 @@ class DepthwiseConvKFCBasicFBTest(test.TestCase): layer_collection = lc.LayerCollection() block = fb.DepthwiseConvKFCBasicFB( layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(([grads],), 0.5) @@ -788,7 +788,7 @@ class DepthwiseConvKFCBasicFBTest(test.TestCase): layer_collection = lc.LayerCollection() block = fb.DepthwiseConvKFCBasicFB( layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(([grads],), 0.5) block._input_factor.instantiate_cov_variables() @@ -825,7 +825,7 @@ class ConvKFCBasicFBTest(test.TestCase): outputs = random_ops.random_normal((2, 2, 2)) block = fb.ConvKFCBasicFB( lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self.assertAllEqual([outputs], block.tensors_to_compute_grads()) @@ -843,7 +843,7 @@ class ConvKFCBasicFBTest(test.TestCase): outputs = random_ops.random_normal((2, 2, 2, 2)) block = fb.ConvKFCBasicFB( lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) block._input_factor.instantiate_cov_variables() @@ -874,7 +874,7 @@ class ConvKFCBasicFBTest(test.TestCase): outputs = random_ops.random_normal((2, 2, 2, 2)) block = fb.ConvKFCBasicFB( lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self.assertFalse(block._has_bias) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -902,7 +902,7 @@ class ConvKFCBasicFBTest(test.TestCase): outputs = random_ops.random_normal((2, 2, 2, 2)) block = fb.ConvKFCBasicFB( lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self.assertTrue(block._has_bias) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -930,7 +930,7 @@ class ConvKFCBasicFBTest(test.TestCase): outputs = array_ops.zeros((2, 2, 2, 2)) block = fb.ConvKFCBasicFB( lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) grads = outputs**2 damping = 0. # This test is only valid without damping. block.instantiate_factors(((grads,),), damping) @@ -964,7 +964,7 @@ class FullyConnectedSeriesFBTest(test.TestCase): inputs = array_ops.constant([1., 2.]) outputs = array_ops.constant([3., 4.]) block = fb.FullyConnectedSeriesFB(lc.LayerCollection()) - block.register_additional_minibatch([inputs], [outputs]) + block.register_additional_tower([inputs], [outputs]) self.assertAllEqual([[outputs]], block.tensors_to_compute_grads()) def testInstantiateFactorsHasBias(self): @@ -975,7 +975,7 @@ class FullyConnectedSeriesFBTest(test.TestCase): block = fb.FullyConnectedSeriesFB( lc.LayerCollection(), has_bias=True) - block.register_additional_minibatch([inputs], [outputs]) + block.register_additional_tower([inputs], [outputs]) grads = outputs**2 block.instantiate_factors((((grads,),),), 0.5) @@ -987,7 +987,7 @@ class FullyConnectedSeriesFBTest(test.TestCase): block = fb.FullyConnectedSeriesFB( lc.LayerCollection(), has_bias=False) - block.register_additional_minibatch([inputs], [outputs]) + block.register_additional_tower([inputs], [outputs]) grads = outputs**2 block.instantiate_factors((((grads,),),), 0.5) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index e007f70939..2a3592c53f 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -85,6 +85,12 @@ class FisherFactorTestingDummy(ff.FisherFactor): def instantiate_inv_variables(self): return NotImplementedError + def _num_towers(self): + raise NotImplementedError + + def _get_data_device(self): + raise NotImplementedError + class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor): """Dummy class to test the non-abstract methods on ff.InverseProvidingFactor. @@ -116,6 +122,12 @@ class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor): def instantiate_covariance(self): pass + def _num_towers(self): + raise NotImplementedError + + def _get_data_device(self): + raise NotImplementedError + class NumericalUtilsTest(test.TestCase): @@ -430,7 +442,7 @@ class EmbeddingInputKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(): input_ids = array_ops.constant([[0], [1], [4]]) vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor(input_ids, vocab_size) + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) factor.instantiate_cov_variables() cov = factor.get_cov_var() self.assertEqual(cov.shape.as_list(), [vocab_size]) @@ -439,7 +451,7 @@ class EmbeddingInputKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(): input_ids = array_ops.constant([[0], [1], [4]]) vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor(input_ids, vocab_size) + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) factor.instantiate_cov_variables() cov_update_op = factor.make_covariance_update_op(0.0) @@ -477,8 +489,8 @@ class ConvDiagonalFactorTest(test.TestCase): ] factor = ff.ConvDiagonalFactor( - inputs, - outputs_grads, + (inputs,), + (outputs_grads,), self.kernel_shape, self.strides, self.padding, @@ -508,7 +520,8 @@ class ConvDiagonalFactorTest(test.TestCase): self.out_channels) factor = ff.ConvDiagonalFactor( - constant_op.constant(inputs), [constant_op.constant(outputs_grad)], + (constant_op.constant(inputs),), + ((constant_op.constant(outputs_grad),),), self.kernel_shape, strides=[1, 1, 1, 1], padding='VALID') @@ -537,8 +550,8 @@ class ConvDiagonalFactorTest(test.TestCase): ] factor = ff.ConvDiagonalFactor( - inputs, - outputs_grads, + (inputs,), + (outputs_grads,), self.kernel_shape, self.strides, self.padding, @@ -569,7 +582,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=has_bias) + factor = ff.FullyConnectedKroneckerFactor(((tensor,),), has_bias=has_bias) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -587,7 +600,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=True) + factor = ff.FullyConnectedKroneckerFactor(((tensor,),), has_bias=True) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -598,7 +611,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor((tensor,)) + factor = ff.FullyConnectedKroneckerFactor(((tensor,),)) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -629,8 +642,8 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): out_channels = 4 factor = ff.ConvInputKroneckerFactor( - inputs=random_ops.random_uniform( - (batch_size, width, width, width, in_channels), seed=0), + inputs=(random_ops.random_uniform( + (batch_size, width, width, width, in_channels), seed=0),), filter_shape=(width, width, width, in_channels, out_channels), padding='SAME', strides=(2, 2, 2), @@ -661,8 +674,8 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): out_channels = 4 factor = ff.ConvInputKroneckerFactor( - inputs=random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0), + inputs=(random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0),), filter_shape=(1, 1, in_channels, out_channels), padding='SAME', strides=(1, 1, 1, 1), @@ -691,8 +704,8 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): out_channels = 4 factor = ff.ConvInputKroneckerFactor( - inputs=random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0), + inputs=(random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0),), filter_shape=(1, 1, in_channels, out_channels), padding='SAME', strides=(1, 2, 1, 1), @@ -716,8 +729,8 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): out_channels = 4 factor = ff.ConvInputKroneckerFactor( - inputs=random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0), + inputs=(random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0),), filter_shape=(3, 3, in_channels, out_channels), padding='SAME', extract_patches_fn='extract_image_patches', @@ -739,7 +752,7 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): with tf_ops.Graph().as_default(): tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( - inputs=tensor, + inputs=(tensor,), filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=False) @@ -751,7 +764,7 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): with tf_ops.Graph().as_default(): tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( - tensor, filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) + (tensor,), filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) factor.instantiate_cov_variables() self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], factor.get_cov().get_shape().as_list()) @@ -761,7 +774,7 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): dtype = dtypes.float64_ref tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c', dtype=dtypes.float64) factor = ff.ConvInputKroneckerFactor( - tensor, filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) + (tensor,), filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -775,7 +788,7 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( np.float32)) factor = ff.ConvInputKroneckerFactor( - tensor, filter_shape=(1, 1, 1, 1), padding='SAME', has_bias=True) + (tensor,), filter_shape=(1, 1, 1, 1), padding='SAME', has_bias=True) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -794,7 +807,7 @@ class ConvInputKroneckerFactorTest(ConvFactorTestCase): np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( np.float32)) factor = ff.ConvInputKroneckerFactor( - tensor, filter_shape=(1, 1, 1, 1), padding='SAME') + (tensor,), filter_shape=(1, 1, 1, 1), padding='SAME') factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -810,10 +823,10 @@ class ConvOutputKroneckerFactorTest(ConvFactorTestCase): width = 3 out_channels = width**3 - factor = ff.ConvOutputKroneckerFactor(outputs_grads=[ + factor = ff.ConvOutputKroneckerFactor(outputs_grads=([ random_ops.random_uniform( (batch_size, width, width, width, out_channels), seed=0) - ]) + ],)) factor.instantiate_cov_variables() with self.test_session() as sess: @@ -829,7 +842,7 @@ class ConvOutputKroneckerFactorTest(ConvFactorTestCase): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3, 4, 5), name='a/b/c') - factor = ff.ConvOutputKroneckerFactor((tensor,)) + factor = ff.ConvOutputKroneckerFactor(((tensor,),)) factor.instantiate_cov_variables() self.assertEqual([5, 5], factor.get_cov().get_shape().as_list()) @@ -838,7 +851,7 @@ class ConvOutputKroneckerFactorTest(ConvFactorTestCase): dtype = dtypes.float64_ref random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c') - factor = ff.ConvOutputKroneckerFactor((tensor,)) + factor = ff.ConvOutputKroneckerFactor(((tensor,),)) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -848,7 +861,7 @@ class ConvOutputKroneckerFactorTest(ConvFactorTestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = np.arange(1, 17).reshape(2, 2, 2, 2).astype(np.float32) - factor = ff.ConvOutputKroneckerFactor((array_ops.constant(tensor),)) + factor = ff.ConvOutputKroneckerFactor(((array_ops.constant(tensor),),)) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -862,7 +875,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') - factor = ff.FullyConnectedMultiKF((tensor,), has_bias=False) + factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=False) factor.instantiate_cov_variables() self.assertEqual([3, 3], factor.get_cov().get_shape().as_list()) @@ -871,7 +884,7 @@ class FullyConnectedMultiKFTest(test.TestCase): dtype = dtypes.float64_ref random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.FullyConnectedMultiKF((tensor,), has_bias=False) + factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=False) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -881,7 +894,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedMultiKF((tensor,), has_bias=True) + factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=True) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) @@ -892,7 +905,7 @@ class FullyConnectedMultiKFTest(test.TestCase): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedMultiKF((tensor,)) + factor = ff.FullyConnectedMultiKF(((tensor,),)) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py index ba22099340..cb80fca370 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py @@ -35,7 +35,7 @@ from tensorflow.python.platform import test class MockFisherBlock(object): """A fake FisherBlock.""" - num_registered_minibatches = 2 + num_registered_towers = 2 def __init__(self, name='MockFisherBlock'): self.name = name @@ -468,13 +468,13 @@ class LayerCollectionTest(test.TestCase): b = variable_scope.get_variable('b', [3]) lc = layer_collection.LayerCollection() lc.register_fully_connected(w, inputs, outputs) - self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 1) + self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 1) with self.assertRaises(KeyError): lc.register_fully_connected((w, b), inputs, outputs, reuse=True) self.assertNotIn((w, b), lc.fisher_blocks) - self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 1) + self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 1) lc.register_fully_connected(w, inputs, outputs, reuse=True) - self.assertEqual(lc.fisher_blocks[w].num_registered_minibatches, 2) + self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 2) def testMakeOrGetFactor(self): with ops.Graph().as_default(): diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index f517e3148f..b04bf76a88 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -75,37 +75,6 @@ def set_global_constants(normalize_damping_power=None, pi_type=None): PI_TYPE = pi_type -def _make_partitionedtensors_inputs(inputs): - """Constructs PartitionedTensor for inputs. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - Args: - inputs: a 1-D list of Tensors. Index is tower/mini-batch. - - Returns: - A PartitionedTensor. - """ - return utils.PartitionedTensor(inputs) - - -def _make_partitionedtensors_grads(grads_list): - """Constructs PartitionedTensor for grads_list. - - The purpose of this method is to package up the towers/minibatch dimension - of these arrays into PartitionedTensor objects. - - Args: - grads_list: 2-D list of Tensors. First index is for source, second - index for tower. - - Returns: - Tuple of PartitionedTensors, one per source. - """ - return tuple(utils.PartitionedTensor(grads) for grads in grads_list) - - def normalize_damping(damping, num_replications): """Normalize damping after adjusting scale by NORMALIZE_DAMPING_POWER.""" if NORMALIZE_DAMPING_POWER: @@ -191,7 +160,7 @@ class FisherBlock(object): """Abstract base class for objects modeling approximate Fisher matrix blocks. Subclasses must implement register_matpower, multiply_matpower, - instantiate_factors, tensors_to_compute_grads, and num_registered_minibatches + instantiate_factors, tensors_to_compute_grads, and num_registered_towers methods. """ @@ -266,8 +235,8 @@ class FisherBlock(object): pass @abc.abstractproperty - def num_registered_minibatches(self): - """Number of minibatches registered for this FisherBlock. + def num_registered_towers(self): + """Number of towers registered for this FisherBlock. Typically equal to the number of towers in a multi-tower setup. """ @@ -319,8 +288,8 @@ class FullFB(FisherBlock): def tensors_to_compute_grads(self): return self._params - def register_additional_minibatch(self, batch_size): - """Register an additional minibatch. + def register_additional_tower(self, batch_size): + """Register an additional tower. Args: batch_size: The batch size, used in the covariance estimator. @@ -328,7 +297,7 @@ class FullFB(FisherBlock): self._batch_sizes.append(batch_size) @property - def num_registered_minibatches(self): + def num_registered_towers(self): return len(self._batch_sizes) @property @@ -381,8 +350,8 @@ class NaiveDiagonalFB(FisherBlock): def tensors_to_compute_grads(self): return self._params - def register_additional_minibatch(self, batch_size): - """Register an additional minibatch. + def register_additional_tower(self, batch_size): + """Register an additional tower. Args: batch_size: The batch size, used in the covariance estimator. @@ -390,7 +359,7 @@ class NaiveDiagonalFB(FisherBlock): self._batch_sizes.append(batch_size) @property - def num_registered_minibatches(self): + def num_registered_towers(self): return len(self._batch_sizes) @property @@ -398,24 +367,78 @@ class NaiveDiagonalFB(FisherBlock): return math_ops.reduce_sum(self._batch_sizes) -class InputOutputMultiMinibatch(object): +class InputOutputMultiTower(object): """Mix-in class for blocks with inputs & outputs and multiple mini-batches.""" def __init__(self, *args, **kwargs): self.__inputs = [] self.__outputs = [] - super(InputOutputMultiMinibatch, self).__init__(*args, **kwargs) + super(InputOutputMultiTower, self).__init__(*args, **kwargs) + + def _process_data(self, grads_list): + """Process data into the format used by the factors. + + This function takes inputs and grads_lists data and processes it into + one of the formats expected by the FisherFactor classes (depending on + the value of the global configuration variable TOWER_STRATEGY). + + The initial format of self._inputs is expected to be a list of Tensors + over towers. Similarly grads_lists is expected to be a list over sources + of such lists. + + If TOWER_STRATEGY is "concat", 'inputs' becomes a tuple containing a single + tensor (represented as a PartitionedTensor object) equal to the + concatenation (across towers) of all of the elements of self._inputs. And + similarly grads_list is formatted into a tuple (over sources) of such + tensors (also represented as PartitionedTensors). + + If TOWER_STRATEGY is "separate", formatting of inputs and grads_list + remains unchanged from the initial format (although possibly converting + from lists into tuples). + + Args: + grads_list: grads_list in its initial format (see above). + + Returns: + inputs: self._inputs transformed into the appropriate format (see + above). + grads_list: grads_list transformed into the appropriate format (see + above). + + Raises: + ValueError: if TOWER_STRATEGY is not one of "separate" or "concat". + """ + inputs = self._inputs + # inputs is a list over towers of Tensors + # grads_list is a list of list with the first index being sources and the + # second being towers. + if fisher_factors.TOWER_STRATEGY == "concat": + # Merge towers together into a PartitionedTensor. We package it in + # a singleton tuple since the factors will expect a list over towers + inputs = (utils.PartitionedTensor(inputs),) + # Do the same for grads_list but preserve leading sources dimension + grads_list = tuple((utils.PartitionedTensor(grads),) + for grads in grads_list) + elif fisher_factors.TOWER_STRATEGY == "separate": + inputs = tuple(inputs) + grads_list = tuple(grads_list) + + else: + raise ValueError("Global config variable TOWER_STRATEGY must be one of " + "'concat' or 'separate'.") + + return inputs, grads_list def tensors_to_compute_grads(self): """Tensors to compute derivative of loss with respect to.""" - return self._outputs + return tuple(self._outputs) - def register_additional_minibatch(self, inputs, outputs): + def register_additional_tower(self, inputs, outputs): self._inputs.append(inputs) self._outputs.append(outputs) @property - def num_registered_minibatches(self): + def num_registered_towers(self): result = len(self._inputs) assert result == len(self._outputs) return result @@ -429,7 +452,7 @@ class InputOutputMultiMinibatch(object): return self.__outputs -class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): +class FullyConnectedDiagonalFB(InputOutputMultiTower, FisherBlock): """FisherBlock for fully-connected (dense) layers using a diagonal approx. Estimates the Fisher Information matrix's diagonal entries for a fully @@ -466,8 +489,7 @@ class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): super(FullyConnectedDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs = _make_partitionedtensors_inputs(self._inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) self._factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedDiagonalFactor, @@ -500,7 +522,7 @@ class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): return utils.mat2d_to_layer_params(vector, reshaped_out) -class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): +class ConvDiagonalFB(InputOutputMultiTower, FisherBlock): """FisherBlock for 2-D convolutional layers using a diagonal approx. Estimates the Fisher Information matrix's diagonal entries for a convolutional @@ -580,11 +602,10 @@ class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): super(ConvDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs = _make_partitionedtensors_inputs(self._inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) # Infer number of locations upon which convolution is applied. - self._num_locations = num_conv_locations(inputs.shape.as_list(), + self._num_locations = num_conv_locations(inputs[0].shape.as_list(), self._strides) self._factor = self._layer_collection.make_or_get_factor( @@ -691,7 +712,7 @@ class KroneckerProductFB(FisherBlock): right_factor) -class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): +class EmbeddingKFACFB(InputOutputMultiTower, KroneckerProductFB): """K-FAC FisherBlock for embedding layers. This FisherBlock is similar to FullyConnectedKFACBasicFB, except that its @@ -723,8 +744,7 @@ class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - inputs = _make_partitionedtensors_inputs(self._inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.EmbeddingInputKroneckerFactor, @@ -734,7 +754,7 @@ class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): self._setup_damping(damping) -class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): +class FullyConnectedKFACBasicFB(InputOutputMultiTower, KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. This uses the Kronecker-factorized approximation from the original @@ -764,8 +784,7 @@ class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - inputs = _make_partitionedtensors_inputs(self._inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedKroneckerFactor, @@ -776,7 +795,7 @@ class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): self._setup_damping(damping) -class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): +class ConvKFCBasicFB(InputOutputMultiTower, KroneckerProductFB): """FisherBlock for convolutional layers using the basic KFC approx. Estimates the Fisher Information matrix's blog for a convolutional @@ -846,8 +865,7 @@ class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): self._num_locations = num_conv_locations(self._inputs[0].shape.as_list(), self._strides) - inputs = _make_partitionedtensors_inputs(self._inputs) - grads_list = _make_partitionedtensors_grads(grads_list) + inputs, grads_list = self._process_data(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, @@ -1122,22 +1140,67 @@ def num_conv_locations(input_shape, strides): return spatial_input_locations // spatial_strides_divisor -class InputOutputMultiMinibatchMultiUse(InputOutputMultiMinibatch): - """Adds methods for multi-use/time-step case to InputOutputMultiMinibatch.""" +class InputOutputMultiTowerMultiUse(InputOutputMultiTower): + """Adds methods for multi-use/time-step case to InputOutputMultiTower.""" def __init__(self, num_uses=None, *args, **kwargs): self._num_uses = num_uses - super(InputOutputMultiMinibatchMultiUse, self).__init__(*args, **kwargs) + super(InputOutputMultiTowerMultiUse, self).__init__(*args, **kwargs) def _process_data(self, grads_list): - """Process temporal/multi-use data into a standard format.""" + """Process temporal/multi-use data into the format used by the factors. + + This function takes inputs and grads_lists data and processes it into + one of the formats expected by the FisherFactor classes (depending on + the value of the global configuration variable TOWER_STRATEGY). + + It accepts the data in one of two initial formats. The first possible + format is where self._inputs is a list of list of Tensors. The first index + is tower, the second is use/time-step. grads_list, meanwhile, is a list + over sources of such lists of lists. + + The second possible data format is where self._inputs is a Tensor with + uses/times-steps folded into the batch dimension. i.e. it is a Tensor + of shape [num_uses * size_batch, ...] which represents a reshape of a + Tensor of shape [num_uses, size_batch, ...]. And similarly grads_list is + a list over sources of such Tensors. + + There are two possible formats which inputs and grads_list are transformed + into. + + If TOWER_STRATEGY is "concat", 'inputs' becomes a tuple containing + a single tensor (represented as a PartitionedTensor object) with all of + the data from the towers, as well as the uses/time-steps, concatenated + together. In this tensor the leading dimension is the batch and + use/time-step dimensions folded together (with 'use' being the major of + these two, so that the tensors can be thought of as reshapes of ones of + shape [num_uses, batch_size, ...]). grads_list is similarly formatted as a + tuple over sources of such tensors. + + If TOWER_STRATEGY is "separate" the inputs are formatted into lists of + tensors over towers. Each of these tensors has a similar format to + the tensor produced by the "concat" option, except that each contains + only the data from a single tower. grads_list is similarly formatted + into a tuple over sources of such tuples. + + Args: + grads_list: grads_list in its initial format (see above). + + Returns: + inputs: self._inputs transformed into the appropriate format (see + above). + grads_list: grads_list transformed into the appropriate format (see + above). + + Raises: + ValueError: If TOWER_STRATEGY is not one of "separate" or "concat". + ValueError: If the given/initial format of self._inputs and grads_list + isn't recognized, or doesn't agree with self._num_uses. + """ inputs = self._inputs - # The first possible data format is where inputs is a list of tensors, - # one for each use/time-step. if isinstance(inputs[0], (list, tuple)): - # The first index is tower/minibatch, the second is use/time-step num_uses = len(inputs[0]) if self._num_uses is not None and self._num_uses != num_uses: raise ValueError("num_uses argument doesn't match length of inputs.") @@ -1147,15 +1210,29 @@ class InputOutputMultiMinibatchMultiUse(InputOutputMultiMinibatch): # Check that all mini-batches/towers have the same number of uses if not all(len(input_) == num_uses for input_ in inputs): raise ValueError("Length of inputs argument is inconsistent across " - "mini-batches/towers.") - # Fold uses/time-step and towers/minibatches dimensions together - inputs = nest.flatten(inputs) + "towers.") - inputs = _make_partitionedtensors_inputs(inputs) - # If inputs is not a tuple then we assume that inputs is a tensor - # with 'uses' folded into the batch dimension. (And grads_list is a list - # across sources of such Tensors.) This is the native format that the - # factor will take as arguments. + if fisher_factors.TOWER_STRATEGY == "concat": + # Reverse the tower and use/time-step indices, so that use is now first, + # and towers is second + inputs = tuple(zip(*inputs)) + + # Flatten the two dimensions + inputs = nest.flatten(inputs) + + # Merge everything together into a PartitionedTensor. We package it in + # a singleton tuple since the factors will expect a list over towers + inputs = (utils.PartitionedTensor(inputs),) + + elif fisher_factors.TOWER_STRATEGY == "separate": + # Merge together the uses/time-step dimension into PartitionedTensors, + # but keep the leading dimension (towers) intact for the factors to + # process individually. + inputs = tuple(utils.PartitionedTensor(input_) for input_ in inputs) + + else: + raise ValueError("Global config variable TOWER_STRATEGY must be one of " + "'concat' or 'separate'.") # Now we perform the analogous processing for grads_list if isinstance(grads_list[0][0], (list, tuple)): @@ -1170,10 +1247,34 @@ class InputOutputMultiMinibatchMultiUse(InputOutputMultiMinibatch): if not all(len(grad) == num_uses for grads in grads_list for grad in grads): raise ValueError("Length of outputs argument is inconsistent across " - "mini-batches/towers.") + "towers.") + + if fisher_factors.TOWER_STRATEGY == "concat": + # Reverse the tower and use/time-step indices, so that use is now first, + # and towers is second + grads_list = tuple(tuple(zip(*grads)) for grads in grads_list) + + # Flatten the two dimensions, leaving the leading dimension (source) + # intact + grads_list = tuple(nest.flatten(grads) for grads in grads_list) + + # Merge inner dimensions together into PartitionedTensors. We package + # them in a singleton tuple since the factors will expect a list over + # towers + grads_list = tuple((utils.PartitionedTensor(grads),) + for grads in grads_list) + + elif fisher_factors.TOWER_STRATEGY == "separate": + # Merge together the uses/time-step dimension into PartitionedTensors, + # but keep the leading dimension (towers) intact for the factors to + # process individually. + grads_list = tuple(tuple(utils.PartitionedTensor(grad) + for grad in grads) + for grads in grads_list) - grads_list = tuple(nest.flatten(grads) for grads in grads_list) - grads_list = _make_partitionedtensors_grads(grads_list) + else: + raise ValueError("Global config variable TOWER_STRATEGY must be one of " + "'concat' or 'separate'.") if self._num_uses is None: raise ValueError("You must supply a value for the num_uses argument if " @@ -1184,7 +1285,7 @@ class InputOutputMultiMinibatchMultiUse(InputOutputMultiMinibatch): return inputs, grads_list -class FullyConnectedMultiIndepFB(InputOutputMultiMinibatchMultiUse, +class FullyConnectedMultiIndepFB(InputOutputMultiTowerMultiUse, KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters. @@ -1228,7 +1329,7 @@ class FullyConnectedMultiIndepFB(InputOutputMultiMinibatchMultiUse, return float(self._num_uses) -class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatchMultiUse, +class ConvKFCBasicMultiIndepFB(InputOutputMultiTowerMultiUse, KroneckerProductFB): """FisherBlock for 2D convolutional layers using the basic KFC approx. @@ -1309,7 +1410,7 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiMinibatchMultiUse, return self._num_locations * self._num_uses -class EmbeddingKFACMultiIndepFB(InputOutputMultiMinibatchMultiUse, +class EmbeddingKFACMultiIndepFB(InputOutputMultiTowerMultiUse, KroneckerProductFB): """K-FAC FisherBlock for embedding layers used multiple times in the graph. @@ -1320,7 +1421,7 @@ class EmbeddingKFACMultiIndepFB(InputOutputMultiMinibatchMultiUse, Does not support bias parameters. """ - def __init__(self, layer_collection, vocab_size, num_uses): + def __init__(self, layer_collection, vocab_size, num_uses=None): """Creates a EmbeddingKFACMultiIndepFB block. Args: @@ -1368,7 +1469,7 @@ class SeriesFBApproximation(enum.IntEnum): option2 = 2 -class FullyConnectedSeriesFB(InputOutputMultiMinibatchMultiUse, +class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters across time. diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index f521363536..353e1c6abb 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import abc +import contextlib import numpy as np import six @@ -35,6 +36,8 @@ from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import moving_averages +from tensorflow.python.util import nest + # Whether to initialize covariance estimators at a zero matrix (or the identity # matrix). @@ -52,16 +55,25 @@ EIGENVALUE_DECOMPOSITION_THRESHOLD = 2 # matrix powers. Must be nonnegative. EIGENVALUE_CLIPPING_THRESHOLD = 0.0 +# TOWER_STRATEGY can be one of "concat" or "separate". If "concat", the data +# passed to the factors from the blocks will be concatenated across towers +# (lazilly via PartitionedTensor objects). Otherwise a tuple of tensors over +# towers will be passed in, and the factors will iterate over this and do the +# cov computations separately for each one, averaging the results together. +TOWER_STRATEGY = "concat" + def set_global_constants(init_covariances_at_zero=None, zero_debias=None, eigenvalue_decomposition_threshold=None, - eigenvalue_clipping_threshold=None): + eigenvalue_clipping_threshold=None, + tower_strategy=None): """Sets various global constants used by the classes in this module.""" global INIT_COVARIANCES_AT_ZERO global ZERO_DEBIAS global EIGENVALUE_DECOMPOSITION_THRESHOLD global EIGENVALUE_CLIPPING_THRESHOLD + global TOWER_STRATEGY if init_covariances_at_zero is not None: INIT_COVARIANCES_AT_ZERO = init_covariances_at_zero @@ -71,6 +83,8 @@ def set_global_constants(init_covariances_at_zero=None, EIGENVALUE_DECOMPOSITION_THRESHOLD = eigenvalue_decomposition_threshold if eigenvalue_clipping_threshold is not None: EIGENVALUE_CLIPPING_THRESHOLD = eigenvalue_clipping_threshold + if tower_strategy is not None: + TOWER_STRATEGY = tower_strategy def inverse_initializer(shape, dtype, partition_info=None): # pylint: disable=unused-argument @@ -89,6 +103,15 @@ def diagonal_covariance_initializer(shape, dtype, partition_info): # pylint: di return array_ops.ones(shape, dtype) +@contextlib.contextmanager +def place_on_device(device): + if device is not None and len(device): + with tf_ops.device(device): + yield + else: + yield + + def compute_cov(tensor, tensor_right=None, normalizer=None): """Compute the empirical second moment of the rows of a 2D Tensor. @@ -255,6 +278,10 @@ class FisherFactor(object): """ pass + @abc.abstractproperty + def _num_towers(self): + pass + @abc.abstractproperty def _dtype(self): """dtype for variable backing this factor.""" @@ -277,12 +304,14 @@ class FisherFactor(object): dtype=self._dtype) @abc.abstractmethod - def _compute_new_cov(self, idx=0): + def _compute_new_cov(self, source, tower): """Computes minibatch-estimated covariance for a single source. Args: - idx: int in [0, self._num_sources). Which source to use when estimating - covariance. + source: int in [0, self._num_sources). Which source to use when computing + the cov update. + tower: int in [0, self._num_towers). Which tower to use when computing + the cov update. Returns: Tensor of same shape as self.get_cov_var(). @@ -297,15 +326,29 @@ class FisherFactor(object): Returns: An Op for updating the covariance Variable referenced by _cov. """ - new_cov_contribs = tuple(self._compute_new_cov(idx) - for idx in range(self._num_sources)) - new_cov = math_ops.add_n(new_cov_contribs) + new_cov_contribs = [] + for source in range(self._num_sources): + for tower in range(self._num_towers): + device = (self._get_data_device(tower) + if TOWER_STRATEGY == "separate" else None) + with place_on_device(device): + new_cov_contribs.append(self._compute_new_cov(source, tower)) + + new_cov = math_ops.add_n(new_cov_contribs) / float(self._num_towers) + + # I have no idea if the TPU code below is still correct since I don't know + # what it actually does. Also, this code is not present in some of the + # other versions of make_covariance_update_op. Does it matter? # Synchronize value across all TPU cores. if utils.on_tpu(): new_cov = utils.cross_replica_mean(new_cov) return moving_averages.assign_moving_average( self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS) + @abc.abstractmethod + def _get_data_device(self, tower): + pass + @abc.abstractmethod def instantiate_inv_variables(self): """Makes the internal "inverse" variable(s).""" @@ -596,17 +639,26 @@ class FullFactor(InverseProvidingFactor): def _num_sources(self): return len(self._params_grads) + @property + def _num_towers(self): + return 1 + @property def _dtype(self): return self._params_grads[0][0].dtype - def _compute_new_cov(self, idx=0): + def _compute_new_cov(self, source, tower): + assert tower == 0 + # This will be a very basic rank 1 estimate - params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) + params_grads_flat = utils.tensors_to_column(self._params_grads[source]) return ((params_grads_flat * array_ops.transpose( params_grads_flat)) / math_ops.cast(self._batch_size, params_grads_flat.dtype)) + def _get_data_device(self, tower): + return None + class DiagonalFactor(FisherFactor): """A base class for FisherFactors that use diagonal approximations. @@ -691,15 +743,24 @@ class NaiveDiagonalFactor(DiagonalFactor): def _num_sources(self): return len(self._params_grads) + @property + def _num_towers(self): + return 1 + @property def _dtype(self): return self._params_grads[0][0].dtype - def _compute_new_cov(self, idx=0): - params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) + def _compute_new_cov(self, source, tower): + assert tower == 0 + + params_grads_flat = utils.tensors_to_column(self._params_grads[source]) return (math_ops.square(params_grads_flat) / math_ops.cast( self._batch_size, params_grads_flat.dtype)) + def _get_data_device(self, tower): + return None + class EmbeddingInputKroneckerFactor(DiagonalFactor): r"""FisherFactor for input to an embedding layer. @@ -719,8 +780,8 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): """Instantiate EmbeddingInputKroneckerFactor. Args: - input_ids: Tensor of shape [batch_size, input_size] and dtype int32. - Indices into embedding matrix. + input_ids: List of Tensors of shape [batch_size, input_size] and dtype + int32. Indices into embedding matrix. List index is tower. vocab_size: int or 0-D Tensor. Maximum value for entries in 'input_ids'. dtype: dtype for covariance statistics. Must be a floating point type. Defaults to float32. @@ -743,15 +804,18 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): def _num_sources(self): return 1 + @property + def _num_towers(self): + return len(self._input_ids) + @property def _dtype(self): return self._cov_dtype - def _compute_new_cov(self, idx=0): - if idx != 0: - raise ValueError("EmbeddingInputKroneckerFactor only supports idx = 0") + def _compute_new_cov(self, source, tower): + assert source == 0 - input_ids = self._input_ids + input_ids = self._input_ids[tower] if len(input_ids.shape) > 2: raise ValueError( @@ -781,6 +845,9 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): return new_cov + def _get_data_device(self, tower): + return self._input_ids[tower].device + class FullyConnectedDiagonalFactor(DiagonalFactor): r"""FisherFactor for a diagonal approx of a fully-connected layer's Fisher. @@ -800,10 +867,11 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): """Instantiate FullyConnectedDiagonalFactor. Args: - inputs: Tensor of shape [batch_size, input_size]. Inputs to this layer. + inputs: List of Tensors of shape [batch_size, input_size]. Inputs to this + layer. List index is towers. outputs_grads: List of Tensors, each of shape [batch_size, output_size], which are the gradients of the loss with respect to the layer's - outputs. One Tensor for each "source". + outputs. First index is source, second is tower. has_bias: bool. If True, append '1' to each input. """ @@ -817,47 +885,58 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): @property def _var_scope(self): return "ff_diagfc_" + scope_string_from_params( - (self._inputs,) + tuple(self._outputs_grads)) + tuple(self._inputs) + tuple(nest.flatten(self._outputs_grads))) @property def _cov_shape(self): - input_size = self._inputs.shape[1] + self._has_bias - output_size = self._outputs_grads[0].shape[1] + input_size = self._inputs[0].shape[1] + self._has_bias + output_size = self._outputs_grads[0][0].shape[1] return [input_size, output_size] @property def _num_sources(self): return len(self._outputs_grads) + @property + def _num_towers(self): + return len(self._inputs) + @property def _dtype(self): - return self._outputs_grads[0].dtype + return self._outputs_grads[0][0].dtype def make_covariance_update_op(self, ema_decay): - inputs = self._inputs - if self._has_bias: - inputs = append_homog(inputs) - self._squared_inputs = math_ops.square(inputs) + self._squared_inputs = [] + for tower in range(self._num_towers): + inputs = self._inputs[tower] + + with place_on_device(self._get_data_device(tower)): + if self._has_bias: + inputs = append_homog(inputs) + self._squared_inputs.append(math_ops.square(inputs)) return super(FullyConnectedDiagonalFactor, self).make_covariance_update_op( ema_decay) - def _compute_new_cov(self, idx=0): - batch_size = array_ops.shape(self._squared_inputs)[0] - outputs_grad = self._outputs_grads[idx] + def _compute_new_cov(self, source, tower): + batch_size = array_ops.shape(self._squared_inputs[tower])[0] + outputs_grad = self._outputs_grads[source][tower] # The well-known special formula that uses the fact that the entry-wise # square of an outer product is the outer-product of the entry-wise squares. # The gradient is the outer product of the input and the output gradients, # so we just square both and then take their outer-product. new_cov = math_ops.matmul( - self._squared_inputs, + self._squared_inputs[tower], math_ops.square(outputs_grad), transpose_a=True) new_cov /= math_ops.cast(batch_size, new_cov.dtype) return new_cov + def _get_data_device(self, tower): + return self._inputs[tower].device + class ConvDiagonalFactor(DiagonalFactor): """FisherFactor for a diagonal approx of a convolutional layer's Fisher.""" @@ -874,11 +953,12 @@ class ConvDiagonalFactor(DiagonalFactor): """Creates a ConvDiagonalFactor object. Args: - inputs: Tensor of shape [batch_size, height, width, in_channels]. - Input activations to this layer. + inputs: List of Tensors of shape [batch_size, height, width, in_channels]. + Input activations to this layer. List index is towers. outputs_grads: List of Tensors, each of shape [batch_size, height, width, out_channels], which are the gradients of the loss - with respect to the layer's outputs. One Tensor for each "source". + with respect to the layer's outputs. First index is source, second + index is tower. filter_shape: Tuple of 4 ints: (kernel_height, kernel_width, in_channels, out_channels). Represents shape of kernel used in this layer. strides: The stride size in this layer (1-D Tensor of length 4). @@ -896,14 +976,15 @@ class ConvDiagonalFactor(DiagonalFactor): """ if not utils.is_data_format_channel_last(data_format): raise ValueError("Channel must be last.") - if inputs.shape.ndims != 4: - raise ValueError("inputs must be 4-D Tensor.") - if inputs.shape.as_list()[-1] != filter_shape[-2]: + if any(input_.shape.ndims != 4 for input_ in inputs): + raise ValueError("inputs must be a list of 4-D Tensors.") + if any(input_.shape.as_list()[-1] != filter_shape[-2] for input_ in inputs): raise ValueError("inputs and filter_shape must agree on in_channels.") for i, outputs_grad in enumerate(outputs_grads): - if outputs_grad.shape.ndims != 4: + if any(output_grad.shape.ndims != 4 for output_grad in outputs_grad): raise ValueError("outputs[%d] must be 4-D Tensor." % i) - if outputs_grad.shape.as_list()[-1] != filter_shape[-1]: + if any(output_grad.shape.as_list()[-1] != filter_shape[-1] + for output_grad in outputs_grad): raise ValueError( "outputs[%d] and filter_shape must agree on out_channels." % i) if len(strides) != 4: @@ -926,7 +1007,7 @@ class ConvDiagonalFactor(DiagonalFactor): @property def _var_scope(self): return "ff_convdiag_" + scope_string_from_params( - (self._inputs,) + tuple(self._outputs_grads)) + tuple(self._inputs) + tuple(nest.flatten(self._outputs_grads))) @property def _cov_shape(self): @@ -940,9 +1021,13 @@ class ConvDiagonalFactor(DiagonalFactor): def _num_sources(self): return len(self._outputs_grads) + @property + def _num_towers(self): + return len(self._inputs) + @property def _dtype(self): - return self._outputs_grads[0].dtype + return self._inputs[0].dtype def make_covariance_update_op(self, ema_decay): filter_height, filter_width, _, _ = self._filter_shape @@ -953,25 +1038,30 @@ class ConvDiagonalFactor(DiagonalFactor): rates = (1, 1, 1, 1) else: rates = tuple(self._dilations) - patches = array_ops.extract_image_patches( - self._inputs, - ksizes=[1, filter_height, filter_width, 1], - strides=self._strides, - rates=rates, - padding=self._padding) - if self._has_bias: - patches = append_homog(patches) + self._patches = [] + for tower in range(self._num_towers): + with place_on_device(self._get_data_device(tower)): + patches = array_ops.extract_image_patches( + self._inputs[tower], + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=rates, + padding=self._padding) + + if self._has_bias: + patches = append_homog(patches) - self._patches = patches + self._patches.append(patches) return super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay) - def _compute_new_cov(self, idx=0): - batch_size = array_ops.shape(self._patches)[0] - outputs_grad = self._outputs_grads[idx] + def _compute_new_cov(self, source, tower): + patches = self._patches[tower] + batch_size = array_ops.shape(patches)[0] + outputs_grad = self._outputs_grads[source][tower] - new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad) + new_cov = self._convdiag_sum_of_squares(patches, outputs_grad) new_cov /= math_ops.cast(batch_size, new_cov.dtype) return new_cov @@ -984,6 +1074,9 @@ class ConvDiagonalFactor(DiagonalFactor): outputs_grad) return math_ops.reduce_sum(math_ops.square(case_wise_gradients), axis=0) + def _get_data_device(self, tower): + return self._inputs[tower].device + class FullyConnectedKroneckerFactor(InverseProvidingFactor): """Kronecker factor for the input or output side of a fully-connected layer. @@ -995,9 +1088,9 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): """Instantiate FullyConnectedKroneckerFactor. Args: - tensors: List of Tensors, each of shape [batch_size, n], one for each - source. The Tensors are typically either a layer's inputs or its - output's gradients. + tensors: List of list of Tensors, each of shape [batch_size, n]. The + Tensors are typically either a layer's inputs or its output's gradients. + The first list index is source, the second is tower. has_bias: bool. If True, append '1' to each row. """ # The tensor argument is either a tensor of input activations or a tensor of @@ -1009,27 +1102,34 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): @property def _var_scope(self): return "ff_fckron_" + scope_string_from_params( - tuple(self._tensors) + (self._has_bias,)) + tuple(nest.flatten(self._tensors)) + (self._has_bias,)) @property def _cov_shape(self): - size = self._tensors[0].shape[1] + self._has_bias + size = self._tensors[0][0].shape[1] + self._has_bias return [size, size] @property def _num_sources(self): return len(self._tensors) + @property + def _num_towers(self): + return len(self._tensors[0]) + @property def _dtype(self): - return self._tensors[0].dtype + return self._tensors[0][0].dtype - def _compute_new_cov(self, idx=0): - tensor = self._tensors[idx] + def _compute_new_cov(self, source, tower): + tensor = self._tensors[source][tower] if self._has_bias: tensor = append_homog(tensor) return compute_cov(tensor) + def _get_data_device(self, tower): + return self._tensors[0][tower].device + class ConvInputKroneckerFactor(InverseProvidingFactor): r"""Kronecker factor for the input side of a convolutional layer. @@ -1053,8 +1153,8 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): """Initializes ConvInputKroneckerFactor. Args: - inputs: Tensor of shape [batch_size, ..spatial_input_size.., in_channels]. - Inputs to layer. + inputs: List of Tensors of shape [batch_size, ..spatial_input_size.., + in_channels]. Inputs to layer. List index is tower. filter_shape: List of ints. Contains [..spatial_filter_size.., in_channels, out_channels]. Shape of convolution kernel. padding: str. Padding method for layer. "SAME" or "VALID". @@ -1083,10 +1183,10 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): @property def _var_scope(self): - return "ff_convinkron_" + scope_string_from_params([ - self._inputs, self._filter_shape, self._strides, self._padding, - self._dilation_rate, self._data_format, self._has_bias - ]) + return "ff_convinkron_" + scope_string_from_params( + tuple(self._inputs) + + tuple((self._filter_shape, self._strides, self._padding, + self._dilation_rate, self._data_format, self._has_bias))) @property def _cov_shape(self): @@ -1099,19 +1199,24 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): def _num_sources(self): return 1 + @property + def _num_towers(self): + return len(self._inputs) + @property def _dtype(self): - return self._inputs.dtype + return self._inputs[0].dtype - def _compute_new_cov(self, idx=0): - if idx != 0: - raise ValueError("ConvInputKroneckerFactor only supports idx = 0") + def _compute_new_cov(self, source, tower): + assert source == 0 + + inputs = self._inputs[tower] # TODO(b/64144716): there is potential here for a big savings in terms of # memory use. if self._extract_patches_fn in [None, "extract_convolution_patches"]: patches = utils.extract_convolution_patches( - self._inputs, + inputs, self._filter_shape, padding=self._padding, strides=self._strides, @@ -1119,7 +1224,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): data_format=self._data_format) elif self._extract_patches_fn == "extract_image_patches": - assert self._inputs.shape.ndims == 4 + assert inputs.shape.ndims == 4 assert len(self._filter_shape) == 4 assert len(self._strides) == 4, self._strides if self._dilation_rate is None: @@ -1129,7 +1234,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): assert len(rates) == 4 assert rates[0] == rates[-1] == 1 patches = array_ops.extract_image_patches( - self._inputs, + inputs, ksizes=[1] + list(self._filter_shape[0:-2]) + [1], strides=self._strides, rates=rates, @@ -1139,7 +1244,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): assert self._strides in [None, [1, 1, 1, 1], (1, 1, 1, 1)] assert self._filter_shape[0] == self._filter_shape[1] == 1 patches = utils.extract_pointwise_conv2d_patches( - self._inputs, self._filter_shape, data_format=None) + inputs, self._filter_shape, data_format=None) else: raise NotImplementedError(self._extract_patches_fn) @@ -1164,6 +1269,9 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): # (Tilde omitted over A for clarity.) return compute_cov(patches_flat) + def _get_data_device(self, tower): + return self._inputs[tower].device + class ConvOutputKroneckerFactor(InverseProvidingFactor): r"""Kronecker factor for the output side of a convolutional layer. @@ -1180,9 +1288,9 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): """Initializes ConvOutputKroneckerFactor. Args: - outputs_grads: list of Tensors. Each Tensor is of shape - [batch_size, ..spatial_input_size.., out_channels]. One Tensor per - source. + outputs_grads: List of list of Tensors. Each Tensor is of shape + [batch_size, ..spatial_input_size.., out_channels]. First list index + is source, the second is tower. data_format: None or str. Format of outputs_grads. Raises: @@ -1190,13 +1298,14 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): """ if not utils.is_data_format_channel_last(data_format): raise ValueError("Channel must be last.") - self._out_channels = outputs_grads[0].shape.as_list()[-1] + self._out_channels = outputs_grads[0][0].shape.as_list()[-1] self._outputs_grads = outputs_grads super(ConvOutputKroneckerFactor, self).__init__() @property def _var_scope(self): - return "ff_convoutkron_" + scope_string_from_params(self._outputs_grads) + return "ff_convoutkron_" + scope_string_from_params( + nest.flatten(self._outputs_grads)) @property def _cov_shape(self): @@ -1207,12 +1316,16 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): def _num_sources(self): return len(self._outputs_grads) + @property + def _num_towers(self): + return len(self._outputs_grads[0]) + @property def _dtype(self): - return self._outputs_grads[0].dtype + return self._outputs_grads[0][0].dtype - def _compute_new_cov(self, idx=0): - outputs_grad = self._outputs_grads[idx] + def _compute_new_cov(self, source, tower): + outputs_grad = self._outputs_grads[source][tower] # reshaped_tensor below is the matrix DS_l defined in the KFC paper # (tilde omitted over S for clarity). It has shape M|T| x I, where @@ -1225,6 +1338,9 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): # (Tilde omitted over S for clarity.) return compute_cov(reshaped_tensor) + def _get_data_device(self, tower): + return self._outputs_grads[0][tower].device + class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): """Kronecker factor for a fully connected layer used multiple times.""" @@ -1236,9 +1352,11 @@ class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): """Constructs a new `FullyConnectedMultiKF`. Args: - tensors: List of Tensors of shape, each of shape [batch_size, n]. Each of - these tensors is usually a layer's inputs or its output's gradients. - The list is over sources. + tensors: List of list of Tensors of shape, each of shape + [num_uses * batch_size, n], and is a reshape version of a Tensor of + shape [num_uses, batch_size, n]. Each of these tensors is usually a + layer's inputs or its output's gradients. The first list index is + sources, the second is towers. num_uses: int. The number of time-steps / uses. has_bias: bool. If True, '1' is appended to each row. """ @@ -1262,16 +1380,24 @@ class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): @property def _var_scope(self): return "ff_fc_multi_" + scope_string_from_params( - tuple(self._tensors) + (self._num_timesteps, self._has_bias,)) + tuple(nest.flatten(self._tensors)) + + (self._num_timesteps, self._has_bias,)) def make_covariance_update_op(self, ema_decay): op = super(FullyConnectedMultiKF, self).make_covariance_update_op(ema_decay) if self._cov_dt1 is not None: - new_cov_dt1_contribs = tuple(self._compute_new_cov_dt1(idx) - for idx in range(self._num_sources)) - new_cov_dt1 = math_ops.add_n(new_cov_dt1_contribs) + new_cov_dt1_contribs = [] + for source in range(self._num_sources): + for tower in range(self._num_towers): + with place_on_device(self._get_data_device(tower)): + new_cov_dt1_contribs.append(self._compute_new_cov_dt1(source, + tower)) + + new_cov_dt1 = (math_ops.add_n(new_cov_dt1_contribs) + / float(self._num_towers)) + op2 = moving_averages.assign_moving_average( self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) @@ -1284,8 +1410,8 @@ class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): return op - def _compute_new_cov_dt1(self, idx=0): # pylint: disable=missing-docstring - tensor = self._tensors[idx] + def _compute_new_cov_dt1(self, source, tower): # pylint: disable=missing-docstring + tensor = self._tensors[source][tower] if self._has_bias: # This appending is technically done twice (the other time is for # _compute_new_cov()) @@ -1303,9 +1429,12 @@ class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): return compute_cov( tensor_future, tensor_right=tensor_present, normalizer=total_len) + def _get_data_device(self, tower): + return self._tensors[0][tower].device + @property def _vec_shape(self): - size = self._tensors[0].shape[1] + self._has_bias + size = self._tensors[0][0].shape[1] + self._has_bias return [size] def get_option1quants(self, damping_func): diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 7727c607db..586a004f88 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -390,7 +390,7 @@ class LayerCollection(object): if name in self._loss_dict: raise KeyError( "Loss function named {} already exists. Set reuse=True to append " - "another minibatch/tower.".format(name)) + "another tower.".format(name)) loss_list = [] self._loss_dict[name] = loss_list @@ -596,7 +596,7 @@ class LayerCollection(object): vocab_size = int(params.shape[0]) block = self.register_block( params, block_type(self, vocab_size), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, 1) @@ -637,7 +637,7 @@ class LayerCollection(object): has_bias = isinstance(params, (tuple, list)) block = self.register_block(params, block_type(self, has_bias=has_bias), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, 1) @@ -716,7 +716,7 @@ class LayerCollection(object): else: raise NotImplementedError(approx) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, 1) @@ -774,7 +774,7 @@ class LayerCollection(object): dilation_rate=dilation_rate, data_format=data_format), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, 1) @@ -830,7 +830,7 @@ class LayerCollection(object): rate=rate, data_format=data_format), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, 1) @@ -913,7 +913,7 @@ class LayerCollection(object): Args: params: Tensor or tuple of Tensors corresponding to the parameters. - batch_size: 0-D Tensor. Size of the minibatch. + batch_size: 0-D Tensor. Size of the minibatch (for this tower). approx: str or None. It not None, must be one of "full" or "diagonal". The Fisher approximation to use. If None the default value is used. (Default: None) @@ -932,7 +932,7 @@ class LayerCollection(object): _GENERIC_APPROX_TO_BLOCK_TYPES) block = self.register_block(params, block_type(self, params), reuse=reuse) - block.register_additional_minibatch(batch_size) + block.register_additional_tower(batch_size) self._add_uses(params, float("inf")) @@ -952,14 +952,14 @@ class LayerCollection(object): inputs: A list of Tensors, each of shape [batch_size, input_size]. Inputs to layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). OR, can be single Tensor, of - shape [batch_size * num_uses, input_size], which is a reshaped version - of a Tensor of shape [batch_size, num_uses, input_size]. + shape [num_uses * batch_size , input_size], which is a reshaped version + of a Tensor of shape [num_uses, batch_size, input_size]. outputs: A list of Tensors, the same length as 'inputs', each of shape [batch_size, output_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). Needs to correspond with the order used in 'inputs'. OR, can be - a single Tensor of shape [batch_size * num_uses, output_size], which is - a reshaped version of a Tensor of shape [batch_size, num_uses, + a single Tensor of shape [num_uses * batch_size, output_size], which is + a reshaped version of a Tensor of shape [num_uses, batch_size, output_size]. num_uses: int or None. The number uses/time-steps in the graph where the layer appears. Only needed if both inputs and outputs are given in the @@ -989,7 +989,7 @@ class LayerCollection(object): block = self.register_block(params, block_type(self, has_bias=has_bias, num_uses=num_uses), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) assert len(inputs) == len(outputs) self._add_uses(params, len(inputs)) @@ -1017,16 +1017,16 @@ class LayerCollection(object): inputs: A list of Tensors, each of shape [batch_size, height, width, in_channels]. Inputs to layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). OR, can be single - Tensor, of shape [batch_size * num_uses, height, width, in_channels], - which is a reshaped version of a Tensor of shape [batch_size, num_uses, + Tensor, of shape [num_uses * batch_size, height, width, in_channels], + which is a reshaped version of a Tensor of shape [num_uses, batch_size, height, width, in_channels]. outputs: A list of Tensors, each of shape [batch_size, height, width, out_channels]. Output produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). Needs to correspond with the order used in 'inputs'. OR, can be a - single Tensor, of shape [batch_size*num_uses, height, width, + single Tensor, of shape [num_uses * batch_size, height, width, out_channels], which is a reshaped version of a Tensor of shape - [batch_size, num_uses, height, width, out_channels]. + [num_uses, batch_size, height, width, out_channels]. num_uses: int or None. The number uses/time-steps in the graph where the layer appears. Only needed if both inputs and outputs are given in the single Tensor format. (Default: None) @@ -1065,7 +1065,7 @@ class LayerCollection(object): num_uses=num_uses), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) assert len(inputs) == len(outputs) self._add_uses(params, len(inputs)) @@ -1088,15 +1088,15 @@ class LayerCollection(object): inputs: A list of Tensors, each of shape [batch_size, input_size] and dtype int32. Indices into embedding matrix. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). - OR, can be single Tensor, of shape [batch_size * num_uses, input_size], - which is a reshaped version of a Tensor of shape [batch_size, num_uses, + OR, can be single Tensor, of shape [num_uses, batch_size, input_size], + which is a reshaped version of a Tensor of shape [num_uses, batch_size, input_size]. outputs: A list of Tensors, each of shape [batch_size, embedding_size]. Outputs produced by layer. The list indexes each use in the graph (which might correspond to a "time-step" in an RNN). Needs to correspond with the order used in 'inputs'. OR, can be a - single Tensor, of shape [batch_size*num_uses, embedding_size], which - is a reshaped version of a Tensor of shape [batch_size, num_uses, + single Tensor, of shape [num_uses * batch_size, embedding_size], which + is a reshaped version of a Tensor of shape [num_uses, batch_size, embedding_size]. num_uses: int or None. The number uses/time-steps in the graph where the layer appears. Only needed if both inputs and outputs are given in the @@ -1127,7 +1127,7 @@ class LayerCollection(object): block = self.register_block( params, block_type(self, vocab_size, num_uses=num_uses), reuse=reuse) - block.register_additional_minibatch(inputs, outputs) + block.register_additional_tower(inputs, outputs) self._add_uses(params, len(inputs)) diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index c9de0c7270..b6f42815e7 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -649,9 +649,6 @@ class PartitionedTensor(object): def dtype(self): return self.tensors[0].dtype - def devices(self): - return set(tensor.device for tensor in self.tensors) - def __str__(self): return "PartitionedTensor([%s, ...], dtype=%s, shape=%s)" % ( self.tensors[0].name, self.dtype.name, tuple(self.shape.as_list())) @@ -681,6 +678,15 @@ class PartitionedTensor(object): self._concats[result.device] = result return self._concats[result.device] + @property + def device(self): + # PartitionedTensors in general do not live on a single device. If the + # device cannot be determined unambiguously this property will return None. + device = self.tensors[0].device + if all(tensor.device == device for tensor in self.tensors): + return device + return None + ops.register_tensor_conversion_function( PartitionedTensor, -- GitLab From 85bec0af5e4bd036a9cb922c794bbe7191f7b76d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 08:16:59 -0700 Subject: [PATCH 539/960] Optimized quantized fully-connected op for LSTMs. PiperOrigin-RevId: 190617310 --- .../internal/optimized/optimized_ops.h | 82 +++++++++++-------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f08d9d6d57..e079ff3f4c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -802,21 +802,20 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, input_offset, output_pipeline); } -inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, int16* output_data, - const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { +inline void FullyConnected( + const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, + const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset, + int32 output_multiplier, int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { gemmlowp::ScopedProfilingLabel label("FullyConnected/Uint8Int16"); // This is a copy of the reference implementation. We do not currently have a // properly optimized version. (void)gemm_context; // only used in properly optimized code. TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_EQ(output_offset, 0); + // TODO(benoitjacob): This really should be: // const int batches = ArraySize(output_dims, 1); // but the current --variable_batch hack consists in overwriting the 3rd @@ -828,30 +827,49 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, const int accum_depth = ArraySize(filter_dims, 0); TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - // Internal accumulation. - // Initialize accumulator with the bias-value. - int32 accum = bias_data[out_c]; - // Accumulation loop. - for (int d = 0; d < accum_depth; ++d) { - int16 input_val = input_data[b * accum_depth + d] + input_offset; - int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset; - accum += filter_val * input_val; - } - // Down-scale the final int32 accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - accum = MultiplyByQuantizedMultiplier(accum, output_multiplier, - -output_shift); - // Saturate, cast to int16, and store to output array. - accum = std::max(accum, output_activation_min - output_offset); - accum = std::min(accum, output_activation_max - output_offset); - accum += output_offset; - output_data[out_c + output_depth * b] = accum; - } + + // Implementation of the fully connected node suited to the inside of an LSTM + // cell. The operands are 8-bit integers, the accumulators are internally + // 32bit integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. +#ifdef GEMMLOWP_NEON + if (batches == 1 && !(output_depth % 4) && !(accum_depth % 8) && + input_offset == -128 && output_activation_min == -32768 && + output_activation_max == 32767) { + GEMVForLstmCell(input_data, input_dims, filter_data, filter_dims, + filter_offset, bias_data_int32, bias_dims, + output_multiplier, -output_shift, output_data, output_dims); + return; } +#endif + gemmlowp::MatrixMap weights_matrix( + filter_data, output_depth, accum_depth); + gemmlowp::MatrixMap input_matrix( + input_data, accum_depth, batches); + gemmlowp::MatrixMap output_matrix( + output_data, output_depth, batches); + typedef gemmlowp::VectorMap + ColVectorMap; + ColVectorMap bias_vector(bias_data_int32, output_depth); + gemmlowp::OutputStageBiasAddition bias_addition_stage; + bias_addition_stage.bias_vector = bias_vector; + gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent scale_stage; + scale_stage.result_offset_after_shift = 0; + scale_stage.result_fixedpoint_multiplier = output_multiplier; + // Note that this shift is negated wrt ordinary FC. + scale_stage.result_exponent = -output_shift; + gemmlowp::OutputStageClamp clamp_stage; + clamp_stage.min = output_activation_min; + clamp_stage.max = output_activation_max; + gemmlowp::OutputStageSaturatingCastToInt16 saturating_cast_int16_stage; + auto output_pipeline = + std::make_tuple(bias_addition_stage, scale_stage, clamp_stage, + saturating_cast_int16_stage); + gemmlowp::GemmWithOutputPipeline( + gemm_context, weights_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); } // legacy, for compatibility with old checked-in code -- GitLab From cf24990855b4418f86ffc5cfe65b502cd0d8b924 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 27 Mar 2018 08:33:22 -0700 Subject: [PATCH 540/960] Automated g4 rollback of changelist 188385868 PiperOrigin-RevId: 190618988 --- tensorflow/compiler/tests/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index bbb6089ea8..1c5a8f8e69 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -542,7 +542,6 @@ tf_xla_py_test( size = "medium", srcs = ["spacetobatch_op_test.py"], shard_count = 3, - tags = ["notsan"], deps = [ ":xla_test", "//tensorflow/python:array_ops", -- GitLab From ff9040f645a042ef62782be0eed8b4597e80ce6c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 09:20:54 -0700 Subject: [PATCH 541/960] Flush the output of print (fixes out-of-order prints in public colab) PiperOrigin-RevId: 190624708 --- tensorflow/contrib/autograph/utils/builtins.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/utils/builtins.py b/tensorflow/contrib/autograph/utils/builtins.py index 4ab32ee47d..c6af0e4d13 100644 --- a/tensorflow/contrib/autograph/utils/builtins.py +++ b/tensorflow/contrib/autograph/utils/builtins.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys + import six from tensorflow.contrib.autograph.utils import py_func @@ -97,7 +99,13 @@ def dynamic_print(*values): if all(map(is_tf_print_compatible, values)): return logging_ops.Print(1, values) - return py_func.wrap_py_func(print, None, values, use_dummy_return=True) + + def flushed_print(*vals): + print(*vals) + sys.stdout.flush() + + return py_func.wrap_py_func( + flushed_print, None, values, use_dummy_return=True) def dynamic_dataset(iterated): -- GitLab From 2ad47da4fb9896290eb9bc87fe809a4138269f2c Mon Sep 17 00:00:00 2001 From: brett koonce Date: Tue, 27 Mar 2018 09:24:40 -0700 Subject: [PATCH 542/960] Seq2seq minorsp (#18010) * contrib/seq2seq: minor spelling tweaks * contrib/timeseries: minor spelling tweaks * contrib/slim: minor spelling tweaks --- tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc | 2 +- .../contrib/seq2seq/python/ops/attention_wrapper.py | 8 ++++---- .../contrib/seq2seq/python/ops/beam_search_decoder.py | 6 +++--- .../contrib/slim/python/slim/data/parallel_reader.py | 4 ++-- .../contrib/slim/python/slim/data/prefetch_queue.py | 4 ++-- .../contrib/slim/python/slim/data/tfexample_decoder.py | 2 +- .../contrib/timeseries/python/timeseries/ar_model.py | 2 +- .../contrib/timeseries/python/timeseries/math_utils.py | 2 +- .../python/timeseries/state_space_models/varma.py | 4 ++-- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc index dfa12e873a..a9a32b7b25 100644 --- a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc +++ b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc @@ -74,7 +74,7 @@ class GatherTreeOp : public OpKernel { ctx, step_ids_shape.dim_size(1) == max_sequence_lengths.shape().dim_size(0), errors::InvalidArgument("batch size dimensions step_ids.shape[1] and " - "max_seqeuence_lengths.shape[0] must match. " + "max_sequence_lengths.shape[0] must match. " "but shapes are: ", step_ids_shape.DebugString(), " and ", max_sequence_lengths.shape().DebugString())); diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 9ff8a343f1..be53779826 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -736,7 +736,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): """Base attention mechanism for monotonic attention. Simply overrides the initial_alignments function to provide a dirac - distribution,which is needed in order for the monotonic attention + distribution, which is needed in order for the monotonic attention distributions to have the correct behavior. """ @@ -763,7 +763,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Bahadanau-style energy function. - This type of attention encorces a monotonic constraint on the attention + This type of attention enforces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -867,7 +867,7 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Luong-style energy function. - This type of attention encorces a monotonic constraint on the attention + This type of attention enforces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -1133,7 +1133,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): output_attention: Python bool. If `True` (default), the output at each time step is the attention value. This is the behavior of Luong-style attention mechanisms. If `False`, the output at each time step is - the output of `cell`. This is the beahvior of Bhadanau-style + the output of `cell`. This is the behavior of Bhadanau-style attention mechanisms. In both cases, the `attention` tensor is propagated to the next time step via the state and is used there. This flag only controls whether the attention mechanism is propagated diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index a26107b0d7..184144f64a 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -821,9 +821,9 @@ def _get_scores(log_probs, sequence_lengths, length_penalty_weight): Returns: The scores normalized by the length_penalty. """ - length_penality_ = _length_penalty( + length_penalty_ = _length_penalty( sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight) - return log_probs / length_penality_ + return log_probs / length_penalty_ def _length_penalty(sequence_lengths, penalty_factor): @@ -860,7 +860,7 @@ def _mask_probs(probs, eos_token, finished): unfinished beams remain unchanged. Args: - probs: Log probabiltiies of shape `[batch_size, beam_width, vocab_size]` + probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]` eos_token: An int32 id corresponding to the EOS token to allocate probability to. finished: A boolean tensor of shape `[batch_size, beam_width]` that diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index b3343aef47..99ad487630 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -115,8 +115,8 @@ class ParallelReader(io_ops.ReaderBase): reader needs to start reading from a new file since it has finished with the previous file). - A queue runner for enqueing in the `common_queue` is automatically added to - the TF QueueRunners collection. + A queue runner for enqueuing in the `common_queue` is automatically added + to the TF QueueRunners collection. Args: queue: A Queue or a mutable string Tensor representing a handle diff --git a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py index 37e9c4754c..62bd200361 100644 --- a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py +++ b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py @@ -36,9 +36,9 @@ def prefetch_queue(tensors, dynamic_pad=False, shared_name=None, name=None): - """Creates a queue to prefetech tensors from `tensors`. + """Creates a queue to prefetch tensors from `tensors`. - A queue runner for enqueing tensors into the prefetch_queue is automatically + A queue runner for enqueuing tensors into the prefetch_queue is automatically added to the TF QueueRunners collection. Example: diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index b3b61e1dfe..f2d31dc8db 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -124,7 +124,7 @@ class BoundingBox(ItemHandler): super(BoundingBox, self).__init__(self._full_keys) def tensors_to_item(self, keys_to_tensors): - """Maps the given dictionary of tensors to a contatenated list of bboxes. + """Maps the given dictionary of tensors to a concatenated list of bboxes. Args: keys_to_tensors: a mapping of TF-Example keys to parsed tensors. diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py index ff140efd48..4f6527a546 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py @@ -70,7 +70,7 @@ class ARModel(model.TimeSeriesModel): input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. Note that - setting it to > 1 empiricaly seems to give a better fit. + setting it to > 1 empirically seems to give a better fit. num_features: number of input features per time step. num_time_buckets: Number of buckets into which to divide (time % periodicity) for generating time based features. diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 23452a81c3..26793c80bf 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -185,7 +185,7 @@ def batch_matrix_pow(matrices, powers): { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p power(A, 0) = I - The power(A, 0) = I case is handeled by starting with accumulator set to the + The power(A, 0) = I case is handled by starting with accumulator set to the identity matrix; matrices with zero residual powers are passed through unchanged. diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py index 1afc58cfb2..6746dd7b43 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py @@ -107,7 +107,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state transition matrix. It has shape - [self.state_dimendion, self.state_dimension]. + [self.state_dimension, self.state_dimension]. """ # Pad any unused AR blocks with zeros. The extra state is necessary if # ma_order >= ar_order. @@ -127,7 +127,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state noise transform matrix. It has shape - [self.state_dimendion, self.num_features]. + [self.state_dimension, self.num_features]. """ # Noise is broadcast, through the moving average coefficients, to # un-observed parts of the latent state. -- GitLab From fdec18588d7f8b5f6383601f1030ed71f634d1c0 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Tue, 27 Mar 2018 09:36:52 -0700 Subject: [PATCH 543/960] Prevent warning every time someone imports contrib.learn.datasets.base Everything in contrib/learn/python/learn/datasets/base.py has been deprecated. One of the function in there is a decorator, retry. Because another function in that file is decorated with retry, the function is called upon import, which prints a warning. I have fixed this by adding a private function, _internal_retry, which is used internally, and redefining retry to simply call this. That way, using retry in user-code will still print the deprecated warning, but it's not printed upon every import. I also cleaned up the docstrings slightly. PiperOrigin-RevId: 190626717 --- .../learn/python/learn/datasets/base.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index 3b5c9b97c0..4676eedb20 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -139,15 +139,48 @@ def retry(initial_delay, Args: initial_delay: the initial delay. + max_delay: the maximum delay allowed (actual max is + max_delay * (1 + jitter). factor: each subsequent retry, the delay is multiplied by this value. (must be >= 1). jitter: to avoid lockstep, the returned delay is multiplied by a random number between (1-jitter) and (1+jitter). To add a 20% jitter, set jitter = 0.2. Must be < 1. + is_retriable: (optional) a function that takes an Exception as an argument + and returns true if retry should be applied. + + Returns: + A function that wraps another function to automatically retry it. + """ + return _internal_retry( + initial_delay=initial_delay, + max_delay=max_delay, + factor=factor, + jitter=jitter, + is_retriable=is_retriable) + + +def _internal_retry(initial_delay, + max_delay, + factor=2.0, + jitter=0.25, + is_retriable=None): + """Simple decorator for wrapping retriable functions, for internal use only. + + Args: + initial_delay: the initial delay. max_delay: the maximum delay allowed (actual max is max_delay * (1 + jitter). + factor: each subsequent retry, the delay is multiplied by this value. + (must be >= 1). + jitter: to avoid lockstep, the returned delay is multiplied by a random + number between (1-jitter) and (1+jitter). To add a 20% jitter, set + jitter = 0.2. Must be < 1. is_retriable: (optional) a function that takes an Exception as an argument and returns true if retry should be applied. + + Returns: + A function that wraps another function to automatically retry it. """ if factor < 1: raise ValueError('factor must be >= 1; was %f' % (factor,)) @@ -195,7 +228,7 @@ def _is_retriable(e): @deprecated(None, 'Please use urllib or similar directly.') -@retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) +@_internal_retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) def urlretrieve_with_retry(url, filename=None): return urllib.request.urlretrieve(url, filename) -- GitLab From bba3c8f13516b4d4df83f179913376ab36807f9f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 10:04:41 -0700 Subject: [PATCH 544/960] import tpu profiler analysis grpc python stub to tensorflow. PiperOrigin-RevId: 190630641 --- tensorflow/contrib/tpu/BUILD | 2 ++ tensorflow/contrib/tpu/profiler/BUILD | 4 +--- .../contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py | 2 +- tensorflow/contrib/tpu/python/profiler/__init__.py | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index eea19e9465..95dc6f5ced 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -119,6 +119,8 @@ py_library( srcs = ["python/profiler/__init__.py"], srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/tpu/profiler:tpu_profiler_analysis_pb2_grpc", + "//tensorflow/contrib/tpu/profiler:tpu_profiler_analysis_proto_py", "//tensorflow/contrib/tpu/profiler:trace_events_proto_py", "//tensorflow/python:util", ], diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 0a52d0b13b..56ddd7eff1 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -127,7 +127,5 @@ py_library( srcs = ["tpu_profiler_analysis_pb2_grpc.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = [ - ":tpu_profiler_analysis_proto_py", - ], + deps = [":tpu_profiler_analysis_proto_py"], ) diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py index c28fef22a9..8f51488288 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler_analysis_pb2_grpc.py @@ -22,7 +22,7 @@ from __future__ import print_function import grpc -from third_party.tensorflow.contrib.tpu.profiler import tpu_profiler_analysis_pb2 as third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2 +from tensorflow.contrib.tpu.profiler import tpu_profiler_analysis_pb2 as third__party_dot_tensorflow_dot_contrib_dot_tpu_dot_profiler_dot_tpu__profiler__analysis__pb2 class TPUProfileAnalysisStub(object): diff --git a/tensorflow/contrib/tpu/python/profiler/__init__.py b/tensorflow/contrib/tpu/python/profiler/__init__.py index bde13f0527..15ce6aceec 100644 --- a/tensorflow/contrib/tpu/python/profiler/__init__.py +++ b/tensorflow/contrib/tpu/python/profiler/__init__.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import,unused-import +from tensorflow.contrib.tpu.profiler.tpu_profiler_analysis_pb2 import * from tensorflow.contrib.tpu.profiler.trace_events_pb2 import * # pylint: enable=wildcard-import,unused-import -- GitLab From 1712002ad02f044f7569224bf465e0ea00e6a6c4 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Tue, 27 Mar 2018 10:11:49 -0700 Subject: [PATCH 545/960] Update tb-nightly dep to >= 1.8.0a0, < 1.9.0a0 (#18009) Synchronize tf-nightly dep on current tb-nightly. --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index ff30016cc2..3e4f9b0fdd 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -62,7 +62,7 @@ else: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.7.0a0, < 1.8.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.8.0a0, < 1.9.0a0' break # weakref.finalize and enum were introduced in Python 3.4 -- GitLab From bdaa9a0ce84798eb13b97de664451cd87c3f8210 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 10:20:05 -0700 Subject: [PATCH 546/960] Internal cleanup. PiperOrigin-RevId: 190633067 --- tensorflow/contrib/lite/testing/BUILD | 2 +- tensorflow/contrib/lite/testing/generated_examples_zip_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 555ea90034..dc9492f5e2 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -29,7 +29,7 @@ gen_zipped_test_files( "exp.zip", "fully_connected.zip", "fused_batch_norm.zip", - "gather.zip", + # "gather.zip", #TODO(b/76437794) "global_batch_norm.zip", "l2_pool.zip", "l2norm.zip", diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index ba2d259462..08354b762c 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -244,7 +244,7 @@ INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) -INSTANTIATE_TESTS(gather) +// INSTANTIATE_TESTS(gather) //TODO(b/76437794) INSTANTIATE_TESTS(global_batch_norm) INSTANTIATE_TESTS(l2_pool) INSTANTIATE_TESTS(l2norm) -- GitLab From f04822a1bb5b1bc50e8b41d4bc3a04d0641d93e1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Mar 2018 11:09:50 -0700 Subject: [PATCH 547/960] Match behavior of py_func in graph and eager. PiperOrigin-RevId: 190641841 --- tensorflow/python/ops/script_ops.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 1b4111bca6..96fb024715 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -334,7 +334,11 @@ def py_func(func, inp, Tout, stateful=True, name=None): result = func(*[x.numpy() for x in inp]) result = nest.flatten(result) - return [x if x is None else ops.convert_to_tensor(x) for x in result] + result = [x if x is None else ops.convert_to_tensor(x) for x in result] + if len(result) == 1: + # Mimic the automatic unwrapping in graph-mode py_func + result, = result + return result return _internal_py_func( func=func, inp=inp, Tout=Tout, stateful=stateful, eager=False, name=name) -- GitLab From 5c1ad16bfd265da2268ab1820d411dfaeaca5e05 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Tue, 27 Mar 2018 11:27:11 -0700 Subject: [PATCH 548/960] Fix: Clamp takes three arguments after computation, not arbitrarily many. PiperOrigin-RevId: 190644837 --- tensorflow/docs_src/performance/xla/operation_semantics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 5e39e710a0..4d12c7ab6d 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -241,7 +241,7 @@ See also Clamps an operand to within the range between a minimum and maximum value. - `Clamp(computation, args...)` + `Clamp(computation, min, operand, max)` | Arguments | Type | Semantics | | ------------- | ----------------------- | -------------------------------- | -- GitLab From a185f4a4c203853506b0b1989f2322210ef27660 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 27 Mar 2018 11:35:18 -0700 Subject: [PATCH 549/960] Trying to fix libtensorflow GPU build. CUDNN path error. Invalid path to cuDNN 7 toolkit. None of the following files can be found: C:/tools/cuda\lib/x64/cudnn.lib C:/tools/cuda\lib/x64/cudnn.lib --- tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 7b2d7e1a56..d654b433e7 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -120,7 +120,9 @@ function run_configure_for_gpu_build { export TF_CUDA_VERSION=9.0 export CUDA_TOOLKIT_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0" export TF_CUDNN_VERSION=7.0 - export CUDNN_INSTALL_PATH="C:/tools/cuda" + if [ -z "$CUDNN_INSTALL_PATH" ]; then + export CUDNN_INSTALL_PATH="C:/tools/cuda" + fi export TF_CUDA_COMPUTE_CAPABILITIES="3.7" if [ -z "$TF_ENABLE_XLA" ]; then export TF_ENABLE_XLA=0 -- GitLab From aec2496567a7bfd508fc487dec474263b6a7481f Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 27 Mar 2018 11:54:26 -0700 Subject: [PATCH 550/960] Exclude Python C extension from tensorflow/c:srcs target. The Python extensions aren't part of the official C API. PiperOrigin-RevId: 190649576 --- tensorflow/c/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 426f97b844..7f03e40d38 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -34,6 +34,8 @@ filegroup( exclude = [ "c_api_experimental.cc", "c_api_experimental.h", + "python_api.cc", + "python_api.h", "*test*", ], ), -- GitLab From fd77211de17bf053cc8f5a82c8eff1818451120c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 12:00:44 -0700 Subject: [PATCH 551/960] Replaced calls to deprecated tensorflow::StringPiece methods with their tensorflow::str_util equivalents. This will allow the deprecated methods to be removed. PiperOrigin-RevId: 190650553 --- tensorflow/cc/framework/cc_op_gen_test.cc | 5 +- tensorflow/cc/framework/scope.cc | 3 +- tensorflow/compiler/aot/codegen_test.cc | 3 +- tensorflow/compiler/aot/tfcompile_main.cc | 2 +- .../compiler/xla/service/hlo_graph_dumper.cc | 6 +- .../compiler/xla/service/user_computation.cc | 4 +- .../xla/tests/compute_constant_test.cc | 9 +-- .../xla/tests/xla_hlo_profile_test.cc | 4 +- tensorflow/contrib/cloud/kernels/BUILD | 1 + .../kernels/bigquery_table_accessor_test.cc | 5 +- .../session_bundle/session_bundle_test.cc | 30 ++++---- .../contrib/session_bundle/signature_test.cc | 68 ++++++++++--------- tensorflow/core/grappler/costs/BUILD | 1 + .../core/grappler/costs/graph_properties.cc | 4 +- tensorflow/core/lib/io/inputbuffer_test.cc | 3 +- tensorflow/core/lib/io/recordio_test.cc | 3 +- tensorflow/python/framework/python_op_gen.cc | 2 +- .../python/framework/python_op_gen_main.cc | 4 +- tensorflow/stream_executor/kernel.cc | 3 +- tensorflow/stream_executor/lib/str_util.h | 2 +- 20 files changed, 92 insertions(+), 70 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc index 1e0f2d241b..5d9dfd95a5 100644 --- a/tensorflow/cc/framework/cc_op_gen_test.cc +++ b/tensorflow/cc/framework/cc_op_gen_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -61,12 +62,12 @@ op { )"; void ExpectHasSubstr(StringPiece s, StringPiece expected) { - EXPECT_TRUE(s.contains(expected)) + EXPECT_TRUE(str_util::StrContains(s, expected)) << "'" << s << "' does not contain '" << expected << "'"; } void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) { - EXPECT_FALSE(s.contains(expected)) + EXPECT_FALSE(str_util::StrContains(s, expected)) << "'" << s << "' contains '" << expected << "'"; } diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc index 7164249262..c143b97833 100644 --- a/tensorflow/cc/framework/scope.cc +++ b/tensorflow/cc/framework/scope.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -218,7 +219,7 @@ std::unordered_set Scope::Impl::GetColocationConstraints( if (GetNodeAttr(attrs, kColocationAttrName, &node_constraints).ok()) { for (const string& entry : node_constraints) { StringPiece s(entry); - if (s.Consume(kColocationGroupPrefix)) { + if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) { current_constraints.insert(s.ToString()); } } diff --git a/tensorflow/compiler/aot/codegen_test.cc b/tensorflow/compiler/aot/codegen_test.cc index 972b7d51ec..2642536c4f 100644 --- a/tensorflow/compiler/aot/codegen_test.cc +++ b/tensorflow/compiler/aot/codegen_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" @@ -33,7 +34,7 @@ namespace { void ExpectErrorContains(const Status& status, StringPiece str) { EXPECT_NE(Status::OK(), status); - EXPECT_TRUE(StringPiece(status.error_message()).contains(str)) + EXPECT_TRUE(str_util::StrContains(status.error_message(), str)) << "expected error: " << status.error_message() << " to contain: " << str; } diff --git a/tensorflow/compiler/aot/tfcompile_main.cc b/tensorflow/compiler/aot/tfcompile_main.cc index e2f01179d4..8ea014c2ee 100644 --- a/tensorflow/compiler/aot/tfcompile_main.cc +++ b/tensorflow/compiler/aot/tfcompile_main.cc @@ -55,7 +55,7 @@ const char kUsageHeader[] = "\n"; Status ReadProtoFile(const string& fname, protobuf::Message* proto) { - if (StringPiece(fname).ends_with(".pbtxt")) { + if (str_util::EndsWith(fname, ".pbtxt")) { return ReadTextProto(Env::Default(), fname, proto); } else { return ReadBinaryProto(Env::Default(), fname, proto); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 1dc72355cf..25702dc65e 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -823,7 +823,7 @@ string HloDotDumper::GetInstructionNodeInlinedOperands( // Otherwise, print e.g. "%constant.42 (s32[100])". string constant_name; - if (tensorflow::StringPiece(constant->name()).starts_with("constant")) { + if (tensorflow::str_util::StartsWith(constant->name(), "constant")) { constant_name = constant->name(); } else { constant_name = StrCat("constant ", constant->name()); @@ -1041,8 +1041,8 @@ string HloDotDumper::GetInstructionNodeLabel(const HloInstruction* instr) { // The HLO instruction name contains usually the opcode, e.g. "%add.42" is // an add instruction. In this case we render just the name. - if (tensorflow::StringPiece(instr->name()) - .starts_with(HloOpcodeString(instr->opcode()))) { + if (tensorflow::str_util::StartsWith(instr->name(), + HloOpcodeString(instr->opcode()))) { return Printf("%s", HtmlLikeStringSanitize(instr->name())); } string extended_opcode = diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 0dca30a804..fcdb2e01fb 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -1284,8 +1284,8 @@ StatusOr UserComputation::AddCustomCallInstruction( TF_RETURN_IF_ERROR(LookUpRequest(handle).status()); } - if (tensorflow::StringPiece(custom_call_request.call_target_name()) - .starts_with("$")) { + if (tensorflow::str_util::StartsWith(custom_call_request.call_target_name(), + "$")) { return InvalidArgument( "Invalid custom_call_target \"%s\": Call targets that start with '$' " "are reserved for internal use.", diff --git a/tensorflow/compiler/xla/tests/compute_constant_test.cc b/tensorflow/compiler/xla/tests/compute_constant_test.cc index ec2c580670..e5a03b49ad 100644 --- a/tensorflow/compiler/xla/tests/compute_constant_test.cc +++ b/tensorflow/compiler/xla/tests/compute_constant_test.cc @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -167,8 +168,8 @@ TEST_F(ComputeConstantTest, DirectParamMissing) { EXPECT_FALSE(IsConstant(computation, &b)); auto value = ComputeConstantScalar(client, computation, &b); - EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString()) - .contains("depends on a parameter")) + EXPECT_TRUE(tensorflow::str_util::StrContains(value.status().ToString(), + "depends on a parameter")) << value.status(); } } @@ -183,8 +184,8 @@ TEST_F(ComputeConstantTest, IndirectParamMissing) { EXPECT_FALSE(IsConstant(computation, &b)); auto value = ComputeConstantScalar(client, computation, &b); - EXPECT_TRUE(tensorflow::StringPiece(value.status().ToString()) - .contains("depends on a parameter")) + EXPECT_TRUE(tensorflow::str_util::StrContains(value.status().ToString(), + "depends on a parameter")) << value.status(); } } diff --git a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc index 24b9f37a80..ff3418a128 100644 --- a/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc +++ b/tensorflow/compiler/xla/tests/xla_hlo_profile_test.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -294,7 +295,8 @@ XLA_TEST_F(HloProfileTest, auto while_body_profile_start = std::find_if(profile_output_lines.begin(), profile_output_lines.end(), [](tensorflow::StringPiece s) { - return s.starts_with("Execution profile for body"); + return tensorflow::str_util::StartsWith( + s, "Execution profile for body"); }); ASSERT_NE(while_body_profile_start, profile_output_lines.end()); diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD index 56f930a9a8..d5fc604de9 100644 --- a/tensorflow/contrib/cloud/kernels/BUILD +++ b/tensorflow/contrib/cloud/kernels/BUILD @@ -73,6 +73,7 @@ tf_cc_test( ], deps = [ ":bigquery_table_accessor", + "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", diff --git a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc index e9b79a066d..7416eb19d3 100644 --- a/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc +++ b/tensorflow/contrib/cloud/kernels/bigquery_table_accessor_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/example/feature.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/cloud/http_request_fake.h" #include "tensorflow/core/platform/test.h" @@ -28,8 +29,8 @@ constexpr char kTestProject[] = "test-project"; constexpr char kTestDataset[] = "test-dataset"; constexpr char kTestTable[] = "test-table"; -bool HasSubstr(const string& base, const string& substr) { - bool ok = StringPiece(base).contains(substr); +bool HasSubstr(StringPiece base, StringPiece substr) { + bool ok = str_util::StrContains(base, substr); EXPECT_TRUE(ok) << base << ", expected substring " << substr; return ok; } diff --git a/tensorflow/contrib/session_bundle/session_bundle_test.cc b/tensorflow/contrib/session_bundle/session_bundle_test.cc index 6d997bac9e..612623ae30 100644 --- a/tensorflow/contrib/session_bundle/session_bundle_test.cc +++ b/tensorflow/contrib/session_bundle/session_bundle_test.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session.h" @@ -239,8 +240,8 @@ TEST(LoadSessionBundleFromPath, BasicTestRunOptionsThreadPoolInvalid) { // Expect failed session run calls with invalid run-options. EXPECT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Invalid inter_op_thread_pool: 2")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Invalid inter_op_thread_pool: 2")) << status.error_message(); } @@ -314,8 +315,8 @@ TEST_F(SessionBundleTest, ServingGraphEmpty) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE(StringPiece(status_.error_message()) - .contains("Expected exactly one serving GraphDef")) + EXPECT_TRUE(str_util::StrContains(status_.error_message(), + "Expected exactly one serving GraphDef")) << status_.error_message(); } @@ -330,8 +331,9 @@ TEST_F(SessionBundleTest, ServingGraphAnyIncorrectType) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE(StringPiece(status_.error_message()) - .contains("Expected Any type_url for: tensorflow.GraphDef")) + EXPECT_TRUE( + str_util::StrContains(status_.error_message(), + "Expected Any type_url for: tensorflow.GraphDef")) << status_.error_message(); } @@ -347,7 +349,8 @@ TEST_F(SessionBundleTest, ServingGraphAnyValueCorrupted) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE(StringPiece(status_.error_message()).contains("Failed to unpack")) + EXPECT_TRUE( + str_util::StrContains(status_.error_message(), "Failed to unpack")) << status_.error_message(); } @@ -362,9 +365,9 @@ TEST_F(SessionBundleTest, AssetFileAnyIncorrectType) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE( - StringPiece(status_.error_message()) - .contains("Expected Any type_url for: tensorflow.serving.AssetFile")) + EXPECT_TRUE(str_util::StrContains( + status_.error_message(), + "Expected Any type_url for: tensorflow.serving.AssetFile")) << status_.error_message(); } @@ -380,7 +383,8 @@ TEST_F(SessionBundleTest, AssetFileAnyValueCorrupted) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE(StringPiece(status_.error_message()).contains("Failed to unpack")) + EXPECT_TRUE( + str_util::StrContains(status_.error_message(), "Failed to unpack")) << status_.error_message(); } @@ -395,8 +399,8 @@ TEST_F(SessionBundleTest, InitOpTooManyValues) { }); status_ = LoadSessionBundleFromPath(options_, path, &bundle_); EXPECT_FALSE(status_.ok()); - EXPECT_TRUE(StringPiece(status_.error_message()) - .contains("Expected exactly one serving init op")) + EXPECT_TRUE(str_util::StrContains(status_.error_message(), + "Expected exactly one serving init op")) << status_.error_message(); } diff --git a/tensorflow/contrib/session_bundle/signature_test.cc b/tensorflow/contrib/session_bundle/signature_test.cc index 741b7fde9b..b1ff55552e 100644 --- a/tensorflow/contrib/session_bundle/signature_test.cc +++ b/tensorflow/contrib/session_bundle/signature_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session.h" @@ -33,8 +34,8 @@ namespace tensorflow { namespace serving { namespace { -static bool HasSubstr(const string& base, const string& substr) { - bool ok = StringPiece(base).contains(substr); +static bool HasSubstr(StringPiece base, StringPiece substr) { + bool ok = str_util::StrContains(base, substr); EXPECT_TRUE(ok) << base << ", expected substring " << substr; return ok; } @@ -69,8 +70,8 @@ TEST(GetClassificationSignature, MissingSignature) { ClassificationSignature signature; const Status status = GetClassificationSignature(meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected a classification signature")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected a classification signature")) << status.error_message(); } @@ -86,8 +87,8 @@ TEST(GetClassificationSignature, WrongSignatureType) { ClassificationSignature signature; const Status status = GetClassificationSignature(meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected a classification signature")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected a classification signature")) << status.error_message(); } @@ -122,8 +123,8 @@ TEST(GetNamedClassificationSignature, MissingSignature) { const Status status = GetNamedClassificationSignature("foo", meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Missing signature named \"foo\"")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Missing signature named \"foo\"")) << status.error_message(); } @@ -141,9 +142,9 @@ TEST(GetNamedClassificationSignature, WrongSignatureType) { const Status status = GetNamedClassificationSignature("foo", meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE( - StringPiece(status.error_message()) - .contains("Expected a classification signature for name \"foo\"")) + EXPECT_TRUE(str_util::StrContains( + status.error_message(), + "Expected a classification signature for name \"foo\"")) << status.error_message(); } @@ -176,8 +177,8 @@ TEST(GetRegressionSignature, MissingSignature) { RegressionSignature signature; const Status status = GetRegressionSignature(meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected a regression signature")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected a regression signature")) << status.error_message(); } @@ -193,8 +194,8 @@ TEST(GetRegressionSignature, WrongSignatureType) { RegressionSignature signature; const Status status = GetRegressionSignature(meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected a regression signature")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected a regression signature")) << status.error_message(); } @@ -227,8 +228,8 @@ TEST(GetNamedSignature, MissingSignature) { Signature signature; const Status status = GetNamedSignature("foo", meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Missing signature named \"foo\"")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Missing signature named \"foo\"")) << status.error_message(); } @@ -370,7 +371,7 @@ TEST(RunClassification, RunNotOk) { const Status status = RunClassification(signature, input_tensor, &session, &classes_tensor, nullptr); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()).contains("Data is gone")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), "Data is gone")) << status.error_message(); } @@ -386,7 +387,8 @@ TEST(RunClassification, TooManyOutputs) { const Status status = RunClassification(signature, input_tensor, &session, &classes_tensor, nullptr); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()).contains("Expected 1 output")) + EXPECT_TRUE( + str_util::StrContains(status.error_message(), "Expected 1 output")) << status.error_message(); } @@ -402,8 +404,9 @@ TEST(RunClassification, WrongBatchOutputs) { const Status status = RunClassification(signature, input_tensor, &session, &classes_tensor, nullptr); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Input batch size did not match output batch size")) + EXPECT_TRUE( + str_util::StrContains(status.error_message(), + "Input batch size did not match output batch size")) << status.error_message(); } @@ -449,7 +452,7 @@ TEST_F(RunRegressionTest, RunNotOk) { const Status status = RunRegression(signature_, input_tensor_, &session_, &output_tensor_); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()).contains("Data is gone")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), "Data is gone")) << status.error_message(); } @@ -460,8 +463,9 @@ TEST_F(RunRegressionTest, MismatchedSizeForBatchInputAndOutput) { const Status status = RunRegression(signature_, input_tensor_, &session_, &output_tensor_); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Input batch size did not match output batch size")) + EXPECT_TRUE( + str_util::StrContains(status.error_message(), + "Input batch size did not match output batch size")) << status.error_message(); } @@ -488,7 +492,7 @@ TEST(GetSignatures, MissingSignature) { const auto status = GetSignatures(meta_graph_def, &read_signatures); EXPECT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code()); EXPECT_TRUE( - StringPiece(status.error_message()).contains("Expected exactly one")) + str_util::StrContains(status.error_message(), "Expected exactly one")) << status.error_message(); } @@ -502,9 +506,9 @@ TEST(GetSignatures, WrongProtoInAny) { Signatures read_signatures; const auto status = GetSignatures(meta_graph_def, &read_signatures); EXPECT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected Any type_url for: " - "tensorflow.serving.Signatures")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected Any type_url for: " + "tensorflow.serving.Signatures")) << status.error_message(); } @@ -519,7 +523,7 @@ TEST(GetSignatures, JunkInAny) { Signatures read_signatures; const auto status = GetSignatures(meta_graph_def, &read_signatures); EXPECT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code()); - EXPECT_TRUE(StringPiece(status.error_message()).contains("Failed to unpack")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), "Failed to unpack")) << status.error_message(); } @@ -567,7 +571,7 @@ TEST(GetSignatures, MultipleSignaturesNotOK) { const auto status = GetSignatures(meta_graph_def, &read_signatures); EXPECT_EQ(tensorflow::error::FAILED_PRECONDITION, status.code()); EXPECT_TRUE( - StringPiece(status.error_message()).contains("Expected exactly one")) + str_util::StrContains(status.error_message(), "Expected exactly one")) << status.error_message(); } @@ -641,8 +645,8 @@ TEST(GetGenericSignature, WrongSignatureType) { const Status status = GetGenericSignature("generic_bindings", meta_graph_def, &signature); ASSERT_FALSE(status.ok()); - EXPECT_TRUE(StringPiece(status.error_message()) - .contains("Expected a generic signature:")) + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "Expected a generic signature:")) << status.error_message(); } diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 5336df1f51..df5a26f475 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -55,6 +55,7 @@ cc_library( ":utils", "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 817247e379..a5fd79447d 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { namespace grappler { @@ -251,8 +252,7 @@ typename DisjointSet::Rep* DisjointSet::Find(Handle value) { } bool IsQueue(const Node& node) { - StringPiece type(node.type_string()); - return type.ends_with("QueueV2"); + return str_util::EndsWith(node.type_string(), "QueueV2"); } // Returns true if the node is an Enter op AND its input is a Queue. diff --git a/tensorflow/core/lib/io/inputbuffer_test.cc b/tensorflow/core/lib/io/inputbuffer_test.cc index 6be1f819c2..3608008b30 100644 --- a/tensorflow/core/lib/io/inputbuffer_test.cc +++ b/tensorflow/core/lib/io/inputbuffer_test.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -287,7 +288,7 @@ TEST(InputBuffer, Seek) { EXPECT_TRUE(errors::IsOutOfRange(in.ReadNBytes(1, &read))); EXPECT_TRUE( - StringPiece(in.Seek(-1).ToString()).contains("negative position")); + str_util::StrContains(in.Seek(-1).ToString(), "negative position")); } } diff --git a/tensorflow/core/lib/io/recordio_test.cc b/tensorflow/core/lib/io/recordio_test.cc index b7e51256a2..63235761d9 100644 --- a/tensorflow/core/lib/io/recordio_test.cc +++ b/tensorflow/core/lib/io/recordio_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/lib/io/record_reader.h" #include "tensorflow/core/lib/io/record_writer.h" #include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" @@ -218,7 +219,7 @@ TEST_F(RecordioTest, RandomRead) { // Tests of all the error paths in log_reader.cc follow: static void AssertHasSubstr(StringPiece s, StringPiece expected) { - EXPECT_TRUE(StringPiece(s).contains(expected)) + EXPECT_TRUE(str_util::StrContains(s, expected)) << s << " does not contain " << expected; } diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 9850f0becc..e5e3b82199 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -448,7 +448,7 @@ string AttrValueToPython(const string& type, const AttrValue& value, return TensorToPython(value.tensor()); } else if (type == "func") { return StringToPython(value.func().name()); - } else if (StringPiece(type).starts_with("list(")) { + } else if (str_util::StartsWith(type, "list(")) { return strings::StrCat("[", AttrListToPython(value, dtype_module), "]"); } else { return "?"; diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc index bc5ca195da..ca6ed42bee 100644 --- a/tensorflow/python/framework/python_op_gen_main.cc +++ b/tensorflow/python/framework/python_op_gen_main.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/lib/io/inputbuffer.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/scanner.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/logging.h" @@ -95,7 +96,8 @@ string InferSourceFileName(const char* argv_zero) { // operators defined in _ops.cc const char* kExecPrefix = "gen_"; const char* kExecSuffix = "_py_wrappers_cc"; - if (command_str.Consume(kExecPrefix) && command_str.ends_with(kExecSuffix)) { + if (str_util::ConsumePrefix(&command_str, kExecPrefix) && + str_util::EndsWith(command_str, kExecSuffix)) { command_str.remove_suffix(strlen(kExecSuffix)); return strings::StrCat(command_str, ".cc"); } else { diff --git a/tensorflow/stream_executor/kernel.cc b/tensorflow/stream_executor/kernel.cc index 81e531efb3..636199cfa2 100644 --- a/tensorflow/stream_executor/kernel.cc +++ b/tensorflow/stream_executor/kernel.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/stream_executor/lib/demangle.h" #include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/logging.h" @@ -96,7 +97,7 @@ static const char *kStubPrefix = "__device_stub_"; void KernelBase::set_name(port::StringPiece name) { name_ = name.ToString(); port::StringPiece stubless_name = name; - if (name.starts_with(kStubPrefix)) { + if (tensorflow::str_util::StartsWith(name, kStubPrefix)) { stubless_name.remove_prefix(strlen(kStubPrefix)); } demangled_name_ = port::Demangle(stubless_name.data()); diff --git a/tensorflow/stream_executor/lib/str_util.h b/tensorflow/stream_executor/lib/str_util.h index 4dd6f3b0cc..5dd3d06aff 100644 --- a/tensorflow/stream_executor/lib/str_util.h +++ b/tensorflow/stream_executor/lib/str_util.h @@ -29,7 +29,7 @@ using tensorflow::str_util::Split; // Returns a copy of the input string 'str' with the given 'suffix' // removed. If the suffix doesn't match, returns a copy of the original string. inline string StripSuffixString(port::StringPiece str, port::StringPiece suffix) { - if (str.ends_with(suffix)) { + if (tensorflow::str_util::EndsWith(str, suffix)) { str.remove_suffix(suffix.size()); } return str.ToString(); -- GitLab From 7c06ae2fd9b933e83aea0e5088c0b32b7c1fcaaf Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 27 Mar 2018 12:06:19 -0700 Subject: [PATCH 552/960] Remove warnings for initialize_variables (#18023) The `initialize_variables` has been deprecated and replaced with `tf.variables_initializer`. This fix makes the change and fixes the following warning in array_ops_test.py: ``` WARNING:tensorflow:From /private/var/tmp/_bazel_ytang/48f7de64c479bcefe5e55c65866b55a6/execroot/org_tensorflow/bazel-out/darwin-opt/bin/tensorflow/python/kernel_tests/array_ops_test.runfiles/org_tensorflow/tensorflow/python/util/tf_should_use.py:118: initialize_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02. Instructions for updating: Use `tf.variables_initializer` instead. ``` Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/array_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index d0ba8020c1..b82aa47ebe 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -890,7 +890,7 @@ class StridedSliceAssignChecker(object): var = resource_variable_ops.ResourceVariable(self.x) else: var = variables.Variable(self.x) - sess.run(variables.initialize_variables([var])) + sess.run(variables.variables_initializer([var])) val = sess.run(var[index].assign(value)) # val_copy is used to check that tf.assign works equivalently to the # assign method above. -- GitLab From 4a24413ee92c23727e11108bfd9b823ac09ef209 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 27 Mar 2018 12:11:39 -0700 Subject: [PATCH 553/960] Validate axis in shape function of tf.reverse (#18024) * Validate axis in shape function of tf.reverse tf.reverse requires the axis to be in the range of `[-rank(tensor), rank(tensor))`. Previously the validation is only done in runtime though it is possible to validate axis inside the shape function if the shape of the input tensor is already known. This fix add the validation in the shape function. Signed-off-by: Yong Tang * Replace with temp variable Signed-off-by: Yong Tang * Sanitize with clang-foramt -i Signed-off-by: Yong Tang * Validate multiple specification of axis in tf.reverse as well Signed-off-by: Yong Tang * Add test case for axis validation in shape function for tf.reverse Signed-off-by: Yong Tang * Update existing test cases Signed-off-by: Yong Tang --- tensorflow/core/ops/array_ops.cc | 26 ++++++++++++++++++- .../python/kernel_tests/array_ops_test.py | 24 ++++++++++++++--- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 39b92464cb..f97f1645a6 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -752,11 +752,35 @@ REGISTER_OP("ReverseV2") ShapeHandle input = c->input(0); ShapeHandle axis; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &axis)); - // TODO(aselle): if input(0)'s dimension is known we could validate axis if (c->Rank(input) > 8) { return errors::InvalidArgument( "reverse does not work on tensors with more than 8 dimensions"); } + const Tensor* axis_tensor = c->input_tensor(1); + if (axis_tensor != nullptr && c->RankKnown(input)) { + int32 rank = c->Rank(input); + std::vector axis_value; + if (axis_tensor->dtype() == DT_INT32) { + axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); + } else { + axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); + } + std::vector axes_dense(c->Rank(input), false); + for (int i = 0; i < axis_value.size(); i++) { + int64 canonical_axis = + axis_value[i] < 0 ? rank + axis_value[i] : axis_value[i]; + if (canonical_axis < 0 || canonical_axis >= rank) { + return errors::InvalidArgument("'axis'[", i, "] = ", axis_value[i], + " is out of valid range [", 0, ", ", + rank - 1); + } + if (axes_dense[canonical_axis]) { + return errors::InvalidArgument("axis ", canonical_axis, + " specified more than once."); + } + axes_dense[canonical_axis] = true; + } + } c->set_output(0, input); return Status::OK(); }); diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index b82aa47ebe..64c1760d5e 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -315,21 +315,39 @@ class ReverseV2Test(test_util.TensorFlowTestCase): self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1]) self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1]) + # This test covers the axis validation in the shape function + # (no eval()) + def testInvalidAxis(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [-30]) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [2]) + with self.assertRaisesRegexp(ValueError, + "axis 0 specified more than once"): + array_ops.reverse_v2(x_np, [0, -2]) + # This is the version of reverse that uses axis indices rather than # bool tensors # TODO(b/32254538): Change this test to use array_ops.reverse + # + # Note: this test passes placeholder as constant axis is validated + # in shape function (see testInvalidAxis) def testInvalid(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + axis = array_ops.placeholder(dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [-30]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis 0 specified more than once"): - array_ops.reverse_v2(x_np, [0, -2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]}) def testReverse1DimAuto(self): for dtype in [ -- GitLab From 5da1cdcf0032f63c22afb41a460fd44c52ada048 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Mar 2018 12:09:59 -0700 Subject: [PATCH 554/960] Improved shape inference for reshape PiperOrigin-RevId: 190651873 --- .../python/kernel_tests/shape_ops_test.py | 5 +- .../contrib/signal/python/ops/shape_ops.py | 2 + tensorflow/core/ops/array_ops.cc | 104 ++++++++++++------ tensorflow/core/ops/array_ops_test.cc | 6 +- 4 files changed, 84 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py index 1c052354b8..bc4663fbb0 100644 --- a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py @@ -338,7 +338,10 @@ class FrameTest(test.TestCase): def test_constant_folding(self): """frame should be constant foldable for constant inputs.""" - for pad_end in [False, True]: + # Padding is incorrectly defined in shape_ops.py (the rank of the padding + # tensor should be equal to the rank of the input tensor + 1): only test + # with padding set to False to avoid this. + for pad_end in [False]: g = ops.Graph() with g.as_default(): frame_length, frame_step = 32, 16 diff --git a/tensorflow/contrib/signal/python/ops/shape_ops.py b/tensorflow/contrib/signal/python/ops/shape_ops.py index 1ddc2941ec..97fe20866b 100644 --- a/tensorflow/contrib/signal/python/ops/shape_ops.py +++ b/tensorflow/contrib/signal/python/ops/shape_ops.py @@ -139,6 +139,8 @@ def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0, axis=-1, [[0, pad_samples]], array_ops.zeros([num_inner_dimensions, 2], dtype=pad_samples.dtype)], 0) + # TODO(rjryan): the paddings tensor must of rank tf.rank(signal) + 1. This + # isn't the case here and should be fixed. signal = array_ops.pad(signal, paddings, constant_values=pad_value) signal_shape = array_ops.shape(signal) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 39b92464cb..88d2aa3f41 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -178,46 +178,88 @@ Status SetOutputShapeForReshape(InferenceContext* c) { c->set_output(0, out); return Status::OK(); } - DimensionHandle num_in_elems = c->NumElements(in); - if (c->FullyDefined(out)) { - DimensionHandle num_out_elems = c->NumElements(out); - if (c->ValueKnown(num_in_elems) && - c->Value(num_in_elems) != c->Value(num_out_elems)) { - return errors::InvalidArgument( - "Cannot reshape a tensor with ", c->DebugString(num_in_elems), - " elements to shape ", c->DebugString(out), " (", - c->DebugString(num_out_elems), " elements)"); - } - c->set_output(0, out); - return Status::OK(); - } - if (c->ValueKnown(num_in_elems)) { + if (c->RankKnown(out) && c->RankKnown(in)) { // We don't know the number of output elements, but we can try to infer // the missing dimension. - int32 unknown_idx = -1; bool too_many_unknown = false; - DimensionHandle known_elems = c->MakeDim(1); - for (int32 i = 0; i < c->Rank(out); ++i) { - DimensionHandle dim = c->Dim(out, i); - if (!c->ValueKnown(dim)) { - if (unknown_idx >= 0) { - too_many_unknown = true; - break; + int32 out_unknown_idx = -1; + + DimensionHandle known_out_elems = c->NumElements(out); + if (!c->ValueKnown(known_out_elems)) { + known_out_elems = c->MakeDim(1); + for (int32 i = 0; i < c->Rank(out); ++i) { + DimensionHandle dim = c->Dim(out, i); + if (!c->ValueKnown(dim)) { + if (out_unknown_idx >= 0) { + too_many_unknown = true; + break; + } + out_unknown_idx = i; + } else { + TF_RETURN_IF_ERROR( + c->Multiply(known_out_elems, dim, &known_out_elems)); } - unknown_idx = i; - } else { - TF_RETURN_IF_ERROR(c->Multiply(known_elems, dim, &known_elems)); } } - if (!too_many_unknown && c->Value(known_elems) != 0) { - DimensionHandle inferred_dim; - TF_RETURN_IF_ERROR(c->Divide(num_in_elems, c->Value(known_elems), - true /* evenly_divisible */, &inferred_dim)); - TF_RETURN_IF_ERROR(c->ReplaceDim(out, unknown_idx, inferred_dim, &out)); + int32 in_unknown_idx = -1; + DimensionHandle known_in_elems = c->NumElements(in); + if (!c->ValueKnown(known_in_elems)) { + known_in_elems = c->MakeDim(1); + for (int32 i = 0; i < c->Rank(in); ++i) { + DimensionHandle dim = c->Dim(in, i); + if (!c->ValueKnown(dim)) { + if (in_unknown_idx >= 0) { + too_many_unknown = true; + break; + } + in_unknown_idx = i; + } else { + TF_RETURN_IF_ERROR(c->Multiply(known_in_elems, dim, &known_in_elems)); + } + } } - } + if (!too_many_unknown) { + if (in_unknown_idx < 0 && out_unknown_idx < 0) { + // Just check that the dimensions match. + if (c->Value(known_in_elems) != c->Value(known_out_elems)) { + return errors::InvalidArgument( + "Cannot reshape a tensor with ", c->DebugString(known_in_elems), + " elements to shape ", c->DebugString(out), " (", + c->DebugString(known_out_elems), " elements)"); + } + } else if (in_unknown_idx < 0 && out_unknown_idx >= 0 && + c->Value(known_out_elems) > 0) { + // Input fully known, infer the one missing output dim + DimensionHandle inferred_dim; + TF_RETURN_IF_ERROR(c->Divide(known_in_elems, c->Value(known_out_elems), + true /* evenly_divisible */, + &inferred_dim)); + TF_RETURN_IF_ERROR( + c->ReplaceDim(out, out_unknown_idx, inferred_dim, &out)); + + } else if (in_unknown_idx >= 0 && out_unknown_idx < 0 && + c->Value(known_in_elems) != 0) { + // Output fully known, infer the one missing input dim + DimensionHandle inferred_dim; + TF_RETURN_IF_ERROR(c->Divide(known_out_elems, c->Value(known_in_elems), + true /* evenly_divisible */, + &inferred_dim)); + DimensionHandle unknown_in_dim = c->Dim(in, in_unknown_idx); + TF_RETURN_IF_ERROR( + c->Merge(unknown_in_dim, inferred_dim, &unknown_in_dim)); + } else if (in_unknown_idx >= 0 && out_unknown_idx >= 0) { + // Exactly one unknown dimension in both input and output. These 2 are + // equal iff the known elements are equal. + if (c->Value(known_in_elems) == c->Value(known_out_elems)) { + DimensionHandle unknown_in_dim = c->Dim(in, in_unknown_idx); + TF_RETURN_IF_ERROR( + c->ReplaceDim(out, out_unknown_idx, unknown_in_dim, &out)); + } + } + } + } c->set_output(0, out); return Status::OK(); } diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc index cf5bb5ad84..b1463338fb 100644 --- a/tensorflow/core/ops/array_ops_test.cc +++ b/tensorflow/core/ops/array_ops_test.cc @@ -838,7 +838,7 @@ TEST(ArrayOpsTest, Reshape_ShapeFn) { // Unknown dimensions. // Flatten: new_shape = test::AsTensor({-1}); - INFER_OK(op, "[?];[1]", "[?]"); + INFER_OK(op, "[?];[1]", "[d0_0]"); INFER_OK(op, "[2,2];[1]", "[4]"); // The first dimension is inferred: new_shape = test::AsTensor({2, -1}); @@ -851,6 +851,10 @@ TEST(ArrayOpsTest, Reshape_ShapeFn) { new_shape = test::AsTensor({-1, -1, 2}); INFER_OK(op, "[8];[3]", "[?,?,2]"); + // Symbolic shape propagation + new_shape = test::AsTensor({-1, 2, 3}); + INFER_OK(op, "[?,2,3];[3]", "[d0_0,2,3]"); + // Reshaping to a scalar. new_shape = test::AsTensor({}); INFER_OK(op, "[1];[0]", "[]"); -- GitLab From 3771b5b0d9cbd5a9d34f1d579454b78012cb0bb4 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 27 Mar 2018 12:22:49 -0700 Subject: [PATCH 555/960] Update BUILD --- tensorflow/python/kernel_tests/testdata/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/testdata/BUILD b/tensorflow/python/kernel_tests/testdata/BUILD index a4a0dfc139..45264c773a 100644 --- a/tensorflow/python/kernel_tests/testdata/BUILD +++ b/tensorflow/python/kernel_tests/testdata/BUILD @@ -1,7 +1,7 @@ # Data files for kernel tests. package( - default_visibility = ["//tensorflow:internal"], + default_visibility = ["//visibility:public"], ) licenses(["notice"]) # Apache 2.0 -- GitLab From 7fd3ca7ab6e96af7b867c7ae56ac74a3f3393b26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 12:34:17 -0700 Subject: [PATCH 556/960] Updating test so that it evaluates the optimized and original graph and checks whether the output tensors produced by them are the same. PiperOrigin-RevId: 190655831 --- .../core/grappler/optimizers/constant_folding_test.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 6340565bcd..dc9c1053d2 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -614,7 +614,8 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { GrapplerItem item; item.fetch.push_back("e"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -641,6 +642,9 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { } } EXPECT_EQ(1, found); + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { -- GitLab From 2700e87f0e5fbc3aa7fe3a6a7ffb7152b894da4a Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Wed, 28 Mar 2018 03:54:20 +0800 Subject: [PATCH 557/960] Fix the incorect rendering of math equation in monte_carlo api guides (#18018) * Fix the math equation in monte_carlo api guides * Replace \( \) with \\( \\) according to guideline --- .../python/contrib.bayesflow.monte_carlo.md | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md index 956dccb64f..f3db5857ae 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md +++ b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md @@ -6,42 +6,42 @@ Monte Carlo integration and helpers. ## Background Monte Carlo integration refers to the practice of estimating an expectation with -a sample mean. For example, given random variable `Z in R^k` with density `p`, +a sample mean. For example, given random variable `Z in \\(R^k\\)` with density `p`, the expectation of function `f` can be approximated like: ``` -E_p[f(Z)] = \int f(z) p(z) dz - ~ S_n - := n^{-1} \sum_{i=1}^n f(z_i), z_i iid samples from p. +$$E_p[f(Z)] = \int f(z) p(z) dz$$ +$$ ~ S_n + := n^{-1} \sum_{i=1}^n f(z_i), z_i\ iid\ samples\ from\ p.$$ ``` -If `E_p[|f(Z)|] < infinity`, then `S_n --> E_p[f(Z)]` by the strong law of large -numbers. If `E_p[f(Z)^2] < infinity`, then `S_n` is asymptotically normal with -variance `Var[f(Z)] / n`. +If `\\(E_p[|f(Z)|] < infinity\\)`, then `\\(S_n\\) --> \\(E_p[f(Z)]\\)` by the strong law of large +numbers. If `\\(E_p[f(Z)^2] < infinity\\)`, then `\\(S_n\\)` is asymptotically normal with +variance `\\(Var[f(Z)] / n\\)`. Practitioners of Bayesian statistics often find themselves wanting to estimate -`E_p[f(Z)]` when the distribution `p` is known only up to a constant. For +`\\(E_p[f(Z)]\\)` when the distribution `p` is known only up to a constant. For example, the joint distribution `p(z, x)` may be known, but the evidence -`p(x) = \int p(z, x) dz` may be intractable. In that case, a parameterized -distribution family `q_lambda(z)` may be chosen, and the optimal `lambda` is the -one minimizing the KL divergence between `q_lambda(z)` and -`p(z | x)`. We only know `p(z, x)`, but that is sufficient to find `lambda`. +`\\(p(x) = \int p(z, x) dz\\)` may be intractable. In that case, a parameterized +distribution family `\\(q_\lambda(z)\\)` may be chosen, and the optimal `\\(\lambda\\)` is the +one minimizing the KL divergence between `\\(q_\lambda(z)\\)` and +`\\(p(z | x)\\)`. We only know `p(z, x)`, but that is sufficient to find `\\(\lambda\\)`. ## Log-space evaluation and subtracting the maximum Care must be taken when the random variable lives in a high dimensional space. -For example, the naive importance sample estimate `E_q[f(Z) p(Z) / q(Z)]` -involves the ratio of two terms `p(Z) / q(Z)`, each of which must have tails -dropping off faster than `O(|z|^{-(k + 1)})` in order to have finite integral. +For example, the naive importance sample estimate `\\(E_q[f(Z) p(Z) / q(Z)]\\)` +involves the ratio of two terms `\\(p(Z) / q(Z)\\)`, each of which must have tails +dropping off faster than `\\(O(|z|^{-(k + 1)})\\)` in order to have finite integral. This ratio would often be zero or infinity up to numerical precision. For that reason, we write ``` -Log E_q[ f(Z) p(Z) / q(Z) ] - = Log E_q[ exp{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C} ] + C, where -C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ]. +$$Log E_q[ f(Z) p(Z) / q(Z) ]$$ +$$ = Log E_q[ \exp\{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C\} ] + C,$$ where +$$C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ].$$ ``` The maximum value of the exponentiated term will be 0.0, and the expectation -- GitLab From 5d76c7db2ab72f9b0cc70ce12ba0a3395dcc20d3 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Wed, 28 Mar 2018 03:55:28 +0800 Subject: [PATCH 558/960] Fix minor spelling typos in contrib (#18015) --- .../contrib/bayesflow/python/ops/metropolis_hastings_impl.py | 2 +- .../contrib/estimator/python/estimator/replicate_model_fn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py index 05aa134ed5..fdee0a8da6 100644 --- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py @@ -238,7 +238,7 @@ def evolve(initial_sample, using the Metropolis-Hastings algorithm. These samples are from a Markov chain whose equilibrium distribution matches the target distribution. - The probability distribution may have an unknown normalization constan. + The probability distribution may have an unknown normalization constant. We parameterize the probability density as follows: ```none diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index e0fae2c992..fa2697800e 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -136,7 +136,7 @@ def replicate_model_fn(model_fn, the train_op argument of `EstimatorSpec`. loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This - argument can be used to replice only on the subset of available GPUs. + argument can be used to replicate only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. -- GitLab From 0ef36a5de45486ccbc0d6237f86280c2ac22f52e Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 27 Mar 2018 12:56:14 -0700 Subject: [PATCH 559/960] Add broadcast support for softmax_cross_entropy_with_logits (#16784) * Add broadcast support for softmax_cross_entropy_with_logits This fix tries to address the issue raised in 11534 where there was no broadcast support for SoftmaxCrossEntropyWithLogits. This fix adds the broadcast support for SoftmaxCrossEntropyWithLogits, and adds test cases for it. This fix fixes 11534. Signed-off-by: Yong Tang * Add BroadcastBinaryOpOutputShapeFn for shape function This commit adds BroadcastBinaryOpOutputShapeFn, so that the implementation of BroadcastBinaryOpShapeFn coule be reused in SoftmaxCrossEntropyWithLogits. Signed-off-by: Yong Tang * Update the shape function of SoftmaxCrossEntropyWithLogits so that broadcast could be supported. Signed-off-by: Yong Tang * Add broadcast support for SoftmaxCrossEntropyWithLogits Signed-off-by: Yong Tang * Add broadcast support for SoftmaxCrossEntropyWithLogits with GPU Signed-off-by: Yong Tang * Reformat with clang-format Signed-off-by: Yong Tang * Fix shape test issues Signed-off-by: Yong Tang * Remove `_` for gen_nn_ops._softmax_cross_entropy_with_logits as `_` is not needed anymore with the recent changes Signed-off-by: Yong Tang * Sanitize nn_ops.cc with clang-format Signed-off-by: Yong Tang * Add broadcast examples for SoftmaxCrossEntropyWithLogits shape function Signed-off-by: Yong Tang * Add benchmark tests for trival cases Signed-off-by: Yong Tang * Fix pylint issue Signed-off-by: Yong Tang --- tensorflow/core/framework/common_shape_fns.cc | 4 +- tensorflow/core/framework/common_shape_fns.h | 8 +- tensorflow/core/framework/shape_inference.h | 1 + tensorflow/core/kernels/xent_op.cc | 65 ++++++++++----- tensorflow/core/kernels/xent_op.h | 35 +++++--- tensorflow/core/kernels/xent_op_gpu.cu.cc | 9 ++- tensorflow/core/ops/nn_ops.cc | 30 ++++--- tensorflow/core/ops/nn_ops_test.cc | 16 +++- .../python/kernel_tests/xent_op_test.py | 81 ++++++++++++++++++- 9 files changed, 197 insertions(+), 52 deletions(-) diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 623248b6ce..2fb17c2b02 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -1210,7 +1210,7 @@ Status ConcatV2Shape(InferenceContext* c) { c->num_inputs() - 1 /* dim_index */); } -Status BroadcastBinaryOpShapeFn(InferenceContext* c) { +Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index) { ShapeHandle shape_x = c->input(0); ShapeHandle shape_y = c->input(1); if (!c->RankKnown(shape_x) || !c->RankKnown(shape_y)) { @@ -1272,7 +1272,7 @@ Status BroadcastBinaryOpShapeFn(InferenceContext* c) { } } - c->set_output(0, c->MakeShape(dims)); + c->set_output(output_index, c->MakeShape(dims)); return Status::OK(); } diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 293c40e04d..7230e0f09c 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -265,9 +265,15 @@ Status ConcatShape(shape_inference::InferenceContext* c, // Shape function for concat operations. Status ConcatV2Shape(shape_inference::InferenceContext* c); +// Shape function for binary operators that broadcast their inputs +// and with output to output_index. +Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index); + // Shape function for binary operators that broadcast their inputs. // Tested by ops/math_ops_test.cc. -Status BroadcastBinaryOpShapeFn(InferenceContext* c); +inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) { + return BroadcastBinaryOpOutputShapeFn(c, 0); +} // Shape function for random operations. Status RandomShape(shape_inference::InferenceContext* c); diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index e3cc848a16..accc587000 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -317,6 +317,7 @@ class InferenceContext { input_tensors_as_shapes_ = input_tensors_as_shapes; } + ShapeHandle output(int64 idx) const { return outputs_[idx]; } void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; } Status set_output(StringPiece output_name, const std::vector& shapes); diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc index a6a71fdfaf..ebd19c3d35 100644 --- a/tensorflow/core/kernels/xent_op.cc +++ b/tensorflow/core/kernels/xent_op.cc @@ -17,12 +17,14 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/xent_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/xent_op.h" +#include "tensorflow/core/util/bcast.h" namespace tensorflow { @@ -41,37 +43,56 @@ class SoftmaxXentWithLogitsOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& logits_in = context->input(0); const Tensor& labels_in = context->input(1); - OP_REQUIRES(context, logits_in.IsSameSize(labels_in), - errors::InvalidArgument( - "logits and labels must be same size: logits_size=", - logits_in.shape().DebugString(), - " labels_size=", labels_in.shape().DebugString())); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(logits_in.shape()), - errors::InvalidArgument("logits must be 2-dimensional")); - // As we already tested that both inputs have the same shape no need to - // check that "labels" is a matrix too. + + TensorShape shape_in = logits_in.shape(); + + BCast bcast(BCast::FromShape(logits_in.shape()), + BCast::FromShape(labels_in.shape())); + if (!logits_in.IsSameSize(labels_in)) { + OP_REQUIRES(context, bcast.IsValid(), + errors::InvalidArgument( + "logits and labels must be broadcastable: logits_size=", + logits_in.shape().DebugString(), " labels_size=", + labels_in.shape().DebugString())); + shape_in = BCast::ToShape(bcast.output_shape()); + } + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(shape_in), + errors::InvalidArgument("logits and labels must be beither " + "2-dimensional, or roadcasted to " + "2-dimensional")); // loss is 1-D (one per example), and size is batch_size. Tensor scratch; OP_REQUIRES_OK( context, context->allocate_temp(DataTypeToEnum::value, - TensorShape({logits_in.dim_size(0), 1}), + TensorShape({shape_in.dim_size(0), 1}), &scratch)); Tensor* loss_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({logits_in.dim_size(0)}), &loss_out)); + 0, TensorShape({shape_in.dim_size(0)}), &loss_out)); Tensor* back_out = nullptr; // Try to reuse the logits_in buffer for the backprop output. OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, logits_in.shape(), &back_out)); - if (logits_in.dim_size(0) > 0) { + {0}, 1, shape_in, &back_out)); + if (shape_in.dim_size(0) > 0) { functor::XentFunctor functor; - functor(context->eigen_device(), logits_in.matrix(), - labels_in.matrix(), scratch.matrix(), loss_out->vec(), - back_out->matrix()); + if (logits_in.IsSameSize(labels_in)) { + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + Eigen::array{1, 1}, + Eigen::array{1, 1}, logits_in.matrix(), + labels_in.matrix(), scratch.matrix(), loss_out->vec(), + back_out->matrix()); + } else { + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + BCast::ToIndexArray<2>(bcast.x_bcast()), + BCast::ToIndexArray<2>(bcast.y_bcast()), + logits_in.template shaped(bcast.x_reshape()), + labels_in.template shaped(bcast.y_reshape()), + scratch.matrix(), loss_out->vec(), back_out->matrix()); + } } } }; @@ -81,13 +102,17 @@ class SoftmaxXentWithLogitsOp : public OpKernel { namespace functor { template struct XentFunctorBase { - void operator()(const Device& d, typename TTypes::ConstMatrix logits, + void operator()(const Device& d, + const Eigen::DSizes& shape, + const Eigen::array& logits_bcast, + const Eigen::array& labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, logits, labels, scratch, loss, - backprop); + XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, + logits, labels, scratch, loss, backprop); } }; diff --git a/tensorflow/core/kernels/xent_op.h b/tensorflow/core/kernels/xent_op.h index e689fca7ff..87be17fca9 100644 --- a/tensorflow/core/kernels/xent_op.h +++ b/tensorflow/core/kernels/xent_op.h @@ -18,6 +18,7 @@ limitations under the License. // Functor definition for XentOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { @@ -33,7 +34,11 @@ struct XentFunctor { // scratch: temporary tensor, dims: batch_size, 1 // loss: output tensor for the loss, dims: batch_size. // backprop: output tensor for the backprop, dims: batch_size, num_classes. - void operator()(const Device& d, typename TTypes::ConstMatrix logits, + void operator()(const Device &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -45,7 +50,11 @@ struct XentFunctor { // specializations for both device types. template struct XentEigenImpl { - static void Compute(const Device& d, typename TTypes::ConstMatrix logits, + static void Compute(const Device &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -57,8 +66,8 @@ struct XentEigenImpl { const int kBatchDim = 0; const int kClassDim = 1; - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); + const int batch_size = shape[kBatchDim]; + const int num_classes = shape[kClassDim]; // These arrays are used to reduce along the class dimension, and broadcast // the resulting value to all classes. @@ -84,10 +93,12 @@ struct XentEigenImpl { #endif // max_logits along classes. - scratch.reshape(batch_only).device(d) = logits.maximum(along_class); + scratch.reshape(batch_only).device(d) = + logits.broadcast(logits_bcast).maximum(along_class); // logits - max_logits. - backprop.device(d) = logits - scratch.broadcast(one_by_class); + backprop.device(d) = + logits.broadcast(logits_bcast) - scratch.broadcast(one_by_class); // sum(exp(logits - max_logits)) along classes. scratch.reshape(batch_only).device(d) = backprop.exp().sum(along_class); @@ -99,15 +110,15 @@ struct XentEigenImpl { // sum(-labels * // ((logits - max_logits) - log(sum(exp(logits - max_logits))))) // along classes - loss.device(d) = - (labels * (scratch.log().eval().broadcast(one_by_class) - backprop)) - .eval() - .sum(along_class); + loss.device(d) = (labels.broadcast(labels_bcast) * + (scratch.log().eval().broadcast(one_by_class) - backprop)) + .eval() + .sum(along_class); // backprop: prob - labels, where // prob = exp(logits - max_logits) / sum(exp(logits - max_logits)) - backprop.device(d) = - (backprop.exp() / scratch.broadcast(one_by_class)) - labels; + backprop.device(d) = (backprop.exp() / scratch.broadcast(one_by_class)) - + labels.broadcast(labels_bcast); } }; diff --git a/tensorflow/core/kernels/xent_op_gpu.cu.cc b/tensorflow/core/kernels/xent_op_gpu.cu.cc index 05ee7da490..2c0c0b3a02 100644 --- a/tensorflow/core/kernels/xent_op_gpu.cu.cc +++ b/tensorflow/core/kernels/xent_op_gpu.cu.cc @@ -31,12 +31,17 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { template struct XentFunctor { - void operator()(const GPUDevice& d, typename TTypes::ConstMatrix logits, + void operator()(const GPUDevice &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, logits, labels, scratch, loss, + XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, + logits, labels, scratch, loss, backprop); } }; diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 1f4e9753c3..b9d5104857 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1062,12 +1062,22 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits") .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); - TF_RETURN_IF_ERROR(c->Merge(input, c->input(1), &input)); + if (c->WithRank(c->input(0), 2, &input) == Status::OK() && + c->Merge(input, c->input(1), &input) == Status::OK()) { + DimensionHandle batch_size = c->Dim(input, 0); + c->set_output(0, c->Vector(batch_size)); + c->set_output(1, input); + return Status::OK(); + } + TF_RETURN_IF_ERROR(BroadcastBinaryOpOutputShapeFn(c, 1)); - DimensionHandle batch_size = c->Dim(input, 0); + if (!c->RankKnown(c->output(1)) || c->Rank(c->output(1)) != 2) { + return errors::InvalidArgument( + "Shape must be broadcasted with rank 2, but is rank ", + c->Rank(c->output(1))); + } + DimensionHandle batch_size = c->Dim(c->output(1), 0); c->set_output(0, c->Vector(batch_size)); - c->set_output(1, input); return Status::OK(); }); @@ -1155,9 +1165,9 @@ Status TopKShapeFn(InferenceContext* c) { DimensionHandle last_dim = c->Dim(input, -1); if (c->ValueKnown(last_dim) && c->ValueKnown(k_dim) && c->Value(last_dim) < c->Value(k_dim)) { - return errors::InvalidArgument( - "input must have last dimension >= k = ", c->Value(k_dim), " but is ", - c->Value(last_dim)); + return errors::InvalidArgument("input must have last dimension >= k = ", + c->Value(k_dim), " but is ", + c->Value(last_dim)); } // Replace last_dim with k_dim. @@ -1211,9 +1221,9 @@ REGISTER_OP("NthElement") DimensionHandle last_dim = c->Dim(input, -1); if (c->ValueKnown(last_dim) && c->ValueKnown(n_dim) && c->Value(last_dim) <= c->Value(n_dim)) { - return errors::InvalidArgument( - "Input must have last dimension > n = ", c->Value(n_dim), - " but is ", c->Value(last_dim)); + return errors::InvalidArgument("Input must have last dimension > n = ", + c->Value(n_dim), " but is ", + c->Value(last_dim)); } // Reduce last_dim for output tensor diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 1b17a7cda6..289b953055 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -410,10 +410,18 @@ TEST(NNOpsTest, SoftmaxCrossEntropyWithLogits_ShapeFn) { INFER_OK(op, "[1,?];[?,2]", "[d0_0];[d0_0,d0_1|d1_1]"); INFER_OK(op, "[?,2];[1,2]", "[d1_0];in1"); - INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, - "[1,?];[2,?]"); - INFER_ERROR("Shape must be rank 2 but is rank 3", op, "[1,2,3];?"); - INFER_ERROR("Shapes must be equal rank, but are 2 and 3", op, "?;[1,2,3]"); + INFER_ERROR("Shape must be broadcasted with rank 2", op, "[1,2,3];?"); + INFER_ERROR("Shape must be broadcasted with rank 2", op, "?;[1,2,3]"); + + // Broadcast example + // [1,4] and [2,4] are broadcasted to [2,4] + INFER_OK(op, "[1,4];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); + // [2,4] and [2,1] are broadcasted to [2,4] + INFER_OK(op, "[2,4];[2,1]", "[d0_0];[d0_0|d1_0,d0_1]"); + // [1,?] and [2,4] are broadcasted to [2,4] + INFER_OK(op, "[1,?];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); + // [2,4] and [?,1] are broadcasted to [2,4] + INFER_OK(op, "[2,4];[?,1]", "[d0_0];[d0_0|d1_0,d0_1]"); } TEST(NNOpsTest, SparseSoftmaxCrossEntropyWithLogits_ShapeFn) { diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index e3e120a4eb..60c726d54c 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -18,10 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools +import sys + import numpy as np +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl @@ -88,7 +94,7 @@ class XentTest(test.TestCase): 4.]]]).astype(dtype) np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) - self.assertRaisesRegexp(ValueError, "must be rank 2", + self.assertRaisesRegexp(ValueError, "rank 2, but is rank 3", gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) @@ -128,6 +134,24 @@ class XentTest(test.TestCase): self.assertAllClose( np.array([1.3862, 1.9401]), np_loss, rtol=1.e-3, atol=1.e-3) + def testShapeBroadcast(self): + np_f = np.array([[1., 2., 3., 4.], + [1., 2., 3., 4.]]).astype(np.float32) + np_l = np.array([[0., 0., 0., 1.], + [0., .5, .5, 0.]]).astype(np.float32) + np_loss, np_backprop = self._npXent(np_f, np_l) + tf_f = constant_op.constant( + np.array([[1., 2., 3., 4.]]).astype(np.float32)) + tf_l = constant_op.constant( + np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float32)) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu) as sess: + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( + tf_f, tf_l) + tf_loss, tf_backprop = sess.run([loss, backprop]) + self.assertAllCloseAccordingToType(np_loss, tf_loss) + self.assertAllCloseAccordingToType(np_backprop, tf_backprop) + def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): @@ -260,5 +284,60 @@ class XentTest(test.TestCase): self.assertAllEqual(np_loss, tf_loss) +class XentBenchmark(test.Benchmark): + + def benchmarkZeroDimension(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "zero_dimension_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + logits = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + def benchmarkSingleClass(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "single_class_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = constant_op.constant([[1.], [-1.], [0.]], + dtype=dtypes.float32) + logits = constant_op.constant([[-1.], [0.], [1.]], + dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + if __name__ == "__main__": test.main() -- GitLab From 31232f29daffe0e496bea22dffeda9e7945d344c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 27 Mar 2018 12:55:56 -0700 Subject: [PATCH 560/960] [TF:XLA] Force DebugOptions to be specified when calling HloModule::CreateModuleConfigFromProto Otherwise it's easy to forget that you likely want the DebugOptions to be `legacy_flags::GetDebugOptionsFromFlags()`. PiperOrigin-RevId: 190659046 --- tensorflow/compiler/xla/client/xla_client/BUILD | 1 + .../compiler/xla/client/xla_client/xla_builder_test.cc | 4 +++- tensorflow/compiler/xla/service/hlo_module.cc | 3 ++- tensorflow/compiler/xla/service/hlo_module.h | 2 +- tensorflow/compiler/xla/service/hlo_runner.cc | 7 +++---- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/BUILD b/tensorflow/compiler/xla/client/xla_client/BUILD index cc5f551c9c..60f13e04cb 100644 --- a/tensorflow/compiler/xla/client/xla_client/BUILD +++ b/tensorflow/compiler/xla/client/xla_client/BUILD @@ -70,6 +70,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/legacy_flags:debug_options_flags", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc index 85d4227ba4..ce984564d0 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -39,7 +40,8 @@ class XlaBuilderTest : public ::testing::Test { TF_ASSIGN_OR_RETURN(XlaComputation computation, b->Build()); const HloModuleProto& proto = computation.proto(); TF_ASSIGN_OR_RETURN(const auto& config, - HloModule::CreateModuleConfigFromProto(proto)); + HloModule::CreateModuleConfigFromProto( + proto, legacy_flags::GetDebugOptionsFromFlags())); return HloModule::CreateFromProto(proto, config); } diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 595c531ccf..08b9a29aed 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -295,12 +295,13 @@ StatusOr> HloModule::CreateFromProto( /* static */ StatusOr HloModule::CreateModuleConfigFromProto( - const HloModuleProto& module) { + const HloModuleProto& module, const DebugOptions& debug_options) { TF_RET_CHECK(module.has_program_shape()) << "No program shape found in the proto"; const auto& program_shape = module.program_shape(); HloModuleConfig module_config(program_shape); + module_config.set_debug_options(debug_options); // The module config is constructed with default layouts regardless of what is // passed in via the ProgramShape. Set the layouts to the appropriate values. diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 755bbd359f..9f7f25202b 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -172,7 +172,7 @@ class HloModule { // Creates and returns an HloModuleConfig with an appropriate program shape // for the HLO module in the given proto. static StatusOr CreateModuleConfigFromProto( - const HloModuleProto& module); + const HloModuleProto& module, const DebugOptions& debug_options); // Outlines the given expression from the given computation. // instructions_to_outline contains the instructions that form the expression. diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index e5b1c2efa3..ec7d8210a7 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -52,10 +52,9 @@ namespace { // Creates an HloModule from the given proto. StatusOr> HloProtoToModule( const HloProto& proto, const DebugOptions& debug_options) { - TF_ASSIGN_OR_RETURN( - HloModuleConfig config, - HloModule::CreateModuleConfigFromProto(proto.hlo_module())); - config.set_debug_options(debug_options); + TF_ASSIGN_OR_RETURN(HloModuleConfig config, + HloModule::CreateModuleConfigFromProto(proto.hlo_module(), + debug_options)); TF_ASSIGN_OR_RETURN(auto module, HloModule::CreateFromProto(proto.hlo_module(), config)); return std::move(module); -- GitLab From 502237976694764897e31bad606ec7d02320d5d3 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Wed, 28 Mar 2018 04:19:36 +0800 Subject: [PATCH 561/960] Fix the math equation format issue in tf.matrix_solve_ls (#18022) * Fix the math equation format issue in tf.matrix_solve_ls * Fix math equation to avoid break new line within \\( and \\) * Fix minor indent --- .../core/api_def/base_api/api_def_MatrixSolveLs.pbtxt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt index 51d91399f8..e667c328ae 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt @@ -49,14 +49,14 @@ in the batch: If `fast` is `True`, then the solution is computed by solving the normal equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + -\lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as +problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). +If \\(m \lt n\\) then `output` is computed as \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the minimum-norm solution to the under-determined linear system, i.e. \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), subject to \\(A Z = B\\). Notice that the fast path is only numerically stable when \\(A\\) is numerically full rank and has a condition number -\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is +\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is sufficiently large. If `fast` is `False` an algorithm based on the numerically robust complete -- GitLab From 40d12e552f67600ddfd3f2937092c0fda157b8a8 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 27 Mar 2018 14:06:44 -0700 Subject: [PATCH 562/960] Fast path for calling pack when the list is full of eager tensors. FastPathExecute function also allows inputs to be sequences instead of just lists. PiperOrigin-RevId: 190670587 --- tensorflow/python/eager/pywrap_tfe_src.cc | 36 ++++++++++++----------- tensorflow/python/ops/array_ops.py | 5 ++++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 55ba509065..30ef6781ec 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1405,10 +1405,10 @@ bool CheckInputsOk(PyObject* seq, int start_index, PyObject* item = PyTuple_GET_ITEM(seq, i + start_index); if (!op_def.input_arg(i).number_attr().empty() || !op_def.input_arg(i).type_list_attr().empty()) { - // This item should be a list input. - if (!PyList_Check(item)) return false; - for (Py_ssize_t j = 0; j < PyList_Size(item); j++) { - PyObject* inner_item = PyList_GET_ITEM(item, j); + // This item should be a seq input. + if (!PySequence_Check(item)) return false; + for (Py_ssize_t j = 0; j < PySequence_Fast_GET_SIZE(item); j++) { + PyObject* inner_item = PySequence_Fast_GET_ITEM(item, j); if (!EagerTensor_CheckExact(inner_item) && !CheckResourceVariable(inner_item)) { return false; @@ -1726,11 +1726,11 @@ const char* GetDeviceName(PyObject* py_device_name) { return nullptr; } -bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { - if (!PyList_Check(list)) { +bool RaiseIfNotPySequence(PyObject* seq, const string& attr_name) { + if (!PySequence_Check(seq)) { PyErr_SetString(PyExc_TypeError, - Printf("expected a list for attr %s, got %s instead", - attr_name.data(), list->ob_type->tp_name) + Printf("expected a sequence for attr %s, got %s instead", + attr_name.data(), seq->ob_type->tp_name) .data()); return false; @@ -1940,8 +1940,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyTuple_GET_ITEM(args, kFastPathExecuteInputStartIndex + i); if (!input_arg.number_attr().empty()) { // The item is a homogeneous list. - if (!RaiseIfNotPyList(input, input_arg.number_attr())) return nullptr; - Py_ssize_t len = PyList_Size(input); + if (!RaiseIfNotPySequence(input, input_arg.number_attr())) return nullptr; + Py_ssize_t len = PySequence_Fast_GET_SIZE(input); TFE_OpSetAttrInt(op, input_arg.number_attr().data(), len); if (op_exec_info.run_callbacks) { @@ -1953,15 +1953,15 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, 0), &input_arg, - flattened_attrs.get(), flattened_inputs.get(), op, - status)) { + if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, 0), + &input_arg, flattened_attrs.get(), + flattened_inputs.get(), op, status)) { return nullptr; } for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr. - if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, j), + if (!AddInputToOp(op_exec_info, PySequence_Fast_GET_ITEM(input, j), nullptr /* input_arg */, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { @@ -1971,16 +1971,18 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } } else if (!input_arg.type_list_attr().empty()) { // The item is a heterogeneous list. - if (!RaiseIfNotPyList(input, input_arg.type_list_attr())) return nullptr; + if (!RaiseIfNotPySequence(input, input_arg.type_list_attr())) { + return nullptr; + } const string& attr_name = input_arg.type_list_attr(); - Py_ssize_t len = PyList_Size(input); + Py_ssize_t len = PySequence_Fast_GET_SIZE(input); tensorflow::gtl::InlinedVector attr_value(len); PyObject* py_attr_value = nullptr; if (op_exec_info.run_callbacks) { py_attr_value = PyTuple_New(len); } for (Py_ssize_t j = 0; j < len; j++) { - PyObject* py_input = PyList_GET_ITEM(input, j); + PyObject* py_input = PySequence_Fast_GET_ITEM(input, j); tensorflow::Safe_PyObjectPtr py_eager_tensor; if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, status)) { diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 9106461c60..207866610b 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -957,6 +957,11 @@ def _autopacking_helper(list_or_tuple, dtype, name): Returns: A `tf.Tensor` with value equivalent to `list_or_tuple`. """ + if context.executing_eagerly(): + # NOTE: Fast path when all the items are tensors, this doesn't do any type + # checking. + if all(ops.is_dense_tensor_like(elem) for elem in list_or_tuple): + return gen_array_ops.pack(list_or_tuple, name=name) must_pack = False converted_elems = [] with ops.name_scope(name) as scope: -- GitLab From cc4103dacacc10edb93224c276f68f172922734a Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Tue, 27 Mar 2018 14:12:00 -0700 Subject: [PATCH 563/960] Fix documentation of Clamp; it does not take a computation at all. See: https://github.com/tensorflow/tensorflow/blob/r1.6/tensorflow/compiler/xla/client/computation_builder.h#L668 PiperOrigin-RevId: 190671530 --- tensorflow/docs_src/performance/xla/operation_semantics.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 4d12c7ab6d..32f249cf10 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -241,13 +241,10 @@ See also Clamps an operand to within the range between a minimum and maximum value. - `Clamp(computation, min, operand, max)` + `Clamp(min, operand, max)` | Arguments | Type | Semantics | | ------------- | ----------------------- | -------------------------------- | -| `computation` | `Computation` | computation of type `T_0, T_1, | -: : : ..., T_N -> S` with N parameters : -: : : of arbitrary type : | `min` | `ComputationDataHandle` | array of type T | | `operand` | `ComputationDataHandle` | array of type T | | `max` | `ComputationDataHandle` | array of type T | -- GitLab From 12accfe7d26617797f6f9d63352e09697c6d9455 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Tue, 27 Mar 2018 14:14:01 -0700 Subject: [PATCH 564/960] Add "serve" as a default value for savedmodel_tagset. PiperOrigin-RevId: 190671867 --- tensorflow/contrib/lite/toco/BUILD | 1 + tensorflow/contrib/lite/toco/args.h | 3 ++- tensorflow/contrib/lite/toco/toco_saved_model.cc | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 102740ee47..051fa8de3c 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -143,6 +143,7 @@ cc_library( ":toco_graphviz_dump_options", ":toco_port", ":types_proto_cc", + "//tensorflow/cc/saved_model:tag_constants", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "@com_google_absl//absl/strings", diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index 7b71792ff7..52c789293c 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -26,6 +26,7 @@ limitations under the License. #endif #include "absl/strings/numbers.h" #include "absl/strings/str_split.h" +#include "tensorflow/cc/saved_model/tag_constants.h" #include "tensorflow/contrib/lite/toco/toco_port.h" #include "tensorflow/contrib/lite/toco/toco_types.h" @@ -220,7 +221,7 @@ struct ParsedTocoFlags { Arg output_file; Arg input_format = Arg("TENSORFLOW_GRAPHDEF"); Arg output_format = Arg("TFLITE"); - Arg savedmodel_tagset; + Arg savedmodel_tagset = Arg(tensorflow::kSavedModelTagServe); // TODO(aselle): command_line_flags doesn't support doubles Arg default_ranges_min = Arg(0.); Arg default_ranges_max = Arg(0.); diff --git a/tensorflow/contrib/lite/toco/toco_saved_model.cc b/tensorflow/contrib/lite/toco/toco_saved_model.cc index 91a742b9e0..26f55a66c7 100644 --- a/tensorflow/contrib/lite/toco/toco_saved_model.cc +++ b/tensorflow/contrib/lite/toco/toco_saved_model.cc @@ -35,9 +35,12 @@ const tensorflow::SavedModelBundle* LoadSavedModel( << "Model is not saved in the supported SavedModel format.\n"; // Gets the tags identifying the MetaGraphDef from the command line arguments. - QCHECK(parsed_toco_flags.savedmodel_tagset.specified()) - << "Missing required flag --savedmodel_tagset.\n"; - const string tags_str = parsed_toco_flags.savedmodel_tagset.value(); + string tags_str; + if (parsed_toco_flags.savedmodel_tagset.specified()) { + tags_str = parsed_toco_flags.savedmodel_tagset.value(); + } else { + tags_str = parsed_toco_flags.savedmodel_tagset.default_value(); + } auto tags = absl::StrSplit(tags_str, ','); // Loads MetaGraphDef. -- GitLab From 95f2304c1faf24e9ae2e15cc6dc4b6f7114d1dad Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 27 Mar 2018 14:17:16 -0700 Subject: [PATCH 565/960] Improve error message when users forget to pass toco cmdline args for quantization, but have a model that has FAKE_QUANT operations. PiperOrigin-RevId: 190672414 --- tensorflow/contrib/lite/toco/tflite/export.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc index 2771959970..335b496dcc 100644 --- a/tensorflow/contrib/lite/toco/tflite/export.cc +++ b/tensorflow/contrib/lite/toco/tflite/export.cc @@ -300,6 +300,17 @@ void Export(const Model& model, bool allow_custom_ops, std::set error_summary; auto op_codes = ExportOperatorCodes(model, ops_by_type, operators_map, &builder, &error_summary); + const string fake_quant_operation_name = "FAKE_QUANT"; + if (error_summary.count(fake_quant_operation_name) != 0) { + LOG(ERROR) + << fake_quant_operation_name + << " operation was not converted. If running quantized make sure you " + "are passing --inference_type=QUANTIZED_UINT8 and values for " + "--std_values and --mean_values."; + // Remove the fake quant operation from the errors, since it shouldn't + // be provided a custom implementation. + error_summary.erase(fake_quant_operation_name); + } if (!allow_custom_ops && !error_summary.empty()) { LOG(QFATAL) << "Some of the operators in the model are not supported by " "the standard TensorFlow Lite runtime. If you have a custom " -- GitLab From 6bb4f7abb03a7904fecc5b61e3ed37e9b663d6b0 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 27 Mar 2018 14:23:28 -0700 Subject: [PATCH 566/960] [tf.data] Raise error when window size is 0 in `tf.contrib.data.group_by_window()`. PiperOrigin-RevId: 190673466 --- .../data/python/kernel_tests/bucketing_test.py | 15 +++++++++++++++ .../kernels/data/group_by_window_dataset_op.cc | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index d0131896a1..6002cc73c8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -104,6 +104,21 @@ class GroupByWindowTest(test.TestCase): self.assertAllEqual([0, 0, 0], sess.run(get_next)) self.assertAllEqual([1], sess.run(get_next)) + def testEmpty(self): + iterator = ( + dataset_ops.Dataset.range(4).apply( + grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0)) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + "Window size must be greater than zero, but got 0."): + print(sess.run(get_next)) + def testReduceFuncError(self): components = np.random.randint(100, size=(200,)).astype(np.int64) diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc index 834c06bb93..46f43dd1b1 100644 --- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc @@ -263,6 +263,11 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { } const int64 window_size = window_size_func_output[0].scalar()(); + if (window_size <= 0) { + return errors::InvalidArgument( + "Window size must be greater than zero, but got ", + window_size, "."); + } window_sizes_[key] = window_size; } -- GitLab From 11d895b4bc2f1820767a5bae65ed2ca251d03017 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 27 Mar 2018 14:54:40 -0700 Subject: [PATCH 567/960] Revert "Fix minor typos in contrib files" (#18029) * Revert " Fix the math equation format issue in tf.matrix_solve_ls (#18022)" This reverts commit 502237976694764897e31bad606ec7d02320d5d3. * Revert "Add broadcast support for softmax_cross_entropy_with_logits (#16784)" This reverts commit 0ef36a5de45486ccbc0d6237f86280c2ac22f52e. * Revert "Fix minor spelling typos in contrib (#18015)" This reverts commit 5d76c7db2ab72f9b0cc70ce12ba0a3395dcc20d3. --- .../contrib/bayesflow/python/ops/metropolis_hastings_impl.py | 2 +- .../contrib/estimator/python/estimator/replicate_model_fn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py index fdee0a8da6..05aa134ed5 100644 --- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py @@ -238,7 +238,7 @@ def evolve(initial_sample, using the Metropolis-Hastings algorithm. These samples are from a Markov chain whose equilibrium distribution matches the target distribution. - The probability distribution may have an unknown normalization constant. + The probability distribution may have an unknown normalization constan. We parameterize the probability density as follows: ```none diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index fa2697800e..e0fae2c992 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -136,7 +136,7 @@ def replicate_model_fn(model_fn, the train_op argument of `EstimatorSpec`. loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This - argument can be used to replicate only on the subset of available GPUs. + argument can be used to replice only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. -- GitLab From 5baa43999877aeb7ec070374e579b291b1efe0cf Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 27 Mar 2018 15:01:51 -0700 Subject: [PATCH 568/960] Fix issue caused by None in batch dimension for tf.layers.conv3d (#18027) * Fix issue caused by None in batch dimension for tf.layers.conv3d This fix tries to address the issue raised in 15655 where error returns when the batch dimension for tf.layers.conv3d is None with "channels_first" format. This fix cast None to `-1` to address the issue This fix fixes 15655. Signed-off-by: Yong Tang * Add test case for batch dimension = None in tf.layers.conv3d Signed-off-by: Yong Tang * Address review feedback Signed-off-by: Yong Tang --- tensorflow/python/layers/convolutional.py | 2 ++ tensorflow/python/layers/convolutional_test.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 74e7c63fb3..2d99b1688f 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -180,6 +180,8 @@ class _Conv(base.Layer): # bias_add when computing gradients. To use bias_add, we collapse Z # and Y into a single dimension to obtain a 4D input tensor. outputs_shape = outputs.shape.as_list() + if outputs_shape[0] is None: + outputs_shape[0] = -1 outputs_4d = array_ops.reshape(outputs, [outputs_shape[0], outputs_shape[1], outputs_shape[2] * outputs_shape[3], diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py index 160e732b67..cdb42f5bd1 100644 --- a/tensorflow/python/layers/convolutional_test.py +++ b/tensorflow/python/layers/convolutional_test.py @@ -325,6 +325,12 @@ class ConvTest(test.TestCase): self.assertEqual(conv3d.kernel_constraint, k_constraint) self.assertEqual(conv3d.bias_constraint, b_constraint) + def testConv3DChannelsFirst(self): + # Test case for GitHub issue 15655 + images = array_ops.placeholder( + dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) + conv_layers.conv3d(images, 32, 9, data_format='channels_first') + @test_util.with_c_api class SeparableConv1DTest(test.TestCase): -- GitLab From 8eab17db63deb80c7c21b92d5c92dc48e852de96 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Wed, 28 Mar 2018 06:03:14 +0800 Subject: [PATCH 569/960] Fix the contrib losses function bullet lists rendering (#18021) --- .../api_guides/python/contrib.losses.md | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/docs_src/api_guides/python/contrib.losses.md b/tensorflow/docs_src/api_guides/python/contrib.losses.md index d7f862625e..8b7442216c 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.losses.md +++ b/tensorflow/docs_src/api_guides/python/contrib.losses.md @@ -107,19 +107,19 @@ weighted average over the individual prediction errors: loss = tf.contrib.losses.mean_squared_error(predictions, depths, weight) ``` -@{tf.contrib.losses.absolute_difference} -@{tf.contrib.losses.add_loss} -@{tf.contrib.losses.hinge_loss} -@{tf.contrib.losses.compute_weighted_loss} -@{tf.contrib.losses.cosine_distance} -@{tf.contrib.losses.get_losses} -@{tf.contrib.losses.get_regularization_losses} -@{tf.contrib.losses.get_total_loss} -@{tf.contrib.losses.log_loss} -@{tf.contrib.losses.mean_pairwise_squared_error} -@{tf.contrib.losses.mean_squared_error} -@{tf.contrib.losses.sigmoid_cross_entropy} -@{tf.contrib.losses.softmax_cross_entropy} -@{tf.contrib.losses.sparse_softmax_cross_entropy} +* @{tf.contrib.losses.absolute_difference} +* @{tf.contrib.losses.add_loss} +* @{tf.contrib.losses.hinge_loss} +* @{tf.contrib.losses.compute_weighted_loss} +* @{tf.contrib.losses.cosine_distance} +* @{tf.contrib.losses.get_losses} +* @{tf.contrib.losses.get_regularization_losses} +* @{tf.contrib.losses.get_total_loss} +* @{tf.contrib.losses.log_loss} +* @{tf.contrib.losses.mean_pairwise_squared_error} +* @{tf.contrib.losses.mean_squared_error} +* @{tf.contrib.losses.sigmoid_cross_entropy} +* @{tf.contrib.losses.softmax_cross_entropy} +* @{tf.contrib.losses.sparse_softmax_cross_entropy} -- GitLab From 736e055a756cf0f99d59b67284aade01baec9799 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 27 Mar 2018 15:07:05 -0700 Subject: [PATCH 570/960] Make _USE_C_API = True and _USE_C_SHAPES = False work with handle data, take 2. This change makes _set_shapes_for_outputs_c_api fetch and set Tensor._handle_data. This is necessary for running the Python shape inference code on resource tensors. PiperOrigin-RevId: 190681459 --- tensorflow/c/BUILD | 2 ++ tensorflow/c/python_api.cc | 26 +++++++++++++++ tensorflow/c/python_api.h | 7 ++++ tensorflow/python/BUILD | 2 ++ tensorflow/python/client/tf_session.i | 1 + tensorflow/python/framework/importer_test.py | 34 ++++++++++++++++++++ tensorflow/python/framework/ops.py | 10 ++++++ 7 files changed, 82 insertions(+) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 7f03e40d38..249135f728 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -283,6 +283,8 @@ tf_cuda_library( deps = [ ":c_api", ":c_api_internal", + # TODO(b/74620627): remove when _USE_C_SHAPES is removed + "//tensorflow/python:cpp_shape_inference_proto_cc", ], ) diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index cd604538f1..93155998b8 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/c/python_api.h" #include "tensorflow/c/c_api_internal.h" +#include "tensorflow/python/framework/cpp_shape_inference.pb.h" namespace tensorflow { @@ -109,4 +110,29 @@ void ExtendSession(TF_Session* session, TF_Status* status) { session->extend_before_run = false; } +std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) { + Node* node = &output.oper->node; + CppShapeInferenceResult::HandleData handle_data; + handle_data.set_is_set(true); + { + mutex_lock l(graph->mu); + tensorflow::shape_inference::InferenceContext* ic = + graph->refiner.GetContext(node); + CHECK(ic != nullptr); + CHECK_LT(output.index, ic->num_outputs()); + const auto* shapes_and_types = + ic->output_handle_shapes_and_types(output.index); + if (shapes_and_types == nullptr) return ""; + + for (const auto& p : *shapes_and_types) { + auto* out_shape_and_type = handle_data.add_shape_and_type(); + ic->ShapeHandleToProto(p.shape, out_shape_and_type->mutable_shape()); + out_shape_and_type->set_dtype(p.dtype); + } + } + string result; + handle_data.SerializeToString(&result); + return result; +} + } // namespace tensorflow diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h index 13b680b3a2..2d4c8cd9ed 100644 --- a/tensorflow/c/python_api.h +++ b/tensorflow/c/python_api.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_C_PYTHON_API_H_ #define TENSORFLOW_C_PYTHON_API_H_ +#include + #include "tensorflow/c/c_api.h" // These functions can be removed without notice. They exist to facilitate some @@ -51,6 +53,11 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require); // the graph after the session has been made aware of them. void ExtendSession(TF_Session* session, TF_Status* status); +// Returns the serialized CppShapeInferenceResult::HandleData proto for +// `output` if its a resource tensor, or otherwise returns the empty string. +// TODO(b/74620627): remove when _USE_C_SHAPES is removed +std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output); + } // namespace tensorflow #endif // TENSORFLOW_C_PYTHON_API_H_ diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 20d7e81045..4f61c01f65 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3144,6 +3144,8 @@ tf_proto_library( srcs = ["framework/cpp_shape_inference.proto"], cc_api_version = 2, protodeps = tf_additional_all_protos(), + # TODO(b/74620627): remove when _USE_C_SHAPES is removed + visibility = ["//tensorflow:internal"], ) py_test( diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index e88fc0c01a..70a3d032f4 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -723,6 +723,7 @@ def TF_Reset(target, containers=None, config=None): %unignore TF_TryEvaluateConstant_wrapper; %noexception TF_TryEvaluateConstant_wrapper; %unignore ExtendSession; +%unignore ResourceHandleShapeAndType; %include "tensorflow/python/client/tf_session_helper.h" diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 6593b17184..369669c2e6 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -39,6 +39,7 @@ from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -356,6 +357,39 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d._input_types, [dtypes.int32_ref, dtypes.int32]) self.assertEqual(d.outputs, []) + def testResources(self): + # Produce GraphDef containing a ops producing and consuming resources. + graph = ops.Graph() + with graph.as_default(): + var = resource_variable_ops.ResourceVariable(1.0) + var_assign = var.assign(2.0) + # Use an op that requires handle shape to be set. + var_shape = resource_variable_ops.variable_shape(var.handle) + init = variables.global_variables_initializer() + graph_def = graph.as_graph_def() + + # Import the GraphDef. + with ops.Graph().as_default(): + # pylint: disable=unused-variable + imported_var, imported_assign, imported_shape, imported_init = ( + importer.import_graph_def( + graph_def, + return_elements=[var.name, var_assign.name, var_shape.name, + init.name])) + + # Make sure the handle shape is set on the imported variable. + new_var_shape = resource_variable_ops.variable_shape(imported_var) + # pylint: enable=unused-variable + + # Run the imported graph. + # TODO(b/76173421): make this work (currently DCHECKS) + # with self.test_session() as sess: + # sess.run(imported_init) + # self.assertEqual(sess.run(imported_var), 1.0) + # self.assertEqual(sess.run(imported_assign), 2.0) + # self.assertEqual(list(sess.run(imported_shape)), []) + # self.assertEqual(list(sess.run(new_var_shape)), []) + def testWhileLoop(self): # Produce GraphDef containing while loop. graph = ops.Graph() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 25a951a2de..4b0f3f3a20 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -42,6 +42,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import core from tensorflow.python.eager import tape from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import cpp_shape_inference_pb2 from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -295,6 +296,7 @@ class Tensor(_TensorLike): # Attributes used for C++ shape inference. Not inspected, only forwarded. # If set, will be a HandleData object from cpp_shape_inference.proto. + # TODO(b/74620627): remove when _USE_C_SHAPES is removed self._handle_data = None self._id = uid() @@ -2472,6 +2474,14 @@ def _set_shapes_for_outputs_c_api(op): shape_vector = [None if d == -1 else d for d in shape_vector] output.set_shape(tensor_shape.TensorShape(shape_vector)) + serialized = c_api.ResourceHandleShapeAndType(op._graph._c_graph, + output._as_tf_output()) + if serialized: + output._handle_data = ( + cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData.FromString( + compat.as_bytes(serialized))) + else: + output._handle_data = None # TODO(skyewm): remove this when _USE_C_API flag is removed. def _set_shapes_for_outputs(op): -- GitLab From 016a79b371061e060ad85b0dbac4aa27d07f0239 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 27 Mar 2018 15:09:19 -0700 Subject: [PATCH 571/960] Refix spelling error reverted in #18015 (#18031) --- .../contrib/estimator/python/estimator/replicate_model_fn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index e0fae2c992..fa2697800e 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -136,7 +136,7 @@ def replicate_model_fn(model_fn, the train_op argument of `EstimatorSpec`. loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This - argument can be used to replice only on the subset of available GPUs. + argument can be used to replicate only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. -- GitLab From 9a0b91023d8444cd4691be10b36ce469ca08058d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Mar 2018 15:08:12 -0700 Subject: [PATCH 572/960] Moves Execute() from c_api.cc PiperOrigin-RevId: 190681610 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 90 ++------------ tensorflow/c/eager/c_api_test.cc | 4 +- tensorflow/core/common_runtime/eager/BUILD | 15 +++ .../core/common_runtime/eager/execute.cc | 110 ++++++++++++++++++ .../core/common_runtime/eager/execute.h | 37 ++++++ 6 files changed, 172 insertions(+), 85 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/execute.cc create mode 100644 tensorflow/core/common_runtime/eager/execute.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 8df7b56623..e57011a08b 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -30,6 +30,7 @@ tf_cuda_library( "//tensorflow/core:core_cpu", "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:execute", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", "//tensorflow/core/common_runtime/eager:copy_to_device_node", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index eaeb2fd07a..ac7114f71e 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" +#include "tensorflow/core/common_runtime/eager/execute.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -574,83 +575,6 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, return nullptr; } -tensorflow::Status Execute( - TFE_Context* ctx, tensorflow::Device* device, - const tensorflow::gtl::InlinedVector& - op_inputs, - tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, - tensorflow::TensorHandle** retvals, int num_retvals) { - if (!ctx->context.SoftPlacement() && device == nullptr) { - device = ctx->context.HostCPU(); - } - - if (device == nullptr) { - // TODO(apassos) debug how the assignment below might return a different - // device from the one requested above. - device = kernel->device(); - } - - std::vector outputs(1); - const tensorflow::MemoryTypeVector* output_memory_types = nullptr; - output_memory_types = &kernel->kernel()->output_memory_types(); - std::vector inputs(op_inputs.size()); - for (int i = 0; i < op_inputs.size(); ++i) { - const tensorflow::Tensor* input_tensor = nullptr; - TF_RETURN_IF_ERROR(op_inputs[i]->Tensor(&input_tensor)); - inputs[i] = *input_tensor; - } - // WARNING: kernel->Run utilizes the FunctionLibraryRuntime - // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def. - // But knowledge of the implementation - // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by - // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here. - // This is quite subtle. Re-work things to make this better? (Would it make - // sense for FunctionLibraryRuntime to ensure thread-safe access to - // FunctionLibraryDefinition?). TODO(apassos) figure out how to record stats - // for ops which are a part of functions. - // TODO(agarwal): change Run to take vector of handles ? - TF_RETURN_IF_ERROR(kernel->Run(&inputs, &outputs, maybe_stats)); - if (maybe_stats != nullptr) { - maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - - maybe_stats->all_start_micros()); - tensorflow::mutex_lock ml(*ctx->context.MetadataMu()); - if (ctx->context.ShouldStoreMetadata()) { - auto* step_stats = ctx->context.RunMetadataProto()->mutable_step_stats(); - // Lazily initialize the RunMetadata with information about all devices if - // this is the first call. - while (step_stats->dev_stats_size() < ctx->context.devices()->size()) { - step_stats->add_dev_stats(); - } - // Find the current device's index. - int device_idx = 0; - for (int i = 0; i < ctx->context.devices()->size(); ++i) { - if (ctx->context.devices()->at(i) == device) { - device_idx = i; - break; - } - } - // Populate the device stats for this device. - auto* dev_stats = step_stats->mutable_dev_stats(device_idx); - dev_stats->set_device(device->name()); - *dev_stats->add_node_stats() = *maybe_stats; - } - } - DCHECK_EQ(num_retvals, outputs.size()); - tensorflow::Device* op_device = IsCPU(device) ? nullptr : device; - for (int i = 0; i < num_retvals; ++i) { - tensorflow::Device* d = op_device; - if (d != nullptr && output_memory_types != nullptr && - (*output_memory_types)[i] == tensorflow::HOST_MEMORY) { - d = nullptr; - } - if (retvals[i] == nullptr) { - retvals[i] = new tensorflow::TensorHandle(outputs[i], d, op_device); - } else { - retvals[i]->SetTensorAndDevice(outputs[i], d, op_device); - } - } - return tensorflow::Status::OK(); -} // TODO(agarwal): move EagerExecutor and EagerNode related code to a separate // file. @@ -690,9 +614,9 @@ class ExecuteNode : public tensorflow::EagerNode { } tensorflow::Status Run() override { - const tensorflow::Status status = - Execute(ctx_, op_device_, inputs_, kernel_, maybe_stats_.get(), - retvals_.begin(), retvals_.size()); + const tensorflow::Status status = tensorflow::EagerExecute( + &ctx_->context, op_device_, inputs_, kernel_, maybe_stats_.get(), + retvals_.begin(), retvals_.size()); if (status.ok()) { return status; } else { @@ -1062,9 +986,9 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // allocate it. std::vector handle_retvals(*num_retvals, nullptr); - status->status = - Execute(op->ctx, op->device, op->inputs, kernel, maybe_stats.get(), - handle_retvals.data(), *num_retvals); + status->status = tensorflow::EagerExecute( + &op->ctx->context, op->device, op->inputs, kernel, maybe_stats.get(), + handle_retvals.data(), *num_retvals); for (int i = 0; i < *num_retvals; ++i) { retvals[i] = new TFE_TensorHandle(handle_retvals[i]); } diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 2268aba90d..d88a6c1dda 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -688,12 +688,12 @@ TEST(CAPI, Execute_Min_CPU) { TFE_DeleteOp(minOp); TFE_DeleteTensorHandle(input); TFE_DeleteTensorHandle(axis); - TFE_DeleteContext(ctx, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); ASSERT_EQ(1, num_retvals); TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteTensorHandle(retvals[0]); + TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); float output[2] = {0}; EXPECT_EQ(sizeof(output), TF_TensorByteSize(t)); diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index a619cac9a4..56e2e1094f 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -149,3 +149,18 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +cc_library( + name = "execute", + srcs = ["execute.cc"], + hdrs = ["execute.h"], + deps = [ + ":context", + ":kernel_and_device", + ":tensor_handle", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc new file mode 100644 index 0000000000..9e6dddaa02 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/execute.h" + +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/step_stats.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +Status EagerExecute(EagerContext* ctx, Device* device, + const gtl::InlinedVector& op_inputs, + KernelAndDevice* kernel, NodeExecStats* maybe_stats, + TensorHandle** retvals, int num_retvals) { + if (!ctx->SoftPlacement() && device == nullptr) { + device = ctx->HostCPU(); + } + + if (device == nullptr) { + // TODO(apassos) debug how the assignment below might return a different + // device from the one requested above. + device = kernel->device(); + } + + std::vector outputs(1); + const MemoryTypeVector* output_memory_types = nullptr; + output_memory_types = &kernel->kernel()->output_memory_types(); + std::vector inputs(op_inputs.size()); + for (int i = 0; i < op_inputs.size(); ++i) { + const Tensor* input_tensor = nullptr; + TF_RETURN_IF_ERROR(op_inputs[i]->Tensor(&input_tensor)); + inputs[i] = *input_tensor; + } + // WARNING: kernel->Run utilizes the FunctionLibraryRuntime + // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def. + // But knowledge of the implementation + // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by + // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here. + // This is quite subtle. Re-work things to make this better? (Would it make + // sense for FunctionLibraryRuntime to ensure thread-safe access to + // FunctionLibraryDefinition?). TODO(apassos) figure out how to record stats + // for ops which are a part of functions. + // TODO(agarwal): change Run to take vector of handles ? + TF_RETURN_IF_ERROR(kernel->Run(&inputs, &outputs, maybe_stats)); + if (maybe_stats != nullptr) { + maybe_stats->set_op_end_rel_micros(Env::Default()->NowMicros() - + maybe_stats->all_start_micros()); + mutex_lock ml(*ctx->MetadataMu()); + if (ctx->ShouldStoreMetadata()) { + auto* step_stats = ctx->RunMetadataProto()->mutable_step_stats(); + // Lazily initialize the RunMetadata with information about all devices if + // this is the first call. + while (step_stats->dev_stats_size() < ctx->devices()->size()) { + step_stats->add_dev_stats(); + } + // Find the current device's index. + int device_idx = 0; + for (int i = 0; i < ctx->devices()->size(); ++i) { + if (ctx->devices()->at(i) == device) { + device_idx = i; + break; + } + } + // Populate the device stats for this device. + auto* dev_stats = step_stats->mutable_dev_stats(device_idx); + dev_stats->set_device(device->name()); + *dev_stats->add_node_stats() = *maybe_stats; + } + } + DCHECK_EQ(num_retvals, outputs.size()); + Device* op_device = device; + for (int i = 0; i < num_retvals; ++i) { + Device* d = op_device; + if (d != nullptr && output_memory_types != nullptr && + (*output_memory_types)[i] == HOST_MEMORY) { + d = nullptr; + } + if (retvals[i] == nullptr) { + retvals[i] = new TensorHandle(outputs[i], d, op_device); + } else { + retvals[i]->SetTensorAndDevice(outputs[i], d, op_device); + } + } + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/execute.h b/tensorflow/core/common_runtime/eager/execute.h new file mode 100644 index 0000000000..1c0ea6bcde --- /dev/null +++ b/tensorflow/core/common_runtime/eager/execute.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/step_stats.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" + +namespace tensorflow { + +// Low-level utility to execute the kernel specified by kernel on device device, +// with the inputs op_inputs, in the context ctx. +Status EagerExecute(EagerContext* ctx, Device* device, + const gtl::InlinedVector& op_inputs, + KernelAndDevice* kernel, NodeExecStats* maybe_stats, + TensorHandle** retvals, int num_retvals); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ -- GitLab From f9fd69a5236f20b8e90e0898f5b2928cd6591ebb Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Tue, 27 Mar 2018 15:35:37 -0700 Subject: [PATCH 573/960] Update tensorboard dependency to 1.7.0+ --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 3e4f9b0fdd..365e8d6b08 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.6.0, < 1.7.0', + 'tensorboard >= 1.7.0, < 1.8.0', 'termcolor >= 1.1.0', ] -- GitLab From 05ddf373980fae94a2c73cf93161332484e102fd Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 27 Mar 2018 15:37:35 -0700 Subject: [PATCH 574/960] [XLA] Fold reduce-window(convert(pad(X))) into reduce-window(convert(X)) ReduceWindow operations are done in higher precision to avoid accumulation error. Convert operations can find their way between a ReduceWindow and a Pad which can prevent a Pad from combining with a ReduceWindow. Fix this by looking past the Convert while also checking that the Convert'd Pad's init value is identical to the reduce-window value. PiperOrigin-RevId: 190686175 --- .../xla/service/algebraic_simplifier.cc | 57 ++++++++++--- .../xla/service/algebraic_simplifier_test.cc | 85 +++++++++++++++++++ 2 files changed, 132 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index f9fabd8a35..0e4624fd69 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1731,18 +1731,29 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( function)); } - VLOG(10) << "Considering folding Pad: " << operand->ToString() - << "\ninto reduce-window: " << reduce_window->ToString(); - // This optimization folds a pad op into reduce_window. - if (operand->opcode() != HloOpcode::kPad) { + HloInstruction* pad; + const HloInstruction* convert = nullptr; + if (operand->opcode() == HloOpcode::kPad) { + pad = operand; + } else if (operand->opcode() == HloOpcode::kConvert && + operand->operand(0)->opcode() == HloOpcode::kPad) { + convert = operand; + pad = operand->mutable_operand(0); + } else { VLOG(10) << "Not folding pad into reduce-window as there is no pad."; return Status::OK(); } + VLOG(10) << "Considering folding Pad: " << pad->ToString() + << "\ninto reduce-window: " << reduce_window->ToString() + << (convert != nullptr ? tensorflow::strings::StrCat( + "\nvia convert: ", convert->ToString()) + : ""); + // Do not fold interior padding into ReduceWindow since the backends do not // support it. - const PaddingConfig& pad_config = operand->padding_config(); + const PaddingConfig& pad_config = pad->padding_config(); if (HasInteriorPadding(pad_config)) { VLOG(10) << "Not folding pad into reduce-window due to interior padding."; return Status::OK(); @@ -1750,14 +1761,27 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( // If reduce_window already has padding, the pad value of the pad op and the // init value of reduce_window must match to allow folding the pad. - const HloInstruction* pad_value = operand->operand(1); + const HloInstruction* pad_value = pad->operand(1); const HloInstruction* reduce_init_value = reduce_window->operand(1); if (pad_value != reduce_init_value) { + auto literals_are_equivalent = [&] { + auto& pad_literal = pad_value->literal(); + auto& reduce_init_literal = reduce_init_value->literal(); + if (pad_literal == reduce_init_literal) { + return true; + } + auto converted_pad_literal = pad_literal.ConvertToShape( + reduce_init_value->shape(), /*round_f32_to_bf16=*/true); + if (!converted_pad_literal.ok()) { + return false; + } + return *converted_pad_literal.ValueOrDie() == reduce_init_literal; + }; // The pad value is usually a constant, so we handle that case and do not // try to get more fancy about proving equivalence in cases beyond that. if (pad_value->opcode() != HloOpcode::kConstant || reduce_init_value->opcode() != HloOpcode::kConstant || - pad_value->literal() != reduce_init_value->literal()) { + !literals_are_equivalent()) { VLOG(10) << "Not folding pad into reduce-window due to different pad " "values."; return Status::OK(); @@ -1766,7 +1790,7 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( // If the pad puts a single non-identity value in each window that we're // reducing, then this is a broadcast. - HloInstruction* pad_operand = operand->mutable_operand(0); + HloInstruction* pad_operand = pad->mutable_operand(0); auto is_effective_broadcast = [&] { if (window_util::HasStride(window)) { VLOG(10) << "Window has stride."; @@ -1810,6 +1834,18 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( VLOG(10) << "Found window covers a single unpadded element."; return true; }; + + HloInstruction* new_reduce_window_operand; + if (convert != nullptr) { + new_reduce_window_operand = + computation_->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(pad_operand->shape(), + convert->shape().element_type()), + pad_operand)); + } else { + new_reduce_window_operand = pad_operand; + } + if (is_effective_broadcast()) { VLOG(10) << "Replacing pad/reduce-window with (implicit) broadcast."; auto fadd = [this](std::unique_ptr x) { @@ -1818,7 +1854,7 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( return ReplaceWithNewInstruction( reduce_window, HloInstruction::CreateBroadcastSequence( /*output_shape=*/reduce_window->shape(), - /*operand=*/pad_operand, fadd)); + /*operand=*/new_reduce_window_operand, fadd)); } // Carry out the folding of the pad into reduce_window. @@ -1835,10 +1871,11 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( window_dim.set_padding_high(window_dim.padding_high() + pad_dim.edge_padding_high()); } + return ReplaceWithNewInstruction( reduce_window, HloInstruction::CreateReduceWindow( /*shape=*/reduce_window->shape(), - /*operand=*/pad_operand, + /*operand=*/new_reduce_window_operand, /*init_value=*/reduce_window->mutable_operand(1), /*window=*/new_window, /*reduce_computation=*/function)); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 3b80a827bf..20c549562d 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2338,6 +2338,91 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) { EXPECT_EQ(root->window().dimensions(3).padding_high(), 102); } +// Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to +// ReduceWindow(Convert(op), x). +TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) { + HloModule module(TestName()); + HloComputation::Builder builder(TestName()); + + // Create operand to the pad. + HloInstruction* parameter = + builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(BF16, {1, 2, 3, 4}), "p0")); + + // Create the pad. + PaddingConfig padding = MakeNoPaddingConfig(4); + padding.mutable_dimensions(1)->set_edge_padding_low(1); + padding.mutable_dimensions(3)->set_edge_padding_high(2); + + HloInstruction* pad_value = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(5.0f))); + HloInstruction* pad = builder.AddInstruction(HloInstruction::CreatePad( + ShapeUtil::MakeShape(BF16, {1, 3, 3, 5}), parameter, pad_value, padding)); + + HloInstruction* convert = + builder.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(pad->shape(), F32), pad)); + + // Create add computation. + HloComputation* add_computation = nullptr; + { + HloComputation::Builder builder(TestName() + ".add"); + const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + HloInstruction* p0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "p0")); + HloInstruction* p1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "p1")); + builder.AddInstruction( + HloInstruction::CreateBinary(scalar_shape, HloOpcode::kAdd, p0, p1)); + add_computation = module.AddEmbeddedComputation(builder.Build()); + } + + // Create the reduce-window. + Window window; + for (int64 i = 0; i < ShapeUtil::Rank(pad->shape()); ++i) { + auto* dim = window.add_dimensions(); + dim->set_size(1); + dim->set_padding_low(10); + dim->set_padding_high(100); + dim->set_window_dilation(1); + dim->set_base_dilation(1); + } + const Shape reduce_window_shape = + ShapeUtil::MakeShape(F32, {111, 113, 113, 115}); + HloInstruction* reduce_init_value = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(5.0f))); + HloInstruction* reduce_window = + builder.AddInstruction(HloInstruction::CreateReduceWindow( + reduce_window_shape, convert, reduce_init_value, window, + add_computation)); + + // Build the computation and run the simplifier. + auto computation = module.AddEntryComputation(builder.Build()); + HloInstruction* root = computation->root_instruction(); + EXPECT_EQ(root, reduce_window); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(&module).ValueOrDie()); + + // Running simplification again should not result in any further changes. + ASSERT_FALSE(simplifier.Run(&module).ValueOrDie()); + + // Verify the result + root = computation->root_instruction(); + EXPECT_THAT(root, op::ReduceWindow(op::Convert(parameter), op::Constant())); + EXPECT_TRUE(ShapeUtil::Equal(root->shape(), reduce_window_shape)) + << ShapeUtil::HumanString(root->shape()) << " vs " + << ShapeUtil::HumanString(reduce_window_shape); + EXPECT_EQ(root->window().dimensions(0).padding_low(), 10); + EXPECT_EQ(root->window().dimensions(1).padding_low(), 11); + EXPECT_EQ(root->window().dimensions(2).padding_low(), 10); + EXPECT_EQ(root->window().dimensions(3).padding_low(), 10); + EXPECT_EQ(root->window().dimensions(0).padding_high(), 100); + EXPECT_EQ(root->window().dimensions(1).padding_high(), 100); + EXPECT_EQ(root->window().dimensions(2).padding_high(), 100); + EXPECT_EQ(root->window().dimensions(3).padding_high(), 102); +} + TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) { HloComputation::Builder builder(TestName()); const Shape shape = ShapeUtil::MakeShape(F32, {448, 2048, 1, 1}); -- GitLab From b4742b76c386409c96c60172e6ca1c1534e2b4af Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 15:47:23 -0700 Subject: [PATCH 575/960] Add node types for DFS traversal to catch more issues with deduping inputs to in-place ops. PiperOrigin-RevId: 190687820 --- tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 23e21855c8..5dd0b6f4b0 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1089,7 +1089,8 @@ namespace { bool FeedsInPlaceOp(const SimpleGraphView& graph_view, const NodeDef& node) { const std::unordered_set op_types_to_traverse = { - node.op(), "Identity", "IdentityN", "Reshape"}; + node.op(), "Identity", "IdentityN", "Reshape", + "ExpandDims", "Enter", "Switch", "Merge"}; int node_idx = graph_view.index(node.name()); std::set node_fanout; graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &node_fanout); -- GitLab From a16761483ec55095158b1b11118d93ea00a538f4 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Mar 2018 15:55:04 -0700 Subject: [PATCH 576/960] TFTS: Fix a bug in the SavedModel cold-start export It now correctly broadcasts start state across whatever batch dimension it is passed rather than sqishing it down to a batch dimension of 1. PiperOrigin-RevId: 190688855 --- .../python/timeseries/estimators_test.py | 21 +++++++++++++++++++ .../timeseries/python/timeseries/head.py | 6 ++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py index f4304f2560..51d0c0ca3f 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py @@ -126,6 +126,27 @@ class TimeSeriesRegressorTest(test.TestCase): signatures=signatures, session=sess) + # Test cold starting + batch_numpy_times = numpy.tile( + numpy.arange(30, dtype=numpy.int64)[None, :], (10, 1)) + batch_numpy_values = numpy.ones([10, 30, 1]) + state = saved_model_utils.cold_start_filter( + signatures=signatures, + session=sess, + features={ + feature_keys.FilteringFeatures.TIMES: batch_numpy_times, + feature_keys.FilteringFeatures.VALUES: batch_numpy_values + } + ) + predict_times = numpy.tile( + numpy.arange(30, 45, dtype=numpy.int64)[None, :], (10, 1)) + predictions = saved_model_utils.predict_continuation( + continue_from=state, + times=predict_times, + signatures=signatures, + session=sess) + self.assertAllEqual([10, 15, 1], predictions["mean"].shape) + def test_fit_restore_fit_ar_regressor(self): def _estimator_fn(model_dir): return estimators.ARRegressor( diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index 3d7e615290..4cf6bbcfd4 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -154,8 +154,10 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc no_state_features = { k: v for k, v in features.items() if not k.startswith(feature_keys.State.STATE_PREFIX)} - cold_filtering_outputs = self.create_loss( - no_state_features, estimator_lib.ModeKeys.EVAL) + # Ignore any state management when cold-starting. The model's default + # start state is replicated across the batch. + cold_filtering_outputs = self.model.define_loss( + features=no_state_features, mode=estimator_lib.ModeKeys.EVAL) return estimator_lib.EstimatorSpec( mode=estimator_lib.ModeKeys.PREDICT, export_outputs={ -- GitLab From 496840acbdd8b8b7688c257793e09a02229d21f6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 16:26:12 -0700 Subject: [PATCH 577/960] Test all TFLite kernel implementations for fully connected. PiperOrigin-RevId: 190693455 --- tensorflow/contrib/lite/kernels/BUILD | 1 + .../lite/kernels/fully_connected_test.cc | 57 +++++++++++++++---- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 200cb3075b..1450c1e14b 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -710,6 +710,7 @@ tf_cc_test( ":builtin_ops", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_absl//absl/memory", "@com_google_googletest//:gtest", ], ) diff --git a/tensorflow/contrib/lite/kernels/fully_connected_test.cc b/tensorflow/contrib/lite/kernels/fully_connected_test.cc index a0f766c4f4..87413000a9 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected_test.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected_test.cc @@ -19,12 +19,25 @@ limitations under the License. #include #include +#include "absl/memory/memory.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" #include "tensorflow/contrib/lite/model.h" namespace tflite { + +namespace ops { +namespace builtin { + +TfLiteRegistration* Register_FULLY_CONNECTED_REF(); +TfLiteRegistration* Register_FULLY_CONNECTED_NEON_OPT(); +TfLiteRegistration* Register_FULLY_CONNECTED_GENERIC_OPT(); +TfLiteRegistration* Register_FULLY_CONNECTED_PIE(); + +} // namespace builtin +} // namespace ops + namespace { using ::testing::ElementsAre; @@ -119,7 +132,8 @@ static float fully_connected_golden_output[] = { class BaseFullyConnectedOpModel : public SingleOpModel { public: // TODO(ahentz): test different activation types too. - BaseFullyConnectedOpModel(int units, int batches, const TensorData& input, + BaseFullyConnectedOpModel(TfLiteRegistration* registration, int units, + int batches, const TensorData& input, const TensorData& output = {TensorType_FLOAT32}) : batches_(batches), units_(units) { int total_input_size = 1; @@ -149,6 +163,8 @@ class BaseFullyConnectedOpModel : public SingleOpModel { BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions, CreateFullyConnectedOptions(builder_, ActivationFunctionType_RELU) .Union()); + resolver_ = absl::make_unique( + BuiltinOperator_FULLY_CONNECTED, registration); BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)}); } @@ -208,10 +224,25 @@ class QuantizedFullyConnectedOpModel : public BaseFullyConnectedOpModel { } }; +const auto kKernelMap = new std::map({ + {"Reference", ops::builtin::Register_FULLY_CONNECTED_REF()}, + {"NeonOptimized", ops::builtin::Register_FULLY_CONNECTED_NEON_OPT()}, + {"GenericOptimized", ops::builtin::Register_FULLY_CONNECTED_GENERIC_OPT()}, + {"Pie", ops::builtin::Register_FULLY_CONNECTED_PIE()}, +}); + +class FullyConnectedOpTest : public SingleOpTest { + protected: + const std::map& GetKernelMap() override { + return *kKernelMap; + } +}; + // TODO(ahentz): add more small tests like this one, focused on making sure the // calculations are correct. -TEST(FullyConnectedOpTest, SimpleTest) { - FloatFullyConnectedOpModel m(3, 2, {TensorType_FLOAT32, {2, 10}}); +TEST_P(FullyConnectedOpTest, SimpleTest) { + FloatFullyConnectedOpModel m(GetRegistration(), 3, 2, + {TensorType_FLOAT32, {2, 10}}); m.SetWeights({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 @@ -229,9 +260,9 @@ TEST(FullyConnectedOpTest, SimpleTest) { EXPECT_THAT(m.GetOutput(), ElementsAre(24, 25, 26, 58, 59, 60)); } -TEST(FullyConnectedOpTest, SimpleTestQuantized) { +TEST_P(FullyConnectedOpTest, SimpleTestQuantized) { QuantizedFullyConnectedOpModel m( - 3, 2, + GetRegistration(), 3, 2, /*input=*/{TensorType_UINT8, {2, 10}, -63.5, 64}, /*output=*/{TensorType_UINT8, {}, -127, 128}); @@ -261,7 +292,8 @@ TEST(FullyConnectedOpTest, SimpleTest4DInput) { // Note that it is not required that the first dimension be the number of // batches. All we care is that the input can be evenly distributed in // batches. In this case, we need the input to have multiples of '2'. - FloatFullyConnectedOpModel m(/*units=*/3, + FloatFullyConnectedOpModel m(ops::builtin::Register_FULLY_CONNECTED_PIE(), + /*units=*/3, /*batches=*/2, /*input=*/{TensorType_FLOAT32, {4, 1, 5, 1}}); m.SetWeights({ @@ -284,9 +316,9 @@ TEST(FullyConnectedOpTest, SimpleTest4DInput) { })); } -TEST(FullyConnectedOpTest, SimpleTest4dInputQuantized) { +TEST_P(FullyConnectedOpTest, SimpleTest4dInputQuantized) { QuantizedFullyConnectedOpModel m( - 3, 2, + GetRegistration(), 3, 2, /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -63.5, 64}, /*output=*/{TensorType_UINT8, {}, -127, 128}); @@ -312,10 +344,15 @@ TEST(FullyConnectedOpTest, SimpleTest4dInputQuantized) { EXPECT_THAT(m.GetOutput(), ElementsAre(151, 152, 153, 185, 186, 187)); } +INSTANTIATE_TEST_CASE_P( + FullyConnectedOpTest, FullyConnectedOpTest, + ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap))); + // TODO(ahentz): Reconsider this test. Having arbitrary weights makes it hard // to debug errors and doesn't necessarily test all the important details. -TEST(FullyConnectedOpTest, BlackBoxTest) { - FloatFullyConnectedOpModel m(16, 2, {TensorType_FLOAT32, {2, 8}}); +TEST_P(FullyConnectedOpTest, BlackBoxTest) { + FloatFullyConnectedOpModel m(GetRegistration(), 16, 2, + {TensorType_FLOAT32, {2, 8}}); m.SetWeights( {0.091327, 0.103366, -0.316505, -0.083120, 0.149366, -0.196636, -0.123672, 0.062800, 0.063031, 0.191670, -0.062001, -0.061504, -- GitLab From 9da8636553c4e6713d974fb7e9694198a530425a Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 27 Mar 2018 16:46:34 -0700 Subject: [PATCH 578/960] Add link to raspberry pi demo (#18034) --- tensorflow/contrib/lite/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 2680d515eb..c15ae3f233 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -126,6 +126,9 @@ The above pre-trained models have been trained on the ImageNet data set, which c The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. +# Getting started with RaspberryPi + +Using RaspberryPi can be accomplished by following the [Makefile instructions](g3doc/rpi.md). That will give a you a static library (.a) that you can build your app against. Python bindings will be coming soon as well as a demo app. ### Train a custom model A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. -- GitLab From 71593602d95385fbd8c3dde361dab09d381b5ac6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 16:43:48 -0700 Subject: [PATCH 579/960] Fixed a bug in ConvKFCBasicMultiIndepFB introduced in the last CL PiperOrigin-RevId: 190695737 --- tensorflow/contrib/kfac/python/ops/fisher_blocks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index b04bf76a88..e0d9cb5ea9 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -861,12 +861,12 @@ class ConvKFCBasicFB(InputOutputMultiTower, KroneckerProductFB): super(ConvKFCBasicFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): + inputs, grads_list = self._process_data(grads_list) + # Infer number of locations upon which convolution is applied. - self._num_locations = num_conv_locations(self._inputs[0].shape.as_list(), + self._num_locations = num_conv_locations(inputs[0].shape.as_list(), self._strides) - inputs, grads_list = self._process_data(grads_list) - self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, (inputs, self._filter_shape, self._padding, self._strides, @@ -1391,7 +1391,7 @@ class ConvKFCBasicMultiIndepFB(InputOutputMultiTowerMultiUse, inputs, grads_list = self._process_data(grads_list) # Infer number of locations upon which convolution is applied. - self._num_locations = num_conv_locations(inputs.shape.as_list(), + self._num_locations = num_conv_locations(inputs[0].shape.as_list(), self._strides) self._input_factor = self._layer_collection.make_or_get_factor( -- GitLab From 719b0f6f900aca51b59e3bfc6fbf73d3b4a0af63 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 16:48:31 -0700 Subject: [PATCH 580/960] Improve support for DT_HALF and DT_BFLOAT16 in Grappler graph optimizations. Update GrapplerTest::EvaluateNodes to take feeds as an argument, to make it easier to write tests with placeholders. PiperOrigin-RevId: 190696386 --- .../optimizers/arithmetic_optimizer_test.cc | 4 +- .../grappler/optimizers/constant_folding.cc | 84 +++++++------- .../grappler/optimizers/constant_folding.h | 2 +- .../optimizers/constant_folding_test.cc | 109 +++++++++++++----- .../optimizers/function_optimizer_test.cc | 9 +- .../optimizers/memory_optimizer_test.cc | 2 +- tensorflow/core/grappler/utils.cc | 8 +- .../core/grappler/utils/grappler_test.cc | 5 +- .../core/grappler/utils/grappler_test.h | 3 +- 9 files changed, 141 insertions(+), 85 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 792f675043..ad3edc144a 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -158,7 +158,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { ArithmeticOptimizer optimizer; GraphDef output; - auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {}); EXPECT_EQ(1, tensors_expected.size()); Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -176,7 +176,7 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) { EXPECT_EQ("c1", new_div.input(0)); EXPECT_EQ("c1", new_div.input(1)); - auto tensors = EvaluateNodes(output, item.fetch); + auto tensors = EvaluateNodes(output, item.fetch, {}); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index bdec73e69e..22ede19493 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -109,33 +109,18 @@ class DeviceSimple : public DeviceBase { }; template -bool AllValuesAre(const TensorProto& tensor, const T& value) { - // TensorProto represents the content of the tensor in either _val or - // tensor_content. - typename checkpoint::SaveTypeTraits::RepeatedField* tensor_values = - checkpoint::MutableTensorProtoData(const_cast(&tensor)); - if (!tensor_values->empty()) { - for (const T& tensor_value : *tensor_values) { - if (tensor_value != value) { - return false; - } - } - return true; +bool AllValuesAre(const TensorProto& proto, const T& value) { + Tensor tensor; + if (!tensor.FromProto(proto)) { + return false; } - const auto tensor_content_size = tensor.tensor_content().size(); - if (tensor_content_size > 0) { - CHECK_EQ(0, tensor_content_size % sizeof(T)); - std::vector raw_values(tensor_content_size / sizeof(T)); - port::CopyToArray(tensor.tensor_content(), - reinterpret_cast(raw_values.data())); - for (int i = 0; i < tensor_content_size / sizeof(T); ++i) { - if (raw_values[i] != value) { - return false; - } + auto values = tensor.flat(); + for (int i = 0; i < tensor.NumElements(); ++i) { + if (values(i) != value) { + return false; } - return true; } - return false; + return true; } // Add new_input as a control input to node if it does not already depend on it. @@ -825,17 +810,23 @@ Status CreateConstantTensorAttrValue(DataType type, double value, t->set_dtype(type); *t->mutable_tensor_shape() = shape; switch (type) { - SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); - SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); - SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); - SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); - SET_TENSOR_VAL_CASE(DT_INT32, int32, int); - SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); - SET_TENSOR_VAL_CASE(DT_INT16, int32, int); - SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); - SET_TENSOR_VAL_CASE(DT_INT8, int32, int); - SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); - SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); + case DT_HALF: + t->add_half_val(static_cast(value).x); + break; + case DT_BFLOAT16: + t->add_half_val(static_cast(value).value); + break; + SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); + SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); + SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); + SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); + SET_TENSOR_VAL_CASE(DT_INT8, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); + SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); default: return errors::InvalidArgument("Unsupported type: ", type); } @@ -1388,8 +1379,8 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { } const auto dtype = node.attr().at("dtype").type(); switch (dtype) { - // TODO(rmlarsen): Make DT_HALF case compile. - // IS_ONES_CASE(DT_HALF); + IS_ONES_CASE(DT_HALF); + IS_ONES_CASE(DT_BFLOAT16); IS_ONES_CASE(DT_FLOAT); IS_ONES_CASE(DT_DOUBLE); IS_ONES_CASE(DT_COMPLEX64); @@ -1423,8 +1414,8 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { } const auto dtype = node.attr().at("dtype").type(); switch (dtype) { - // TODO(rmlarsen): Make DT_HALF case compile. - // IS_ZEROS_CASE(DT_HALF); + IS_ZEROS_CASE(DT_HALF); + IS_ZEROS_CASE(DT_BFLOAT16); IS_ZEROS_CASE(DT_FLOAT); IS_ZEROS_CASE(DT_DOUBLE); IS_ZEROS_CASE(DT_COMPLEX64); @@ -1511,9 +1502,8 @@ void ConstantFolding::ReplaceSubtractionFromZeroByNegation(NodeDef* node, } Status ConstantFolding::ReplaceOperationWithConstant( - double value, const TensorShapeProto& shape, NodeDef* node, - GraphDef* graph) { - AttrValue dtype_attr = node->attr().at("T"); + double value, const AttrValue& dtype_attr, const TensorShapeProto& shape, + NodeDef* node, GraphDef* graph) { AttrValue tensor_attr; TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(dtype_attr.type(), value, shape, &tensor_attr)); @@ -1947,8 +1937,14 @@ Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, (is_mul || is_matmul || optimize_zeros_divided_by_y)) { const PartialTensorShape shp(output_shape); if (shp.IsFullyDefined()) { - TF_RETURN_IF_ERROR(ReplaceOperationWithConstant(0, output_shape, node, - optimized_graph)); + AttrValue dtype_attr; + if (node->op() == "SparseMatMul") { + dtype_attr.set_type(DT_FLOAT); + } else { + dtype_attr = node->attr().at("T"); + } + TF_RETURN_IF_ERROR(ReplaceOperationWithConstant( + 0, dtype_attr, output_shape, node, optimized_graph)); continue; } // Even if an input shape is only partially known, we may known that it diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index b6645d335e..f8a9e90d62 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -83,7 +83,7 @@ class ConstantFolding : public GraphOptimizer { void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, GraphDef* graph); void ReplaceSubtractionFromZeroByNegation(NodeDef* node, GraphDef* graph); - Status ReplaceOperationWithConstant(double value, + Status ReplaceOperationWithConstant(double value, const AttrValue& dtype_attr, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); void ReplaceDivisionOfOnesByReciprocal(NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index dc9c1053d2..85f877883c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -28,7 +28,59 @@ namespace tensorflow { namespace grappler { namespace { -class ConstantFoldingTest : public GrapplerTest {}; +class ConstantFoldingTest : public GrapplerTest { + protected: + template + void SimpleNeutralElementTest() { + typedef typename EnumToDataType::Type T; + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Placeholder(s.WithOpName("x"), DTYPE, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Tensor zeros_t(DTYPE, TensorShape({2, 2})); + Tensor ones_t(DTYPE, TensorShape({2, 2})); + Tensor x_t(DTYPE, TensorShape({2, 2})); + for (int i = 0; i < 4; ++i) { + zeros_t.flat()(i) = T(0); + ones_t.flat()(i) = T(1); + x_t.flat()(i) = T(i + 1); + } + Output zeros = ops::Const(s.WithOpName("zeros"), zeros_t); + Output ones = ops::Const(s.WithOpName("ones"), ones_t); + Output mul1 = ops::Mul(s.WithOpName("mul1"), x, zeros); + Output mul2 = ops::Mul(s.WithOpName("mul2"), x, ones); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"mul1", "mul2"}; + ConstantFolding optimizer(nullptr /* cpu_device */); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + LOG(INFO) << output.DebugString(); + EXPECT_EQ(5, output.node_size()); + for (int i = 0; i < output.node_size(); ++i) { + const NodeDef& node = output.node(i); + const string& name = node.name(); + if (name == "mul1") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^x", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); + } else if (name == "mul2") { + EXPECT_EQ("Snapshot", node.op()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("^ones", node.input(1)); + } + } + auto tensors_expected = + EvaluateNodes(item.graph, {"mul1", "mul2"}, {{"x", x_t}}); + auto tensors = EvaluateNodes(output, {"mul1", "mul2"}, {{"x", x_t}}); + EXPECT_EQ(2, tensors_expected.size()); + EXPECT_EQ(2, tensors.size()); + for (int i = 0; i < 2; ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } + } +}; TEST_F(ConstantFoldingTest, SimpleFolding) { // Build a simple graph with a few trivially prunable ops. @@ -55,8 +107,8 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { EXPECT_EQ("Const", node_d.op()); std::vector fetch = {"d"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); - auto tensors = EvaluateNodes(output, fetch); + auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorEqual(tensors_expected[0], tensors[0]); @@ -141,10 +193,10 @@ TEST_F(ConstantFoldingTest, AddTree) { // Check that the result nodes have the expected value. std::vector fetch = {"c3", "c20"}; - auto tensor_expected = EvaluateNodes(item.graph, fetch); + auto tensor_expected = EvaluateNodes(item.graph, fetch, {}); EXPECT_EQ(fetch.size(), tensor_expected.size()); fetch = {"add_child", "mul_child"}; - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensor_expected[i], tensors[i]); @@ -322,6 +374,11 @@ TEST_F(ConstantFoldingTest, NeutralElement) { } } +TEST_F(ConstantFoldingTest, NeutralElement_ShortFloats) { + SimpleNeutralElementTest(); + SimpleNeutralElementTest(); +} + TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output cf_half = ops::Const(s.WithOpName("cf_half"), 0.5f, {1}); @@ -379,10 +436,10 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { // Check that the reciprocals have the expected value. std::vector fetch = {"cf_half"}; - auto tensor_expected = EvaluateNodes(item.graph, fetch); + auto tensor_expected = EvaluateNodes(item.graph, fetch, {}); EXPECT_EQ(fetch.size(), tensor_expected.size()); fetch = {"ConstantFolding/div_f_recip", "ConstantFolding/realdiv_recip"}; - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensor_expected[0], tensors[i]); @@ -590,8 +647,8 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { EXPECT_EQ("Const", new_d.op()); std::vector fetch = {"e", "f"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); - auto tensors = EvaluateNodes(output, fetch); + auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(fetch.size(), tensors_expected.size()); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { @@ -614,7 +671,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { GrapplerItem item; item.fetch.push_back("e"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {}); EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; @@ -631,8 +688,8 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { if (node.name() == "e") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"e"}); - auto expected = EvaluateNodes(item.graph, {"e"}); + auto folded = EvaluateNodes(output, {"e"}, {}); + auto expected = EvaluateNodes(item.graph, {"e"}, {}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -642,7 +699,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { } } EXPECT_EQ(1, found); - auto tensors = EvaluateNodes(output, item.fetch); + auto tensors = EvaluateNodes(output, item.fetch, {}); EXPECT_EQ(1, tensors.size()); test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } @@ -678,8 +735,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { if (node.name() == "i1") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"i1"}); - auto expected = EvaluateNodes(item.graph, {"i1"}); + auto folded = EvaluateNodes(output, {"i1"}, {}); + auto expected = EvaluateNodes(item.graph, {"i1"}, {}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -689,8 +746,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { if (node.name() == "i2") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"i2"}); - auto expected = EvaluateNodes(item.graph, {"i2"}); + auto folded = EvaluateNodes(output, {"i2"}, {}); + auto expected = EvaluateNodes(item.graph, {"i2"}, {}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -808,8 +865,8 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { } EXPECT_EQ(8, constant_folded); - auto expected = EvaluateNodes(item.graph, outputs); - auto optimized = EvaluateNodes(output, outputs); + auto expected = EvaluateNodes(item.graph, outputs, {}); + auto optimized = EvaluateNodes(output, outputs, {}); ASSERT_EQ(expected.size(), optimized.size()); for (int i = 0; i < expected.size(); ++i) { test::ExpectTensorEqual(expected[i], optimized[i]); @@ -1236,7 +1293,7 @@ TEST_F(ConstantFoldingTest, MergeNodes) { EXPECT_EQ(6, found_nodes); std::vector fetch = {"out1", "idx1"}; - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(2, tensors.size()); const Tensor& out_value = tensors[0]; EXPECT_EQ(3 * 5, out_value.NumElements()); @@ -1891,8 +1948,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } std::vector fetch = {"acc0"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); - auto tensors = EvaluateNodes(output, fetch); + auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); @@ -1926,7 +1983,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { item.fetch = {"concat0", "concat1", "concat2", "concat3", "concat4", "concat5", "concat6", "concat7", "concat8", "concat9"}; - auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); + auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}, {}); EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; @@ -1977,7 +2034,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { } } - auto tensors = EvaluateNodes(output, {"concat0"}); + auto tensors = EvaluateNodes(output, {"concat0"}, {}); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } @@ -2075,8 +2132,8 @@ TEST_F(ConstantFoldingTest, TrivialPack) { } std::vector fetch = {"stack"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch); - auto tensors = EvaluateNodes(output, fetch); + auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape()); diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 52a1118080..deb2fabded 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -414,8 +414,9 @@ TEST_F(FunctionOptimizerTest, SymbolicGradients) { Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - std::vector expected = EvaluateNodes(item.graph, {"out1", "out2"}); - std::vector optimized = EvaluateNodes(output, {"out1", "out2"}); + std::vector expected = + EvaluateNodes(item.graph, {"out1", "out2"}, {}); + std::vector optimized = EvaluateNodes(output, {"out1", "out2"}, {}); test::ExpectTensorEqual(expected[0], optimized[0]); test::ExpectTensorEqual(expected[1], optimized[1]); } @@ -478,8 +479,8 @@ TEST_F(FunctionOptimizerTest, SymbolicGradientsIdentity) { EXPECT_EQ("Identity", output.node(i).op()); } - std::vector expected = EvaluateNodes(item.graph, {"out"}); - std::vector optimized = EvaluateNodes(output, {"out"}); + std::vector expected = EvaluateNodes(item.graph, {"out"}, {}); + std::vector optimized = EvaluateNodes(output, {"out"}, {}); test::ExpectTensorEqual(expected[0], optimized[0]); } diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 9595936e9e..a1f80802dd 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -426,7 +426,7 @@ TEST_F(MemoryOptimizerTest, AccumulationRewrites) { EXPECT_EQ(4, count); std::vector fetch = {"a", "b", "c", "e"}; - auto tensors = EvaluateNodes(output, fetch); + auto tensors = EvaluateNodes(output, fetch, {}); EXPECT_EQ(4, tensors.size()); for (int i = 0; i < tensors[0].NumElements(); ++i) { diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 829bfe9e31..86a6d5000d 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -33,8 +33,8 @@ namespace { template bool SafeSetScalarTensorValue(double value, Tensor* tensor) { using RealType = typename Eigen::NumTraits::Real; - if (value > std::numeric_limits::max() || - value < std::numeric_limits::min()) { + if (value > static_cast(std::numeric_limits::max()) || + value < static_cast(std::numeric_limits::min())) { return false; } tensor->flat()(0) = static_cast(value); @@ -473,8 +473,8 @@ Status SetTensorValue(DataType dtype, int value, Tensor* tensor) { "Expected scalar tensor, got num_elements = ", tensor->NumElements()); } switch (dtype) { - // TODO(rmlarsen): Handle DT_HALF. - // HANDLE_CASE(DT_HALF); + HANDLE_CASE(DT_HALF); + HANDLE_CASE(DT_BFLOAT16); HANDLE_CASE(DT_BOOL); HANDLE_CASE(DT_FLOAT); HANDLE_CASE(DT_DOUBLE); diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index ee126f4955..5c96359867 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -40,12 +40,13 @@ GrapplerTest::GrapplerTest() { } std::vector GrapplerTest::EvaluateNodes( - const GraphDef& graph, const std::vector& node_names) const { + const GraphDef& graph, const std::vector& node_names, + const std::vector>& inputs) const { std::unique_ptr session(NewSession(options_)); TF_CHECK_OK(session->Create(graph)); RunOptions run_options; std::vector output_tensors; - TF_CHECK_OK(session->Run(run_options, {}, node_names, node_names, + TF_CHECK_OK(session->Run(run_options, inputs, node_names, node_names, &output_tensors, nullptr)); TF_CHECK_OK(session->Close()); return output_tensors; diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index e0c67381a4..4b160e7f16 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -35,7 +35,8 @@ class GrapplerTest : public ::testing::Test { protected: std::vector EvaluateNodes( - const GraphDef& graph, const std::vector& node_names) const; + const GraphDef& graph, const std::vector& node_names, + const std::vector>& inputs) const; std::vector EvaluateFetchNodes(const GrapplerItem& item) const; -- GitLab From b2644288ebfb8a4cc52231c3ca93a968397c860a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 16:53:13 -0700 Subject: [PATCH 581/960] Change the host-op result per TPU step from a single value to a collection of values. PiperOrigin-RevId: 190696953 --- .../contrib/tpu/profiler/tf_op_stats.proto | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 20ed7419fd..590db2c376 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -199,10 +199,22 @@ message HostOpsPerTpuStep { map step_diffs = 5; } +message HostOpsDetailsPerCore { + // Map from core id to HostOpsPerTpuStep. + map core_map = 1; +} + +message HostOpsDetailsPerHost { + // Map from hostname to a map from core id to HostOpsPerTpuStep. + map host_map = 1; +} + // Result proto for the host ops for all TPU steps. message HostOpsResult { - // A sequence of HostOpsPerTpuStep (one for each TPU step) - repeated HostOpsPerTpuStep host_op_sequence = 1; + reserved 1; // (was repeated HostOpsPerTpuStep host_op_sequence) + // A sequence of records with one for each TPU step. Each record + // is a map from hostname to a map from core id to HostOpsPerTpuStep. + repeated HostOpsDetailsPerHost hostops_details = 2; } // Result proto for TfStatsHelper. -- GitLab From 6f8b85d301140ce42c0aa4871750ee0aec758105 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 17:02:59 -0700 Subject: [PATCH 582/960] [XLA] Redesign: implement Tuple and GetTupleElement. PiperOrigin-RevId: 190698245 --- .../xla/client/xla_client/xla_builder.cc | 33 ++++++- .../compiler/xla/service/shape_inference.cc | 6 ++ .../compiler/xla/service/shape_inference.h | 3 + tensorflow/compiler/xla/tests/BUILD | 2 + .../xla/tests/client_library_test_base.cc | 22 ++++- .../xla/tests/client_library_test_base.h | 6 +- tensorflow/compiler/xla/tests/tuple_test.cc | 99 +++++++++---------- 7 files changed, 115 insertions(+), 56 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index fcaf393b6b..7d39701b10 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -491,11 +491,40 @@ XlaOp XlaBuilder::Select(const XlaOp& pred, const XlaOp& on_true, } XlaOp XlaBuilder::Tuple(tensorflow::gtl::ArraySlice elements) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + std::vector operand_shape_ptrs; + std::vector operand_shapes; + for (const XlaOp& e : elements) { + TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(e)); + operand_shapes.push_back(shape); + } + c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), + [](const Shape& shape) { return &shape; }); + TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), + ShapeInference::InferVariadicOpShape( + HloOpcode::kTuple, operand_shape_ptrs)); + return AddInstruction(std::move(instr), HloOpcode::kTuple, elements); + }()); } XlaOp XlaBuilder::GetTupleElement(const XlaOp& tuple_data, int64 index) { - return UnimplementedOp(); + return NoteErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + TF_ASSIGN_OR_RETURN(const Shape& tuple_shape, GetShape(tuple_data)); + if (!ShapeUtil::IsTuple(tuple_shape)) { + return InvalidArgument( + "Operand to GetTupleElement() is not a tuple; got %s", + ShapeUtil::HumanString(tuple_shape).c_str()); + } + *instr.mutable_shape() = + ShapeUtil::GetTupleElementShape(tuple_shape, index); + + instr.set_tuple_index(index); + + return AddInstruction(std::move(instr), HloOpcode::kGetTupleElement, + {tuple_data}); + }()); } XlaOp XlaBuilder::Eq(const XlaOp& lhs, const XlaOp& rhs, diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 36456d552d..77e12d3602 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1070,6 +1070,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (const HloInstruction* operand : operands) { operand_shapes.push_back(&operand->shape()); } + return InferVariadicOpShape(opcode, operand_shapes); +} + +/* static */ StatusOr ShapeInference::InferVariadicOpShape( + HloOpcode opcode, + tensorflow::gtl::ArraySlice operand_shapes) { return InferVariadicOpShape(OpcodeToVariadicOperation(opcode), operand_shapes); } diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 88830e6d25..9da2c99b41 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -83,6 +83,9 @@ class ShapeInference { static StatusOr InferVariadicOpShape( VariadicOperation operation, tensorflow::gtl::ArraySlice operand_shapes); + static StatusOr InferVariadicOpShape( + HloOpcode opcode, + tensorflow::gtl::ArraySlice operand_shapes); static StatusOr InferVariadicOpShape( HloOpcode opcode, tensorflow::gtl::ArraySlice operands); diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 5ab25f2264..2fd97fa38e 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1011,6 +1011,8 @@ xla_test( "//tensorflow/compiler/xla/client:computation", "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/compiler/xla/client/xla_client:xla_builder", + "//tensorflow/compiler/xla/client/xla_client:xla_computation", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc index ec95a68ead..4a9faef1dc 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.cc +++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc @@ -441,8 +441,9 @@ void ClientLibraryTestBase::ComputeAndCompareR1U8( EXPECT_EQ(expected, actual->GetR1U8AsString()); } +template void ClientLibraryTestBase::ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments) { auto actual_status = ExecuteAndTransfer(builder, arguments); EXPECT_IS_OK(actual_status.status()); @@ -453,8 +454,9 @@ void ClientLibraryTestBase::ComputeAndCompareTuple( LiteralTestUtil::ExpectEqual(expected, *actual); } +template void ClientLibraryTestBase::ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error) { auto actual_status = ExecuteAndTransfer(builder, arguments); EXPECT_IS_OK(actual_status.status()); @@ -619,4 +621,20 @@ template void ClientLibraryTestBase::ComputeAndCompareLiteral( tensorflow::gtl::ArraySlice arguments, ErrorSpec error, const Shape* shape_with_layout); +template void ClientLibraryTestBase::ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments); + +template void ClientLibraryTestBase::ComputeAndCompareTuple( + XlaBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments); + +template void ClientLibraryTestBase::ComputeAndCompareTuple( + ComputationBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error); + +template void ClientLibraryTestBase::ComputeAndCompareTuple( + XlaBuilder* builder, const Literal& expected, + tensorflow::gtl::ArraySlice arguments, ErrorSpec error); + } // namespace xla diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.h b/tensorflow/compiler/xla/tests/client_library_test_base.h index 5ff200be03..be90f14c8e 100644 --- a/tensorflow/compiler/xla/tests/client_library_test_base.h +++ b/tensorflow/compiler/xla/tests/client_library_test_base.h @@ -217,11 +217,13 @@ class ClientLibraryTestBase : public ::testing::Test { // Convenience method for running a built computation, transferring the // result, and comparing it to the expected tuple literal. + template void ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments); + template void ComputeAndCompareTuple( - ComputationBuilder* builder, const Literal& expected, + BuilderT* builder, const Literal& expected, tensorflow::gtl::ArraySlice arguments, ErrorSpec error); // Convenience method for running a built computation and comparing the result diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index fa60af4b6a..098be6d7aa 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -20,6 +20,8 @@ limitations under the License. #include "tensorflow/compiler/xla/client/computation.h" #include "tensorflow/compiler/xla/client/computation_builder.h" #include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_client/xla_computation.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -41,7 +43,7 @@ class TupleTest : public ClientLibraryTestBase { // Tests a tuple-shaped constant. XLA_TEST_F(TupleTest, TupleConstant) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const float constant_scalar = 7.3f; std::initializer_list constant_vector = {1.1f, 2.0f, 3.3f}; @@ -54,13 +56,13 @@ XLA_TEST_F(TupleTest, TupleConstant) { Literal::CreateR1(constant_vector).get(), Literal::CreateR2(constant_matrix).get()}); - auto result = builder.ConstantLiteral(*value); + builder.ConstantLiteral(*value); ComputeAndCompareTuple(&builder, *value, {}, error_spec_); } // Tests a tuple made of scalar constants. XLA_TEST_F(TupleTest, TupleScalarConstant) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const float constant_scalar1 = 7.3f; const float constant_scalar2 = 1.2f; @@ -68,13 +70,13 @@ XLA_TEST_F(TupleTest, TupleScalarConstant) { Literal::MakeTuple({Literal::CreateR0(constant_scalar1).get(), Literal::CreateR0(constant_scalar2).get()}); - auto result = builder.ConstantLiteral(*value); + builder.ConstantLiteral(*value); ComputeAndCompareTuple(&builder, *value, {}, error_spec_); } // Tests the creation of tuple data. XLA_TEST_F(TupleTest, TupleCreate) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); const float constant_scalar = 7.3f; std::initializer_list constant_vector = {1.1f, 2.0f, 3.3f}; @@ -82,9 +84,9 @@ XLA_TEST_F(TupleTest, TupleCreate) { {1.1f, 2.2f, 3.5f}, // row 0 {4.8f, 5.0f, 6.7f}, // row 1 }; - auto result = builder.Tuple({builder.ConstantR0(constant_scalar), - builder.ConstantR1(constant_vector), - builder.ConstantR2(constant_matrix)}); + builder.Tuple({builder.ConstantR0(constant_scalar), + builder.ConstantR1(constant_vector), + builder.ConstantR2(constant_matrix)}); auto expected = Literal::MakeTuple({Literal::CreateR0(constant_scalar).get(), @@ -95,9 +97,9 @@ XLA_TEST_F(TupleTest, TupleCreate) { // Tests the creation of tuple data. XLA_TEST_F(TupleTest, TupleCreateWithZeroElementEntry) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); - auto result = builder.Tuple( + builder.Tuple( {builder.ConstantR0(7.0), builder.ConstantR1({})}); auto expected = Literal::MakeTuple({Literal::CreateR0(7.0).get(), @@ -107,15 +109,15 @@ XLA_TEST_F(TupleTest, TupleCreateWithZeroElementEntry) { // Tests the creation of an empty tuple. XLA_TEST_F(TupleTest, EmptyTupleCreate) { - ComputationBuilder builder(client_, TestName()); - auto result = builder.Tuple({}); + XlaBuilder builder(TestName()); + builder.Tuple({}); auto expected = Literal::MakeTuple({}); ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); } // Trivial test for extracting a tuple element with GetTupleElement. XLA_TEST_F(TupleTest, GetTupleElement) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list constant_vector = {1.f, 2.f, 3.f}; std::initializer_list> constant_matrix = { {1.f, 2.f, 3.f}, // row 0 @@ -123,23 +125,23 @@ XLA_TEST_F(TupleTest, GetTupleElement) { }; auto tuple_data = builder.Tuple({builder.ConstantR1(constant_vector), builder.ConstantR2(constant_matrix)}); - auto matrix_element = builder.GetTupleElement(tuple_data, 1); + builder.GetTupleElement(tuple_data, 1); ComputeAndCompareR2(&builder, Array2D(constant_matrix), {}, error_spec_); } // Trivial test for extracting a tuple element with GetTupleElement. XLA_TEST_F(TupleTest, GetTupleElementWithZeroElements) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto tuple_data = builder.Tuple( {builder.ConstantR1({}), builder.ConstantR2FromArray2D(Array2D(0, 101))}); - auto matrix_element = builder.GetTupleElement(tuple_data, 1); + builder.GetTupleElement(tuple_data, 1); ComputeAndCompareR2(&builder, Array2D(0, 101), {}, error_spec_); } XLA_TEST_F(TupleTest, GetTupleElementOfNonTupleFailsGracefully) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto value = builder.ConstantR1({4.5f}); builder.GetTupleElement(value, 1); auto result_status = builder.Build(); @@ -152,7 +154,7 @@ XLA_TEST_F(TupleTest, GetTupleElementOfNonTupleFailsGracefully) { // Extracts both elements from a tuple with GetTupleElement and then adds them // together. XLA_TEST_F(TupleTest, AddTupleElements) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list constant_vector = {1.f, 2.f, 3.f}; std::initializer_list> constant_matrix = { {1.f, 2.f, 3.f}, // row 0 @@ -164,22 +166,22 @@ XLA_TEST_F(TupleTest, AddTupleElements) { auto matrix_element = builder.GetTupleElement(tuple_data, 1); auto vector_shape = builder.GetShape(vector_element).ConsumeValueOrDie(); auto matrix_shape = builder.GetShape(matrix_element).ConsumeValueOrDie(); - auto result = builder.Add(matrix_element, vector_element, - /*broadcast_dimensions=*/{1}); + builder.Add(matrix_element, vector_element, + /*broadcast_dimensions=*/{1}); Array2D expected({ {2.f, 4.f, 6.f}, // row 0 {5.f, 7.f, 9.f}, // row 1 }); - ASSERT_TRUE(ShapeUtil::ShapeIs(*vector_shape, F32, {3})); - ASSERT_TRUE(ShapeUtil::ShapeIs(*matrix_shape, F32, {/*y=*/2, /*x=*/3})); + ASSERT_TRUE(ShapeUtil::ShapeIs(vector_shape, F32, {3})); + ASSERT_TRUE(ShapeUtil::ShapeIs(matrix_shape, F32, {/*y=*/2, /*x=*/3})); ComputeAndCompareR2(&builder, expected, {}, error_spec_); } // Extracts both elements from a tuple and then puts them into a new tuple in // the opposite order. XLA_TEST_F(TupleTest, TupleGTEToTuple) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list constant_vector = {1.f, 2.f, 3.f}; std::initializer_list> constant_matrix = { {1.f, 2.f, 3.f}, // row 0 @@ -187,8 +189,8 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) { }; auto tuple_data = builder.Tuple({builder.ConstantR1(constant_vector), builder.ConstantR2(constant_matrix)}); - auto new_tuple = builder.Tuple({builder.GetTupleElement(tuple_data, 1), - builder.GetTupleElement(tuple_data, 0)}); + builder.Tuple({builder.GetTupleElement(tuple_data, 1), + builder.GetTupleElement(tuple_data, 0)}); auto expected = Literal::MakeTuple({Literal::CreateR2(constant_matrix).get(), Literal::CreateR1(constant_vector).get()}); @@ -196,8 +198,8 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) { } XLA_TEST_F(TupleTest, SelectBetweenPredTuples) { - ComputationBuilder b(client_, TestName()); - ComputationDataHandle v1, v2; + XlaBuilder b(TestName()); + XlaOp v1, v2; for (bool direction : {false, true}) { std::unique_ptr v1_data = @@ -210,7 +212,7 @@ XLA_TEST_F(TupleTest, SelectBetweenPredTuples) { auto v2_gt = b.Gt(v2, v1); // true auto v1_v2 = b.Tuple({v1_gt, v2_gt}); // {false, true} auto v2_v1 = b.Tuple({v2_gt, v1_gt}); // {true, false} - auto select = b.Select(direction ? v1_gt : v2_gt, v1_v2, v2_v1); + b.Select(direction ? v1_gt : v2_gt, v1_v2, v2_v1); auto expected = Literal::MakeTuple({Literal::CreateR0(direction).get(), Literal::CreateR0(!direction).get()}); @@ -237,7 +239,7 @@ XLA_TEST_F(TupleTest, TupleGTEToTupleToGTEAdd) { // \ (tuple10)-- / // \ / \ / // -----(GTE 0)-- --(GTE 1)---------- - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list constant_vector = {1.f, 2.f, 3.f}; std::initializer_list> constant_matrix = { {1.f, 2.f, 3.f}, // row 0 @@ -257,8 +259,8 @@ XLA_TEST_F(TupleTest, TupleGTEToTupleToGTEAdd) { auto addvectors = builder.Add(vector_from_01, vector_from_10); auto addmatrices = builder.Add(matrix_from_01, matrix_from_10); - auto result = builder.Add(addmatrices, addvectors, - /*broadcast_dimensions=*/{1}); + builder.Add(addmatrices, addvectors, + /*broadcast_dimensions=*/{1}); Array2D expected({ {4.f, 8.f, 12.f}, // row 0 @@ -269,7 +271,7 @@ XLA_TEST_F(TupleTest, TupleGTEToTupleToGTEAdd) { XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnFalse)) { // Tests a selection between tuples with "false" path taken. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; std::initializer_list vec2 = {2.f, 4.f, 6.f}; @@ -278,8 +280,7 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnFalse)) { auto tuple21 = builder.Tuple( {builder.ConstantR1(vec2), builder.ConstantR1(vec1)}); - auto select = - builder.Select(builder.ConstantR0(false), tuple12, tuple21); + builder.Select(builder.ConstantR0(false), tuple12, tuple21); auto expected = Literal::MakeTuple({Literal::CreateR1(vec2).get(), Literal::CreateR1(vec1).get()}); ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); @@ -314,7 +315,7 @@ XLA_TEST_F(TupleTest, TuplesInAMap) { XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnTrue)) { // Tests a selection between tuples with "true" path taken. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; std::initializer_list vec2 = {2.f, 4.f, 6.f}; @@ -323,8 +324,7 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnTrue)) { auto tuple21 = builder.Tuple( {builder.ConstantR1(vec2), builder.ConstantR1(vec1)}); - auto select = - builder.Select(builder.ConstantR0(true), tuple12, tuple21); + builder.Select(builder.ConstantR0(true), tuple12, tuple21); auto expected = Literal::MakeTuple({Literal::CreateR1(vec1).get(), Literal::CreateR1(vec2).get()}); ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); @@ -333,7 +333,7 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesOnTrue)) { XLA_TEST_F(TupleTest, SelectBetweenTuplesElementResult) { // Tests a selection between tuples but the final result is an element of the // tuple, not the whole tuple. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; std::initializer_list vec2 = {2.f, 4.f, 6.f}; @@ -344,7 +344,7 @@ XLA_TEST_F(TupleTest, SelectBetweenTuplesElementResult) { auto select = builder.Select(builder.ConstantR0(false), tuple12, tuple21); - auto element = builder.GetTupleElement(select, 0); + builder.GetTupleElement(select, 0); ComputeAndCompareR1(&builder, vec2, {}, error_spec_); } @@ -368,7 +368,7 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesCascaded)) { // / --(GTE 1)-- // / // (tuple 21) - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; std::initializer_list vec2 = {2.f, 4.f, 6.f}; @@ -384,8 +384,8 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesCascaded)) { builder.Select(builder.GetTupleElement(pred_tuple, 0), tuple12, tuple21); auto select2 = builder.Select(builder.GetTupleElement(pred_tuple, 1), tuple21, select1); - auto result = builder.Add(builder.GetTupleElement(select2, 0), - builder.GetTupleElement(select2, 1)); + builder.Add(builder.GetTupleElement(select2, 0), + builder.GetTupleElement(select2, 1)); ComputeAndCompareR1(&builder, {3.f, 6.f, 9.f}, {}, error_spec_); } @@ -394,7 +394,7 @@ XLA_TEST_F(TupleTest, DISABLED_ON_CPU_PARALLEL(SelectBetweenTuplesReuseConstants)) { // Similar to SelectBetweenTuples, but the constants are shared between the // input tuples. - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); std::initializer_list vec1 = {1.f, 2.f, 3.f}; std::initializer_list vec2 = {2.f, 4.f, 6.f}; @@ -403,19 +403,18 @@ XLA_TEST_F(TupleTest, auto tuple12 = builder.Tuple({c1, c2}); auto tuple21 = builder.Tuple({c2, c1}); - auto select = - builder.Select(builder.ConstantR0(false), tuple12, tuple21); + builder.Select(builder.ConstantR0(false), tuple12, tuple21); + auto expected = Literal::MakeTuple({Literal::CreateR1(vec2).get(), Literal::CreateR1(vec1).get()}); ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); } XLA_TEST_F(TupleTest, NestedTuples) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); auto inner_tuple = builder.Tuple( {builder.ConstantR1({1.0, 2.0}), builder.ConstantR0(42.0)}); - auto outer_tuple = - builder.Tuple({inner_tuple, builder.ConstantR1({22.0, 44.0})}); + builder.Tuple({inner_tuple, builder.ConstantR1({22.0, 44.0})}); auto expected_v1 = Literal::CreateR1({1.0, 2.0}); auto expected_s = Literal::CreateR0(42.0); @@ -429,7 +428,7 @@ XLA_TEST_F(TupleTest, NestedTuples) { } XLA_TEST_F(TupleTest, GetTupleElementOfNestedTuple) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); Shape data_shape = ShapeUtil::MakeShape(F32, {3}); Shape inner_tuple_shape = ShapeUtil::MakeTupleShape({data_shape, data_shape}); @@ -460,7 +459,7 @@ XLA_TEST_F(TupleTest, GetTupleElementOfNestedTuple) { } XLA_TEST_F(TupleTest, ComplexTuples) { - ComputationBuilder builder(client_, TestName()); + XlaBuilder builder(TestName()); { Shape c64r0 = ShapeUtil::MakeShape(C64, {}); Shape c64r1 = ShapeUtil::MakeShape(C64, {2}); -- GitLab From 3551d41e36c569a200d3cbaeb1074b4165fb8f0a Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Tue, 27 Mar 2018 15:35:37 -0700 Subject: [PATCH 583/960] Update tensorboard dependency to 1.7.0+ --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 7a3184d64d..8b83257887 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.6.0, < 1.7.0', + 'tensorboard >= 1.7.0, < 1.8.0', 'termcolor >= 1.1.0', ] -- GitLab From 3dc861e754ffb86286038ef9c78327f59384eaad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 17:13:22 -0700 Subject: [PATCH 584/960] K-FAC: Bugfixes for TPU compatibility with covariance update ops. PiperOrigin-RevId: 190699635 --- .../contrib/kfac/python/ops/fisher_factors.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 353e1c6abb..0d40d265a1 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -336,12 +336,16 @@ class FisherFactor(object): new_cov = math_ops.add_n(new_cov_contribs) / float(self._num_towers) - # I have no idea if the TPU code below is still correct since I don't know - # what it actually does. Also, this code is not present in some of the - # other versions of make_covariance_update_op. Does it matter? - # Synchronize value across all TPU cores. + # Compute average of 'new_cov' across all TPU cores. On a TPU, each + # instance of 'new_cov' will be based on a different minibatch. This ensures + # that by the end of assign_moving_average(), all TPU cores see the same + # value for self._cov. + # + # Other implementations of make_covariance_update_op() that accumulate + # statistics in other variables should mimic this behavior. if utils.on_tpu(): new_cov = utils.cross_replica_mean(new_cov) + return moving_averages.assign_moving_average( self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS) @@ -1398,6 +1402,10 @@ class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): new_cov_dt1 = (math_ops.add_n(new_cov_dt1_contribs) / float(self._num_towers)) + # See comments in FisherFactor.make_covariance_update_op() for details. + if utils.on_tpu(): + new_cov_dt1 = utils.cross_replica_mean(new_cov_dt1) + op2 = moving_averages.assign_moving_average( self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) -- GitLab From 52271c7c2ae3905bb557ebf355a4eca44a930c1f Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Mar 2018 17:14:50 -0700 Subject: [PATCH 585/960] Make tf.keras.Sequential (properly) Checkpointable Just numbers Layers like "layer-N". It may also make sense to track them by "ClassName-M", but that's a backwards-compatible change. Special-cases all of the dependency collection, since Layers can be added and removed from Sequential. PiperOrigin-RevId: 190699818 --- .../eager/python/checkpointable_utils_test.py | 33 +++++++++++++++++++ .../keras/_impl/keras/engine/sequential.py | 31 +++++++++++++++++ tensorflow/python/training/checkpointable.py | 1 + 3 files changed, 65 insertions(+) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index a8c47d76d1..5e1b64728a 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras._impl.keras.engine import sequential from tensorflow.python.keras._impl.keras.engine import training from tensorflow.python.layers import core from tensorflow.python.ops import control_flow_ops @@ -1036,6 +1037,38 @@ class CheckpointingTests(test.TestCase): beta1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(3., self.evaluate(beta1_power)) + @test_util.run_in_graph_and_eager_modes() + def test_sequential(self): + model = sequential.Sequential() + checkpoint = checkpointable_utils.Checkpoint(model=model) + model.add(core.Dense(4)) + second_dense = core.Dense(5) + model.add(second_dense) + model(constant_op.constant([[1.]])) + checkpoint.restore(None).initialize_or_restore() + self.evaluate(second_dense.bias.assign( + constant_op.constant([1., 2., 3., 4., 5.]))) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(second_dense.bias.assign( + constant_op.constant([5., 6., 7., 8., 9.]))) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.assertAllEqual([1., 2., 3., 4., 5.], self.evaluate(second_dense.bias)) + + deferred_sequential = sequential.Sequential() + deferred_sequential_checkpoint = checkpointable_utils.Checkpoint( + model=deferred_sequential) + status = deferred_sequential_checkpoint.restore(save_path) + deferred_sequential.add(core.Dense(4)) + deferred_sequential(constant_op.constant([[1.]])) + deferred_second_dense = core.Dense(5) + deferred_sequential.add(deferred_second_dense) + deferred_sequential(constant_op.constant([[1.]])) + status.run_restore_ops() + self.assertAllEqual([1., 2., 3., 4., 5.], + self.evaluate(deferred_second_dense.bias)) + class TemplateTests(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/engine/sequential.py b/tensorflow/python/keras/_impl/keras/engine/sequential.py index 66cef1f5b9..2ef99d5ab3 100644 --- a/tensorflow/python/keras/_impl/keras/engine/sequential.py +++ b/tensorflow/python/keras/_impl/keras/engine/sequential.py @@ -29,6 +29,7 @@ from tensorflow.python.keras._impl.keras.engine.input_layer import Input from tensorflow.python.keras._impl.keras.engine.input_layer import InputLayer from tensorflow.python.keras._impl.keras.engine.training import Model from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util.tf_export import tf_export @@ -192,6 +193,36 @@ class Sequential(Model): self.build() else: self._layers.append(layer) + # In implementing Checkpointable, Sequential does not track its Layers + # normally, since they may be added and removed (in pop()). Instead, it + # names everything on demand (gathering dependencies in + # _checkpoint_dependencies, and looking them up in + # _lookup_dependency). _handle_deferred_dependencies just checks whether an + # existing checkpoint load targets this Layer, it does not create a + # dependency on the Layer. + self._handle_deferred_dependencies( + name='layer-%d' % (len(self._layers) - 1), checkpointable=layer) + + @property + def _checkpoint_dependencies(self): + """For implementing Checkpointable. Layers which should be saved.""" + return super(Sequential, self)._checkpoint_dependencies + [ + checkpointable.CheckpointableReference( + name='layer-%d' % layer_index, ref=layer) + for layer_index, layer in enumerate(self._layers)] + + def _lookup_dependency(self, name): + """For implementing Checkpointable. Looks up a Layer.""" + super_lookup = super(Sequential, self)._lookup_dependency(name=name) + if super_lookup is not None: + return super_lookup + if name.startswith('layer-'): + try: + return self._layers[int(name[6:])] + except IndexError: + return None + else: + return None def pop(self): """Removes the last layer in the model. diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index d0650eb127..bbbe1e8ac5 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -560,6 +560,7 @@ class CheckpointableBase(object): checkpointable: The Checkpointable object to restore (inheriting from `CheckpointableBase`). """ + self._maybe_initialize_checkpointable() deferred_dependencies_list = self._deferred_dependencies.pop(name, ()) for checkpoint_position in sorted( deferred_dependencies_list, -- GitLab From e4b367cef5f89d6741dac223c91a11fda9ca63ae Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 17:15:14 -0700 Subject: [PATCH 586/960] Make slot_creator use DistributionStrategy for co-locating variables. Make DistributionStrategy.colocate_vars_with() match the existing behavior of ops.colocate_with() by default, for compatibility. PiperOrigin-RevId: 190699882 --- tensorflow/python/training/distribute.py | 16 ++++++++-------- tensorflow/python/training/slot_creator.py | 8 +++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 9261e13230..757ba71c4a 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -557,9 +557,14 @@ class DistributionStrategy(object): raise NotImplementedError("must be implemented in descendants") def colocate_vars_with(self, colocate_with_variable): - """Controls which devices variables will be created on. + """Scope that controls which devices variables will be created on. - Note this may only be used inside `self.scope()`. + No operations should be added to the graph inside this scope, it + should only be used when creating variables (some implementations + work by changing variable creation, others work by using a + tf.colocate_with() scope). + + This may only be used inside `self.scope()`. Example usage: @@ -1032,13 +1037,8 @@ class _DefaultDistributionStrategy(DistributionStrategy): def colocate_vars_with(self, colocate_with_variable): """Does not require `self.scope`.""" - def create_colocated_variable(next_creator, *args, **kwargs): - _require_distribution_strategy_scope(self) - with ops.colocate_with(colocate_with_variable): - return next_creator(*args, **kwargs) - _require_distribution_strategy_scope(self) - return variable_scope.variable_creator_scope(create_colocated_variable) + return ops.colocate_with(colocate_with_variable) def distribute_dataset(self, dataset): # TODO(josh11b): Support for this when executing eagerly is currently only diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py index 9ac52dd071..258a6f045d 100644 --- a/tensorflow/python/training/slot_creator.py +++ b/tensorflow/python/training/slot_creator.py @@ -40,12 +40,12 @@ from __future__ import division from __future__ import print_function from tensorflow.python.eager import context -from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.training import distribute as distribute_lib def _create_slot_var(primary, val, scope, validate_shape, shape, dtype): @@ -112,7 +112,8 @@ def create_slot(primary, val, name, colocate_with_primary=True): prefix = primary.op.name with variable_scope.variable_scope(None, prefix + "/" + name): if colocate_with_primary: - with ops.colocate_with(primary): + distribution_strategy = distribute_lib.get_distribution_strategy() + with distribution_strategy.colocate_vars_with(primary): return _create_slot_var(primary, val, "", validate_shape, None, None) else: return _create_slot_var(primary, val, "", validate_shape, None, None) @@ -148,7 +149,8 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name, prefix = primary.op.name with variable_scope.variable_scope(None, prefix + "/" + name): if colocate_with_primary: - with ops.colocate_with(primary): + distribution_strategy = distribute_lib.get_distribution_strategy() + with distribution_strategy.colocate_vars_with(primary): return _create_slot_var(primary, initializer, "", validate_shape, shape, dtype) else: -- GitLab From 50e1888fa89bce621e988a92ede3dc362e37b248 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 27 Mar 2018 17:16:31 -0700 Subject: [PATCH 587/960] [XLA] Assert that all buffers and sub-buffers passed to XLA have an explicit pointer. In the past, we allowed sub-buffers to be null if the top-level tuple was non-null. This doesn't actually work well on the GPU: For ops that are implemented using cudnn or cublas, we have to have a pointer to the sub-buffer on the host in order to make the call. Retrieving it from the GPU in an efficient manner is complicated, and the best we can come up with isn't all that efficient (fundamentally having to pull data down from the GPU blocks the ability of the CPU to "run ahead" of the GPU). Since TF wasn't making use of our flexibility *anyway*, we add the requirement that XLA be given non-null pointers to all sub-buffers. Changes to the XLA:GPU backend to take advantage of this will come separately. PiperOrigin-RevId: 190700021 --- .../xla/service/gpu/gpu_executable.cc | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 04b37d913e..28f9344795 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -267,16 +267,22 @@ StatusOr> GpuExecutable::ExecuteOnStream( ++i) { const BufferAllocation& allocation = assignment_->GetAllocation(i); if (allocation.is_entry_computation_parameter()) { - // The caller must give us a buffer for ShapeIndex {} of every parameter. - // It can optionally give us a buffer for other ShapeIndices, but we - // ignore them: Because we can't rely on these sub-buffers' addresses - // being available, our generated code can't use them. Instead, it must - // chase pointers starting at the tuple root. - if (allocation.param_shape_index().empty()) { - auto param_no = allocation.parameter_number(); - buffer_allocations_builder.RegisterBuffer( - i, arguments[param_no]->root_buffer()); + auto param_no = allocation.parameter_number(); + se::DeviceMemoryBase buffer = + arguments[param_no]->buffer(allocation.param_shape_index()); + + // All top-level buffers and sub-buffers must have an explicit, non-null + // pointer, except for zero-sized buffers, which may be null. + if (buffer.is_null() && buffer.size() > 0) { + return FailedPrecondition( + "Cannot run XLA computation because pointer to (sub-)buffer at " + "index %s of parameter %lld was null. All pointers to " + "(sub-)buffers must not be null, unless the (sub-)buffer has zero " + "elements.", + allocation.param_shape_index().ToString().c_str(), param_no); } + + buffer_allocations_builder.RegisterBuffer(i, buffer); } } se::StreamExecutor* executor = run_options->stream()->parent(); -- GitLab From ec3123979cd622a70fc809ee98b5a7b35772f369 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 27 Mar 2018 17:35:14 -0700 Subject: [PATCH 588/960] [XLA] Accurately measure FLOPs for base-dilated convolutions We incorrectly counted FLOPs when the output and kernel line up to access the padding or the dilated area. These should not be accounted as contributing to the FLOP count. PiperOrigin-RevId: 190702384 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_cost_analysis.cc | 108 +++++++++++++++--- .../xla/service/hlo_cost_analysis_test.cc | 37 +++++- 3 files changed, 129 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index da16976d06..f0bf68ae57 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1580,6 +1580,7 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 4ec2ef27bf..44e4f75f75 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -379,20 +380,101 @@ Status HloCostAnalysis::HandleTranspose(const HloInstruction*) { } Status HloCostAnalysis::HandleConvolution(const HloInstruction* convolution) { - auto rhs_instruction = convolution->operand(1); + auto lhs = convolution->operand(0); + auto rhs = convolution->operand(1); + Window window = convolution->window(); + const auto& result_shape = convolution->shape(); + const Shape& lhs_shape = lhs->shape(); + const Shape& rhs_shape = rhs->shape(); + const auto& dnums = convolution->convolution_dimension_numbers(); - const int64 output_features = - convolution->shape().dimensions(dnums.output_feature_dimension()); - - // For each output element, we do one fma per element in the kernel at some - // given output feature index. - const int64 fmas_per_output_element = - output_features > 0 - ? ShapeUtil::ElementsIn(rhs_instruction->shape()) / output_features - : 0; - const int64 output_elements = ShapeUtil::ElementsIn(convolution->shape()); - current_properties_[kFlopsKey] = - output_elements * fmas_per_output_element * kFmaFlops; + + const int64 input_batch_dim = dnums.input_batch_dimension(); + const int64 input_feature_dim = dnums.input_feature_dimension(); + const int64 output_feature_dim = dnums.output_feature_dimension(); + const int64 input_feature = + ShapeUtil::GetDimension(lhs_shape, input_feature_dim); + const int64 output_feature = + ShapeUtil::GetDimension(result_shape, output_feature_dim); + const int64 batch = ShapeUtil::GetDimension(lhs_shape, input_batch_dim); + + DimensionVector kernel_limits; + DimensionVector output_limits; + DimensionVector input_limits; + if (window.dimensions().empty()) { + window = window_util::MakeWindow({1}); + kernel_limits.push_back(1); + output_limits.push_back(1); + input_limits.push_back(1); + } else { + for (int64 spatial_dimension = 0; + spatial_dimension < window.dimensions_size(); ++spatial_dimension) { + // Spatial dimension number for kernel (rhs). + const int64 kernel_spatial_dim = + dnums.kernel_spatial_dimensions(spatial_dimension); + const int64 kernel_limit = rhs_shape.dimensions(kernel_spatial_dim); + kernel_limits.push_back(kernel_limit); + + // Spatial dimension number for output. + const int64 output_spatial_dim = + dnums.output_spatial_dimensions(spatial_dimension); + const int64 output_limit = result_shape.dimensions(output_spatial_dim); + output_limits.push_back(output_limit); + + // Spatial dimension number for input (lhs). + const int64 input_spatial_dim = + dnums.input_spatial_dimensions(spatial_dimension); + const int64 input_limit = lhs_shape.dimensions(input_spatial_dim); + input_limits.push_back(input_limit); + } + } + + DimensionVector valid_position_counts; + + // Loop over each spatial dimension. + for (int64 spatial_dimension = 0; + spatial_dimension < window.dimensions_size(); ++spatial_dimension) { + int64 valid_position_count = 0; + // Loop over each point in the kernel. + for (int64 kernel_idx = 0; kernel_idx < kernel_limits[spatial_dimension]; + ++kernel_idx) { + // Loop over each point in the output. + for (int64 output_idx = 0; output_idx < output_limits[spatial_dimension]; + ++output_idx) { + // Calculate lhs (input) index without taking base dilation into + // account. + const auto& window_dim = window.dimensions(spatial_dimension); + const int64 undilated_index = output_idx * window_dim.stride() - + window_dim.padding_low() + + kernel_idx * window_dim.window_dilation(); + + // Calculate the actual lhs (input) index after dilation. Avoid the + // division as an optimization. + const int64 lhs_spatial_index = + window_dim.base_dilation() > 1 + ? undilated_index / window_dim.base_dilation() + : undilated_index; + + // Skip if the lhs (input) index is to be dilated. + if (undilated_index != lhs_spatial_index * window_dim.base_dilation()) { + continue; + } + + // Skip if input index is not in bound. + if (lhs_spatial_index < 0 || + lhs_spatial_index >= input_limits[spatial_dimension]) { + continue; + } + + valid_position_count += 1; + } + } + valid_position_counts.push_back(valid_position_count); + } + + const int64 fma_count = + input_feature * output_feature * batch * Product(valid_position_counts); + current_properties_[kFlopsKey] = fma_count * kFmaFlops; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index 3b289c240a..3d055b327e 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -186,12 +186,14 @@ TEST_F(HloCostAnalysisTest, Map) { TEST_F(HloCostAnalysisTest, Convolution) { ComputationBuilder builder(client_, "convolution"); auto input = builder.Parameter( - 0, ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/10, - /*x_dim=*/20}), + 0, + ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/10, + /*x_dim=*/20}), "input"); auto kernel = builder.Parameter( - 1, ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/3, - /*x_dim=*/3}), + 1, + ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/3, + /*x_dim=*/3}), "kernel"); auto result = builder.Conv(input, kernel, {1, 1}, Padding::kValid); @@ -440,5 +442,32 @@ TEST_F(HloCostAnalysisTest, TupleCost) { EXPECT_EQ(analysis.bytes_accessed(), kPointerSize * 2); } +TEST_F(HloCostAnalysisTest, BaseDilatedConvolution) { + ComputationBuilder builder(client_, "BaseDilatedConvolution"); + auto input = builder.Parameter( + 0, + ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/10, + /*x_dim=*/20}), + "input"); + auto kernel = builder.Parameter( + 1, + ShapeUtil::MakeShape(F32, {/*p_dim=*/1, /*z_dim=*/1, /*y_dim=*/3, + /*x_dim=*/3}), + "kernel"); + + auto result = builder.ConvGeneralDilated( + input, kernel, /*window_strides=*/{1, 1}, /*padding=*/{{1, 1}, {1, 1}}, + /*lhs_dilation=*/{3, 5}, /*rhs_dilation=*/{7, 11}, + ComputationBuilder::CreateDefaultConvDimensionNumbers(2)); + + // Run HLO cost analysis. + auto hlo_module = BuildHloGraph(&builder); + HloCostAnalysis analysis(ShapeSize); + ASSERT_IS_OK( + hlo_module->entry_computation()->root_instruction()->Accept(&analysis)); + + EXPECT_EQ(analysis.flop_count(), 1472); +} + } // namespace } // namespace xla -- GitLab From e7ce062d5f1144b0ab5b45a43a5e86d118941bf9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 17:35:55 -0700 Subject: [PATCH 589/960] Support GatherV2 (using Gather) PiperOrigin-RevId: 190702442 --- tensorflow/contrib/lite/testing/BUILD | 2 +- tensorflow/contrib/lite/testing/generate_examples.py | 3 +-- .../contrib/lite/testing/generated_examples_zip_test.cc | 5 ++++- tensorflow/contrib/lite/toco/import_tensorflow.cc | 9 ++++++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index dc9492f5e2..555ea90034 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -29,7 +29,7 @@ gen_zipped_test_files( "exp.zip", "fully_connected.zip", "fused_batch_norm.zip", - # "gather.zip", #TODO(b/76437794) + "gather.zip", "global_batch_norm.zip", "l2_pool.zip", "l2norm.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index e4ef17585f..cb5c500136 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -874,12 +874,11 @@ def make_gather_tests(zip_path): # TODO(mgubin): add string tests when they are supported by Toco. # TODO(mgubin): add tests for Nd indices when they are supported by # TfLite. - # TODO(mgubin): add tests for axis != 0 when it is supported by TfLite. "params_dtype": [tf.float32, tf.int32], "params_shape": [[10], [1, 2, 20]], "indices_dtype": [tf.int32], "indices_shape": [[3], [5]], - "axis": [0], # axis!=0 is GatherV2 + "axis": [0, 1], }] def build_graph(parameters): diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 08354b762c..a4a7283508 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -91,6 +91,9 @@ std::map kBrokenTests = { // PRelu only supports 4D input with (1, 1, channels) 3D alpha now. {R"(^\/prelu.*shared_axes=\[1\])", "75975192"}, + + // No support for axis!=0 in GatherV2. + {R"(^\/gather.*axis=1)", "76910444"}, }; // Allows test data to be unzipped into a temporary directory and makes @@ -244,7 +247,7 @@ INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) -// INSTANTIATE_TESTS(gather) //TODO(b/76437794) +INSTANTIATE_TESTS(gather) INSTANTIATE_TESTS(global_batch_norm) INSTANTIATE_TESTS(l2_pool) INSTANTIATE_TESTS(l2norm) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index b844e0b948..c26e4bddff 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1343,13 +1343,16 @@ void ConvertFloorOperator(const NodeDef& node, void ConvertGatherOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { - CHECK_EQ(node.op(), "Gather"); - CheckInputsCount(node, tf_import_flags, 2); + CHECK(node.op() == "Gather" || node.op() == "GatherV2"); + if (node.op() == "Gather") CheckInputsCount(node, tf_import_flags, 2); + if (node.op() == "GatherV2") CheckInputsCount(node, tf_import_flags, 3); const auto indices_data_type = GetDataTypeAttr(node, "Tindices"); CHECK(indices_data_type == DT_INT32 || indices_data_type == DT_INT64); auto* op = new GatherOperator; op->inputs.push_back(node.input(0)); op->inputs.push_back(node.input(1)); + // TODO(ahentz): we currently ignore the third tensor in GatherV2 but we + // should read it an pass it on to the TF Lite Interpreter. op->outputs.push_back(node.name()); model->operators.emplace_back(op); } @@ -2119,7 +2122,7 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertCastOperator(node, tf_import_flags, model); } else if (node.op() == "Floor") { ConvertFloorOperator(node, tf_import_flags, model); - } else if (node.op() == "Gather") { + } else if (node.op() == "Gather" || node.op() == "GatherV2") { ConvertGatherOperator(node, tf_import_flags, model); } else if (node.op() == "ResizeBilinear") { ConvertResizeBilinearOperator(node, tf_import_flags, model); -- GitLab From 4979e1d55783d05520fda56fd89641f817daf119 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Tue, 27 Mar 2018 18:06:30 -0700 Subject: [PATCH 590/960] Disable new Gather/Slice estimators for now to fix the crashes during some TF graphs optimizations. PiperOrigin-RevId: 190705686 --- .../grappler/costs/op_level_cost_estimator.cc | 3 + .../costs/op_level_cost_estimator_test.cc | 61 ++++++++++--------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index fdbc61f3f1..905cc2a215 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -171,9 +171,12 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kNoOp, wrap(&OpLevelCostEstimator::PredictNoOp)}, + // TODO(76227186): re-enable with output size check & test + /* {kGather, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, {kGatherV2, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, {kSlice, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)}, + */ {kPlaceholder, wrap(&OpLevelCostEstimator::PredictIdentity)}, {kIdentity, wrap(&OpLevelCostEstimator::PredictIdentity)}, diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index f2a9615dfb..99bf28f21b 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -195,40 +195,43 @@ class OpLevelCostEstimatorTest : public ::testing::Test { OpLevelCostEstimator estimator_; }; +// TODO(76227186): re-enable with output size check & test +/* TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) { - OpContext op_context; - SetCpuDevice(&op_context.op_info); - op_context.op_info.set_op("Gather"); - - // Huge first input shouldn't affect Gather execution and memory costs. - DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); - DescribeArbitraryRankInput({16}, DT_INT64, &op_context.op_info); - DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); - - auto cost = estimator_.PredictCosts(op_context); - EXPECT_EQ(Costs::Duration(130), cost.memory_time); - EXPECT_EQ(Costs::Duration(16), cost.compute_time); - EXPECT_EQ(Costs::Duration(146), cost.execution_time); - EXPECT_FALSE(cost.inaccurate); +OpContext op_context; +SetCpuDevice(&op_context.op_info); +op_context.op_info.set_op("Gather"); + +// Huge first input shouldn't affect Gather execution and memory costs. +DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); +DescribeArbitraryRankInput({16}, DT_INT64, &op_context.op_info); +DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); + +auto cost = estimator_.PredictCosts(op_context); +EXPECT_EQ(Costs::Duration(130), cost.memory_time); +EXPECT_EQ(Costs::Duration(16), cost.compute_time); +EXPECT_EQ(Costs::Duration(146), cost.execution_time); +EXPECT_FALSE(cost.inaccurate); } TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) { - OpContext op_context; - SetCpuDevice(&op_context.op_info); - op_context.op_info.set_op("Slice"); - - // Huge first input shouldn't affect Slice execution and memory costs. - DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); - DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); - DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); - DescribeArbitraryRankOutput({10, 10}, DT_FLOAT, &op_context.op_info); - - auto cost = estimator_.PredictCosts(op_context); - EXPECT_EQ(Costs::Duration(81), cost.memory_time); - EXPECT_EQ(Costs::Duration(10), cost.compute_time); - EXPECT_EQ(Costs::Duration(91), cost.execution_time); - EXPECT_FALSE(cost.inaccurate); +OpContext op_context; +SetCpuDevice(&op_context.op_info); +op_context.op_info.set_op("Slice"); + +// Huge first input shouldn't affect Slice execution and memory costs. +DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); +DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); +DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); +DescribeArbitraryRankOutput({10, 10}, DT_FLOAT, &op_context.op_info); + +auto cost = estimator_.PredictCosts(op_context); +EXPECT_EQ(Costs::Duration(81), cost.memory_time); +EXPECT_EQ(Costs::Duration(10), cost.compute_time); +EXPECT_EQ(Costs::Duration(91), cost.execution_time); +EXPECT_FALSE(cost.inaccurate); } +*/ TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) { auto cost = PredictCosts(DescribeBiasAdd(1000, 10)); -- GitLab From 94b2d2db576a6cce878aee92d6b1f90ded4278b4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 18:09:37 -0700 Subject: [PATCH 591/960] [XLA] Remove CheckShape and CheckSameShape in ComputationBuilder, they are not/rarely used. PiperOrigin-RevId: 190706088 --- .../xla/client/computation_builder.cc | 20 -------------- .../compiler/xla/client/computation_builder.h | 9 ------- .../xla/tests/batch_normalization_test.cc | 26 ++++++++++++++----- 3 files changed, 20 insertions(+), 35 deletions(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 39d02f0863..4d3b0ee0d6 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -253,26 +253,6 @@ StatusOr ComputationBuilder::GetProgramShape() { return std::move(*response.mutable_program_shape()); } -ComputationDataHandle ComputationBuilder::CheckShape( - const ComputationDataHandle& operand, const Shape& expected_shape) { - std::unique_ptr actual_shape = GetShape(operand).ConsumeValueOrDie(); - CHECK(ShapeUtil::Equal(expected_shape, *actual_shape)) - << "want " << ShapeUtil::HumanString(expected_shape) << " got " - << ShapeUtil::HumanString(*actual_shape); - return operand; -} - -void ComputationBuilder::CheckSameShape(const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { - std::unique_ptr lhs_shape = GetShape(lhs).ConsumeValueOrDie(); - std::unique_ptr rhs_shape = GetShape(rhs).ConsumeValueOrDie(); - VLOG(2) << "checking " << ShapeUtil::HumanString(*lhs_shape) << " equals " - << ShapeUtil::HumanString(*rhs_shape); - CHECK(ShapeUtil::Equal(*lhs_shape, *rhs_shape)) - << "lhs " << ShapeUtil::HumanString(*lhs_shape) << " rhs " - << ShapeUtil::HumanString(*rhs_shape); -} - ComputationDataHandle ComputationBuilder::Slice( const ComputationDataHandle& operand, tensorflow::gtl::ArraySlice start_indices, diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 2141ebc206..019c6f3afb 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -104,15 +104,6 @@ class ComputationBuilder { // Retrieves the (inferred) result for the current computation's shape. StatusOr GetProgramShape(); - // Checks that the operand has the given expected shape. Returns the operand - // if yes, fails with a CHECK error if no. - ComputationDataHandle CheckShape(const ComputationDataHandle& operand, - const Shape& expected_shape); - - // Checks that the lhs and rhs results have the same shape. - void CheckSameShape(const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs); - // Enqueues a constant with the value of the given literal onto the // computation. ComputationDataHandle ConstantLiteral(const Literal& literal); diff --git a/tensorflow/compiler/xla/tests/batch_normalization_test.cc b/tensorflow/compiler/xla/tests/batch_normalization_test.cc index 28ab965499..af8af99c79 100644 --- a/tensorflow/compiler/xla/tests/batch_normalization_test.cc +++ b/tensorflow/compiler/xla/tests/batch_normalization_test.cc @@ -69,6 +69,17 @@ class BatchNormalizationTest CHECK_EQ(kY, input_array_.width()); } + ComputationDataHandle CheckShape(ComputationBuilder* b, + const ComputationDataHandle& operand, + const Shape& expected_shape) const { + std::unique_ptr actual_shape = + b->GetShape(operand).ConsumeValueOrDie(); + CHECK(ShapeUtil::Equal(expected_shape, *actual_shape)) + << "want " << ShapeUtil::HumanString(expected_shape) << " got " + << ShapeUtil::HumanString(*actual_shape); + return operand; + } + static constexpr int64 kSamples = 3; static constexpr int64 kX = 1; static constexpr int64 kY = 1; @@ -164,14 +175,15 @@ XLA_TEST_P(BatchNormalizationTest, VarianceToStddev) { XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) { ComputationBuilder builder(client_, "batch_normalize_per_spec"); auto input_activations = - builder.CheckShape(builder.ConstantLiteral(input_literal_), - ShapeUtil::MakeShape(F32, {3, 2, 1, 1})); + CheckShape(&builder, builder.ConstantLiteral(input_literal_), + ShapeUtil::MakeShape(F32, {3, 2, 1, 1})); auto gamma = builder.ConstantR1({1.0, 1.0}); auto beta = builder.ConstantR1({0.0, 0.0}); Computation add = CreateScalarAddComputation(F32, &builder); // Reduce all dimensions except dimension 1. Shape TwoElementVectorF32 = ShapeUtil::MakeShape(F32, {2}); - auto sum = builder.CheckShape( + auto sum = CheckShape( + &builder, builder.Reduce(input_activations, builder.ConstantR0(0.0f), add, /*dimensions_to_reduce=*/{0, 2, 3}), TwoElementVectorF32); @@ -187,14 +199,16 @@ XLA_TEST_P(BatchNormalizationTest, SpecComparisonForward) { auto activation_deviations = builder.Sub(input_activations, set_means, /*broadcast_dimensions=*/{1}); auto dev_squares = builder.SquareF32(activation_deviations); - auto sum_of_squares = builder.CheckShape( + auto sum_of_squares = CheckShape( + &builder, builder.Reduce(dev_squares, builder.ConstantR0(0.0f), add, /*dimensions_to_reduce=*/{0, 2, 3}), TwoElementVectorF32); auto variance = builder.Div(sum_of_squares, count); auto standard_deviation = builder.SqrtF32(variance); - auto standard_deviation_above_epsilon = builder.CheckShape( - builder.Gt(standard_deviation, epsilon), ShapeUtil::MakeShape(PRED, {2})); + auto standard_deviation_above_epsilon = + CheckShape(&builder, builder.Gt(standard_deviation, epsilon), + ShapeUtil::MakeShape(PRED, {2})); auto gt_eps = builder.Select(standard_deviation_above_epsilon, standard_deviation, epsilon2); auto normalization_factors = builder.ReciprocalF32(gt_eps); -- GitLab From 9d0b5ba5cfdf0f1c35a228abac9c84c11bb7d695 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 27 Mar 2018 18:18:33 -0700 Subject: [PATCH 592/960] Pass options to TFE_ContextOptionsSetAsync PiperOrigin-RevId: 190707017 --- tensorflow/python/eager/context.py | 2 +- tensorflow/python/eager/core_test.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 6c9a14730c..8c1bb06bc3 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -232,7 +232,7 @@ class Context(object): pywrap_tensorflow.TFE_ContextOptionsSetDevicePlacementPolicy( opts, self._device_policy) if self._execution_mode == ASYNC: - pywrap_tensorflow.TFE_ContextOptionsSetAsync(True) + pywrap_tensorflow.TFE_ContextOptionsSetAsync(opts, True) self._context_handle = pywrap_tensorflow.TFE_NewContext(opts, status) finally: pywrap_tensorflow.TFE_DeleteContextOptions(opts) diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 6ebf5b2481..5f19f64846 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -97,6 +97,14 @@ class TFETest(test_util.TensorFlowTestCase): self.assertTrue(has_cpu_device) del ctx + def testAsyncBasic(self): + ctx = context.Context(execution_mode=context.ASYNC) + has_cpu_device = False + for x in ctx.devices(): + has_cpu_device = has_cpu_device or 'CPU' in x + self.assertTrue(has_cpu_device) + del ctx + def testRunMetadata(self): context.enable_run_metadata() t = constant_op.constant(1.0) -- GitLab From 9b5411a13c2d983e6709e6dff4f82b1779389ece Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Tue, 27 Mar 2018 18:31:55 -0700 Subject: [PATCH 593/960] Fix problem with HandleElementwiseUnary/Binary in DfsHloVisitorWithDefault. DfsHloVisitorWithDefault incorrectly included some overrides for handling several elementwise binary and unary opcodes. These overrides explicitly called DefaultAction which meant that these opcodes were not handled by HandleElementwiseUnary/Binary. This CL removes these overrides and adds a comment describing the potential problem. Unfortunately, I don't see a way of automatically catching these issues when new opcodes are added, so the comment will have to do. PiperOrigin-RevId: 190708245 --- tensorflow/compiler/xla/service/BUILD | 17 ++++ .../service/dfs_hlo_visitor_with_default.h | 15 ++-- .../dfs_hlo_visitor_with_default_test.cc | 90 +++++++++++++++++++ 3 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index f0bf68ae57..bde749d317 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -285,6 +285,23 @@ cc_library( ], ) +tf_cc_test( + name = "dfs_hlo_visitor_with_default_test", + srcs = ["dfs_hlo_visitor_with_default_test.cc"], + deps = [ + ":hlo", + ":hlo_runner", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + cc_library( name = "hlo_reachability", srcs = ["hlo_reachability.cc"], diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index ecda5288ee..240faebe62 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -35,6 +35,12 @@ class HloInstruction; // DfsHloVisitor with default action based on the HloInstruction being visited. // Users should not use this class directly, but use the type aliases // DfsHloVisitorWithDefault/ConstDfsHloVisitorWithDefault instead. +// +// Do *not* add an override to this class if the opcode is covered by +// HandleElementwiseUnary/Binary. These opcode handlers dispatch to +// HandleElementwiseUnary/Binary in DfsHloVisitorBase. Adding such a handler +// here will break passes which rely on the HandleElementwiseUnary/Binary +// handling these opcodes. template class DfsHloVisitorWithDefaultBase : public DfsHloVisitorBase { @@ -70,12 +76,6 @@ class DfsHloVisitorWithDefaultBase Status HandleConcatenate(HloInstructionPtr concatenate) override { return DefaultAction(concatenate); } - Status HandleConvert(HloInstructionPtr convert) override { - return DefaultAction(convert); - } - Status HandleCopy(HloInstructionPtr copy) override { - return DefaultAction(copy); - } Status HandleSelect(HloInstructionPtr select) override { return DefaultAction(select); } @@ -91,9 +91,6 @@ class DfsHloVisitorWithDefaultBase Status HandleCrossReplicaSum(HloInstructionPtr crs) override { return DefaultAction(crs); } - Status HandleCompare(HloInstructionPtr compare) override { - return DefaultAction(compare); - } Status HandleRng(HloInstructionPtr random) override { return DefaultAction(random); } diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default_test.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default_test.cc new file mode 100644 index 0000000000..825e1436f0 --- /dev/null +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default_test.cc @@ -0,0 +1,90 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_runner.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +class DfsHloVisitorWithDefaultTest : public HloTestBase {}; + +TEST_F(DfsHloVisitorWithDefaultTest, DefaultElementwiseTest) { + // Verify that HandleElementwiseBinary and HandleElementwiseUnary are called + // on the appropriate HLO ops (elementwise binary/unary ops). + + class ElementwiseTestVisitor : public DfsHloVisitorWithDefault { + public: + Status DefaultAction(HloInstruction* hlo) override { + // The HLO should be neither an elementwise unary nor binary op. These + // cases are handled in HandleElementwiseBinary/Unary. + TF_RET_CHECK(!(hlo->IsElementwise() && hlo->operand_count() == 2)) + << hlo->ToString(); + TF_RET_CHECK(!(hlo->IsElementwise() && hlo->operand_count() == 1)) + << hlo->ToString(); + return Status::OK(); + } + + Status HandleElementwiseBinary(HloInstruction* hlo) override { + // HLO should be elementwise binary. + TF_RET_CHECK(hlo->IsElementwise() && hlo->operand_count() == 2) + << hlo->ToString(); + return Status::OK(); + } + Status HandleElementwiseUnary(HloInstruction* hlo) override { + // HLO should be elementwise unary. + TF_RET_CHECK(hlo->IsElementwise() && hlo->operand_count() == 1) + << hlo->ToString(); + return Status::OK(); + } + }; + + // HLO module contains are arbitrary mix of elementwise and non-elementwise + // operations. + const string& hlo_string = R"( +HloModule TestModule + +ENTRY TestComputation { + arg = f32[] parameter(0) + tuple = (f32[]) tuple(arg) + gte = f32[] get-tuple-element(tuple), index=0 + abs = f32[] abs(arg) + add = f32[] add(arg, gte) + broadcast = f32[42] broadcast(add), dimensions={} + slice = f32[0] slice(broadcast), slice={[1:2]} + copy = f32[] copy(arg) + eq = pred[] equal-to(arg, gte) + neg = f32[] negate(arg) + ROOT convert = f64[] convert(f32[] arg) +})"; + std::unique_ptr module = + HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest()) + .ConsumeValueOrDie(); + ElementwiseTestVisitor visitor; + TF_EXPECT_OK(module->entry_computation()->Accept(&visitor)); +} + +} // namespace +} // namespace xla -- GitLab From f656b7f3e07fc3a6a51cb6083d27abebcc6212bb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Mar 2018 19:24:40 -0700 Subject: [PATCH 594/960] Fixed the interaction between virtual cluster and measuring cost estimator. PiperOrigin-RevId: 190712404 --- .../costs/measuring_cost_estimator.cc | 23 ++++++++++++++++--- tensorflow/core/grappler/costs/utils.cc | 4 ++-- tensorflow/python/grappler/cluster_test.py | 16 ++++++++----- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/costs/measuring_cost_estimator.cc b/tensorflow/core/grappler/costs/measuring_cost_estimator.cc index ea4320687a..833205ac6f 100644 --- a/tensorflow/core/grappler/costs/measuring_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/measuring_cost_estimator.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/framework/cost_graph.pb.h" +#include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/grappler/clusters/cluster.h" #include "tensorflow/core/grappler/costs/robust_stats.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -52,6 +53,8 @@ Status MeasuringCostEstimator::Initialize(const GrapplerItem& item) { Status MeasuringCostEstimator::PredictCosts(const GraphDef& optimized_graph, CostGraphDef* cost_graph, Costs* costs) const { + const bool running_simulation = (cluster_->type() == "virtual"); + std::vector times(measurement_steps_); BlockingCounter barrier(measurement_steps_); @@ -80,9 +83,23 @@ Status MeasuringCostEstimator::PredictCosts(const GraphDef& optimized_graph, } const Costs::MicroSeconds finish = Env::Default()->NowMicros(); - const double time = (finish - start).count() * 1e3; - times[step] = time; - + if (running_simulation) { + // When running simulation, return the estimated runtime, not the time it + // takes to run the simulation. + double time = 0.0; + for (const DeviceStepStats& stepstats : + metadata.step_stats().dev_stats()) { + for (const NodeExecStats& node_stats : stepstats.node_stats()) { + const double completion_time = + node_stats.all_end_rel_micros() + node_stats.all_start_micros(); + time = std::max(time, completion_time * 1e3); + } + } + times[step] = time; + } else { + const double time = (finish - start).count() * 1e3; + times[step] = time; + } if (cost_graph && (step + 1 == measurement_steps_)) { metadata.mutable_cost_graph()->Swap(cost_graph); } diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 076945d5c6..f318e3911c 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -212,8 +212,8 @@ DeviceProperties GetDeviceInfo(const string& device_str) { CudaGpuId cuda_gpu_id; Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); if (!s.ok()) { - LOG(ERROR) << s; - return unknown; + // We are probably running simulation without linking cuda libraries. + cuda_gpu_id = CudaGpuId(parsed.id); } return GetLocalGPUInfo(cuda_gpu_id); } else if (parsed.type == "CPU") { diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py index a3c4c2bbeb..26c6f22d34 100644 --- a/tensorflow/python/grappler/cluster_test.py +++ b/tensorflow/python/grappler/cluster_test.py @@ -87,9 +87,10 @@ class ClusterTest(test.TestCase): def testVirtualCluster(self): with ops.Graph().as_default() as g: - a = random_ops.random_uniform(shape=()) - b = random_ops.random_uniform(shape=()) - c = a + b + with ops.device('/device:GPU:0'): + a = random_ops.random_uniform(shape=[1024, 1024]) + b = random_ops.random_uniform(shape=[1024, 1024]) + c = a + b train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) train_op.append(c) mg = meta_graph.create_meta_graph_def(graph=g) @@ -102,10 +103,13 @@ class ClusterTest(test.TestCase): 'architecture': '7' }) named_device = device_properties_pb2.NamedDevice( - properties=device_properties, name='/GPU:0') - grappler_cluster = cluster.Cluster(devices=[named_device]) + properties=device_properties, name='/device:GPU:0') + grappler_cluster = cluster.Cluster( + disable_detailed_stats=False, + disable_timeline=False, + devices=[named_device]) op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item) - self.assertGreater(run_time, 0) + self.assertEqual(run_time, 0.000545) self.assertEqual(len(op_perfs), 15) estimated_perf = grappler_cluster.EstimatePerformance(named_device) -- GitLab From 756dc39c83136cc3518e20993be4382fe77f9013 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 19:42:55 -0700 Subject: [PATCH 595/960] Implement strip assert in DebugStripper. PiperOrigin-RevId: 190713919 --- tensorflow/core/grappler/optimizers/BUILD | 5 +- .../grappler/optimizers/debug_stripper.cc | 19 ++++- .../optimizers/debug_stripper_test.cc | 71 +++++++++++++++++-- .../grappler/optimizers/meta_optimizer.cc | 8 +-- 4 files changed, 93 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 601984fcfd..19ff788aba 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -630,7 +630,10 @@ cc_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", "//tensorflow/core/grappler/optimizers:graph_optimizer", ], @@ -638,7 +641,6 @@ cc_library( tf_cuda_cc_test( name = "debug_stripper_test", - size = "small", srcs = ["debug_stripper_test.cc"], deps = [ ":debug_stripper", @@ -646,6 +648,7 @@ tf_cuda_cc_test( "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/utils:grappler_test", ], diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.cc b/tensorflow/core/grappler/optimizers/debug_stripper.cc index 461f1aa2fb..0e058e3435 100644 --- a/tensorflow/core/grappler/optimizers/debug_stripper.cc +++ b/tensorflow/core/grappler/optimizers/debug_stripper.cc @@ -14,16 +14,33 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/optimizers/debug_stripper.h" + +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/grappler/clusters/cluster.h" #include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" namespace tensorflow { namespace grappler { Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { - // TODO(haoliang): Let's remove assertions here. *output = item.graph; + for (NodeDef& node : *output->mutable_node()) { + if (IsAssert(node)) { + // Convert this node into a no-op. + node.set_op("NoOp"); + node.clear_attr(); + // Convert all its inputs into control dependency, which will then + // be optimized away by dependency optimizer. + for (string& inp : *node.mutable_input()) { + if (!IsControlInput(inp)) { + inp = AsControlDependency(inp); + } + } + } + } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/debug_stripper_test.cc b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc index d2cabc0798..aacd55f136 100644 --- a/tensorflow/core/grappler/optimizers/debug_stripper_test.cc +++ b/tensorflow/core/grappler/optimizers/debug_stripper_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/debug_stripper.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -27,16 +28,78 @@ namespace { class DebugStripperTest : public GrapplerTest {}; -// TODO(haoliang): Add tests for different removal operations. TEST_F(DebugStripperTest, OutputEqualToInput) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto c = ops::Const(s.WithOpName("c"), 0, {}); + constexpr char device[] = "/device:CPU:0"; GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "XTimesTwo", {"x"}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + {}); DebugStripper optimizer; GraphDef output; TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); + CompareGraphs(item.graph, output); +} + +TEST_F(DebugStripperTest, StripAssertFromGraph) { + constexpr char device[] = "/device:CPU:0"; + GrapplerItem item; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("GreaterEqual", "GreaterEqual", {"x", "y"}, + {{"T", DT_FLOAT}}, device), + test::function::NDef("Assert", "Assert", {"GreaterEqual"}, + {{"T", DT_FLOAT}}, device), + test::function::NDef("z", "Add", {"x", "y", "^Assert"}, {}, device)}, + {}); + + DebugStripper optimizer; + GraphDef output; + TF_EXPECT_OK(optimizer.Optimize(nullptr, item, &output)); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "x") { + count++; + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "y") { + count++; + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "GreaterEqual") { + count++; + EXPECT_EQ("GreaterEqual", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + } else if (node.name() == "Assert") { + count++; + EXPECT_EQ("NoOp", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^GreaterEqual", node.input(0)); + EXPECT_EQ(0, node.attr_size()); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^Assert", node.input(2)); + } + } + EXPECT_EQ(5, count); } } // namespace diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 47ec16226b..ad655db727 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -102,6 +102,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new FunctionOptimizer(cfg_.function_optimization()))); } + if (cfg_.debug_stripper() == RewriterConfig::ON) { + optimizers.push_back( + std::unique_ptr(new DebugStripper())); + } if (cfg_.constant_folding() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( new ConstantFolding(cfg_.constant_folding(), cpu_device_))); @@ -138,10 +142,6 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new AutoParallel(cfg_.auto_parallel().num_replicas()))); } - if (cfg_.debug_stripper() == RewriterConfig::ON) { - optimizers.push_back( - std::unique_ptr(new DebugStripper())); - } } else { const std::set available_optimizers = { "pruning", "function", "constfold", "layout", -- GitLab From 078793e131ecec0eae3e7a0549eb13a321993dfb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 19:58:53 -0700 Subject: [PATCH 596/960] Fix _force_data_dependency for scalar inputs PiperOrigin-RevId: 190715033 --- tensorflow/contrib/layers/python/layers/rev_block_lib.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 0b38c0c3fd..e49589ddf6 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -33,6 +33,7 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.framework.python import ops as contrib_framework_ops +from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops as framework_ops from tensorflow.python.layers import base @@ -660,7 +661,9 @@ def _force_data_dependency(first_compute, then_compute): if x.get_shape().ndims is None: raise ValueError("Rank of Tensor %s must be known" % x) ndims = x.get_shape().ndims - return array_ops.reshape(array_ops.slice(x, [0] * ndims, [1] * ndims), []) + begin = framework_ops.convert_to_tensor([0] * ndims, dtype=dtypes.int32) + size = framework_ops.convert_to_tensor([1] * ndims, dtype=dtypes.int32) + return array_ops.reshape(array_ops.slice(x, begin, size), []) first_compute_sum = math_ops.add_n( [_first_element(x) for x in first_compute if x is not None]) -- GitLab From 9bc9b63bf8319eb1224d0d3331668f4e0913af54 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 21:03:41 -0700 Subject: [PATCH 597/960] Fix non-uniformity of orthogonal matrices. Add test code for this purpose. PiperOrigin-RevId: 190719729 --- .../python/kernel_tests/init_ops_test.py | 24 +++++++++++++++++++ tensorflow/python/ops/init_ops.py | 9 ++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index c1755985ee..36a86a25cc 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -649,6 +649,30 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): sess.run(outputs_2norm)/(np.sqrt(np.prod(shape))*np.sqrt(3.14)), rtol=tol, atol=tol) + def testNonuniformity(self): + value = 0 + abs_value = 0 + shape = [3, 3, 10, 10] + count = 70 + tol = 1e-5 + with self.test_session(use_gpu=True): # as sess: + for i in range(count): + x = variable_scope.get_variable("{}".format(i), shape=shape, + initializer= + init_ops.convolutional_delta_orthogonal) + x.initializer.run() + y = x.eval()[1, 1, :, :] + determinant = np.linalg.det(y) + value += determinant + abs_value += np.abs(determinant) + + # Check there is some variation in the signs of the determinants + self.assertLess(value, count - tol) + self.assertLess(-count + tol, value) + # Check all determinants have absolute value 1 + # Compute the sum of the absolute values of 'count' determinants + self.assertAllClose(abs_value, count, rtol=tol, atol=tol) + class IdentityInitializerTest(test.TestCase): diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 40ab22951b..9dfe5ffbf4 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -532,8 +532,7 @@ class Orthogonal(Initializer): q, r = linalg_ops.qr(a, full_matrices=False) # Make Q uniform d = array_ops.diag_part(r) - ph = d / math_ops.abs(d) - q *= ph + q *= math_ops.sign(d) if num_rows < num_cols: q = array_ops.matrix_transpose(q) return self.gain * array_ops.reshape(q, shape) @@ -579,7 +578,11 @@ class ConvolutionDeltaOrthogonal(Initializer): a = random_ops.random_normal([shape[-1], shape[-1]], dtype=dtype, seed=self.seed) # Compute the qr factorization - q, _ = linalg_ops.qr(a, full_matrices=False) + q, r = linalg_ops.qr(a, full_matrices=False) + # Make Q uniform + d = array_ops.diag_part(r) + # ph = d / math_ops.abs(d) + q *= math_ops.sign(d) q = q[:shape[-2], :] q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) if len(shape) == 3: -- GitLab From 0a451b1aa0baaa3f7abbf8d90dfe58193cf1533e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Mar 2018 21:22:54 -0700 Subject: [PATCH 598/960] Speed up statistical_testing_test by consolidating sess.run calls. PiperOrigin-RevId: 190721153 --- tensorflow/contrib/distributions/BUILD | 1 + .../kernel_tests/statistical_testing_test.py | 82 +++++++++++++------ 2 files changed, 60 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1c381cc354..682448b84b 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -486,6 +486,7 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + shard_count = 4, tags = [ "manual", "noasan", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index 3548ac1807..c0e7bdd259 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -22,39 +22,75 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import statistical_testing as st from tensorflow.python.framework import errors -from tensorflow.python.ops import check_ops from tensorflow.python.platform import test class StatisticalTestingTest(test.TestCase): def test_dkwm_design_mean_one_sample_soundness(self): - numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] - with self.test_session() as sess: - for ff in rates: - for fp in rates: - sufficient_n = st.min_num_samples_for_dkwm_mean_test( - numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( - sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + false_fail_rates, false_pass_rates = np.meshgrid(rates, rates) + false_fail_rates = false_fail_rates.flatten().astype(np.float32) + false_pass_rates = false_pass_rates.flatten().astype(np.float32) + + detectable_discrepancies = [] + for false_pass_rate, false_fail_rate in zip( + false_pass_rates, false_fail_rates): + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + thresholds, low=0., high=1., false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate) + detectable_discrepancies.append( + st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, low=0., high=1., false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate)) + + detectable_discrepancies_ = self.evaluate(detectable_discrepancies) + for discrepancies, false_pass_rate, false_fail_rate in zip( + detectable_discrepancies_, false_pass_rates, false_fail_rates): + below_threshold = discrepancies <= thresholds + self.assertAllEqual( + np.ones_like(below_threshold, np.bool), below_threshold, + msg='false_pass_rate({}), false_fail_rate({})'.format( + false_pass_rate, false_fail_rate)) def test_dkwm_design_mean_two_sample_soundness(self): - numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] - with self.test_session() as sess: - for ff in rates: - for fp in rates: - (sufficient_n1, - sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( - numbers, 0., 1., 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample - detectable_d = d_fn( - sufficient_n1, 0., 1., sufficient_n2, 0., 1., - false_fail_rate=ff, false_pass_rate=fp) - sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + false_fail_rates, false_pass_rates = np.meshgrid(rates, rates) + false_fail_rates = false_fail_rates.flatten().astype(np.float32) + false_pass_rates = false_pass_rates.flatten().astype(np.float32) + + detectable_discrepancies = [] + for false_pass_rate, false_fail_rate in zip( + false_pass_rates, false_fail_rates): + [ + sufficient_n1, + sufficient_n2 + ] = st.min_num_samples_for_dkwm_mean_two_sample_test( + thresholds, low1=0., high1=1., low2=0., high2=1., + false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate) + + detectable_discrepancies.append( + st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + n1=sufficient_n1, + low1=0., + high1=1., + n2=sufficient_n2, + low2=0., + high2=1., + false_fail_rate=false_fail_rate, + false_pass_rate=false_pass_rate)) + + detectable_discrepancies_ = self.evaluate(detectable_discrepancies) + for discrepancies, false_pass_rate, false_fail_rate in zip( + detectable_discrepancies_, false_pass_rates, false_fail_rates): + below_threshold = discrepancies <= thresholds + self.assertAllEqual( + np.ones_like(below_threshold, np.bool), below_threshold, + msg='false_pass_rate({}), false_fail_rate({})'.format( + false_pass_rate, false_fail_rate)) def test_true_mean_confidence_interval_by_dkwm_one_sample(self): rng = np.random.RandomState(seed=0) -- GitLab From 6861e22c4cd68f57a408b2b51af834990b7a881a Mon Sep 17 00:00:00 2001 From: Dustin Tran Date: Tue, 27 Mar 2018 23:15:08 -0700 Subject: [PATCH 599/960] Have TensorFlow Distributions share name scopes across method calls. PiperOrigin-RevId: 190728742 --- .../python/kernel_tests/distribution_test.py | 16 ++++++++++++++++ .../distributions/python/ops/autoregressive.py | 2 +- .../contrib/distributions/python/ops/binomial.py | 2 +- .../contrib/distributions/python/ops/cauchy.py | 2 +- .../contrib/distributions/python/ops/chi2.py | 4 ++-- .../distributions/python/ops/deterministic.py | 2 +- .../distributions/python/ops/geometric.py | 2 +- .../contrib/distributions/python/ops/gumbel.py | 2 +- .../distributions/python/ops/half_normal.py | 2 +- .../distributions/python/ops/independent.py | 2 +- .../distributions/python/ops/inverse_gamma.py | 4 ++-- .../distributions/python/ops/kumaraswamy.py | 9 +++++---- .../contrib/distributions/python/ops/logistic.py | 2 +- .../contrib/distributions/python/ops/mixture.py | 2 +- .../python/ops/mixture_same_family.py | 2 +- .../contrib/distributions/python/ops/mvn_diag.py | 4 ++-- .../python/ops/mvn_diag_plus_low_rank.py | 2 +- .../python/ops/mvn_full_covariance.py | 2 +- .../python/ops/mvn_linear_operator.py | 2 +- .../contrib/distributions/python/ops/mvn_tril.py | 2 +- .../python/ops/negative_binomial.py | 2 +- .../python/ops/onehot_categorical.py | 2 +- .../contrib/distributions/python/ops/poisson.py | 2 +- .../python/ops/poisson_lognormal.py | 2 +- .../python/ops/quantized_distribution.py | 2 +- .../python/ops/relaxed_bernoulli.py | 2 +- .../python/ops/relaxed_onehot_categorical.py | 2 +- .../distributions/python/ops/sinh_arcsinh.py | 3 ++- .../python/ops/vector_diffeomixture.py | 2 +- .../python/ops/vector_exponential_diag.py | 2 +- .../ops/vector_exponential_linear_operator.py | 2 +- .../python/ops/vector_sinh_arcsinh_diag.py | 2 +- .../distributions/python/ops/vector_student_t.py | 2 +- .../contrib/distributions/python/ops/wishart.py | 10 +++++----- tensorflow/python/ops/distributions/bernoulli.py | 2 +- tensorflow/python/ops/distributions/beta.py | 6 +++--- .../python/ops/distributions/categorical.py | 2 +- tensorflow/python/ops/distributions/dirichlet.py | 2 +- .../ops/distributions/dirichlet_multinomial.py | 2 +- .../python/ops/distributions/distribution.py | 10 ++++++++-- .../python/ops/distributions/exponential.py | 4 ++-- tensorflow/python/ops/distributions/gamma.py | 4 ++-- tensorflow/python/ops/distributions/laplace.py | 4 ++-- .../python/ops/distributions/multinomial.py | 2 +- tensorflow/python/ops/distributions/normal.py | 4 ++-- tensorflow/python/ops/distributions/student_t.py | 4 ++-- .../distributions/transformed_distribution.py | 2 +- tensorflow/python/ops/distributions/uniform.py | 2 +- 48 files changed, 89 insertions(+), 65 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py index 68e0d9cb82..750fe00d63 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py @@ -190,6 +190,22 @@ class DistributionTest(test.TestCase): y = dist._set_sample_static_shape(x, sample_shape) self.assertTrue(y.get_shape().ndims is None) + def testNameScopeWorksCorrectly(self): + x = tfd.Normal(loc=0., scale=1., name="x") + x_duplicate = tfd.Normal(loc=0., scale=1., name="x") + with ops.name_scope("y") as name: + y = tfd.Bernoulli(logits=0., name=name) + x_sample = x.sample(name="custom_sample") + x_sample_duplicate = x.sample(name="custom_sample") + x_log_prob = x.log_prob(0., name="custom_log_prob") + + self.assertEqual(x.name, "x") + self.assertEqual(x_duplicate.name, "x_1") + self.assertEqual(y.name, "y") + self.assertTrue(x_sample.name.startswith("x/custom_sample")) + self.assertTrue(x_sample_duplicate.name.startswith("x/custom_sample_1")) + self.assertTrue(x_log_prob.name.startswith("x/custom_log_prob")) + def testStrWorksCorrectlyScalar(self): normal = tfd.Normal(loc=np.float16(0), scale=np.float16(1)) self.assertEqual( diff --git a/tensorflow/contrib/distributions/python/ops/autoregressive.py b/tensorflow/contrib/distributions/python/ops/autoregressive.py index 69f3d57ff0..88ed012784 100644 --- a/tensorflow/contrib/distributions/python/ops/autoregressive.py +++ b/tensorflow/contrib/distributions/python/ops/autoregressive.py @@ -145,7 +145,7 @@ class Autoregressive(distribution_lib.Distribution): ValueError: if `num_steps < 1`. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._distribution_fn = distribution_fn self._sample0 = sample0 self._distribution0 = (distribution_fn() if sample0 is None diff --git a/tensorflow/contrib/distributions/python/ops/binomial.py b/tensorflow/contrib/distributions/python/ops/binomial.py index 6a1bb39ab2..12d1603178 100644 --- a/tensorflow/contrib/distributions/python/ops/binomial.py +++ b/tensorflow/contrib/distributions/python/ops/binomial.py @@ -164,7 +164,7 @@ class Binomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = self._maybe_assert_valid_total_count( ops.convert_to_tensor(total_count, name="total_count"), validate_args) diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index 6f5d724a2a..daacfe657f 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -121,7 +121,7 @@ class Cauchy(distribution.Distribution): TypeError: if `loc` and `scale` have different `dtype`. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/chi2.py b/tensorflow/contrib/distributions/python/ops/chi2.py index e610f469e5..c77c5fd208 100644 --- a/tensorflow/contrib/distributions/python/ops/chi2.py +++ b/tensorflow/contrib/distributions/python/ops/chi2.py @@ -88,7 +88,7 @@ class Chi2(gamma.Gamma): # not true in the parent class "gamma." therefore, passing # allow_nan_stats=True # through to the parent class results in unnecessary asserts. - with ops.name_scope(name, values=[df]): + with ops.name_scope(name, values=[df]) as name: with ops.control_dependencies([ check_ops.assert_positive(df), ] if validate_args else []): @@ -120,7 +120,7 @@ class Chi2WithAbsDf(Chi2): allow_nan_stats=True, name="Chi2WithAbsDf"): parameters = locals() - with ops.name_scope(name, values=[df]): + with ops.name_scope(name, values=[df]) as name: super(Chi2WithAbsDf, self).__init__( df=math_ops.floor( math_ops.abs(df, name="abs_df"), diff --git a/tensorflow/contrib/distributions/python/ops/deterministic.py b/tensorflow/contrib/distributions/python/ops/deterministic.py index 8049522e9f..a42350430e 100644 --- a/tensorflow/contrib/distributions/python/ops/deterministic.py +++ b/tensorflow/contrib/distributions/python/ops/deterministic.py @@ -87,7 +87,7 @@ class _BaseDeterministic(distribution.Distribution): ValueError: If `loc` is a scalar. """ parameters = locals() - with ops.name_scope(name, values=[loc, atol, rtol]): + with ops.name_scope(name, values=[loc, atol, rtol]) as name: loc = ops.convert_to_tensor(loc, name="loc") if is_vector and validate_args: msg = "Argument loc must be at least rank 1." diff --git a/tensorflow/contrib/distributions/python/ops/geometric.py b/tensorflow/contrib/distributions/python/ops/geometric.py index 8f190e48a7..53dd42f4c8 100644 --- a/tensorflow/contrib/distributions/python/ops/geometric.py +++ b/tensorflow/contrib/distributions/python/ops/geometric.py @@ -86,7 +86,7 @@ class Geometric(distribution.Distribution): """ parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index 8d05ad6b80..2c261073ee 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -125,7 +125,7 @@ class _Gumbel(distribution.Distribution): TypeError: if loc and scale are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/half_normal.py b/tensorflow/contrib/distributions/python/ops/half_normal.py index fc0751a6e0..d0df2befd6 100644 --- a/tensorflow/contrib/distributions/python/ops/half_normal.py +++ b/tensorflow/contrib/distributions/python/ops/half_normal.py @@ -106,7 +106,7 @@ class HalfNormal(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._scale = array_ops.identity(scale, name="scale") diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py index 7dcb3e3ac4..d7d0b0c78b 100644 --- a/tensorflow/contrib/distributions/python/ops/independent.py +++ b/tensorflow/contrib/distributions/python/ops/independent.py @@ -119,7 +119,7 @@ class Independent(distribution_lib.Distribution): parameters = locals() name = name or "Independent" + distribution.name self._distribution = distribution - with ops.name_scope(name): + with ops.name_scope(name) as name: if reinterpreted_batch_ndims is None: reinterpreted_batch_ndims = self._get_default_reinterpreted_batch_ndims( distribution) diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index 51ac61dcf6..502bd4f493 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -126,7 +126,7 @@ class InverseGamma(distribution.Distribution): TypeError: if `concentration` and `rate` are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), check_ops.assert_positive(rate), @@ -281,7 +281,7 @@ class InverseGammaWithSoftplusConcentrationRate(InverseGamma): allow_nan_stats=True, name="InverseGammaWithSoftplusConcentrationRate"): parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: super(InverseGammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, name="softplus_concentration"), diff --git a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py index 192dede6ff..66682b2ff5 100644 --- a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py @@ -151,10 +151,11 @@ class Kumaraswamy(transformed_distribution.TransformedDistribution): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ - concentration1 = ops.convert_to_tensor( - concentration1, name="concentration1") - concentration0 = ops.convert_to_tensor( - concentration0, name="concentration0") + with ops.name_scope(name, values=[concentration1, concentration0]) as name: + concentration1 = ops.convert_to_tensor( + concentration1, name="concentration1") + concentration0 = ops.convert_to_tensor( + concentration0, name="concentration0") super(Kumaraswamy, self).__init__( distribution=uniform.Uniform( low=array_ops.zeros([], dtype=concentration1.dtype), diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 68e6bca5a5..c83b5bc2e3 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -120,7 +120,7 @@ class Logistic(distribution.Distribution): TypeError: if loc and scale are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index cef6a143fc..2ef294af2e 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -145,7 +145,7 @@ class Mixture(distribution.Distribution): "none of the components provide a static number of ndims") # Ensure that all batch and event ndims are consistent. - with ops.name_scope(name, values=[cat.logits]): + with ops.name_scope(name, values=[cat.logits]) as name: num_components = cat.event_size static_num_components = tensor_util.constant_value(num_components) if static_num_components is None: diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py index b93bdc5ab4..0b1301e551 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py +++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py @@ -131,7 +131,7 @@ class MixtureSameFamily(distribution.Distribution): `components_distribution` rightmost batch shape. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._mixture_distribution = mixture_distribution self._components_distribution = components_distribution self._runtime_assertions = [] diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag.py b/tensorflow/contrib/distributions/python/ops/mvn_diag.py index e862552880..e3236c2db9 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag.py @@ -194,7 +194,7 @@ class MultivariateNormalDiag( ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): # No need to validate_args while making diag_scale. The returned @@ -225,7 +225,7 @@ class MultivariateNormalDiagWithSoftplusScale(MultivariateNormalDiag): allow_nan_stats=True, name="MultivariateNormalDiagWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[scale_diag]): + with ops.name_scope(name, values=[scale_diag]) as name: super(MultivariateNormalDiagWithSoftplusScale, self).__init__( loc=loc, scale_diag=nn.softplus(scale_diag), diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index 413e88f03a..2f6a6f198c 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -218,7 +218,7 @@ class MultivariateNormalDiagPlusLowRank( parameters = locals() def _convert_to_tensor(x, name): return None if x is None else ops.convert_to_tensor(x, name=name) - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier, scale_perturb_factor, scale_perturb_diag]): diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py index 4bea99fbb7..86fcd4db54 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py @@ -159,7 +159,7 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): parameters = locals() # Convert the covariance_matrix up to a scale_tril and call MVNTriL. - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[loc, covariance_matrix]): if covariance_matrix is None: scale_tril = None diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index a739979289..44c92312c7 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -176,7 +176,7 @@ class MultivariateNormalLinearOperator( if not scale.dtype.is_floating: raise TypeError("`scale` parameter must have floating-point dtype.") - with ops.name_scope(name, values=[loc] + scale.graph_parents): + with ops.name_scope(name, values=[loc] + scale.graph_parents) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index 6c7dc4ca7a..d6f8b731cb 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -184,7 +184,7 @@ class MultivariateNormalTriL( return None if x is None else ops.convert_to_tensor(x, name=name) if loc is None and scale_tril is None: raise ValueError("Must specify one or both of `loc`, `scale_tril`.") - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[loc, scale_tril]): loc = _convert_to_tensor(loc, name="loc") scale_tril = _convert_to_tensor(scale_tril, name="scale_tril") diff --git a/tensorflow/contrib/distributions/python/ops/negative_binomial.py b/tensorflow/contrib/distributions/python/ops/negative_binomial.py index 3a58df80da..eeaf9c0a5e 100644 --- a/tensorflow/contrib/distributions/python/ops/negative_binomial.py +++ b/tensorflow/contrib/distributions/python/ops/negative_binomial.py @@ -91,7 +91,7 @@ class NegativeBinomial(distribution.Distribution): """ parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits, probs, validate_args=validate_args, name=name) with ops.control_dependencies( diff --git a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py index 46c2cc8b7a..fd5e8b10b0 100644 --- a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py @@ -116,7 +116,7 @@ class OneHotCategorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( name=name, logits=logits, probs=probs, validate_args=validate_args, multidimensional=True) diff --git a/tensorflow/contrib/distributions/python/ops/poisson.py b/tensorflow/contrib/distributions/python/ops/poisson.py index 02e97c0a2f..a84aad6fc9 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson.py +++ b/tensorflow/contrib/distributions/python/ops/poisson.py @@ -94,7 +94,7 @@ class Poisson(distribution.Distribution): TypeError: if `log_rate` is not a float-type. """ parameters = locals() - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: if (rate is None) == (log_rate is None): raise ValueError("Must specify exactly one of `rate` and `log_rate`.") elif log_rate is None: diff --git a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py index 92f2bba182..ea5514d345 100644 --- a/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py +++ b/tensorflow/contrib/distributions/python/ops/poisson_lognormal.py @@ -256,7 +256,7 @@ class PoissonLogNormalQuadratureCompound(distribution_lib.Distribution): `dtype`. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: if loc is not None: loc = ops.convert_to_tensor(loc, name="loc") if scale is not None: diff --git a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py index 8aebb79b91..1ef7651d03 100644 --- a/tensorflow/contrib/distributions/python/ops/quantized_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/quantized_distribution.py @@ -217,7 +217,7 @@ class QuantizedDistribution(distributions.Distribution): values = ( list(distribution.parameters.values()) + [low, high]) - with ops.name_scope(name, values=values): + with ops.name_scope(name, values=values) as name: self._dist = distribution if low is not None: diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py index b525809015..00c1ede5c1 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_bernoulli.py @@ -166,7 +166,7 @@ class RelaxedBernoulli(transformed_distribution.TransformedDistribution): ValueError: If both `probs` and `logits` are passed, or if neither. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs, temperature]): + with ops.name_scope(name, values=[logits, probs, temperature]) as name: with ops.control_dependencies([check_ops.assert_positive(temperature)] if validate_args else []): self._temperature = array_ops.identity(temperature, name="temperature") diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index ff33f327c7..b60a4872aa 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -163,7 +163,7 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs, temperature]): + with ops.name_scope(name, values=[logits, probs, temperature]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( name=name, logits=logits, probs=probs, validate_args=validate_args, diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index 0d8a192691..7a8f5bbfbe 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -134,7 +134,8 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): """ parameters = locals() - with ops.name_scope(name, values=[loc, scale, skewness, tailweight]): + with ops.name_scope(name, + values=[loc, scale, skewness, tailweight]) as name: loc = ops.convert_to_tensor(loc, name="loc") dtype = loc.dtype scale = ops.convert_to_tensor(scale, name="scale", dtype=dtype) diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 971d65c4a6..026de97be7 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -396,7 +396,7 @@ class VectorDiffeomixture(distribution_lib.Distribution): ValueError: if `not distribution.is_scalar_event`. """ parameters = locals() - with ops.name_scope(name, values=[mix_loc, temperature]): + with ops.name_scope(name, values=[mix_loc, temperature]) as name: if not scale or len(scale) < 2: raise ValueError("Must specify list (or list-like object) of scale " "LinearOperators, one for each component with " diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py index 526fe2d39a..e265b5d0f7 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py @@ -176,7 +176,7 @@ class VectorExponentialDiag( ValueError: if at most `scale_identity_multiplier` is specified. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=[ loc, scale_diag, scale_identity_multiplier]): # No need to validate_args while making diag_scale. The returned diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py index 9d5fd9ac41..89136d6760 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py @@ -181,7 +181,7 @@ class VectorExponentialLinearOperator( if not scale.dtype.is_floating: raise TypeError("`scale` parameter must have floating-point dtype.") - with ops.name_scope(name, values=[loc] + scale.graph_parents): + with ops.name_scope(name, values=[loc] + scale.graph_parents) as name: # Since expand_dims doesn't preserve constant-ness, we obtain the # non-dynamic value if possible. loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index 003c66b941..b8ac7c10ca 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -169,7 +169,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): name, values=[ loc, scale_diag, scale_identity_multiplier, skewness, tailweight - ]): + ]) as name: loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc tailweight = 1. if tailweight is None else tailweight has_default_skewness = skewness is None diff --git a/tensorflow/contrib/distributions/python/ops/vector_student_t.py b/tensorflow/contrib/distributions/python/ops/vector_student_t.py index 8c67647a61..23a01cc95c 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py +++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py @@ -178,7 +178,7 @@ class _VectorStudentT(transformed_distribution.TransformedDistribution): parameters = locals() graph_parents = [df, loc, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_factor, scale_perturb_diag] - with ops.name_scope(name): + with ops.name_scope(name) as name: with ops.name_scope("init", values=graph_parents): # The shape of the _VectorStudentT distribution is governed by the # relationship between df.batch_shape and affine.batch_shape. In diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index 5a8c94dabf..91453fed5d 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -109,7 +109,7 @@ class _WishartLinearOperator(distribution.Distribution): """ parameters = locals() self._cholesky_input_output_matrices = cholesky_input_output_matrices - with ops.name_scope(name) as ns: + with ops.name_scope(name) as name: with ops.name_scope("init", values=[df, scale_operator]): if not scale_operator.dtype.is_floating: raise TypeError( @@ -163,7 +163,7 @@ class _WishartLinearOperator(distribution.Distribution): parameters=parameters, graph_parents=([self._df, self._dimension] + self._scale_operator.graph_parents), - name=ns) + name=name) @property def df(self): @@ -531,7 +531,7 @@ class WishartCholesky(_WishartLinearOperator): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) if validate_args: @@ -647,7 +647,7 @@ class WishartFull(_WishartLinearOperator): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name) as ns: + with ops.name_scope(name) as name: with ops.name_scope("init", values=[scale]): scale = ops.convert_to_tensor(scale) if validate_args: @@ -666,5 +666,5 @@ class WishartFull(_WishartLinearOperator): cholesky_input_output_matrices=cholesky_input_output_matrices, validate_args=validate_args, allow_nan_stats=allow_nan_stats, - name=ns) + name=name) self._parameters = parameters diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py index 68aaf3815e..2c9f0e9a32 100644 --- a/tensorflow/python/ops/distributions/bernoulli.py +++ b/tensorflow/python/ops/distributions/bernoulli.py @@ -72,7 +72,7 @@ class Bernoulli(distribution.Distribution): ValueError: If p and logits are passed, or if neither are passed. """ parameters = locals() - with ops.name_scope(name): + with ops.name_scope(name) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, probs=probs, diff --git a/tensorflow/python/ops/distributions/beta.py b/tensorflow/python/ops/distributions/beta.py index 469bcadb8e..8beab99bf8 100644 --- a/tensorflow/python/ops/distributions/beta.py +++ b/tensorflow/python/ops/distributions/beta.py @@ -151,7 +151,7 @@ class Beta(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[concentration1, concentration0]): + with ops.name_scope(name, values=[concentration1, concentration0]) as name: self._concentration1 = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration1, name="concentration1"), validate_args) @@ -323,7 +323,7 @@ class BetaWithSoftplusConcentration(Beta): name="BetaWithSoftplusConcentration"): parameters = locals() with ops.name_scope(name, values=[concentration1, - concentration0]) as ns: + concentration0]) as name: super(BetaWithSoftplusConcentration, self).__init__( concentration1=nn.softplus(concentration1, name="softplus_concentration1"), @@ -331,7 +331,7 @@ class BetaWithSoftplusConcentration(Beta): name="softplus_concentration0"), validate_args=validate_args, allow_nan_stats=allow_nan_stats, - name=ns) + name=name) self._parameters = parameters diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py index 9161e3fa9f..66fa9e110c 100644 --- a/tensorflow/python/ops/distributions/categorical.py +++ b/tensorflow/python/ops/distributions/categorical.py @@ -183,7 +183,7 @@ class Categorical(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[logits, probs]): + with ops.name_scope(name, values=[logits, probs]) as name: self._logits, self._probs = distribution_util.get_logits_and_probs( logits=logits, probs=probs, diff --git a/tensorflow/python/ops/distributions/dirichlet.py b/tensorflow/python/ops/distributions/dirichlet.py index 25afeec936..eafcd5c78f 100644 --- a/tensorflow/python/ops/distributions/dirichlet.py +++ b/tensorflow/python/ops/distributions/dirichlet.py @@ -155,7 +155,7 @@ class Dirichlet(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[concentration]): + with ops.name_scope(name, values=[concentration]) as name: self._concentration = self._maybe_assert_valid_concentration( ops.convert_to_tensor(concentration, name="concentration"), validate_args) diff --git a/tensorflow/python/ops/distributions/dirichlet_multinomial.py b/tensorflow/python/ops/distributions/dirichlet_multinomial.py index 03a98c56ba..fe0ed7e07d 100644 --- a/tensorflow/python/ops/distributions/dirichlet_multinomial.py +++ b/tensorflow/python/ops/distributions/dirichlet_multinomial.py @@ -192,7 +192,7 @@ class DirichletMultinomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[total_count, concentration]): + with ops.name_scope(name, values=[total_count, concentration]) as name: # Broadcasting works because: # * The broadcasting convention is to prepend dimensions of size [1], and # we use the last dimension for the distribution, whereas diff --git a/tensorflow/python/ops/distributions/distribution.py b/tensorflow/python/ops/distributions/distribution.py index 7c43bf54fc..b6e6a01fc8 100644 --- a/tensorflow/python/ops/distributions/distribution.py +++ b/tensorflow/python/ops/distributions/distribution.py @@ -434,13 +434,19 @@ class Distribution(_BaseDistribution): for i, t in enumerate(graph_parents): if t is None or not tensor_util.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) + if name is None: + with ops.name_scope(type(self).__name__) as name: + pass + # If a name ends with a '/' it is a "name scope" and we use it as-is, after + # removing the trailing '/'. + name = name[:-1] if (name and name[-1] == "/") else name self._dtype = dtype self._reparameterization_type = reparameterization_type self._allow_nan_stats = allow_nan_stats self._validate_args = validate_args self._parameters = parameters or {} self._graph_parents = graph_parents - self._name = name or type(self).__name__ + self._name = name @classmethod def param_shapes(cls, sample_shape, name="DistributionParamShapes"): @@ -1157,7 +1163,7 @@ class Distribution(_BaseDistribution): @contextlib.contextmanager def _name_scope(self, name=None, values=None): """Helper function to standardize op scope.""" - with ops.name_scope(self.name): + with ops.name_scope(self.name + "/"): # use absolute name scope with ops.name_scope(name, values=( ([] if values is None else values) + self._graph_parents)) as scope: yield scope diff --git a/tensorflow/python/ops/distributions/exponential.py b/tensorflow/python/ops/distributions/exponential.py index 6345a76d48..cf0e729e1a 100644 --- a/tensorflow/python/ops/distributions/exponential.py +++ b/tensorflow/python/ops/distributions/exponential.py @@ -95,7 +95,7 @@ class Exponential(gamma.Gamma): # true in the parent class "Gamma." Therefore, passing # allow_nan_stats=True # through to the parent class results in unnecessary asserts. - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: self._rate = ops.convert_to_tensor(rate, name="rate") super(Exponential, self).__init__( concentration=array_ops.ones([], dtype=self._rate.dtype), @@ -144,7 +144,7 @@ class ExponentialWithSoftplusRate(Exponential): allow_nan_stats=True, name="ExponentialWithSoftplusRate"): parameters = locals() - with ops.name_scope(name, values=[rate]): + with ops.name_scope(name, values=[rate]) as name: super(ExponentialWithSoftplusRate, self).__init__( rate=nn.softplus(rate, name="softplus_rate"), validate_args=validate_args, diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py index adb1f4f9a8..d39f7c56d3 100644 --- a/tensorflow/python/ops/distributions/gamma.py +++ b/tensorflow/python/ops/distributions/gamma.py @@ -127,7 +127,7 @@ class Gamma(distribution.Distribution): TypeError: if `concentration` and `rate` are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: with ops.control_dependencies([ check_ops.assert_positive(concentration), check_ops.assert_positive(rate), @@ -262,7 +262,7 @@ class GammaWithSoftplusConcentrationRate(Gamma): allow_nan_stats=True, name="GammaWithSoftplusConcentrationRate"): parameters = locals() - with ops.name_scope(name, values=[concentration, rate]): + with ops.name_scope(name, values=[concentration, rate]) as name: super(GammaWithSoftplusConcentrationRate, self).__init__( concentration=nn.softplus(concentration, name="softplus_concentration"), diff --git a/tensorflow/python/ops/distributions/laplace.py b/tensorflow/python/ops/distributions/laplace.py index e98ac855c5..3ccfc618d1 100644 --- a/tensorflow/python/ops/distributions/laplace.py +++ b/tensorflow/python/ops/distributions/laplace.py @@ -101,7 +101,7 @@ class Laplace(distribution.Distribution): TypeError: if `loc` and `scale` are of different dtype. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") @@ -218,7 +218,7 @@ class LaplaceWithSoftplusScale(Laplace): allow_nan_stats=True, name="LaplaceWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: super(LaplaceWithSoftplusScale, self).__init__( loc=loc, scale=nn.softplus(scale, name="softplus_scale"), diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 4ae67a009b..ab77f5c1f8 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -183,7 +183,7 @@ class Multinomial(distribution.Distribution): name: Python `str` name prefixed to Ops created by this class. """ parameters = locals() - with ops.name_scope(name, values=[total_count, logits, probs]): + with ops.name_scope(name, values=[total_count, logits, probs]) as name: self._total_count = ops.convert_to_tensor(total_count, name="total_count") if validate_args: self._total_count = ( diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py index 32e8a49c81..20d4420e91 100644 --- a/tensorflow/python/ops/distributions/normal.py +++ b/tensorflow/python/ops/distributions/normal.py @@ -132,7 +132,7 @@ class Normal(distribution.Distribution): TypeError: if `loc` and `scale` have different `dtype`. """ parameters = locals() - with ops.name_scope(name, values=[loc, scale]): + with ops.name_scope(name, values=[loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(scale)] if validate_args else []): self._loc = array_ops.identity(loc, name="loc") @@ -244,7 +244,7 @@ class NormalWithSoftplusScale(Normal): allow_nan_stats=True, name="NormalWithSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[scale]): + with ops.name_scope(name, values=[scale]) as name: super(NormalWithSoftplusScale, self).__init__( loc=loc, scale=nn.softplus(scale, name="softplus_scale"), diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py index 9d9e65b4e8..961b07a7bd 100644 --- a/tensorflow/python/ops/distributions/student_t.py +++ b/tensorflow/python/ops/distributions/student_t.py @@ -158,7 +158,7 @@ class StudentT(distribution.Distribution): TypeError: if loc and scale are different dtypes. """ parameters = locals() - with ops.name_scope(name, values=[df, loc, scale]): + with ops.name_scope(name, values=[df, loc, scale]) as name: with ops.control_dependencies([check_ops.assert_positive(df)] if validate_args else []): self._df = array_ops.identity(df, name="df") @@ -350,7 +350,7 @@ class StudentTWithAbsDfSoftplusScale(StudentT): allow_nan_stats=True, name="StudentTWithAbsDfSoftplusScale"): parameters = locals() - with ops.name_scope(name, values=[df, scale]): + with ops.name_scope(name, values=[df, scale]) as name: super(StudentTWithAbsDfSoftplusScale, self).__init__( df=math_ops.floor(math_ops.abs(df)), loc=loc, diff --git a/tensorflow/python/ops/distributions/transformed_distribution.py b/tensorflow/python/ops/distributions/transformed_distribution.py index 1efcf9d32e..6290297153 100644 --- a/tensorflow/python/ops/distributions/transformed_distribution.py +++ b/tensorflow/python/ops/distributions/transformed_distribution.py @@ -258,7 +258,7 @@ class TransformedDistribution(distribution_lib.Distribution): parameters = locals() name = name or (("" if bijector is None else bijector.name) + distribution.name) - with ops.name_scope(name, values=[event_shape, batch_shape]): + with ops.name_scope(name, values=[event_shape, batch_shape]) as name: # For convenience we define some handy constants. self._zero = constant_op.constant(0, dtype=dtypes.int32, name="zero") self._empty = constant_op.constant([], dtype=dtypes.int32, name="empty") diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index ec623b55eb..b8acd0ded6 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -103,7 +103,7 @@ class Uniform(distribution.Distribution): InvalidArgumentError: if `low >= high` and `validate_args=False`. """ parameters = locals() - with ops.name_scope(name, values=[low, high]): + with ops.name_scope(name, values=[low, high]) as name: with ops.control_dependencies([ check_ops.assert_less( low, high, message="uniform not defined when low >= high.") -- GitLab From 7a212edc6b3ed6200158fe51acf4694a62ca6938 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Wed, 28 Mar 2018 02:55:01 -0400 Subject: [PATCH 600/960] Utility classes for writing Java source code from a C++ process (part 3) (#16120) * Add output streams for writing Java source code. * Rename variable 'declared_generics_names' to 'generic_namespace' * Merge Java streams into Source writer * Second pass of code review * Replace vectors and deques with lists --- tensorflow/java/BUILD | 4 +- tensorflow/java/src/gen/cc/java_defs.h | 45 +-- tensorflow/java/src/gen/cc/source_writer.cc | 297 +++++++++++++- tensorflow/java/src/gen/cc/source_writer.h | 191 +++++++-- .../java/src/gen/cc/source_writer_test.cc | 367 +++++++++++++++++- .../java/src/gen/resources/test.snippet.java | 2 + 6 files changed, 824 insertions(+), 82 deletions(-) create mode 100644 tensorflow/java/src/gen/resources/test.snippet.java diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 9dee1aa72b..7296205e24 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -311,9 +311,11 @@ tf_cc_test( srcs = [ "src/gen/cc/source_writer_test.cc", ], + data = [ + "src/gen/resources/test.snippet.java", + ], deps = [ ":java_op_gen_lib", - "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", ], diff --git a/tensorflow/java/src/gen/cc/java_defs.h b/tensorflow/java/src/gen/cc/java_defs.h index 615cdc165b..59f8beaee7 100644 --- a/tensorflow/java/src/gen/cc/java_defs.h +++ b/tensorflow/java/src/gen/cc/java_defs.h @@ -17,10 +17,7 @@ limitations under the License. #define TENSORFLOW_JAVA_SRC_GEN_CC_JAVA_DEFS_H_ #include -#include -#include - -#include "tensorflow/core/platform/env.h" +#include namespace tensorflow { namespace java { @@ -104,17 +101,17 @@ class Type { description_ = description; return *this; } - const std::vector& parameters() const { return parameters_; } + const std::list& parameters() const { return parameters_; } Type& add_parameter(const Type& parameter) { parameters_.push_back(parameter); return *this; } - const std::vector& annotations() const { return annotations_; } + const std::list& annotations() const { return annotations_; } Type& add_annotation(const Annotation& annotation) { annotations_.push_back(annotation); return *this; } - const std::deque& supertypes() const { return supertypes_; } + const std::list& supertypes() const { return supertypes_; } Type& add_supertype(const Type& type) { if (type.kind_ == CLASS) { supertypes_.push_front(type); // keep superclass at the front of the list @@ -141,9 +138,9 @@ class Type { string name_; string package_; string description_; - std::vector parameters_; - std::vector annotations_; - std::deque supertypes_; + std::list parameters_; + std::list annotations_; + std::list supertypes_; }; // Definition of a Java annotation @@ -223,16 +220,12 @@ class Method { return_description_ = description; return *this; } - const std::vector& arguments() const { return arguments_; } - Method& add_arguments(const std::vector& args) { - arguments_.insert(arguments_.cend(), args.cbegin(), args.cend()); - return *this; - } + const std::list& arguments() const { return arguments_; } Method& add_argument(const Variable& var) { arguments_.push_back(var); return *this; } - const std::vector& annotations() const { return annotations_; } + const std::list& annotations() const { return annotations_; } Method& add_annotation(const Annotation& annotation) { annotations_.push_back(annotation); return *this; @@ -244,29 +237,13 @@ class Method { bool constructor_; string description_; string return_description_; - std::vector arguments_; - std::vector annotations_; + std::list arguments_; + std::list annotations_; Method(const string& name, const Type& return_type, bool constructor) : name_(name), return_type_(return_type), constructor_(constructor) {} }; -// A piece of code to read from a file. -class Snippet { - public: - static Snippet Create(const string& fname, Env* env = Env::Default()) { - return Snippet(fname, env); - } - const string& data() const { return data_; } - - private: - string data_; - - Snippet(const string& fname, Env* env) { - TF_CHECK_OK(ReadFileToString(env, fname, &data_)); - } -}; - } // namespace java } // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc index 2da81f2911..214999af9a 100644 --- a/tensorflow/java/src/gen/cc/source_writer.cc +++ b/tensorflow/java/src/gen/cc/source_writer.cc @@ -14,49 +14,318 @@ limitations under the License. ==============================================================================*/ #include +#include +#include #include "tensorflow/java/src/gen/cc/source_writer.h" namespace tensorflow { +namespace java { -SourceWriter& SourceWriter::Append(const StringPiece& str) { - if (!str.empty()) { - if (newline_) { - DoAppend(left_margin_ + line_prefix_); - newline_ = false; - } - DoAppend(str); - } +SourceWriter::SourceWriter() { + // push an empty generic namespace at start, for simplification + generic_namespaces_.push(new GenericNamespace()); +} + +SourceWriter& SourceWriter::Indent(int tab) { + left_margin_.resize( + std::max(static_cast(left_margin_.size() + tab), 0), ' '); + return *this; +} + +SourceWriter& SourceWriter::Prefix(const char* line_prefix) { + line_prefix_ = line_prefix; return *this; } -SourceWriter& SourceWriter::Write(const string& str) { +SourceWriter& SourceWriter::Write(const StringPiece& str) { size_t line_pos = 0; do { size_t start_pos = line_pos; line_pos = str.find('\n', start_pos); if (line_pos != string::npos) { ++line_pos; - Append(StringPiece(str.data() + start_pos, line_pos - start_pos)); + Append(str.substr(start_pos, line_pos - start_pos)); newline_ = true; } else { - Append(StringPiece(str.data() + start_pos, str.size() - start_pos)); + Append(str.substr(start_pos, str.size() - start_pos)); } } while (line_pos != string::npos && line_pos < str.size()); return *this; } +SourceWriter& SourceWriter::WriteFromFile(const string& fname, Env* env) { + string data_; + TF_CHECK_OK(ReadFileToString(env, fname, &data_)); + return Write(data_); +} + +SourceWriter& SourceWriter::Append(const StringPiece& str) { + if (!str.empty()) { + if (newline_) { + DoAppend(left_margin_ + line_prefix_); + newline_ = false; + } + DoAppend(str); + } + return *this; +} + +SourceWriter& SourceWriter::AppendType(const Type& type) { + if (type.kind() == Type::Kind::GENERIC && type.name().empty()) { + Append("?"); + } else { + Append(type.name()); + } + if (!type.parameters().empty()) { + Append("<"); + for (const Type& t : type.parameters()) { + if (&t != &type.parameters().front()) { + Append(", "); + } + AppendType(t); + } + Append(">"); + } + return *this; +} + SourceWriter& SourceWriter::EndLine() { Append("\n"); newline_ = true; return *this; } -SourceWriter& SourceWriter::Indent(int tab) { - left_margin_.resize(std::max(static_cast(left_margin_.size() + tab), 0), - ' '); +SourceWriter& SourceWriter::BeginMethod(const Method& method, int modifiers) { + GenericNamespace* generic_namespace = PushGenericNamespace(modifiers); + if (!method.constructor()) { + generic_namespace->Visit(method.return_type()); + } + for (const Variable& v : method.arguments()) { + generic_namespace->Visit(v.type()); + } + EndLine(); + WriteDoc(method.description(), method.return_description(), + &method.arguments()); + if (!method.annotations().empty()) { + WriteAnnotations(method.annotations()); + } + WriteModifiers(modifiers); + if (!generic_namespace->declared_types().empty()) { + WriteGenerics(generic_namespace->declared_types()); + Append(" "); + } + if (!method.constructor()) { + AppendType(method.return_type()).Append(" "); + } + Append(method.name()).Append("("); + for (const Variable& v : method.arguments()) { + if (&v != &method.arguments().front()) { + Append(", "); + } + AppendType(v.type()).Append(v.variadic() ? "... " : " ").Append(v.name()); + } + return Append(")").BeginBlock(); +} + +SourceWriter& SourceWriter::EndMethod() { + EndBlock(); + PopGenericNamespace(); return *this; } +SourceWriter& SourceWriter::BeginType(const Type& type, + const std::list* dependencies, int modifiers) { + if (!type.package().empty()) { + Append("package ").Append(type.package()).Append(";").EndLine(); + } + if (dependencies != nullptr && !dependencies->empty()) { + TypeImporter type_importer(type.package()); + for (const Type& t : *dependencies) { + type_importer.Visit(t); + } + EndLine(); + for (const string& s : type_importer.imports()) { + Append("import ").Append(s).Append(";").EndLine(); + } + } + return BeginInnerType(type, modifiers); +} + +SourceWriter& SourceWriter::BeginInnerType(const Type& type, int modifiers) { + GenericNamespace* generic_namespace = PushGenericNamespace(modifiers); + generic_namespace->Visit(type); + EndLine(); + WriteDoc(type.description()); + if (!type.annotations().empty()) { + WriteAnnotations(type.annotations()); + } + WriteModifiers(modifiers); + CHECK_EQ(Type::Kind::CLASS, type.kind()) << ": Not supported yet"; + Append("class ").Append(type.name()); + if (!generic_namespace->declared_types().empty()) { + WriteGenerics(generic_namespace->declared_types()); + } + if (!type.supertypes().empty()) { + bool first_interface = true; + for (const Type& t : type.supertypes()) { + if (t.kind() == Type::CLASS) { // superclass is always first in list + Append(" extends "); + } else if (first_interface) { + Append(" implements "); + first_interface = false; + } else { + Append(", "); + } + AppendType(t); + } + } + return BeginBlock(); +} + +SourceWriter& SourceWriter::EndType() { + EndBlock(); + PopGenericNamespace(); + return *this; +} + +SourceWriter& SourceWriter::WriteFields(const std::list& fields, + int modifiers) { + EndLine(); + for (const Variable& v : fields) { + WriteModifiers(modifiers); + AppendType(v.type()).Append(" ").Append(v.name()).Append(";"); + EndLine(); + } + return *this; +} + +SourceWriter& SourceWriter::WriteModifiers(int modifiers) { + if (modifiers & PUBLIC) { + Append("public "); + } else if (modifiers & PROTECTED) { + Append("protected "); + } else if (modifiers & PRIVATE) { + Append("private "); + } + if (modifiers & STATIC) { + Append("static "); + } + if (modifiers & FINAL) { + Append("final "); + } + return *this; +} + +SourceWriter& SourceWriter::WriteDoc(const string& description, + const string& return_description, const std::list* parameters) { + if (description.empty() && return_description.empty() + && (parameters == nullptr || parameters->empty())) { + return *this; // no doc to write + } + bool do_line_break = false; + Append("/**").EndLine().Prefix(" * "); + if (!description.empty()) { + Write(description).EndLine(); + do_line_break = true; + } + if (parameters != nullptr && !parameters->empty()) { + if (do_line_break) { + EndLine(); + do_line_break = false; + } + for (const Variable& v : *parameters) { + Append("@param ").Append(v.name()); + if (!v.description().empty()) { + Append(" ").Write(v.description()); + } + EndLine(); + } + } + if (!return_description.empty()) { + if (do_line_break) { + EndLine(); + do_line_break = false; + } + Append("@return ").Write(return_description).EndLine(); + } + return Prefix("").Append(" **/").EndLine(); +} + +SourceWriter& SourceWriter::WriteAnnotations( + const std::list& annotations) { + for (const Annotation& a : annotations) { + Append("@" + a.name()); + if (!a.attributes().empty()) { + Append("(").Append(a.attributes()).Append(")"); + } + EndLine(); + } + return *this; +} + +SourceWriter& SourceWriter::WriteGenerics( + const std::list& generics) { + Append("<"); + for (const Type* pt : generics) { + if (pt != generics.front()) { + Append(", "); + } + Append(pt->name()); + if (!pt->supertypes().empty()) { + Append(" extends ").AppendType(pt->supertypes().front()); + } + } + return Append(">"); +} + +SourceWriter::GenericNamespace* SourceWriter::PushGenericNamespace( + int modifiers) { + GenericNamespace* generic_namespace; + if (modifiers & STATIC) { + generic_namespace = new GenericNamespace(); + } else { + generic_namespace = new GenericNamespace(generic_namespaces_.top()); + } + generic_namespaces_.push(generic_namespace); + return generic_namespace; +} + +void SourceWriter::PopGenericNamespace() { + GenericNamespace* generic_namespace = generic_namespaces_.top(); + generic_namespaces_.pop(); + delete generic_namespace; +} + +void SourceWriter::TypeVisitor::Visit(const Type& type) { + DoVisit(type); + for (const Type& t : type.parameters()) { + DoVisit(t); + } + for (const Annotation& t : type.annotations()) { + DoVisit(t); + } + for (const Type& t : type.supertypes()) { + DoVisit(t); + } +} + +void SourceWriter::GenericNamespace::DoVisit(const Type& type) { + // ignore non-generic parameters, wildcards and generics already declared + if (type.kind() == Type::GENERIC + && !type.IsWildcard() + && generic_names_.find(type.name()) == generic_names_.end()) { + declared_types_.push_back(&type); + generic_names_.insert(type.name()); + } +} + +void SourceWriter::TypeImporter::DoVisit(const Type& type) { + if (!type.package().empty() && type.package() != current_package_) { + imports_.insert(type.package() + '.' + type.name()); + } +} + +} // namespace java } // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/source_writer.h b/tensorflow/java/src/gen/cc/source_writer.h index bff26eb185..6abe13b5d2 100644 --- a/tensorflow/java/src/gen/cc/source_writer.h +++ b/tensorflow/java/src/gen/cc/source_writer.h @@ -17,45 +17,23 @@ limitations under the License. #define TENSORFLOW_JAVA_SRC_GEN_CC_SOURCE_WRITER_H_ #include +#include +#include +#include #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/java/src/gen/cc/java_defs.h" namespace tensorflow { +namespace java { -// A utility class for writing source code, normally generated at -// compile-time. -// -// Source writers are language-agnostic and therefore only expose generic -// methods common to most languages. Extend or wrap this class to implement -// language-specific features. -// -// Note: if you are looking to reuse this class for generating code in another -// language than Java, please do by moving it at the '//tensorflow/core/lib/io' -// level. +// A class for writing Java source code. class SourceWriter { public: + SourceWriter(); virtual ~SourceWriter() = default; - // Returns true if the writer is at the beginnig of a new line - bool newline() const { return newline_; } - - // Appends a piece of code or text. - // - // It is expected that no newline character is present in the data provided, - // otherwise Write() must be used. - SourceWriter& Append(const StringPiece& str); - - // Writes a block of code or text. - // - // The data might potentially contain newline characters, therefore it will - // be scanned to ensure that each line is indented and prefixed properly, - // making it a bit slower than Append(). - SourceWriter& Write(const string& text); - - // Appends a newline character and start writing on a new line. - SourceWriter& EndLine(); - // Indents following lines with white spaces. // // Indentation is cumulative, i.e. the provided tabulation is added to the @@ -75,18 +53,166 @@ class SourceWriter { // Indent(2)->Prefix("//") will result in prefixing lines with " //". // // An empty value ("") will remove any line prefix that was previously set. - SourceWriter& Prefix(const char* line_prefix) { - line_prefix_ = line_prefix; - return *this; + SourceWriter& Prefix(const char* line_prefix); + + // Writes a source code snippet. + // + // The data might potentially contain newline characters, therefore it will + // be scanned to ensure that each line is indented and prefixed properly, + // making it a bit slower than Append(). + SourceWriter& Write(const StringPiece& text); + + // Writes a source code snippet read from a file. + // + // All lines of the file at the provided path will be read and written back + // to the output of this writer in regard of its current attributes (e.g. + // the indentation, prefix, etc.) + SourceWriter& WriteFromFile(const string& fname, Env* env = Env::Default()); + + // Appends a piece of source code. + // + // It is expected that no newline character is present in the data provided, + // otherwise Write() must be used. + SourceWriter& Append(const StringPiece& str); + + // Appends a type to the current line. + // + // The type is written in its simple form (i.e. not prefixed by its package) + // and followed by any parameter types it has enclosed in brackets (<>). + SourceWriter& AppendType(const Type& type); + + // Appends a newline character. + // + // Data written after calling this method will start on a new line, in respect + // of the current indentation. + SourceWriter& EndLine(); + + // Begins a block of source code. + // + // This method appends a new opening brace to the current data and indent the + // next lines according to Google Java Style Guide. The block can optionally + // be preceded by an expression (e.g. Append("if(true)").BeginBlock();) + SourceWriter& BeginBlock() { + return Append(newline_ ? "{" : " {").EndLine().Indent(2); + } + + // Ends the current block of source code. + // + // This method appends a new closing brace to the current data and outdent the + // next lines back to the margin used before BeginBlock() was invoked. + SourceWriter& EndBlock() { + return Indent(-2).Append("}").EndLine(); } + // Begins to write a method. + // + // This method outputs the signature of the Java method from the data passed + // in the 'method' parameter and starts a new block. Additionnal modifiers can + // also be passed in parameter to define the accesses and the scope of this + // method. + SourceWriter& BeginMethod(const Method& method, int modifiers = 0); + + // Ends the current method. + // + // This method ends the block of code that has begun when invoking + // BeginMethod() prior to this. + SourceWriter& EndMethod(); + + // Begins to write the main type of a source file. + // + // This method outputs the declaration of the Java type from the data passed + // in the 'type' parameter and starts a new block. Additionnal modifiers can + // also be passed in parameter to define the accesses and the scope of this + // type. + // + // If not null, all types found in the 'dependencies' list will be imported + // before declaring the new type. + SourceWriter& BeginType(const Type& clazz, + const std::list* dependencies, int modifiers = 0); + + // Begins to write a new inner type. + // + // This method outputs the declaration of the Java type from the data passed + // in the 'type' parameter and starts a new block. Additionnal modifiers can + // also be passed in parameter to define the accesses and the scope of this + // type. + SourceWriter& BeginInnerType(const Type& type, int modifiers = 0); + + // Ends the current type. + // + // This method ends the block of code that has begun when invoking + // BeginType() or BeginInnerType() prior to this. + SourceWriter& EndType(); + + // Writes a list of variables as fields of a type. + // + // This method must be called within the definition of a type (see BeginType() + // or BeginInnerType()). Additional modifiers can also be passed in parameter + // to define the accesses and the scope of those fields. + SourceWriter& WriteFields(const std::list& fields, + int modifiers = 0); + protected: virtual void DoAppend(const StringPiece& str) = 0; private: + // A utility base class for visiting elements of a type. + class TypeVisitor { + public: + virtual ~TypeVisitor() = default; + void Visit(const Type& type); + + protected: + virtual void DoVisit(const Type& type) = 0; + }; + + // A utility class for keeping track of declared generics in a given scope. + class GenericNamespace : public TypeVisitor { + public: + GenericNamespace() = default; + explicit GenericNamespace(const GenericNamespace* parent) + : generic_names_(parent->generic_names_) {} + std::list declared_types() { + return declared_types_; + } + protected: + virtual void DoVisit(const Type& type); + + private: + std::list declared_types_; + std::set generic_names_; + }; + + // A utility class for collecting a list of import statements to declare. + class TypeImporter : public TypeVisitor { + public: + explicit TypeImporter(const string& current_package) + : current_package_(current_package) {} + virtual ~TypeImporter() = default; + const std::set imports() { + return imports_; + } + protected: + virtual void DoVisit(const Type& type); + + private: + string current_package_; + std::set imports_; + }; + string left_margin_; string line_prefix_; bool newline_ = true; + std::stack generic_namespaces_; + + SourceWriter& WriteModifiers(int modifiers); + SourceWriter& WriteDoc(const string& description, + const string& return_description = "", + const std::list* parameters = nullptr); + SourceWriter& WriteAnnotations(const std::list& annotations); + SourceWriter& WriteGenerics(const std::list& generics); + GenericNamespace* PushGenericNamespace(int modifiers); + void PopGenericNamespace(); }; // A writer that outputs source code into a file. @@ -128,6 +254,7 @@ class SourceBufferWriter : public SourceWriter { string* buffer_; }; +} // namespace java } // namespace tensorflow #endif // TENSORFLOW_JAVA_SRC_GEN_CC_SOURCE_WRITER_H_ diff --git a/tensorflow/java/src/gen/cc/source_writer_test.cc b/tensorflow/java/src/gen/cc/source_writer_test.cc index e973895754..6926a5a411 100644 --- a/tensorflow/java/src/gen/cc/source_writer_test.cc +++ b/tensorflow/java/src/gen/cc/source_writer_test.cc @@ -13,11 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/java/src/gen/cc/source_writer.h" +#include + #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/java/src/gen/cc/java_defs.h" +#include "tensorflow/java/src/gen/cc/source_writer.h" namespace tensorflow { +namespace java { namespace { TEST(AppendTest, SingleLineText) { @@ -211,5 +215,366 @@ TEST(MarginTest, EmptyPrefix) { ASSERT_STREQ(expected, writer.str().data()); } +TEST(StreamTest, BlocksAndLines) { + SourceBufferWriter writer; + + writer.Append("int i = 0;").EndLine() + .Append("int j = 10;").EndLine() + .Append("if (true)") + .BeginBlock() + .Append("int aLongWayToTen = 0;").EndLine() + .Append("while (++i <= j)") + .BeginBlock() + .Append("++aLongWayToTen;").EndLine() + .EndBlock() + .EndBlock(); + + const char* expected = + "int i = 0;\n" + "int j = 10;\n" + "if (true) {\n" + " int aLongWayToTen = 0;\n" + " while (++i <= j) {\n" + " ++aLongWayToTen;\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(StreamTest, Types) { + SourceBufferWriter writer; + Type generic = Type::Generic("T").add_supertype(Type::Class("Number")); + + writer.AppendType(Type::Int()).Append(", ") + .AppendType(Type::Class("String")).Append(", ") + .AppendType(generic).Append(", ") + .AppendType(Type::ListOf(generic)).Append(", ") + .AppendType(Type::ListOf(Type::IterableOf(generic))).Append(", ") + .AppendType(Type::ListOf(Type::Generic())); + + const char* expected = + "int, String, T, List, List>, List"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(StreamTest, FileSnippet) { + SourceBufferWriter writer; + const string& fname = "tensorflow/java/src/gen/resources/test.snippet.java"; + + writer.WriteFromFile(fname) + .BeginBlock() + .WriteFromFile(fname) + .EndBlock(); + + const char* expected = + "// Here is a little snippet\n" + "System.out.println(\"Hello!\");\n" + "{\n" + " // Here is a little snippet\n" + " System.out.println(\"Hello!\");\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, SimpleClass) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + + writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, SimpleClassWithDependencies) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + std::list deps; + deps.push_back(Type::Class("TypeA", "org.test.sub")); + deps.push_back(Type::Class("TypeA", "org.test.sub")); // a second time + deps.push_back(Type::Class("TypeB", "org.other")); + deps.push_back(Type::Class("SamePackageType", "org.tensorflow")); + deps.push_back(Type::Class("NoPackageType")); + + writer.BeginType(clazz, &deps, PUBLIC).EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "import org.other.TypeB;\n" + "import org.test.sub.TypeA;\n\n" + "public class Test {\n}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, AnnotatedAndDocumentedClass) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + clazz.description("This class has a\n

\nmultiline description."); + clazz.add_annotation(Annotation::Create("Bean")); + clazz.add_annotation(Annotation::Create("SuppressWarnings") + .attributes("\"rawtypes\"")); + + writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "/**\n" + " * This class has a\n" + " *

\n" + " * multiline description.\n" + " **/\n" + "@Bean\n" + "@SuppressWarnings(\"rawtypes\")\n" + "public class Test {\n}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, ParameterizedClass) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + clazz.add_parameter(Type::Generic("T")); + clazz.add_parameter(Type::Generic("U").add_supertype(Type::Class("Number"))); + + writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, ParameterizedClassAndSupertypes) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type type_t = Type::Generic("T"); + clazz.add_parameter(type_t); + Type type_u = Type::Generic("U").add_supertype(Type::Class("Number")); + clazz.add_parameter(type_u); + clazz.add_supertype(Type::Interface("Parametrizable").add_parameter(type_u)); + clazz.add_supertype(Type::Interface("Runnable")); + clazz.add_supertype(Type::Class("SuperTest").add_parameter(type_t)); + + writer.BeginType(clazz, nullptr, PUBLIC).EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test" + " extends SuperTest implements Parametrizable, Runnable {\n}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, ParameterizedClassFields) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type type_t = Type::Generic("T").add_supertype(Type::Class("Number")); + clazz.add_parameter(type_t); + std::list static_fields; + static_fields.push_back(Variable::Create("field1", Type::Class("String"))); + std::list member_fields; + member_fields.push_back(Variable::Create("field2", Type::Class("String"))); + member_fields.push_back(Variable::Create("field3", type_t)); + + writer.BeginType(clazz, nullptr, PUBLIC) + .WriteFields(static_fields, STATIC | PUBLIC | FINAL) + .WriteFields(member_fields, PRIVATE) + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public static final String field1;\n" + " \n" + " private String field2;\n" + " private T field3;\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, SimpleInnerClass) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type inner_class = Type::Class("InnerTest"); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginInnerType(inner_class, PUBLIC) + .EndType() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public class InnerTest {\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteType, StaticParameterizedInnerClass) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type type_t = Type::Generic("T").add_supertype(Type::Class("Number")); + clazz.add_parameter(type_t); + Type inner_class = Type::Class("InnerTest"); + inner_class.add_parameter(type_t); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginInnerType(inner_class, PUBLIC | STATIC) + .EndType() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public static class InnerTest {\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteMethod, SimpleMethod) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Method method = Method::Create("doNothing", Type::Void()); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginMethod(method, PUBLIC).EndMethod() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public void doNothing() {\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteMethod, AnnotatedAndDocumentedMethod) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Method method = Method::Create("doNothing", Type::Void()); + method.description("This method has a\n

\nmultiline description."); + method.add_annotation(Annotation::Create("Override")); + method.add_annotation(Annotation::Create("SuppressWarnings") + .attributes("\"rawtypes\"")); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginMethod(method, PUBLIC).EndMethod() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " /**\n" + " * This method has a\n" + " *

\n" + " * multiline description.\n" + " **/\n" + " @Override\n" + " @SuppressWarnings(\"rawtypes\")\n" + " public void doNothing() {\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteMethod, DocumentedMethodWithArguments) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Method method = Method::Create("boolToInt", Type::Int()); + method.description("Converts a boolean to an int"); + method.return_description("int value for this boolean"); + method.add_argument(Variable::Create("b", Type::Boolean())); + Variable reverse = Variable::Create("reverse", Type::Boolean()); + reverse.description("if true, value is reversed"); + method.add_argument(reverse); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginMethod(method, PUBLIC) + .Append("if (b && !reverse)") + .BeginBlock() + .Append("return 1;").EndLine() + .EndBlock() + .Append("return 0;").EndLine() + .EndMethod() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " /**\n" + " * Converts a boolean to an int\n" + " * \n" + " * @param b\n" + " * @param reverse if true, value is reversed\n" + " * @return int value for this boolean\n" + " **/\n" + " public int boolToInt(boolean b, boolean reverse) {\n" + " if (b && !reverse) {\n" + " return 1;\n" + " }\n" + " return 0;\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteMethod, ParameterizedMethod) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type type_t = Type::Generic("T").add_supertype(Type::Class("Number")); + clazz.add_parameter(type_t); + Method method = Method::Create("doNothing", type_t); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginMethod(method, PUBLIC) + .Append("return null;").EndLine() + .EndMethod() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public T doNothing() {\n" + " return null;\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + +TEST(WriteMethod, StaticParameterizedMethod) { + SourceBufferWriter writer; + Type clazz = Type::Class("Test", "org.tensorflow"); + Type type_t = Type::Generic("T").add_supertype(Type::Class("Number")); + clazz.add_parameter(type_t); + Method method = Method::Create("doNothing", type_t); + + writer.BeginType(clazz, nullptr, PUBLIC) + .BeginMethod(method, PUBLIC | STATIC) + .Append("return null;").EndLine() + .EndMethod() + .EndType(); + + const char* expected = + "package org.tensorflow;\n\n" + "public class Test {\n" + " \n" + " public static T doNothing() {\n" + " return null;\n" + " }\n" + "}\n"; + ASSERT_STREQ(expected, writer.str().data()); +} + } // namespace +} // namespace java } // namespace tensorflow diff --git a/tensorflow/java/src/gen/resources/test.snippet.java b/tensorflow/java/src/gen/resources/test.snippet.java new file mode 100644 index 0000000000..5e412a9aef --- /dev/null +++ b/tensorflow/java/src/gen/resources/test.snippet.java @@ -0,0 +1,2 @@ +// Here is a little snippet +System.out.println("Hello!"); -- GitLab From 134f4ca0a70ef0373f5436b890be0f8585badb34 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 00:57:37 -0700 Subject: [PATCH 601/960] Internal change PiperOrigin-RevId: 190735724 --- tensorflow/compiler/jit/xla_tensor_info.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/compiler/jit/xla_tensor_info.h b/tensorflow/compiler/jit/xla_tensor_info.h index fbd6ad770f..a02f642c37 100644 --- a/tensorflow/compiler/jit/xla_tensor_info.h +++ b/tensorflow/compiler/jit/xla_tensor_info.h @@ -71,6 +71,12 @@ class XlaTensorInfoManager : public AllocatorWrapper { // Creates a new XlaTensorInfoManager, delegating all DeallocateRaw calls to // allocator. XlaTensorInfoManager(Allocator* allocator) : AllocatorWrapper(allocator) {} + ~XlaTensorInfoManager() { + // Destroy the tensor info hashtable under the lock, to ensure all accesses + // to the hashtable are properly sequenced. + mutex_lock lock(lock_); + tensor_infos_.clear(); + } // Returns the XlaTensorInfo for the given device memory pointer or nullptr if // none exists. -- GitLab From 8f3f838d2b14c78ae778031231ff5460e4403c80 Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Thu, 29 Mar 2018 00:22:54 +0900 Subject: [PATCH 602/960] Fix typos (#18048) --- .../toco/graph_transformations/unpartition_embedding_lookup.cc | 2 +- tensorflow/contrib/verbs/rdma.h | 2 +- tensorflow/core/common_runtime/scoped_allocator_mgr.cc | 2 +- tensorflow/core/kernels/mkl_softmax_op.cc | 2 +- tensorflow/python/kernel_tests/init_ops_test.py | 2 +- tensorflow/python/ops/custom_gradient.py | 2 +- tensorflow/python/training/distribute.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc index 48c326651f..cbea39bcc0 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -165,7 +165,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { CHECK(mod_op && mod_op->type == OperatorType::kFloorMod) << "Unsupported partition strategy"; CHECK_EQ(mod_op, GetOpWithOutput(*model, indices_partition_op->inputs[1])) - << "Indices and data parition ops require the same partition strategy " + << "Indices and data partition ops require the same partition strategy " "and inputs"; // Glob together all of the gather data. This is not yet in the correct order. diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 94203ee2b3..c9df6beb6b 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -262,7 +262,7 @@ class RdmaTensorRequest { // Receive tensor content (RDMA write was completed). // // Decode proto if required and/or move to GPU if the content was not - // written to it directly (GPU direct is not avaliable). Afterwards, + // written to it directly (GPU direct is not available). Afterwards, // invoke Done(). void RecvTensorContent(); diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc index e1f70404e3..be79cc4507 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.cc +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.cc @@ -103,7 +103,7 @@ ScopedAllocatorContainer::~ScopedAllocatorContainer() { // In normal execution the table should be empty and all of its // contents deleted via Drop. When when a step ends early // (e.g. through abnormal termination) we need to clean up - // explicitly. So long as graph exection of the associated step has + // explicitly. So long as graph execution of the associated step has // completey terminated this should be safe. for (auto& it : allocators_) { if (it.second.field_index == ScopedAllocator::kBackingIndex) { diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index 170523b5b4..f79e18cff2 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -102,7 +102,7 @@ class MklSoftmaxOp : public OpKernel { // Softmax MklDnn output layout is same as input layout. auto dst_pd = src.GetUsrMemPrimDesc(); - // if input is MKL shape, ouput is also MKL shape. + // if input is MKL shape, output is also MKL shape. // if input is TF shape, output is also TF shape if (src_mkl_shape.IsMklTensor()) { output_mkl_shape.SetMklTensor(true); diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index c1755985ee..3c4d038ef9 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -618,7 +618,7 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): for dtype in [dtypes.float32]: for kernel_size in [[3], [8], [3, 5], [2, 4], [3, 3, 3], [2, 2, 2]]: tol = 1e-2 - # Check orthogonality by computing the 2-norms of the inputs and ouputs. + # Check orthogonality by computing the 2-norms of the inputs and outputs. if len(kernel_size) == 1: shape = [4, 32, 64] convolution = convolutional.conv1d diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index 9eacac1b37..dfa07abfc6 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -95,7 +95,7 @@ def custom_gradient(f): if not context.executing_eagerly(): if kwargs: raise ValueError( - "The custom_gradient decorator currently suports keywords " + "The custom_gradient decorator currently supports keywords " "arguments only when eager execution is enabled.") name = "CustomGradient-%s" % ops.uid() args = [ops.convert_to_tensor(x) for x in args] diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 9261e13230..3571a41d4f 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -519,7 +519,7 @@ class DistributionStrategy(object): in the distributed vs. single tower cases. """ - # TODO(josh11b): Raise an exception if variable paritioning requested before + # TODO(josh11b): Raise an exception if variable partitioning requested before # we add support. # TODO(josh11b): Also `parameter_device_index` property? # TODO(josh11b): `map()` -- GitLab From 119ed5aa2acb6df04595835f6dfa99f5422449f2 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 28 Mar 2018 08:25:14 -0700 Subject: [PATCH 603/960] Move ExecuteNode and CopyToDevice_Internal PiperOrigin-RevId: 190775681 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 139 ++++-------------- tensorflow/core/common_runtime/eager/BUILD | 17 +++ .../core/common_runtime/eager/execute.cc | 24 +++ .../core/common_runtime/eager/execute.h | 4 + .../core/common_runtime/eager/execute_node.h | 88 +++++++++++ 6 files changed, 161 insertions(+), 112 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/execute_node.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e57011a08b..a2d96357ac 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -31,6 +31,7 @@ tf_cuda_library( "//tensorflow/core/common_runtime/eager:context", "//tensorflow/core/common_runtime/eager:eager_executor", "//tensorflow/core/common_runtime/eager:execute", + "//tensorflow/core/common_runtime/eager:execute_node", "//tensorflow/core/common_runtime/eager:kernel_and_device", "//tensorflow/core/common_runtime/eager:tensor_handle", "//tensorflow/core/common_runtime/eager:copy_to_device_node", diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index ac7114f71e..028865d360 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -34,6 +34,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" #include "tensorflow/core/common_runtime/eager/execute.h" +#include "tensorflow/core/common_runtime/eager/execute_node.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -435,39 +436,8 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, namespace { -// TODO(apassos) move to TensorHandle -tensorflow::TensorHandle* TFE_TensorHandleCopyToDevice_Internal( - tensorflow::TensorHandle* h, TFE_Context* ctx, const char* device_name, - TF_Status* status) { - status->status = ctx->context.GetStatus(); - if (!status->status.ok()) { - return nullptr; - } - tensorflow::Device* dstd = ctx->context.HostCPU(); - if (device_name != nullptr && strlen(device_name) > 0) { - status->status = - ctx->context.device_mgr()->LookupDevice(device_name, &dstd); - if (!status->status.ok()) return nullptr; - } - if (ctx->context.Async()) { - // Note that `h` may not be currently ready. However execution order will - // make sure that `h` is ready before the copy is actually done. - tensorflow::CopyToDeviceNode* node = - new tensorflow::CopyToDeviceNode(h, dstd, &ctx->context); - tensorflow::TensorHandle* output = node->dst(); - // Note that calling Add makes `node` accessible by the EagerExecutor - // thread. So further accesses need to be thread-safe. - ctx->context.ExecutorAdd(node); - return output; - } else { - tensorflow::TensorHandle* output = nullptr; - status->status = h->CopyToDevice(&ctx->context, dstd, &output); - return output; - } -} - tensorflow::Status ValidateInputTypeAndPlacement( - TFE_Context* ctx, tensorflow::Device* host_device, + tensorflow::EagerContext* ctx, tensorflow::Device* host_device, tensorflow::Device* op_device, TFE_Op* op, const tensorflow::OpKernel* kernel) { const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types(); @@ -484,8 +454,8 @@ tensorflow::Status ValidateInputTypeAndPlacement( const tensorflow::Device* actual_device = handle_device == nullptr ? host_device : handle_device; if (expected_device != actual_device) { - switch (TFE_ContextGetDevicePlacementPolicy(ctx)) { - case TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32: + switch (ctx->GetDevicePlacementPolicy()) { + case tensorflow::DEVICE_PLACEMENT_SILENT_FOR_INT32: // TODO(xpan): See if we could bubble python related error up // to python level. if (handle->dtype == tensorflow::DT_INT32) { @@ -494,7 +464,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( break; } TF_FALLTHROUGH_INTENDED; - case TFE_DEVICE_PLACEMENT_EXPLICIT: + case tensorflow::DEVICE_PLACEMENT_EXPLICIT: return tensorflow::errors::InvalidArgument( "Tensors on conflicting devices:" " cannot compute ", @@ -506,7 +476,7 @@ tensorflow::Status ValidateInputTypeAndPlacement( " or transparently copied by using tfe.enable_eager_execution(" "tfe.DEVICE_PLACEMENT_SILENT). Copying tensors between devices" " may slow down your model"); - case TFE_DEVICE_PLACEMENT_WARN: + case tensorflow::DEVICE_PLACEMENT_WARN: LOG(WARNING) << "before computing " << op->name << " input #" << i << " was expected to be on " << expected_device->name() << " but is actually on " << actual_device->name() @@ -514,17 +484,14 @@ tensorflow::Status ValidateInputTypeAndPlacement( << "). This triggers a copy which can be a performance " "bottleneck."; break; - case TFE_DEVICE_PLACEMENT_SILENT: // Do nothing. + case tensorflow::DEVICE_PLACEMENT_SILENT: // Do nothing. break; } // We are only here if the policy is warn or silent copies, so we should // trigger a copy. - TF_Status* s = TF_NewStatus(); - tensorflow::TensorHandle* copied_tensor = - TFE_TensorHandleCopyToDevice_Internal( - handle, ctx, expected_device->name().c_str(), s); - tensorflow::Status status = s->status; - TF_DeleteStatus(s); + tensorflow::TensorHandle* copied_tensor = nullptr; + tensorflow::Status status = tensorflow::EagerCopyToDevice( + handle, ctx, expected_device->name().c_str(), &copied_tensor); if (!status.ok()) { if (copied_tensor != nullptr) copied_tensor->Unref(); return tensorflow::errors::Internal( @@ -576,68 +543,6 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, } -// TODO(agarwal): move EagerExecutor and EagerNode related code to a separate -// file. -class ExecuteNode : public tensorflow::EagerNode { - public: - ExecuteNode(TFE_Op* op, tensorflow::KernelAndDevice* kernel, - tensorflow::NodeExecStats* maybe_stats, - const tensorflow::DataTypeVector& output_dtypes, - TFE_TensorHandle** retvals, int num_retvals) - : tensorflow::EagerNode(op->ctx->context.NextId()), - ctx_(op->ctx), - op_device_(op->device), - inputs_(op->inputs), - kernel_(kernel), - maybe_stats_(maybe_stats), - retvals_(num_retvals) { - for (auto handle : inputs_) { - handle->Ref(); - } - TFE_Context* ctx = op->ctx; - for (int i = 0; i < num_retvals; ++i) { - tensorflow::TensorHandle* h = - new tensorflow::TensorHandle(id, output_dtypes[i], &ctx->context); - h->Ref(); - retvals[i] = new TFE_TensorHandle(h); - retvals_[i] = h; - } - } - - ~ExecuteNode() override { - for (auto handle : inputs_) { - handle->Unref(); - } - for (auto handle : retvals_) { - handle->Unref(); - } - } - - tensorflow::Status Run() override { - const tensorflow::Status status = tensorflow::EagerExecute( - &ctx_->context, op_device_, inputs_, kernel_, maybe_stats_.get(), - retvals_.begin(), retvals_.size()); - if (status.ok()) { - return status; - } else { - return tensorflow::Status( - status.code(), - tensorflow::strings::StrCat("Got error, \"", status.error_message(), - "\" while executing kernel ", - kernel_->kernel()->def().DebugString())); - } - } - - private: - TFE_Context* ctx_; - tensorflow::Device* op_device_; - tensorflow::gtl::InlinedVector inputs_; - tensorflow::KernelAndDevice* kernel_; - std::unique_ptr maybe_stats_; - tensorflow::gtl::InlinedVector retvals_; -}; - - #ifdef TENSORFLOW_EAGER_USE_XLA // Synthesizes and returns a wrapper function over `op`, which must be a // primitive op (e.g. matmul). @@ -961,8 +866,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // device from the one requested above. device = kernel->device(); } - status->status = ValidateInputTypeAndPlacement(ctx, ctx->context.HostCPU(), - device, op, kernel->kernel()); + status->status = ValidateInputTypeAndPlacement( + &ctx->context, ctx->context.HostCPU(), device, op, kernel->kernel()); if (!status->status.ok()) return; std::unique_ptr maybe_stats; if (ctx->context.ShouldStoreMetadata()) { @@ -977,9 +882,18 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Note that for async mode, execution order will make sure that all // input handles are ready before executing them. // TODO(agarwal): Consider executing "cheap" kernels inline for performance. - tensorflow::EagerNode* node = - new ExecuteNode(op, kernel, maybe_stats.release(), output_dtypes, - retvals, *num_retvals); + tensorflow::gtl::InlinedVector handle_retvals( + *num_retvals); + tensorflow::uint64 id = op->ctx->context.NextId(); + for (int i = 0; i < *num_retvals; ++i) { + tensorflow::TensorHandle* h = + new tensorflow::TensorHandle(id, output_dtypes[i], &op->ctx->context); + retvals[i] = new TFE_TensorHandle(h); + handle_retvals[i] = h; + } + tensorflow::EagerNode* node = new tensorflow::ExecuteNode( + id, &op->ctx->context, op->device, op->inputs, kernel, + maybe_stats.release(), output_dtypes, handle_retvals); ctx->context.ExecutorAdd(node); } else { // Execute checks if retvals[i] is nullptr or not to figure if it needs to @@ -999,8 +913,9 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - tensorflow::TensorHandle* handle = TFE_TensorHandleCopyToDevice_Internal( - h->handle, ctx, device_name, status); + tensorflow::TensorHandle* handle; + status->status = tensorflow::EagerCopyToDevice(h->handle, &ctx->context, + device_name, &handle); if (status->status.ok()) { return new TFE_TensorHandle(handle); } diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 56e2e1094f..9e8baab618 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -156,6 +156,23 @@ cc_library( hdrs = ["execute.h"], deps = [ ":context", + ":copy_to_device_node", + ":kernel_and_device", + ":tensor_handle", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) + +cc_library( + name = "execute_node", + hdrs = ["execute_node.h"], + deps = [ + ":context", + ":eager_executor", + ":execute", ":kernel_and_device", ":tensor_handle", "//tensorflow/core:core_cpu_lib", diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 9e6dddaa02..4f16e42568 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/copy_to_device_node.h" #include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/eager/tensor_handle.h" #include "tensorflow/core/framework/step_stats.pb.h" @@ -107,4 +108,27 @@ Status EagerExecute(EagerContext* ctx, Device* device, return Status::OK(); } +Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, + const char* device_name, TensorHandle** result) { + TF_RETURN_IF_ERROR(ctx->GetStatus()); + Device* dstd = ctx->HostCPU(); + if (device_name != nullptr && strlen(device_name) > 0) { + TF_RETURN_IF_ERROR(ctx->device_mgr()->LookupDevice(device_name, &dstd)); + } + if (ctx->Async()) { + // Note that `h` may not be currently ready. However execution order will + // make sure that `h` is ready before the copy is actually done. + CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); + TensorHandle* output = node->dst(); + // Note that calling Add makes `node` accessible by the EagerExecutor + // thread. So further accesses need to be thread-safe. + ctx->ExecutorAdd(node); + *result = output; + return Status::OK(); + } else { + TF_RETURN_IF_ERROR(h->CopyToDevice(ctx, dstd, result)); + return Status::OK(); + } +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/execute.h b/tensorflow/core/common_runtime/eager/execute.h index 1c0ea6bcde..0f6ad031e1 100644 --- a/tensorflow/core/common_runtime/eager/execute.h +++ b/tensorflow/core/common_runtime/eager/execute.h @@ -32,6 +32,10 @@ Status EagerExecute(EagerContext* ctx, Device* device, KernelAndDevice* kernel, NodeExecStats* maybe_stats, TensorHandle** retvals, int num_retvals); +// Low-level utility to copy a tensor handle from one device to another. +Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, + const char* device_name, TensorHandle** result); + } // namespace tensorflow #endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_H_ diff --git a/tensorflow/core/common_runtime/eager/execute_node.h b/tensorflow/core/common_runtime/eager/execute_node.h new file mode 100644 index 0000000000..93018dd969 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/execute_node.h @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_NODE_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_NODE_H_ + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/common_runtime/eager/eager_executor.h" +#include "tensorflow/core/common_runtime/eager/execute.h" +#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" +#include "tensorflow/core/common_runtime/eager/tensor_handle.h" +#include "tensorflow/core/framework/step_stats.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" + +namespace tensorflow { + +class ExecuteNode : public EagerNode { + public: + ExecuteNode(uint64 id, EagerContext* ctx, Device* op_device, + const tensorflow::gtl::InlinedVector& inputs, + KernelAndDevice* kernel, NodeExecStats* maybe_stats, + const DataTypeVector& output_dtypes, + const tensorflow::gtl::InlinedVector& retvals) + : EagerNode(id), + ctx_(ctx), + op_device_(op_device), + inputs_(inputs), + kernel_(kernel), + maybe_stats_(maybe_stats), + retvals_(retvals) { + for (auto handle : inputs_) { + handle->Ref(); + } + for (auto handle : retvals_) { + handle->Ref(); + } + } + + ~ExecuteNode() override { + for (auto handle : inputs_) { + handle->Unref(); + } + for (auto handle : retvals_) { + handle->Unref(); + } + } + + tensorflow::Status Run() override { + const Status status = + EagerExecute(ctx_, op_device_, inputs_, kernel_, maybe_stats_.get(), + retvals_.begin(), retvals_.size()); + if (status.ok()) { + return status; + } else { + return Status(status.code(), + strings::StrCat("Got error, \"", status.error_message(), + "\" while executing kernel ", + kernel_->kernel()->def().DebugString())); + } + } + + private: + tensorflow::EagerContext* ctx_; + tensorflow::Device* op_device_; + tensorflow::gtl::InlinedVector inputs_; + tensorflow::KernelAndDevice* kernel_; + std::unique_ptr maybe_stats_; + tensorflow::gtl::InlinedVector retvals_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EAGER_EXECUTE_NODE_H_ -- GitLab From 0a2a35e210d899d81a7e0478eeb49ea478b05bb8 Mon Sep 17 00:00:00 2001 From: hsm207 Date: Wed, 28 Mar 2018 12:05:21 -0400 Subject: [PATCH 604/960] Fix typo (#17947) --- tensorflow/python/eager/execution_callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/execution_callbacks.py b/tensorflow/python/eager/execution_callbacks.py index 535361498a..9a08259653 100644 --- a/tensorflow/python/eager/execution_callbacks.py +++ b/tensorflow/python/eager/execution_callbacks.py @@ -253,7 +253,7 @@ def add_execution_callback(callback): `f(op_type, op_name, attrs, inputs, outputs)`. `op_type` is the type of the operation that was just executed (e.g., `MatMul`). - `op_name` is the name of the operation that has was just executed. This + `op_name` is the name of the operation that was just executed. This name is set by the client who created the operation and can be `None` if it is unset. `attrs` contains the attributes of the operation as a `tuple` of -- GitLab From 5a213116df09c19c3ee0eecb5fc79444e5671e80 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 28 Mar 2018 10:03:06 -0700 Subject: [PATCH 605/960] Allow positional arguments in tf.keras.Model subclasses Makes the tf.keras.Layer.__call__ signature identical to tf.layers.Layer.__call__, but makes passing positional arguments other than "inputs" an error in most cases. The only case it's allowed is subclassed Models which do not have an "inputs" argument to their call() method. This means subclassed Models no longer need to pass all but the first argument as a keyword argument (or do list packing/unpacking) when call() takes multiple Tensor arguments. Includes errors for cases where whether an argument indicates an input is ambiguous, but otherwise doesn't do much to support non-"inputs" call() signatures for shape inference or deferred Tensors. The definition of an input/non-input is pretty clear, so that cleanup will mostly be tracking down all of the users of "self.call" and getting them to pass inputs as positional arguments if necessary. PiperOrigin-RevId: 190787899 --- .../eager/python/examples/spinn/spinn_test.py | 13 +- .../keras/_impl/keras/engine/base_layer.py | 90 +++++++++++- .../keras/_impl/keras/engine/network.py | 9 +- .../keras/_impl/keras/engine/training.py | 5 + .../_impl/keras/model_subclassing_test.py | 130 +++++++++++++++++- tensorflow/python/layers/base.py | 2 + third_party/examples/eager/spinn/spinn.py | 29 ++-- 7 files changed, 246 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 591d99edcd..9261823d77 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -173,7 +173,7 @@ class SpinnTest(test_util.TensorFlowTestCase): right_in.append(tf.random_normal((1, size * 2))) tracking.append(tf.random_normal((1, tracker_size * 2))) - out = reducer(left_in, right_in=right_in, tracking=tracking) + out = reducer(left_in, right_in, tracking=tracking) self.assertEqual(batch_size, len(out)) self.assertEqual(tf.float32, out[0].dtype) self.assertEqual((1, size * 2), out[0].shape) @@ -227,7 +227,7 @@ class SpinnTest(test_util.TensorFlowTestCase): self.assertEqual((batch_size, size * 2), stacks[0][0].shape) for _ in range(2): - out1, out2 = tracker(bufs, stacks=stacks) + out1, out2 = tracker(bufs, stacks) self.assertIsNone(out2) self.assertEqual(batch_size, len(out1)) self.assertEqual(tf.float32, out1[0].dtype) @@ -260,7 +260,7 @@ class SpinnTest(test_util.TensorFlowTestCase): self.assertEqual(tf.int64, transitions.dtype) self.assertEqual((num_transitions, 1), transitions.shape) - out = s(buffers, transitions=transitions, training=True) + out = s(buffers, transitions, training=True) self.assertEqual(tf.float32, out.dtype) self.assertEqual((1, embedding_dims), out.shape) @@ -286,15 +286,12 @@ class SpinnTest(test_util.TensorFlowTestCase): vocab_size) # Invoke model under non-training mode. - logits = model( - prem, premise_transition=prem_trans, hypothesis=hypo, - hypothesis_transition=hypo_trans, training=False) + logits = model(prem, prem_trans, hypo, hypo_trans, training=False) self.assertEqual(tf.float32, logits.dtype) self.assertEqual((batch_size, d_out), logits.shape) # Invoke model under training model. - logits = model(prem, premise_transition=prem_trans, hypothesis=hypo, - hypothesis_transition=hypo_trans, training=True) + logits = model(prem, prem_trans, hypo, hypo_trans, training=True) self.assertEqual(tf.float32, logits.dtype) self.assertEqual((batch_size, d_out), logits.shape) diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 5615241ae3..755607aafb 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import inspect # Necessary supplement to tf_inspect to deal with variadic args. + from six.moves import zip # pylint: disable=redefined-builtin from tensorflow.python.eager import context @@ -30,6 +32,8 @@ from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.utils import generic_utils from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export @@ -143,6 +147,7 @@ class Layer(tf_base_layers.Layer): super(Layer, self).__init__( name=name, dtype=dtype, trainable=trainable, activity_regularizer=kwargs.get('activity_regularizer')) + self._uses_inputs_arg = True # Add properties that are Keras-only for now. self.supports_masking = False @@ -213,7 +218,71 @@ class Layer(tf_base_layers.Layer): """ return inputs - def __call__(self, inputs, **kwargs): + def _inputs_from_call_args(self, call_args, call_kwargs): + """Get Layer inputs from __call__ *args and **kwargs. + + Args: + call_args: The positional arguments passed to __call__. + call_kwargs: The keyword argument dict passed to __call__. + + Returns: + A tuple of (inputs, non_input_kwargs). These may be the same objects as + were passed in (call_args and call_kwargs). + """ + if getattr(self, '_uses_inputs_arg', True): + assert len(call_args) == 1 # TypeError raised earlier in __call__. + return call_args[0], call_kwargs + else: + call_arg_spec = tf_inspect.getargspec(self.call) + # There is no explicit "inputs" argument expected or provided to + # call(). Arguments which have default values are considered non-inputs, + # and arguments without are considered inputs. + if call_arg_spec.defaults: + if call_arg_spec.varargs is not None: + raise TypeError( + 'Layer.call() may not accept both *args and arguments with ' + 'default values (unable to determine which are inputs to the ' + 'Layer).') + keyword_arg_names = set( + call_arg_spec.args[-len(call_arg_spec.defaults):]) + else: + keyword_arg_names = set() + # Training is never an input argument name, to allow signatures like + # call(x, training). + keyword_arg_names.add('training') + _, unwrapped_call = tf_decorator.unwrap(self.call) + bound_args = inspect.getcallargs( + unwrapped_call, *call_args, **call_kwargs) + if call_arg_spec.keywords is not None: + var_kwargs = bound_args.pop(call_arg_spec.keywords) + bound_args.update(var_kwargs) + keyword_arg_names = keyword_arg_names.union(var_kwargs.keys()) + all_args = call_arg_spec.args + if all_args and bound_args[all_args[0]] is self: + # Ignore the 'self' argument of methods + bound_args.pop(call_arg_spec.args[0]) + all_args = all_args[1:] + non_input_arg_values = {} + input_arg_values = [] + remaining_args_are_keyword = False + for argument_name in all_args: + if argument_name in keyword_arg_names: + remaining_args_are_keyword = True + else: + if remaining_args_are_keyword: + raise TypeError( + 'Found a positional argument to call() after a non-input ' + 'argument. All arguments after "training" must be keyword ' + 'arguments, and are not tracked as inputs to the Layer.') + if remaining_args_are_keyword: + non_input_arg_values[argument_name] = bound_args[argument_name] + else: + input_arg_values.append(bound_args[argument_name]) + if call_arg_spec.varargs is not None: + input_arg_values.extend(bound_args[call_arg_spec.varargs]) + return input_arg_values, non_input_arg_values + + def __call__(self, inputs, *args, **kwargs): """Wrapper around self.call(), for handling internal references. If a Keras tensor is passed: @@ -226,6 +295,10 @@ class Layer(tf_base_layers.Layer): Arguments: inputs: Can be a tensor or list/tuple of tensors. + *args: Additional positional arguments to be passed to `call()`. Only + allowed in subclassed Models with custom call() signatures. In other + cases, `Layer` inputs must be passed using the `inputs` argument and + non-inputs must be keyword arguments. **kwargs: Additional keyword arguments to be passed to `call()`. Returns: @@ -234,12 +307,25 @@ class Layer(tf_base_layers.Layer): Raises: ValueError: in case the layer is missing shape information for its `build` call. + TypeError: If positional arguments are passed and this `Layer` is not a + subclassed `Model`. """ # Actually call the layer (optionally building it). - output = super(Layer, self).__call__(inputs, **kwargs) + output = super(Layer, self).__call__(inputs, *args, **kwargs) + + if args and getattr(self, '_uses_inputs_arg', True): + raise TypeError( + 'This Layer takes an `inputs` argument to call(), and only the ' + '`inputs` argument may be specified as a positional argument. Pass ' + 'everything else as a keyword argument (those arguments will not be ' + 'tracked as inputs to the Layer).') + if context.executing_eagerly(): return output + inputs, kwargs = self._inputs_from_call_args( + call_args=(inputs,) + args, call_kwargs=kwargs) + if hasattr(self, '_symbolic_set_inputs') and not self.inputs: # Subclassed network: explicitly set metadata normally set by a call to # self._set_inputs(). diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index ea4be0d293..9f1c7de115 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -117,6 +117,7 @@ class Network(base_layer.Layer): self._inbound_nodes = [] def _init_graph_network(self, inputs, outputs, name=None): + self._uses_inputs_arg = True # Normalize and set self.inputs, self.outputs. if isinstance(inputs, (list, tuple)): self.inputs = list(inputs) # Tensor or list of tensors. @@ -274,11 +275,15 @@ class Network(base_layer.Layer): def _init_subclassed_network(self, name=None): self._base_init(name=name) self._is_graph_network = False - if 'training' in tf_inspect.getargspec(self.call).args: + call_args = tf_inspect.getargspec(self.call).args + if 'training' in call_args: self._expects_training_arg = True else: self._expects_training_arg = False - + if 'inputs' in call_args: + self._uses_inputs_arg = True + else: + self._uses_inputs_arg = False self.outputs = None self.inputs = None self.built = False diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 08288d353e..971245c162 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -874,6 +874,11 @@ class Model(Network): whether to build the model's graph in inference mode (False), training mode (True), or using the Keras learning phase (None). """ + if not getattr(self, '_uses_inputs_arg', True): + raise NotImplementedError( + 'Subclassed Models without "inputs" in their call() signatures do ' + 'not yet support shape inference. File a feature request if this ' + 'limitation bothers you.') if self.__class__.__name__ == 'Sequential': # Note: we can't test whether the model is `Sequential` via `isinstance` # since `Sequential` depends on `Model`. diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py index 58b144365b..4445900330 100644 --- a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -22,7 +22,9 @@ import os import tempfile import numpy as np +import six +from tensorflow.python.eager import context from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras @@ -36,6 +38,7 @@ except ImportError: h5py = None +# pylint: disable=not-callable class SimpleTestModel(keras.Model): def __init__(self, use_bn=False, use_dp=False, num_classes=10): @@ -104,7 +107,7 @@ class NestedTestModel1(keras.Model): def call(self, inputs): x = self.dense1(inputs) x = self.bn(x) - x = self.test_net(x) # pylint: disable=not-callable + x = self.test_net(x) return self.dense2(x) @@ -161,7 +164,7 @@ def get_nested_model_3(input_dim, num_classes): return tensor_shape.TensorShape((input_shape[0], 5)) test_model = Inner() - x = test_model(x) # pylint: disable=not-callable + x = test_model(x) outputs = keras.layers.Dense(num_classes)(x) return keras.Model(inputs, outputs, name='nested_model_3') @@ -574,5 +577,128 @@ class ModelSubclassingTest(test.TestCase): self.assertGreater(loss, 0.1) +class CustomCallModel(keras.Model): + + def __init__(self): + super(CustomCallModel, self).__init__() + self.dense1 = keras.layers.Dense(1, activation='relu') + self.dense2 = keras.layers.Dense(1, activation='softmax') + + def call(self, first, second, fiddle_with_output='no', training=True): + combined = self.dense1(first) + self.dense2(second) + if fiddle_with_output == 'yes': + return 10. * combined + else: + return combined + + +class CustomCallSignatureTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_no_inputs_in_signature(self): + model = CustomCallModel() + first = array_ops.ones([2, 3]) + second = array_ops.ones([2, 5]) + output = model(first, second) + self.evaluate([v.initializer for v in model.variables]) + expected_output = self.evaluate(model.dense1(first) + model.dense2(second)) + self.assertAllClose(expected_output, self.evaluate(output)) + output = model(first, second, fiddle_with_output='yes') + self.assertAllClose(10. * expected_output, self.evaluate(output)) + output = model(first, second=second, training=False) + self.assertAllClose(expected_output, self.evaluate(output)) + if not context.executing_eagerly(): + six.assertCountEqual(self, [first, second], model.inputs) + with self.assertRaises(TypeError): + # tf.layers.Layer expects an "inputs" argument, so all-keywords doesn't + # work at the moment. + model(first=first, second=second, fiddle_with_output='yes') + + @test_util.run_in_graph_and_eager_modes() + def test_inputs_in_signature(self): + + class HasInputsAndOtherPositional(keras.Model): + + def call(self, inputs, some_other_arg, training=False): + return inputs + + model = HasInputsAndOtherPositional() + with self.assertRaisesRegexp( + TypeError, 'everything else as a keyword argument'): + model(array_ops.ones([]), array_ops.ones([])) + + @test_util.run_in_graph_and_eager_modes() + def test_kwargs_in_signature(self): + + class HasKwargs(keras.Model): + + def call(self, x, y=3, **key_words): + return x + + model = HasKwargs() + arg = array_ops.ones([]) + model(arg, a=3) + if not context.executing_eagerly(): + six.assertCountEqual(self, [arg], model.inputs) + + @test_util.run_in_graph_and_eager_modes() + def test_args_in_signature(self): + + class HasArgs(keras.Model): + + def call(self, x, *args, **kwargs): + return [x] + list(args) + + model = HasArgs() + arg1 = array_ops.ones([]) + arg2 = array_ops.ones([]) + arg3 = array_ops.ones([]) + model(arg1, arg2, arg3, a=3) + if not context.executing_eagerly(): + six.assertCountEqual(self, [arg1, arg2, arg3], model.inputs) + + def test_args_and_keywords_in_signature(self): + + class HasArgs(keras.Model): + + def call(self, x, training=True, *args, **kwargs): + return x + + with context.graph_mode(): + model = HasArgs() + arg1 = array_ops.ones([]) + arg2 = array_ops.ones([]) + arg3 = array_ops.ones([]) + with self.assertRaisesRegexp(TypeError, 'args and arguments with'): + model(arg1, arg2, arg3, a=3) + + def test_training_no_default(self): + + class TrainingNoDefault(keras.Model): + + def call(self, x, training): + return x + + with context.graph_mode(): + model = TrainingNoDefault() + arg = array_ops.ones([]) + model(arg, True) + six.assertCountEqual(self, [arg], model.inputs) + + def test_training_no_default_with_positional(self): + + class TrainingNoDefaultWithPositional(keras.Model): + + def call(self, x, training, positional): + return x + + with context.graph_mode(): + model = TrainingNoDefaultWithPositional() + arg1 = array_ops.ones([]) + arg2 = array_ops.ones([]) + arg3 = array_ops.ones([]) + with self.assertRaisesRegexp(TypeError, 'after a non-input'): + model(arg1, arg2, arg3) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 1e5f26a77f..242cdff6f3 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -625,6 +625,8 @@ class Layer(checkpointable.CheckpointableBase): input_list = nest.flatten(inputs) build_graph = not context.executing_eagerly() + # TODO(fchollet, allenl): Make deferred mode work with subclassed Models + # which don't use an "inputs" argument. in_deferred_mode = isinstance(input_list[0], _DeferredTensor) # Ensure the Layer, if being reused, is working with inputs from # the same graph as where it was created. diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py index f8fb6ecb0c..8a2b24aa4e 100644 --- a/third_party/examples/eager/spinn/spinn.py +++ b/third_party/examples/eager/spinn/spinn.py @@ -266,8 +266,7 @@ class SPINN(tf.keras.Model): trackings.append(tracking) if rights: - reducer_output = self.reducer( - lefts, right_in=rights, tracking=trackings) + reducer_output = self.reducer(lefts, rights, trackings) reduced = iter(reducer_output) for transition, stack in zip(trans, stacks): @@ -388,10 +387,10 @@ class SNLIClassifier(tf.keras.Model): # Run the batch-normalized and dropout-processed word vectors through the # SPINN encoder. - premise = self.encoder( - premise_embed, transitions=premise_transition, training=training) - hypothesis = self.encoder( - hypothesis_embed, transitions=hypothesis_transition, training=training) + premise = self.encoder(premise_embed, premise_transition, + training=training) + hypothesis = self.encoder(hypothesis_embed, hypothesis_transition, + training=training) # Combine encoder outputs for premises and hypotheses into logits. # Then apply batch normalization and dropuout on the logits. @@ -465,11 +464,10 @@ class SNLIClassifierTrainer(tfe.Checkpointable): """ with tfe.GradientTape() as tape: tape.watch(self._model.variables) - # TODO(allenl): Allow passing Layer inputs as position arguments. logits = self._model(premise, - premise_transition=premise_transition, - hypothesis=hypothesis, - hypothesis_transition=hypothesis_transition, + premise_transition, + hypothesis, + hypothesis_transition, training=True) loss = self.loss(labels, logits) gradients = tape.gradient(loss, self._model.variables) @@ -533,9 +531,7 @@ def _evaluate_on_dataset(snli_data, batch_size, trainer, use_gpu): snli_data, batch_size): if use_gpu: label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu() - logits = trainer.model( - prem, premise_transition=prem_trans, hypothesis=hypo, - hypothesis_transition=hypo_trans, training=False) + logits = trainer.model(prem, prem_trans, hypo, hypo_trans, training=False) loss_val = trainer.loss(label, logits) batch_size = tf.shape(label)[0] mean_loss(loss_val, weights=batch_size.gpu() if use_gpu else batch_size) @@ -639,11 +635,8 @@ def train_or_infer_spinn(embed, hypo, hypo_trans = inference_sentence_pair[1] hypo_trans = inference_sentence_pair[1][1] inference_logits = model( - tf.constant(prem), - premise_transition=tf.constant(prem_trans), - hypothesis=tf.constant(hypo), - hypothesis_transition=tf.constant(hypo_trans), - training=False) + tf.constant(prem), tf.constant(prem_trans), + tf.constant(hypo), tf.constant(hypo_trans), training=False) inference_logits = inference_logits[0][1:] max_index = tf.argmax(inference_logits) print("\nInference logits:") -- GitLab From 23d6c55ba22596f903696e0dba8037ad81470a39 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 10:03:37 -0700 Subject: [PATCH 606/960] Enable the Grappler arithmetic optimizer by default in Python tests. PiperOrigin-RevId: 190787954 --- tensorflow/python/framework/test_util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 43106b6e59..4192a27f65 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -968,8 +968,6 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) - config.graph_options.rewrite_options.arithmetic_optimization = ( - rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: -- GitLab From 2d2d4529fe3b99f25e531ee2fc0e9211341c160f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 10:15:42 -0700 Subject: [PATCH 607/960] Avoid overwriting existing namespace items that might replace the converted functions. PiperOrigin-RevId: 190789781 --- tensorflow/contrib/autograph/impl/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/impl/api.py b/tensorflow/contrib/autograph/impl/api.py index 1c4fcaa622..dce994e50d 100644 --- a/tensorflow/contrib/autograph/impl/api.py +++ b/tensorflow/contrib/autograph/impl/api.py @@ -247,7 +247,10 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(inspect_utils.getnamespace(e)) + for key, val in inspect_utils.getnamespace(e).items(): + # Avoid overwriting entities that have been transformed. + if key not in compiled_node.__dict__: + compiled_node.__dict__[key] = val compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 1fe68ce0f4f7ef020cc52d1cc9963dd344fccba0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 10:15:46 -0700 Subject: [PATCH 608/960] internal change PiperOrigin-RevId: 190789794 --- tensorflow/core/framework/resource_mgr.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index 9a458431e7..c84ea3b034 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -319,14 +319,13 @@ class IsResourceInitialized : public OpKernel { // specified type. The type will be a part of the generated op name. // TODO(apassos): figure out how to get non-cpu-allocated tensors to work // through constant folding so this doesn't have to be marked as stateful. -#define REGISTER_RESOURCE_HANDLE_OP(Type) \ - REGISTER_OP(#Type "HandleOp") \ - .Attr("container: string = ''") \ - .Attr("shared_name: string = ''") \ - .Output("resource: resource") \ - .SetIsStateful() \ - .SetShapeFn(tensorflow::shape_inference::ScalarShape) \ - .Doc("Creates a handle to a " #Type) +#define REGISTER_RESOURCE_HANDLE_OP(Type) \ + REGISTER_OP(#Type "HandleOp") \ + .Attr("container: string = ''") \ + .Attr("shared_name: string = ''") \ + .Output("resource: resource") \ + .SetIsStateful() \ + .SetShapeFn(tensorflow::shape_inference::ScalarShape) // Utility op kernel to produce a handle to a resource of type T. template -- GitLab From c3603d0a0d9a8390fd9e9423cd661717d2702bfd Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 28 Mar 2018 10:20:49 -0700 Subject: [PATCH 609/960] Trying to fix libtensorflow GPU build. (#18056) CUDNN path error. Invalid path to cuDNN 7 toolkit. None of the following files can be found: C:/tools/cuda\lib/x64/cudnn.lib C:/tools/cuda\lib/x64/cudnn.lib --- tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 7b2d7e1a56..d654b433e7 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -120,7 +120,9 @@ function run_configure_for_gpu_build { export TF_CUDA_VERSION=9.0 export CUDA_TOOLKIT_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0" export TF_CUDNN_VERSION=7.0 - export CUDNN_INSTALL_PATH="C:/tools/cuda" + if [ -z "$CUDNN_INSTALL_PATH" ]; then + export CUDNN_INSTALL_PATH="C:/tools/cuda" + fi export TF_CUDA_COMPUTE_CAPABILITIES="3.7" if [ -z "$TF_ENABLE_XLA" ]; then export TF_ENABLE_XLA=0 -- GitLab From b1f6c4e0dee8732d8e25262052656e9cdf5ca513 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 28 Mar 2018 10:25:27 -0700 Subject: [PATCH 610/960] Properly serialize ResourceVariable global_step into the metagraph. Prior to this, saving and restoring a graph with a resource variable global_step would cause the global_step collection of the reimported graph to contain a resource tensor (the object underlying the ResourceVariable); the actual metadata associated with it would be serialized. PiperOrigin-RevId: 190791443 --- tensorflow/python/ops/resource_variable_ops.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index df873da98e..2f39ea2e7d 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -1087,6 +1087,11 @@ ops.register_proto_function( proto_type=variable_pb2.VariableDef, to_proto=_to_proto_fn, from_proto=_from_proto_fn) +ops.register_proto_function( + ops.GraphKeys.GLOBAL_STEP, + proto_type=variable_pb2.VariableDef, + to_proto=_to_proto_fn, + from_proto=_from_proto_fn) def is_resource_variable(var): -- GitLab From 0aaa61ab332611e9dcfd3d1cc25115a8972bd5fd Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Mar 2018 10:55:00 -0700 Subject: [PATCH 611/960] Update version strings to 1.7 --- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 14 ++++++------ tensorflow/tools/pip_package/setup.py | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 40eebd1db0..706968d347 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 9059b3f3b6..a3eca4bf37 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 2e47a6d212..1a0956634d 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index eff066d200..cdde45a6f4 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.7.0-rc1 + 1.7.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.7.0-rc1 + 1.7.0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.7.0-rc1 + 1.7.0 org.tensorflow libtensorflow_jni_gpu - 1.7.0-rc1 + 1.7.0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -

javac -cp libtensorflow-1.7.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.7.0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.7.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.7.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.7.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.7.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 3c5db9bced..fdf9bf81e7 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -199,7 +199,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -304,7 +304,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -490,7 +490,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
## Validate your installation @@ -657,14 +657,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -676,14 +676,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -695,14 +695,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp35-cp35m-linux_x86_64.whl
 
@@ -714,14 +714,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index fa6951a8f1..6f55e6a650 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py2-none-any.whl @@ -523,7 +523,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py2-none-any.whl
 
@@ -531,5 +531,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 0454c172f8..73446663e9 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.7.0rc1 on Linux: +for TensorFlow 1.7.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0-py2-none-any.whl
 
## Validate your installation @@ -459,8 +459,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - + + @@ -480,7 +480,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.7.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
- + @@ -495,8 +495,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.7.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
- - + + diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 8b83257887..a486631621 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.7.0-rc1' +_VERSION = '1.7.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 95efef3271d67dd63ec2e397012a20d63d088668 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 11:18:45 -0700 Subject: [PATCH 612/960] Make ArithmeticOptimizer robust to failures of shape inference and individual stages. Get rid of graph annotation and use GraphProperties directly. PiperOrigin-RevId: 190801044 --- .../optimizers/arithmetic_optimizer.cc | 49 +++++++++++-------- .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/graph_optimizer_stage.cc | 4 ++ .../optimizers/graph_optimizer_stage.h | 3 ++ 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 5dd0b6f4b0..629872bf19 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -196,8 +196,6 @@ void SetSourceDataType(DataType dtype, NodeDef* node) { bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); } -const char kOutputShapesAttr[] = "_output_shapes"; - // Shape is symbolically defined if it has a known rank, and each dimension is // defined, or is an unknown symbol (dim.size <= -2). bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape) { @@ -234,16 +232,19 @@ bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, // Returns whether `reshape` is an identity op. The tensor that `reshape` // reshapes is the `output_pos`-th output of node `input`. bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, - const int output_pos) { - if (!reshape.attr().count(kOutputShapesAttr) || - !input.attr().count(kOutputShapesAttr)) { + const int output_pos, + const GraphProperties& graph_properties) { + const std::vector& reshape_props = + graph_properties.GetOutputProperties(reshape.name()); + const std::vector& input_props = + graph_properties.GetOutputProperties(input.name()); + if (reshape_props.empty() || input_props.empty() || + input_props.size() <= output_pos) { return false; } - PartialTensorShape src_shape( - input.attr().at(kOutputShapesAttr).list().shape(output_pos)); - PartialTensorShape dst_shape( - reshape.attr().at(kOutputShapesAttr).list().shape(0)); + const PartialTensorShape& src_shape = input_props[output_pos].shape(); + const PartialTensorShape& dst_shape = reshape_props[0].shape(); if (src_shape.unknown_rank() || dst_shape.unknown_rank()) { return false; } @@ -1272,7 +1273,8 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // outputs tensors of shape [M, N] while feeding it with tensors of shape // [M*N] (or worse). The reshape nodes are then necessary to update the // tensor metadata to the required shape. - if (ReshapeIsIdentity(*reshape, *input, output_pos)) { + if (can_use_shapes_ && + ReshapeIsIdentity(*reshape, *input, output_pos, *graph_properties_)) { return reshape->input(0); } } @@ -1586,11 +1588,11 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { std::vector> stages; - if (options_.combine_add_to_addn) { + if (options_.combine_add_to_addn && can_use_shapes_) { stages.push_back(std::unique_ptr( new AddOpsRewriteStage(ctx, ctx_ext))); } - if (options_.hoist_common_factor_out_of_aggregation) { + if (options_.hoist_common_factor_out_of_aggregation && can_use_shapes_) { stages.push_back(std::unique_ptr( new HoistCommonFactorOutOfAggregation(ctx, ctx_ext))); } @@ -1627,7 +1629,15 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { if (simplified_tensor.empty()) { for (auto& stage : stages) { if (stage->IsSupported(node)) { - TF_RETURN_IF_ERROR(stage->TrySimplify(node, &simplified_tensor)); + const Status stage_status = + stage->TrySimplify(node, &simplified_tensor); + // Each stage must be "error safe" (just like exception safe). In + // case of any error it must leave optimized graph unmodified. + if (!stage_status.ok()) { + LOG(WARNING) << "Failed to run arithmetic optimizer stage " + << stage->stage_name() + << ". Error: " << stage_status.error_message(); + } if (!simplified_tensor.empty()) { break; } @@ -1694,19 +1704,16 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, &frame_map_, &num_frames)); // Shapes are only needed in aggressive mode. graph_properties_.reset(new GraphProperties(item)); - TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); - // TODO(ezhulenev): Use GraphProperties to lookup tensor shapes directly - TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_)); + const Status status = graph_properties_->InferStatically(false); + can_use_shapes_ = status.ok(); + if (!can_use_shapes_) { + LOG(WARNING) << "Shape inference failed."; + } // Perform the optimizations. DedupComputations(); TF_RETURN_IF_ERROR(SimplifyArithmeticOps()); - // Clear output shapes. - for (int i = 0; i < optimized_graph->node_size(); ++i) { - optimized_graph_->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr); - } - return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 965f0e9ea2..cdeed0554e 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -126,6 +126,7 @@ class ArithmeticOptimizer : public GraphOptimizer { RewriterConfig::Toggle opt_level_; ArithmeticOptimizerOptions options_; + bool can_use_shapes_ = false; bool fetch_nodes_known_ = false; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc index 7044705ade..1ea57f7b4f 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc @@ -42,6 +42,10 @@ Status GetInputNode(const GraphOptimizerContext& ctx, const string& input, Status GetTensorProperties(const GraphOptimizerContext& ctx, const string& tensor, OpInfo::TensorProperties* properties) { + if (ctx.graph_properties == nullptr) { + return errors::InvalidArgument("Graph properties are unknown."); + } + int port; string tensor_node_name = ParseNodeName(tensor, &port); if (port < 0) { diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h index be95c00d2d..c7af82abbb 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h @@ -117,6 +117,9 @@ class GraphOptimizerStage { : optimizer_name_(optimizer_name), stage_name_(stage_name), ctx_(ctx) {} virtual ~GraphOptimizerStage() = default; + const string& stage_name() const { return stage_name_; } + const string& optimizer_name() const { return optimizer_name_; } + // Check if we should try to simplify node. Returning true doesn't // guarantee that node will be simplified. // -- GitLab From 71b917851b8fcd36481306d225fa478e9e6f7b83 Mon Sep 17 00:00:00 2001 From: Chris Ying Date: Wed, 28 Mar 2018 11:22:28 -0700 Subject: [PATCH 613/960] Fix TPUClusterResolver tpu parameter for profiler tool. PiperOrigin-RevId: 190801968 --- .../contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py index a730d6142d..0b78cf8695 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py @@ -76,7 +76,7 @@ def main(unused_argv=None): else: tpu_cluster_resolver = ( tf.contrib.cluster_resolver.TPUClusterResolver( - tpu_names=[FLAGS.tpu_name], + [FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)) service_addr = tpu_cluster_resolver.get_master() -- GitLab From b8384bbe0325c5b1c20838f9e6fd494e78e299dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 11:24:01 -0700 Subject: [PATCH 614/960] Updating tests in constant_folding_test.cc so that it evaluates the optimized and original graph and checks whether the output tensors produced by them are the same. PiperOrigin-RevId: 190802264 --- .../optimizers/constant_folding_test.cc | 62 +++++++++++-------- .../core/grappler/utils/grappler_test.cc | 5 ++ .../core/grappler/utils/grappler_test.h | 3 + 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 85f877883c..e0ff9b17b1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -107,8 +107,8 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { EXPECT_EQ("Const", node_d.op()); std::vector fetch = {"d"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorEqual(tensors_expected[0], tensors[0]); @@ -193,10 +193,10 @@ TEST_F(ConstantFoldingTest, AddTree) { // Check that the result nodes have the expected value. std::vector fetch = {"c3", "c20"}; - auto tensor_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensor_expected = EvaluateNodes(item.graph, fetch); EXPECT_EQ(fetch.size(), tensor_expected.size()); fetch = {"add_child", "mul_child"}; - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensor_expected[i], tensors[i]); @@ -436,10 +436,10 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { // Check that the reciprocals have the expected value. std::vector fetch = {"cf_half"}; - auto tensor_expected = EvaluateNodes(item.graph, fetch, {}); + auto tensor_expected = EvaluateNodes(item.graph, fetch); EXPECT_EQ(fetch.size(), tensor_expected.size()); fetch = {"ConstantFolding/div_f_recip", "ConstantFolding/realdiv_recip"}; - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensor_expected[0], tensors[i]); @@ -647,8 +647,8 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { EXPECT_EQ("Const", new_d.op()); std::vector fetch = {"e", "f"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(fetch.size(), tensors_expected.size()); EXPECT_EQ(fetch.size(), tensors.size()); for (int i = 0; i < fetch.size(); i++) { @@ -671,7 +671,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { GrapplerItem item; item.fetch.push_back("e"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {}); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; @@ -688,8 +688,8 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { if (node.name() == "e") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"e"}, {}); - auto expected = EvaluateNodes(item.graph, {"e"}, {}); + auto folded = EvaluateNodes(output, {"e"}); + auto expected = EvaluateNodes(item.graph, {"e"}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -699,7 +699,7 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { } } EXPECT_EQ(1, found); - auto tensors = EvaluateNodes(output, item.fetch, {}); + auto tensors = EvaluateNodes(output, item.fetch); EXPECT_EQ(1, tensors.size()); test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } @@ -735,8 +735,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { if (node.name() == "i1") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"i1"}, {}); - auto expected = EvaluateNodes(item.graph, {"i1"}, {}); + auto folded = EvaluateNodes(output, {"i1"}); + auto expected = EvaluateNodes(item.graph, {"i1"}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -746,8 +746,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { if (node.name() == "i2") { EXPECT_EQ("Const", node.op()); ++found; - auto folded = EvaluateNodes(output, {"i2"}, {}); - auto expected = EvaluateNodes(item.graph, {"i2"}, {}); + auto folded = EvaluateNodes(output, {"i2"}); + auto expected = EvaluateNodes(item.graph, {"i2"}); EXPECT_EQ(1, expected.size()); EXPECT_EQ(1, folded.size()); test::ExpectTensorEqual(folded[0], expected[0]); @@ -775,7 +775,8 @@ TEST_F(ConstantFoldingTest, ControlDependenciesDeduplicate) { GrapplerItem item; item.fetch.push_back("i2"); TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -794,6 +795,9 @@ TEST_F(ConstantFoldingTest, ControlDependenciesDeduplicate) { EXPECT_EQ("^p2", node.input(1)); } } + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { @@ -865,8 +869,8 @@ TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) { } EXPECT_EQ(8, constant_folded); - auto expected = EvaluateNodes(item.graph, outputs, {}); - auto optimized = EvaluateNodes(output, outputs, {}); + auto expected = EvaluateNodes(item.graph, outputs); + auto optimized = EvaluateNodes(output, outputs); ASSERT_EQ(expected.size(), optimized.size()); for (int i = 0; i < expected.size(); ++i) { test::ExpectTensorEqual(expected[i], optimized[i]); @@ -1293,7 +1297,7 @@ TEST_F(ConstantFoldingTest, MergeNodes) { EXPECT_EQ(6, found_nodes); std::vector fetch = {"out1", "idx1"}; - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(2, tensors.size()); const Tensor& out_value = tensors[0]; EXPECT_EQ(3 * 5, out_value.NumElements()); @@ -1803,6 +1807,12 @@ TEST_F(ConstantFoldingTest, LargeConstant) { EXPECT_EQ(2, found); EXPECT_GT(1024 * 1024, output.ByteSizeLong()); + + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + EXPECT_EQ(1, tensors_expected.size()); + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } TEST_F(ConstantFoldingTest, SwitchIdenticalInputs) { @@ -1948,8 +1958,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } std::vector fetch = {"acc0"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); @@ -1983,7 +1993,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { item.fetch = {"concat0", "concat1", "concat2", "concat3", "concat4", "concat5", "concat6", "concat7", "concat8", "concat9"}; - auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}, {}); + auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); EXPECT_EQ(1, tensors_expected.size()); ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; @@ -2034,7 +2044,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) { } } - auto tensors = EvaluateNodes(output, {"concat0"}, {}); + auto tensors = EvaluateNodes(output, {"concat0"}); EXPECT_EQ(1, tensors.size()); test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); } @@ -2132,8 +2142,8 @@ TEST_F(ConstantFoldingTest, TrivialPack) { } std::vector fetch = {"stack"}; - auto tensors_expected = EvaluateNodes(item.graph, fetch, {}); - auto tensors = EvaluateNodes(output, fetch, {}); + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); EXPECT_EQ(1, tensors_expected.size()); EXPECT_EQ(1, tensors.size()); EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape()); diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index 5c96359867..910b0acaef 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -39,6 +39,11 @@ GrapplerTest::GrapplerTest() { cfg->set_debug_stripper(RewriterConfig::OFF); } +std::vector GrapplerTest::EvaluateNodes( + const GraphDef& graph, const std::vector& node_names) const { + return EvaluateNodes(graph, node_names, {}); +} + std::vector GrapplerTest::EvaluateNodes( const GraphDef& graph, const std::vector& node_names, const std::vector>& inputs) const { diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 4b160e7f16..3bc7bea454 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -34,6 +34,9 @@ class GrapplerTest : public ::testing::Test { GrapplerTest(); protected: + std::vector EvaluateNodes( + const GraphDef& graph, const std::vector& node_names) const; + std::vector EvaluateNodes( const GraphDef& graph, const std::vector& node_names, const std::vector>& inputs) const; -- GitLab From cc9944abe196827bae38975d813ee3e428349dcb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 11:34:32 -0700 Subject: [PATCH 615/960] In contrib/all_reduce raise a ValueError if the input tensors do not have fully-defined shapes. PiperOrigin-RevId: 190804146 --- tensorflow/contrib/all_reduce/python/all_reduce.py | 7 +++---- tensorflow/contrib/all_reduce/python/all_reduce_test.py | 6 ++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py index 6658f0d9c1..8add2aacff 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce.py @@ -38,16 +38,15 @@ def _flatten_tensors(tensors): shape: the original shape of each element of input tensors Raises: - ValueError: tensors are empty or non-isomorphic. + ValueError: tensors are empty or non-isomorphic or have unknown shape. """ if not tensors: raise ValueError("tensors cannot be empty") shape = tensors[0].shape for tensor in tensors: shape = shape.merge_with(tensor.shape) - if shape.ndims is None: - raise ValueError("At least one of the tensors in 'tensors' must have " - "statically known rank.") + if not shape.is_fully_defined(): + raise ValueError("Tensors must have statically known shape.") if len(shape) != 1: reshaped = [] for t in tensors: diff --git a/tensorflow/contrib/all_reduce/python/all_reduce_test.py b/tensorflow/contrib/all_reduce/python/all_reduce_test.py index 47bab0a367..b3f5d92259 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce_test.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce_test.py @@ -36,6 +36,12 @@ from tensorflow.python.platform import tf_logging class AllReduceTest(test_util.TensorFlowTestCase): + def testFlattenTensorsShapesDefined(self): + x = array_ops.placeholder(types_pb2.DT_FLOAT, [None]) + with self.assertRaisesRegexp(ValueError, + "must have statically known shape"): + ar._flatten_tensors([x, x]) + def testRingPermutations(self): # 0 devices pred_by_c_d, rank_by_c_d = ar._ring_permutations(1, 0, []) -- GitLab From 70a51319f1d6e42f0d5eadbf65e941419974aac4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 11:48:28 -0700 Subject: [PATCH 616/960] Fixes to DepthwiseConv kernel PiperOrigin-RevId: 190806668 --- .../internal/optimized/depthwiseconv_uint8.h | 11 +++-- .../depthwiseconv_uint8_3x3_filter.h | 43 ++++++++++++++++++- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index c71b070680..0f78e0f728 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1694,12 +1694,11 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); #ifdef __aarch64__ - // Call kernel optimized for depthwise convolutions using 3x3 filters, - // stride = 1, no padding, depth_multiplier = 1 and depth a multiple of 16. - if (filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && - (stride_width == 1 || stride_width == 2) && - (stride_height == 1 || stride_height == 2) && pad_width == 0 && - pad_height == 0 && (input_depth % 16) == 0) { + // Call kernel optimized for depthwise convolutions using 3x3 filters if + // parameters are supported. + if (Fast3by3FilterKernelSupported(input_dims, filter_dims, stride_width, + stride_height, pad_width, pad_height, + depth_multiplier, output_dims)) { DepthwiseConv3by3FilterDepth16( input_data, input_dims, input_offset, filter_data, filter_dims, filter_offset, bias_data, bias_dims, stride_width, stride_height, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 9dc76e7608..a349892076 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -440,6 +440,47 @@ struct ConvKernel3x3FilterDepth16<1, 1> { } }; +inline bool Fast3by3FilterKernelSupported(const Dims<4>& input_dims, + const Dims<4>& filter_dims, + int stride_width, int stride_height, + int pad_width, int pad_height, + int depth_multiplier, + const Dims<4>& output_dims) { + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int input_depth = ArraySize(input_dims, 0); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + + bool supported = filter_width == 3 && filter_height == 3 && + depth_multiplier == 1 && + (stride_width == 1 || stride_width == 2) && + (stride_height == 1 || stride_height == 2) && + pad_width == 0 && pad_height == 0 && (input_depth % 16) == 0; + + if (!supported) { + return false; + } + + // Handle case where padding is zero but type is not kValid. This would + // require special boundary case handling that is not supported yet. + + const int out_x = output_width - 1; + const int out_y = output_height - 1; + + const int in_x_origin = (out_x * stride_width) - pad_width; + const int in_y_origin = (out_y * stride_height) - pad_height; + + const int in_x_end = in_x_origin + filter_width; + const int in_y_end = in_y_origin + filter_height; + + // Supported only if filter on the right and bottom boundary lies completely + // within the input. + return in_x_end <= input_width && in_y_end <= input_height; +} + inline void DepthwiseConv3by3FilterDepth16( const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, @@ -634,7 +675,7 @@ inline void DepthwiseConv3by3FilterDepth16( // Handle the rest of the right side. for (; out_x < output_width; out_x++) { // This code path can only be reached if we're handling >1 x outputs - // at a time or support padding. + // at a time or support kSame padding. } } -- GitLab From f9dbf697535f8262d2513ade20ada85431c323f3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 12:00:18 -0700 Subject: [PATCH 617/960] Reorder element wise operators across reshape operators. This allows batch-norm folding to work across reshape operators. PiperOrigin-RevId: 190808678 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../swap_elementwise_binary.cc | 175 ++++++++++++++++++ .../toco/graph_transformations/tests/BUILD | 11 ++ .../tests/swap_elementwise_binary_test.cc | 89 +++++++++ tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + 6 files changed, 278 insertions(+) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 051fa8de3c..8ed3e0e14e 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -280,6 +280,7 @@ cc_library( "graph_transformations/resolve_tensorflow_switch.cc", "graph_transformations/resolve_tensorflow_tile.cc", "graph_transformations/resolve_transpose_attributes.cc", + "graph_transformations/swap_elementwise_binary.cc", "graph_transformations/unfuse_activation_functions.cc", "graph_transformations/unpartition_embedding_lookup.cc", "graph_transformations/unroll_batch_matmul.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 640afc7c74..1291825c8e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -180,6 +180,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantGather) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) +DECLARE_GRAPH_TRANSFORMATION(SwapElementwiseBinary) DECLARE_GRAPH_TRANSFORMATION(Dequantize) DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc new file mode 100644 index 0000000000..ecbce58d16 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc @@ -0,0 +1,175 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +bool ShapesAllowSwapping(const string& input_array_name, + const string& const_array_name, Model* model) { + const Array& input_array = model->GetOrCreateArray(input_array_name); + const Array& const_array = model->GetOrCreateArray(const_array_name); + // Wait until these shapes have been resolved. + if (!input_array.has_shape() || !const_array.has_shape()) { + return false; + } + + // Currently swapping is not handled for scalar const_array, though that could + // be done once there is a test model. + if (RequiredBufferSizeForShape(input_array.shape()) != + RequiredBufferSizeForShape(const_array.shape())) { + return false; + } + + return true; +} + +} // namespace + +// Swaps: +// Input +// \ +// (Reshape Op) Const +// \ / +// (Add/Sub/Mul/Div op) +// | +// Output +// +// To: +// +// Input Const +// \ / +// (Add/Sub/Mul/Div op) +// | +// (Reshape Op) +// | +// Output +// +// This can allow Add/Mul ops from batch normalization to be folded into an +// Input op from a FullyConnected layer. +bool SwapElementwiseBinary::Run(Model* model, std::size_t op_index) { + const auto element_wise_op_it = model->operators.begin() + op_index; + std::unique_ptr& element_wise_op = *element_wise_op_it; + DCHECK(element_wise_op); + + switch (element_wise_op->type) { + case OperatorType::kAdd: + case OperatorType::kSub: + case OperatorType::kMul: + case OperatorType::kDiv: + break; + default: + return false; + } + + int reshape_input = -1; + Operator* op = GetOpWithOutput(*model, element_wise_op->inputs[0]); + if (!op) { + return false; + } + + if (op->type == OperatorType::kTensorFlowReshape) { + reshape_input = 0; + } else { + op = GetOpWithOutput(*model, element_wise_op->inputs[1]); + if (!op || op->type != OperatorType::kTensorFlowReshape) { + return false; + } + reshape_input = 1; + } + + int const_input = (reshape_input == 0) ? 1 : 0; + const string& const_input_array = element_wise_op->inputs[const_input]; + if (!IsConstantParameterArray(*model, const_input_array)) { + return false; + } + + // Do not fold division if denominator is not constant. + if (element_wise_op->type != OperatorType::kDiv && const_input != 1) { + return false; + } + + const auto reshape_it = + FindOpWithOutput(*model, element_wise_op->inputs[reshape_input]); + // Note: we take copies of the tensor names here, instead of const-refs as we + // may overwrite the original names. + const string reshape_input_name = (*reshape_it)->inputs[0]; + const string intermediate_name = (*reshape_it)->outputs[0]; + const string element_wise_output_name = element_wise_op->outputs[0]; + + // Check the reshape op input and const op have their shapes resolved. + if (!ShapesAllowSwapping(reshape_input_name, const_input_array, model)) { + return false; + } + + int count_ops_consuming_output = CountOpsWithInput(*model, intermediate_name); + DCHECK_GE(count_ops_consuming_output, 1); + if (count_ops_consuming_output > 1) { + AddMessageF( + "Not exchanging element-wise function with %s because it is " + "consumed by more than 1 other operator", + LogName(**reshape_it)); + return false; + } + + // If the element_wise_op was originally producing an output_array we can't + // swap as otherwise the output array would change. It'd be nice to still be + // able to swap but if code is relying on the fetch names instead of array + // indices this won't work. + for (int i = 0; i < model->flags.output_arrays_size(); ++i) { + if (model->flags.output_arrays(i) == element_wise_op->outputs[0]) { + AddMessageF( + "Not exchanging activation function with %s to preserve output array " + "name %s", + LogName(**reshape_it), element_wise_op->outputs[0]); + return false; + } + } + + // Rewire by changing inputs, including all consumers. + // TODO(b/76086261): Replace with new utility function. + Operator* consumer = GetFirstOpWithInput(*model, element_wise_output_name); + while (consumer) { + for (int i = 0; i < consumer->inputs.size(); ++i) { + if (consumer->inputs[i] == element_wise_output_name) { + consumer->inputs[i] = intermediate_name; + } + } + consumer = GetFirstOpWithInput(*model, element_wise_output_name); + } + element_wise_op->inputs[reshape_input] = reshape_input_name; + (*reshape_it)->inputs[0] = element_wise_output_name; + + // Clear shapes; this will allow shape propagation to fix the sizes for us. + model->GetOrCreateArray(element_wise_output_name).clear_shape(); + + // Finally, swap operators. Note that this only works when there are no other + // direct descendents of the reshape operator. + element_wise_op.swap(*reshape_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index 2f94f9cd8a..b975cc996b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -18,6 +18,17 @@ tf_cc_test( ], ) +tf_cc_test( + name = "swap_elementwise_binary_test", + srcs = ["swap_elementwise_binary_test.cc"], + deps = [ + "//tensorflow/contrib/lite/toco:graph_transformations", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_googletest//:gtest_main", + ], +) + tf_cc_test( name = "lstm_utils_test", srcs = ["lstm_utils_test.cc"], diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc new file mode 100644 index 0000000000..c3778017f3 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc @@ -0,0 +1,89 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +int ShapeCount(const std::vector& size) { + CHECK(size.size()); + int count = 1; + for (int dim : size) { + count *= dim; + } + return count; +} + +// Adds a new parameter array to the model. +void AddConstArray(const string& name, const float* data, + const std::vector& size, Model* model) { + Array& array = model->GetOrCreateArray(name); + array.data_type = ArrayDataType::kFloat; + Shape* shape = array.mutable_shape(); + *(shape->mutable_dims()) = size; + + auto& buffer = array.GetMutableBuffer(); + buffer.data.resize(ShapeCount(size)); + std::copy(data, data + ShapeCount(size), buffer.data.data()); +} + +} // namespace + +TEST(SwapElementwiseBinaryTest, SwapsReshape) { + Model model; + const float parameters[2][4] = {{0., 1., 2., 3.}, {10., 11., 12., 13.}}; + + AddConstArray("before_reshape", parameters[0], {2, 2}, &model); + AddConstArray("add_vector", parameters[1], {1, 4}, &model); + + auto reshape_op = absl::make_unique(); + reshape_op->shape = {1, 4}; + reshape_op->inputs = {"before_reshape"}; + reshape_op->outputs = {"after_reshape"}; + Array& reshape_array = model.GetOrCreateArray("after_reshape"); + *(reshape_array.mutable_shape()) = {1, 4}; + + auto add_op = absl::make_unique(); + add_op->inputs = {"after_reshape", "add_vector"}; + add_op->outputs = {"add"}; + Array& add_array = model.GetOrCreateArray("add"); + *(add_array.mutable_shape()) = {1, 4}; + + model.operators.push_back(std::move(reshape_op)); + model.operators.push_back(std::move(add_op)); + + auto transformation = absl::make_unique(); + ASSERT_TRUE(transformation->Run(&model, 1)); + + Operator* op = GetOpWithOutput(model, "add"); + ASSERT_NE(nullptr, op); + ASSERT_EQ(OperatorType::kAdd, op->type); + ASSERT_EQ(2, op->inputs.size()); + for (const string& input : op->inputs) { + EXPECT_TRUE(IsConstantParameterArray(model, input)) + << input << " is not const input"; + } +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 30dd6fab9e..41ea1481bc 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -90,6 +90,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveTensorFlowTile); transformations->Add(new ResolveTensorFlowConcat); transformations->Add(new ResolveMultiplyByZero); + transformations->Add(new SwapElementwiseBinary); transformations->Add(new IdentifyDilatedConv); transformations->Add(new IdentifyL2Normalization); transformations->Add(new IdentifyL2Pool); -- GitLab From 195be47024f5608b284c52239d006b756cbad0d5 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 28 Mar 2018 12:06:33 -0700 Subject: [PATCH 618/960] Add some VLOGs to make it easier to see why things don't go through the fast path PiperOrigin-RevId: 190809906 --- tensorflow/python/eager/pywrap_tfe_src.cc | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 30ef6781ec..73482792d5 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1406,15 +1406,32 @@ bool CheckInputsOk(PyObject* seq, int start_index, if (!op_def.input_arg(i).number_attr().empty() || !op_def.input_arg(i).type_list_attr().empty()) { // This item should be a seq input. - if (!PySequence_Check(item)) return false; + if (!PySequence_Check(item)) { + VLOG(1) << "Falling back to slow path for Op \"" << op_def.name() + << "\", Input \"" << op_def.input_arg(i).name() + << "\" since we expected a sequence, but got " + << item->ob_type->tp_name; + return false; + } for (Py_ssize_t j = 0; j < PySequence_Fast_GET_SIZE(item); j++) { PyObject* inner_item = PySequence_Fast_GET_ITEM(item, j); if (!EagerTensor_CheckExact(inner_item) && !CheckResourceVariable(inner_item)) { + VLOG(1) + << "Falling back to slow path for Op \"" << op_def.name() + << "\", Input \"" << op_def.input_arg(i).name() << "\", Index " + << j + << " since we expected an EagerTensor/ResourceVariable, but got " + << inner_item->ob_type->tp_name; return false; } } } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { + VLOG(1) + << "Falling back to slow path for Op \"" << op_def.name() + << "\", Input \"" << op_def.input_arg(i).name() + << "\" since we expected an EagerTensor/ResourceVariable, but got " + << item->ob_type->tp_name; return false; } } @@ -1894,6 +1911,9 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { py_attr_value, &attr_list_sizes, status); if (TF_GetCode(status) != TF_OK) { + VLOG(1) << "Falling back to slow path for Op \"" << op_def->name() + << "\" since we are unable to set the value for attr \"" + << attr.name() << "\" due to: " << TF_Message(status); RaiseFallbackException(TF_Message(status)); return nullptr; } -- GitLab From e0956b390aabaf8882dff600056db805f3fccbf6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 28 Mar 2018 12:16:51 -0700 Subject: [PATCH 619/960] Don't access properties in case they're not present PiperOrigin-RevId: 190811935 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 22ede19493..c3f8a1ce22 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1534,6 +1534,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* optimized_graph, // Remove Shuffle or Reverse op over scalar values. if (use_shape_info && + !properties->GetInputProperties(node->name()).empty() && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { const auto& shape = properties->GetInputProperties(node->name())[0].shape(); -- GitLab From 4ec02c23174b07540d190cec620347ee6f31a8d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 12:18:24 -0700 Subject: [PATCH 620/960] [XLA] Redesign: add the rest of the XlaBuilder public methods. PiperOrigin-RevId: 190812260 --- .../xla/client/xla_client/xla_builder.cc | 107 +++++++++++++++++- .../xla/client/xla_client/xla_builder.h | 71 ++++++++++++ .../xla/client/xla_client/xla_computation.h | 2 + 3 files changed, 179 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 7d39701b10..1b94f9a4eb 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -128,6 +128,18 @@ StatusOr XlaBuilder::GetProgramShape() { return GetProgramShape(&root_id); } +XlaComputation XlaBuilder::BuildAndNoteError() { + DCHECK(parent_builder_ != nullptr); + auto build_status = Build(); + if (!build_status.ok()) { + parent_builder_->NoteError( + AddStatus(build_status.status(), + tensorflow::strings::StrCat("error from: ", name_))); + return {}; + } + return build_status.ConsumeValueOrDie(); +} + StatusOr XlaBuilder::Build() { if (!first_error_.ok()) { string backtrace; @@ -945,6 +957,99 @@ XlaOp XlaBuilder::Recv(const Shape& shape, const ChannelHandle& handle) { return UnimplementedOp(); } +StatusOr XlaBuilder::IsConstant(const XlaOp& operand, + int64 num_parameters) { + return Unimplemented("IsConstant is not implemented."); +} + +StatusOr> XlaBuilder::ComputeConstant( + const XlaOp& operand, const Layout* output_layout, + tensorflow::gtl::ArraySlice parameters) { + return Unimplemented("ComputeConstant is not implemented"); +} + +std::unique_ptr XlaBuilder::CreateSubBuilder( + const string& computation_name) { + auto sub_builder = MakeUnique(computation_name); + sub_builder->parent_builder_ = this; + sub_builder->die_immediately_on_error_ = this->die_immediately_on_error_; + return sub_builder; +} + +Status XlaBuilder::SetReturnValue(const XlaOp& operand) { + return Unimplemented("SetReturnValue is not implemented."); +} + +/* static */ ConvolutionDimensionNumbers +XlaBuilder::CreateDefaultConvDimensionNumbers(int num_spatial_dims) { + ConvolutionDimensionNumbers dimension_numbers; + dimension_numbers.set_input_batch_dimension(kConvBatchDimension); + dimension_numbers.set_input_feature_dimension(kConvFeatureDimension); + dimension_numbers.set_output_batch_dimension(kConvBatchDimension); + dimension_numbers.set_output_feature_dimension(kConvFeatureDimension); + dimension_numbers.set_kernel_output_feature_dimension( + kConvKernelOutputDimension); + dimension_numbers.set_kernel_input_feature_dimension( + kConvKernelInputDimension); + for (int i = 0; i < num_spatial_dims; ++i) { + dimension_numbers.add_input_spatial_dimensions(i + 2); + dimension_numbers.add_kernel_spatial_dimensions(i + 2); + dimension_numbers.add_output_spatial_dimensions(i + 2); + } + return dimension_numbers; +} + +/* static */ Status XlaBuilder::Validate( + const ConvolutionDimensionNumbers& dnum) { + if (dnum.input_spatial_dimensions_size() < 2) { + return FailedPrecondition("input spacial dimension < 2: %d", + dnum.input_spatial_dimensions_size()); + } + if (dnum.kernel_spatial_dimensions_size() < 2) { + return FailedPrecondition("kernel spacial dimension < 2: %d", + dnum.kernel_spatial_dimensions_size()); + } + if (dnum.output_spatial_dimensions_size() < 2) { + return FailedPrecondition("output spacial dimension < 2: %d", + dnum.output_spatial_dimensions_size()); + } + + if (std::set( + {dnum.input_batch_dimension(), dnum.input_feature_dimension(), + dnum.input_spatial_dimensions(0), dnum.input_spatial_dimensions(1)}) + .size() != 4) { + return FailedPrecondition( + "dimension numbers for the input are not unique: (%lld, %lld, %lld, " + "%lld)", + dnum.input_batch_dimension(), dnum.input_feature_dimension(), + dnum.input_spatial_dimensions(0), dnum.input_spatial_dimensions(1)); + } + if (std::set({dnum.kernel_output_feature_dimension(), + dnum.kernel_input_feature_dimension(), + dnum.kernel_spatial_dimensions(0), + dnum.kernel_spatial_dimensions(1)}) + .size() != 4) { + return FailedPrecondition( + "dimension numbers for the weight are not unique: (%lld, %lld, %lld, " + "%lld)", + dnum.kernel_output_feature_dimension(), + dnum.kernel_input_feature_dimension(), + dnum.kernel_spatial_dimensions(0), dnum.kernel_spatial_dimensions(1)); + } + if (std::set({dnum.output_batch_dimension(), + dnum.output_feature_dimension(), + dnum.output_spatial_dimensions(0), + dnum.output_spatial_dimensions(1)}) + .size() != 4) { + return FailedPrecondition( + "dimension numbers for the output are not unique: (%lld, %lld, %lld, " + "%lld)", + dnum.output_batch_dimension(), dnum.output_feature_dimension(), + dnum.output_spatial_dimensions(0), dnum.output_spatial_dimensions(1)); + } + return Status::OK(); +} + StatusOr XlaBuilder::AddInstruction( HloInstructionProto&& instr, HloOpcode opcode, tensorflow::gtl::ArraySlice operands) { @@ -986,7 +1091,7 @@ StatusOr XlaBuilder::LookUpInstruction( } XlaOp XlaBuilder::UnimplementedOp() { - NoteError(Unimplemented("Op not yet implemented")); + NoteError(Unimplemented("Op not implemented")); return {}; } diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.h b/tensorflow/compiler/xla/client/xla_client/xla_builder.h index c5c35159e0..f66feb93ce 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.h @@ -335,6 +335,26 @@ class XlaBuilder { XlaOp DotGeneral(const XlaOp& lhs, const XlaOp& rhs, const DotDimensionNumbers& dimension_numbers); + // Default dimension numbers used for a 2D convolution. + static constexpr int64 kConvBatchDimension = 0; + static constexpr int64 kConvFeatureDimension = 1; + static constexpr int64 kConvFirstSpatialDimension = 2; + static constexpr int64 kConvSecondSpatialDimension = 3; + static constexpr int64 kConvKernelOutputDimension = 0; + static constexpr int64 kConvKernelInputDimension = 1; + static constexpr int64 kConvKernelFirstSpatialDimension = 2; + static constexpr int64 kConvKernelSecondSpatialDimension = 3; + + // Creates a default ConvolutionDimensionNumbers. For a 2D convolution, for + // the input operand {batch, feature, height, width} = {0, 1, 2, 3} and for + // the kernel operand + // {output_feature, input_feature, height, width} = {0, 1, 2, 3}. + static ConvolutionDimensionNumbers CreateDefaultConvDimensionNumbers( + int num_spatial_dims = 2); + + // Returns an error if the convolution dimension numbers have conflicts. + static Status Validate(const ConvolutionDimensionNumbers& dnum); + // Enqueues a convolution instruction onto the computation, which uses the // default convolution dimension numbers. XlaOp Conv(const XlaOp& lhs, const XlaOp& rhs, @@ -711,10 +731,59 @@ class XlaBuilder { const XlaOp& grad_output, float epsilon, int64 feature_index); + // Computes the value of a constant indicated by a XlaOp using a non-optimized + // interpreter on the host. + // + // The operand must represent a constant value, which in this case + // means that it must not statically depend on any parameter of the + // computation that is being built other then the ones specified on the + // parameter list. The parameters in the list will be indexed by their + // parameter id property so the number of parameters specified should be at + // least as many as the largest used parameter index. + // + // `IsConstant` can be used to test whether a computation is a compile-time + // constant without evaluation it. `ComputeConstant` only succeeds for + // computations where `IsConstant` returns true. + // + // This functionality can be useful when translating a computation + // into XLA where something that looked dynamic is required by + // XLA to be specified as a constant. E.g. the source + // computation (outside of XLA) may include a dynamic + // computation of the shape of something and ComputeConstant lets + // you determine what the value of that computation is in the case + // where the value can be determined at compile time. + // + // If output_layout is non-null, then the output of the computation + // will be stored using that layout. + StatusOr> ComputeConstant( + const XlaOp& operand, const Layout* output_layout = nullptr, + tensorflow::gtl::ArraySlice parameters = {}); + + // Returns a new XlaBuilder whose resultant Computation is used only by this + // XlaBuilder. The sub-XlaBuilder has the same die_immediately_on_error + // behavior as the parent. + std::unique_ptr CreateSubBuilder(const string& computation_name); + + // Modifies the computation being built so that executions of it will return + // the value associated with operand, rather than the last expression enqueued + // on the XlaBuilder. Any subsequent operations added to the XlaBuilder will + // not have any effect unless SetReturnValue is called again. + Status SetReturnValue(const XlaOp& operand); + // Builds the computation with the requested operations, or returns a non-ok // status. StatusOr Build(); + // Builds the computation with the requested operations, or notes an error in + // the parent XlaBuilder and returns an empty computation if building failed. + // This function is intended to be used where the returned XlaComputation is + // only used by the parent XlaBuilder and hence further operation on the + // returned XlaComputation will simply be error'ed out if an error occurred + // while building this computation. If the built computation is to be used by + // a XlaBuilder other than the parent XlaBuilder then Build() should be used + // instead. + XlaComputation BuildAndNoteError(); + // Returns the first error that was encountered while building the // computation. When an error is encountered, by default we return a vacuous // XlaOp and inform the user of the error that occurred while @@ -814,6 +883,8 @@ class XlaBuilder { // Mode bit that indicates whether to die when a first error is encountered. bool die_immediately_on_error_ = false; + + XlaBuilder* parent_builder_{nullptr}; }; template diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.h b/tensorflow/compiler/xla/client/xla_client/xla_computation.h index 5b89747fdd..78e1e3c32c 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_computation.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.h @@ -29,6 +29,8 @@ namespace xla { // TODO(b/74197823): Replace xla::Computation with this one. class XlaComputation { public: + XlaComputation() : unique_id_(-1) {} + XlaComputation(const XlaComputation&) = delete; XlaComputation& operator=(const XlaComputation&) = delete; -- GitLab From 7863645e0323d3b2ef034a6499ec6673f0cca761 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 12:19:27 -0700 Subject: [PATCH 621/960] When importing meta graphs under name scopes, the names of the created ops are prepended with the scopes. Since the saver_def of the meta graph does not contain this information, we need to pass it explicitly to Saver. PiperOrigin-RevId: 190812434 --- tensorflow/python/training/saver.py | 20 +++++++++++---- tensorflow/python/training/saver_test.py | 32 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index ba0d038475..cec581d997 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1924,12 +1924,22 @@ def import_meta_graph(meta_graph_or_file, clear_devices=False, else: meta_graph_def = meta_graph_or_file - meta_graph.import_scoped_meta_graph(meta_graph_def, - clear_devices=clear_devices, - import_scope=import_scope, - **kwargs) + imported_vars = meta_graph.import_scoped_meta_graph( + meta_graph_def, + clear_devices=clear_devices, + import_scope=import_scope, + **kwargs) + if meta_graph_def.HasField("saver_def"): - return Saver(saver_def=meta_graph_def.saver_def, name=import_scope) + # Infer the scope that is prepended by `import_scoped_meta_graph`. + scope = import_scope + var_names = list(imported_vars.keys()) + if var_names: + sample_key = var_names[0] + sample_var = imported_vars[sample_key] + scope = sample_var.name[:-len(sample_key)] + + return Saver(saver_def=meta_graph_def.saver_def, name=scope) else: if variables._all_saveable_objects(): # pylint: disable=protected-access # Return the default saver instance for all graph variables. diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 7de778f298..d1c24b3930 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2341,6 +2341,38 @@ class MetaGraphTest(test.TestCase): 10, size=[1, 10]) }) + def testImportIntoImplicitNamescope(self): + # Test that we can import a meta graph into an implicit namescope. + test_dir = self._get_test_dir("import_into_namescope") + filename = os.path.join(test_dir, "ckpt") + image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") + label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") + with session.Session() as sess: + weights = variables.Variable( + random_ops.random_uniform([784, 10]), name="weights") + bias = variables.Variable(array_ops.zeros([10]), name="bias") + logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits") + nn_ops.softmax(logit, name="prediction") + cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, + logits=logit, name="cost") + adam.AdamOptimizer().minimize(cost, name="optimize") + saver = saver_module.Saver() + sess.run(variables.global_variables_initializer()) + saver.save(sess, filename) + + graph = ops_lib.Graph() + with session.Session(graph=graph) as sess: + with ops_lib.name_scope("new_model"): + new_saver = saver_module.import_meta_graph( + filename + ".meta", graph=graph) + + new_saver.restore(sess, filename) + sess.run(["new_model/optimize"], { + "new_model/image:0": np.random.random([1, 784]), + "new_model/label:0": np.random.randint( + 10, size=[1, 10]) + }) + def testClearDevicesOnImport(self): # Test that we import a graph without its devices and run successfully. with ops_lib.Graph().as_default(): -- GitLab From f80486324807181614ac71367dbb9cf588aa2804 Mon Sep 17 00:00:00 2001 From: Noah Eisen Date: Wed, 28 Mar 2018 12:28:32 -0700 Subject: [PATCH 622/960] Upgrade gRPC version used in OSS Tensorflow PiperOrigin-RevId: 190813848 --- tensorflow/contrib/cmake/external/grpc.cmake | 2 +- tensorflow/tools/pip_package/BUILD | 1 + tensorflow/workspace.bzl | 9 +++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index cc218e8ab8..abfc69243e 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) -set(GRPC_TAG 575bda39755b98d1f7099406bb57a6e3b2074874) +set(GRPC_TAG bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 16c47f7555..dd75eda231 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -113,6 +113,7 @@ filegroup( "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", "@grpc//third_party/nanopb:LICENSE.txt", + "@grpc//third_party/address_sorting:LICENSE", "@nasm//:LICENSE", "@nsync//:LICENSE", "@pcre//:LICENCE", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 206a5a3d99..9fcbfb664b 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -405,13 +405,14 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "grpc", urls = [ - "https://mirror.bazel.build/github.com/grpc/grpc/archive/575bda39755b98d1f7099406bb57a6e3b2074874.tar.gz", - "https://github.com/grpc/grpc/archive/575bda39755b98d1f7099406bb57a6e3b2074874.tar.gz", + "https://mirror.bazel.build/github.com/grpc/grpc/archive/bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2.tar.gz", + "https://github.com/grpc/grpc/archive/bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2.tar.gz", ], - sha256 = "f08a5c8e265191b39cc74915b1bc1fd380d86cd0176c92b7cce30b6ac50514ad", - strip_prefix = "grpc-575bda39755b98d1f7099406bb57a6e3b2074874", + sha256 = "0a05bd355e4571b01d813dddffa38e57e689ac41b264dc9b1bd6ec66463ef5d6", + strip_prefix = "grpc-bd6bdf93279a39a8cd92978fd7c9d14eccd98fc2", ) + tf_http_archive( name = "linenoise", sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7", -- GitLab From 560ef036727c871bab57faa9942ccaff977ef88a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 12:29:32 -0700 Subject: [PATCH 623/960] Supports quantized reduce_mean in TF Lite. PiperOrigin-RevId: 190813997 --- .../internal/reference/reference_ops.h | 22 +++-- tensorflow/contrib/lite/kernels/mean.cc | 62 +++++++++++--- tensorflow/contrib/lite/kernels/mean_test.cc | 81 +++++++++++-------- .../graph_transformations/hardcode_min_max.cc | 1 + .../toco/graph_transformations/quantize.cc | 2 +- 5 files changed, 114 insertions(+), 54 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index ce12fad95d..33d60afa26 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -3183,19 +3183,20 @@ inline void Exp(const T* input_data, const size_t num_elements, } } -template -inline void Mean(T* input_data, const int* input_dims, const int input_num_dims, +template +inline bool Mean(T* input_data, const int* input_dims, const int input_num_dims, T* output_data, const int* output_dims, const int output_num_dims, const int* axis, const int num_axis_dimensions, bool keep_dims, int* temp_index, - int* resolved_axis) { + int* resolved_axis, U* temp_sum) { // resets output data. size_t num_outputs = 1; for (int idx = 0; idx < output_num_dims; ++idx) { num_outputs *= static_cast(output_dims[idx]); } for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = 0; + output_data[idx] = T(); + temp_sum[idx] = U(); } // resets temp index. for (int idx = 0; idx < input_num_dims; ++idx) { @@ -3228,19 +3229,24 @@ inline void Mean(T* input_data, const int* input_dims, const int input_num_dims, size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, num_resolved_axis, resolved_axis); - output_data[output_offset] += input_data[input_offset]; + temp_sum[output_offset] += static_cast(input_data[input_offset]); } // takes average by num of elements added to get mean. size_t num_elements_in_axis = 1; for (int idx = 0; idx < num_resolved_axis; ++idx) { - num_elements_in_axis *= static_cast(input_dims[resolved_axis[idx]]); + size_t current = static_cast(input_dims[resolved_axis[idx]]); + if (current > (std::numeric_limits::max() / num_elements_in_axis)) { + return false; + } + num_elements_in_axis *= current; } if (num_elements_in_axis > 0) { for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = static_cast(static_cast(output_data[idx]) / - num_elements_in_axis); + output_data[idx] = + static_cast(temp_sum[idx] / static_cast(num_elements_in_axis)); } } + return true; } template diff --git a/tensorflow/contrib/lite/kernels/mean.cc b/tensorflow/contrib/lite/kernels/mean.cc index aff19581ea..047bdd1039 100644 --- a/tensorflow/contrib/lite/kernels/mean.cc +++ b/tensorflow/contrib/lite/kernels/mean.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" #include "tensorflow/contrib/lite/kernels/internal/tensor.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" @@ -48,7 +49,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { // Creates two temp tensors to store index and axis for internal // implementation only. auto* scratch_tensor_index = new int; - context->AddTensors(context, 2, scratch_tensor_index); + context->AddTensors(context, 3, scratch_tensor_index); return scratch_tensor_index; } @@ -64,6 +65,14 @@ TfLiteStatus ResizeTempAxis(TfLiteContext* context, MeanContext* op_context, return context->ResizeTensor(context, resolved_axis, axis_size); } +// Resizes the temp tensor that stores temp sum of reduced elements. +TfLiteStatus ResizeTempSum(TfLiteContext* context, MeanContext* op_context, + TfLiteTensor* temp_sum) { + TfLiteIntArray* size = TfLiteIntArrayCreate(1); + size->data[0] = static_cast(NumElements(op_context->output)); + return context->ResizeTensor(context, temp_sum, size); +} + // Resizes output array based on the input size and resolved axis. TfLiteStatus ResizeOutputTensor(TfLiteContext* context, MeanContext* op_context) { @@ -135,7 +144,7 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, // Creates a temp index to iterate through input data. int* scratch_tensor_index = reinterpret_cast(node->user_data); TfLiteIntArrayFree(node->temporaries); - node->temporaries = TfLiteIntArrayCreate(2); + node->temporaries = TfLiteIntArrayCreate(3); node->temporaries->data[0] = *scratch_tensor_index; TfLiteTensor* scratch_tensor = &context->tensors[node->temporaries->data[0]]; scratch_tensor->type = kTfLiteInt32; @@ -149,6 +158,25 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, node->temporaries->data[1] = *scratch_tensor_index + 1; TfLiteTensor* resolved_axis = &context->tensors[node->temporaries->data[1]]; resolved_axis->type = kTfLiteInt32; + // Creates a temp tensor to store temp sums when calculating mean. + node->temporaries->data[2] = *scratch_tensor_index + 2; + TfLiteTensor* temp_sum = &context->tensors[node->temporaries->data[2]]; + switch (op_context->input->type) { + case kTfLiteFloat32: + temp_sum->type = kTfLiteFloat32; + break; + case kTfLiteInt32: + temp_sum->type = kTfLiteInt64; + break; + case kTfLiteInt64: + temp_sum->type = kTfLiteInt64; + break; + case kTfLiteUInt8: + temp_sum->type = kTfLiteInt32; + break; + default: + return kTfLiteError; + } return kTfLiteOk; } @@ -160,16 +188,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context)); TfLiteTensor* resolved_axis = &context->tensors[node->temporaries->data[1]]; + TfLiteTensor* temp_sum = &context->tensors[node->temporaries->data[2]]; // Leaves work to Eval if axis is not constant; else resizes output. if (!IsConstantTensor(op_context.axis)) { SetTensorToDynamic(op_context.output); SetTensorToDynamic(resolved_axis); + SetTensorToDynamic(temp_sum); return kTfLiteOk; } resolved_axis->allocation_type = kTfLiteArenaRw; TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); - return ResizeOutputTensor(context, &op_context); + TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); + temp_sum->allocation_type = kTfLiteArenaRw; + return ResizeTempSum(context, &op_context, temp_sum); } template @@ -178,14 +210,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int num_axis = static_cast(NumElements(op_context.axis)); TfLiteTensor* temp_index = &context->tensors[node->temporaries->data[0]]; TfLiteTensor* resolved_axis = &context->tensors[node->temporaries->data[1]]; + TfLiteTensor* temp_sum = &context->tensors[node->temporaries->data[2]]; // Resize the output tensor if the output tensor is dynamic. if (IsDynamicTensor(op_context.output)) { TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis)); TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); + TF_LITE_ENSURE_OK(context, ResizeTempSum(context, &op_context, temp_sum)); } -#define TF_LITE_MEAN(kernel_type, data_type) \ +#define TF_LITE_MEAN(kernel_type, data_type, temp_data_type) \ kernel_type::Mean<>( \ GetTensorData(op_context.input), \ op_context.input->dims->data, op_context.input->dims->size, \ @@ -193,21 +227,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { op_context.output->dims->data, op_context.output->dims->size, \ GetTensorData(op_context.axis), num_axis, \ op_context.params->keep_dims, GetTensorData(temp_index), \ - GetTensorData(resolved_axis)) + GetTensorData(resolved_axis), \ + GetTensorData(temp_sum)) if (kernel_type == kReference) { switch (op_context.input->type) { case kTfLiteFloat32: - TF_LITE_MEAN(reference_ops, float); + TF_LITE_ENSURE(context, TF_LITE_MEAN(reference_ops, float, float)); break; case kTfLiteInt32: - TF_LITE_MEAN(reference_ops, int); - break; - case kTfLiteUInt8: - TF_LITE_MEAN(reference_ops, uint8_t); + TF_LITE_ENSURE(context, TF_LITE_MEAN(reference_ops, int, int64_t)); break; case kTfLiteInt64: - TF_LITE_MEAN(reference_ops, int64_t); + TF_LITE_ENSURE(context, TF_LITE_MEAN(reference_ops, int64_t, int64_t)); + break; + case kTfLiteUInt8: + TF_LITE_ENSURE_EQ(context, op_context.input->params.scale, + op_context.output->params.scale); + TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, + op_context.output->params.zero_point); + TF_LITE_ENSURE(context, TF_LITE_MEAN(reference_ops, uint8_t, int)); break; default: return kTfLiteError; @@ -216,7 +255,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { #undef TF_LITE_MEAN return kTfLiteOk; } - } // namespace mean TfLiteRegistration* Register_MEAN_REF() { diff --git a/tensorflow/contrib/lite/kernels/mean_test.cc b/tensorflow/contrib/lite/kernels/mean_test.cc index 2d6d4bc2da..79c9957f76 100644 --- a/tensorflow/contrib/lite/kernels/mean_test.cc +++ b/tensorflow/contrib/lite/kernels/mean_test.cc @@ -37,8 +37,15 @@ class BaseMeanOpModel : public SingleOpModel { return ExtractVector(output_); } + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + int Input() { return input_; } + protected: int input_; int axis_; @@ -142,56 +149,64 @@ TEST(DynamicFloatMeanOpTest, Scale) { EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({9.527}))); } +// for quantized Add, the error shouldn't exceed step +float GetTolerance(int min, int max) { return (max - min) / 255.0; } + TEST(ConstUint8MeanOpTest, NotKeepDims) { - std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24}; - MeanOpConstModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {2}}, - {4}, {1, 0, -3, -3}, false); - m.SetInput(data); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::initializer_list data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + MeanOpConstModel m({TensorType_UINT8, {1, 3, 2}, -1.0, 1.0}, + {TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {1}, false); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({12, 13})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + {0.4, 0.4}, kQuantizedTolerance))); } TEST(ConstUint8MeanOpTest, KeepDims) { - std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24}; - MeanOpConstModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {3}}, - {2}, {0, 2}, true); - m.SetInput(data); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::initializer_list data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + MeanOpConstModel m({TensorType_UINT8, {3, 2}, -1.0, 1.0}, + {TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {1}, true); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 3, 1})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({10, 12, 14})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({0.3, 0.35, 0.55}, kQuantizedTolerance))); } TEST(DynamicUint8MeanOpTest, NotKeepDims) { - std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24}; - MeanOpDynamicModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {2}}, - {TensorType_INT32, {4}}, false); - std::initializer_list axis = {1, 0, -3, -3}; + float kQuantizedTolerance = GetTolerance(-5.0, 2.0); + std::initializer_list data = {1.3, -4.8, -3.6, 0.24}; + MeanOpDynamicModel m({TensorType_UINT8, {2, 2}, -5.0, 2.0}, + {TensorType_UINT8, {2}, -5.0, 2.0}, + {TensorType_INT32, {1}}, false); + std::initializer_list axis = {1}; m.SetAxis(axis); - m.SetInput(data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({12, 13})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-1.75, -1.68}, kQuantizedTolerance))); } TEST(DynamicUint8MeanOpTest, KeepDims) { - std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24}; - MeanOpDynamicModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {3}}, - {TensorType_INT32, {2}}, true); - std::initializer_list axis = {0, 2}; + float kQuantizedTolerance = GetTolerance(-10.0, 12.0); + std::initializer_list data = {11.14, -0.14, 7.423, 0.879}; + MeanOpDynamicModel m({TensorType_UINT8, {2, 2}, -10.0, 12.0}, + {TensorType_UINT8, {2}, -10.0, 12.0}, + {TensorType_INT32, {1}}, true); + std::initializer_list axis = {0}; m.SetAxis(axis); - m.SetInput(data); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 3, 1})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({10, 12, 14})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({9.2815, 0.3695}, kQuantizedTolerance))); } } // namespace diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 5cc82da5d5..7c97ef0d31 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -332,6 +332,7 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { case OperatorType::kPad: case OperatorType::kGather: case OperatorType::kTranspose: + case OperatorType::kMean: changed = HardcodeMinMaxFromFirstInput(model, op); break; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 9679ea0a77..9fcc95e1fe 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -52,7 +52,7 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kStridedSlice || type == OperatorType::kDepthToSpace || type == OperatorType::kLstmCell || type == OperatorType::kGather || - type == OperatorType::kTranspose; + type == OperatorType::kTranspose || type == OperatorType::kMean; } template -- GitLab From dcbc0f007da212ae123efdd9eb86a72208a849da Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 28 Mar 2018 15:59:04 -0400 Subject: [PATCH 624/960] Update api.py (#18049) Avoid overwriting existing namespace items that might replace the converted functions. --- tensorflow/contrib/autograph/impl/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/impl/api.py b/tensorflow/contrib/autograph/impl/api.py index 1c4fcaa622..dce994e50d 100644 --- a/tensorflow/contrib/autograph/impl/api.py +++ b/tensorflow/contrib/autograph/impl/api.py @@ -247,7 +247,10 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(inspect_utils.getnamespace(e)) + for key, val in inspect_utils.getnamespace(e).items(): + # Avoid overwriting entities that have been transformed. + if key not in compiled_node.__dict__: + compiled_node.__dict__[key] = val compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 52aeafdf04af9f95500067dc353fd80728032b63 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 28 Mar 2018 21:59:25 +0200 Subject: [PATCH 625/960] documenting that init_op will not be run when loading from checkpoint (#18051) --- tensorflow/python/training/session_manager.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py index 360e02fb44..a00ceb9021 100644 --- a/tensorflow/python/training/session_manager.py +++ b/tensorflow/python/training/session_manager.py @@ -229,10 +229,14 @@ class SessionManager(object): up to `max_wait_secs`, for recovery to succeed. If the model cannot be recovered successfully then it is initialized by - either running the provided `init_op`, or calling the provided `init_fn`. - The local_init_op is also run after init_op and init_fn, regardless of + running the `init_op` and calling `init_fn` if they are provided. + The `local_init_op` is also run after init_op and init_fn, regardless of whether the model was recovered successfully, but only if - ready_for_local_init_op passes. + `ready_for_local_init_op` passes. + + If the model is recovered from a checkpoint it is assumed that all + global variables have been initialized, in particular neither `init_op` + nor `init_fn` will be executed. It is an error if the model cannot be recovered and no `init_op` or `init_fn` or `local_init_op` are passed. -- GitLab From 23c9e506bba637d9528cdf0c3a18a4cb05135a3a Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Thu, 29 Mar 2018 04:03:55 +0800 Subject: [PATCH 626/960] Replace PLATFORM_WINDOWS to _MSC_VER as it only applies to MSVC (#18047) --- tensorflow/core/platform/cpu_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index 331f3e5251..bb77650e26 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -18,7 +18,7 @@ limitations under the License. #include -#if defined(PLATFORM_WINDOWS) +#if defined(_MSC_VER) #include "tensorflow/core/platform/windows/cpu_info.h" #endif -- GitLab From 6708117d292e09f259a4c685f8ca4d81cd6a0bd9 Mon Sep 17 00:00:00 2001 From: shengfuintel Date: Wed, 28 Mar 2018 13:04:08 -0700 Subject: [PATCH 627/960] Fixed the bug in mkl_input_conversion to compare tensorflow shape instead of mkl shape (#18033) --- .../core/kernels/mkl_input_conversion_op.cc | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index d91f7107c5..68d3e1c9ab 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -263,21 +263,18 @@ class MklInputConversionOp : public OpKernel { private: void Compute(OpKernelContext* context) override { - const Tensor& input_tensor_0 = MklGetInput(context, 0); + const int kInputIndex_0 = 0, kInputIndex_1 = 1; + const Tensor& input_tensor_0 = MklGetInput(context, kInputIndex_0); MklDnnShape input_shape_0; - GetMklShape(context, 0, &input_shape_0); + GetMklShape(context, kInputIndex_0, &input_shape_0); - const Tensor& input_tensor_1 = MklGetInput(context, 1); + const Tensor& input_tensor_1 = MklGetInput(context, kInputIndex_1); MklDnnShape input_shape_1; - GetMklShape(context, 1, &input_shape_1); - - bool tf_shapes_are_same = - context->input(0).shape() == context->input(1).shape(); + GetMklShape(context, kInputIndex_1, &input_shape_1); - VLOG(1) << "MklInputConversionOp: Input shapes are " - << (tf_shapes_are_same ? "*same*" : "*different*") << ": " - << context->input(0).shape().DebugString() << " and " - << context->input(1).shape().DebugString(); + VLOG(1) << "MklInputConversionOp: Input shapes are: " + << context->input(kInputIndex_0).shape().DebugString() << " and " + << context->input(kInputIndex_1).shape().DebugString(); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // if both inputs are in TF format, just copy input tensors to output. @@ -285,15 +282,19 @@ class MklInputConversionOp : public OpKernel { VLOG(1) << "MklInputConversionOp: No conversion needed, " << "copying TF inputs to output"; - ForwardTfTensorInToOut(context, 0, 0); - ForwardTfTensorInToOut(context, 1, 1); + ForwardTfTensorInToOut(context, kInputIndex_0, kInputIndex_0); + ForwardTfTensorInToOut(context, kInputIndex_1, kInputIndex_1); return; } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // If both inputs are in MKL format if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) { - if (tf_shapes_are_same) { + // It is safer to compare the original TensorFlow shapes than to compare + // Mkl shapes since element wise ops are forwarded to Eigen implementation. + TensorShape tf_shape0 = input_shape_0.GetTfShape(); + TensorShape tf_shape1 = input_shape_1.GetTfShape(); + if (tf_shape0 == tf_shape1) { auto input0_md = input_shape_0.GetMklLayout(); auto input1_md = input_shape_1.GetMklLayout(); @@ -302,8 +303,8 @@ class MklInputConversionOp : public OpKernel { VLOG(1) << "MklInputConversionOp: No conversion needed, " << "copying MKL inputs with identical shapes to output"; - ForwardMklTensorInToOut(context, 0, 0); - ForwardMklTensorInToOut(context, 1, 1); + ForwardMklTensorInToOut(context, kInputIndex_0, kInputIndex_0); + ForwardMklTensorInToOut(context, kInputIndex_1, kInputIndex_1); return; } else { VLOG(1) << "MklInputConversionOp: Shape is same, but format is " @@ -324,7 +325,7 @@ class MklInputConversionOp : public OpKernel { mkl_output_mkl_shape.SetMklLayout(&input1_md); // Create output Mkl tensor for index 0 - AllocateOutputSetMklShape(context, 0, &tensor_out, + AllocateOutputSetMklShape(context, kInputIndex_0, &tensor_out, input_tensor_0.shape(), mkl_output_mkl_shape); @@ -342,7 +343,7 @@ class MklInputConversionOp : public OpKernel { stream(stream::kind::eager).submit(net).wait(); // Input1 will be passed through - ForwardMklTensorInToOut(context, 1, 1); + ForwardMklTensorInToOut(context, kInputIndex_1, kInputIndex_1); return; } } @@ -361,11 +362,11 @@ class MklInputConversionOp : public OpKernel { << "converted MKL inputs to TF format"; MklToTfOp::ConvertMklToTf(this, context, data_format_str, - op_data_type, has_avx512f_, 0); + op_data_type, has_avx512f_, kInputIndex_0); MklToTfOp::ConvertMklToTf(this, context, data_format_str, - op_data_type, has_avx512f_, 1); - SetDummyMklShapeOutput(context, 0); - SetDummyMklShapeOutput(context, 1); + op_data_type, has_avx512f_, kInputIndex_1); + SetDummyMklShapeOutput(context, kInputIndex_0); + SetDummyMklShapeOutput(context, kInputIndex_1); return; } @@ -377,7 +378,6 @@ class MklInputConversionOp : public OpKernel { const Tensor* mkl_tensor; const MklDnnShape* mkl_shape; const Tensor* tf_tensor; - MklDnnShape* tf_mkl_shape; uint mkl_tensor_index; uint tf_tensor_index; if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) { @@ -385,14 +385,12 @@ class MklInputConversionOp : public OpKernel { mkl_shape = &input_shape_0; mkl_tensor_index = 0; tf_tensor = &input_tensor_1; - tf_mkl_shape = &input_shape_1; tf_tensor_index = 1; } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) { mkl_tensor = &input_tensor_1; mkl_shape = &input_shape_1; mkl_tensor_index = 1; tf_tensor = &input_tensor_0; - tf_mkl_shape = &input_shape_0; tf_tensor_index = 0; } else { CHECK(false) << "MklInputConversionOp: Unexpected combination of input " @@ -466,8 +464,8 @@ class MklInputConversionOp : public OpKernel { } VLOG(1) << "MklInputConversionOp: Shapes (output): " - << context->mutable_output(0)->shape().DebugString() << " and " - << context->mutable_output(1)->shape().DebugString(); + << context->mutable_output(kInputIndex_0)->shape().DebugString() << " and " + << context->mutable_output(kInputIndex_1)->shape().DebugString(); VLOG(1) << "MklInputConversion completed successfully."; } -- GitLab From bb4e724f429ae5c9afad3a343dc1f483ecde1f74 Mon Sep 17 00:00:00 2001 From: George Sterpu Date: Wed, 28 Mar 2018 22:05:28 +0200 Subject: [PATCH 628/960] small update ctc_ops docstring (#18046) --- tensorflow/python/ops/ctc_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 4b57e2de79..908e793902 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -218,7 +218,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): The rows store: `[batch, time]`. `decoded.values`: Values vector, size `(total_decoded_outputs)`. The vector stores the decoded classes. - `decoded.shape`: Shape vector, size `(2)`. + `decoded.dense_shape`: Shape vector, size `(2)`. The shape values are: `[batch_size, max_decoded_length]` neg_sum_logits: A `float` matrix `(batch_size x 1)` containing, for the sequence found, the negative of the sum of the greatest logit at each @@ -265,7 +265,7 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, The rows store: [batch, time]. `decoded[j].values`: Values vector, size `(total_decoded_outputs[j])`. The vector stores the decoded classes for beam j. - `decoded[j].shape`: Shape vector, size `(2)`. + `decoded[j].dense_shape`: Shape vector, size `(2)`. The shape values are: `[batch_size, max_decoded_length[j]]`. log_probability: A `float` matrix `(batch_size x top_paths)` containing sequence log-probabilities. -- GitLab From 480ac84aa8390e19a54bd2feef3a6069d959bb4e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 13:11:12 -0700 Subject: [PATCH 629/960] Add op cost model for MaxPool, AvgPool, FusedBatchNorm, their grad ops, and ReluGrad. PiperOrigin-RevId: 190821116 --- .../grappler/costs/op_level_cost_estimator.cc | 306 +++++++++++++- .../grappler/costs/op_level_cost_estimator.h | 14 +- .../costs/op_level_cost_estimator_test.cc | 391 ++++++++++++++++++ 3 files changed, 709 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 905cc2a215..0f6307cfdf 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -50,6 +50,12 @@ constexpr char kPreventGradient[] = "PreventGradient"; constexpr char kGather[] = "Gather"; constexpr char kGatherV2[] = "GatherV2"; constexpr char kSlice[] = "Slice"; +constexpr char kMaxPool[] = "MaxPool"; +constexpr char kMaxPoolGrad[] = "MaxPoolGrad"; +constexpr char kAvgPool[] = "AvgPool"; +constexpr char kAvgPoolGrad[] = "AvgPoolGrad"; +constexpr char kFusedBatchNorm[] = "FusedBatchNorm"; +constexpr char kFusedBatchNormGrad[] = "FusedBatchNormGrad"; static const Costs::Duration kMinComputeTime(1); @@ -71,14 +77,39 @@ Padding GetPadding(const OpInfo& op_features) { return Padding::SAME; // Default padding. } +bool IsTraining(const OpInfo& op_info) { + if (op_info.attr().find("is_training") != op_info.attr().end() && + op_info.attr().at("is_training").b()) { + return true; + } + return false; +} + +// TODO(dyoon): support non-4D tensors in the c ost functions of convolution +// related ops (Conv, Pool, BatchNorm, and their backprops) and the related +// helper functions. std::vector GetStrides(const OpInfo& op_features) { if (op_features.attr().find("strides") != op_features.attr().end()) { const auto strides = op_features.attr().at("strides").list().i(); + CHECK(strides.size() == 4) << "Attr strides is not a length-4 vector: " + << op_features.DebugString(); return {strides[0], strides[1], strides[2], strides[3]}; } return {1, 1, 1, 1}; } +std::vector GetKernelSize(const OpInfo& op_info) { + if (op_info.attr().find("ksize") != op_info.attr().end()) { + const auto ksize = op_info.attr().at("ksize").list().i(); + CHECK(ksize.size() == 4) + << "Attr ksize is not a length-4 vector: " << op_info.DebugString(); + return {ksize[0], ksize[1], ksize[2], ksize[3]}; + } + // Note that FusedBatchNorm doesn't have ksize attr, but GetKernelSize returns + // {1, 1, 1, 1} in that case. + return {1, 1, 1, 1}; +} + int64 GetOutputSize(const int64 input, const int64 filter, const int64 stride, const Padding& padding) { // Logic for calculating output shape is from GetWindowedOutputSizeVerbose() @@ -193,7 +224,15 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {kRank, wrap(&OpLevelCostEstimator::PredictMetadata)}, {kShape, wrap(&OpLevelCostEstimator::PredictMetadata)}, - {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}}; + {kSize, wrap(&OpLevelCostEstimator::PredictMetadata)}, + {kMaxPool, wrap(&OpLevelCostEstimator::PredictMaxPool)}, + {kMaxPoolGrad, wrap(&OpLevelCostEstimator::PredictMaxPoolGrad)}, + {kAvgPool, wrap(&OpLevelCostEstimator::PredictAvgPool)}, + {kAvgPoolGrad, wrap(&OpLevelCostEstimator::PredictAvgPoolGrad)}, + {kFusedBatchNorm, wrap(&OpLevelCostEstimator::PredictFusedBatchNorm)}, + {kFusedBatchNormGrad, + wrap(&OpLevelCostEstimator::PredictFusedBatchNormGrad)}, + }; #define EIGEN_COST(X) Eigen::internal::functor_traits::Cost @@ -258,6 +297,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { {"QuantizedAdd", EIGEN_COST(scalar_sum_op)}, {"QuantizedMul", EIGEN_COST(scalar_product_op)}, {"RealDiv", EIGEN_COST(scalar_quotient_op)}, + {"ReluGrad", EIGEN_COST(scalar_max_op)}, {"SquareDifference", 1}, {"Sub", EIGEN_COST(scalar_difference_op)}, {"TruncateDiv", EIGEN_COST(scalar_quotient_op)}, @@ -1044,5 +1084,269 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice( return costs; } +/* static */ +OpLevelCostEstimator::ConvolutionDimensions +OpLevelCostEstimator::OpDimensionsFromInputs( + const TensorShapeProto& original_image_shape, const OpInfo& op_info, + bool* found_unknown_shapes) { + VLOG(2) << "op features: " << op_info.DebugString(); + VLOG(2) << "Original image shape: " << original_image_shape.DebugString(); + auto image_shape = + MaybeGetMinimumShape(original_image_shape, 4, found_unknown_shapes); + VLOG(2) << "Image shape: " << image_shape.DebugString(); + + int x_index, y_index, channel_index; + const string& data_format = GetDataFormat(op_info); + if (data_format == "NCHW") { + x_index = 2; + y_index = 3; + channel_index = 1; + } else { + x_index = 1; + y_index = 2; + channel_index = 3; + } + int64 batch = image_shape.dim(0).size(); + int64 ix = image_shape.dim(x_index).size(); + int64 iy = image_shape.dim(y_index).size(); + int64 iz = image_shape.dim(channel_index).size(); + + // Note that FusedBatchNorm doesn't have ksize attr, but GetKernelSize returns + // {1, 1, 1, 1} in that case. + std::vector ksize = GetKernelSize(op_info); + int64 kx = ksize[x_index]; + int64 ky = ksize[y_index]; + + std::vector strides = GetStrides(op_info); + int64 sx = strides[x_index]; + int64 sy = strides[y_index]; + const auto padding = GetPadding(op_info); + + int64 ox = GetOutputSize(ix, kx, sx, padding); + int64 oy = GetOutputSize(iy, ky, sy, padding); + int64 oz = iz; + + OpLevelCostEstimator::ConvolutionDimensions conv_dims = { + batch, ix, iy, iz, kx, ky, oz, ox, oy, sx, sy, padding}; + return conv_dims; +} + +Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // x: op_info.inputs(0) + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + // kx * ky - 1 comparisons per output (kx * xy > 1) + // or 1 copy per output (kx * k1 = 1). + int per_output_ops = dims.kx * dims.ky == 1 ? 1 : dims.kx * dims.ky - 1; + int64 ops = dims.batch * dims.ox * dims.oy * dims.oz * per_output_ops; + + double total_input_size = 0; + if (dims.ky >= dims.sy) { + total_input_size = + CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); + } else { // dims.ky < dims.sy + // Vertical stride is larger than vertical kernel; assuming row-major + // format, skip unnecessary rows (or read every kx rows per sy rows, as the + // others are not used for output). + const auto data_size = DataTypeSize(BaseType(op_info.inputs(0).dtype())); + total_input_size = + data_size * dims.batch * dims.ix * dims.ky * dims.oy * dims.iz; + } + const double total_output_size = + CalculateOutputSize(op_info, &found_unknown_shapes); + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size, op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + +Costs OpLevelCostEstimator::PredictMaxPoolGrad( + const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // x: op_info.inputs(0) + // y: op_info.inputs(1) + // y_grad: op_info.inputs(2) + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + + int64 ops = 0; + if (dims.kx == 1 && dims.ky == 1) { + // 1x1 window. No need to know which input was max. + ops = dims.batch * dims.ix * dims.iy * dims.iz; + } else if (dims.kx <= dims.sx && dims.ky <= dims.sy) { + // Non-overlapping window: re-run maxpool, then assign zero or y_grad. + ops = dims.batch * dims.iz * + (dims.ox * dims.oy * (dims.kx * dims.ky - 1) + dims.ix * dims.iy); + } else { + // Overlapping window: initialize with zeros, re-run maxpool, then + // accumulate y_gad to proper x_grad locations. + ops = dims.batch * dims.iz * + (dims.ox * dims.oy * (dims.kx * dims.ky - 1) + dims.ix * dims.iy * 2); + } + + // Just read x and y_grad; no need to read y as we assume MaxPoolGrad re-run + // MaxPool internally. + double total_input_size = + CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); + total_input_size += + CalculateTensorSize(op_info.inputs(2), &found_unknown_shapes); + // Write x_grad; size equal to x. + const double total_output_size = + CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size, op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + +Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // x: op_info.inputs(0) + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + + // kx * ky - 1 additions and 1 multiplication per output. + int64 ops = dims.batch * dims.ox * dims.oy * dims.oz * dims.kx * dims.ky; + + double total_input_size = 0; + if (dims.ky >= dims.sy) { + total_input_size = + CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); + } else { // dims.ky < dims.sy + // vertical stride is larger than vertical kernel; assuming row-major + // format, skip unnecessary rows (or read every kx rows per sy rows, as the + // others are not used for output). + const auto data_size = DataTypeSize(BaseType(op_info.inputs(0).dtype())); + total_input_size = + data_size * dims.batch * dims.ix * dims.ky * dims.oy * dims.iz; + } + const double total_output_size = + CalculateOutputSize(op_info, &found_unknown_shapes); + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size, op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + +Costs OpLevelCostEstimator::PredictAvgPoolGrad( + const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // x: op_info.inputs(0) + // y_grad: op_info.inputs(1) + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + + int64 ops = 0; + if (dims.kx <= dims.sx && dims.ky <= dims.sy) { + // Non-overlapping window. + ops = dims.batch * dims.iz * (dims.ix * dims.iy + dims.ox * dims.oy); + } else { + // Overlapping window. + ops = dims.batch * dims.iz * + (dims.ix * dims.iy + dims.ox * dims.oy * (dims.kx * dims.ky + 1)); + } + + const double total_input_size = + CalculateInputSize(op_info, &found_unknown_shapes); + const double total_output_size = + CalculateOutputSize(op_info, &found_unknown_shapes); + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size, op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + +Costs OpLevelCostEstimator::PredictFusedBatchNorm( + const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // x: op_info.inputs(0) + // scale: op_info.inputs(1) + // offset: op_info.inputs(2) + // mean: op_info.inputs(3) --> only for inference + // variance: op_info.inputs(4) --> only for inference + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(0).shape(), op_info, &found_unknown_shapes); + const bool is_training = IsTraining(op_info); + + int64 ops = 0; + const auto rsqrt_cost = Eigen::internal::functor_traits< + Eigen::internal::scalar_rsqrt_op>::Cost; + if (is_training) { + ops = dims.iz * (dims.batch * dims.ix * dims.iy * 4 + 6 + rsqrt_cost); + } else { + ops = dims.batch * dims.ix * dims.iy * dims.iz * 2; + } + + const double size_nhwc = + CalculateTensorSize(op_info.inputs(0), &found_unknown_shapes); + const double size_c = + CalculateTensorSize(op_info.inputs(1), &found_unknown_shapes); + double total_input_size = 0.0; + double total_internal_read_size = 0.0; + double total_output_size = 0.0; + if (is_training) { + total_input_size = size_nhwc + size_c * 2; + total_output_size = size_nhwc + size_c * 4; + total_internal_read_size = size_nhwc; + } else { + total_input_size = size_nhwc + size_c * 4; + total_output_size = size_nhwc; + } + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size + total_internal_read_size, + op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + +Costs OpLevelCostEstimator::PredictFusedBatchNormGrad( + const OpContext& op_context) const { + bool found_unknown_shapes = false; + const auto& op_info = op_context.op_info; + // y_backprop: op_info.inputs(0) + // x: op_info.inputs(1) + // scale: op_info.inputs(2) + // mean: op_info.inputs(3) + // variance or inverse of variance: op_info.inputs(4) + ConvolutionDimensions dims = OpDimensionsFromInputs( + op_info.inputs(1).shape(), op_info, &found_unknown_shapes); + + int64 ops = 0; + const auto rsqrt_cost = Eigen::internal::functor_traits< + Eigen::internal::scalar_rsqrt_op>::Cost; + ops = dims.iz * (dims.batch * dims.ix * dims.iy * 11 + 5 + rsqrt_cost); + + const double size_nhwc = + CalculateTensorSize(op_info.inputs(1), &found_unknown_shapes); + const double size_c = + CalculateTensorSize(op_info.inputs(2), &found_unknown_shapes); + double total_input_size = size_nhwc * 2 + size_c * 2; + double total_internal_read_size = size_nhwc; + double total_output_size = size_nhwc * 1 + size_c * 2; + + Costs costs = PredictOpCountBasedCost( + ops, total_input_size + total_output_size + total_internal_read_size, + op_info); + costs.inaccurate = found_unknown_shapes; + costs.max_memory = total_output_size; + return costs; +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index 1b3babb206..fcbecbb6dc 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -145,6 +145,12 @@ class OpLevelCostEstimator { Costs PredictBatchMatMul(const OpContext& op_context) const; Costs PredictMetadata(const OpContext& op_context) const; Costs PredictGatherOrSlice(const OpContext& op_context) const; + Costs PredictMaxPool(const OpContext& op_context) const; + Costs PredictMaxPoolGrad(const OpContext& op_context) const; + Costs PredictAvgPool(const OpContext& op_context) const; + Costs PredictAvgPoolGrad(const OpContext& op_context) const; + Costs PredictFusedBatchNorm(const OpContext& op_context) const; + Costs PredictFusedBatchNormGrad(const OpContext& op_context) const; // Utility function for safe division. Returns 0 // if rhs is 0 or negative. @@ -156,9 +162,15 @@ class OpLevelCostEstimator { } } + // For convolution and its grad ops. static ConvolutionDimensions ConvolutionDimensionsFromInputs( const TensorShapeProto& original_image_shape, - const TensorShapeProto& original_filter_shape, const OpInfo& op_features, + const TensorShapeProto& original_filter_shape, const OpInfo& op_info, + bool* found_unknown_shapes); + + // For Pooling, FusedBatchNorm, and their grad ops. + static ConvolutionDimensions OpDimensionsFromInputs( + const TensorShapeProto& original_image_shape, const OpInfo& op_info, bool* found_unknown_shapes); protected: diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 99bf28f21b..56915ed821 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" @@ -169,6 +171,130 @@ OpContext DescribeBiasAdd(int size1, int size2) { return op_context; } +int GetOutputSize(const int x, const int k, const int s, + const string& padding) { + if (padding == "SAME") { + return (x + s - 1) / s; + } else { + return (x - k + s) / s; + } +} + +std::vector GetPoolingOutputSize(const std::vector& input, + const std::vector& ksize, + const std::vector& strides, + const string& data_format, + const string& padding) { + // h, w, and c indices: default with NHWC. + int h_index = 1; + int w_index = 2; + int c_index = 3; + if (data_format == "NCHW") { + h_index = 2; + w_index = 3; + c_index = 1; + } + // Extract parameters. + int n = input[0]; + int h = input[h_index]; + int w = input[w_index]; + int c = input[c_index]; + int sx = strides[h_index]; + int sy = strides[w_index]; + int kx = ksize[h_index]; + int ky = ksize[w_index]; + + // Output activation size: default with VALID padding. + int ho = GetOutputSize(h, kx, sx, padding); + int wo = GetOutputSize(w, ky, sy, padding); + + std::vector output; + if (data_format == "NHWC") { + output = {n, ho, wo, c}; + } else { + output = {n, c, ho, wo}; + } + return output; +} + +OpContext DescribePoolingOp(const string& op_name, const std::vector& x, + const std::vector& ksize, + const std::vector& strides, + const string& data_format, const string& padding) { + OpContext op_context; + auto& op_info = op_context.op_info; + SetCpuDevice(&op_info); + op_info.set_op(op_name); + + const std::vector y = + GetPoolingOutputSize(x, ksize, strides, data_format, padding); + if (op_name == "AvgPool" || op_name == "MaxPool") { + // input: x, output: y. + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs()); + DescribeTensor4D(y[0], y[1], y[2], y[3], op_info.add_outputs()); + } else if (op_name == "AvgPoolGrad") { + // input: x, y_grad, output: x_grad. + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs()); + DescribeTensor4D(y[0], y[1], y[2], y[3], op_info.add_inputs()); + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_outputs()); + } else if (op_name == "MaxPoolGrad") { + // input: x, y, y_grad, output: x_grad. + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs()); + DescribeTensor4D(y[0], y[1], y[2], y[3], op_info.add_inputs()); + DescribeTensor4D(y[0], y[1], y[2], y[3], op_info.add_inputs()); + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_outputs()); + } + auto* attr = op_info.mutable_attr(); + SetAttrValue(data_format, &(*attr)["data_format"]); + SetAttrValue(padding, &(*attr)["padding"]); + SetAttrValue(strides, &(*attr)["strides"]); + SetAttrValue(ksize, &(*attr)["ksize"]); + return op_context; +} + +OpContext DescribeFusedBatchNorm(const bool is_training, const bool is_grad, + const std::vector& x, + const string& data_format) { + // First, get MaxPool op info with unit stride and unit window. + OpContext op_context = DescribePoolingOp("MaxPool", x, {1, 1, 1, 1}, + {1, 1, 1, 1}, data_format, "SAME"); + auto& op_info = op_context.op_info; + // Override op name. + if (is_grad) { + op_info.set_op("FusedBatchNormGrad"); + } else { + op_info.set_op("FusedBatchNorm"); + } + + // Add additional input output tensors. + if (is_grad) { + DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs()); + } + int num_1d_inputs = is_grad ? 3 : 4; + for (int i = 0; i < num_1d_inputs; i++) { + auto* tensor = op_info.add_inputs(); + auto* shape = tensor->mutable_shape(); + shape->add_dim()->set_size(x[3]); + tensor->set_dtype(DT_FLOAT); + } + for (int i = 0; i < 4; i++) { + auto* tensor = op_info.add_outputs(); + auto* shape = tensor->mutable_shape(); + shape->add_dim()->set_size(x[3]); + tensor->set_dtype(DT_FLOAT); + } + + // Delete unnecessary attr. + auto* attr = op_context.op_info.mutable_attr(); + attr->erase("ksize"); + attr->erase("strides"); + attr->erase("padding"); + + // Additional attrs for FusedBatchNorm. + SetAttrValue(is_training, &(*attr)["is_training"]); + + return op_context; +} } // namespace class OpLevelCostEstimatorTest : public ::testing::Test { @@ -192,6 +318,50 @@ class OpLevelCostEstimatorTest : public ::testing::Test { estimator_.compute_memory_overlap_ = value; } + void ValidateOpDimensionsFromImputs(const int n, const int h, const int w, + const int c, const int kx, const int ky, + const int sx, const int sy, + const string& data_format, + const string& padding) { + OpContext op_context; + int ho; + int wo; + if (data_format == "NHWC") { + op_context = DescribePoolingOp("MaxPool", {n, h, w, c}, {1, kx, ky, 1}, + {1, sx, sy, 1}, "NHWC", padding); + ho = op_context.op_info.outputs(0).shape().dim(1).size(); + wo = op_context.op_info.outputs(0).shape().dim(2).size(); + } else { + op_context = DescribePoolingOp("MaxPool", {n, c, h, w}, {1, 1, kx, ky}, + {1, 1, sx, sy}, "NCHW", padding); + ho = op_context.op_info.outputs(0).shape().dim(2).size(); + wo = op_context.op_info.outputs(0).shape().dim(3).size(); + } + + bool found_unknown_shapes; + auto dims = OpLevelCostEstimator::OpDimensionsFromInputs( + op_context.op_info.inputs(0).shape(), op_context.op_info, + &found_unknown_shapes); + Padding padding_enum; + if (padding == "VALID") { + padding_enum = Padding::VALID; + } else { + padding_enum = Padding::SAME; + } + EXPECT_EQ(n, dims.batch); + EXPECT_EQ(h, dims.ix); + EXPECT_EQ(w, dims.iy); + EXPECT_EQ(c, dims.iz); + EXPECT_EQ(kx, dims.kx); + EXPECT_EQ(ky, dims.ky); + EXPECT_EQ(sx, dims.sx); + EXPECT_EQ(sy, dims.sy); + EXPECT_EQ(ho, dims.ox); + EXPECT_EQ(wo, dims.oy); + EXPECT_EQ(c, dims.oz); + EXPECT_EQ(padding_enum, dims.padding); + } + OpLevelCostEstimator estimator_; }; @@ -443,5 +613,226 @@ TEST_F(OpLevelCostEstimatorTest, GetTensorShapeProtoFromTensorProto) { } } +TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputs) { + std::vector paddings = {"VALID", "SAME"}; + std::vector formats = {"NHWC", "NCHW"}; + for (const auto& p : paddings) { + for (const auto& f : formats) { + // n, h, w, c, kx, ky, sx, sy, data_format, padding. + ValidateOpDimensionsFromImputs(10, 20, 20, 100, 3, 3, 2, 2, f, p); + ValidateOpDimensionsFromImputs(10, 20, 20, 100, 1, 1, 3, 3, f, p); + ValidateOpDimensionsFromImputs(10, 200, 200, 100, 5, 5, 3, 3, f, p); + ValidateOpDimensionsFromImputs(10, 14, 14, 3840, 3, 3, 2, 2, f, p); + } + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) { + auto predict_max_pool = [this](const int n, const int in, const int c, + const int k, const int s, + const string& padding) -> Costs { + OpContext op_context = DescribePoolingOp( + "MaxPool", {n, in, in, c}, {1, k, k, 1}, {1, s, s, 1}, "NHWC", padding); + return estimator_.PredictCosts(op_context); + }; + + { + // Typical 3xz3 window with 2x2 stride. + auto costs = predict_max_pool(10, 20, 384, 3, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1075200), costs.execution_time); + EXPECT_EQ(Costs::Duration(307200), costs.compute_time); + EXPECT_EQ(Costs::Duration(768000), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 1x1 window with 2x2 stride: used for shortcut in resnet-50. + auto costs = predict_max_pool(10, 20, 384, 1, 2, "SAME"); + EXPECT_EQ(Costs::Duration(499200), costs.execution_time); + EXPECT_EQ(Costs::Duration(38400), costs.compute_time); + EXPECT_EQ(Costs::Duration(460800), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 2x2 window with 3x3 stride. + auto costs = predict_max_pool(10, 20, 384, 2, 3, "VALID"); + EXPECT_EQ(Costs::Duration(561792), costs.execution_time); + EXPECT_EQ(Costs::Duration(56448), costs.compute_time); + EXPECT_EQ(Costs::Duration(505344), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) { + auto predict_max_pool_grad = [this](const int n, const int in, const int c, + const int k, const int s, + const string& padding) -> Costs { + OpContext op_context = + DescribePoolingOp("MaxPoolGrad", {n, in, in, c}, {1, k, k, 1}, + {1, s, s, 1}, "NHWC", padding); + return estimator_.PredictCosts(op_context); + }; + + { + // Typical 3xz3 window with 2x2 stride. + auto costs = predict_max_pool_grad(10, 20, 384, 3, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1996800), costs.execution_time); + EXPECT_EQ(Costs::Duration(614400), costs.compute_time); + EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 1x1 window with 2x2 stride: used for shortcut in resnet-50. + auto costs = predict_max_pool_grad(10, 20, 384, 1, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1536000), costs.execution_time); + EXPECT_EQ(Costs::Duration(153600), costs.compute_time); + EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 2x2 window with 3x3 stride. + auto costs = predict_max_pool_grad(10, 20, 384, 2, 3, "VALID"); + EXPECT_EQ(Costs::Duration(1514112), costs.execution_time); + EXPECT_EQ(Costs::Duration(210048), costs.compute_time); + EXPECT_EQ(Costs::Duration(1304064), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) { + auto predict_avg_pool = [this](const int n, const int in, const int c, + const int k, const int s, + const string& padding) -> Costs { + OpContext op_context = DescribePoolingOp( + "AvgPool", {n, in, in, c}, {1, k, k, 1}, {1, s, s, 1}, "NHWC", padding); + return estimator_.PredictCosts(op_context); + }; + + { + // Typical 3xz3 window with 2x2 stride. + auto costs = predict_avg_pool(10, 20, 384, 3, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1113600), costs.execution_time); + EXPECT_EQ(Costs::Duration(345600), costs.compute_time); + EXPECT_EQ(Costs::Duration(768000), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 1x1 window with 2x2 stride: used for shortcut in resnet-50. + auto costs = predict_avg_pool(10, 20, 384, 1, 2, "SAME"); + EXPECT_EQ(Costs::Duration(499200), costs.execution_time); + EXPECT_EQ(Costs::Duration(38400), costs.compute_time); + EXPECT_EQ(Costs::Duration(460800), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 2x2 window with 3x3 stride. + auto costs = predict_avg_pool(10, 20, 384, 2, 3, "VALID"); + EXPECT_EQ(Costs::Duration(580608), costs.execution_time); + EXPECT_EQ(Costs::Duration(75264), costs.compute_time); + EXPECT_EQ(Costs::Duration(505344), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) { + auto predict_avg_pool_grad = [this](const int n, const int in, const int c, + const int k, const int s, + const string& padding) -> Costs { + OpContext op_context = + DescribePoolingOp("AvgPoolGrad", {n, in, in, c}, {1, k, k, 1}, + {1, s, s, 1}, "NHWC", padding); + return estimator_.PredictCosts(op_context); + }; + + { + // Typical 3xz3 window with 2x2 stride. + auto costs = predict_avg_pool_grad(10, 20, 384, 3, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1920000), costs.execution_time); + EXPECT_EQ(Costs::Duration(537600), costs.compute_time); + EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 1x1 window with 2x2 stride: used for shortcut in resnet-50. + auto costs = predict_avg_pool_grad(10, 20, 384, 1, 2, "SAME"); + EXPECT_EQ(Costs::Duration(1574400), costs.execution_time); + EXPECT_EQ(Costs::Duration(192000), costs.compute_time); + EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + { + // 2x2 window with 3x3 stride. + auto costs = predict_avg_pool_grad(10, 20, 384, 2, 3, "VALID"); + EXPECT_EQ(Costs::Duration(1476480), costs.execution_time); + EXPECT_EQ(Costs::Duration(172416), costs.compute_time); + EXPECT_EQ(Costs::Duration(1304064), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) { + auto predict_fused_bn = [this](const int n, const int in, const int c, + const bool is_training) -> Costs { + OpContext op_context = DescribeFusedBatchNorm( + is_training, /*is_grad=*/false, {n, in, in, c}, "NHWC"); + return estimator_.PredictCosts(op_context); + }; + + { + auto costs = predict_fused_bn(10, 20, 96, /*is_training=*/true); + EXPECT_EQ(Costs::Duration(614737), costs.execution_time); + EXPECT_EQ(Costs::Duration(153706), costs.compute_time); + EXPECT_EQ(Costs::Duration(461031), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + + { + auto costs = predict_fused_bn(10, 20, 32, /*is_training=*/true); + EXPECT_EQ(Costs::Duration(204913), costs.execution_time); + EXPECT_EQ(Costs::Duration(51236), costs.compute_time); + EXPECT_EQ(Costs::Duration(153677), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + + { + auto costs = predict_fused_bn(10, 20, 96, /*is_training=*/false); + EXPECT_EQ(Costs::Duration(384154), costs.execution_time); + EXPECT_EQ(Costs::Duration(76800), costs.compute_time); + EXPECT_EQ(Costs::Duration(307354), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + + { + auto costs = predict_fused_bn(10, 20, 32, /*is_training=*/false); + EXPECT_EQ(Costs::Duration(128052), costs.execution_time); + EXPECT_EQ(Costs::Duration(25600), costs.compute_time); + EXPECT_EQ(Costs::Duration(102452), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} + +TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNormGrad) { + auto predict_fused_bn_grad = [this](const int n, const int in, + const int c) -> Costs { + OpContext op_context = DescribeFusedBatchNorm( + /*is_training=*/false, /*is_grad=*/true, {n, in, in, c}, "NHWC"); + return estimator_.PredictCosts(op_context); + }; + + { + auto costs = predict_fused_bn_grad(10, 20, 96); + EXPECT_EQ(Costs::Duration(1037050), costs.execution_time); + EXPECT_EQ(Costs::Duration(422496), costs.compute_time); + EXPECT_EQ(Costs::Duration(614554), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } + + { + auto costs = predict_fused_bn_grad(128, 7, 384); + EXPECT_EQ(Costs::Duration(6503809), costs.execution_time); + EXPECT_EQ(Costs::Duration(2649677), costs.compute_time); + EXPECT_EQ(Costs::Duration(3854132), costs.memory_time); + EXPECT_FALSE(costs.inaccurate); + } +} } // end namespace grappler } // end namespace tensorflow -- GitLab From a477242f91010480ca72b052a6adbb50f00ea43b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 13:13:20 -0700 Subject: [PATCH 630/960] Add comment that explicitly states that InitTableIterator is Thread-unsafe. PiperOrigin-RevId: 190821427 --- tensorflow/core/kernels/initializable_lookup_table.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/initializable_lookup_table.h b/tensorflow/core/kernels/initializable_lookup_table.h index b16c76dc7f..edb779540f 100644 --- a/tensorflow/core/kernels/initializable_lookup_table.h +++ b/tensorflow/core/kernels/initializable_lookup_table.h @@ -92,6 +92,8 @@ class InitializableLookupTable : public LookupInterface { // // Then the iterator is exhausted, valid returns false and status returns // Status::OutOfRange. + // + // This class is Thread-unsafe. class InitTableIterator { public: InitTableIterator() {} -- GitLab From 01583b714c4144dbf11e1f2ae5189f051d130d13 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 13:21:05 -0700 Subject: [PATCH 631/960] [XLA] Redesign: add the rest of client-service interfaces. The basic idea is, on the client side, for each public method that has a Computation parameter, add an overload with XlaCompuation. If such method needs to call the service side, add corresponding service interfaces. Also make XlaCompuation::GetProgramShape return StatusOr, to be consistent with the Computation class. PiperOrigin-RevId: 190822601 --- tensorflow/compiler/xla/client/client.cc | 19 ++++++++ tensorflow/compiler/xla/client/client.h | 44 +++++++++++++++++++ .../xla/client/xla_client/xla_builder.cc | 10 +++-- .../xla/client/xla_client/xla_computation.cc | 5 ++- .../xla/client/xla_client/xla_computation.h | 3 +- tensorflow/compiler/xla/service/service.cc | 10 +++++ tensorflow/compiler/xla/service/service.h | 16 +++++++ tensorflow/compiler/xla/service_interface.h | 8 ++++ tensorflow/compiler/xla/xla.proto | 9 ++++ 9 files changed, 118 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 5ce3c45528..a857c4ff0b 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -317,6 +317,12 @@ StatusOr>> Client::ExecuteParallel( return std::move(outputs); } +StatusOr>> Client::ExecuteParallel( + tensorflow::gtl::ArraySlice computations) { + return Unimplemented( + "ExecuteParallel is not yet implemented for XlaComputation."); +} + StatusOr> Client::GetDeviceHandles( int64 device_count) { if (device_count < 1) { @@ -393,6 +399,13 @@ StatusOr Client::GetComputationStats( return response.stats(); } +StatusOr Client::GetComputationStats( + const XlaComputation& computation, + const DebugOptions& debug_options) const { + return Unimplemented( + "GetComputationStats is not yet implemented for XlaComputation"); +} + StatusOr> Client::GetComputationShape( const Computation& computation) { GetComputationShapeRequest request; @@ -410,6 +423,12 @@ StatusOr> Client::GetComputationShape( return WrapUnique(response.release_program_shape()); } +StatusOr> Client::GetComputationShape( + const XlaComputation& computation) { + TF_ASSIGN_OR_RETURN(const auto& result, computation.GetProgramShape()); + return MakeUnique(result); +} + StatusOr Client::GetShape(const GlobalData& data) { GetShapeRequest request; *request.mutable_data() = data.handle(); diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index ec87646ebf..226b788d54 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -99,6 +99,36 @@ class Client { StatusOr>> ExecuteParallel( tensorflow::gtl::ArraySlice computations); + // A struct to represent a computation instance to be executed. + // * If execution_options.device_handles is not empty, the computation is + // executed on the devices associated with the handles by partitioning the + // computation based on the attached sharding attributes. Otherwise, a + // device is chosen by the service. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + struct XlaComputationInstance { + const XlaComputation& computation; + std::vector arguments; + ExecutionOptions execution_options; + ExecutionProfile* execution_profile; + + XlaComputationInstance(const XlaComputation& computation, + std::vector arguments, + ExecutionOptions execution_options, + ExecutionProfile* execution_profile) + : computation(computation), + arguments(std::move(arguments)), + execution_options(execution_options), + execution_profile(execution_profile) {} + }; + + // Executes a list XlaComputationInstances and returns global data produced + // from each computation. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr>> ExecuteParallel( + tensorflow::gtl::ArraySlice computations); + // Requests device_count device handles available on the target. The returned // device handles are used to specify the devices to execute the computations // (see ExecuteParallel) or to transfer data (see TransferToServer or @@ -175,6 +205,13 @@ class Client { StatusOr GetComputationStats( const Computation& computation, const DebugOptions& debug_options) const; + // Retrieves the statistics of the given computation. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr GetComputationStats( + const XlaComputation& computation, + const DebugOptions& debug_options) const; + // Returns the Shape of the given array specified by 'data'. The shape // includes the Layout of the array as it is stored on the service. StatusOr GetShape(const GlobalData& data); @@ -184,6 +221,13 @@ class Client { StatusOr> GetComputationShape( const Computation& computation); + // As above, but returns the shape of the provided computation (parameter + // types/names and return type). + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + StatusOr> GetComputationShape( + const XlaComputation& computation); + // Creates a channel handle that can be used to transfer data between // two computations via a pair of Send and Recv instructions. StatusOr CreateChannelHandle(); diff --git a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc index 1b94f9a4eb..e51a8b14c0 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_builder.cc @@ -369,10 +369,12 @@ XlaOp XlaBuilder::Call(const XlaComputation& computation, } c_transform(operand_shapes, std::back_inserter(operand_shape_ptrs), [](const Shape& shape) { return &shape; }); - TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), - ShapeInference::InferCallShape( - operand_shape_ptrs, - /*to_apply=*/computation.GetProgramShape())); + TF_ASSIGN_OR_RETURN(const ProgramShape& called_program_shape, + computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN( + *instr.mutable_shape(), + ShapeInference::InferCallShape(operand_shape_ptrs, + /*to_apply=*/called_program_shape)); // Add called computation. instr.add_called_computation_ids( diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.cc b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc index 3681792eee..a6752c6010 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_computation.cc +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.cc @@ -17,9 +17,12 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/status_macros.h" + namespace xla { -const ProgramShape& XlaComputation::GetProgramShape() const { +StatusOr XlaComputation::GetProgramShape() const { + TF_RET_CHECK(proto_.has_program_shape()); return proto_.program_shape(); } diff --git a/tensorflow/compiler/xla/client/xla_client/xla_computation.h b/tensorflow/compiler/xla/client/xla_client/xla_computation.h index 78e1e3c32c..2a3c695266 100644 --- a/tensorflow/compiler/xla/client/xla_client/xla_computation.h +++ b/tensorflow/compiler/xla/client/xla_client/xla_computation.h @@ -40,7 +40,8 @@ class XlaComputation { // Returns the "program shape" (parameter and return shapes) for this // computation. - const ProgramShape& GetProgramShape() const; + StatusOr GetProgramShape() const; + const HloModuleProto& proto() const { return proto_; } private: diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 1d379f0d03..af05e3f516 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -837,6 +837,11 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, return tensorflow::Status::OK(); } +tensorflow::Status Service::ExecuteGraphParallel( + const ExecuteGraphParallelRequest* arg, ExecuteParallelResponse* result) { + return Unimplemented("execute-graph-parallel is not yet implemented"); +} + tensorflow::Status Service::GetDeviceHandles(const GetDeviceHandlesRequest* arg, GetDeviceHandlesResponse* result) { const int64 available_device_count = execute_backend_->device_count(); @@ -1445,6 +1450,11 @@ tensorflow::Status Service::GetComputationStats( return tensorflow::Status::OK(); } +tensorflow::Status Service::GetComputationGraphStats( + const ComputationGraphStatsRequest* arg, ComputationStatsResponse* result) { + return Unimplemented("get-computation-graph-stats is not yet implemented"); +} + template tensorflow::Status Service::AddInstruction( const RequestT* arg, ResponseT* result, diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 773f0a642d..ebe4a2e043 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -126,6 +126,15 @@ class Service : public ServiceInterface { tensorflow::Status ExecuteParallel(const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) override; + // Executes one or more computations in parallel with the provided global data + // passed as immutable arguments. Returns global data output for each + // computation. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + tensorflow::Status ExecuteGraphParallel( + const ExecuteGraphParallelRequest* arg, + ExecuteParallelResponse* result) override; + // Requests one or more device handles from the target. // // When N device handles are requested and the number of replicas is R, at @@ -224,6 +233,13 @@ class Service : public ServiceInterface { const ComputationStatsRequest* arg, ComputationStatsResponse* result) override; + // Retrieves the statistics of a computation. + // + // TODO(b/74197823): This is a part of a NOT YET ready refactor. + tensorflow::Status GetComputationGraphStats( + const ComputationGraphStatsRequest* arg, + ComputationStatsResponse* result) override; + // Snapshots the current state of a computation handle into a serializable // protocol buffer form, so it can be loaded via // LoadComputationSnapshot. diff --git a/tensorflow/compiler/xla/service_interface.h b/tensorflow/compiler/xla/service_interface.h index d8235113dd..32aae64973 100644 --- a/tensorflow/compiler/xla/service_interface.h +++ b/tensorflow/compiler/xla/service_interface.h @@ -60,6 +60,10 @@ class ServiceInterface { virtual tensorflow::Status ExecuteParallel( const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) = 0; + virtual tensorflow::Status ExecuteGraphParallel( + const ExecuteGraphParallelRequest* arg, + ExecuteParallelResponse* result) = 0; + virtual tensorflow::Status ExecuteAsync(const ExecuteAsyncRequest* arg, ExecuteAsyncResponse* result) = 0; @@ -72,6 +76,10 @@ class ServiceInterface { virtual tensorflow::Status GetComputationStats( const ComputationStatsRequest* arg, ComputationStatsResponse* result) = 0; + virtual tensorflow::Status GetComputationGraphStats( + const ComputationGraphStatsRequest* arg, + ComputationStatsResponse* result) = 0; + virtual tensorflow::Status GetComputationShape( const GetComputationShapeRequest* arg, GetComputationShapeResponse* result) = 0; diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index edf1b07af8..5cb18113e5 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -299,6 +299,11 @@ message ComputationStatsRequest { DebugOptions debug_options = 2; } +message ComputationGraphStatsRequest { + HloModuleProto computation = 1; + DebugOptions debug_options = 2; +} + message ComputationStatsResponse { ComputationStats stats = 1; } @@ -355,6 +360,10 @@ message ExecuteParallelRequest { repeated ExecuteRequest requests = 1; } +message ExecuteGraphParallelRequest { + repeated ExecuteGraphRequest requests = 1; +} + message ExecuteResponse { GlobalDataHandle output = 1; ExecutionProfile profile = 2; -- GitLab From 70666858800a55585ae2775f97a1731db305388a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 13:27:07 -0700 Subject: [PATCH 632/960] Make sure tensor size match before inspecting their content. PiperOrigin-RevId: 190823557 --- .../contrib/lite/testing/tflite_driver.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 613223f3d4..c399f4f2b7 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -56,12 +56,16 @@ void SetTensorData(const std::vector& values, TfLitePtrUnion* data) { class TfLiteDriver::Expectation { public: - Expectation() { data_.raw = nullptr; } + Expectation() { + data_.raw = nullptr; + num_elements_ = 0; + } ~Expectation() { delete[] data_.raw; } template void SetData(const string& csv_values) { const auto& values = testing::Split(csv_values, ","); - data_.raw = new char[values.size() * sizeof(T)]; + num_elements_ = values.size(); + data_.raw = new char[num_elements_ * sizeof(T)]; SetTensorData(values, &data_); } @@ -88,7 +92,13 @@ class TfLiteDriver::Expectation { constexpr double kRelativeThreshold = 1e-2f; constexpr double kAbsoluteThreshold = 1e-4f; - int tensor_size = tensor.bytes / sizeof(T); + size_t tensor_size = tensor.bytes / sizeof(T); + + if (tensor_size != num_elements_) { + std::cerr << "Expected a tensor with " << num_elements_ + << " elements, got " << tensor_size << std::endl; + return false; + } bool good_output = true; for (int i = 0; i < tensor_size; ++i) { @@ -115,6 +125,7 @@ class TfLiteDriver::Expectation { } TfLitePtrUnion data_; + size_t num_elements_; }; TfLiteDriver::TfLiteDriver(bool use_nnapi) : use_nnapi_(use_nnapi) {} -- GitLab From d355f4e2644b68ea643f573c564936ec23b93787 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 28 Mar 2018 14:04:01 -0700 Subject: [PATCH 633/960] [tf.data] Autotune prefetch buffer sizes In order to make it easier for tf.data users to achieve high performance with their input pipelines, this change adds the ability for the prefetch op to automatically tune its buffer size. To use the auto-tuning configuration of the `prefetch` transformation, simply skip passing in a buffer size. Example: ```python dataset = # ... dataset = dataset.prefetch() # Look ma, no buffer value req'd! ``` PiperOrigin-RevId: 190829736 --- tensorflow/contrib/data/__init__.py | 3 + tensorflow/core/kernels/data/BUILD | 21 +++++ .../core/kernels/data/prefetch_autotuner.cc | 46 +++++++++++ .../core/kernels/data/prefetch_autotuner.h | 71 ++++++++++++++++ .../kernels/data/prefetch_autotuner_test.cc | 82 +++++++++++++++++++ .../core/kernels/data/prefetch_dataset_op.cc | 13 ++- tensorflow/python/data/ops/dataset_ops.py | 2 + 7 files changed, 235 insertions(+), 3 deletions(-) create mode 100644 tensorflow/core/kernels/data/prefetch_autotuner.cc create mode 100644 tensorflow/core/kernels/data/prefetch_autotuner.h create mode 100644 tensorflow/core/kernels/data/prefetch_autotuner_test.cc diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 766721d8d2..7c3a9f82ff 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -82,3 +82,6 @@ from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_s from tensorflow.python.util.all_util import remove_undocumented remove_undocumented(__name__) + +# A constant that can be used to enable auto-tuning. +AUTOTUNE = -1 diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 01754ec21a..a8784e3656 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -10,6 +10,7 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "tf_kernel_library", + "tf_cc_test", ) filegroup( @@ -295,11 +296,31 @@ tf_kernel_library( ], ) +cc_library( + name = "prefetch_autotuner", + srcs = ["prefetch_autotuner.cc"], + hdrs = ["prefetch_autotuner.h"], + deps = [ + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "prefetch_autotuner_test", + srcs = ["prefetch_autotuner_test.cc"], + deps = [ + ":prefetch_autotuner", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + tf_kernel_library( name = "prefetch_dataset_op", srcs = ["prefetch_dataset_op.cc"], deps = [ ":dataset", + ":prefetch_autotuner", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", diff --git a/tensorflow/core/kernels/data/prefetch_autotuner.cc b/tensorflow/core/kernels/data/prefetch_autotuner.cc new file mode 100644 index 0000000000..b3272f6bcd --- /dev/null +++ b/tensorflow/core/kernels/data/prefetch_autotuner.cc @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/data/prefetch_autotuner.h" + +namespace tensorflow { + +PrefetchAutotuner::PrefetchAutotuner(int64 initial_buffer_size) + : buffer_limit_(initial_buffer_size) { + if (initial_buffer_size == kAutoTune) { + mode_ = Mode::kUpswing; + buffer_limit_ = 1; + } +} + +void PrefetchAutotuner::RecordConsumption(size_t current_buffer_size) { + switch (mode_) { + case Mode::kDisabled: + return; + case Mode::kUpswing: + if (current_buffer_size == buffer_limit_) { + mode_ = Mode::kDownswing; + } + return; + case Mode::kDownswing: + if (current_buffer_size == 0) { + buffer_limit_ *= 2; // Increase the buffer size. + mode_ = Mode::kUpswing; + } + return; + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/prefetch_autotuner.h b/tensorflow/core/kernels/data/prefetch_autotuner.h new file mode 100644 index 0000000000..fa8a184072 --- /dev/null +++ b/tensorflow/core/kernels/data/prefetch_autotuner.h @@ -0,0 +1,71 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_DATA_PREFETCH_AUTOTUNER_H_ +#define TENSORFLOW_CORE_KERNELS_DATA_PREFETCH_AUTOTUNER_H_ + +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +// PrefetchAutotuner dynamically adjusts the buffer size of a prefetch iterator. +// +// PrefetchAutotuner attempts to find the minimum buffer size such that there is +// always at least 1 element in the prefetch queue every time the downstream +// iterator calls GetNext(). +// +// One common failure mode of input pipelines is being throughput bound. No +// amount of prefetching can address that performance mode. In order to guard +// against this condition, PrefetchAutotuner will only increase the buffer_limit +// if the prefetching thread is able to successfully fill the buffer at its +// current size. +// +// Note: in the current implementation, we never decrease the buffer_limit(). +// This should change in the future! +// +// PrefetchAutotuner is NOT thread safe. +class PrefetchAutotuner { + public: + static const int64 kAutoTune = -1; + + explicit PrefetchAutotuner(int64 initial_buffer_size); + + int64 buffer_limit() const { return buffer_limit_; } + + void RecordConsumption(size_t current_buffer_size); + void RecordEmpty() { RecordConsumption(0); } + + private: + // PrefetchAutotuner operates as a state machine. + enum class Mode { + // Disables the autotuning. + kDisabled, + + // We have increased the size of the buffer, and will transition to + // kDownswing if we successfully fill the buffer. + kUpswing, + + // We have successfully filled a buffer of this size. If we ever block the + // downstream iterator, we should increase the buffer size. + kDownswing, + }; + + int64 buffer_limit_; + Mode mode_ = Mode::kDisabled; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_DATA_PREFETCH_AUTOTUNER_H_ diff --git a/tensorflow/core/kernels/data/prefetch_autotuner_test.cc b/tensorflow/core/kernels/data/prefetch_autotuner_test.cc new file mode 100644 index 0000000000..2f573dfb35 --- /dev/null +++ b/tensorflow/core/kernels/data/prefetch_autotuner_test.cc @@ -0,0 +1,82 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/data/prefetch_autotuner.h" + +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +TEST(PrefetchAutotuner, Disabled) { + PrefetchAutotuner t(2); + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(0); + t.RecordConsumption(2); + t.RecordConsumption(0); + t.RecordConsumption(2); + EXPECT_EQ(2, t.buffer_limit()); +} + +TEST(PrefetchAutotuner, Enabled) { + PrefetchAutotuner t(PrefetchAutotuner::kAutoTune); + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(1); + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(2); + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(1); + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(4, t.buffer_limit()); + t.RecordConsumption(4); + EXPECT_EQ(4, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(8, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to stay the same! + EXPECT_EQ(8, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to stay the same! + EXPECT_EQ(8, t.buffer_limit()); +} + +TEST(PrefetchAutotuner, EnabledSteady) { + PrefetchAutotuner t(PrefetchAutotuner::kAutoTune); + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(1); + EXPECT_EQ(1, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(2); + EXPECT_EQ(2, t.buffer_limit()); + t.RecordConsumption(0); // Expect buffer limit to increase. + EXPECT_EQ(4, t.buffer_limit()); + + // Never reach zero again. + std::vector consumption_values = {2, 3, 1, 4, 1, 2, 3, 1}; + for (int i = 0; i < consumption_values.size(); ++i) { + t.RecordConsumption(consumption_values[i]); + EXPECT_EQ(4, t.buffer_limit()) + << "Failed at index " << i << " with value: " << consumption_values[i]; + } +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/prefetch_dataset_op.cc b/tensorflow/core/kernels/data/prefetch_dataset_op.cc index 1c548a30d2..536de81fd8 100644 --- a/tensorflow/core/kernels/data/prefetch_dataset_op.cc +++ b/tensorflow/core/kernels/data/prefetch_dataset_op.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/kernels/data/prefetch_autotuner.h" #include "tensorflow/core/lib/core/error_codes.pb.h" namespace tensorflow { @@ -37,7 +38,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { int64 buffer_size; OP_REQUIRES_OK( ctx, ParseScalarArgument(ctx, "buffer_size", &buffer_size)); - OP_REQUIRES(ctx, buffer_size > 0, + OP_REQUIRES(ctx, + buffer_size > 0 || buffer_size == PrefetchAutotuner::kAutoTune, errors::InvalidArgument("buffer_size must be > 0")); *output = new Dataset(ctx, input, buffer_size); @@ -85,7 +87,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { public: explicit Iterator(const Params& params) : DatasetIterator(params), - input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + input_impl_(params.dataset->input_->MakeIterator(params.prefix)), + auto_tuner_(params.dataset->buffer_size_) {} ~Iterator() override { // Signal the prefetch thread to terminate it. We will then @@ -113,6 +116,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { // Wait until the next element in the buffer has been // produced, or we are shutting down. while (!cancelled_ && !prefetch_thread_finished_ && buffer_.empty()) { + auto_tuner_.RecordEmpty(); cond_var_.wait(l); } @@ -129,6 +133,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { if (s.ok()) { *out_tensors = std::move(buffer_.front().value); } + auto_tuner_.RecordConsumption(buffer_.size()); buffer_.pop_front(); *end_of_sequence = false; @@ -242,7 +247,8 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { // 1. Wait for a slot in the buffer. { mutex_lock l(mu_); - while (!cancelled_ && buffer_.size() == dataset()->buffer_size_) { + while (!cancelled_ && + buffer_.size() == auto_tuner_.buffer_limit()) { cond_var_.wait(l); } @@ -323,6 +329,7 @@ class PrefetchDatasetOp : public UnaryDatasetOpKernel { mutex parent_mu_ ACQUIRED_BEFORE(mu_); const std::unique_ptr input_impl_ GUARDED_BY(parent_mu_); condition_variable cond_var_; + PrefetchAutotuner auto_tuner_ GUARDED_BY(mu_); std::deque buffer_ GUARDED_BY(mu_); std::unique_ptr prefetch_thread_ GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index c0a6283be4..8729e085a3 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -2043,6 +2043,8 @@ class PrefetchDataset(Dataset): """See `Dataset.prefetch()` for details.""" super(PrefetchDataset, self).__init__() self._input_dataset = input_dataset + if buffer_size is None: + buffer_size = -1 # This is the sentinel for auto-tuning. self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") -- GitLab From 3c0229c36ad7ade3cf795e3171c3c563e0222ed2 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 29 Mar 2018 05:28:16 +0800 Subject: [PATCH 634/960] Fix broken wiki link of Positive-definite_matrix in linalg api guide (#18057) * Fix broken wiki link of Positive-definite_matrix in linalg api guide * Fix minor intent --- .../contrib/linalg/python/ops/linear_operator_block_diag.py | 3 +-- tensorflow/python/ops/linalg/linear_operator.py | 3 +-- tensorflow/python/ops/linalg/linear_operator_composition.py | 3 +-- tensorflow/python/ops/linalg/linear_operator_diag.py | 3 +-- tensorflow/python/ops/linalg/linear_operator_full_matrix.py | 3 +-- tensorflow/python/ops/linalg/linear_operator_identity.py | 6 ++---- .../python/ops/linalg/linear_operator_lower_triangular.py | 3 +-- 7 files changed, 8 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_block_diag.py b/tensorflow/contrib/linalg/python/ops/linear_operator_block_diag.py index 80649bd52d..9d3af66c92 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_block_diag.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_block_diag.py @@ -138,8 +138,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. This is true by default, and will raise a `ValueError` otherwise. name: A name for this `LinearOperator`. Default is the individual diff --git a/tensorflow/python/ops/linalg/linear_operator.py b/tensorflow/python/ops/linalg/linear_operator.py index c7513d5b40..193c787baa 100644 --- a/tensorflow/python/ops/linalg/linear_operator.py +++ b/tensorflow/python/ops/linalg/linear_operator.py @@ -166,8 +166,7 @@ class LinearOperator(object): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. name: A name for this `LinearOperator`. diff --git a/tensorflow/python/ops/linalg/linear_operator_composition.py b/tensorflow/python/ops/linalg/linear_operator_composition.py index ecd30e4d7e..0292bc51dc 100644 --- a/tensorflow/python/ops/linalg/linear_operator_composition.py +++ b/tensorflow/python/ops/linalg/linear_operator_composition.py @@ -134,8 +134,7 @@ class LinearOperatorComposition(linear_operator.LinearOperator): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. name: A name for this `LinearOperator`. Default is the individual operators names joined with `_o_`. diff --git a/tensorflow/python/ops/linalg/linear_operator_diag.py b/tensorflow/python/ops/linalg/linear_operator_diag.py index e180e83026..5beaea65a5 100644 --- a/tensorflow/python/ops/linalg/linear_operator_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_diag.py @@ -132,8 +132,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. name: A name for this `LinearOperator`. diff --git a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py index f979fb37d6..5ba3b090ae 100644 --- a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py +++ b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py @@ -125,8 +125,7 @@ class LinearOperatorFullMatrix(linear_operator.LinearOperator): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. name: A name for this `LinearOperator`. diff --git a/tensorflow/python/ops/linalg/linear_operator_identity.py b/tensorflow/python/ops/linalg/linear_operator_identity.py index 50f3d407e8..45929eb4e2 100644 --- a/tensorflow/python/ops/linalg/linear_operator_identity.py +++ b/tensorflow/python/ops/linalg/linear_operator_identity.py @@ -236,8 +236,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. assert_proper_shapes: Python `bool`. If `False`, only perform static checks that initialization and method arguments have proper shape. @@ -576,8 +575,7 @@ class LinearOperatorScaledIdentity(BaseLinearOperatorIdentity): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. assert_proper_shapes: Python `bool`. If `False`, only perform static checks that initialization and method arguments have proper shape. diff --git a/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py index a5130188b6..c4d386ccb4 100644 --- a/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py +++ b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py @@ -133,8 +133,7 @@ class LinearOperatorLowerTriangular(linear_operator.LinearOperator): meaning the quadratic form `x^H A x` has positive real part for all nonzero `x`. Note that we do not require the operator to be self-adjoint to be positive-definite. See: - https://en.wikipedia.org/wiki/Positive-definite_matrix\ - #Extension_for_non_symmetric_matrices + https://en.wikipedia.org/wiki/Positive-definite_matrix#Extension_for_non-symmetric_matrices is_square: Expect that this operator acts like square [batch] matrices. name: A name for this `LinearOperator`. -- GitLab From ef6552b544b3c3bf6808be807b30dd9bd4f19669 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Mar 2018 14:30:39 -0700 Subject: [PATCH 635/960] [tf.data] Fix reference leak in FunctionBufferingResource. Previously, the FunctionBufferingResource's destructor would never be called, which led to use-after-free (of the underlying Device object) errors in the prefetching function. PiperOrigin-RevId: 190834415 --- tensorflow/contrib/cmake/tf_tests.cmake | 1 + tensorflow/contrib/data/kernels/prefetching_kernels.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 237f4fe33a..f793877c8b 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -281,6 +281,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py" "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py" # Deadlocks "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py" # b/65430561 + "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/prefetching_ops_test.py" # Segfaults on Windows. # tensor_forest tests (also note that we exclude the hybrid tests for now) "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/python/kernel_tests/count_extremely_random_stats_op_test.py" # Results in wrong order. "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/python/kernel_tests/sample_inputs_op_test.py" # Results in wrong order. diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index 79d1fc3494..f51570db85 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -314,6 +314,7 @@ class FunctionBufferResourceHandleOp : public OpKernel { source_device, target_device, func_args, thread_pool_size_); return Status::OK(); })); + core::ScopedUnref s(buffer); OP_REQUIRES_OK(ctx, buffer->Instantiate()); initialized_ = true; } -- GitLab From e97c9e91e016efd951dc52e82744f607d948bb2a Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 28 Mar 2018 14:36:18 -0700 Subject: [PATCH 636/960] Merge changes from github. PiperOrigin-RevId: 190835392 --- RELEASE.md | 60 +++ configure.py | 2 +- tensorflow/BUILD | 7 + tensorflow/contrib/BUILD | 27 +- .../boosted_trees/kernels/quantile_ops.cc | 2 +- .../boosted_trees/lib/utils/batch_features.cc | 2 +- .../lib/utils/batch_features_test.cc | 2 +- .../boosted_trees/lib/utils/dropout_utils.cc | 2 +- .../boosted_trees/lib/utils/dropout_utils.h | 2 +- .../lib/utils/sparse_column_iterable_test.cc | 2 +- .../boosted_trees/proto/tree_config.proto | 2 +- .../kernel_tests/prediction_ops_test.py | 10 +- .../python/kernel_tests/quantile_ops_test.py | 2 +- .../boosted_trees/python/ops/quantile_ops.py | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 3 + .../kernel_tests/batch_dataset_op_test.py | 14 + tensorflow/contrib/eager/python/BUILD | 6 +- .../eager/python/examples/spinn/spinn_test.py | 1 - .../python/estimator/replicate_model_fn.py | 2 +- .../factorization/kernels/clustering_ops.cc | 2 +- .../python/ops/factorization_ops.py | 14 +- .../python/ops/factorization_ops_test.py | 12 +- .../factorization/python/ops/gmm_ops.py | 4 +- .../factorization/python/ops/gmm_test.py | 2 +- .../factorization/python/ops/kmeans_test.py | 4 +- .../contrib/factorization/python/ops/wals.py | 2 +- tensorflow/contrib/learn/BUILD | 1 + .../learn/python/learn/estimators/linear.py | 4 +- .../linear_optimizer/python/sdca_estimator.py | 4 +- tensorflow/contrib/lite/README.md | 3 + tensorflow/contrib/lite/builtin_ops.h | 1 + tensorflow/contrib/lite/g3doc/models.md | 2 +- tensorflow/contrib/lite/kernels/BUILD | 13 + .../internal/reference/reference_ops.h | 25 ++ tensorflow/contrib/lite/kernels/maximum.cc | 106 +++++ .../contrib/lite/kernels/maximum_test.cc | 81 ++++ tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 3 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/python/lite.py | 22 +- tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 36 ++ .../testing/generated_examples_zip_test.cc | 1 + .../contrib/lite/toco/tflite/operator.cc | 2 + .../contrib/lite/toco/tflite/operator_test.cc | 2 + tensorflow/contrib/lookup/lookup_ops.py | 2 +- .../contrib/makefile/download_dependencies.sh | 2 +- tensorflow/contrib/makefile/tf_op_files.txt | 1 + .../seq2seq/kernels/beam_search_ops.cc | 2 +- .../seq2seq/python/ops/attention_wrapper.py | 8 +- .../seq2seq/python/ops/beam_search_decoder.py | 6 +- .../slim/python/slim/data/parallel_reader.py | 4 +- .../slim/python/slim/data/prefetch_queue.py | 4 +- .../python/slim/data/tfexample_decoder.py | 2 +- tensorflow/contrib/tensorrt/README.md | 46 ++- .../contrib/tensorrt/convert/convert_graph.cc | 20 +- .../contrib/tensorrt/convert/convert_nodes.cc | 375 ++++++++++-------- .../contrib/tensorrt/segment/segment.cc | 55 ++- tensorflow/contrib/tensorrt/segment/segment.h | 4 +- .../contrib/tensorrt/segment/segment_test.cc | 8 +- .../timeseries/python/timeseries/ar_model.py | 2 +- .../python/timeseries/math_utils.py | 2 +- .../timeseries/state_space_models/varma.py | 4 +- .../base_api/api_def_MatrixSolveLs.pbtxt | 6 +- .../core/common_runtime/mkl_cpu_allocator.cc | 3 - tensorflow/core/framework/common_shape_fns.cc | 4 +- tensorflow/core/framework/common_shape_fns.h | 8 +- tensorflow/core/framework/shape_inference.h | 1 + .../core/kernels/mkl_fused_batch_norm_op.cc | 2 +- .../core/kernels/segment_reduction_ops.h | 7 + tensorflow/core/kernels/snapshot_op.cc | 30 ++ tensorflow/core/kernels/snapshot_op.h | 26 +- tensorflow/core/kernels/snapshot_op_gpu.cu.cc | 9 +- tensorflow/core/kernels/xent_op.cc | 65 ++- tensorflow/core/kernels/xent_op.h | 35 +- tensorflow/core/kernels/xent_op_gpu.cu.cc | 9 +- tensorflow/core/ops/array_ops.cc | 26 +- tensorflow/core/ops/nn_ops.cc | 23 +- tensorflow/core/ops/nn_ops_test.cc | 16 +- tensorflow/core/public/version.h | 4 +- .../python/contrib.bayesflow.monte_carlo.md | 36 +- .../api_guides/python/contrib.losses.md | 28 +- .../docs_src/community/documentation.md | 38 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 14 +- .../docs_src/install/install_sources.md | 9 +- tensorflow/docs_src/mobile/optimizing.md | 2 + tensorflow/docs_src/mobile/prepare_models.md | 2 +- tensorflow/python/BUILD | 2 +- .../python/kernel_tests/array_ops_test.py | 26 +- tensorflow/python/kernel_tests/testdata/BUILD | 2 +- .../python/kernel_tests/xent_op_test.py | 81 +++- tensorflow/python/layers/convolutional.py | 2 + .../python/layers/convolutional_test.py | 6 + tensorflow/python/ops/linalg_ops.py | 2 +- .../python/training/monitored_session.py | 33 +- .../python/training/monitored_session_test.py | 36 ++ tensorflow/tensorflow.bzl | 4 +- .../tools/api/golden/tensorflow.train.pbtxt | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/lib_package/BUILD | 2 - tensorflow/tools/pip_package/BUILD | 1 - tensorflow/tools/pip_package/setup.py | 6 +- tensorflow/workspace.bzl | 133 ++++--- third_party/mkl/BUILD | 46 ++- third_party/mkl/MKL_LICENSE | 201 ++++++++++ third_party/mkl/build_defs.bzl | 12 + third_party/mkl/mkl.BUILD | 27 +- 116 files changed, 1703 insertions(+), 556 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/maximum.cc create mode 100644 tensorflow/contrib/lite/kernels/maximum_test.cc create mode 100644 third_party/mkl/MKL_LICENSE diff --git a/RELEASE.md b/RELEASE.md index 6f54dee58f..c63d9f20c9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,63 @@ +# Release 1.7.0 + +## Major Features And Improvements +* Eager mode is moving out of contrib, try `tf.enable_eager_execution()`. +* Graph rewrites emulating fixed-point quantization compatible with TensorFlow Lite, supported by new `tf.contrib.quantize` package. +* Easily customize gradient computation with `tf.custom_gradient`. +* [TensorBoard Debugger Plugin](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md), the graphical user interface (GUI) of TensorFlow Debugger (tfdbg), is now in alpha. +* Experimental support for reading a sqlite database as a `Dataset` with new `tf.contrib.data.SqlDataset`. +* Distributed Mutex / CriticalSection added to `tf.contrib.framework.CriticalSection`. +* Better text processing with `tf.regex_replace`. +* Easy, efficient sequence input with `tf.contrib.data.bucket_by_sequence_length` + +## Bug Fixes and Other Changes +* Accelerated Linear Algebra (XLA): + * Add `MaxPoolGradGrad` support for XLA + * CSE pass from Tensorflow is now disabled in XLA. +* `tf.data`: + * `tf.data.Dataset` + * Add support for building C++ Dataset op kernels as external libraries, using the `tf.load_op_library()` mechanism. + * `Dataset.list_files()` now shuffles its output by default. + * `Dataset.shuffle(..., seed=tf.constant(0, dtype=tf.int64))` now yields the same sequence of elements as `Dataset.shuffle(..., seed=0)`. + * Add `num_parallel_reads` argument to `tf.data.TFRecordDataset`. +* `tf.contrib`: + * `tf.contrib.bayesflow.halton_sequence` now supports randomization. + * Add support for scalars in `tf.contrib.all_reduce`. + * Add `effective_sample_size` to `tf.contrib.bayesflow.mcmc_diagnostics`. + * Add `potential_scale_reduction` to `tf.contrib.bayesflow.mcmc_diagnostics`. + * Add `BatchNormalization`, `Kumaraswamy` bijectors. + * Deprecate `tf.contrib.learn`. Please check contrib/learn/README.md for instructions on how to convert existing code. + * `tf.contrib.data` + * Remove deprecated `tf.contrib.data.Dataset`, `tf.contrib.data.Iterator`, `tf.contrib.data.FixedLengthRecordDataset`, `tf.contrib.data.TextLineDataset`, and `tf.contrib.data.TFRecordDataset` classes. + * Added `bucket_by_sequence_length`, `sliding_window_batch`, and `make_batched_features_dataset` + * Remove unmaintained `tf.contrib.ndlstm`. You can find it externally at https://github.com/tmbarchive/tfndlstm. + * Moved most of `tf.contrib.bayesflow` to its own repo: `tfp` +* Other: + * tf.py_func now reports the full stack trace if an exception occurs. + * Integrate `TPUClusterResolver` with GKE's integration for Cloud TPUs. + * Add a library for statistical testing of samplers. + * Add Helpers to stream data from the GCE VM to a Cloud TPU. + * Integrate ClusterResolvers with TPUEstimator. + * Unify metropolis_hastings interface with HMC kernel. + * Move LIBXSMM convolutions to a separate --define flag so that they are disabled by default. + * Fix `MomentumOptimizer` lambda. + * Reduce `tfp.layers` boilerplate via programmable docstrings. + * Add `auc_with_confidence_intervals`, a method for computing the AUC and confidence interval with linearithmic time complexity. + * `regression_head` now accepts customized link function, to satisfy the usage that user can define their own link function if the `array_ops.identity` does not meet the requirement. + * Fix `initialized_value` and `initial_value` behaviors for `ResourceVariables` created from `VariableDef` protos. + * Add TensorSpec to represent the specification of Tensors. + * Constant folding pass is now deterministic. + * Support `float16` `dtype` in `tf.linalg.*`. + * Add `tf.estimator.export.TensorServingInputReceiver` that allows `tf.estimator.Estimator.export_savedmodel` to pass raw tensors to model functions. + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Abe, Alistair Low, Andy Kernahan, Appledore, Ben, Ben Barsdell, Boris Pfahringer, Brad Wannow, Brett Koonce, Carl Thomé, cclauss, Chengzhi Chen, Chris Drake, Christopher Yeh, Clayne Robison, Codrut Grosu, Daniel Trebbien, Danny Goodman, David Goodwin, David Norman, Deron Eriksson, Donggeon Lim, Donny Viszneki, DosLin, DylanDmitri, Francisco Guerrero, Fred Reiss, gdh1995, Giuseppe, Glenn Weidner, gracehoney, Guozhong Zhuang, Haichen "Hc" Li, Harald Husum, harumitsu.nobuta, Henry Spivey, hsm207, Jekyll Song, Jerome, Jiongyan Zhang, jjsjann123, John Sungjin Park, Johnson145, JoshVarty, Julian Wolff, Jun Wang, June-One, Kamil Sindi, Kb Sriram, Kdavis-Mozilla, Kenji, lazypanda1, Liang-Chi Hsieh, Loo Rong Jie, Mahesh Bhosale, MandarJKulkarni, ManHyuk, Marcus Ong, Marshal Hayes, Martin Pool, matthieudelaro, mdfaijul, mholzel, Michael Zhou, Ming Li, Minmin Sun, Myungjoo Ham, MyungsungKwak, Naman Kamra, Peng Yu, Penghao Cen, Phil, Raghuraman-K, resec, Rohin Mohanadas, Sandeep N Gupta, Scott Tseng, seaotterman, Seo Sanghyeon, Sergei Lebedev, Ted Chang, terrytangyuan, Tim H, tkunic, Tod, vihanjain, Yan Facai (颜发才), Yin Li, Yong Tang, Yukun Chen, Yusuke Yamada + + + # Release 1.6.0 ## Breaking Changes diff --git a/configure.py b/configure.py index 22b9abedd7..0f52c0ec99 100644 --- a/configure.py +++ b/configure.py @@ -1414,7 +1414,7 @@ def main(): set_build_var(environ_cp, 'TF_NEED_S3', 'Amazon S3 File System', 'with_s3_support', True, 's3') set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', False, 'kafka') + 'with_kafka_support', True, 'kafka') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 6ab43638ba..29a01efc84 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -240,6 +240,13 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "with_kafka_support_windows_override", + define_values = {"with_kafka_support": "true"}, + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + config_setting( name = "with_gcp_support_android_override", define_values = {"with_gcp_support": "true"}, diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index bdbd738906..fb81b50fe8 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -51,7 +51,6 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", - "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -110,7 +109,13 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]), + ]) + select({ + "//tensorflow:with_kafka_support_windows_override": [], + "//tensorflow:with_kafka_support": [ + "//tensorflow/contrib/kafka", + ], + "//conditions:default": [], + }), ) cc_library( @@ -120,7 +125,6 @@ cc_library( "//tensorflow/contrib/boosted_trees:boosted_trees_kernels", "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -133,7 +137,13 @@ cc_library( "//tensorflow/contrib/text:all_kernels", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([ "//tensorflow/contrib/nccl:nccl_kernels", - ]), + ]) + select({ + "//tensorflow:with_kafka_support_windows_override": [], + "//tensorflow:with_kafka_support": [ + "//tensorflow/contrib/kafka:dataset_kernels", + ], + "//conditions:default": [], + }), ) cc_library( @@ -146,7 +156,6 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", @@ -157,7 +166,13 @@ cc_library( "//tensorflow/contrib/tensor_forest:tensor_forest_ops_op_lib", "//tensorflow/contrib/text:all_ops", "//tensorflow/contrib/tpu:all_ops", - ], + ] + select({ + "//tensorflow:with_kafka_support_windows_override": [], + "//tensorflow:with_kafka_support": [ + "//tensorflow/contrib/kafka:dataset_ops_op_lib", + ], + "//conditions:default": [], + }), ) filegroup( diff --git a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc index 0f4c2298f5..0b28f81e7c 100644 --- a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc @@ -253,7 +253,7 @@ class CreateQuantileAccumulatorOp : public OpKernel { private: float epsilon_; int32 num_quantiles_; - // An upperbound on the number of enteries that the summaries might have + // An upper bound on the number of entries that the summaries might have // for a feature. int64 max_elements_; bool generate_quantiles_; diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc index cf4f9a097a..35b059f349 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc @@ -54,7 +54,7 @@ Status BatchFeatures::Initialize( TF_CHECK_AND_RETURN_IF_ERROR( dense_float_feature.dim_size(1) == 1, errors::InvalidArgument( - "Dense float features may not be multi-valent: dim_size(1) = ", + "Dense float features may not be multivalent: dim_size(1) = ", dense_float_feature.dim_size(1))); dense_float_feature_columns_.emplace_back(dense_float_feature); } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc b/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc index 609519e8b1..cfe9101e74 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc @@ -59,7 +59,7 @@ TEST_F(BatchFeaturesTest, DenseFloatFeatures_Multivalent) { BatchFeatures batch_features(1); auto dense_vec = AsTensor({3.0f, 7.0f}, {1, 2}); auto expected_error = InvalidArgument( - "Dense float features may not be multi-valent: dim_size(1) = 2"); + "Dense float features may not be multivalent: dim_size(1) = 2"); EXPECT_EQ(expected_error, batch_features.Initialize({dense_vec}, {}, {}, {}, {}, {}, {})); } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc index db34db998a..ce67db797d 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc @@ -54,7 +54,7 @@ Status DropoutUtils::DropOutTrees( if (probability_of_skipping_dropout < 0 || probability_of_skipping_dropout > 1) { return errors::InvalidArgument( - "Probability of skiping dropout must be in [0,1] range"); + "Probability of skipping dropout must be in [0,1] range"); } const auto num_trees = weights.size(); diff --git a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h index 928bfbfe5c..77c16da541 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h @@ -66,7 +66,7 @@ class DropoutUtils { // Current weights and num_updates will be updated as a result of this // func std::vector* current_weights, - // How many weight assignements have been done for each tree already. + // How many weight assignments have been done for each tree already. std::vector* num_updates); }; diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc index 0138aae3db..cc7604745e 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc @@ -34,7 +34,7 @@ TEST_F(SparseColumnIterableTest, Empty) { } TEST_F(SparseColumnIterableTest, Iterate) { - // 8 examples having 7 sparse features with the 3rd and 7th multi-valent. + // 8 examples having 7 sparse features with the 3rd and 7th multivalent. // This can be visualized like the following: // Instance | Sparse | // 0 | x | diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto index 4407c4d981..81411aa84a 100644 --- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto +++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto @@ -53,7 +53,7 @@ message DenseFloatBinarySplit { // Float feature column and split threshold describing // the rule feature <= threshold. int32 feature_column = 1; - // If feature column is multivalent, this holds the index of the dimensiong + // If feature column is multivalent, this holds the index of the dimension // for the split. Defaults to 0. int32 dimension_id = 5; float threshold = 2; diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index c1acf35160..cf55759aaa 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -120,8 +120,8 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): """Sets up the prediction tests. Create a batch of two examples having one dense float, two sparse float - single valued, one sparse float multidimensionl and one sparse int features. - The data looks like the following: + single valued, one sparse float multidimensional and one sparse int + features. The data looks like the following: | Instance | Dense0 | SparseF0 | SparseF1 | SparseI0 | SparseM | 0 | 7 | -3 | | 9,1 | __, 5.0 | 1 | -2 | | 4 | | 3, ___ @@ -810,7 +810,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # building. This tree should never be dropped. num_trees = 10 with self.test_session(): - # Empty tree ensenble. + # Empty tree ensemble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() # Add 10 trees with some weights. for i in range(0, num_trees): @@ -951,7 +951,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): def testDropOutZeroProb(self): with self.test_session(): - # Empty tree ensenble. + # Empty tree ensemble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() # Add 1000 trees with some weights. for i in range(0, 999): @@ -994,7 +994,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): def testAveragingAllTrees(self): with self.test_session(): - # Empty tree ensenble. + # Empty tree ensemble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() adjusted_tree_ensemble_config = ( tree_config_pb2.DecisionTreeEnsembleConfig()) diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py index 81f58de28c..074623699d 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py @@ -482,7 +482,7 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): """Sets up the quantile op tests. Create a batch of 4 examples having 2 dense and 4 sparse features. - Forth sparse feature is multivalent (3 dimensional) + Fourth sparse feature is multivalent (3 dimensional) The data looks like this | Instance | Dense 0 | Dense 1 | Sparse 0 | Sparse 1 |Sparse 2| SparseM | 0 | -0.1 | -1 | -2 | 0.1 | |_ ,1,_ diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py index 97d57e8b23..1b184d296b 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py @@ -184,7 +184,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): """Finalizes quantile summary stream and resets it for next iteration. Args: - stamp_token: Exepcted current token. + stamp_token: Expected current token. next_stamp_token: Next value for the token. Returns: A list of quantiles or approximate boundaries. diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index f793877c8b..92f2ab6dea 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -210,6 +210,9 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py" # Test is flaky on Windows GPU builds (b/38283730). "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/gmm_test.py" + # Disable following manual tag in BUILD. + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py" + ) if (WIN32) set(tf_test_src_py_exclude diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 5abb38c2d2..75482f67da 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -413,6 +413,20 @@ class BatchDatasetTest(test.TestCase): def testMapAndBatchPartialBatchDropRemainder(self): return self._testMapAndBatchPartialBatchHelper(drop_remainder=True) + def testMapAndBatchYieldsPartialBatch(self): + iterator = (dataset_ops.Dataset.range(10) + .apply(batching.map_and_batch( + lambda x: array_ops.reshape(x * x, [1]), 4)) + .make_one_shot_iterator()) + self.assertEqual([None, 1], iterator.output_shapes.as_list()) + next_element = iterator.get_next() + with self.test_session() as sess: + self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) + self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) + self.assertAllEqual([[64], [81]], sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + def testMapAndBatchSparse(self): def _sparse(i): diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 4fba014d6f..80176397c0 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -270,7 +270,11 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = ["notsan"], + tags = [ + "no_oss", # b/74395663 + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 9261823d77..9adf47d505 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -418,7 +418,6 @@ class SpinnTest(test_util.TensorFlowTestCase): if event.summary.value and event.summary.value[0].tag == "train/loss"] self.assertEqual(config.epochs, len(train_losses)) - self.assertLess(train_losses[-1], train_losses[0]) # 5. Verify that checkpoints exist and contains all the expected variables. self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index e0fae2c992..fa2697800e 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -136,7 +136,7 @@ def replicate_model_fn(model_fn, the train_op argument of `EstimatorSpec`. loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This - argument can be used to replice only on the subset of available GPUs. + argument can be used to replicate only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. diff --git a/tensorflow/contrib/factorization/kernels/clustering_ops.cc b/tensorflow/contrib/factorization/kernels/clustering_ops.cc index dd61f59585..2a6c97e8b9 100644 --- a/tensorflow/contrib/factorization/kernels/clustering_ops.cc +++ b/tensorflow/contrib/factorization/kernels/clustering_ops.cc @@ -353,7 +353,7 @@ class NearestNeighborsOp : public OpKernel { auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); const int64 num_threads = worker_threads.num_threads; // This kernel might be configured to use fewer than the total number of - // available CPUs on the host machine. To avoid descructive interference + // available CPUs on the host machine. To avoid destructive interference // with other jobs running on the host machine, we must only use a fraction // of total available L3 cache. Unfortunately, we cannot query the host // machine to get the number of physical CPUs. So, we use a fixed per-CPU diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 054888e734..8e0ed1d80e 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -106,7 +106,7 @@ class WALSModel(object): # the prep_gramian_op for row(column) can be run. worker_init_op = model.worker_init - # To be run once per interation sweep before the row(column) update + # To be run once per integration sweep before the row(column) update # initialize ops can be run. Note that in the distributed training # situations, this should only be run by the chief trainer. All other # trainers need to block until this is done. @@ -118,9 +118,9 @@ class WALSModel(object): init_row_update_op = model.initialize_row_update_op init_col_update_op = model.initialize_col_update_op - # Ops to upate row(column). This can either take the entire sparse tensor - # or slices of sparse tensor. For distributed trainer, each trainer - # handles just part of the matrix. + # Ops to update row(column). This can either take the entire sparse + # tensor or slices of sparse tensor. For distributed trainer, each + # trainer handles just part of the matrix. _, row_update_op, unreg_row_loss, row_reg, _ = model.update_row_factors( sp_input=matrix_slices_from_queue_for_worker_shard) row_loss = unreg_row_loss + row_reg @@ -220,7 +220,7 @@ class WALSModel(object): in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of inner lists matching the number of row factor shards and the elements in each inner list are the weights for the rows of the corresponding row - factor shard. In this case, w_ij = unonbserved_weight + + factor shard. In this case, w_ij = unobserved_weight + row_weights[i] * col_weights[j]. - If this is a single non-negative real number, this value is used for all row weights and w_ij = unobserved_weight + row_weights * @@ -435,7 +435,7 @@ class WALSModel(object): gramian: Variable storing the gramian calculated from the factors. Returns: - A op that updates the gramian with the calcuated value from the factors. + A op that updates the gramian with the calculated value from the factors. """ partial_gramians = [] for f in factors: @@ -564,7 +564,7 @@ class WALSModel(object): Note that specifically this initializes the cache of the row and column weights on workers when `use_factors_weights_cache` is True. In this case, - if these weights are being calcualted and reset after the object is created, + if these weights are being calculated and reset after the object is created, it is important to ensure this ops is run afterwards so the cache reflects the correct values. """ diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py index c813733915..bb5140aeb3 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py @@ -210,7 +210,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -283,8 +283,8 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 3 column feature vectors. - # This is expected to reprodue the same column factors in the model as the - # weights and feature vectors are identical to that used in model + # This is expected to reproduce the same column factors in the model as + # the weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( sp_input=sp_feeder, @@ -385,7 +385,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reprodue the same row factors in the model as the + # This is expected to reproduce the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -462,8 +462,8 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 2 column feature vectors. - # This is expected to reprodue the same column factors in the model as the - # weights and feature vectors are identical to that used in model + # This is expected to reproduce the same column factors in the model as + # the weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( sp_input=sp_feeder, diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index 98d6434f47..14d4c733e3 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -280,7 +280,7 @@ class GmmAlgorithm(object): self._define_score_samples() def _define_full_covariance_probs(self, shard_id, shard): - """Defines the full covariance probabilties per example in a class. + """Defines the full covariance probabilities per example in a class. Updates a matrix with dimension num_examples X num_classes. @@ -344,7 +344,7 @@ class GmmAlgorithm(object): def _define_prior_log_prob_operation(self, shard_id): """Computes the prior probability of all samples. - Updates a vector where each item is the prior probabibility of an + Updates a vector where each item is the prior probability of an input example. Args: diff --git a/tensorflow/contrib/factorization/python/ops/gmm_test.py b/tensorflow/contrib/factorization/python/ops/gmm_test.py index 00a4734eb6..4fc9c96e9d 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_test.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_test.py @@ -210,7 +210,7 @@ class GMMTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py index 0103cc4439..88eb9cf692 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -413,7 +413,7 @@ class KMeansCosineDistanceTest(KMeansTestBase): self.assertAllClose(score, self.true_score, atol=1e-2) def test_predict_kmeans_plus_plus(self): - # Most points are concetrated near one center. KMeans++ is likely to find + # Most points are concentrated near one center. KMeans++ is likely to find # the less populated centers. points = np.array( [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], @@ -604,7 +604,7 @@ class KMeansTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependendent on inputs, if input + # Note that since cluster initialization is dependent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 4fe22ea26e..62db3bb4c4 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -235,7 +235,7 @@ def _wals_factorization_model_function(features, labels, mode, params): num_items: An integer, the total number of items of this axis. update_fn: A function that takes one argument (`sp_input`), and that returns a tuple of - * new_factors: A flot Tensor of the factor values after update. + * new_factors: A float Tensor of the factor values after update. * update_op: a TensorFlow op which updates the factors. * loss: A float Tensor, the unregularized loss. * reg_loss: A float Tensor, the regularization loss. diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 9c59150580..16f80a876f 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -226,6 +226,7 @@ py_test( size = "small", srcs = ["python/learn/monitors_test.py"], srcs_version = "PY2AND3", + tags = ["no_pip_gpu"], # b/74437598 deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 64d7ecc68e..70b70af98c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -243,8 +243,8 @@ def sdca_model_fn(features, labels, mode, params): parent_scope = "linear" - with variable_scope.variable_op_scope( - features.values(), parent_scope) as scope: + with variable_scope.variable_scope( + values=features.values(), name_or_scope=parent_scope) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py index 05794a42c5..d4e54c82f9 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py @@ -140,8 +140,8 @@ def sdca_model_fn(features, labels, mode, params, config=None): parent_scope = "linear" - with variable_scope.variable_op_scope(features.values(), - parent_scope) as scope: + with variable_scope.variable_scope( + values=features.values(), name_or_scope=parent_scope) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 2680d515eb..c15ae3f233 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -126,6 +126,9 @@ The above pre-trained models have been trained on the ImageNet data set, which c The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. +# Getting started with RaspberryPi + +Using RaspberryPi can be accomplished by following the [Makefile instructions](g3doc/rpi.md). That will give a you a static library (.a) that you can build your app against. Python bindings will be coming soon as well as a demo app. ### Train a custom model A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index d7993e60cc..17b791e4e2 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -79,6 +79,7 @@ typedef enum { kTfLiteBuiltinBidirectionalSequenceLstm = 52, kTfLiteBuiltinCast = 53, kTfLiteBuiltinPrelu = 54, + kTfLiteBuiltinMaximum = 55, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md index 5b393140d6..48f43d4fc4 100644 --- a/tensorflow/contrib/lite/g3doc/models.md +++ b/tensorflow/contrib/lite/g3doc/models.md @@ -1,4 +1,4 @@ -#List of Hosted Models +# List of Hosted Models * [Inception V3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) * [Inception V3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 1450c1e14b..c423c00bf5 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -156,6 +156,7 @@ cc_library( "local_response_norm.cc", "lsh_projection.cc", "lstm.cc", + "maximum.cc", "mean.cc", "mfcc.cc", "mul.cc", @@ -536,6 +537,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "maximum_test", + size = "small", + srcs = ["maximum_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "mean_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 33d60afa26..3575974ae9 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -404,6 +404,7 @@ inline void DepthToSpace(const T* input_data, const Dims<4>& input_dims, const int in_d = out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; + const int in_w = out_w / block_size; const int in_h = out_h / block_size; const int in_b = out_b; @@ -3363,6 +3364,30 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, } } +template +void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + auto out_idx = Offset(output_dims, c, x, y, b); + auto in1_idx = SubscriptToIndex(desc1, c, x, y, b); + auto in2_idx = SubscriptToIndex(desc2, c, x, y, b); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = in1_val > in2_val ? in1_val : in2_val; + } + } + } + } +} + template void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, T2* output_data, const Dims<4>& output_dims) { diff --git a/tensorflow/contrib/lite/kernels/maximum.cc b/tensorflow/contrib/lite/kernels/maximum.cc new file mode 100644 index 0000000000..9fdf2b47ea --- /dev/null +++ b/tensorflow/contrib/lite/kernels/maximum.cc @@ -0,0 +1,106 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace maximum { + +// This file has a reference implemenation of TFMaximum. +enum KernelType { + kReference, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct MaximumContext { + MaximumContext(TfLiteContext* context, TfLiteNode* node) { + input1 = GetInput(context, node, kInputTensor1); + input2 = GetInput(context, node, kInputTensor2); + output = GetOutput(context, node, kOutputTensor); + } + TfLiteTensor* input1; + TfLiteTensor* input2; + TfLiteTensor* output; +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + MaximumContext op_context(context, node); + TF_LITE_ENSURE_EQ(context, op_context.input1->type, op_context.input2->type); + TfLiteIntArray* output_dims = TfLiteIntArrayCopy(op_context.input2->dims); + op_context.output->type = op_context.input2->type; + return context->ResizeTensor(context, op_context.output, output_dims); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + MaximumContext op_context(context, node); + +#define TF_LITE_MAXIMUM(kernel_type, data_type) \ + kernel_type::TensorFlowMaximum( \ + GetTensorData(op_context.input1), \ + GetTensorDims(op_context.input1), \ + GetTensorData(op_context.input2), \ + GetTensorDims(op_context.input2), \ + GetTensorData(op_context.output), \ + GetTensorDims(op_context.output)) + + if (kernel_type == kReference) { + switch (op_context.output->type) { + case kTfLiteFloat32: + TF_LITE_MAXIMUM(reference_ops, float); + break; + default: + context->ReportError(context, + "Type %d is currently not supported by Maximum.", + op_context.output->type); + return kTfLiteError; + } + } else { + context->ReportError(context, + "Type %d is currently not supported by Maximum.", + op_context.output->type); + return kTfLiteError; + } +#undef TF_LITE_MAXIMUM + return kTfLiteOk; +} + +} // namespace maximum + +TfLiteRegistration* Register_MAXIMUM_REF() { + static TfLiteRegistration r = {nullptr, nullptr, maximum::Prepare, + maximum::Eval}; + return &r; +} + +TfLiteRegistration* Register_MAXIMUM() { return Register_MAXIMUM_REF(); } + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/maximum_test.cc b/tensorflow/contrib/lite/kernels/maximum_test.cc new file mode 100644 index 0000000000..b3fd7d4e6f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/maximum_test.cc @@ -0,0 +1,81 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class MaximumOpModel : public SingleOpModel { + public: + MaximumOpModel(const TensorData& input1, const TensorData& input2, + const TensorType& output) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumOptions, + CreateMaximumOptions(builder_).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + template + void SetInput1(std::initializer_list data) { + PopulateTensor(input1_, data); + } + + template + void SetInput2(std::initializer_list data) { + PopulateTensor(input2_, data); + } + + template + std::vector GetOutput() { + return ExtractVector(output_); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int input2_; + int output_; +}; + +TEST(MaximumOpTest, FloatTest) { + std::initializer_list data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::initializer_list data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + MaximumOpModel m({TensorType_FLOAT32, {3, 1, 2}}, + {TensorType_FLOAT32, {3, 1, 2}}, TensorType_FLOAT32); + m.SetInput1(data1); + m.SetInput2(data2); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({1.0, 0.0, 1.0, 12.0, -2.0, -1.43}))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 62045f0a4d..0f98154b90 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -76,6 +76,7 @@ TfLiteRegistration* Register_LOG_SOFTMAX(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_DEQUANTIZE(); TfLiteRegistration* Register_PRELU(); +TfLiteRegistration* Register_MAXIMUM(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -133,6 +134,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); + AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index b7ccdf070b..791d1378f3 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -597,6 +597,9 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_MAXIMUM: { + break; + } case BuiltinOperator_DELEGATE: { // TODO(ycling): Revisit when supporting saving delegated models. error_reporter->Report("DELEGATE op shouldn't exist in model."); diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index e31b7c03a5..decaf9f160 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -350,6 +350,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_DELEGATE: case tflite::BuiltinOperator_CAST: case tflite::BuiltinOperator_PRELU: + case tflite::BuiltinOperator_MAXIMUM: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index 35d224924e..ed6dd036f9 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -25,9 +25,9 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os -import subprocess -import tempfile +import os as _os +import subprocess as _subprocess +import tempfile as _tempfile # pylint: disable=unused-import from tensorflow.contrib.lite.python.op_hint import convert_op_hints_to_stubs @@ -74,7 +74,7 @@ else: _toco_from_proto_bin = _resource_loader.get_path_to_datafile( "../toco/python/toco_from_protos") -if _toco_from_proto_bin and not os.path.exists(_toco_from_proto_bin): +if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin): _toco_from_proto_bin = "toco_from_protos" @@ -102,10 +102,10 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): return _toco_python.TocoConvert( model_flags_str, toco_flags_str, input_data_str) - with tempfile.NamedTemporaryFile() as fp_toco, \ - tempfile.NamedTemporaryFile() as fp_model, \ - tempfile.NamedTemporaryFile() as fp_input, \ - tempfile.NamedTemporaryFile() as fp_output: + with _tempfile.NamedTemporaryFile() as fp_toco, \ + _tempfile.NamedTemporaryFile() as fp_model, \ + _tempfile.NamedTemporaryFile() as fp_input, \ + _tempfile.NamedTemporaryFile() as fp_output: fp_model.write(model_flags_str) fp_toco.write(toco_flags_str) fp_input.write(input_data_str) @@ -118,11 +118,11 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): fp_output.name ] cmdline = " ".join(cmd) - proc = subprocess.Popen( + proc = _subprocess.Popen( cmdline, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stdout=_subprocess.PIPE, + stderr=_subprocess.STDOUT, close_fds=True) stdout, stderr = proc.communicate() exitcode = proc.returncode diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index e1075971e9..7d2e00fe32 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -131,6 +131,7 @@ enum BuiltinOperator : byte { BIDIRECTIONAL_SEQUENCE_LSTM = 52, CAST = 53, PRELU = 54, + MAXIMUM = 55, } // Options for the builtin operators. @@ -173,6 +174,7 @@ union BuiltinOptions { LogSoftmaxOptions, CastOptions, DequantizeOptions, + MaximumOptions, } enum Padding : byte { SAME, VALID } @@ -384,6 +386,9 @@ table CastOptions { table DequantizeOptions { } +table MaximumOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 86daeaf5cc..66a97a1460 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -145,6 +145,9 @@ struct CastOptionsT; struct DequantizeOptions; struct DequantizeOptionsT; +struct MaximumOptions; +struct MaximumOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -255,11 +258,12 @@ enum BuiltinOperator { BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, BuiltinOperator_CAST = 53, BuiltinOperator_PRELU = 54, + BuiltinOperator_MAXIMUM = 55, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_PRELU + BuiltinOperator_MAX = BuiltinOperator_MAXIMUM }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[53] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[54] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -313,7 +317,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[53] { BuiltinOperator_DELEGATE, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOperator_CAST, - BuiltinOperator_PRELU + BuiltinOperator_PRELU, + BuiltinOperator_MAXIMUM }; return values; } @@ -375,6 +380,7 @@ inline const char **EnumNamesBuiltinOperator() { "BIDIRECTIONAL_SEQUENCE_LSTM", "CAST", "PRELU", + "MAXIMUM", nullptr }; return names; @@ -425,11 +431,12 @@ enum BuiltinOptions { BuiltinOptions_LogSoftmaxOptions = 36, BuiltinOptions_CastOptions = 37, BuiltinOptions_DequantizeOptions = 38, + BuiltinOptions_MaximumOptions = 39, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_DequantizeOptions + BuiltinOptions_MAX = BuiltinOptions_MaximumOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[39] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[40] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -469,7 +476,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[39] { BuiltinOptions_SplitOptions, BuiltinOptions_LogSoftmaxOptions, BuiltinOptions_CastOptions, - BuiltinOptions_DequantizeOptions + BuiltinOptions_DequantizeOptions, + BuiltinOptions_MaximumOptions }; return values; } @@ -515,6 +523,7 @@ inline const char **EnumNamesBuiltinOptions() { "LogSoftmaxOptions", "CastOptions", "DequantizeOptions", + "MaximumOptions", nullptr }; return names; @@ -681,6 +690,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MaximumOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1016,6 +1029,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_DequantizeOptions ? reinterpret_cast(value) : nullptr; } + MaximumOptionsT *AsMaximumOptions() { + return type == BuiltinOptions_MaximumOptions ? + reinterpret_cast(value) : nullptr; + } + const MaximumOptionsT *AsMaximumOptions() const { + return type == BuiltinOptions_MaximumOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3759,6 +3780,46 @@ inline flatbuffers::Offset CreateDequantizeOptions( flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct MaximumOptionsT : public flatbuffers::NativeTable { + typedef MaximumOptions TableType; + MaximumOptionsT() { + } +}; + +struct MaximumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef MaximumOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + MaximumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct MaximumOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit MaximumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + MaximumOptionsBuilder &operator=(const MaximumOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateMaximumOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + MaximumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -3990,6 +4051,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions ? static_cast(builtin_options()) : nullptr; } + const MaximumOptions *builtin_options_as_MaximumOptions() const { + return builtin_options_type() == BuiltinOptions_MaximumOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -4168,6 +4232,10 @@ template<> inline const DequantizeOptions *Operator::builtin_options_as inline const MaximumOptions *Operator::builtin_options_as() const { + return builtin_options_as_MaximumOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5696,6 +5764,29 @@ inline flatbuffers::Offset CreateDequantizeOptions(flatbuffer _fbb); } +inline MaximumOptionsT *MaximumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new MaximumOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void MaximumOptions::UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset MaximumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateMaximumOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateMaximumOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -6028,6 +6119,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_MaximumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -6198,6 +6293,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_MaximumOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -6356,6 +6455,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_MaximumOptions: { + auto ptr = reinterpret_cast(value); + return CreateMaximumOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -6514,6 +6617,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new DequantizeOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_MaximumOptions: { + value = new MaximumOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -6711,6 +6818,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_MaximumOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 555ea90034..12b7b3c350 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -36,6 +36,7 @@ gen_zipped_test_files( "local_response_norm.zip", "log_softmax.zip", "max_pool.zip", + "maximum.zip", "mean.zip", "mul.zip", "pad.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index cb5c500136..8045052452 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -862,6 +862,41 @@ def make_log_softmax_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_maximum_tests(zip_path): + """Make a set of tests to do maximum.""" + + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape_1": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + "input_shape_2": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + }] + + def build_graph(parameters): + """Build the maximum op testing graph.""" + input_tensor_1 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input_1", + shape=parameters["input_shape_1"]) + input_tensor_2 = tf.placeholder( + dtype=parameters["input_dtype"], + name="input_2", + shape=parameters["input_shape_2"]) + + out = tf.maximum(input_tensor_1, input_tensor_2) + return [input_tensor_1, input_tensor_2], [out] + + def build_inputs(parameters, sess, inputs, outputs): + values = [ + create_tensor_data(parameters["input_dtype"], + parameters["input_shape_1"]), + create_tensor_data(parameters["input_dtype"], + parameters["input_shape_2"]) + ] + return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_binary_op_tests_func(binary_operator): """Return a function that does a test on a binary operator.""" return lambda zip_path: make_binary_op_tests(zip_path, binary_operator) @@ -1977,6 +2012,7 @@ def main(unused_args): "exp.zip": make_exp_tests, "log_softmax.zip": make_log_softmax_tests, "lstm.zip": make_lstm_tests, + "maximum.zip": make_maximum_tests, } out = FLAGS.zip_to_output bin_path = FLAGS.toco diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index a4a7283508..6697b86e79 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -253,6 +253,7 @@ INSTANTIATE_TESTS(l2_pool) INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(log_softmax) +INSTANTIATE_TESTS(maximum) INSTANTIATE_TESTS(max_pool) INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index f23249cfa1..0989bfe5a3 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -863,6 +863,8 @@ std::vector> BuildOperatorList() { ops.emplace_back(new SimpleOperator("EXP", OperatorType::kExp)); ops.emplace_back(new SimpleOperator( "LOG_SOFTMAX", OperatorType::kLogSoftmax)); + ops.emplace_back(new SimpleOperator( + "MAXIMUM", OperatorType::kTensorFlowMaximum)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 9c19f8d464..f7a213ecfc 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -109,6 +109,8 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("EXP", OperatorType::kExp); CheckSimpleOperator("LOG_SOFTMAX", OperatorType::kLogSoftmax); + CheckSimpleOperator( + "MAXIMUM", OperatorType::kTensorFlowMaximum); } TEST_F(OperatorTest, BuiltinAdd) { diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index a57a1e5421..a03e731be3 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -494,7 +494,7 @@ class MutableDenseHashTable(LookupInterface): value_dtype=tf.int64, default_value=-1, empty_key=0) - table.insert(keys, values) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) ``` diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 4ae18b2cef..8b415e6527 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -34,7 +34,7 @@ PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/. RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" -CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" +CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, # so work around it by patching the source. diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 5a812af4e9..7a7683c953 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -258,6 +258,7 @@ tensorflow/core/kernels/requantize.cc tensorflow/core/kernels/remote_fused_graph_execute_op.cc tensorflow/core/kernels/remote_fused_graph_execute_utils.cc tensorflow/core/kernels/batch_matmul_op_real.cc +tensorflow/core/kernels/random_op.cc tensorflow/core/ops/training_ops.cc tensorflow/core/ops/string_ops.cc tensorflow/core/ops/state_ops.cc diff --git a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc index dfa12e873a..a9a32b7b25 100644 --- a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc +++ b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc @@ -74,7 +74,7 @@ class GatherTreeOp : public OpKernel { ctx, step_ids_shape.dim_size(1) == max_sequence_lengths.shape().dim_size(0), errors::InvalidArgument("batch size dimensions step_ids.shape[1] and " - "max_seqeuence_lengths.shape[0] must match. " + "max_sequence_lengths.shape[0] must match. " "but shapes are: ", step_ids_shape.DebugString(), " and ", max_sequence_lengths.shape().DebugString())); diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 9ff8a343f1..be53779826 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -736,7 +736,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): """Base attention mechanism for monotonic attention. Simply overrides the initial_alignments function to provide a dirac - distribution,which is needed in order for the monotonic attention + distribution, which is needed in order for the monotonic attention distributions to have the correct behavior. """ @@ -763,7 +763,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Bahadanau-style energy function. - This type of attention encorces a monotonic constraint on the attention + This type of attention enforces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -867,7 +867,7 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Luong-style energy function. - This type of attention encorces a monotonic constraint on the attention + This type of attention enforces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -1133,7 +1133,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): output_attention: Python bool. If `True` (default), the output at each time step is the attention value. This is the behavior of Luong-style attention mechanisms. If `False`, the output at each time step is - the output of `cell`. This is the beahvior of Bhadanau-style + the output of `cell`. This is the behavior of Bhadanau-style attention mechanisms. In both cases, the `attention` tensor is propagated to the next time step via the state and is used there. This flag only controls whether the attention mechanism is propagated diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index a26107b0d7..184144f64a 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -821,9 +821,9 @@ def _get_scores(log_probs, sequence_lengths, length_penalty_weight): Returns: The scores normalized by the length_penalty. """ - length_penality_ = _length_penalty( + length_penalty_ = _length_penalty( sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight) - return log_probs / length_penality_ + return log_probs / length_penalty_ def _length_penalty(sequence_lengths, penalty_factor): @@ -860,7 +860,7 @@ def _mask_probs(probs, eos_token, finished): unfinished beams remain unchanged. Args: - probs: Log probabiltiies of shape `[batch_size, beam_width, vocab_size]` + probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]` eos_token: An int32 id corresponding to the EOS token to allocate probability to. finished: A boolean tensor of shape `[batch_size, beam_width]` that diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index b3343aef47..99ad487630 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -115,8 +115,8 @@ class ParallelReader(io_ops.ReaderBase): reader needs to start reading from a new file since it has finished with the previous file). - A queue runner for enqueing in the `common_queue` is automatically added to - the TF QueueRunners collection. + A queue runner for enqueuing in the `common_queue` is automatically added + to the TF QueueRunners collection. Args: queue: A Queue or a mutable string Tensor representing a handle diff --git a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py index 37e9c4754c..62bd200361 100644 --- a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py +++ b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py @@ -36,9 +36,9 @@ def prefetch_queue(tensors, dynamic_pad=False, shared_name=None, name=None): - """Creates a queue to prefetech tensors from `tensors`. + """Creates a queue to prefetch tensors from `tensors`. - A queue runner for enqueing tensors into the prefetch_queue is automatically + A queue runner for enqueuing tensors into the prefetch_queue is automatically added to the TF QueueRunners collection. Example: diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index b3b61e1dfe..f2d31dc8db 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -124,7 +124,7 @@ class BoundingBox(ItemHandler): super(BoundingBox, self).__init__(self._full_keys) def tensors_to_item(self, keys_to_tensors): - """Maps the given dictionary of tensors to a contatenated list of bboxes. + """Maps the given dictionary of tensors to a concatenated list of bboxes. Args: keys_to_tensors: a mapping of TF-Example keys to parsed tensors. diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 461e627e99..6eafc1754c 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -1,15 +1,15 @@ -Using TensorRT in TensorFlow -============================ +# Using TensorRT in TensorFlow + This module provides necessary bindings and introduces TRT_engine_op operator that wraps a subgraph in TensorRT. This is still a work in progress but should be useable with most common graphs. -Compilation ------------ +## Compilation + In order to compile the module, you need to have a local TensorRT -installation (libnvinfer.so and respective include files). During the +installation ( libnvinfer.so and respective include files ). During the configuration step, TensorRT should be enabled and installation path should be set. If installed through package managers (deb,rpm), configure script should find the necessary components from the system @@ -22,4 +22,38 @@ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py directory +will be available. An example use can be found in test/test_tftrt.py script + +## Installing TensorRT 3.0.4 + +In order to make use of TensorRT integration, you will need a local installation of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). Due to compiler compatibility, you will need to download and install the TensorRT 3.0.4 tarball for _Ubuntu 14.04_, i.e., **_TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz_**, even if you are using Ubuntu 16.04 or later. + +### Preparing TensorRT installation + +Once you have downloaded TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz, you will need to unpack it to an installation directory, which will be referred to as . Please replace with the full path of actual installation directory you choose in commands below. + +```shell +cd && tar -zxf /path/to/TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz +``` + +After unpacking the binaries, you have several options to use them: + +#### To run TensorFlow as a user without superuser privileges + +For a regular user without any sudo rights, you should add TensorRT to your `$LD_LIBRARY_PATH`: + + ```shell + export LD_LIBRARY_PATH=/TensorRT-3.0.4/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + ``` + +Then you are ready to use TensorFlow-TensorRT integration. `$LD_LIBRARY_PATH` must contain the path to TensorRT installation for TensorFlow-TensorRT integration to work. If you are using a VirtualEnv-like setup, you can add the command above to your `bin/activate` script or to your `.bashrc` script. + +#### To run TensorFlow as a superuser + + When running as a superuser, such as in a container or via sudo, the `$LD_LIBRARY_PATH` approach above may not work. The following is preferred when the user has superuser privileges: + + ```shell + echo "/TensorRT-3.0.4/lib" | sudo tee /etc/ld.so.conf.d/tensorrt304.conf && sudo ldconfig + ``` + + Please ensure that any existing deb package installation of TensorRT is removed before following these instructions to avoid package conflicts. \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index eea8c8efa2..ff8cc6374d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -49,12 +49,13 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +bool IsTensorRTCandidate(const tensorflow::Node* node) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { "Identity", + "Snapshot", "Const", "Conv2D", "MaxPool", @@ -74,7 +75,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) - return candidate_ops.count(node_def.op()); + return candidate_ops.count(node->type_string()); } void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, @@ -84,10 +85,10 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->in_edges()) { if (!subgraph_node_ids.count(edge->src()->id()) && - !edge->src()->IsSource()) { + !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); } else { - VLOG(2) << edge->src()->name() << " N, "; + VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; } } } @@ -100,11 +101,11 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && - !edge->dst()->IsSink()) { - VLOG(2) << edge->dst()->name() << " Y, "; + !edge->dst()->IsSink() && !edge->IsControlEdge()) { + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << edge->dst()->name() << " N, "; + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; } } } @@ -409,8 +410,9 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::Status status = ConvertSubGraphToTensorRT(&p); if (status != tensorflow::Status::OK()) { LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \n" - << status.ToString() << " SKIPPING......"; + << " due to: \"" << status.ToString() + << "\" SKIPPING......( " << subgraph_node_names.size() + << " nodes)"; } count++; } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 92a692baa7..370911e4d9 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -53,8 +53,8 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; - namespace { inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, @@ -430,9 +430,8 @@ class Converter { tensorflow::tensorrt::TRTWeightStore* weight_store_; bool fp16_; void register_op_converters(); - std::vector get_inputs( - const tensorflow::NodeDef& node_def) { - std::vector inputs; + tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def, + std::vector* inputs) { for (auto const& input_name : node_def.input()) { /************************************************************************* * TODO(jie) handle case 1) here @@ -453,13 +452,17 @@ class Converter { VLOG(2) << "retrieve input: " << name; if (trt_tensors_.count(name)) { - inputs.push_back(trt_tensors_.at(name)); + inputs->push_back(trt_tensors_.at(name)); } else { - LOG(FATAL) << "input: " << name << " not available for node at, " - << node_def.name(); + string str("Node "); + StrAppend(&str, node_def.name(), " should have an input named '", name, + "' but it is not available"); + LOG(WARNING) << "input: " << name << " not available for node at " + << node_def.name(); + return tensorflow::errors::InvalidArgument(str); } } - return inputs; + return tensorflow::Status::OK(); } public: @@ -483,7 +486,8 @@ class Converter { } tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) { - std::vector inputs = this->get_inputs(node_def); + std::vector inputs; + TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); string op = node_def.op(); if (!op_registry_.count(op)) { return tensorflow::errors::Unimplemented( @@ -548,6 +552,19 @@ class Converter { } }; +TRT_ShapedWeights ConvertFP32ToFP16(Converter& ctx, + const TRT_ShapedWeights& weights_src) { + auto dtype_new = tensorflow::DataType::DT_HALF; + TRT_ShapedWeights weights = + ctx.get_temp_weights(dtype_new, weights_src.shape_); + const float* src = static_cast(weights_src.GetValues()); + Eigen::half* dst = const_cast( + static_cast(weights.GetValues())); + for (int64_t i = 0; i < weights_src.count(); i++) { + dst[i] = Eigen::half_impl::float_to_half_rtne(src[i]); + } + return weights; +} // **************************************************************************** // Constant folding functions // TODO(jie): once optimizer kicks in, we should have done constant folding @@ -875,7 +892,7 @@ tensorflow::Status BinaryTensorOpWeight( // Check type consistency nvinfer1::DataType ttype; - TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); + TF_RETURN_IF_ERROR(ConvertDType(weights.type_, &ttype)); // Check scale mode auto dims_w = weights.shape_; @@ -957,6 +974,10 @@ tensorflow::Status BinaryTensorOpWeight( } } + if (ctx.isFP16()) { + weights = ConvertFP32ToFP16(ctx, weights); + } + // prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); @@ -998,9 +1019,7 @@ enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, - std::vector* outputs, - int group // group ==0 specifies depthwise conv -) { + std::vector* outputs, int group) { const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); @@ -1025,6 +1044,10 @@ tensorflow::Status ConvertConv2DHelper( VLOG(2) << "groups count: " << num_groups; TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + if (ctx.isFP16()) { + weights_rsck = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); + } + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); TRT_ShapedWeights biases(weights.type_); @@ -1134,9 +1157,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1295,8 +1318,11 @@ tensorflow::Status ConvertScale(Converter& ctx, // Implement tensor binaryOp weight [channel wise] for now; const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); + if (ctx.isFP16()) { + weights = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); + } + TRT_ShapedWeights empty_weights(weights.type_); TFAttrs attrs(node_def); @@ -1376,8 +1402,11 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); + LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + string err_str("Broadcast method is not supported for '"); + StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); + return tensorflow::errors::InvalidArgument(err_str); } } } else { @@ -1391,33 +1420,16 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - auto half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - half_tensor.device(defd) = - tensor.flat().template cast(); - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } else if (!weights_tensor.int_val().empty()) { VLOG(2) << "int!!!" << node_def.name(); nvinfer1::Dims scalar_shape; @@ -1432,8 +1444,11 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.int_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); + LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + string err_str("Broadcast method is not supported for '"); + StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); + return tensorflow::errors::InvalidArgument(err_str); } } } else { @@ -1447,62 +1462,23 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - TTypes::Flat half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - switch (dtype) { - case (tensorflow::DT_INT32): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT16): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_UINT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - default: - return tensorflow::errors::InvalidArgument( - "Datatype " + tensorflow::DataTypeString(dtype) + - " for FP16 conversion"); - break; - }; - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val() - .end()); // make a local copy first to flatten - // doesn't have to be contiguous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } + // we should not have converted //if (ctx.isFP16()) { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); + len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val().end()); // make a local copy first to flatten + // doesn't have to be contigous + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { + // obsolete method. + // After optimization path, we do not see weights in this format. + // fp16 conversion technically should be needed here. VLOG(2) << "TENSOR!!!" << node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1784,8 +1760,6 @@ tensorflow::Status ConvertConcat(Converter& ctx, TRT_ShapedWeights axis = inputs.at(input_size).weights(); TFAttrs attrs(node_def); - // auto attr_size = attrs.at("N")->i(); - // auto data_type = attrs.get("T"); auto index_type = attrs.get("Tidx"); // TODO(jie): handle data type @@ -1875,71 +1849,103 @@ tensorflow::Status ConvertFusedBatchNorm( "only is_training=false is supported, at " + node_def.name()); } nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); - TRT_ShapedWeights variance_weights = inputs.at(4).weights(); - TRT_ShapedWeights dummy_power_weights(scale_weights.type_); - TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(scale_weights); - TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(offset_weights); - size_t nweight = scale_weights.count(); - if ((scale_weights.type_ == offset_weights.type_) && - (mean_weights.type_ == variance_weights.type_) && - (scale_weights.type_ == variance_weights.type_)) { - if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && - (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + + // Check parameter types + auto parameter_type = inputs.at(1).weights().type_; + if ((parameter_type != tensorflow::DataType::DT_FLOAT) && + (parameter_type != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + tensorflow::DataTypeString(parameter_type)); + } + for (int i = 1; i < 5; i++) { + if (inputs.at(i).weights().type_ != parameter_type) { return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + - tensorflow::DataTypeString(scale_weights.type_)); + "Inconsistent parameter type for batchnormis not supported, at: " + + node_def.name()); } - if (scale_weights.type_ == tensorflow::DT_FLOAT) { - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast(scale_weights.GetValues()))[i]; - float offset = - (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = - (static_cast(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast( - static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( - static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; - } - } else { - const Eigen::half* scale_vals = - (static_cast(scale_weights.GetValues())); - const Eigen::half* off_vals = - (static_cast(offset_weights.GetValues())); - const Eigen::half* mean_vals = - (static_cast(mean_weights.GetValues())); - const Eigen::half* variance_vals = - (static_cast(variance_weights.GetValues())); - Eigen::half* comb_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - Eigen::half* comb_off_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - for (size_t i = 0; i < nweight; ++i) { - float scale(scale_vals[i]); - float offset(off_vals[i]); - float mean(mean_vals[i]); - float variance(variance_vals[i]); - float combined_scale_ref = scale / sqrtf(variance + epsilon); - comb_scale_vals[i] = Eigen::half(combined_scale_ref); - float combined_offset_ref = offset - mean * combined_scale_ref; - comb_off_vals[i] = Eigen::half(combined_offset_ref); + } + + TRT_ShapedWeights dummy_power_weights(parameter_type); + size_t nweight = 0; + for (int i = 1; i < 5; i++) { + nweight = std::max(nweight, (size_t)inputs.at(i).weights().count()); + } + TRT_ShapedWeights* ptr_shape_weights = nullptr; + for (int i = 1; i < 5; i++) { + if (inputs.at(i).weights().count() == nweight) { + ptr_shape_weights = + const_cast(&(inputs.at(i).weights())); + } else if (inputs.at(i).weights().count() != 1) { + return tensorflow::errors::InvalidArgument( + "Inconsistent batchnorm parameter count, at: " + node_def.name()); + } + } + // We could technically have two weights with different shape. + // that requires two addScale op, arguably less performant + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(*ptr_shape_weights); + TRT_ShapedWeights combined_offset_weights = + ctx.get_temp_weights_like(*ptr_shape_weights); + + const Eigen::half* cast_vals_array[4]; + const float* vals_array[4]; + for (int j = 0; j < 4; j++) { + cast_vals_array[j] = + static_cast(inputs.at(j + 1).weights().GetValues()); + vals_array[j] = + static_cast(inputs.at(j + 1).weights().GetValues()); + } + Eigen::half* cast_combined_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* cast_combined_offset_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + float* combined_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + float* combined_offset_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + + for (size_t i = 0; i < nweight; ++i) { + float batchnorm_data[4]; + for (int j = 0; j < 4; j++) { + if (inputs.at(j + 1).weights().count() != 1) { + if (parameter_type == tensorflow::DT_FLOAT) { + batchnorm_data[j] = vals_array[j][i]; + } else if (parameter_type == tensorflow::DT_HALF) { + batchnorm_data[j] = + Eigen::half_impl::half_to_float(cast_vals_array[j][i]); + } + } else { + if (parameter_type == tensorflow::DT_FLOAT) { + batchnorm_data[j] = vals_array[j][0]; + } else if (parameter_type == tensorflow::DT_HALF) { + batchnorm_data[j] = + Eigen::half_impl::half_to_float(cast_vals_array[j][0]); + } } } + float scale = batchnorm_data[0]; + float offset = batchnorm_data[1]; + float mean = batchnorm_data[2]; + float variance = batchnorm_data[3]; + float combined_scale_val = scale / sqrtf(variance + epsilon); + float combined_offset_val = offset - mean * combined_scale_val; + if (parameter_type == tensorflow::DT_FLOAT) { + combined_scale_vals[i] = combined_scale_val; + combined_offset_vals[i] = combined_offset_val; + } else if (parameter_type == tensorflow::DT_HALF) { + cast_combined_scale_vals[i] = Eigen::half(combined_scale_val); + cast_combined_offset_vals[i] = Eigen::half(combined_offset_val); + } } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); + + nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM + : nvinfer1::ScaleMode::kCHANNEL; + nvinfer1::IScaleLayer* layer = + ctx.network()->addScale(*const_cast(tensor), mode, + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -2050,6 +2056,7 @@ void Converter::register_op_converters() { op_registry_["Const"] = ConvertConst; // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed + op_registry_["Snapshot"] = ConvertIdentity; // Snapshot should be removed // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -2143,8 +2150,11 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->thr_->join(); delete calib_res->thr_; if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " + LOG(ERROR) << "Calibration failed!, engine does not exist. Did you run " "calibration graph?"; + return tensorflow::errors::FailedPrecondition( + "Calibration graph needs to be executed on" + " calibration data before convertsion to inference graph"); } auto weight_rmgr = trt_rm->getManager("WeightStore"); TF_CHECK_OK(weight_rmgr->Delete( @@ -2181,7 +2191,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( return status; } auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_CHECK_OK(status); + TF_RETURN_IF_ERROR(status); for (size_t i = 0; i < out_edges.size(); i++) { VLOG(1) << "Connecting trt_engine_node output " << i << " with " << out_edges.at(i)->dst()->name() << " port " @@ -2279,6 +2289,12 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + auto type_status = ConvertDType(tf_dtype, &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Data type conversion for input '" << node_name + << "' failed"; + return type_status; + } TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "accessing output index of: " << output_idx @@ -2346,8 +2362,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); + return tensorflow::errors::InvalidArgument("Output node'" + tensor_name + + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2504,7 +2520,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); + auto type_status = ConvertDType(tf_dtype, &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Type conversion failed for " << node_name; + return type_status; + } VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name @@ -2515,8 +2535,12 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(jie): TRT 3.x only support 4 dimensional input tensor. // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) - return tensorflow::errors::Unimplemented("require 4 dimensional input"); + if (op_info.shape().dim_size() != 4) { + string err_str = "Require 4 dimensional input."; + StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", + shape_inference_node_name); + return tensorflow::errors::Unimplemented(err_str); + } for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i @@ -2577,8 +2601,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); + return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2622,7 +2646,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } TF_RETURN_IF_ERROR(weight_rmgr->Delete( engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name; + LOG(INFO) << "finished engine " << engine_name << " containing " + << s.subgraph_node_ids.size() << " nodes"; // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 6193f0b0a1..8fc4697c51 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -80,13 +80,20 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector in_edges(dst->in_edges().begin(), dst->in_edges().end()); for (const tensorflow::Edge* in_edge : in_edges) { - if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); - if (e->src() == graph->source_node()) { - graph->AddEdge(e->src(), e->src_output(), src, - tensorflow::Graph::kControlSlot); - } else { - graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); + if (in_edge->IsControlEdge()) { + if (in_edge->src() != src) { + tensorflow::Edge* e = const_cast(in_edge); + graph->AddControlEdge(e->src(), src); + } + } else { + if (in_edge->src() != src) { + tensorflow::Edge* e = const_cast(in_edge); + if (e->src() == graph->source_node()) { + graph->AddEdge(e->src(), e->src_output(), src, + tensorflow::Graph::kControlSlot); + } else { + graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); + } } } } @@ -94,12 +101,19 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector out_edges(dst->out_edges().begin(), dst->out_edges().end()); for (const tensorflow::Edge* out_edge : out_edges) { - tensorflow::Edge* e = const_cast(out_edge); - if (e->dst() == graph->sink_node()) { - graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), - e->dst_input()); + if (out_edge->IsControlEdge()) { + tensorflow::Edge* e = const_cast(out_edge); + graph->AddControlEdge(src, e->dst()); } else { - graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); + tensorflow::Edge* e = const_cast(out_edge); + if (e->dst() == graph->sink_node()) { + VLOG(1) << " edge to sink node " << src->name() << " -> " + << e->dst()->name(); + graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), + e->dst_input()); + } else { + graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); + } } } @@ -118,7 +132,7 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments) { // Create a Graph representation of the GraphDef. tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), @@ -136,7 +150,7 @@ tensorflow::Status SegmentGraph( for (int i = 0; i < graph.num_node_ids(); ++i) { tensorflow::Node* node = graph.FindNodeId(i); if (options.exclude_node_list.count(node->name()) != 0 || - !candidate_fn(node->def())) { + !candidate_fn(node)) { node = nullptr; } node_segments.emplace_back(node); @@ -155,7 +169,7 @@ tensorflow::Status SegmentGraph( for (const tensorflow::Node* node : order) { // All output nodes of 'node' have been visited... - VLOG(2) << "Trying node " << node->name(); + VLOG(2) << "Trying node " << node->name() << " id=" << node->id(); // 'node' must be a TRT candidate... if (node_segments[node->id()].Value() == nullptr) { @@ -169,8 +183,12 @@ tensorflow::Status SegmentGraph( while (true) { std::set contract_edges; for (const tensorflow::Edge* out_edge : node->out_edges()) { - VLOG(2) << "... out node " << out_edge->dst()->name(); - + VLOG(2) << "... out node " << out_edge->dst()->name() << " ( " + << out_edge->dst()->id() << " <- " << node->id() << " )"; + if (out_edge->IsControlEdge()) { + VLOG(2) << "... ... Control Edge, Skipping"; + continue; + } // Out node must be TRT candidate... if (node_segments[out_edge->dst()->id()].Value() == nullptr) { VLOG(2) << "... ... not a TRT candidate"; @@ -196,7 +214,8 @@ tensorflow::Status SegmentGraph( const tensorflow::Node* src = contract_edge->src(); const tensorflow::Node* dst = contract_edge->dst(); - VLOG(2) << "Merge " << src->name() << " <- " << dst->name(); + VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " (" + << src->id() << " <- " << dst->id(); node_segments[src->id()].Merge(&node_segments[dst->id()]); // Contracting the edge leaves disconnected graph edges. diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index ee6e2b3ed2..7e8685f44a 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -20,10 +20,12 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { + namespace tensorrt { namespace segment { @@ -46,7 +48,7 @@ struct SegmentOptions { // @return the status. tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments); } // namespace segment diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc index 74cbc5f2b3..7ddabec268 100644 --- a/tensorflow/contrib/tensorrt/segment/segment_test.cc +++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc @@ -35,7 +35,7 @@ class SegmentTest : public ::testing::Test { TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name); - std::function MakeCandidateFn( + std::function MakeCandidateFn( const std::set& node_names); protected: @@ -60,10 +60,10 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph, return ret; } -std::function SegmentTest::MakeCandidateFn( +std::function SegmentTest::MakeCandidateFn( const std::set& node_names) { - return [node_names](const NodeDef& node) -> bool { - return node_names.find(node.name()) != node_names.end(); + return [node_names](const Node* node) -> bool { + return node_names.find(node->name()) != node_names.end(); }; } diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py index ff140efd48..4f6527a546 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py @@ -70,7 +70,7 @@ class ARModel(model.TimeSeriesModel): input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. Note that - setting it to > 1 empiricaly seems to give a better fit. + setting it to > 1 empirically seems to give a better fit. num_features: number of input features per time step. num_time_buckets: Number of buckets into which to divide (time % periodicity) for generating time based features. diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 23452a81c3..26793c80bf 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -185,7 +185,7 @@ def batch_matrix_pow(matrices, powers): { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p power(A, 0) = I - The power(A, 0) = I case is handeled by starting with accumulator set to the + The power(A, 0) = I case is handled by starting with accumulator set to the identity matrix; matrices with zero residual powers are passed through unchanged. diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py index 1afc58cfb2..6746dd7b43 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py @@ -107,7 +107,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state transition matrix. It has shape - [self.state_dimendion, self.state_dimension]. + [self.state_dimension, self.state_dimension]. """ # Pad any unused AR blocks with zeros. The extra state is necessary if # ma_order >= ar_order. @@ -127,7 +127,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state noise transform matrix. It has shape - [self.state_dimendion, self.num_features]. + [self.state_dimension, self.num_features]. """ # Noise is broadcast, through the moving average coefficients, to # un-observed parts of the latent state. diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt index 51d91399f8..e667c328ae 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt @@ -49,14 +49,14 @@ in the batch: If `fast` is `True`, then the solution is computed by solving the normal equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + -\lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as +problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). +If \\(m \lt n\\) then `output` is computed as \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the minimum-norm solution to the under-determined linear system, i.e. \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), subject to \\(A Z = B\\). Notice that the fast path is only numerically stable when \\(A\\) is numerically full rank and has a condition number -\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is +\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is sufficiently large. If `fast` is `False` an algorithm based on the numerically robust complete diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc index 43a909466e..829c19204a 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc @@ -19,9 +19,6 @@ limitations under the License. namespace tensorflow { -constexpr const char* MklCPUAllocator::kMaxLimitStr; -constexpr const size_t MklCPUAllocator::kDefaultMaxLimit; - } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 623248b6ce..2fb17c2b02 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -1210,7 +1210,7 @@ Status ConcatV2Shape(InferenceContext* c) { c->num_inputs() - 1 /* dim_index */); } -Status BroadcastBinaryOpShapeFn(InferenceContext* c) { +Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index) { ShapeHandle shape_x = c->input(0); ShapeHandle shape_y = c->input(1); if (!c->RankKnown(shape_x) || !c->RankKnown(shape_y)) { @@ -1272,7 +1272,7 @@ Status BroadcastBinaryOpShapeFn(InferenceContext* c) { } } - c->set_output(0, c->MakeShape(dims)); + c->set_output(output_index, c->MakeShape(dims)); return Status::OK(); } diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 293c40e04d..789746b403 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -265,9 +265,15 @@ Status ConcatShape(shape_inference::InferenceContext* c, // Shape function for concat operations. Status ConcatV2Shape(shape_inference::InferenceContext* c); +// Shape function for binary operators that broadcast their inputs +// and with output to output_index. +Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index); + // Shape function for binary operators that broadcast their inputs. // Tested by ops/math_ops_test.cc. -Status BroadcastBinaryOpShapeFn(InferenceContext* c); +inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) { + return BroadcastBinaryOpOutputShapeFn(c, 0); +} // Shape function for random operations. Status RandomShape(shape_inference::InferenceContext* c); diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index e3cc848a16..accc587000 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -317,6 +317,7 @@ class InferenceContext { input_tensors_as_shapes_ = input_tensors_as_shapes; } + ShapeHandle output(int64 idx) const { return outputs_[idx]; } void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; } Status set_output(StringPiece output_name, const std::vector& shapes); diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 333a6570dc..62aafa7930 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -933,7 +933,7 @@ class MklFusedBatchNormOp : public OpKernel { bool is_training_; T* mean_values_; T* variance_values_; - size_t depth_; // batch normalization is done for per channel. + int depth_; // batch normalization is done for per channel. void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 4abfbfb1a6..7badc00572 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -23,6 +23,13 @@ limitations under the License. // non-GPU targets. This only breaks in clang, because it's more strict for // template code and CudaAtomicMax is used in template context. +// This file requires the following include because it uses CudaAtomicMax: +// #include "tensorflow/core/util/cuda_kernel_helper.h" + +// Unfortunately we can't add the #include, since it breaks compilation for +// non-GPU targets. This only breaks in clang, because it's more strict for +// template code and CudaAtomicMax is used in template context. + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc index 50157d5d48..fe04dcf72e 100644 --- a/tensorflow/core/kernels/snapshot_op.cc +++ b/tensorflow/core/kernels/snapshot_op.cc @@ -22,6 +22,26 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +template +class SnapshotOp : public OpKernel { + public: + explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + Tensor* output = nullptr; + // Try to use buffer forwarding to avoid an explicit copy. + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + functor::Snapshot functor; + functor(context->eigen_device(), input.flat(), + output->flat()); + } + } +}; #define REGISTER_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -31,6 +51,16 @@ typedef Eigen::ThreadPoolDevice CPUDevice; TF_CALL_POD_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL +#if GOOGLE_CUDA +#define REGISTER_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ + SnapshotOp); + +TF_CALL_POD_TYPES(REGISTER_KERNEL); +#undef REGISTER_KERNEL +#endif + #if TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SyclDevice; #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h index b94834f159..a18065d42b 100644 --- a/tensorflow/core/kernels/snapshot_op.h +++ b/tensorflow/core/kernels/snapshot_op.h @@ -26,29 +26,19 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { +namespace functor { +// Functor used by SnapshotOp. template -class SnapshotOp : public OpKernel { - public: - explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - Tensor* output = nullptr; - // Try to use buffer forwarding to avoid an explicit copy. - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 0, input.shape(), &output)); - if (!output->SharesBufferWith(input)) { - // We had to allocate a new buffer since the refcount on the input was - // greater than 1. Copy the input to the new buffer. - const Device& device = context->eigen_device(); - device.memcpy(output->template flat().data(), - input.template flat().data(), - input.NumElements() * sizeof(Scalar)); - } +struct Snapshot { + void operator()(const Device& device, + typename TTypes::ConstTensor input, + typename TTypes::Tensor output) { + device.memcpy(output.data(), input.data(), input.size() * sizeof(Scalar)); } }; +} // namespace functor } // namespace tensorflow #endif // TENSORFLOW_KERNELS_SNAPSHOT_OP_H_ diff --git a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc index 52070be838..e4e3bd5220 100644 --- a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc +++ b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc @@ -24,13 +24,10 @@ limitations under the License. namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -#define REGISTER_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ - SnapshotOp); +// Definition of the GPU implementations declared in softsign_op.cc. +#define DEFINE_GPU_KERNELS(T) template struct functor::Snapshot; -TF_CALL_POD_TYPES(REGISTER_KERNEL); -#undef REGISTER_KERNEL +TF_CALL_POD_TYPES(DEFINE_GPU_KERNELS); } // namespace tensorflow diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc index a6a71fdfaf..9a3612bd72 100644 --- a/tensorflow/core/kernels/xent_op.cc +++ b/tensorflow/core/kernels/xent_op.cc @@ -17,12 +17,14 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/xent_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/xent_op.h" +#include "tensorflow/core/util/bcast.h" namespace tensorflow { @@ -41,37 +43,56 @@ class SoftmaxXentWithLogitsOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& logits_in = context->input(0); const Tensor& labels_in = context->input(1); - OP_REQUIRES(context, logits_in.IsSameSize(labels_in), - errors::InvalidArgument( - "logits and labels must be same size: logits_size=", - logits_in.shape().DebugString(), - " labels_size=", labels_in.shape().DebugString())); - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(logits_in.shape()), - errors::InvalidArgument("logits must be 2-dimensional")); - // As we already tested that both inputs have the same shape no need to - // check that "labels" is a matrix too. + + TensorShape shape_in = logits_in.shape(); + + BCast bcast(BCast::FromShape(logits_in.shape()), + BCast::FromShape(labels_in.shape())); + if (!logits_in.IsSameSize(labels_in)) { + OP_REQUIRES(context, bcast.IsValid(), + errors::InvalidArgument( + "logits and labels must be broadcastable: logits_size=", + logits_in.shape().DebugString(), + " labels_size=", labels_in.shape().DebugString())); + shape_in = BCast::ToShape(bcast.output_shape()); + } + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(shape_in), + errors::InvalidArgument("logits and labels must be beither " + "2-dimensional, or roadcasted to " + "2-dimensional")); // loss is 1-D (one per example), and size is batch_size. Tensor scratch; OP_REQUIRES_OK( context, context->allocate_temp(DataTypeToEnum::value, - TensorShape({logits_in.dim_size(0), 1}), + TensorShape({shape_in.dim_size(0), 1}), &scratch)); Tensor* loss_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({logits_in.dim_size(0)}), &loss_out)); + 0, TensorShape({shape_in.dim_size(0)}), &loss_out)); Tensor* back_out = nullptr; // Try to reuse the logits_in buffer for the backprop output. OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, logits_in.shape(), &back_out)); - if (logits_in.dim_size(0) > 0) { + {0}, 1, shape_in, &back_out)); + if (shape_in.dim_size(0) > 0) { functor::XentFunctor functor; - functor(context->eigen_device(), logits_in.matrix(), - labels_in.matrix(), scratch.matrix(), loss_out->vec(), - back_out->matrix()); + if (logits_in.IsSameSize(labels_in)) { + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + Eigen::array{1, 1}, + Eigen::array{1, 1}, logits_in.matrix(), + labels_in.matrix(), scratch.matrix(), loss_out->vec(), + back_out->matrix()); + } else { + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + BCast::ToIndexArray<2>(bcast.x_bcast()), + BCast::ToIndexArray<2>(bcast.y_bcast()), + logits_in.template shaped(bcast.x_reshape()), + labels_in.template shaped(bcast.y_reshape()), + scratch.matrix(), loss_out->vec(), back_out->matrix()); + } } } }; @@ -81,13 +102,17 @@ class SoftmaxXentWithLogitsOp : public OpKernel { namespace functor { template struct XentFunctorBase { - void operator()(const Device& d, typename TTypes::ConstMatrix logits, + void operator()(const Device& d, + const Eigen::DSizes& shape, + const Eigen::array& logits_bcast, + const Eigen::array& labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, logits, labels, scratch, loss, - backprop); + XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, + logits, labels, scratch, loss, backprop); } }; diff --git a/tensorflow/core/kernels/xent_op.h b/tensorflow/core/kernels/xent_op.h index e689fca7ff..87be17fca9 100644 --- a/tensorflow/core/kernels/xent_op.h +++ b/tensorflow/core/kernels/xent_op.h @@ -18,6 +18,7 @@ limitations under the License. // Functor definition for XentOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { @@ -33,7 +34,11 @@ struct XentFunctor { // scratch: temporary tensor, dims: batch_size, 1 // loss: output tensor for the loss, dims: batch_size. // backprop: output tensor for the backprop, dims: batch_size, num_classes. - void operator()(const Device& d, typename TTypes::ConstMatrix logits, + void operator()(const Device &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -45,7 +50,11 @@ struct XentFunctor { // specializations for both device types. template struct XentEigenImpl { - static void Compute(const Device& d, typename TTypes::ConstMatrix logits, + static void Compute(const Device &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -57,8 +66,8 @@ struct XentEigenImpl { const int kBatchDim = 0; const int kClassDim = 1; - const int batch_size = logits.dimension(kBatchDim); - const int num_classes = logits.dimension(kClassDim); + const int batch_size = shape[kBatchDim]; + const int num_classes = shape[kClassDim]; // These arrays are used to reduce along the class dimension, and broadcast // the resulting value to all classes. @@ -84,10 +93,12 @@ struct XentEigenImpl { #endif // max_logits along classes. - scratch.reshape(batch_only).device(d) = logits.maximum(along_class); + scratch.reshape(batch_only).device(d) = + logits.broadcast(logits_bcast).maximum(along_class); // logits - max_logits. - backprop.device(d) = logits - scratch.broadcast(one_by_class); + backprop.device(d) = + logits.broadcast(logits_bcast) - scratch.broadcast(one_by_class); // sum(exp(logits - max_logits)) along classes. scratch.reshape(batch_only).device(d) = backprop.exp().sum(along_class); @@ -99,15 +110,15 @@ struct XentEigenImpl { // sum(-labels * // ((logits - max_logits) - log(sum(exp(logits - max_logits))))) // along classes - loss.device(d) = - (labels * (scratch.log().eval().broadcast(one_by_class) - backprop)) - .eval() - .sum(along_class); + loss.device(d) = (labels.broadcast(labels_bcast) * + (scratch.log().eval().broadcast(one_by_class) - backprop)) + .eval() + .sum(along_class); // backprop: prob - labels, where // prob = exp(logits - max_logits) / sum(exp(logits - max_logits)) - backprop.device(d) = - (backprop.exp() / scratch.broadcast(one_by_class)) - labels; + backprop.device(d) = (backprop.exp() / scratch.broadcast(one_by_class)) - + labels.broadcast(labels_bcast); } }; diff --git a/tensorflow/core/kernels/xent_op_gpu.cu.cc b/tensorflow/core/kernels/xent_op_gpu.cu.cc index 05ee7da490..2c0c0b3a02 100644 --- a/tensorflow/core/kernels/xent_op_gpu.cu.cc +++ b/tensorflow/core/kernels/xent_op_gpu.cu.cc @@ -31,12 +31,17 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { template struct XentFunctor { - void operator()(const GPUDevice& d, typename TTypes::ConstMatrix logits, + void operator()(const GPUDevice &d, + const Eigen::DSizes &shape, + const Eigen::array &logits_bcast, + const Eigen::array &labels_bcast, + typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, logits, labels, scratch, loss, + XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, + logits, labels, scratch, loss, backprop); } }; diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 88d2aa3f41..111670c361 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -794,11 +794,35 @@ REGISTER_OP("ReverseV2") ShapeHandle input = c->input(0); ShapeHandle axis; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &axis)); - // TODO(aselle): if input(0)'s dimension is known we could validate axis if (c->Rank(input) > 8) { return errors::InvalidArgument( "reverse does not work on tensors with more than 8 dimensions"); } + const Tensor* axis_tensor = c->input_tensor(1); + if (axis_tensor != nullptr && c->RankKnown(input)) { + int32 rank = c->Rank(input); + std::vector axis_value; + if (axis_tensor->dtype() == DT_INT32) { + axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); + } else { + axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); + } + std::vector axes_dense(c->Rank(input), false); + for (int i = 0; i < axis_value.size(); i++) { + int64 canonical_axis = + axis_value[i] < 0 ? rank + axis_value[i] : axis_value[i]; + if (canonical_axis < 0 || canonical_axis >= rank) { + return errors::InvalidArgument("'axis'[", i, "] = ", axis_value[i], + " is out of valid range [", 0, ", ", + rank - 1); + } + if (axes_dense[canonical_axis]) { + return errors::InvalidArgument("axis ", canonical_axis, + " specified more than once."); + } + axes_dense[canonical_axis] = true; + } + } c->set_output(0, input); return Status::OK(); }); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 1f4e9753c3..6c2fc60bab 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1062,12 +1062,27 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits") .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); - TF_RETURN_IF_ERROR(c->Merge(input, c->input(1), &input)); + if (c->WithRank(c->input(0), 2, &input) == Status::OK() && + c->Merge(input, c->input(1), &input) == Status::OK()) { + DimensionHandle batch_size = c->Dim(input, 0); + c->set_output(0, c->Vector(batch_size)); + c->set_output(1, input); + return Status::OK(); + } + TF_RETURN_IF_ERROR(BroadcastBinaryOpOutputShapeFn(c, 1)); - DimensionHandle batch_size = c->Dim(input, 0); + if (!c->RankKnown(c->output(1))) { + return errors::InvalidArgument( + "Shape must be broadcasted with rank 2, but is rank is unknown."); + } + + if (c->Rank(c->output(1)) != 2) { + return errors::InvalidArgument( + "Shape must be broadcasted with rank 2, but is rank ", + c->Rank(c->output(1))); + } + DimensionHandle batch_size = c->Dim(c->output(1), 0); c->set_output(0, c->Vector(batch_size)); - c->set_output(1, input); return Status::OK(); }); diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 1b17a7cda6..289b953055 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -410,10 +410,18 @@ TEST(NNOpsTest, SoftmaxCrossEntropyWithLogits_ShapeFn) { INFER_OK(op, "[1,?];[?,2]", "[d0_0];[d0_0,d0_1|d1_1]"); INFER_OK(op, "[?,2];[1,2]", "[d1_0];in1"); - INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, - "[1,?];[2,?]"); - INFER_ERROR("Shape must be rank 2 but is rank 3", op, "[1,2,3];?"); - INFER_ERROR("Shapes must be equal rank, but are 2 and 3", op, "?;[1,2,3]"); + INFER_ERROR("Shape must be broadcasted with rank 2", op, "[1,2,3];?"); + INFER_ERROR("Shape must be broadcasted with rank 2", op, "?;[1,2,3]"); + + // Broadcast example + // [1,4] and [2,4] are broadcasted to [2,4] + INFER_OK(op, "[1,4];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); + // [2,4] and [2,1] are broadcasted to [2,4] + INFER_OK(op, "[2,4];[2,1]", "[d0_0];[d0_0|d1_0,d0_1]"); + // [1,?] and [2,4] are broadcasted to [2,4] + INFER_OK(op, "[1,?];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); + // [2,4] and [?,1] are broadcasted to [2,4] + INFER_OK(op, "[2,4];[?,1]", "[d0_0];[d0_0|d1_0,d0_1]"); } TEST(NNOpsTest, SparseSoftmaxCrossEntropyWithLogits_ShapeFn) { diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 22f2c02b78..40eebd1db0 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 6 +#define TF_MINOR_VERSION 7 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md index 956dccb64f..f3db5857ae 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md +++ b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md @@ -6,42 +6,42 @@ Monte Carlo integration and helpers. ## Background Monte Carlo integration refers to the practice of estimating an expectation with -a sample mean. For example, given random variable `Z in R^k` with density `p`, +a sample mean. For example, given random variable `Z in \\(R^k\\)` with density `p`, the expectation of function `f` can be approximated like: ``` -E_p[f(Z)] = \int f(z) p(z) dz - ~ S_n - := n^{-1} \sum_{i=1}^n f(z_i), z_i iid samples from p. +$$E_p[f(Z)] = \int f(z) p(z) dz$$ +$$ ~ S_n + := n^{-1} \sum_{i=1}^n f(z_i), z_i\ iid\ samples\ from\ p.$$ ``` -If `E_p[|f(Z)|] < infinity`, then `S_n --> E_p[f(Z)]` by the strong law of large -numbers. If `E_p[f(Z)^2] < infinity`, then `S_n` is asymptotically normal with -variance `Var[f(Z)] / n`. +If `\\(E_p[|f(Z)|] < infinity\\)`, then `\\(S_n\\) --> \\(E_p[f(Z)]\\)` by the strong law of large +numbers. If `\\(E_p[f(Z)^2] < infinity\\)`, then `\\(S_n\\)` is asymptotically normal with +variance `\\(Var[f(Z)] / n\\)`. Practitioners of Bayesian statistics often find themselves wanting to estimate -`E_p[f(Z)]` when the distribution `p` is known only up to a constant. For +`\\(E_p[f(Z)]\\)` when the distribution `p` is known only up to a constant. For example, the joint distribution `p(z, x)` may be known, but the evidence -`p(x) = \int p(z, x) dz` may be intractable. In that case, a parameterized -distribution family `q_lambda(z)` may be chosen, and the optimal `lambda` is the -one minimizing the KL divergence between `q_lambda(z)` and -`p(z | x)`. We only know `p(z, x)`, but that is sufficient to find `lambda`. +`\\(p(x) = \int p(z, x) dz\\)` may be intractable. In that case, a parameterized +distribution family `\\(q_\lambda(z)\\)` may be chosen, and the optimal `\\(\lambda\\)` is the +one minimizing the KL divergence between `\\(q_\lambda(z)\\)` and +`\\(p(z | x)\\)`. We only know `p(z, x)`, but that is sufficient to find `\\(\lambda\\)`. ## Log-space evaluation and subtracting the maximum Care must be taken when the random variable lives in a high dimensional space. -For example, the naive importance sample estimate `E_q[f(Z) p(Z) / q(Z)]` -involves the ratio of two terms `p(Z) / q(Z)`, each of which must have tails -dropping off faster than `O(|z|^{-(k + 1)})` in order to have finite integral. +For example, the naive importance sample estimate `\\(E_q[f(Z) p(Z) / q(Z)]\\)` +involves the ratio of two terms `\\(p(Z) / q(Z)\\)`, each of which must have tails +dropping off faster than `\\(O(|z|^{-(k + 1)})\\)` in order to have finite integral. This ratio would often be zero or infinity up to numerical precision. For that reason, we write ``` -Log E_q[ f(Z) p(Z) / q(Z) ] - = Log E_q[ exp{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C} ] + C, where -C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ]. +$$Log E_q[ f(Z) p(Z) / q(Z) ]$$ +$$ = Log E_q[ \exp\{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C\} ] + C,$$ where +$$C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ].$$ ``` The maximum value of the exponentiated term will be 0.0, and the expectation diff --git a/tensorflow/docs_src/api_guides/python/contrib.losses.md b/tensorflow/docs_src/api_guides/python/contrib.losses.md index d7f862625e..8b7442216c 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.losses.md +++ b/tensorflow/docs_src/api_guides/python/contrib.losses.md @@ -107,19 +107,19 @@ weighted average over the individual prediction errors: loss = tf.contrib.losses.mean_squared_error(predictions, depths, weight) ``` -@{tf.contrib.losses.absolute_difference} -@{tf.contrib.losses.add_loss} -@{tf.contrib.losses.hinge_loss} -@{tf.contrib.losses.compute_weighted_loss} -@{tf.contrib.losses.cosine_distance} -@{tf.contrib.losses.get_losses} -@{tf.contrib.losses.get_regularization_losses} -@{tf.contrib.losses.get_total_loss} -@{tf.contrib.losses.log_loss} -@{tf.contrib.losses.mean_pairwise_squared_error} -@{tf.contrib.losses.mean_squared_error} -@{tf.contrib.losses.sigmoid_cross_entropy} -@{tf.contrib.losses.softmax_cross_entropy} -@{tf.contrib.losses.sparse_softmax_cross_entropy} +* @{tf.contrib.losses.absolute_difference} +* @{tf.contrib.losses.add_loss} +* @{tf.contrib.losses.hinge_loss} +* @{tf.contrib.losses.compute_weighted_loss} +* @{tf.contrib.losses.cosine_distance} +* @{tf.contrib.losses.get_losses} +* @{tf.contrib.losses.get_regularization_losses} +* @{tf.contrib.losses.get_total_loss} +* @{tf.contrib.losses.log_loss} +* @{tf.contrib.losses.mean_pairwise_squared_error} +* @{tf.contrib.losses.mean_squared_error} +* @{tf.contrib.losses.sigmoid_cross_entropy} +* @{tf.contrib.losses.softmax_cross_entropy} +* @{tf.contrib.losses.sparse_softmax_cross_entropy} diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 003e0a25ec..6f2107ef40 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -477,31 +477,29 @@ should use Markdown in the docstring. Here's a simple example: -```python -def foo(x, y, name="bar"): - """Computes foo. + def foo(x, y, name="bar"): + """Computes foo. - Given two 1-D tensors `x` and `y`, this operation computes the foo. + Given two 1-D tensors `x` and `y`, this operation computes the foo. - Example: + Example: - ``` - # x is [1, 1] - # y is [2, 2] - tf.foo(x, y) ==> [3, 3] - ``` - Args: - x: A `Tensor` of type `int32`. - y: A `Tensor` of type `int32`. - name: A name for the operation (optional). + ``` + # x is [1, 1] + # y is [2, 2] + tf.foo(x, y) ==> [3, 3] + ``` + Args: + x: A `Tensor` of type `int32`. + y: A `Tensor` of type `int32`. + name: A name for the operation (optional). - Returns: - A `Tensor` of type `int32` that is the foo of `x` and `y`. + Returns: + A `Tensor` of type `int32` that is the foo of `x` and `y`. - Raises: - ValueError: If `x` or `y` are not of type `int32`. - """ -``` + Raises: + ValueError: If `x` or `y` are not of type `int32`. + """ ## Description of the docstring sections diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 0481c97885..9059b3f3b6 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 8f89898c92..2e47a6d212 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0ee9c849e1..eff066d200 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0 + 1.7.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0 + 1.7.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0 + 1.7.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.6.0 + 1.7.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0.jar HelloTF.java
+
javac -cp libtensorflow-1.7.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.7.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.7.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 5e9a84bff6..27b696696d 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -165,7 +165,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -270,7 +270,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -456,7 +456,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -630,14 +630,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -649,14 +649,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -668,14 +668,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -687,14 +687,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 55b460e189..7060ef43da 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -118,8 +118,8 @@ Take the following steps to install TensorFlow with Virtualenv: Python 2.7, the command to install TensorFlow in the active Virtualenv is as follows: -
 $ pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+
 $ pip3 install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -241,8 +241,8 @@ take the following steps: you are installing TensorFlow for macOS and Python 2.7 issue the following command: -
 $ sudo pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl 
+
 $ sudo pip3 install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl 
If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl @@ -524,7 +524,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
 
@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index a7f33819b4..148f80efe2 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -350,10 +350,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0 on Linux: +for TensorFlow 1.7.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc1-py2-none-any.whl
 
## Validate your installation @@ -450,6 +450,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.7.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
+ + @@ -469,6 +471,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
+ @@ -483,6 +486,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
+ + diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index ca9cb043e9..778e4d3a62 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -233,6 +233,8 @@ order by how long they took. From left to right, the columns are: - The cumulative total time of this and the previous ops in the table. This is handy for understanding what the distribution of work is across the layers, to see if just a few of the nodes are taking up most of the time. + +- The amount of memory consumed by outputs of this type of op. - Name of the node. diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 360ee302aa..8b22c04d87 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -60,7 +60,7 @@ and serialized as protocol buffers: the `NodeDef`, so if all the `Variable` weights are converted to `Const` nodes, then we only need a single `GraphDef` file to hold the model architecture and the weights. Freezing the graph handles the process of loading the - checkpoints, and then converts all Consts to Variables. You can then load the + checkpoints, and then converts all Variables to Consts. You can then load the resulting file in a single call, without having to restore variable values from checkpoints. One thing to watch out for with `GraphDef` files is that sometimes they’re stored in text format for easy inspection. These versions diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4f61c01f65..a0dd409205 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1065,7 +1065,7 @@ py_test( py_test( name = "framework_importer_test", - size = "medium", + size = "large", srcs = ["framework/importer_test.py"], main = "framework/importer_test.py", srcs_version = "PY2AND3", diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index d0ba8020c1..64c1760d5e 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -315,21 +315,39 @@ class ReverseV2Test(test_util.TensorFlowTestCase): self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1]) self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1]) + # This test covers the axis validation in the shape function + # (no eval()) + def testInvalidAxis(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [-30]) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [2]) + with self.assertRaisesRegexp(ValueError, + "axis 0 specified more than once"): + array_ops.reverse_v2(x_np, [0, -2]) + # This is the version of reverse that uses axis indices rather than # bool tensors # TODO(b/32254538): Change this test to use array_ops.reverse + # + # Note: this test passes placeholder as constant axis is validated + # in shape function (see testInvalidAxis) def testInvalid(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + axis = array_ops.placeholder(dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [-30]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis 0 specified more than once"): - array_ops.reverse_v2(x_np, [0, -2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]}) def testReverse1DimAuto(self): for dtype in [ @@ -890,7 +908,7 @@ class StridedSliceAssignChecker(object): var = resource_variable_ops.ResourceVariable(self.x) else: var = variables.Variable(self.x) - sess.run(variables.initialize_variables([var])) + sess.run(variables.variables_initializer([var])) val = sess.run(var[index].assign(value)) # val_copy is used to check that tf.assign works equivalently to the # assign method above. diff --git a/tensorflow/python/kernel_tests/testdata/BUILD b/tensorflow/python/kernel_tests/testdata/BUILD index a4a0dfc139..45264c773a 100644 --- a/tensorflow/python/kernel_tests/testdata/BUILD +++ b/tensorflow/python/kernel_tests/testdata/BUILD @@ -1,7 +1,7 @@ # Data files for kernel tests. package( - default_visibility = ["//tensorflow:internal"], + default_visibility = ["//visibility:public"], ) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index e3e120a4eb..60c726d54c 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -18,10 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools +import sys + import numpy as np +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl @@ -88,7 +94,7 @@ class XentTest(test.TestCase): 4.]]]).astype(dtype) np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) - self.assertRaisesRegexp(ValueError, "must be rank 2", + self.assertRaisesRegexp(ValueError, "rank 2, but is rank 3", gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) @@ -128,6 +134,24 @@ class XentTest(test.TestCase): self.assertAllClose( np.array([1.3862, 1.9401]), np_loss, rtol=1.e-3, atol=1.e-3) + def testShapeBroadcast(self): + np_f = np.array([[1., 2., 3., 4.], + [1., 2., 3., 4.]]).astype(np.float32) + np_l = np.array([[0., 0., 0., 1.], + [0., .5, .5, 0.]]).astype(np.float32) + np_loss, np_backprop = self._npXent(np_f, np_l) + tf_f = constant_op.constant( + np.array([[1., 2., 3., 4.]]).astype(np.float32)) + tf_l = constant_op.constant( + np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float32)) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu) as sess: + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( + tf_f, tf_l) + tf_loss, tf_backprop = sess.run([loss, backprop]) + self.assertAllCloseAccordingToType(np_loss, tf_loss) + self.assertAllCloseAccordingToType(np_backprop, tf_backprop) + def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): @@ -260,5 +284,60 @@ class XentTest(test.TestCase): self.assertAllEqual(np_loss, tf_loss) +class XentBenchmark(test.Benchmark): + + def benchmarkZeroDimension(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "zero_dimension_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + logits = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + def benchmarkSingleClass(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "single_class_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = constant_op.constant([[1.], [-1.], [0.]], + dtype=dtypes.float32) + logits = constant_op.constant([[-1.], [0.], [1.]], + dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 74e7c63fb3..2d99b1688f 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -180,6 +180,8 @@ class _Conv(base.Layer): # bias_add when computing gradients. To use bias_add, we collapse Z # and Y into a single dimension to obtain a 4D input tensor. outputs_shape = outputs.shape.as_list() + if outputs_shape[0] is None: + outputs_shape[0] = -1 outputs_4d = array_ops.reshape(outputs, [outputs_shape[0], outputs_shape[1], outputs_shape[2] * outputs_shape[3], diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py index 160e732b67..cdb42f5bd1 100644 --- a/tensorflow/python/layers/convolutional_test.py +++ b/tensorflow/python/layers/convolutional_test.py @@ -325,6 +325,12 @@ class ConvTest(test.TestCase): self.assertEqual(conv3d.kernel_constraint, k_constraint) self.assertEqual(conv3d.bias_constraint, b_constraint) + def testConv3DChannelsFirst(self): + # Test case for GitHub issue 15655 + images = array_ops.placeholder( + dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) + conv_layers.conv3d(images, 32, 9, data_format='channels_first') + @test_util.with_c_api class SeparableConv1DTest(test.TestCase): diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 5b4fb4f7c8..170861b43f 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -429,7 +429,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): u, s, v_adj = np.linalg.svd(a, full_matrices=False) np_a_approx = np.dot(u, np.dot(np.diag(s), v_adj)) # tf_a_approx and np_a_approx should be numerically close. - ```` + ``` @end_compatibility """ s, u, v = gen_linalg_ops.svd( diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 6c5c9e01a7..4ce6f6d002 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -281,13 +281,14 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold=None, hooks=None, chief_only_hooks=None, - save_checkpoint_secs=600, + save_checkpoint_secs=USE_DEFAULT, save_summaries_steps=USE_DEFAULT, save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, log_step_count_steps=100, - max_wait_secs=7200): + max_wait_secs=7200, + save_checkpoint_steps=USE_DEFAULT): """Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -310,8 +311,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if `is_chief==True`, ignore otherwise. save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. + using a default checkpoint saver. If both `save_checkpoint_steps` and + `save_checkpoint_secs` are set to `None`, then the default checkpoint + saver isn't used. If both are provided, then only `save_checkpoint_secs` + is used. Default 600. save_summaries_steps: The frequency, in number of global steps, that the summaries are written to disk using a default summary saver. If both `save_summaries_steps` and `save_summaries_secs` are set to `None`, then @@ -330,6 +333,11 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name become available. This should be kept relatively short to help detect incorrect code, but sometimes may need to be increased if the chief takes a while to start up. + save_checkpoint_steps: The frequency, in number of global steps, that a + checkpoint is saved using a default checkpoint saver. If both + `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then + the default checkpoint saver isn't used. If both are provided, then only + `save_checkpoint_secs` is used. Default not enabled. Returns: A `MonitoredSession` object. @@ -342,6 +350,15 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name elif save_summaries_steps == USE_DEFAULT: save_summaries_steps = None + if (save_checkpoint_steps == USE_DEFAULT and + save_checkpoint_secs == USE_DEFAULT): + save_checkpoint_steps = None + save_checkpoint_secs = 600 + elif save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_secs = None + elif save_checkpoint_steps == USE_DEFAULT: + save_checkpoint_steps = None + scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( @@ -374,9 +391,13 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_steps=save_summaries_steps, save_secs=save_summaries_secs, output_dir=checkpoint_dir)) - if save_checkpoint_secs and save_checkpoint_secs > 0: + if (save_checkpoint_secs and save_checkpoint_secs > 0) or ( + save_checkpoint_steps and save_checkpoint_steps > 0): all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( - checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold)) + checkpoint_dir, + save_steps=save_checkpoint_steps, + save_secs=save_checkpoint_secs, + scaffold=scaffold)) if hooks: all_hooks.extend(hooks) diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 159b2d5c16..3806056f01 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -282,6 +282,42 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) + def test_save_checkpoint_steps(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_steps=100, + log_step_count_steps=10) as session: + for _ in range(100): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(100, session.run(gstep)) + + def test_save_checkpoint_secs(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_secs=0.1, + log_step_count_steps=10) as session: + session.run(new_gstep) + time.sleep(0.2) + for _ in range(10): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(11, session.run(gstep)) + def test_summaries_steps(self): logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2d3cb415fe..fcc57d506e 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -22,6 +22,7 @@ load( load( "//third_party/mkl:build_defs.bzl", "if_mkl", + "if_mkl_lnx_x64" ) def register_extension_info(**kwargs): @@ -202,7 +203,8 @@ def tf_copts(android_optimization_level_override="-O2", is_external=False): "-ftemplate-depth=900"]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) - + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",]) + + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + + if_mkl_lnx_x64(["-fopenmp"]) + if_android_arm(["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + if_ios_x86_64(["-msse4.1"]) diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index c75ee474aa..bec72e1e60 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -238,7 +238,7 @@ tf_module { } member_method { name: "MonitoredTrainingSession" - argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\'], " + argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\', \'save_checkpoint_steps\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\', \'\'], " } member_method { name: "NewCheckpointReader" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index e1b56b9a25..7d471b4703 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -31,5 +31,5 @@ export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" -export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +export PATH="$PATH:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" build_libtensorflow_tarball "-cpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 22c73c3fe1..11f476d12c 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 3690e7dfe5..037d13116e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.6 +ARG TF_BRANCH=r1.7 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 69ba340f92..1fcb6428b2 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 3fbdb5cacd..0ede8c6370 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -138,7 +138,6 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]), outs = ["include/tensorflow/c/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", @@ -176,7 +175,6 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]), outs = ["include/tensorflow/jni/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index dd75eda231..62fec2c402 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -127,7 +127,6 @@ filegroup( "@org_python_pypi_backports_weakref//:LICENSE", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]) + if_not_windows([ "@nccl_archive//:LICENSE.txt", ]) + tf_additional_license_deps(), diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e0152da4df..365e8d6b08 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0' +_VERSION = '1.7.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.6.0, < 1.7.0', + 'tensorboard >= 1.7.0, < 1.8.0', 'termcolor >= 1.1.0', ] @@ -62,7 +62,7 @@ else: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.7.0a0, < 1.8.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.8.0a0, < 1.9.0a0' break # weakref.finalize and enum were introduced in Python 3.4 diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9fcbfb664b..5f6e717532 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -15,6 +15,11 @@ load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_ load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +# Sanitize a dependency so that it works correctly from code that includes +# TensorFlow as a submodule. +def clean_dep(dep): + return str(Label(dep)) + # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -32,17 +37,37 @@ def tf_workspace(path_prefix="", tf_repo_name=""): arm_compiler_configure( name="local_config_arm_compiler", remote_config_repo="../arm_compiler", - build_file = str(Label("//third_party/toolchains/cpus/arm:BUILD"))) + build_file = clean_dep("//third_party/toolchains/cpus/arm:BUILD")) mkl_repository( - name = "mkl", + name = "mkl_linux", + urls = [ + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", + ], + sha256 = "feacc3d82565c1231470359b42c696236fae873704e0b013436afba5fd4fd30f", + strip_prefix = "mklml_lnx_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") + ) + mkl_repository( + name = "mkl_windows", + urls = [ + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip" + ], + sha256 = "24bae8d7b22b431a654acadea43f2243c46ae6b1e5a73a4a936825f31d284ee4", + strip_prefix = "mklml_win_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") + ) + mkl_repository( + name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", - "https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz" ], - sha256 = "6b07cb7e5451db67c2e31e785ae458b18f7f363c60a61685488f69e9ae7199d4", - strip_prefix = "mklml_lnx_2018.0.1.20171007", - build_file = str(Label("//third_party/mkl:mkl.BUILD")), + sha256 = "0e954ec6fd3dc5e37f64c4043f6b5613dd687558da3df1028b3b7c29ff5cf77f", + strip_prefix = "mklml_mac_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) if path_prefix: @@ -52,12 +77,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", - "https://github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.12.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.12.tar.gz", ], - sha256 = "02e244f63dd95402691a361392504c143eede9a89043426f174836638a9cbf09", - strip_prefix = "mkl-dnn-e0bfcaa7fcb2b1e1558f5f0676933c1db807a729", - build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")), + sha256 = "86fa2a8c12a56e3b725945acedeaa82492746be02545aba6d710f097e013e19e", + strip_prefix = "mkl-dnn-0.12", + build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) tf_http_archive( @@ -68,7 +93,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478", strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f", - build_file = str(Label("//third_party:com_google_absl.BUILD")), + build_file = clean_dep("//third_party:com_google_absl.BUILD"), ) tf_http_archive( @@ -79,8 +104,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "0cadb31a35b514bf2dfd6b5d38205da94ef326ec6908fc3fd7c269948467214f", strip_prefix = "eigen-eigen-2355b229ea4c", - build_file = str(Label("//third_party:eigen.BUILD")), - patch_file = str(Label("//third_party:eigen_fix_cuda_compilation.patch")) + build_file = clean_dep("//third_party:eigen.BUILD"), + patch_file = clean_dep("//third_party:eigen_fix_cuda_compilation.patch") ) tf_http_archive( @@ -93,7 +118,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # remove the whitelist entry in third_party/repo.bzl. # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz", ], - build_file = str(Label("//:arm_compiler.BUILD")), + build_file = clean_dep("//:arm_compiler.BUILD"), ) tf_http_archive( @@ -104,7 +129,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ade869c3f42f23b5263c7d594aa3c7e5e61ac6a3afcaf5d6e42899d2a7986ce", strip_prefix = "libxsmm-1.8.1", - build_file = str(Label("//third_party:libxsmm.BUILD")), + build_file = clean_dep("//third_party:libxsmm.BUILD"), ) tf_http_archive( @@ -117,7 +142,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "932075525642b04ac6f1b50589f1df5cd72ec2f448b721fd32234cf183f0e755", strip_prefix = "or-tools-253f7955c6a1fd805408fba2e42ac6d45b312d15/src", - build_file = str(Label("//third_party:ortools.BUILD")), + build_file = clean_dep("//third_party:ortools.BUILD"), ) tf_http_archive( @@ -149,7 +174,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", - build_file = str(Label("//third_party:farmhash.BUILD")), + build_file = clean_dep("//third_party:farmhash.BUILD"), ) tf_http_archive( @@ -160,7 +185,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", - build_file = str(Label("//third_party:highwayhash.BUILD")), + build_file = clean_dep("//third_party:highwayhash.BUILD"), ) tf_http_archive( @@ -171,7 +196,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", strip_prefix = "nasm-2.12.02", - build_file = str(Label("//third_party:nasm.BUILD")), + build_file = clean_dep("//third_party:nasm.BUILD"), ) tf_http_archive( @@ -182,7 +207,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", - build_file = str(Label("//third_party/jpeg:jpeg.BUILD")), + build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), ) tf_http_archive( @@ -193,7 +218,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "716c59c7dfc808a4c368f8ada526932be72b2fcea11dd85dc9d88b1df1dfe9c2", strip_prefix = "libpng-1.2.53", - build_file = str(Label("//third_party:png.BUILD")), + build_file = clean_dep("//third_party:png.BUILD"), ) tf_http_archive( @@ -204,7 +229,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", strip_prefix = "sqlite-amalgamation-3200000", - build_file = str(Label("//third_party:sqlite.BUILD")), + build_file = clean_dep("//third_party:sqlite.BUILD"), ) tf_http_archive( @@ -215,7 +240,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1", strip_prefix = "giflib-5.1.4", - build_file = str(Label("//third_party:gif.BUILD")), + build_file = clean_dep("//third_party:gif.BUILD"), ) tf_http_archive( @@ -226,7 +251,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", strip_prefix = "six-1.10.0", - build_file = str(Label("//third_party:six.BUILD")), + build_file = clean_dep("//third_party:six.BUILD"), ) tf_http_archive( @@ -237,7 +262,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d", strip_prefix = "astor-0.6.2", - build_file = str(Label("//third_party:astor.BUILD")), + build_file = clean_dep("//third_party:astor.BUILD"), ) tf_http_archive( @@ -248,7 +273,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930", strip_prefix = "gast-0.2.0", - build_file = str(Label("//third_party:gast.BUILD")), + build_file = clean_dep("//third_party:gast.BUILD"), ) tf_http_archive( @@ -259,7 +284,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b", strip_prefix = "termcolor-1.1.0", - build_file = str(Label("//third_party:termcolor.BUILD")), + build_file = clean_dep("//third_party:termcolor.BUILD"), ) tf_http_archive( @@ -280,7 +305,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892", strip_prefix = "backports.weakref-1.0rc1/src", - build_file = str(Label("//third_party:backports_weakref.BUILD")), + build_file = clean_dep("//third_party:backports_weakref.BUILD"), ) tf_http_archive( @@ -291,7 +316,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", strip_prefix = "codegen-1.0", - build_file = str(Label("//third_party:codegen.BUILD")), + build_file = clean_dep("//third_party:codegen.BUILD"), ) filegroup_external( @@ -376,7 +401,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://ftp.exim.org/pub/pcre/pcre-8.39.tar.gz", ], strip_prefix = "pcre-8.39", - build_file = str(Label("//third_party:pcre.BUILD")), + build_file = clean_dep("//third_party:pcre.BUILD"), ) tf_http_archive( @@ -388,7 +413,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://pilotfiber.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", ], strip_prefix = "swig-3.0.8", - build_file = str(Label("//third_party:swig.BUILD")), + build_file = clean_dep("//third_party:swig.BUILD"), ) tf_http_archive( @@ -399,7 +424,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://curl.haxx.se/download/curl-7.49.1.tar.gz", ], strip_prefix = "curl-7.49.1", - build_file = str(Label("//third_party:curl.BUILD")), + build_file = clean_dep("//third_party:curl.BUILD"), ) tf_http_archive( @@ -421,7 +446,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", ], strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", - build_file = str(Label("//third_party:linenoise.BUILD")), + build_file = clean_dep("//third_party:linenoise.BUILD"), ) # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. @@ -434,7 +459,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1efbb9b05af88368be984d2f6526061d4a857181ef10f8841889a3a46869bb01", strip_prefix = "llvm-1c3cdea2f181d8e14ee184466c5fb237f1b4cda8", - build_file = str(Label("//third_party/llvm:llvm.BUILD")), + build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) tf_http_archive( @@ -445,7 +470,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "108532fb94c6f227558d45be3f3347b52539f0f58290a7bb31ec06c462d05326", strip_prefix = "lmdb-LMDB_0.9.19/libraries/liblmdb", - build_file = str(Label("//third_party:lmdb.BUILD")), + build_file = clean_dep("//third_party:lmdb.BUILD"), ) tf_http_archive( @@ -456,7 +481,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "07d34db40593d257324ec5fb9debc4dc33f29f8fb44e33a2eeb35503e61d0fe2", strip_prefix = "jsoncpp-11086dd6a7eba04289944367ca82cea71299ed70", - build_file = str(Label("//third_party:jsoncpp.BUILD")), + build_file = clean_dep("//third_party:jsoncpp.BUILD"), ) tf_http_archive( @@ -477,7 +502,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d", strip_prefix = "zlib-1.2.8", - build_file = str(Label("//third_party:zlib.BUILD")), + build_file = clean_dep("//third_party:zlib.BUILD"), ) tf_http_archive( @@ -487,7 +512,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz", ], sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296", - build_file = str(Label("//third_party/fft2d:fft2d.BUILD")), + build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"), ) tf_http_archive( @@ -498,7 +523,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", strip_prefix = "snappy-1.1.4", - build_file = str(Label("//third_party:snappy.BUILD")), + build_file = clean_dep("//third_party:snappy.BUILD"), ) tf_http_archive( @@ -509,7 +534,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", - build_file = str(Label("//third_party:nccl.BUILD")), + build_file = clean_dep("//third_party:nccl.BUILD"), ) tf_http_archive( @@ -520,8 +545,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e", strip_prefix = "librdkafka-0.11.1", - build_file = str(Label("//third_party:kafka/BUILD")), - patch_file = str(Label("//third_party/kafka:config.patch")), + build_file = clean_dep("//third_party:kafka/BUILD"), + patch_file = clean_dep("//third_party/kafka:config.patch"), ) tf_http_archive( @@ -532,7 +557,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", strip_prefix = "aws-sdk-cpp-1.3.15", - build_file = str(Label("//third_party:aws.BUILD")), + build_file = clean_dep("//third_party:aws.BUILD"), ) java_import_external( @@ -568,7 +593,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", - build_file = str(Label("//third_party:jemalloc.BUILD")), + build_file = clean_dep("//third_party:jemalloc.BUILD"), ) java_import_external( @@ -613,7 +638,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", - build_file = str(Label("//third_party:pprof.BUILD")), + build_file = clean_dep("//third_party:pprof.BUILD"), ) tf_http_archive( @@ -624,7 +649,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", strip_prefix = "cub-1.8.0", - build_file = str(Label("//third_party:cub.BUILD")), + build_file = clean_dep("//third_party:cub.BUILD"), ) tf_http_archive( @@ -635,7 +660,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/cython/cython/archive/3732784c45cfb040a5b0936951d196f83a12ea17.tar.gz", ], strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17", - build_file = str(Label("//third_party:cython.BUILD")), + build_file = clean_dep("//third_party:cython.BUILD"), delete = ["BUILD.bazel"], ) @@ -657,7 +682,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", ], - build_file = str(Label("//third_party:arm_neon_2_x86_sse.BUILD")), + build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), ) tf_http_archive( @@ -668,7 +693,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], - build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")), + build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), ) tf_http_archive( @@ -678,7 +703,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", ], - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), + build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"), ) tf_http_archive( @@ -688,7 +713,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip" ], - build_file = str(Label("//third_party:tflite_smartreply.BUILD")), + build_file = clean_dep("//third_party:tflite_smartreply.BUILD"), ) ############################################################################## @@ -752,7 +777,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # Needed by Protobuf native.bind( name = "python_headers", - actual = str(Label("//util/python:python_headers")), + actual = clean_dep("//util/python:python_headers"), ) # Needed by Protobuf diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index b27d341404..c2adf578c7 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -1,7 +1,5 @@ licenses(["notice"]) # 3-Clause BSD -exports_files(["LICENSE"]) - config_setting( name = "using_mkl", values = { @@ -10,17 +8,51 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "using_mkl_lnx_x64", + values = { + "cpu": "k8", + "define": "using_mkl=true", + }, + visibility = ["//visibility:public"], +) + load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +filegroup( + name = "LICENSE", + srcs = ["MKL_LICENSE"] + select({ + "@org_tensorflow//tensorflow:linux_x86_64": [ + "@mkl_linux//:LICENSE", + ], + "@org_tensorflow//tensorflow:darwin": [ + "@mkl_darwin//:LICENSE", + ], + "@org_tensorflow//tensorflow:windows": [ + "@mkl_windows//:LICENSE", + ], + }), + visibility = ["//visibility:public"], +) + cc_library( name = "intel_binary_blob", - srcs = if_mkl([ - "@mkl//:libmklml_intel.so", - "@mkl//:libiomp5.so", - ]), visibility = ["//visibility:public"], - deps = ["@mkl//:mkl_headers"], + deps = select({ + "@org_tensorflow//tensorflow:linux_x86_64": [ + "@mkl_linux//:mkl_headers", + "@mkl_linux//:mkl_libs_linux", + ], + "@org_tensorflow//tensorflow:darwin": [ + "@mkl_darwin//:mkl_headers", + "@mkl_darwin//:mkl_libs_darwin", + ], + "@org_tensorflow//tensorflow:windows": [ + "@mkl_windows//:mkl_headers", + "@mkl_windows//:mkl_libs_windows", + ], + }), ) diff --git a/third_party/mkl/MKL_LICENSE b/third_party/mkl/MKL_LICENSE new file mode 100644 index 0000000000..9c8f3ea087 --- /dev/null +++ b/third_party/mkl/MKL_LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 8b73ddabdd..53e02769da 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -24,6 +24,18 @@ def if_mkl(if_true, if_false = []): "//conditions:default": if_false }) +def if_mkl_lnx_x64(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with MKL. + + Returns a select statement which evaluates to if_true if we're building + with MKL enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true, + "//conditions:default": if_false + }) + def _enable_local_mkl(repository_ctx): return _TF_MKL_ROOT in repository_ctx.os.environ diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD index 8db97232e1..c3a71e4ff9 100644 --- a/third_party/mkl/mkl.BUILD +++ b/third_party/mkl/mkl.BUILD @@ -17,14 +17,29 @@ cc_library( visibility = ["//visibility:public"], ) -filegroup( - name = "libmklml_intel.so", - srcs = ["lib/libmklml_intel.so"], +cc_library( + name = "mkl_libs_linux", + srcs = [ + "lib/libiomp5.so", + "lib/libmklml_intel.so", + ], visibility = ["//visibility:public"], ) -filegroup( - name = "libiomp5.so", - srcs = ["lib/libiomp5.so"], +cc_library( + name = "mkl_libs_darwin", + srcs = [ + "lib/libiomp5.dylib", + "lib/libmklml.dylib", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "mkl_libs_windows", + srcs = [ + "lib/libiomp5md.lib", + "lib/mklml.lib", + ], visibility = ["//visibility:public"], ) -- GitLab From 828ebed1fe252339769ddc0acde83a55219b38c0 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Mar 2018 14:42:57 -0700 Subject: [PATCH 637/960] Internal change. PiperOrigin-RevId: 190836675 --- tensorflow/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 228d1c2452..05f34db14b 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1569,7 +1569,7 @@ cuda_py_test( cuda_py_test( name = "init_ops_test", - size = "small", + size = "medium", srcs = ["init_ops_test.py"], additional_deps = [ "//third_party/py/numpy", -- GitLab From 355c88503a3a998aef3c1dc51045409778afd578 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 14:47:00 -0700 Subject: [PATCH 638/960] Use high precision to compute softmax_cross_entropy_with_logits. PiperOrigin-RevId: 190837379 --- tensorflow/core/kernels/cwise_op_log.cc | 4 +- tensorflow/python/ops/nn_ops.py | 8 ++-- tensorflow/python/ops/nn_test.py | 51 +++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc index 98936e0f96..5d17c890cf 100644 --- a/tensorflow/core/kernels/cwise_op_log.cc +++ b/tensorflow/core/kernels/cwise_op_log.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Log", functor::log, float, Eigen::half, double, + bfloat16, complex64, complex128); #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double); diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a74de39eab..0c55386241 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1836,8 +1836,10 @@ def softmax_cross_entropy_with_logits_v2( [logits, labels]) as name: logits = ops.convert_to_tensor(logits, name="logits") labels = ops.convert_to_tensor(labels, name="labels") + convert_to_float32 = ( + logits.dtype == dtypes.float16 or logits.dtype == dtypes.bfloat16) precise_logits = math_ops.cast( - logits, dtypes.float32) if (logits.dtype == dtypes.float16) else logits + logits, dtypes.float32) if convert_to_float32 else logits # labels and logits must be of the same type labels = math_ops.cast(labels, precise_logits.dtype) input_rank = array_ops.rank(precise_logits) @@ -1883,8 +1885,8 @@ def softmax_cross_entropy_with_logits_v2( del shape[dim] cost.set_shape(shape) - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) + if convert_to_float32: + return math_ops.cast(cost, logits.dtype) else: return cost diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index af9dae2aa6..da86d5f6ca 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -852,6 +852,57 @@ class ComputeSampledLogitsTest(test_lib.TestCase): self.assertAllClose(exp_sampled_softmax_loss, got_sampled_softmax_loss.eval(), 1e-4) + def testSampledSoftmaxLossBf16(self): + # A simple test to verify the numerics for bfloat16. + def _SoftmaxCrossEntropyWithLogits(logits, targets): + # logits, targets: float arrays of the same shape. + assert logits.shape == targets.shape + stable_exp_logits = np.exp( + logits - np.amax(logits, axis=1, keepdims=True)) + pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True) + return -np.sum(targets * np.log(pred + 1.0e-20), axis=1) + + np.random.seed(0) + num_classes = 5 + batch_size = 3 + labels = [0, 1, 2] + sampled = [1, 0, 2, 3] + (weights, biases, hidden_acts, _, exp_logits, + exp_labels) = self._GenerateTestData( + num_classes=num_classes, + dim=10, + batch_size=batch_size, + num_true=1, + labels=labels, + sampled=sampled, + subtract_log_q=True) + exp_sampled_softmax_loss = _SoftmaxCrossEntropyWithLogits( + exp_logits, exp_labels) + + with self.test_session(): + true_exp_bf16 = np.full( + [batch_size, 1], fill_value=0.5, dtype=dtypes.bfloat16.as_numpy_dtype) + sampled_exp_bf16 = np.full( + [len(sampled)], fill_value=0.5, dtype=dtypes.bfloat16.as_numpy_dtype) + sampled_vals_bf16 = (sampled, true_exp_bf16, sampled_exp_bf16) + + got_sampled_softmax_loss = math_ops.cast( + nn_impl.sampled_softmax_loss( + weights=constant_op.constant(weights, dtype=dtypes.bfloat16), + biases=constant_op.constant(biases, dtype=dtypes.bfloat16), + labels=constant_op.constant( + labels, shape=(batch_size, 1), dtype=dtypes.bfloat16), + inputs=constant_op.constant(hidden_acts, dtype=dtypes.bfloat16), + num_sampled=4, + num_classes=num_classes, + num_true=1, + sampled_values=sampled_vals_bf16, + remove_accidental_hits=False, + partition_strategy="div"), dtypes.float32) + + self.assertAllClose(exp_sampled_softmax_loss, + got_sampled_softmax_loss.eval(), 1e-1) + class CReluTest(test_lib.TestCase): -- GitLab From 9e6f84b6c8f1d052272d75bcde186b7f1012df48 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 14:48:50 -0700 Subject: [PATCH 639/960] Internal change PiperOrigin-RevId: 190837707 --- tensorflow/core/BUILD | 29 ++++++++++++++----- .../core/platform/default/build_config/BUILD | 5 ++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1d11410332..4726946277 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -379,13 +379,13 @@ cc_library( ) cc_library( - name = "session_message", - srcs = ["util/session_message.cc"], - hdrs = ["util/session_message.h"], + name = "stacktrace", + srcs = glob(["platform/*/stacktrace.h"]), + hdrs = ["platform/stacktrace.h"], deps = [ - ":framework", - ":lib", - ":protos_all_cc", + ":abi", + ":lib_platform", + "//tensorflow/core/platform/default/build_config:stacktrace", ], ) @@ -394,8 +394,20 @@ cc_library( srcs = ["platform/stacktrace_handler.cc"], hdrs = ["platform/stacktrace_handler.h"], deps = [ - ":lib", + ":abi", ":lib_platform", + ":stacktrace", + ], +) + +cc_library( + name = "session_message", + srcs = ["util/session_message.cc"], + hdrs = ["util/session_message.h"], + deps = [ + ":framework", + ":lib", + ":protos_all_cc", ], ) @@ -1624,6 +1636,7 @@ cc_library( "platform/**/env_time.cc", "platform/**/cuda_libdevice_path.cc", "platform/**/device_tracer.cc", + "platform/abi.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ], @@ -1635,6 +1648,7 @@ cc_library( "platform/**/stream_executor.h", "platform/**/env_time.cc", "platform/**/device_tracer.cc", + "platform/abi.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ] + @@ -1648,6 +1662,7 @@ cc_library( deps = tf_additional_lib_deps() + [ ":lib_hash_crc32c_accelerate_internal", ":lib_proto_parsing", + ":abi", "//third_party/eigen3", "//tensorflow/core/platform/default/build_config:platformlib", "@snappy", diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index 2cd607edbe..afb1d84d14 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -128,6 +128,11 @@ cc_library( ], ) +cc_library( + name = "stacktrace", + srcs = [], +) + cc_library( name = "gif", copts = tf_copts(), -- GitLab From 15908d912ed26f2517207e0a0bea6cd5768476ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 14:52:25 -0700 Subject: [PATCH 640/960] Add DistributionStrategy support to Optimizer. PiperOrigin-RevId: 190838314 --- tensorflow/python/training/optimizer.py | 174 +++++++++++++++++++++++- 1 file changed, 172 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index bf79714f96..75665fc284 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -35,11 +35,28 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import checkpointable +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import slot_creator from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export +def get_filtered_grad_fn(grad_fn): + # `distributed_context.join()` requires that its arguments are parallel + # across threads, and in particular that `grads_and_vars` has the same + # variables in the same order. + + # When computing gradients in eager mode with multiple threads, you + # can get extra variables with a gradient of `None`. This happens when + # those variables are accessed in another thread during the gradient + # computation. To get a consistent set of variables, we filter out + # those with `None` gradients. + def filtered_grad_fn(x=None): + return [(g, v) for g, v in grad_fn(x) if g is not None] + + return filtered_grad_fn + + def _deduplicate_indexed_slices(values, indices): """Sums `values` associated with any non-unique `indices`. @@ -335,6 +352,13 @@ class Optimizer( # ... } self._deferred_slot_restorations = {} + # TODO(isaprykin): When using a DistributionStrategy, and when an + # optimizer is created in each tower, it might be dangerous to + # rely on some Optimer methods. When such methods are called on a + # per-tower optimizer, an exception needs to be thrown. We do + # allow creation per-tower optimizers however, because the + # compute_gradients()->apply_gradients() sequence is safe. + def get_name(self): return self._name @@ -447,14 +471,33 @@ class Optimizer( if var_list is not None: tape.watch(var_list) loss_value = loss() + + # Scale loss if using a "mean" loss reduction and multiple towers. + # Have to be careful to call distribute_lib.get_loss_reduction() + # *after* loss() is evaluated, so we know what loss reduction it uses. + # TODO(josh11b): Test that we handle weight decay in a reasonable way. + if distribute_lib.get_loss_reduction() == "mean": + num_towers = distribute_lib.get_distribution_strategy().num_towers + if num_towers > 1: + loss_value *= (1. / num_towers) + if var_list is None: var_list = tape.watched_variables() grads = tape.gradient(loss_value, var_list, grad_loss) return list(zip(grads, var_list)) + + # Non-callable/Tensor loss case if context.executing_eagerly(): raise RuntimeError( "`loss` passed to Optimizer.compute_gradients should " "be a function when eager execution is enabled.") + + # Scale loss if using a "mean" loss reduction and multiple towers. + if distribute_lib.get_loss_reduction() == "mean": + num_towers = distribute_lib.get_distribution_strategy().num_towers + if num_towers > 1: + loss *= (1. / num_towers) + if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH]: raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, " @@ -510,11 +553,25 @@ class Optimizer( Raises: TypeError: If `grads_and_vars` is malformed. ValueError: If none of the variables have gradients. + RuntimeError: If you should use `_distributed_apply()` instead. """ # This is a default implementation of apply_gradients() that can be shared # by most optimizers. It relies on the subclass implementing the following # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse(). + # Handle DistributionStrategy case. + if distribute_lib.get_cross_tower_context(): + raise RuntimeError("Use `_distributed_apply()` instead of " + "`apply_gradients()` in a cross-tower context.") + # TODO(isaprykin): Get rid of `has_distribution_strategy()` check by + # always calling _distributed_apply(), using the default distribution + # as needed. + if distribute_lib.has_distribution_strategy(): + grads_and_vars = get_filtered_grad_fn(lambda _: grads_and_vars)() + return distribute_lib.get_tower_context().merge_call( + self._distributed_apply, grads_and_vars, global_step, name) + + # No DistributionStrategy case. grads_and_vars = tuple(grads_and_vars) # Make sure repeat iteration works. if not grads_and_vars: raise ValueError("No variables provided.") @@ -582,6 +639,95 @@ class Optimizer( return apply_updates + def _distributed_apply(self, + distribution, + grads_and_vars, + global_step=None, + name=None): + """A version of `apply_gradients` for cross-tower context. + + This is a version of `apply_gradients()` for when you are using a + `DistributionStrategy` and are in a cross-tower context. If in a + tower context, use `apply_gradients()` as normal. + + Args: + distribution: A `DistributionStrategy` object. + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`, and then aggregated across towers. + global_step: Optional (mirrored) `Variable` to increment by one + after the variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the `Optimizer` constructor. + + Returns: + An `Operation` that applies the specified gradients across all + towers. If `global_step` was not None, that operation also + increments `global_step`. + """ + reduced_grads = distribution.batch_reduce("sum", grads_and_vars) + var_list = [v for _, v in grads_and_vars] + grads_and_vars = zip(reduced_grads, var_list) + # Note that this is called in a cross-tower context. + self._create_slots(var_list) + + def update(v, g): + """Apply gradients to a replica variable.""" + assert v is not None + + try: + # Convert the grad to Tensor or IndexedSlices if necessary. + g = ops.convert_to_tensor_or_indexed_slices(g) + except TypeError: + raise TypeError("Gradient must be convertible to a Tensor" + " or IndexedSlices, or None: %s" % g) + if not isinstance(g, (ops.Tensor, ops.IndexedSlices)): + raise TypeError( + "Gradient must be a Tensor, IndexedSlices, or None: %s" % g) + p = _get_processor(v) + + scope_name = "" if context.executing_eagerly() else v.op.name + # device_policy is set because non-mirrored tensors will be read in + # `update_op`. `_resource_apply_dense`, `lr_t`, `beta1_t` and `beta2_t` + # is an example. + with ops.name_scope( + "update_" + scope_name), context.context().device_policy( + context.DEVICE_PLACEMENT_SILENT): + return p.update_op(self, g) + + with ops.name_scope(name, self._name) as name: + self._prepare() + + update_ops = [ + op + for grad, var in grads_and_vars + for op in distribution.unwrap(distribution.update(var, update, grad)) + ] + + def finish(self, update_ops): + return self._finish(update_ops, "update") + + non_slot_devices = distribution.non_slot_devices(var_list) + # Device policy is needed because hyperparameter tensors (such as + # AdamOptimizer's beta1_t) need to be copied across devices in Eager. + with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + finish_updates = distribution.update_non_slot( + non_slot_devices, finish, self, update_ops) + if global_step is None: + apply_updates = distribution.group(finish_updates, name=name) + else: + with ops.control_dependencies(distribution.unwrap(finish_updates)): + apply_updates = distribution.group(distribution.update( + global_step, state_ops.assign_add, 1, name=name)) + + if not context.executing_eagerly(): + if isinstance(apply_updates, ops.Tensor): + apply_updates = apply_updates.op + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + if apply_updates not in train_op: + train_op.append(apply_updates) + + return apply_updates + def get_slot(self, var, name): """Return a slot named `name` created for `var` by the Optimizer. @@ -599,9 +745,25 @@ class Optimizer( Returns: The `Variable` for the slot if it was created, `None` otherwise. """ + # pylint: disable=protected-access named_slots = self._slots.get(name, None) if not named_slots: return None + + if hasattr(var, "_mirrored_container"): + # NOTE: If this isn't patched, then there is no `handle` in + # `_resource_apply_dense`. + mirrored_container = var._mirrored_container() + assert mirrored_container is not None + if context.executing_eagerly(): + key = mirrored_container._unique_id + else: + key = (mirrored_container.graph, mirrored_container._shared_name) + # pylint: enable=protected-access + mirrored_slot = named_slots.get(key, None) + if mirrored_slot is None: return None + return mirrored_slot.get(device=var.device) + return named_slots.get(_var_key(var), None) def get_slot_names(self): @@ -645,6 +807,7 @@ class Optimizer( def _create_non_slot_variable(self, initial_value, name, colocate_with): """Add an extra variable, not associated with a slot.""" + # Recommendation: Use OptimizerV2 if your optimizer uses non-slot variables. eager = context.executing_eagerly() graph = None if eager else colocate_with.graph @@ -652,7 +815,8 @@ class Optimizer( v = self._non_slot_dict.get(key, None) if v is None: self._maybe_initialize_checkpointable() - with ops.colocate_with(colocate_with): + distribution_strategy = distribute_lib.get_distribution_strategy() + with distribution_strategy.colocate_vars_with(colocate_with): if eager: restored_initial_value = self._preload_simple_restoration( name=name, shape=None) @@ -694,7 +858,13 @@ class Optimizer( return self._get_non_slot_variable(name, graph=graph) def _get_non_slot_variable(self, name, graph=None): - return self._non_slot_dict.get((name, graph), None) + non_slot = self._non_slot_dict.get((name, graph), None) + if hasattr(non_slot, "_mirrored_container"): + # This is a mirrored non-slot. In order to enable code like `_finish` + # to assign to a non-slot, return the current context replica. + return non_slot.get() + else: + return non_slot def _non_slot_variables(self): """Additional variables created by the `Optimizer`. -- GitLab From 82f2f084268d80c242596116f77a4224fc4e3a0e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 14:59:53 -0700 Subject: [PATCH 641/960] Automated g4 rollback of changelist 190801044 PiperOrigin-RevId: 190839672 --- .../optimizers/arithmetic_optimizer.cc | 49 ++++++++----------- .../optimizers/arithmetic_optimizer.h | 1 - .../optimizers/graph_optimizer_stage.cc | 4 -- .../optimizers/graph_optimizer_stage.h | 3 -- 4 files changed, 21 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 629872bf19..5dd0b6f4b0 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -196,6 +196,8 @@ void SetSourceDataType(DataType dtype, NodeDef* node) { bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); } +const char kOutputShapesAttr[] = "_output_shapes"; + // Shape is symbolically defined if it has a known rank, and each dimension is // defined, or is an unknown symbol (dim.size <= -2). bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape) { @@ -232,19 +234,16 @@ bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, // Returns whether `reshape` is an identity op. The tensor that `reshape` // reshapes is the `output_pos`-th output of node `input`. bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, - const int output_pos, - const GraphProperties& graph_properties) { - const std::vector& reshape_props = - graph_properties.GetOutputProperties(reshape.name()); - const std::vector& input_props = - graph_properties.GetOutputProperties(input.name()); - if (reshape_props.empty() || input_props.empty() || - input_props.size() <= output_pos) { + const int output_pos) { + if (!reshape.attr().count(kOutputShapesAttr) || + !input.attr().count(kOutputShapesAttr)) { return false; } - const PartialTensorShape& src_shape = input_props[output_pos].shape(); - const PartialTensorShape& dst_shape = reshape_props[0].shape(); + PartialTensorShape src_shape( + input.attr().at(kOutputShapesAttr).list().shape(output_pos)); + PartialTensorShape dst_shape( + reshape.attr().at(kOutputShapesAttr).list().shape(0)); if (src_shape.unknown_rank() || dst_shape.unknown_rank()) { return false; } @@ -1273,8 +1272,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // outputs tensors of shape [M, N] while feeding it with tensors of shape // [M*N] (or worse). The reshape nodes are then necessary to update the // tensor metadata to the required shape. - if (can_use_shapes_ && - ReshapeIsIdentity(*reshape, *input, output_pos, *graph_properties_)) { + if (ReshapeIsIdentity(*reshape, *input, output_pos)) { return reshape->input(0); } } @@ -1588,11 +1586,11 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { std::vector> stages; - if (options_.combine_add_to_addn && can_use_shapes_) { + if (options_.combine_add_to_addn) { stages.push_back(std::unique_ptr( new AddOpsRewriteStage(ctx, ctx_ext))); } - if (options_.hoist_common_factor_out_of_aggregation && can_use_shapes_) { + if (options_.hoist_common_factor_out_of_aggregation) { stages.push_back(std::unique_ptr( new HoistCommonFactorOutOfAggregation(ctx, ctx_ext))); } @@ -1629,15 +1627,7 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { if (simplified_tensor.empty()) { for (auto& stage : stages) { if (stage->IsSupported(node)) { - const Status stage_status = - stage->TrySimplify(node, &simplified_tensor); - // Each stage must be "error safe" (just like exception safe). In - // case of any error it must leave optimized graph unmodified. - if (!stage_status.ok()) { - LOG(WARNING) << "Failed to run arithmetic optimizer stage " - << stage->stage_name() - << ". Error: " << stage_status.error_message(); - } + TF_RETURN_IF_ERROR(stage->TrySimplify(node, &simplified_tensor)); if (!simplified_tensor.empty()) { break; } @@ -1704,16 +1694,19 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, &frame_map_, &num_frames)); // Shapes are only needed in aggressive mode. graph_properties_.reset(new GraphProperties(item)); - const Status status = graph_properties_->InferStatically(false); - can_use_shapes_ = status.ok(); - if (!can_use_shapes_) { - LOG(WARNING) << "Shape inference failed."; - } + TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); + // TODO(ezhulenev): Use GraphProperties to lookup tensor shapes directly + TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_)); // Perform the optimizations. DedupComputations(); TF_RETURN_IF_ERROR(SimplifyArithmeticOps()); + // Clear output shapes. + for (int i = 0; i < optimized_graph->node_size(); ++i) { + optimized_graph_->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr); + } + return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index cdeed0554e..965f0e9ea2 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -126,7 +126,6 @@ class ArithmeticOptimizer : public GraphOptimizer { RewriterConfig::Toggle opt_level_; ArithmeticOptimizerOptions options_; - bool can_use_shapes_ = false; bool fetch_nodes_known_ = false; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc index 1ea57f7b4f..7044705ade 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc @@ -42,10 +42,6 @@ Status GetInputNode(const GraphOptimizerContext& ctx, const string& input, Status GetTensorProperties(const GraphOptimizerContext& ctx, const string& tensor, OpInfo::TensorProperties* properties) { - if (ctx.graph_properties == nullptr) { - return errors::InvalidArgument("Graph properties are unknown."); - } - int port; string tensor_node_name = ParseNodeName(tensor, &port); if (port < 0) { diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h index c7af82abbb..be95c00d2d 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h @@ -117,9 +117,6 @@ class GraphOptimizerStage { : optimizer_name_(optimizer_name), stage_name_(stage_name), ctx_(ctx) {} virtual ~GraphOptimizerStage() = default; - const string& stage_name() const { return stage_name_; } - const string& optimizer_name() const { return optimizer_name_; } - // Check if we should try to simplify node. Returning true doesn't // guarantee that node will be simplified. // -- GitLab From b0e79c1c029f8829de8fce18dc16388d89e50318 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 15:31:19 -0700 Subject: [PATCH 642/960] Refresh Community pages to surface new resources, SIGs and mailing lists. PiperOrigin-RevId: 190845545 --- tensorflow/docs_src/community/contributing.md | 64 +++++++++++++ tensorflow/docs_src/community/groups.md | 17 ++++ tensorflow/docs_src/community/index.md | 95 +++++++++++++++---- tensorflow/docs_src/community/leftnav_files | 5 +- tensorflow/docs_src/community/lists.md | 35 +++++++ tensorflow/docs_src/community/welcome.md | 71 -------------- 6 files changed, 198 insertions(+), 89 deletions(-) create mode 100644 tensorflow/docs_src/community/contributing.md create mode 100644 tensorflow/docs_src/community/groups.md create mode 100644 tensorflow/docs_src/community/lists.md delete mode 100644 tensorflow/docs_src/community/welcome.md diff --git a/tensorflow/docs_src/community/contributing.md b/tensorflow/docs_src/community/contributing.md new file mode 100644 index 0000000000..b0960df435 --- /dev/null +++ b/tensorflow/docs_src/community/contributing.md @@ -0,0 +1,64 @@ +# Contributing to TensorFlow + +TensorFlow is an open-source project, and we welcome your participation +and contribution. This page describes how to get involved. + +## Repositories + +The code for TensorFlow is hosted in the [TensorFlow GitHub +organization](https://github.com/tensorflow). Multiple projects are located +inside the organization, including: + +* [TensorFlow](https://github.com/tensorflow/tensorflow) +* [Models](https://github.com/tensorflow/models) +* [TensorBoard](https://github.com/tensorflow/tensorboard) +* [TensorFlow.js](https://github.com/tensorflow/tfjs) +* [TensorFlow Serving](https://github.com/tensorflow/serving) +* [TensorFlow Documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/docs_src) + +## Contributor checklist + +* Before contributing to TensorFlow source code, please review the [contribution +guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md). + +* Join the +[developers@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/developers) +mailing list, to coordinate and discuss with others contributing to TensorFlow. + +* For coding style conventions, read the @{$style_guide$TensorFlow Style Guide}. + +* Finally, review @{$documentation$Writing TensorFlow Documentation}, which + explains documentation conventions. + +You may also wish to review our guide to @{$benchmarks$defining and running benchmarks}. + +## Special Interest Groups + +To enable focused collaboration on particular areas of TensorFlow, we host +Special Interest Groups (SIGs). SIGs do their work in public: if you want to +join and contribute, review the work of the group, and get in touch with the +relevant SIG leader. + +* **SIG Build** focuses on issues surrounding building, packaging, and + distribution of TensorFlow. [Mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/build). + +* **SIG TensorBoard** furthers the development and direction of TensorBoard and its plugins. + [Mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/tensorboard). + +* **SIG Rust** collaborates on the development of TensorFlow's Rust bindings. + [Mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/rust). + +## Projects developed by the TensorFlow community + +The TensorFlow community has created many great projects around TensorFlow, including: + +* [Machine Learning with TensorFlow (Book & Code)](http://tensorflowbook.com) +* [@jtoy's awesome "Awesome TensorFlow" list of awesome things](https://github.com/jtoy/awesome-tensorflow) +* [TensorFlow tutorials](https://github.com/pkmital/tensorflow_tutorials) +* [Caffe to TensorFlow model converter](https://github.com/ethereon/caffe-tensorflow) +* [Bitfusion's` GPU-enabled AWS EC2 TensorFlow AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-tensorflow) ([Launch AMI](https://aws.amazon.com/marketplace/pp/B01EYKBEQ0)) +* [Operator Vectorization Library](https://github.com/opveclib/opveclib) +* [Swift language bindings](https://github.com/PerfectlySoft/Perfect-TensorFlow) +* [Sublime Tensorflow - A plugin for Sublime Text](https://github.com/baptisteArnaud/Sublime-Tensorflow) +* [GPflow - Gaussian processes in TensorFlow](https://github.com/GPflow/GPflow) +* [CS 20SI: Tensorflow for Deep Learning Research](https://web.stanford.edu/class/cs20si/) - please note, this course was designed with TensorFlow v0.12, so some of the notes may be out of date - but it's still a great resource. diff --git a/tensorflow/docs_src/community/groups.md b/tensorflow/docs_src/community/groups.md new file mode 100644 index 0000000000..d92f5775fa --- /dev/null +++ b/tensorflow/docs_src/community/groups.md @@ -0,0 +1,17 @@ +# User Groups + +TensorFlow has communities around the world. + +## Asia + +* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ +* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ +* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) + + +## Europe + +* [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) +* [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) + diff --git a/tensorflow/docs_src/community/index.md b/tensorflow/docs_src/community/index.md index ebeff8493b..c08aeb7a97 100644 --- a/tensorflow/docs_src/community/index.md +++ b/tensorflow/docs_src/community/index.md @@ -1,18 +1,81 @@ # Community -This section contains the following documents: - - * @{$welcome$Welcome to the TensorFlow Community}, which explains how - you can get involved, where to report issues, and where to join - like-minded TensorFlow enthusiasts online. - * @{$roadmap$Roadmap}, which summarizes upcoming additions to TensorFlow. - * @{$documentation$Writing TensorFlow Documentation}, which explains - TensorFlow's documentation conventions. If you are modifying - TensorFlow source code or documentation, please read this guide. - * @{$style_guide$TensorFlow Style Guide}, which identifies coding style - conventions that TensorFlow developers and users should follow. - * @{$community/benchmarks$Benchmarks}, Benchmarks, a guide for defining and - running a TensorFlow benchmark. - * @{$security$Using TensorFlow Securely}, which explains TensorFlow's security - model, a list of recent security reports, and information on how you can - report a security vulnerability to the TensorFlow team. +Welcome to the TensorFlow community! This page explains where to get help, and +different ways to be part of the community. We are committed to fostering an +open and welcoming environment, and request that you review our [code of +conduct](https://github.com/tensorflow/tensorflow/blob/master/CODE_OF_CONDUCT.md). + +## Get Help + +### Technical Questions + +To ask or answer technical questions about TensorFlow, use [Stack +Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, +ask or search about a particular error message you encountered during +installation. + +### Bugs and Feature Requests + +To report bugs or make feature requests, file an issue on GitHub. Please choose +the appropriate repository for the project. Major repositories include: + + * [TensorFlow](https://github.com/tensorflow/tensorflow/issues) + * [TensorBoard](https://github.com/tensorflow/tensorboard/issues) + * [TensorFlow models](https://github.com/tensorflow/models/issues) + +### Security + +Before using TensorFlow, please take a look at our security model, list of +recent security announcements, and ways you can report security issues to the +TensorFlow team at the +[Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) page on GitHub. + +## Stay Informed + +### Announcements Mailing List + +All major releases and important announcements are sent to +[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). +We recommend that you join this list if you depend on TensorFlow in any way. + +### Development Roadmap + +The @{$roadmap$Roadmap} summarizes plans for upcoming additions to TensorFlow. + +### Social Media + +For news and updates from around the universe of TensorFlow projects, follow +[@tensorflow](https://twitter.com/tensorflow) on Twitter. + +### YouTube + +Our [YouTube Channel](http://youtube.com/tensorflow/) focuses on machine learing +and AI with TensorFlow. On it we have a number of new shows, including: + +- TensorFlow Meets: meet with community contributors to learn and share what they're doing +- Ask TensorFlow: the team answers the best questions tagged #AskTensorFlow from social media +- Coding TensorFlow: short bites with tips for success with TensorFlow + + +## Community Support + +### Mailing Lists + +For general discussion about TensorFlow development and direction, please join +the [TensorFlow discuss mailing +list](https://groups.google.com/a/tensorflow.org/d/forum/discuss). + +A number of other mailing lists exist, focused on different project areas, which +can be found at @{$lists$TensorFlow Mailing Lists}. + +### User Groups + +To meet with like-minded people local to you, check out the many +@{$groups$TensorFlow user groups} around the world. + + +## Contributing To TensorFlow + +We welcome contributions and collaboration on TensorFlow. For more information, +please read [Contributing to TensorFlow](contributing.md). + diff --git a/tensorflow/docs_src/community/leftnav_files b/tensorflow/docs_src/community/leftnav_files index af344506c7..0bd1f14de9 100644 --- a/tensorflow/docs_src/community/leftnav_files +++ b/tensorflow/docs_src/community/leftnav_files @@ -1,7 +1,8 @@ index.md -welcome.md roadmap.md +contributing.md +lists.md +groups.md documentation.md style_guide.md benchmarks.md -security.md diff --git a/tensorflow/docs_src/community/lists.md b/tensorflow/docs_src/community/lists.md new file mode 100644 index 0000000000..dc9240030e --- /dev/null +++ b/tensorflow/docs_src/community/lists.md @@ -0,0 +1,35 @@ +# Mailing Lists + +As a community, we do much of our collaboration on public mailing lists. +Please note that if you're looking for help using TensorFlow, [Stack +Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[GitHub issues](https://github.com/tensorflow/tensorflow/issues) +are the best initial places to look. For more information, +see [how to get help](/community/#get_help). + +## General TensorFlow lists + +* [announce](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce) - Low-volume announcements of new releases. +* [discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) - General community discussion around TensorFlow. +* [developers](https://groups.google.com/a/tensorflow.org/forum/#!forum/developers) - Discussion for developers contributing to TensorFlow. + +## Project-specific lists + +These projects inside the TensorFlow GitHub organization have lists dedicated to their communities: + +* [tensor2tensor](https://groups.google.com/forum/#!forum/tensor2tensor) - User + and peer support for Tensor2Tensor. + +## Special Interest Groups + +TensorFlow's [Special Interest +Groups](/community/contributing#special_interest_groups) (SIGs) support +community collaboration on particular project focuses. Members of these groups +work together to build and support TensorFlow related projects. + +* [build](https://groups.google.com/a/tensorflow.org/forum/#!forum/build) - + Supporting SIG Build, for build, distribution and packaging of TensorFlow. +* [tensorboard](https://groups.google.com/a/tensorflow.org/forum/#!forum/tensorboard) - + Supporting SIG TensorBoard, for plugin development and other contribution. +* [rust](https://groups.google.com/a/tensorflow.org/forum/#!forum/rust) - + Supporting SIG Rust, for the Rust language bindings. diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md deleted file mode 100644 index 6d0458e678..0000000000 --- a/tensorflow/docs_src/community/welcome.md +++ /dev/null @@ -1,71 +0,0 @@ -# Welcome to the TensorFlow Community - -TensorFlow is an open-source project. This page explains how to contribute, -where to ask questions, and how to help each other. - - -## Development - -The source code for TensorFlow is on -[GitHub](https://github.com/tensorflow/tensorflow). - -Before contributing to TensorFlow source code, please review the -[Contribution guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md). - -### Projects developed by the TensorFlow community - -The TensorFlow community has created many great projects around TensorFlow, including: - -* [Machine Learning with TensorFlow (Book & Code)](http://tensorflowbook.com) -* [@jtoy's awesome "Awesome TensorFlow" list of awesome things](https://github.com/jtoy/awesome-tensorflow) -* [TensorFlow tutorials](https://github.com/pkmital/tensorflow_tutorials) -* [Caffe to TensorFlow model converter](https://github.com/ethereon/caffe-tensorflow) -* [Bitfusion's` GPU-enabled AWS EC2 TensorFlow AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-tensorflow) ([Launch AMI](https://aws.amazon.com/marketplace/pp/B01EYKBEQ0)) -* [Rust language bindings](https://github.com/google/tensorflow-rust) -* [Operator Vectorization Library](https://github.com/opveclib/opveclib) -* [Swift language bindings](https://github.com/PerfectlySoft/Perfect-TensorFlow) -* [Sublime Tensorflow - A plugin for Sublime Text](https://github.com/baptisteArnaud/Sublime-Tensorflow) -* [Edward - A library for probabilistic modeling, inference, and criticism](http://edwardlib.org) ([Github](https://github.com/blei-lab/edward), [Forum](https://discourse.edwardlib.org)) -* [GPflow - Gaussian processes in TensorFlow](https://github.com/GPflow/GPflow) -* [CS 20SI: Tensorflow for Deep Learning Research](https://web.stanford.edu/class/cs20si/) - Please note, this course was designed with TensorFlow v0.12, so some of the notes may be out of date - but it's still a great resource. - -## TensorFlow Communities Around the World - -Asia: - -* [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ -* [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ -* [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ -* [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) - - -Europe: - -* [TensorFlow Barcelona](https://www.meetup.com/Barcelona-Machine-Learning-Meetup/) -* [TensorFlow Madrid](https://www.meetup.com/TensorFlow-Madrid/) - - - -## Support - -TensorFlow provides multiple communication paths. To pick the right path, -please read the following list carefully: - - * For new release announcements and security updates, subscribe to - [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). - * To ask or answer technical questions about TensorFlow, use - [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). - For example, ask or search Stack Overflow about a particular error message - you encountered during installation. - * To join general discussions about TensorFlow development and directions, - please join the - [TensorFlow discuss mailing list](https://groups.google.com/a/tensorflow.org/d/forum/discuss). - For example, use this mailing list to learn about new features in - upcoming releases of TensorFlow. - * To report bugs or make feature requests, use the - [TensorFlow issues tracker](https://github.com/tensorflow/tensorflow/issues) - on GitHub. For example, use the issue tracker to request a - new operation in TensorFlow. - * To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). - -- GitLab From 6cb3e6e0988a7bd123e683c13dae8470c71822af Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Mar 2018 15:54:31 -0700 Subject: [PATCH 643/960] [tf.data] Expose the symbol `tf.contrib.data.make_csv_dataset()`. PiperOrigin-RevId: 190849333 --- tensorflow/contrib/data/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 7c3a9f82ff..17048314a4 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -32,6 +32,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@group_by_window @@ignore_errors @@make_batched_features_dataset +@@make_csv_dataset @@make_saveable_from_iterator @@map_and_batch @@padded_batch_and_drop_remainder @@ -70,6 +71,7 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device from tensorflow.contrib.data.python.ops.readers import make_batched_features_dataset +from tensorflow.contrib.data.python.ops.readers import make_csv_dataset from tensorflow.contrib.data.python.ops.readers import read_batch_features from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample -- GitLab From 830c19c3f20816dcb5e8e9b6cb51f63cf8461442 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 16:12:51 -0700 Subject: [PATCH 644/960] Add IsSquare bool to the grappler op_types. PiperOrigin-RevId: 190852501 --- tensorflow/core/grappler/op_types.cc | 2 ++ tensorflow/core/grappler/op_types.h | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 1a6751befc..c31ac9b59c 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -309,6 +309,8 @@ bool IsSplitV(const NodeDef& node) { return node.op() == "SplitV"; } bool IsSqrtGrad(const NodeDef& node) { return node.op() == "SqrtGrad"; } +bool IsSquare(const NodeDef& node) { return node.op() == "Square"; } + bool IsSquaredDifference(const NodeDef& node) { return node.op() == "SquaredDifference"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 1ec1cd46e3..39affcbc24 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -121,6 +121,7 @@ bool IsSoftsignGrad(const NodeDef& node); bool IsSplit(const NodeDef& node); bool IsSplitV(const NodeDef& node); bool IsSqrtGrad(const NodeDef& node); +bool IsSquare(const NodeDef& node); bool IsSquaredDifference(const NodeDef& node); bool IsSqueeze(const NodeDef& node); bool IsStackOp(const NodeDef& node); -- GitLab From 390e19ab990f5656e09d98624c92b3c80e52937d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 16:16:48 -0700 Subject: [PATCH 645/960] Tower-local variable support for DistributionStrategy. Each tower has its own variable, but fetch() and checkpoint apply a reduction to get a single value. PiperOrigin-RevId: 190853123 --- tensorflow/python/training/distribute.py | 59 +++++++++++++++++++++--- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 757ba71c4a..f98872775a 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -126,16 +126,18 @@ class UpdateContext(object): def get_tower_context(): - """Returns the current TowerContext or None. + """Returns the current TowerContext or None if in a cross-tower context. Note that execution: - 1. starts in the default (single-tower) tower context; - 2. switches to cross-tower context when entering a - `with DistributionStrategy.scope():` block; + 1. starts in the default (single-tower) tower context (this function + will return the default TowerContext object); + 2. switches to cross-tower context (in which case this will return + None) when entering a `with DistributionStrategy.scope():` block; 3. switches to a (non-default) tower context inside `call_for_each_tower(fn, ...)`; 4. if `fn` calls `get_tower_context()->merge_call(merge_fn, ...)`, then - inside `merge_fn` you are back in the cross-tower context. + inside `merge_fn` you are back in the cross-tower context (and again + this function will return None). Note that you can also go directly from step 1 to 4 to switch to a cross-tower context for the default `DistributionStrategy`. You may @@ -188,6 +190,9 @@ def get_cross_tower_context(): def get_distribution_strategy(): """Returns the current `DistributionStrategy` object. + Prefer to use `get_tower_context()` or `get_cross_tower_context()` + instead when possible. + Returns: A `DistributionStrategy` object. Inside a `with distribution_strategy.scope()` block, it returns @@ -526,7 +531,6 @@ class DistributionStrategy(object): # TODO(josh11b): ClusterSpec/ClusterResolver # TODO(josh11b): Partitioned computations, state; sharding # TODO(josh11b): Model parallelism: "towers" with multiple devices; shuffling - # TODO(josh11b): Tower-local variables # TODO(josh11b): List of towers with their worker and parameter devices # (where the parameter devices may overlap in the ps case). @@ -556,6 +560,43 @@ class DistributionStrategy(object): # Note: should support "colocate_with" argument. raise NotImplementedError("must be implemented in descendants") + def tower_local_var_scope(self, reduce_method): + """Inside this scope, new variables will not be mirrored. + + There will still be one component variable per tower, but there is + no requirement that they stay in sync. Instead, when saving them + or calling `fetch()`, we use the value that results when calling + `reduce()` on all the towers' variables. + + Note: tower-local implies not trainable. Instead, it is expected + that each tower will directly update (using `assign_add()` or + whatever) its local variable instance but only the aggregated + value (accessible using `fetch()`) will be exported from the + model. When it is acceptable to only aggregate on export, we + greatly reduce communication overhead by using tower-local + variables. + + Note: All component variables will be initialized to the same + value, using the initialization expression from the first tower. + The values will match even if the initialization expression uses + random numbers. + + Args: + reduce_method: String used as a `method_string` to `reduce()` + to get the value to save when checkpointing. + + Returns: + A context manager. + """ + def create_tower_local_variable(next_creator, *args, **kwargs): + _require_distribution_strategy_scope(self) + kwargs["use_resource"] = True + kwargs["tower_local_reduce_method"] = reduce_method + return next_creator(*args, **kwargs) + + _require_distribution_strategy_scope(self) + return variable_scope.variable_creator_scope(create_tower_local_variable) + def colocate_vars_with(self, colocate_with_variable): """Scope that controls which devices variables will be created on. @@ -984,6 +1025,10 @@ class TowerContext(object): finally: _pop_per_thread_mode() + def tower_local_var_scope(self, reduce_method): + """Alias for distribution_strategy.tower_local_var_scope().""" + return self._distribution_strategy.tower_local_var_scope(reduce_method) + @property def is_single_tower(self): """Returns whether there is a single tower or multiple.""" @@ -1030,6 +1075,8 @@ class _DefaultDistributionStrategy(DistributionStrategy): def creator(next_creator, *args, **kwargs): _require_distribution_strategy_scope(self) + if kwargs.pop("tower_local_reduce_method", None) is not None: + kwargs["trainable"] = False return next_creator(*args, **kwargs) return _CurrentDistributionContext( -- GitLab From 108178da2a20ea2d3899417ee932d46ba1a5c652 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Mar 2018 16:52:39 -0700 Subject: [PATCH 646/960] Automated g4 rollback of changelist 190835392 PiperOrigin-RevId: 190858242 --- RELEASE.md | 60 --- configure.py | 2 +- tensorflow/BUILD | 7 - tensorflow/contrib/BUILD | 27 +- .../boosted_trees/kernels/quantile_ops.cc | 2 +- .../boosted_trees/lib/utils/batch_features.cc | 2 +- .../lib/utils/batch_features_test.cc | 2 +- .../boosted_trees/lib/utils/dropout_utils.cc | 2 +- .../boosted_trees/lib/utils/dropout_utils.h | 2 +- .../lib/utils/sparse_column_iterable_test.cc | 2 +- .../boosted_trees/proto/tree_config.proto | 2 +- .../kernel_tests/prediction_ops_test.py | 10 +- .../python/kernel_tests/quantile_ops_test.py | 2 +- .../boosted_trees/python/ops/quantile_ops.py | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 3 - .../kernel_tests/batch_dataset_op_test.py | 14 - tensorflow/contrib/eager/python/BUILD | 6 +- .../eager/python/examples/spinn/spinn_test.py | 1 + .../python/estimator/replicate_model_fn.py | 2 +- .../factorization/kernels/clustering_ops.cc | 2 +- .../python/ops/factorization_ops.py | 14 +- .../python/ops/factorization_ops_test.py | 12 +- .../factorization/python/ops/gmm_ops.py | 4 +- .../factorization/python/ops/gmm_test.py | 2 +- .../factorization/python/ops/kmeans_test.py | 4 +- .../contrib/factorization/python/ops/wals.py | 2 +- tensorflow/contrib/learn/BUILD | 1 - .../learn/python/learn/estimators/linear.py | 4 +- .../linear_optimizer/python/sdca_estimator.py | 4 +- tensorflow/contrib/lite/README.md | 3 - tensorflow/contrib/lite/builtin_ops.h | 1 - tensorflow/contrib/lite/g3doc/models.md | 2 +- tensorflow/contrib/lite/kernels/BUILD | 13 - .../internal/reference/reference_ops.h | 25 -- tensorflow/contrib/lite/kernels/maximum.cc | 106 ----- .../contrib/lite/kernels/maximum_test.cc | 81 ---- tensorflow/contrib/lite/kernels/register.cc | 2 - tensorflow/contrib/lite/model.cc | 3 - tensorflow/contrib/lite/nnapi_delegate.cc | 1 - tensorflow/contrib/lite/python/lite.py | 22 +- tensorflow/contrib/lite/schema/schema.fbs | 5 - .../contrib/lite/schema/schema_generated.h | 124 +----- tensorflow/contrib/lite/testing/BUILD | 1 - .../contrib/lite/testing/generate_examples.py | 36 -- .../testing/generated_examples_zip_test.cc | 1 - .../contrib/lite/toco/tflite/operator.cc | 2 - .../contrib/lite/toco/tflite/operator_test.cc | 2 - tensorflow/contrib/lookup/lookup_ops.py | 2 +- .../contrib/makefile/download_dependencies.sh | 2 +- tensorflow/contrib/makefile/tf_op_files.txt | 1 - .../seq2seq/kernels/beam_search_ops.cc | 2 +- .../seq2seq/python/ops/attention_wrapper.py | 8 +- .../seq2seq/python/ops/beam_search_decoder.py | 6 +- .../slim/python/slim/data/parallel_reader.py | 4 +- .../slim/python/slim/data/prefetch_queue.py | 4 +- .../python/slim/data/tfexample_decoder.py | 2 +- tensorflow/contrib/tensorrt/README.md | 46 +-- .../contrib/tensorrt/convert/convert_graph.cc | 20 +- .../contrib/tensorrt/convert/convert_nodes.cc | 375 ++++++++---------- .../contrib/tensorrt/segment/segment.cc | 55 +-- tensorflow/contrib/tensorrt/segment/segment.h | 4 +- .../contrib/tensorrt/segment/segment_test.cc | 8 +- .../timeseries/python/timeseries/ar_model.py | 2 +- .../python/timeseries/math_utils.py | 2 +- .../timeseries/state_space_models/varma.py | 4 +- .../base_api/api_def_MatrixSolveLs.pbtxt | 6 +- .../core/common_runtime/mkl_cpu_allocator.cc | 3 + tensorflow/core/framework/common_shape_fns.cc | 4 +- tensorflow/core/framework/common_shape_fns.h | 8 +- tensorflow/core/framework/shape_inference.h | 1 - .../core/kernels/mkl_fused_batch_norm_op.cc | 2 +- .../core/kernels/segment_reduction_ops.h | 7 - tensorflow/core/kernels/snapshot_op.cc | 30 -- tensorflow/core/kernels/snapshot_op.h | 26 +- tensorflow/core/kernels/snapshot_op_gpu.cu.cc | 9 +- tensorflow/core/kernels/xent_op.cc | 65 +-- tensorflow/core/kernels/xent_op.h | 35 +- tensorflow/core/kernels/xent_op_gpu.cu.cc | 9 +- tensorflow/core/ops/array_ops.cc | 26 +- tensorflow/core/ops/nn_ops.cc | 23 +- tensorflow/core/ops/nn_ops_test.cc | 16 +- tensorflow/core/public/version.h | 4 +- .../python/contrib.bayesflow.monte_carlo.md | 36 +- .../api_guides/python/contrib.losses.md | 28 +- .../docs_src/community/documentation.md | 38 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +- tensorflow/docs_src/install/install_linux.md | 22 +- tensorflow/docs_src/install/install_mac.md | 14 +- .../docs_src/install/install_sources.md | 9 +- tensorflow/docs_src/mobile/optimizing.md | 2 - tensorflow/docs_src/mobile/prepare_models.md | 2 +- tensorflow/python/BUILD | 2 +- .../python/kernel_tests/array_ops_test.py | 26 +- tensorflow/python/kernel_tests/testdata/BUILD | 2 +- .../python/kernel_tests/xent_op_test.py | 81 +--- tensorflow/python/layers/convolutional.py | 2 - .../python/layers/convolutional_test.py | 6 - tensorflow/python/ops/linalg_ops.py | 2 +- .../python/training/monitored_session.py | 33 +- .../python/training/monitored_session_test.py | 36 -- tensorflow/tensorflow.bzl | 4 +- .../tools/api/golden/tensorflow.train.pbtxt | 2 +- .../tools/ci_build/osx/libtensorflow_cpu.sh | 2 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/lib_package/BUILD | 2 + tensorflow/tools/pip_package/BUILD | 1 + tensorflow/tools/pip_package/setup.py | 6 +- tensorflow/workspace.bzl | 133 +++---- third_party/mkl/BUILD | 46 +-- third_party/mkl/MKL_LICENSE | 201 ---------- third_party/mkl/build_defs.bzl | 12 - third_party/mkl/mkl.BUILD | 27 +- 116 files changed, 556 insertions(+), 1703 deletions(-) delete mode 100644 tensorflow/contrib/lite/kernels/maximum.cc delete mode 100644 tensorflow/contrib/lite/kernels/maximum_test.cc delete mode 100644 third_party/mkl/MKL_LICENSE diff --git a/RELEASE.md b/RELEASE.md index c63d9f20c9..6f54dee58f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,63 +1,3 @@ -# Release 1.7.0 - -## Major Features And Improvements -* Eager mode is moving out of contrib, try `tf.enable_eager_execution()`. -* Graph rewrites emulating fixed-point quantization compatible with TensorFlow Lite, supported by new `tf.contrib.quantize` package. -* Easily customize gradient computation with `tf.custom_gradient`. -* [TensorBoard Debugger Plugin](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md), the graphical user interface (GUI) of TensorFlow Debugger (tfdbg), is now in alpha. -* Experimental support for reading a sqlite database as a `Dataset` with new `tf.contrib.data.SqlDataset`. -* Distributed Mutex / CriticalSection added to `tf.contrib.framework.CriticalSection`. -* Better text processing with `tf.regex_replace`. -* Easy, efficient sequence input with `tf.contrib.data.bucket_by_sequence_length` - -## Bug Fixes and Other Changes -* Accelerated Linear Algebra (XLA): - * Add `MaxPoolGradGrad` support for XLA - * CSE pass from Tensorflow is now disabled in XLA. -* `tf.data`: - * `tf.data.Dataset` - * Add support for building C++ Dataset op kernels as external libraries, using the `tf.load_op_library()` mechanism. - * `Dataset.list_files()` now shuffles its output by default. - * `Dataset.shuffle(..., seed=tf.constant(0, dtype=tf.int64))` now yields the same sequence of elements as `Dataset.shuffle(..., seed=0)`. - * Add `num_parallel_reads` argument to `tf.data.TFRecordDataset`. -* `tf.contrib`: - * `tf.contrib.bayesflow.halton_sequence` now supports randomization. - * Add support for scalars in `tf.contrib.all_reduce`. - * Add `effective_sample_size` to `tf.contrib.bayesflow.mcmc_diagnostics`. - * Add `potential_scale_reduction` to `tf.contrib.bayesflow.mcmc_diagnostics`. - * Add `BatchNormalization`, `Kumaraswamy` bijectors. - * Deprecate `tf.contrib.learn`. Please check contrib/learn/README.md for instructions on how to convert existing code. - * `tf.contrib.data` - * Remove deprecated `tf.contrib.data.Dataset`, `tf.contrib.data.Iterator`, `tf.contrib.data.FixedLengthRecordDataset`, `tf.contrib.data.TextLineDataset`, and `tf.contrib.data.TFRecordDataset` classes. - * Added `bucket_by_sequence_length`, `sliding_window_batch`, and `make_batched_features_dataset` - * Remove unmaintained `tf.contrib.ndlstm`. You can find it externally at https://github.com/tmbarchive/tfndlstm. - * Moved most of `tf.contrib.bayesflow` to its own repo: `tfp` -* Other: - * tf.py_func now reports the full stack trace if an exception occurs. - * Integrate `TPUClusterResolver` with GKE's integration for Cloud TPUs. - * Add a library for statistical testing of samplers. - * Add Helpers to stream data from the GCE VM to a Cloud TPU. - * Integrate ClusterResolvers with TPUEstimator. - * Unify metropolis_hastings interface with HMC kernel. - * Move LIBXSMM convolutions to a separate --define flag so that they are disabled by default. - * Fix `MomentumOptimizer` lambda. - * Reduce `tfp.layers` boilerplate via programmable docstrings. - * Add `auc_with_confidence_intervals`, a method for computing the AUC and confidence interval with linearithmic time complexity. - * `regression_head` now accepts customized link function, to satisfy the usage that user can define their own link function if the `array_ops.identity` does not meet the requirement. - * Fix `initialized_value` and `initial_value` behaviors for `ResourceVariables` created from `VariableDef` protos. - * Add TensorSpec to represent the specification of Tensors. - * Constant folding pass is now deterministic. - * Support `float16` `dtype` in `tf.linalg.*`. - * Add `tf.estimator.export.TensorServingInputReceiver` that allows `tf.estimator.Estimator.export_savedmodel` to pass raw tensors to model functions. - -## Thanks to our Contributors - -This release contains contributions from many people at Google, as well as: - -4d55397500, Abe, Alistair Low, Andy Kernahan, Appledore, Ben, Ben Barsdell, Boris Pfahringer, Brad Wannow, Brett Koonce, Carl Thomé, cclauss, Chengzhi Chen, Chris Drake, Christopher Yeh, Clayne Robison, Codrut Grosu, Daniel Trebbien, Danny Goodman, David Goodwin, David Norman, Deron Eriksson, Donggeon Lim, Donny Viszneki, DosLin, DylanDmitri, Francisco Guerrero, Fred Reiss, gdh1995, Giuseppe, Glenn Weidner, gracehoney, Guozhong Zhuang, Haichen "Hc" Li, Harald Husum, harumitsu.nobuta, Henry Spivey, hsm207, Jekyll Song, Jerome, Jiongyan Zhang, jjsjann123, John Sungjin Park, Johnson145, JoshVarty, Julian Wolff, Jun Wang, June-One, Kamil Sindi, Kb Sriram, Kdavis-Mozilla, Kenji, lazypanda1, Liang-Chi Hsieh, Loo Rong Jie, Mahesh Bhosale, MandarJKulkarni, ManHyuk, Marcus Ong, Marshal Hayes, Martin Pool, matthieudelaro, mdfaijul, mholzel, Michael Zhou, Ming Li, Minmin Sun, Myungjoo Ham, MyungsungKwak, Naman Kamra, Peng Yu, Penghao Cen, Phil, Raghuraman-K, resec, Rohin Mohanadas, Sandeep N Gupta, Scott Tseng, seaotterman, Seo Sanghyeon, Sergei Lebedev, Ted Chang, terrytangyuan, Tim H, tkunic, Tod, vihanjain, Yan Facai (颜发才), Yin Li, Yong Tang, Yukun Chen, Yusuke Yamada - - - # Release 1.6.0 ## Breaking Changes diff --git a/configure.py b/configure.py index 0f52c0ec99..22b9abedd7 100644 --- a/configure.py +++ b/configure.py @@ -1414,7 +1414,7 @@ def main(): set_build_var(environ_cp, 'TF_NEED_S3', 'Amazon S3 File System', 'with_s3_support', True, 's3') set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', True, 'kafka') + 'with_kafka_support', False, 'kafka') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 29a01efc84..6ab43638ba 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -240,13 +240,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "with_kafka_support_windows_override", - define_values = {"with_kafka_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - config_setting( name = "with_gcp_support_android_override", define_values = {"with_gcp_support": "true"}, diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index fb81b50fe8..bdbd738906 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -51,6 +51,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -109,13 +110,7 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ - "//tensorflow/contrib/kafka", - ], - "//conditions:default": [], - }), + ]), ) cc_library( @@ -125,6 +120,7 @@ cc_library( "//tensorflow/contrib/boosted_trees:boosted_trees_kernels", "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -137,13 +133,7 @@ cc_library( "//tensorflow/contrib/text:all_kernels", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([ "//tensorflow/contrib/nccl:nccl_kernels", - ]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ - "//tensorflow/contrib/kafka:dataset_kernels", - ], - "//conditions:default": [], - }), + ]), ) cc_library( @@ -156,6 +146,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", @@ -166,13 +157,7 @@ cc_library( "//tensorflow/contrib/tensor_forest:tensor_forest_ops_op_lib", "//tensorflow/contrib/text:all_ops", "//tensorflow/contrib/tpu:all_ops", - ] + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ - "//tensorflow/contrib/kafka:dataset_ops_op_lib", - ], - "//conditions:default": [], - }), + ], ) filegroup( diff --git a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc index 0b28f81e7c..0f4c2298f5 100644 --- a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc @@ -253,7 +253,7 @@ class CreateQuantileAccumulatorOp : public OpKernel { private: float epsilon_; int32 num_quantiles_; - // An upper bound on the number of entries that the summaries might have + // An upperbound on the number of enteries that the summaries might have // for a feature. int64 max_elements_; bool generate_quantiles_; diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc index 35b059f349..cf4f9a097a 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.cc @@ -54,7 +54,7 @@ Status BatchFeatures::Initialize( TF_CHECK_AND_RETURN_IF_ERROR( dense_float_feature.dim_size(1) == 1, errors::InvalidArgument( - "Dense float features may not be multivalent: dim_size(1) = ", + "Dense float features may not be multi-valent: dim_size(1) = ", dense_float_feature.dim_size(1))); dense_float_feature_columns_.emplace_back(dense_float_feature); } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc b/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc index cfe9101e74..609519e8b1 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features_test.cc @@ -59,7 +59,7 @@ TEST_F(BatchFeaturesTest, DenseFloatFeatures_Multivalent) { BatchFeatures batch_features(1); auto dense_vec = AsTensor({3.0f, 7.0f}, {1, 2}); auto expected_error = InvalidArgument( - "Dense float features may not be multivalent: dim_size(1) = 2"); + "Dense float features may not be multi-valent: dim_size(1) = 2"); EXPECT_EQ(expected_error, batch_features.Initialize({dense_vec}, {}, {}, {}, {}, {}, {})); } diff --git a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc index ce67db797d..db34db998a 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.cc @@ -54,7 +54,7 @@ Status DropoutUtils::DropOutTrees( if (probability_of_skipping_dropout < 0 || probability_of_skipping_dropout > 1) { return errors::InvalidArgument( - "Probability of skipping dropout must be in [0,1] range"); + "Probability of skiping dropout must be in [0,1] range"); } const auto num_trees = weights.size(); diff --git a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h index 77c16da541..928bfbfe5c 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h @@ -66,7 +66,7 @@ class DropoutUtils { // Current weights and num_updates will be updated as a result of this // func std::vector* current_weights, - // How many weight assignments have been done for each tree already. + // How many weight assignements have been done for each tree already. std::vector* num_updates); }; diff --git a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc index cc7604745e..0138aae3db 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/utils/sparse_column_iterable_test.cc @@ -34,7 +34,7 @@ TEST_F(SparseColumnIterableTest, Empty) { } TEST_F(SparseColumnIterableTest, Iterate) { - // 8 examples having 7 sparse features with the 3rd and 7th multivalent. + // 8 examples having 7 sparse features with the 3rd and 7th multi-valent. // This can be visualized like the following: // Instance | Sparse | // 0 | x | diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto index 81411aa84a..4407c4d981 100644 --- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto +++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto @@ -53,7 +53,7 @@ message DenseFloatBinarySplit { // Float feature column and split threshold describing // the rule feature <= threshold. int32 feature_column = 1; - // If feature column is multivalent, this holds the index of the dimension + // If feature column is multivalent, this holds the index of the dimensiong // for the split. Defaults to 0. int32 dimension_id = 5; float threshold = 2; diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index cf55759aaa..c1acf35160 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -120,8 +120,8 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): """Sets up the prediction tests. Create a batch of two examples having one dense float, two sparse float - single valued, one sparse float multidimensional and one sparse int - features. The data looks like the following: + single valued, one sparse float multidimensionl and one sparse int features. + The data looks like the following: | Instance | Dense0 | SparseF0 | SparseF1 | SparseI0 | SparseM | 0 | 7 | -3 | | 9,1 | __, 5.0 | 1 | -2 | | 4 | | 3, ___ @@ -810,7 +810,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # building. This tree should never be dropped. num_trees = 10 with self.test_session(): - # Empty tree ensemble. + # Empty tree ensenble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() # Add 10 trees with some weights. for i in range(0, num_trees): @@ -951,7 +951,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): def testDropOutZeroProb(self): with self.test_session(): - # Empty tree ensemble. + # Empty tree ensenble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() # Add 1000 trees with some weights. for i in range(0, 999): @@ -994,7 +994,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): def testAveragingAllTrees(self): with self.test_session(): - # Empty tree ensemble. + # Empty tree ensenble. tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() adjusted_tree_ensemble_config = ( tree_config_pb2.DecisionTreeEnsembleConfig()) diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py index 074623699d..81f58de28c 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py @@ -482,7 +482,7 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): """Sets up the quantile op tests. Create a batch of 4 examples having 2 dense and 4 sparse features. - Fourth sparse feature is multivalent (3 dimensional) + Forth sparse feature is multivalent (3 dimensional) The data looks like this | Instance | Dense 0 | Dense 1 | Sparse 0 | Sparse 1 |Sparse 2| SparseM | 0 | -0.1 | -1 | -2 | 0.1 | |_ ,1,_ diff --git a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py index 1b184d296b..97d57e8b23 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/quantile_ops.py @@ -184,7 +184,7 @@ class QuantileAccumulator(saver.BaseSaverBuilder.SaveableObject): """Finalizes quantile summary stream and resets it for next iteration. Args: - stamp_token: Expected current token. + stamp_token: Exepcted current token. next_stamp_token: Next value for the token. Returns: A list of quantiles or approximate boundaries. diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 92f2ab6dea..f793877c8b 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -210,9 +210,6 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py" # Test is flaky on Windows GPU builds (b/38283730). "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/gmm_test.py" - # Disable following manual tag in BUILD. - "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py" - ) if (WIN32) set(tf_test_src_py_exclude diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 75482f67da..5abb38c2d2 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -413,20 +413,6 @@ class BatchDatasetTest(test.TestCase): def testMapAndBatchPartialBatchDropRemainder(self): return self._testMapAndBatchPartialBatchHelper(drop_remainder=True) - def testMapAndBatchYieldsPartialBatch(self): - iterator = (dataset_ops.Dataset.range(10) - .apply(batching.map_and_batch( - lambda x: array_ops.reshape(x * x, [1]), 4)) - .make_one_shot_iterator()) - self.assertEqual([None, 1], iterator.output_shapes.as_list()) - next_element = iterator.get_next() - with self.test_session() as sess: - self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element)) - self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element)) - self.assertAllEqual([[64], [81]], sess.run(next_element)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element) - def testMapAndBatchSparse(self): def _sparse(i): diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 80176397c0..4fba014d6f 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -270,11 +270,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = [ - "no_oss", # b/74395663 - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 9adf47d505..9261823d77 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -418,6 +418,7 @@ class SpinnTest(test_util.TensorFlowTestCase): if event.summary.value and event.summary.value[0].tag == "train/loss"] self.assertEqual(config.epochs, len(train_losses)) + self.assertLess(train_losses[-1], train_losses[0]) # 5. Verify that checkpoints exist and contains all the expected variables. self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index fa2697800e..e0fae2c992 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -136,7 +136,7 @@ def replicate_model_fn(model_fn, the train_op argument of `EstimatorSpec`. loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This - argument can be used to replicate only on the subset of available GPUs. + argument can be used to replice only on the subset of available GPUs. If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. diff --git a/tensorflow/contrib/factorization/kernels/clustering_ops.cc b/tensorflow/contrib/factorization/kernels/clustering_ops.cc index 2a6c97e8b9..dd61f59585 100644 --- a/tensorflow/contrib/factorization/kernels/clustering_ops.cc +++ b/tensorflow/contrib/factorization/kernels/clustering_ops.cc @@ -353,7 +353,7 @@ class NearestNeighborsOp : public OpKernel { auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads()); const int64 num_threads = worker_threads.num_threads; // This kernel might be configured to use fewer than the total number of - // available CPUs on the host machine. To avoid destructive interference + // available CPUs on the host machine. To avoid descructive interference // with other jobs running on the host machine, we must only use a fraction // of total available L3 cache. Unfortunately, we cannot query the host // machine to get the number of physical CPUs. So, we use a fixed per-CPU diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 8e0ed1d80e..054888e734 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -106,7 +106,7 @@ class WALSModel(object): # the prep_gramian_op for row(column) can be run. worker_init_op = model.worker_init - # To be run once per integration sweep before the row(column) update + # To be run once per interation sweep before the row(column) update # initialize ops can be run. Note that in the distributed training # situations, this should only be run by the chief trainer. All other # trainers need to block until this is done. @@ -118,9 +118,9 @@ class WALSModel(object): init_row_update_op = model.initialize_row_update_op init_col_update_op = model.initialize_col_update_op - # Ops to update row(column). This can either take the entire sparse - # tensor or slices of sparse tensor. For distributed trainer, each - # trainer handles just part of the matrix. + # Ops to upate row(column). This can either take the entire sparse tensor + # or slices of sparse tensor. For distributed trainer, each trainer + # handles just part of the matrix. _, row_update_op, unreg_row_loss, row_reg, _ = model.update_row_factors( sp_input=matrix_slices_from_queue_for_worker_shard) row_loss = unreg_row_loss + row_reg @@ -220,7 +220,7 @@ class WALSModel(object): in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of inner lists matching the number of row factor shards and the elements in each inner list are the weights for the rows of the corresponding row - factor shard. In this case, w_ij = unobserved_weight + + factor shard. In this case, w_ij = unonbserved_weight + row_weights[i] * col_weights[j]. - If this is a single non-negative real number, this value is used for all row weights and w_ij = unobserved_weight + row_weights * @@ -435,7 +435,7 @@ class WALSModel(object): gramian: Variable storing the gramian calculated from the factors. Returns: - A op that updates the gramian with the calculated value from the factors. + A op that updates the gramian with the calcuated value from the factors. """ partial_gramians = [] for f in factors: @@ -564,7 +564,7 @@ class WALSModel(object): Note that specifically this initializes the cache of the row and column weights on workers when `use_factors_weights_cache` is True. In this case, - if these weights are being calculated and reset after the object is created, + if these weights are being calcualted and reset after the object is created, it is important to ensure this ops is run afterwards so the cache reflects the correct values. """ diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py index bb5140aeb3..c813733915 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops_test.py @@ -210,7 +210,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reproduce the same row factors in the model as the + # This is expected to reprodue the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -283,8 +283,8 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 3 column feature vectors. - # This is expected to reproduce the same column factors in the model as - # the weights and feature vectors are identical to that used in model + # This is expected to reprodue the same column factors in the model as the + # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( sp_input=sp_feeder, @@ -385,7 +385,7 @@ class WalsModelTest(test.TestCase): # Test row projection. # Using the specified projection weights for the 2 row feature vectors. - # This is expected to reproduce the same row factors in the model as the + # This is expected to reprodue the same row factors in the model as the # weights and feature vectors are identical to that used in model # training. projected_rows = wals_model.project_row_factors( @@ -462,8 +462,8 @@ class WalsModelTest(test.TestCase): # Test column projection. # Using the specified projection weights for the 2 column feature vectors. - # This is expected to reproduce the same column factors in the model as - # the weights and feature vectors are identical to that used in model + # This is expected to reprodue the same column factors in the model as the + # weights and feature vectors are identical to that used in model # training. projected_cols = wals_model.project_col_factors( sp_input=sp_feeder, diff --git a/tensorflow/contrib/factorization/python/ops/gmm_ops.py b/tensorflow/contrib/factorization/python/ops/gmm_ops.py index 14d4c733e3..98d6434f47 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_ops.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_ops.py @@ -280,7 +280,7 @@ class GmmAlgorithm(object): self._define_score_samples() def _define_full_covariance_probs(self, shard_id, shard): - """Defines the full covariance probabilities per example in a class. + """Defines the full covariance probabilties per example in a class. Updates a matrix with dimension num_examples X num_classes. @@ -344,7 +344,7 @@ class GmmAlgorithm(object): def _define_prior_log_prob_operation(self, shard_id): """Computes the prior probability of all samples. - Updates a vector where each item is the prior probability of an + Updates a vector where each item is the prior probabibility of an input example. Args: diff --git a/tensorflow/contrib/factorization/python/ops/gmm_test.py b/tensorflow/contrib/factorization/python/ops/gmm_test.py index 4fc9c96e9d..00a4734eb6 100644 --- a/tensorflow/contrib/factorization/python/ops/gmm_test.py +++ b/tensorflow/contrib/factorization/python/ops/gmm_test.py @@ -210,7 +210,7 @@ class GMMTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependent on inputs, if input + # Note that since cluster initialization is dependendent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py index 88eb9cf692..0103cc4439 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans_test.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -413,7 +413,7 @@ class KMeansCosineDistanceTest(KMeansTestBase): self.assertAllClose(score, self.true_score, atol=1e-2) def test_predict_kmeans_plus_plus(self): - # Most points are concentrated near one center. KMeans++ is likely to find + # Most points are concetrated near one center. KMeans++ is likely to find # the less populated centers. points = np.array( [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], @@ -604,7 +604,7 @@ class KMeansTestQueues(test.TestCase): return _fn # This test makes sure that there are no deadlocks when using a QueueRunner. - # Note that since cluster initialization is dependent on inputs, if input + # Note that since cluster initialization is dependendent on inputs, if input # is generated using a QueueRunner, one has to make sure that these runners # are started before the initialization. def test_queues(self): diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 62db3bb4c4..4fe22ea26e 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -235,7 +235,7 @@ def _wals_factorization_model_function(features, labels, mode, params): num_items: An integer, the total number of items of this axis. update_fn: A function that takes one argument (`sp_input`), and that returns a tuple of - * new_factors: A float Tensor of the factor values after update. + * new_factors: A flot Tensor of the factor values after update. * update_op: a TensorFlow op which updates the factors. * loss: A float Tensor, the unregularized loss. * reg_loss: A float Tensor, the regularization loss. diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 16f80a876f..9c59150580 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -226,7 +226,6 @@ py_test( size = "small", srcs = ["python/learn/monitors_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip_gpu"], # b/74437598 deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 70b70af98c..64d7ecc68e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -243,8 +243,8 @@ def sdca_model_fn(features, labels, mode, params): parent_scope = "linear" - with variable_scope.variable_scope( - values=features.values(), name_or_scope=parent_scope) as scope: + with variable_scope.variable_op_scope( + features.values(), parent_scope) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py index d4e54c82f9..05794a42c5 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py @@ -140,8 +140,8 @@ def sdca_model_fn(features, labels, mode, params, config=None): parent_scope = "linear" - with variable_scope.variable_scope( - values=features.values(), name_or_scope=parent_scope) as scope: + with variable_scope.variable_op_scope(features.values(), + parent_scope) as scope: features = features.copy() features.update(layers.transform_features(features, feature_columns)) logits, columns_to_variables, bias = ( diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index c15ae3f233..2680d515eb 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -126,9 +126,6 @@ The above pre-trained models have been trained on the ImageNet data set, which c The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. -# Getting started with RaspberryPi - -Using RaspberryPi can be accomplished by following the [Makefile instructions](g3doc/rpi.md). That will give a you a static library (.a) that you can build your app against. Python bindings will be coming soon as well as a demo app. ### Train a custom model A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 17b791e4e2..d7993e60cc 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -79,7 +79,6 @@ typedef enum { kTfLiteBuiltinBidirectionalSequenceLstm = 52, kTfLiteBuiltinCast = 53, kTfLiteBuiltinPrelu = 54, - kTfLiteBuiltinMaximum = 55, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md index 48f43d4fc4..5b393140d6 100644 --- a/tensorflow/contrib/lite/g3doc/models.md +++ b/tensorflow/contrib/lite/g3doc/models.md @@ -1,4 +1,4 @@ -# List of Hosted Models +#List of Hosted Models * [Inception V3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) * [Inception V3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index c423c00bf5..1450c1e14b 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -156,7 +156,6 @@ cc_library( "local_response_norm.cc", "lsh_projection.cc", "lstm.cc", - "maximum.cc", "mean.cc", "mfcc.cc", "mul.cc", @@ -537,18 +536,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "maximum_test", - size = "small", - srcs = ["maximum_test.cc"], - deps = [ - ":builtin_ops", - "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite/kernels:test_util", - "@com_google_googletest//:gtest", - ], -) - tf_cc_test( name = "mean_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 3575974ae9..33d60afa26 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -404,7 +404,6 @@ inline void DepthToSpace(const T* input_data, const Dims<4>& input_dims, const int in_d = out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; - const int in_w = out_w / block_size; const int in_h = out_h / block_size; const int in_b = out_b; @@ -3364,30 +3363,6 @@ void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, } } -template -void TensorFlowMaximum(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); - - for (int b = 0; b < ArraySize(output_dims, 3); ++b) { - for (int y = 0; y < ArraySize(output_dims, 2); ++y) { - for (int x = 0; x < ArraySize(output_dims, 1); ++x) { - for (int c = 0; c < ArraySize(output_dims, 0); ++c) { - auto out_idx = Offset(output_dims, c, x, y, b); - auto in1_idx = SubscriptToIndex(desc1, c, x, y, b); - auto in2_idx = SubscriptToIndex(desc2, c, x, y, b); - auto in1_val = input1_data[in1_idx]; - auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = in1_val > in2_val ? in1_val : in2_val; - } - } - } - } -} - template void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims, T2* output_data, const Dims<4>& output_dims) { diff --git a/tensorflow/contrib/lite/kernels/maximum.cc b/tensorflow/contrib/lite/kernels/maximum.cc deleted file mode 100644 index 9fdf2b47ea..0000000000 --- a/tensorflow/contrib/lite/kernels/maximum.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include -#include "tensorflow/contrib/lite/builtin_op_data.h" -#include "tensorflow/contrib/lite/context.h" -#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/tensor.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" -#include "tensorflow/contrib/lite/kernels/op_macros.h" - -namespace tflite { -namespace ops { -namespace builtin { -namespace maximum { - -// This file has a reference implemenation of TFMaximum. -enum KernelType { - kReference, -}; - -constexpr int kInputTensor1 = 0; -constexpr int kInputTensor2 = 1; -constexpr int kOutputTensor = 0; - -struct MaximumContext { - MaximumContext(TfLiteContext* context, TfLiteNode* node) { - input1 = GetInput(context, node, kInputTensor1); - input2 = GetInput(context, node, kInputTensor2); - output = GetOutput(context, node, kOutputTensor); - } - TfLiteTensor* input1; - TfLiteTensor* input2; - TfLiteTensor* output; -}; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MaximumContext op_context(context, node); - TF_LITE_ENSURE_EQ(context, op_context.input1->type, op_context.input2->type); - TfLiteIntArray* output_dims = TfLiteIntArrayCopy(op_context.input2->dims); - op_context.output->type = op_context.input2->type; - return context->ResizeTensor(context, op_context.output, output_dims); -} - -template -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - MaximumContext op_context(context, node); - -#define TF_LITE_MAXIMUM(kernel_type, data_type) \ - kernel_type::TensorFlowMaximum( \ - GetTensorData(op_context.input1), \ - GetTensorDims(op_context.input1), \ - GetTensorData(op_context.input2), \ - GetTensorDims(op_context.input2), \ - GetTensorData(op_context.output), \ - GetTensorDims(op_context.output)) - - if (kernel_type == kReference) { - switch (op_context.output->type) { - case kTfLiteFloat32: - TF_LITE_MAXIMUM(reference_ops, float); - break; - default: - context->ReportError(context, - "Type %d is currently not supported by Maximum.", - op_context.output->type); - return kTfLiteError; - } - } else { - context->ReportError(context, - "Type %d is currently not supported by Maximum.", - op_context.output->type); - return kTfLiteError; - } -#undef TF_LITE_MAXIMUM - return kTfLiteOk; -} - -} // namespace maximum - -TfLiteRegistration* Register_MAXIMUM_REF() { - static TfLiteRegistration r = {nullptr, nullptr, maximum::Prepare, - maximum::Eval}; - return &r; -} - -TfLiteRegistration* Register_MAXIMUM() { return Register_MAXIMUM_REF(); } - -} // namespace builtin -} // namespace ops -} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/maximum_test.cc b/tensorflow/contrib/lite/kernels/maximum_test.cc deleted file mode 100644 index b3fd7d4e6f..0000000000 --- a/tensorflow/contrib/lite/kernels/maximum_test.cc +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include "tensorflow/contrib/lite/interpreter.h" -#include "tensorflow/contrib/lite/kernels/register.h" -#include "tensorflow/contrib/lite/kernels/test_util.h" -#include "tensorflow/contrib/lite/model.h" - -namespace tflite { -namespace { - -using ::testing::ElementsAreArray; - -class MaximumOpModel : public SingleOpModel { - public: - MaximumOpModel(const TensorData& input1, const TensorData& input2, - const TensorType& output) { - input1_ = AddInput(input1); - input2_ = AddInput(input2); - output_ = AddOutput(output); - SetBuiltinOp(BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumOptions, - CreateMaximumOptions(builder_).Union()); - BuildInterpreter({GetShape(input1_), GetShape(input2_)}); - } - - template - void SetInput1(std::initializer_list data) { - PopulateTensor(input1_, data); - } - - template - void SetInput2(std::initializer_list data) { - PopulateTensor(input2_, data); - } - - template - std::vector GetOutput() { - return ExtractVector(output_); - } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - protected: - int input1_; - int input2_; - int output_; -}; - -TEST(MaximumOpTest, FloatTest) { - std::initializer_list data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; - std::initializer_list data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; - MaximumOpModel m({TensorType_FLOAT32, {3, 1, 2}}, - {TensorType_FLOAT32, {3, 1, 2}}, TensorType_FLOAT32); - m.SetInput1(data1); - m.SetInput2(data2); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray(ArrayFloatNear({1.0, 0.0, 1.0, 12.0, -2.0, -1.43}))); -} - -} // namespace -} // namespace tflite - -int main(int argc, char** argv) { - ::tflite::LogToStderr(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 0f98154b90..62045f0a4d 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -76,7 +76,6 @@ TfLiteRegistration* Register_LOG_SOFTMAX(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_DEQUANTIZE(); TfLiteRegistration* Register_PRELU(); -TfLiteRegistration* Register_MAXIMUM(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -134,7 +133,6 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); - AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 791d1378f3..b7ccdf070b 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -597,9 +597,6 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_MAXIMUM: { - break; - } case BuiltinOperator_DELEGATE: { // TODO(ycling): Revisit when supporting saving delegated models. error_reporter->Report("DELEGATE op shouldn't exist in model."); diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index decaf9f160..e31b7c03a5 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -350,7 +350,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_DELEGATE: case tflite::BuiltinOperator_CAST: case tflite::BuiltinOperator_PRELU: - case tflite::BuiltinOperator_MAXIMUM: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py index ed6dd036f9..35d224924e 100644 --- a/tensorflow/contrib/lite/python/lite.py +++ b/tensorflow/contrib/lite/python/lite.py @@ -25,9 +25,9 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os as _os -import subprocess as _subprocess -import tempfile as _tempfile +import os +import subprocess +import tempfile # pylint: disable=unused-import from tensorflow.contrib.lite.python.op_hint import convert_op_hints_to_stubs @@ -74,7 +74,7 @@ else: _toco_from_proto_bin = _resource_loader.get_path_to_datafile( "../toco/python/toco_from_protos") -if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin): +if _toco_from_proto_bin and not os.path.exists(_toco_from_proto_bin): _toco_from_proto_bin = "toco_from_protos" @@ -102,10 +102,10 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): return _toco_python.TocoConvert( model_flags_str, toco_flags_str, input_data_str) - with _tempfile.NamedTemporaryFile() as fp_toco, \ - _tempfile.NamedTemporaryFile() as fp_model, \ - _tempfile.NamedTemporaryFile() as fp_input, \ - _tempfile.NamedTemporaryFile() as fp_output: + with tempfile.NamedTemporaryFile() as fp_toco, \ + tempfile.NamedTemporaryFile() as fp_model, \ + tempfile.NamedTemporaryFile() as fp_input, \ + tempfile.NamedTemporaryFile() as fp_output: fp_model.write(model_flags_str) fp_toco.write(toco_flags_str) fp_input.write(input_data_str) @@ -118,11 +118,11 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): fp_output.name ] cmdline = " ".join(cmd) - proc = _subprocess.Popen( + proc = subprocess.Popen( cmdline, shell=True, - stdout=_subprocess.PIPE, - stderr=_subprocess.STDOUT, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, close_fds=True) stdout, stderr = proc.communicate() exitcode = proc.returncode diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 7d2e00fe32..e1075971e9 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -131,7 +131,6 @@ enum BuiltinOperator : byte { BIDIRECTIONAL_SEQUENCE_LSTM = 52, CAST = 53, PRELU = 54, - MAXIMUM = 55, } // Options for the builtin operators. @@ -174,7 +173,6 @@ union BuiltinOptions { LogSoftmaxOptions, CastOptions, DequantizeOptions, - MaximumOptions, } enum Padding : byte { SAME, VALID } @@ -386,9 +384,6 @@ table CastOptions { table DequantizeOptions { } -table MaximumOptions { -} - // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 66a97a1460..86daeaf5cc 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -145,9 +145,6 @@ struct CastOptionsT; struct DequantizeOptions; struct DequantizeOptionsT; -struct MaximumOptions; -struct MaximumOptionsT; - struct OperatorCode; struct OperatorCodeT; @@ -258,12 +255,11 @@ enum BuiltinOperator { BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, BuiltinOperator_CAST = 53, BuiltinOperator_PRELU = 54, - BuiltinOperator_MAXIMUM = 55, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_MAXIMUM + BuiltinOperator_MAX = BuiltinOperator_PRELU }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[54] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[53] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -317,8 +313,7 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[54] { BuiltinOperator_DELEGATE, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOperator_CAST, - BuiltinOperator_PRELU, - BuiltinOperator_MAXIMUM + BuiltinOperator_PRELU }; return values; } @@ -380,7 +375,6 @@ inline const char **EnumNamesBuiltinOperator() { "BIDIRECTIONAL_SEQUENCE_LSTM", "CAST", "PRELU", - "MAXIMUM", nullptr }; return names; @@ -431,12 +425,11 @@ enum BuiltinOptions { BuiltinOptions_LogSoftmaxOptions = 36, BuiltinOptions_CastOptions = 37, BuiltinOptions_DequantizeOptions = 38, - BuiltinOptions_MaximumOptions = 39, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_MaximumOptions + BuiltinOptions_MAX = BuiltinOptions_DequantizeOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[40] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[39] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -476,8 +469,7 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[40] { BuiltinOptions_SplitOptions, BuiltinOptions_LogSoftmaxOptions, BuiltinOptions_CastOptions, - BuiltinOptions_DequantizeOptions, - BuiltinOptions_MaximumOptions + BuiltinOptions_DequantizeOptions }; return values; } @@ -523,7 +515,6 @@ inline const char **EnumNamesBuiltinOptions() { "LogSoftmaxOptions", "CastOptions", "DequantizeOptions", - "MaximumOptions", nullptr }; return names; @@ -690,10 +681,6 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; }; -template<> struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = BuiltinOptions_MaximumOptions; -}; - struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1029,14 +1016,6 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_DequantizeOptions ? reinterpret_cast(value) : nullptr; } - MaximumOptionsT *AsMaximumOptions() { - return type == BuiltinOptions_MaximumOptions ? - reinterpret_cast(value) : nullptr; - } - const MaximumOptionsT *AsMaximumOptions() const { - return type == BuiltinOptions_MaximumOptions ? - reinterpret_cast(value) : nullptr; - } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3780,46 +3759,6 @@ inline flatbuffers::Offset CreateDequantizeOptions( flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -struct MaximumOptionsT : public flatbuffers::NativeTable { - typedef MaximumOptions TableType; - MaximumOptionsT() { - } -}; - -struct MaximumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MaximumOptionsT NativeTableType; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } - MaximumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -}; - -struct MaximumOptionsBuilder { - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit MaximumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - MaximumOptionsBuilder &operator=(const MaximumOptionsBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateMaximumOptions( - flatbuffers::FlatBufferBuilder &_fbb) { - MaximumOptionsBuilder builder_(_fbb); - return builder_.Finish(); -} - -flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); - struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -4051,9 +3990,6 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const DequantizeOptions *builtin_options_as_DequantizeOptions() const { return builtin_options_type() == BuiltinOptions_DequantizeOptions ? static_cast(builtin_options()) : nullptr; } - const MaximumOptions *builtin_options_as_MaximumOptions() const { - return builtin_options_type() == BuiltinOptions_MaximumOptions ? static_cast(builtin_options()) : nullptr; - } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -4232,10 +4168,6 @@ template<> inline const DequantizeOptions *Operator::builtin_options_as inline const MaximumOptions *Operator::builtin_options_as() const { - return builtin_options_as_MaximumOptions(); -} - struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5764,29 +5696,6 @@ inline flatbuffers::Offset CreateDequantizeOptions(flatbuffer _fbb); } -inline MaximumOptionsT *MaximumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { - auto _o = new MaximumOptionsT(); - UnPackTo(_o, _resolver); - return _o; -} - -inline void MaximumOptions::UnPackTo(MaximumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { - (void)_o; - (void)_resolver; -} - -inline flatbuffers::Offset MaximumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { - return CreateMaximumOptions(_fbb, _o, _rehasher); -} - -inline flatbuffers::Offset CreateMaximumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { - (void)_rehasher; - (void)_o; - struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; - return tflite::CreateMaximumOptions( - _fbb); -} - inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -6119,10 +6028,6 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } default: return false; } } @@ -6293,10 +6198,6 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(obj); - return ptr->UnPack(resolver); - } default: return nullptr; } } @@ -6455,10 +6356,6 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union(); } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(value); - return CreateMaximumOptions(_fbb, ptr, _rehasher).Union(); - } default: return 0; } } @@ -6617,10 +6514,6 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new DequantizeOptionsT(*reinterpret_cast(u.value)); break; } - case BuiltinOptions_MaximumOptions: { - value = new MaximumOptionsT(*reinterpret_cast(u.value)); - break; - } default: break; } @@ -6818,11 +6711,6 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } - case BuiltinOptions_MaximumOptions: { - auto ptr = reinterpret_cast(value); - delete ptr; - break; - } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 12b7b3c350..555ea90034 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -36,7 +36,6 @@ gen_zipped_test_files( "local_response_norm.zip", "log_softmax.zip", "max_pool.zip", - "maximum.zip", "mean.zip", "mul.zip", "pad.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 8045052452..cb5c500136 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -862,41 +862,6 @@ def make_log_softmax_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) -def make_maximum_tests(zip_path): - """Make a set of tests to do maximum.""" - - test_parameters = [{ - "input_dtype": [tf.float32], - "input_shape_1": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], - "input_shape_2": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], - }] - - def build_graph(parameters): - """Build the maximum op testing graph.""" - input_tensor_1 = tf.placeholder( - dtype=parameters["input_dtype"], - name="input_1", - shape=parameters["input_shape_1"]) - input_tensor_2 = tf.placeholder( - dtype=parameters["input_dtype"], - name="input_2", - shape=parameters["input_shape_2"]) - - out = tf.maximum(input_tensor_1, input_tensor_2) - return [input_tensor_1, input_tensor_2], [out] - - def build_inputs(parameters, sess, inputs, outputs): - values = [ - create_tensor_data(parameters["input_dtype"], - parameters["input_shape_1"]), - create_tensor_data(parameters["input_dtype"], - parameters["input_shape_2"]) - ] - return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) - - make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) - - def make_binary_op_tests_func(binary_operator): """Return a function that does a test on a binary operator.""" return lambda zip_path: make_binary_op_tests(zip_path, binary_operator) @@ -2012,7 +1977,6 @@ def main(unused_args): "exp.zip": make_exp_tests, "log_softmax.zip": make_log_softmax_tests, "lstm.zip": make_lstm_tests, - "maximum.zip": make_maximum_tests, } out = FLAGS.zip_to_output bin_path = FLAGS.toco diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 6697b86e79..a4a7283508 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -253,7 +253,6 @@ INSTANTIATE_TESTS(l2_pool) INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(log_softmax) -INSTANTIATE_TESTS(maximum) INSTANTIATE_TESTS(max_pool) INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 0989bfe5a3..f23249cfa1 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -863,8 +863,6 @@ std::vector> BuildOperatorList() { ops.emplace_back(new SimpleOperator("EXP", OperatorType::kExp)); ops.emplace_back(new SimpleOperator( "LOG_SOFTMAX", OperatorType::kLogSoftmax)); - ops.emplace_back(new SimpleOperator( - "MAXIMUM", OperatorType::kTensorFlowMaximum)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index f7a213ecfc..9c19f8d464 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -109,8 +109,6 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("EXP", OperatorType::kExp); CheckSimpleOperator("LOG_SOFTMAX", OperatorType::kLogSoftmax); - CheckSimpleOperator( - "MAXIMUM", OperatorType::kTensorFlowMaximum); } TEST_F(OperatorTest, BuiltinAdd) { diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index a03e731be3..a57a1e5421 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -494,7 +494,7 @@ class MutableDenseHashTable(LookupInterface): value_dtype=tf.int64, default_value=-1, empty_key=0) - sess.run(table.insert(keys, values)) + table.insert(keys, values) out = table.lookup(query_keys) print(out.eval()) ``` diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh index 8b415e6527..4ae18b2cef 100755 --- a/tensorflow/contrib/makefile/download_dependencies.sh +++ b/tensorflow/contrib/makefile/download_dependencies.sh @@ -34,7 +34,7 @@ PROTOBUF_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/protobuf/. RE2_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)" FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" -CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror.bazel | head -n1)" +CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)" # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64, # so work around it by patching the source. diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 7a7683c953..5a812af4e9 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -258,7 +258,6 @@ tensorflow/core/kernels/requantize.cc tensorflow/core/kernels/remote_fused_graph_execute_op.cc tensorflow/core/kernels/remote_fused_graph_execute_utils.cc tensorflow/core/kernels/batch_matmul_op_real.cc -tensorflow/core/kernels/random_op.cc tensorflow/core/ops/training_ops.cc tensorflow/core/ops/string_ops.cc tensorflow/core/ops/state_ops.cc diff --git a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc index a9a32b7b25..dfa12e873a 100644 --- a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc +++ b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc @@ -74,7 +74,7 @@ class GatherTreeOp : public OpKernel { ctx, step_ids_shape.dim_size(1) == max_sequence_lengths.shape().dim_size(0), errors::InvalidArgument("batch size dimensions step_ids.shape[1] and " - "max_sequence_lengths.shape[0] must match. " + "max_seqeuence_lengths.shape[0] must match. " "but shapes are: ", step_ids_shape.DebugString(), " and ", max_sequence_lengths.shape().DebugString())); diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index be53779826..9ff8a343f1 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -736,7 +736,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): """Base attention mechanism for monotonic attention. Simply overrides the initial_alignments function to provide a dirac - distribution, which is needed in order for the monotonic attention + distribution,which is needed in order for the monotonic attention distributions to have the correct behavior. """ @@ -763,7 +763,7 @@ class _BaseMonotonicAttentionMechanism(_BaseAttentionMechanism): class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Bahadanau-style energy function. - This type of attention enforces a monotonic constraint on the attention + This type of attention encorces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -867,7 +867,7 @@ class BahdanauMonotonicAttention(_BaseMonotonicAttentionMechanism): class LuongMonotonicAttention(_BaseMonotonicAttentionMechanism): """Monotonic attention mechanism with Luong-style energy function. - This type of attention enforces a monotonic constraint on the attention + This type of attention encorces a monotonic constraint on the attention distributions; that is once the model attends to a given point in the memory it can't attend to any prior points at subsequence output timesteps. It achieves this by using the _monotonic_probability_fn instead of softmax to @@ -1133,7 +1133,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): output_attention: Python bool. If `True` (default), the output at each time step is the attention value. This is the behavior of Luong-style attention mechanisms. If `False`, the output at each time step is - the output of `cell`. This is the behavior of Bhadanau-style + the output of `cell`. This is the beahvior of Bhadanau-style attention mechanisms. In both cases, the `attention` tensor is propagated to the next time step via the state and is used there. This flag only controls whether the attention mechanism is propagated diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 184144f64a..a26107b0d7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -821,9 +821,9 @@ def _get_scores(log_probs, sequence_lengths, length_penalty_weight): Returns: The scores normalized by the length_penalty. """ - length_penalty_ = _length_penalty( + length_penality_ = _length_penalty( sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight) - return log_probs / length_penalty_ + return log_probs / length_penality_ def _length_penalty(sequence_lengths, penalty_factor): @@ -860,7 +860,7 @@ def _mask_probs(probs, eos_token, finished): unfinished beams remain unchanged. Args: - probs: Log probabilities of shape `[batch_size, beam_width, vocab_size]` + probs: Log probabiltiies of shape `[batch_size, beam_width, vocab_size]` eos_token: An int32 id corresponding to the EOS token to allocate probability to. finished: A boolean tensor of shape `[batch_size, beam_width]` that diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index 99ad487630..b3343aef47 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -115,8 +115,8 @@ class ParallelReader(io_ops.ReaderBase): reader needs to start reading from a new file since it has finished with the previous file). - A queue runner for enqueuing in the `common_queue` is automatically added - to the TF QueueRunners collection. + A queue runner for enqueing in the `common_queue` is automatically added to + the TF QueueRunners collection. Args: queue: A Queue or a mutable string Tensor representing a handle diff --git a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py index 62bd200361..37e9c4754c 100644 --- a/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py +++ b/tensorflow/contrib/slim/python/slim/data/prefetch_queue.py @@ -36,9 +36,9 @@ def prefetch_queue(tensors, dynamic_pad=False, shared_name=None, name=None): - """Creates a queue to prefetch tensors from `tensors`. + """Creates a queue to prefetech tensors from `tensors`. - A queue runner for enqueuing tensors into the prefetch_queue is automatically + A queue runner for enqueing tensors into the prefetch_queue is automatically added to the TF QueueRunners collection. Example: diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index f2d31dc8db..b3b61e1dfe 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -124,7 +124,7 @@ class BoundingBox(ItemHandler): super(BoundingBox, self).__init__(self._full_keys) def tensors_to_item(self, keys_to_tensors): - """Maps the given dictionary of tensors to a concatenated list of bboxes. + """Maps the given dictionary of tensors to a contatenated list of bboxes. Args: keys_to_tensors: a mapping of TF-Example keys to parsed tensors. diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 6eafc1754c..461e627e99 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -1,15 +1,15 @@ -# Using TensorRT in TensorFlow - +Using TensorRT in TensorFlow +============================ This module provides necessary bindings and introduces TRT_engine_op operator that wraps a subgraph in TensorRT. This is still a work in progress but should be useable with most common graphs. -## Compilation - +Compilation +----------- In order to compile the module, you need to have a local TensorRT -installation ( libnvinfer.so and respective include files ). During the +installation (libnvinfer.so and respective include files). During the configuration step, TensorRT should be enabled and installation path should be set. If installed through package managers (deb,rpm), configure script should find the necessary components from the system @@ -22,38 +22,4 @@ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py script - -## Installing TensorRT 3.0.4 - -In order to make use of TensorRT integration, you will need a local installation of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt). Due to compiler compatibility, you will need to download and install the TensorRT 3.0.4 tarball for _Ubuntu 14.04_, i.e., **_TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz_**, even if you are using Ubuntu 16.04 or later. - -### Preparing TensorRT installation - -Once you have downloaded TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz, you will need to unpack it to an installation directory, which will be referred to as . Please replace with the full path of actual installation directory you choose in commands below. - -```shell -cd && tar -zxf /path/to/TensorRT-3.0.4.Ubuntu-14.04.5.x86_64.cuda-9.0.cudnn7.0-tar.gz -``` - -After unpacking the binaries, you have several options to use them: - -#### To run TensorFlow as a user without superuser privileges - -For a regular user without any sudo rights, you should add TensorRT to your `$LD_LIBRARY_PATH`: - - ```shell - export LD_LIBRARY_PATH=/TensorRT-3.0.4/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} - ``` - -Then you are ready to use TensorFlow-TensorRT integration. `$LD_LIBRARY_PATH` must contain the path to TensorRT installation for TensorFlow-TensorRT integration to work. If you are using a VirtualEnv-like setup, you can add the command above to your `bin/activate` script or to your `.bashrc` script. - -#### To run TensorFlow as a superuser - - When running as a superuser, such as in a container or via sudo, the `$LD_LIBRARY_PATH` approach above may not work. The following is preferred when the user has superuser privileges: - - ```shell - echo "/TensorRT-3.0.4/lib" | sudo tee /etc/ld.so.conf.d/tensorrt304.conf && sudo ldconfig - ``` - - Please ensure that any existing deb package installation of TensorRT is removed before following these instructions to avoid package conflicts. \ No newline at end of file +will be available. An example use can be found in test/test_tftrt.py directory diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index ff8cc6374d..eea8c8efa2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -49,13 +49,12 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::Node* node) { +bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { "Identity", - "Snapshot", "Const", "Conv2D", "MaxPool", @@ -75,7 +74,7 @@ bool IsTensorRTCandidate(const tensorflow::Node* node) { // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) - return candidate_ops.count(node->type_string()); + return candidate_ops.count(node_def.op()); } void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, @@ -85,10 +84,10 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->in_edges()) { if (!subgraph_node_ids.count(edge->src()->id()) && - !edge->src()->IsSource() && !edge->IsControlEdge()) { + !edge->src()->IsSource()) { incoming_edges->insert(edge); } else { - VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; + VLOG(2) << edge->src()->name() << " N, "; } } } @@ -101,11 +100,11 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && - !edge->dst()->IsSink() && !edge->IsControlEdge()) { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; + !edge->dst()->IsSink()) { + VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; + VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -410,9 +409,8 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::Status status = ConvertSubGraphToTensorRT(&p); if (status != tensorflow::Status::OK()) { LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \"" << status.ToString() - << "\" SKIPPING......( " << subgraph_node_names.size() - << " nodes)"; + << " due to: \n" + << status.ToString() << " SKIPPING......"; } count++; } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 370911e4d9..92a692baa7 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -53,8 +53,8 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { -using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; + namespace { inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, @@ -430,8 +430,9 @@ class Converter { tensorflow::tensorrt::TRTWeightStore* weight_store_; bool fp16_; void register_op_converters(); - tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def, - std::vector* inputs) { + std::vector get_inputs( + const tensorflow::NodeDef& node_def) { + std::vector inputs; for (auto const& input_name : node_def.input()) { /************************************************************************* * TODO(jie) handle case 1) here @@ -452,17 +453,13 @@ class Converter { VLOG(2) << "retrieve input: " << name; if (trt_tensors_.count(name)) { - inputs->push_back(trt_tensors_.at(name)); + inputs.push_back(trt_tensors_.at(name)); } else { - string str("Node "); - StrAppend(&str, node_def.name(), " should have an input named '", name, - "' but it is not available"); - LOG(WARNING) << "input: " << name << " not available for node at " - << node_def.name(); - return tensorflow::errors::InvalidArgument(str); + LOG(FATAL) << "input: " << name << " not available for node at, " + << node_def.name(); } } - return tensorflow::Status::OK(); + return inputs; } public: @@ -486,8 +483,7 @@ class Converter { } tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) { - std::vector inputs; - TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); + std::vector inputs = this->get_inputs(node_def); string op = node_def.op(); if (!op_registry_.count(op)) { return tensorflow::errors::Unimplemented( @@ -552,19 +548,6 @@ class Converter { } }; -TRT_ShapedWeights ConvertFP32ToFP16(Converter& ctx, - const TRT_ShapedWeights& weights_src) { - auto dtype_new = tensorflow::DataType::DT_HALF; - TRT_ShapedWeights weights = - ctx.get_temp_weights(dtype_new, weights_src.shape_); - const float* src = static_cast(weights_src.GetValues()); - Eigen::half* dst = const_cast( - static_cast(weights.GetValues())); - for (int64_t i = 0; i < weights_src.count(); i++) { - dst[i] = Eigen::half_impl::float_to_half_rtne(src[i]); - } - return weights; -} // **************************************************************************** // Constant folding functions // TODO(jie): once optimizer kicks in, we should have done constant folding @@ -892,7 +875,7 @@ tensorflow::Status BinaryTensorOpWeight( // Check type consistency nvinfer1::DataType ttype; - TF_RETURN_IF_ERROR(ConvertDType(weights.type_, &ttype)); + TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); // Check scale mode auto dims_w = weights.shape_; @@ -974,10 +957,6 @@ tensorflow::Status BinaryTensorOpWeight( } } - if (ctx.isFP16()) { - weights = ConvertFP32ToFP16(ctx, weights); - } - // prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); @@ -1019,7 +998,9 @@ enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, - std::vector* outputs, int group) { + std::vector* outputs, + int group // group ==0 specifies depthwise conv +) { const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); @@ -1044,10 +1025,6 @@ tensorflow::Status ConvertConv2DHelper( VLOG(2) << "groups count: " << num_groups; TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - if (ctx.isFP16()) { - weights_rsck = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); - } - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); TRT_ShapedWeights biases(weights.type_); @@ -1157,9 +1134,9 @@ tensorflow::Status BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1318,11 +1295,8 @@ tensorflow::Status ConvertScale(Converter& ctx, // Implement tensor binaryOp weight [channel wise] for now; const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); - if (ctx.isFP16()) { - weights = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); - } - TRT_ShapedWeights empty_weights(weights.type_); TFAttrs attrs(node_def); @@ -1402,11 +1376,8 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - string err_str("Broadcast method is not supported for '"); - StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); - return tensorflow::errors::InvalidArgument(err_str); + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } } } else { @@ -1420,16 +1391,33 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + auto half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + half_tensor.device(defd) = + tensor.flat().template cast(); + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } } else if (!weights_tensor.int_val().empty()) { VLOG(2) << "int!!!" << node_def.name(); nvinfer1::Dims scalar_shape; @@ -1444,11 +1432,8 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.int_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - string err_str("Broadcast method is not supported for '"); - StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); - return tensorflow::errors::InvalidArgument(err_str); + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } } } else { @@ -1462,23 +1447,62 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - // we should not have converted //if (ctx.isFP16()) { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val().end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + if (ctx.isFP16()) { + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + TTypes::Flat half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + switch (dtype) { + case (tensorflow::DT_INT32): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT16): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_UINT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + default: + return tensorflow::errors::InvalidArgument( + "Datatype " + tensorflow::DataTypeString(dtype) + + " for FP16 conversion"); + break; + }; + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); + } else { + size_t len_data = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); + len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val() + .end()); // make a local copy first to flatten + // doesn't have to be contiguous + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } } else if (!weights_tensor.tensor_content().empty()) { - // obsolete method. - // After optimization path, we do not see weights in this format. - // fp16 conversion technically should be needed here. VLOG(2) << "TENSOR!!!" << node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1760,6 +1784,8 @@ tensorflow::Status ConvertConcat(Converter& ctx, TRT_ShapedWeights axis = inputs.at(input_size).weights(); TFAttrs attrs(node_def); + // auto attr_size = attrs.at("N")->i(); + // auto data_type = attrs.get("T"); auto index_type = attrs.get("Tidx"); // TODO(jie): handle data type @@ -1849,103 +1875,71 @@ tensorflow::Status ConvertFusedBatchNorm( "only is_training=false is supported, at " + node_def.name()); } nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - - // Check parameter types - auto parameter_type = inputs.at(1).weights().type_; - if ((parameter_type != tensorflow::DataType::DT_FLOAT) && - (parameter_type != tensorflow::DataType::DT_HALF)) { - return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + tensorflow::DataTypeString(parameter_type)); - } - for (int i = 1; i < 5; i++) { - if (inputs.at(i).weights().type_ != parameter_type) { - return tensorflow::errors::Unimplemented( - "Inconsistent parameter type for batchnormis not supported, at: " + - node_def.name()); - } - } - - TRT_ShapedWeights dummy_power_weights(parameter_type); - size_t nweight = 0; - for (int i = 1; i < 5; i++) { - nweight = std::max(nweight, (size_t)inputs.at(i).weights().count()); - } - TRT_ShapedWeights* ptr_shape_weights = nullptr; - for (int i = 1; i < 5; i++) { - if (inputs.at(i).weights().count() == nweight) { - ptr_shape_weights = - const_cast(&(inputs.at(i).weights())); - } else if (inputs.at(i).weights().count() != 1) { - return tensorflow::errors::InvalidArgument( - "Inconsistent batchnorm parameter count, at: " + node_def.name()); - } - } - // We could technically have two weights with different shape. - // that requires two addScale op, arguably less performant + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + TRT_ShapedWeights variance_weights = inputs.at(4).weights(); + TRT_ShapedWeights dummy_power_weights(scale_weights.type_); TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(*ptr_shape_weights); + ctx.get_temp_weights_like(scale_weights); TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(*ptr_shape_weights); - - const Eigen::half* cast_vals_array[4]; - const float* vals_array[4]; - for (int j = 0; j < 4; j++) { - cast_vals_array[j] = - static_cast(inputs.at(j + 1).weights().GetValues()); - vals_array[j] = - static_cast(inputs.at(j + 1).weights().GetValues()); - } - Eigen::half* cast_combined_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - Eigen::half* cast_combined_offset_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - float* combined_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - float* combined_offset_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - - for (size_t i = 0; i < nweight; ++i) { - float batchnorm_data[4]; - for (int j = 0; j < 4; j++) { - if (inputs.at(j + 1).weights().count() != 1) { - if (parameter_type == tensorflow::DT_FLOAT) { - batchnorm_data[j] = vals_array[j][i]; - } else if (parameter_type == tensorflow::DT_HALF) { - batchnorm_data[j] = - Eigen::half_impl::half_to_float(cast_vals_array[j][i]); - } - } else { - if (parameter_type == tensorflow::DT_FLOAT) { - batchnorm_data[j] = vals_array[j][0]; - } else if (parameter_type == tensorflow::DT_HALF) { - batchnorm_data[j] = - Eigen::half_impl::half_to_float(cast_vals_array[j][0]); - } - } + ctx.get_temp_weights_like(offset_weights); + size_t nweight = scale_weights.count(); + if ((scale_weights.type_ == offset_weights.type_) && + (mean_weights.type_ == variance_weights.type_) && + (scale_weights.type_ == variance_weights.type_)) { + if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && + (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + + tensorflow::DataTypeString(scale_weights.type_)); } - float scale = batchnorm_data[0]; - float offset = batchnorm_data[1]; - float mean = batchnorm_data[2]; - float variance = batchnorm_data[3]; - float combined_scale_val = scale / sqrtf(variance + epsilon); - float combined_offset_val = offset - mean * combined_scale_val; - if (parameter_type == tensorflow::DT_FLOAT) { - combined_scale_vals[i] = combined_scale_val; - combined_offset_vals[i] = combined_offset_val; - } else if (parameter_type == tensorflow::DT_HALF) { - cast_combined_scale_vals[i] = Eigen::half(combined_scale_val); - cast_combined_offset_vals[i] = Eigen::half(combined_offset_val); + if (scale_weights.type_ == tensorflow::DT_FLOAT) { + for (size_t i = 0; i < nweight; ++i) { + float scale = (static_cast(scale_weights.GetValues()))[i]; + float offset = + (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = + (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + } else { + const Eigen::half* scale_vals = + (static_cast(scale_weights.GetValues())); + const Eigen::half* off_vals = + (static_cast(offset_weights.GetValues())); + const Eigen::half* mean_vals = + (static_cast(mean_weights.GetValues())); + const Eigen::half* variance_vals = + (static_cast(variance_weights.GetValues())); + Eigen::half* comb_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* comb_off_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + for (size_t i = 0; i < nweight; ++i) { + float scale(scale_vals[i]); + float offset(off_vals[i]); + float mean(mean_vals[i]); + float variance(variance_vals[i]); + float combined_scale_ref = scale / sqrtf(variance + epsilon); + comb_scale_vals[i] = Eigen::half(combined_scale_ref); + float combined_offset_ref = offset - mean * combined_scale_ref; + comb_off_vals[i] = Eigen::half(combined_offset_ref); + } } } - - nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM - : nvinfer1::ScaleMode::kCHANNEL; - nvinfer1::IScaleLayer* layer = - ctx.network()->addScale(*const_cast(tensor), mode, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -2056,7 +2050,6 @@ void Converter::register_op_converters() { op_registry_["Const"] = ConvertConst; // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed - op_registry_["Snapshot"] = ConvertIdentity; // Snapshot should be removed // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -2150,11 +2143,8 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->thr_->join(); delete calib_res->thr_; if (!calib_res->engine_) { - LOG(ERROR) << "Calibration failed!, engine does not exist. Did you run " + LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " "calibration graph?"; - return tensorflow::errors::FailedPrecondition( - "Calibration graph needs to be executed on" - " calibration data before convertsion to inference graph"); } auto weight_rmgr = trt_rm->getManager("WeightStore"); TF_CHECK_OK(weight_rmgr->Delete( @@ -2191,7 +2181,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( return status; } auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_RETURN_IF_ERROR(status); + TF_CHECK_OK(status); for (size_t i = 0; i < out_edges.size(); i++) { VLOG(1) << "Connecting trt_engine_node output " << i << " with " << out_edges.at(i)->dst()->name() << " port " @@ -2289,12 +2279,6 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = ConvertDType(tf_dtype, &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Data type conversion for input '" << node_name - << "' failed"; - return type_status; - } TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "accessing output index of: " << output_idx @@ -2362,8 +2346,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node'" + tensor_name + - "' is weights not tensor"); + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2520,11 +2504,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = ConvertDType(tf_dtype, &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Type conversion failed for " << node_name; - return type_status; - } + TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name @@ -2535,12 +2515,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(jie): TRT 3.x only support 4 dimensional input tensor. // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) { - string err_str = "Require 4 dimensional input."; - StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", - shape_inference_node_name); - return tensorflow::errors::Unimplemented(err_str); - } + if (op_info.shape().dim_size() != 4) + return tensorflow::errors::Unimplemented("require 4 dimensional input"); for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i @@ -2601,8 +2577,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + - "' is weights not tensor"); + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2646,8 +2622,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } TF_RETURN_IF_ERROR(weight_rmgr->Delete( engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name << " containing " - << s.subgraph_node_ids.size() << " nodes"; + LOG(INFO) << "finished engine " << engine_name; // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 8fc4697c51..6193f0b0a1 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -80,20 +80,13 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector in_edges(dst->in_edges().begin(), dst->in_edges().end()); for (const tensorflow::Edge* in_edge : in_edges) { - if (in_edge->IsControlEdge()) { - if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); - graph->AddControlEdge(e->src(), src); - } - } else { - if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); - if (e->src() == graph->source_node()) { - graph->AddEdge(e->src(), e->src_output(), src, - tensorflow::Graph::kControlSlot); - } else { - graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); - } + if (in_edge->src() != src) { + tensorflow::Edge* e = const_cast(in_edge); + if (e->src() == graph->source_node()) { + graph->AddEdge(e->src(), e->src_output(), src, + tensorflow::Graph::kControlSlot); + } else { + graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); } } } @@ -101,19 +94,12 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector out_edges(dst->out_edges().begin(), dst->out_edges().end()); for (const tensorflow::Edge* out_edge : out_edges) { - if (out_edge->IsControlEdge()) { - tensorflow::Edge* e = const_cast(out_edge); - graph->AddControlEdge(src, e->dst()); + tensorflow::Edge* e = const_cast(out_edge); + if (e->dst() == graph->sink_node()) { + graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), + e->dst_input()); } else { - tensorflow::Edge* e = const_cast(out_edge); - if (e->dst() == graph->sink_node()) { - VLOG(1) << " edge to sink node " << src->name() << " -> " - << e->dst()->name(); - graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), - e->dst_input()); - } else { - graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); - } + graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); } } @@ -132,7 +118,7 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments) { // Create a Graph representation of the GraphDef. tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), @@ -150,7 +136,7 @@ tensorflow::Status SegmentGraph( for (int i = 0; i < graph.num_node_ids(); ++i) { tensorflow::Node* node = graph.FindNodeId(i); if (options.exclude_node_list.count(node->name()) != 0 || - !candidate_fn(node)) { + !candidate_fn(node->def())) { node = nullptr; } node_segments.emplace_back(node); @@ -169,7 +155,7 @@ tensorflow::Status SegmentGraph( for (const tensorflow::Node* node : order) { // All output nodes of 'node' have been visited... - VLOG(2) << "Trying node " << node->name() << " id=" << node->id(); + VLOG(2) << "Trying node " << node->name(); // 'node' must be a TRT candidate... if (node_segments[node->id()].Value() == nullptr) { @@ -183,12 +169,8 @@ tensorflow::Status SegmentGraph( while (true) { std::set contract_edges; for (const tensorflow::Edge* out_edge : node->out_edges()) { - VLOG(2) << "... out node " << out_edge->dst()->name() << " ( " - << out_edge->dst()->id() << " <- " << node->id() << " )"; - if (out_edge->IsControlEdge()) { - VLOG(2) << "... ... Control Edge, Skipping"; - continue; - } + VLOG(2) << "... out node " << out_edge->dst()->name(); + // Out node must be TRT candidate... if (node_segments[out_edge->dst()->id()].Value() == nullptr) { VLOG(2) << "... ... not a TRT candidate"; @@ -214,8 +196,7 @@ tensorflow::Status SegmentGraph( const tensorflow::Node* src = contract_edge->src(); const tensorflow::Node* dst = contract_edge->dst(); - VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " (" - << src->id() << " <- " << dst->id(); + VLOG(2) << "Merge " << src->name() << " <- " << dst->name(); node_segments[src->id()].Merge(&node_segments[dst->id()]); // Contracting the edge leaves disconnected graph edges. diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index 7e8685f44a..ee6e2b3ed2 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -20,12 +20,10 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { - namespace tensorrt { namespace segment { @@ -48,7 +46,7 @@ struct SegmentOptions { // @return the status. tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments); } // namespace segment diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc index 7ddabec268..74cbc5f2b3 100644 --- a/tensorflow/contrib/tensorrt/segment/segment_test.cc +++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc @@ -35,7 +35,7 @@ class SegmentTest : public ::testing::Test { TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name); - std::function MakeCandidateFn( + std::function MakeCandidateFn( const std::set& node_names); protected: @@ -60,10 +60,10 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph, return ret; } -std::function SegmentTest::MakeCandidateFn( +std::function SegmentTest::MakeCandidateFn( const std::set& node_names) { - return [node_names](const Node* node) -> bool { - return node_names.find(node->name()) != node_names.end(); + return [node_names](const NodeDef& node) -> bool { + return node_names.find(node.name()) != node_names.end(); }; } diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py index 4f6527a546..ff140efd48 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py @@ -70,7 +70,7 @@ class ARModel(model.TimeSeriesModel): input_window_size: Number of past time steps of data to look at when doing the regression. output_window_size: Number of future time steps to predict. Note that - setting it to > 1 empirically seems to give a better fit. + setting it to > 1 empiricaly seems to give a better fit. num_features: number of input features per time step. num_time_buckets: Number of buckets into which to divide (time % periodicity) for generating time based features. diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 26793c80bf..23452a81c3 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -185,7 +185,7 @@ def batch_matrix_pow(matrices, powers): { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p power(A, 0) = I - The power(A, 0) = I case is handled by starting with accumulator set to the + The power(A, 0) = I case is handeled by starting with accumulator set to the identity matrix; matrices with zero residual powers are passed through unchanged. diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py index 6746dd7b43..1afc58cfb2 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/varma.py @@ -107,7 +107,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state transition matrix. It has shape - [self.state_dimension, self.state_dimension]. + [self.state_dimendion, self.state_dimension]. """ # Pad any unused AR blocks with zeros. The extra state is necessary if # ma_order >= ar_order. @@ -127,7 +127,7 @@ class VARMA(state_space_model.StateSpaceModel): Returns: the state noise transform matrix. It has shape - [self.state_dimension, self.num_features]. + [self.state_dimendion, self.num_features]. """ # Noise is broadcast, through the moving average coefficients, to # un-observed parts of the latent state. diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt index e667c328ae..51d91399f8 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixSolveLs.pbtxt @@ -49,14 +49,14 @@ in the batch: If `fast` is `True`, then the solution is computed by solving the normal equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). -If \\(m \lt n\\) then `output` is computed as +problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + +\lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the minimum-norm solution to the under-determined linear system, i.e. \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), subject to \\(A Z = B\\). Notice that the fast path is only numerically stable when \\(A\\) is numerically full rank and has a condition number -\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is +\\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is sufficiently large. If `fast` is `False` an algorithm based on the numerically robust complete diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc index 829c19204a..43a909466e 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc @@ -19,6 +19,9 @@ limitations under the License. namespace tensorflow { +constexpr const char* MklCPUAllocator::kMaxLimitStr; +constexpr const size_t MklCPUAllocator::kDefaultMaxLimit; + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 2fb17c2b02..623248b6ce 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -1210,7 +1210,7 @@ Status ConcatV2Shape(InferenceContext* c) { c->num_inputs() - 1 /* dim_index */); } -Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index) { +Status BroadcastBinaryOpShapeFn(InferenceContext* c) { ShapeHandle shape_x = c->input(0); ShapeHandle shape_y = c->input(1); if (!c->RankKnown(shape_x) || !c->RankKnown(shape_y)) { @@ -1272,7 +1272,7 @@ Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index) { } } - c->set_output(output_index, c->MakeShape(dims)); + c->set_output(0, c->MakeShape(dims)); return Status::OK(); } diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 789746b403..293c40e04d 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -265,15 +265,9 @@ Status ConcatShape(shape_inference::InferenceContext* c, // Shape function for concat operations. Status ConcatV2Shape(shape_inference::InferenceContext* c); -// Shape function for binary operators that broadcast their inputs -// and with output to output_index. -Status BroadcastBinaryOpOutputShapeFn(InferenceContext* c, int output_index); - // Shape function for binary operators that broadcast their inputs. // Tested by ops/math_ops_test.cc. -inline Status BroadcastBinaryOpShapeFn(InferenceContext* c) { - return BroadcastBinaryOpOutputShapeFn(c, 0); -} +Status BroadcastBinaryOpShapeFn(InferenceContext* c); // Shape function for random operations. Status RandomShape(shape_inference::InferenceContext* c); diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index accc587000..e3cc848a16 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -317,7 +317,6 @@ class InferenceContext { input_tensors_as_shapes_ = input_tensors_as_shapes; } - ShapeHandle output(int64 idx) const { return outputs_[idx]; } void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; } Status set_output(StringPiece output_name, const std::vector& shapes); diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 62aafa7930..333a6570dc 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -933,7 +933,7 @@ class MklFusedBatchNormOp : public OpKernel { bool is_training_; T* mean_values_; T* variance_values_; - int depth_; // batch normalization is done for per channel. + size_t depth_; // batch normalization is done for per channel. void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index 7badc00572..4abfbfb1a6 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -23,13 +23,6 @@ limitations under the License. // non-GPU targets. This only breaks in clang, because it's more strict for // template code and CudaAtomicMax is used in template context. -// This file requires the following include because it uses CudaAtomicMax: -// #include "tensorflow/core/util/cuda_kernel_helper.h" - -// Unfortunately we can't add the #include, since it breaks compilation for -// non-GPU targets. This only breaks in clang, because it's more strict for -// template code and CudaAtomicMax is used in template context. - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc index fe04dcf72e..50157d5d48 100644 --- a/tensorflow/core/kernels/snapshot_op.cc +++ b/tensorflow/core/kernels/snapshot_op.cc @@ -22,26 +22,6 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; - -template -class SnapshotOp : public OpKernel { - public: - explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - const Tensor& input = context->input(0); - Tensor* output = nullptr; - // Try to use buffer forwarding to avoid an explicit copy. - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 0, input.shape(), &output)); - if (!output->SharesBufferWith(input)) { - functor::Snapshot functor; - functor(context->eigen_device(), input.flat(), - output->flat()); - } - } -}; #define REGISTER_KERNEL(TYPE) \ REGISTER_KERNEL_BUILDER( \ @@ -51,16 +31,6 @@ class SnapshotOp : public OpKernel { TF_CALL_POD_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL -#if GOOGLE_CUDA -#define REGISTER_KERNEL(TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ - SnapshotOp); - -TF_CALL_POD_TYPES(REGISTER_KERNEL); -#undef REGISTER_KERNEL -#endif - #if TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SyclDevice; #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h index a18065d42b..b94834f159 100644 --- a/tensorflow/core/kernels/snapshot_op.h +++ b/tensorflow/core/kernels/snapshot_op.h @@ -26,19 +26,29 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { -namespace functor { -// Functor used by SnapshotOp. template -struct Snapshot { - void operator()(const Device& device, - typename TTypes::ConstTensor input, - typename TTypes::Tensor output) { - device.memcpy(output.data(), input.data(), input.size() * sizeof(Scalar)); +class SnapshotOp : public OpKernel { + public: + explicit SnapshotOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + Tensor* output = nullptr; + // Try to use buffer forwarding to avoid an explicit copy. + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + // We had to allocate a new buffer since the refcount on the input was + // greater than 1. Copy the input to the new buffer. + const Device& device = context->eigen_device(); + device.memcpy(output->template flat().data(), + input.template flat().data(), + input.NumElements() * sizeof(Scalar)); + } } }; -} // namespace functor } // namespace tensorflow #endif // TENSORFLOW_KERNELS_SNAPSHOT_OP_H_ diff --git a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc index e4e3bd5220..52070be838 100644 --- a/tensorflow/core/kernels/snapshot_op_gpu.cu.cc +++ b/tensorflow/core/kernels/snapshot_op_gpu.cu.cc @@ -24,10 +24,13 @@ limitations under the License. namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -// Definition of the GPU implementations declared in softsign_op.cc. -#define DEFINE_GPU_KERNELS(T) template struct functor::Snapshot; +#define REGISTER_KERNEL(TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Snapshot").Device(DEVICE_GPU).TypeConstraint("T"), \ + SnapshotOp); -TF_CALL_POD_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_POD_TYPES(REGISTER_KERNEL); +#undef REGISTER_KERNEL } // namespace tensorflow diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc index 9a3612bd72..a6a71fdfaf 100644 --- a/tensorflow/core/kernels/xent_op.cc +++ b/tensorflow/core/kernels/xent_op.cc @@ -17,14 +17,12 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/xent_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/xent_op.h" -#include "tensorflow/core/util/bcast.h" namespace tensorflow { @@ -43,56 +41,37 @@ class SoftmaxXentWithLogitsOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& logits_in = context->input(0); const Tensor& labels_in = context->input(1); - - TensorShape shape_in = logits_in.shape(); - - BCast bcast(BCast::FromShape(logits_in.shape()), - BCast::FromShape(labels_in.shape())); - if (!logits_in.IsSameSize(labels_in)) { - OP_REQUIRES(context, bcast.IsValid(), - errors::InvalidArgument( - "logits and labels must be broadcastable: logits_size=", - logits_in.shape().DebugString(), - " labels_size=", labels_in.shape().DebugString())); - shape_in = BCast::ToShape(bcast.output_shape()); - } - OP_REQUIRES(context, TensorShapeUtils::IsMatrix(shape_in), - errors::InvalidArgument("logits and labels must be beither " - "2-dimensional, or roadcasted to " - "2-dimensional")); + OP_REQUIRES(context, logits_in.IsSameSize(labels_in), + errors::InvalidArgument( + "logits and labels must be same size: logits_size=", + logits_in.shape().DebugString(), + " labels_size=", labels_in.shape().DebugString())); + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(logits_in.shape()), + errors::InvalidArgument("logits must be 2-dimensional")); + // As we already tested that both inputs have the same shape no need to + // check that "labels" is a matrix too. // loss is 1-D (one per example), and size is batch_size. Tensor scratch; OP_REQUIRES_OK( context, context->allocate_temp(DataTypeToEnum::value, - TensorShape({shape_in.dim_size(0), 1}), + TensorShape({logits_in.dim_size(0), 1}), &scratch)); Tensor* loss_out = nullptr; OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({shape_in.dim_size(0)}), &loss_out)); + 0, TensorShape({logits_in.dim_size(0)}), &loss_out)); Tensor* back_out = nullptr; // Try to reuse the logits_in buffer for the backprop output. OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 1, shape_in, &back_out)); - if (shape_in.dim_size(0) > 0) { + {0}, 1, logits_in.shape(), &back_out)); + if (logits_in.dim_size(0) > 0) { functor::XentFunctor functor; - if (logits_in.IsSameSize(labels_in)) { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - Eigen::array{1, 1}, - Eigen::array{1, 1}, logits_in.matrix(), - labels_in.matrix(), scratch.matrix(), loss_out->vec(), - back_out->matrix()); - } else { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - BCast::ToIndexArray<2>(bcast.x_bcast()), - BCast::ToIndexArray<2>(bcast.y_bcast()), - logits_in.template shaped(bcast.x_reshape()), - labels_in.template shaped(bcast.y_reshape()), - scratch.matrix(), loss_out->vec(), back_out->matrix()); - } + functor(context->eigen_device(), logits_in.matrix(), + labels_in.matrix(), scratch.matrix(), loss_out->vec(), + back_out->matrix()); } } }; @@ -102,17 +81,13 @@ class SoftmaxXentWithLogitsOp : public OpKernel { namespace functor { template struct XentFunctorBase { - void operator()(const Device& d, - const Eigen::DSizes& shape, - const Eigen::array& logits_bcast, - const Eigen::array& labels_bcast, - typename TTypes::ConstMatrix logits, + void operator()(const Device& d, typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, - logits, labels, scratch, loss, backprop); + XentEigenImpl::Compute(d, logits, labels, scratch, loss, + backprop); } }; diff --git a/tensorflow/core/kernels/xent_op.h b/tensorflow/core/kernels/xent_op.h index 87be17fca9..e689fca7ff 100644 --- a/tensorflow/core/kernels/xent_op.h +++ b/tensorflow/core/kernels/xent_op.h @@ -18,7 +18,6 @@ limitations under the License. // Functor definition for XentOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { @@ -34,11 +33,7 @@ struct XentFunctor { // scratch: temporary tensor, dims: batch_size, 1 // loss: output tensor for the loss, dims: batch_size. // backprop: output tensor for the backprop, dims: batch_size, num_classes. - void operator()(const Device &d, - const Eigen::DSizes &shape, - const Eigen::array &logits_bcast, - const Eigen::array &labels_bcast, - typename TTypes::ConstMatrix logits, + void operator()(const Device& d, typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -50,11 +45,7 @@ struct XentFunctor { // specializations for both device types. template struct XentEigenImpl { - static void Compute(const Device &d, - const Eigen::DSizes &shape, - const Eigen::array &logits_bcast, - const Eigen::array &labels_bcast, - typename TTypes::ConstMatrix logits, + static void Compute(const Device& d, typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, @@ -66,8 +57,8 @@ struct XentEigenImpl { const int kBatchDim = 0; const int kClassDim = 1; - const int batch_size = shape[kBatchDim]; - const int num_classes = shape[kClassDim]; + const int batch_size = logits.dimension(kBatchDim); + const int num_classes = logits.dimension(kClassDim); // These arrays are used to reduce along the class dimension, and broadcast // the resulting value to all classes. @@ -93,12 +84,10 @@ struct XentEigenImpl { #endif // max_logits along classes. - scratch.reshape(batch_only).device(d) = - logits.broadcast(logits_bcast).maximum(along_class); + scratch.reshape(batch_only).device(d) = logits.maximum(along_class); // logits - max_logits. - backprop.device(d) = - logits.broadcast(logits_bcast) - scratch.broadcast(one_by_class); + backprop.device(d) = logits - scratch.broadcast(one_by_class); // sum(exp(logits - max_logits)) along classes. scratch.reshape(batch_only).device(d) = backprop.exp().sum(along_class); @@ -110,15 +99,15 @@ struct XentEigenImpl { // sum(-labels * // ((logits - max_logits) - log(sum(exp(logits - max_logits))))) // along classes - loss.device(d) = (labels.broadcast(labels_bcast) * - (scratch.log().eval().broadcast(one_by_class) - backprop)) - .eval() - .sum(along_class); + loss.device(d) = + (labels * (scratch.log().eval().broadcast(one_by_class) - backprop)) + .eval() + .sum(along_class); // backprop: prob - labels, where // prob = exp(logits - max_logits) / sum(exp(logits - max_logits)) - backprop.device(d) = (backprop.exp() / scratch.broadcast(one_by_class)) - - labels.broadcast(labels_bcast); + backprop.device(d) = + (backprop.exp() / scratch.broadcast(one_by_class)) - labels; } }; diff --git a/tensorflow/core/kernels/xent_op_gpu.cu.cc b/tensorflow/core/kernels/xent_op_gpu.cu.cc index 2c0c0b3a02..05ee7da490 100644 --- a/tensorflow/core/kernels/xent_op_gpu.cu.cc +++ b/tensorflow/core/kernels/xent_op_gpu.cu.cc @@ -31,17 +31,12 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { template struct XentFunctor { - void operator()(const GPUDevice &d, - const Eigen::DSizes &shape, - const Eigen::array &logits_bcast, - const Eigen::array &labels_bcast, - typename TTypes::ConstMatrix logits, + void operator()(const GPUDevice& d, typename TTypes::ConstMatrix logits, typename TTypes::ConstMatrix labels, typename TTypes::Matrix scratch, typename TTypes::Vec loss, typename TTypes::Matrix backprop) { - XentEigenImpl::Compute(d, shape, logits_bcast, labels_bcast, - logits, labels, scratch, loss, + XentEigenImpl::Compute(d, logits, labels, scratch, loss, backprop); } }; diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 111670c361..88d2aa3f41 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -794,35 +794,11 @@ REGISTER_OP("ReverseV2") ShapeHandle input = c->input(0); ShapeHandle axis; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &axis)); + // TODO(aselle): if input(0)'s dimension is known we could validate axis if (c->Rank(input) > 8) { return errors::InvalidArgument( "reverse does not work on tensors with more than 8 dimensions"); } - const Tensor* axis_tensor = c->input_tensor(1); - if (axis_tensor != nullptr && c->RankKnown(input)) { - int32 rank = c->Rank(input); - std::vector axis_value; - if (axis_tensor->dtype() == DT_INT32) { - axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); - } else { - axis_value = AsInt64(axis_tensor, axis_tensor->NumElements()); - } - std::vector axes_dense(c->Rank(input), false); - for (int i = 0; i < axis_value.size(); i++) { - int64 canonical_axis = - axis_value[i] < 0 ? rank + axis_value[i] : axis_value[i]; - if (canonical_axis < 0 || canonical_axis >= rank) { - return errors::InvalidArgument("'axis'[", i, "] = ", axis_value[i], - " is out of valid range [", 0, ", ", - rank - 1); - } - if (axes_dense[canonical_axis]) { - return errors::InvalidArgument("axis ", canonical_axis, - " specified more than once."); - } - axes_dense[canonical_axis] = true; - } - } c->set_output(0, input); return Status::OK(); }); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 6c2fc60bab..1f4e9753c3 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1062,27 +1062,12 @@ REGISTER_OP("SoftmaxCrossEntropyWithLogits") .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle input; - if (c->WithRank(c->input(0), 2, &input) == Status::OK() && - c->Merge(input, c->input(1), &input) == Status::OK()) { - DimensionHandle batch_size = c->Dim(input, 0); - c->set_output(0, c->Vector(batch_size)); - c->set_output(1, input); - return Status::OK(); - } - TF_RETURN_IF_ERROR(BroadcastBinaryOpOutputShapeFn(c, 1)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + TF_RETURN_IF_ERROR(c->Merge(input, c->input(1), &input)); - if (!c->RankKnown(c->output(1))) { - return errors::InvalidArgument( - "Shape must be broadcasted with rank 2, but is rank is unknown."); - } - - if (c->Rank(c->output(1)) != 2) { - return errors::InvalidArgument( - "Shape must be broadcasted with rank 2, but is rank ", - c->Rank(c->output(1))); - } - DimensionHandle batch_size = c->Dim(c->output(1), 0); + DimensionHandle batch_size = c->Dim(input, 0); c->set_output(0, c->Vector(batch_size)); + c->set_output(1, input); return Status::OK(); }); diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 289b953055..1b17a7cda6 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -410,18 +410,10 @@ TEST(NNOpsTest, SoftmaxCrossEntropyWithLogits_ShapeFn) { INFER_OK(op, "[1,?];[?,2]", "[d0_0];[d0_0,d0_1|d1_1]"); INFER_OK(op, "[?,2];[1,2]", "[d1_0];in1"); - INFER_ERROR("Shape must be broadcasted with rank 2", op, "[1,2,3];?"); - INFER_ERROR("Shape must be broadcasted with rank 2", op, "?;[1,2,3]"); - - // Broadcast example - // [1,4] and [2,4] are broadcasted to [2,4] - INFER_OK(op, "[1,4];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); - // [2,4] and [2,1] are broadcasted to [2,4] - INFER_OK(op, "[2,4];[2,1]", "[d0_0];[d0_0|d1_0,d0_1]"); - // [1,?] and [2,4] are broadcasted to [2,4] - INFER_OK(op, "[1,?];[2,4]", "[d1_0];[d1_0,d0_1|d1_1]"); - // [2,4] and [?,1] are broadcasted to [2,4] - INFER_OK(op, "[2,4];[?,1]", "[d0_0];[d0_0|d1_0,d0_1]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1,?];[2,?]"); + INFER_ERROR("Shape must be rank 2 but is rank 3", op, "[1,2,3];?"); + INFER_ERROR("Shapes must be equal rank, but are 2 and 3", op, "?;[1,2,3]"); } TEST(NNOpsTest, SparseSoftmaxCrossEntropyWithLogits_ShapeFn) { diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 40eebd1db0..22f2c02b78 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 7 +#define TF_MINOR_VERSION 6 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md index f3db5857ae..956dccb64f 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md +++ b/tensorflow/docs_src/api_guides/python/contrib.bayesflow.monte_carlo.md @@ -6,42 +6,42 @@ Monte Carlo integration and helpers. ## Background Monte Carlo integration refers to the practice of estimating an expectation with -a sample mean. For example, given random variable `Z in \\(R^k\\)` with density `p`, +a sample mean. For example, given random variable `Z in R^k` with density `p`, the expectation of function `f` can be approximated like: ``` -$$E_p[f(Z)] = \int f(z) p(z) dz$$ -$$ ~ S_n - := n^{-1} \sum_{i=1}^n f(z_i), z_i\ iid\ samples\ from\ p.$$ +E_p[f(Z)] = \int f(z) p(z) dz + ~ S_n + := n^{-1} \sum_{i=1}^n f(z_i), z_i iid samples from p. ``` -If `\\(E_p[|f(Z)|] < infinity\\)`, then `\\(S_n\\) --> \\(E_p[f(Z)]\\)` by the strong law of large -numbers. If `\\(E_p[f(Z)^2] < infinity\\)`, then `\\(S_n\\)` is asymptotically normal with -variance `\\(Var[f(Z)] / n\\)`. +If `E_p[|f(Z)|] < infinity`, then `S_n --> E_p[f(Z)]` by the strong law of large +numbers. If `E_p[f(Z)^2] < infinity`, then `S_n` is asymptotically normal with +variance `Var[f(Z)] / n`. Practitioners of Bayesian statistics often find themselves wanting to estimate -`\\(E_p[f(Z)]\\)` when the distribution `p` is known only up to a constant. For +`E_p[f(Z)]` when the distribution `p` is known only up to a constant. For example, the joint distribution `p(z, x)` may be known, but the evidence -`\\(p(x) = \int p(z, x) dz\\)` may be intractable. In that case, a parameterized -distribution family `\\(q_\lambda(z)\\)` may be chosen, and the optimal `\\(\lambda\\)` is the -one minimizing the KL divergence between `\\(q_\lambda(z)\\)` and -`\\(p(z | x)\\)`. We only know `p(z, x)`, but that is sufficient to find `\\(\lambda\\)`. +`p(x) = \int p(z, x) dz` may be intractable. In that case, a parameterized +distribution family `q_lambda(z)` may be chosen, and the optimal `lambda` is the +one minimizing the KL divergence between `q_lambda(z)` and +`p(z | x)`. We only know `p(z, x)`, but that is sufficient to find `lambda`. ## Log-space evaluation and subtracting the maximum Care must be taken when the random variable lives in a high dimensional space. -For example, the naive importance sample estimate `\\(E_q[f(Z) p(Z) / q(Z)]\\)` -involves the ratio of two terms `\\(p(Z) / q(Z)\\)`, each of which must have tails -dropping off faster than `\\(O(|z|^{-(k + 1)})\\)` in order to have finite integral. +For example, the naive importance sample estimate `E_q[f(Z) p(Z) / q(Z)]` +involves the ratio of two terms `p(Z) / q(Z)`, each of which must have tails +dropping off faster than `O(|z|^{-(k + 1)})` in order to have finite integral. This ratio would often be zero or infinity up to numerical precision. For that reason, we write ``` -$$Log E_q[ f(Z) p(Z) / q(Z) ]$$ -$$ = Log E_q[ \exp\{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C\} ] + C,$$ where -$$C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ].$$ +Log E_q[ f(Z) p(Z) / q(Z) ] + = Log E_q[ exp{Log[f(Z)] + Log[p(Z)] - Log[q(Z)] - C} ] + C, where +C := Max[ Log[f(Z)] + Log[p(Z)] - Log[q(Z)] ]. ``` The maximum value of the exponentiated term will be 0.0, and the expectation diff --git a/tensorflow/docs_src/api_guides/python/contrib.losses.md b/tensorflow/docs_src/api_guides/python/contrib.losses.md index 8b7442216c..d7f862625e 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.losses.md +++ b/tensorflow/docs_src/api_guides/python/contrib.losses.md @@ -107,19 +107,19 @@ weighted average over the individual prediction errors: loss = tf.contrib.losses.mean_squared_error(predictions, depths, weight) ``` -* @{tf.contrib.losses.absolute_difference} -* @{tf.contrib.losses.add_loss} -* @{tf.contrib.losses.hinge_loss} -* @{tf.contrib.losses.compute_weighted_loss} -* @{tf.contrib.losses.cosine_distance} -* @{tf.contrib.losses.get_losses} -* @{tf.contrib.losses.get_regularization_losses} -* @{tf.contrib.losses.get_total_loss} -* @{tf.contrib.losses.log_loss} -* @{tf.contrib.losses.mean_pairwise_squared_error} -* @{tf.contrib.losses.mean_squared_error} -* @{tf.contrib.losses.sigmoid_cross_entropy} -* @{tf.contrib.losses.softmax_cross_entropy} -* @{tf.contrib.losses.sparse_softmax_cross_entropy} +@{tf.contrib.losses.absolute_difference} +@{tf.contrib.losses.add_loss} +@{tf.contrib.losses.hinge_loss} +@{tf.contrib.losses.compute_weighted_loss} +@{tf.contrib.losses.cosine_distance} +@{tf.contrib.losses.get_losses} +@{tf.contrib.losses.get_regularization_losses} +@{tf.contrib.losses.get_total_loss} +@{tf.contrib.losses.log_loss} +@{tf.contrib.losses.mean_pairwise_squared_error} +@{tf.contrib.losses.mean_squared_error} +@{tf.contrib.losses.sigmoid_cross_entropy} +@{tf.contrib.losses.softmax_cross_entropy} +@{tf.contrib.losses.sparse_softmax_cross_entropy} diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 6f2107ef40..003e0a25ec 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -477,29 +477,31 @@ should use Markdown in the docstring. Here's a simple example: - def foo(x, y, name="bar"): - """Computes foo. +```python +def foo(x, y, name="bar"): + """Computes foo. - Given two 1-D tensors `x` and `y`, this operation computes the foo. + Given two 1-D tensors `x` and `y`, this operation computes the foo. - Example: + Example: - ``` - # x is [1, 1] - # y is [2, 2] - tf.foo(x, y) ==> [3, 3] - ``` - Args: - x: A `Tensor` of type `int32`. - y: A `Tensor` of type `int32`. - name: A name for the operation (optional). + ``` + # x is [1, 1] + # y is [2, 2] + tf.foo(x, y) ==> [3, 3] + ``` + Args: + x: A `Tensor` of type `int32`. + y: A `Tensor` of type `int32`. + name: A name for the operation (optional). - Returns: - A `Tensor` of type `int32` that is the foo of `x` and `y`. + Returns: + A `Tensor` of type `int32` that is the foo of `x` and `y`. - Raises: - ValueError: If `x` or `y` are not of type `int32`. - """ + Raises: + ValueError: If `x` or `y` are not of type `int32`. + """ +``` ## Description of the docstring sections diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 9059b3f3b6..0481c97885 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 2e47a6d212..8f89898c92 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index eff066d200..0ee9c849e1 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.7.0-rc1 + 1.6.0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.7.0-rc1 + 1.6.0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.7.0-rc1 + 1.6.0 org.tensorflow libtensorflow_jni_gpu - 1.7.0-rc1 + 1.6.0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.7.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.7.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.7.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.7.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.7.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.7.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 27b696696d..5e9a84bff6 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -165,7 +165,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -270,7 +270,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -456,7 +456,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -630,14 +630,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -649,14 +649,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -668,14 +668,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
 
@@ -687,14 +687,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 7060ef43da..55b460e189 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -118,8 +118,8 @@ Take the following steps to install TensorFlow with Virtualenv: Python 2.7, the command to install TensorFlow in the active Virtualenv is as follows: -
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+
 $ pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -241,8 +241,8 @@ take the following steps: you are installing TensorFlow for macOS and Python 2.7 issue the following command: -
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl 
+
 $ sudo pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl 
If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl @@ -524,7 +524,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
 
@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.7.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 148f80efe2..a7f33819b4 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -350,10 +350,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.7.0rc1 on Linux: +for TensorFlow 1.6.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.7.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
 
## Validate your installation @@ -450,8 +450,6 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
- - @@ -471,7 +469,6 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
- @@ -486,8 +483,6 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
- - diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index 778e4d3a62..ca9cb043e9 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -233,8 +233,6 @@ order by how long they took. From left to right, the columns are: - The cumulative total time of this and the previous ops in the table. This is handy for understanding what the distribution of work is across the layers, to see if just a few of the nodes are taking up most of the time. - -- The amount of memory consumed by outputs of this type of op. - Name of the node. diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 8b22c04d87..360ee302aa 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -60,7 +60,7 @@ and serialized as protocol buffers: the `NodeDef`, so if all the `Variable` weights are converted to `Const` nodes, then we only need a single `GraphDef` file to hold the model architecture and the weights. Freezing the graph handles the process of loading the - checkpoints, and then converts all Variables to Consts. You can then load the + checkpoints, and then converts all Consts to Variables. You can then load the resulting file in a single call, without having to restore variable values from checkpoints. One thing to watch out for with `GraphDef` files is that sometimes they’re stored in text format for easy inspection. These versions diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a0dd409205..4f61c01f65 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1065,7 +1065,7 @@ py_test( py_test( name = "framework_importer_test", - size = "large", + size = "medium", srcs = ["framework/importer_test.py"], main = "framework/importer_test.py", srcs_version = "PY2AND3", diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 64c1760d5e..d0ba8020c1 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -315,39 +315,21 @@ class ReverseV2Test(test_util.TensorFlowTestCase): self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1]) self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1]) - # This test covers the axis validation in the shape function - # (no eval()) - def testInvalidAxis(self): - x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) - with self.assertRaisesRegexp(ValueError, - "is out of valid range"): - array_ops.reverse_v2(x_np, [-30]) - with self.assertRaisesRegexp(ValueError, - "is out of valid range"): - array_ops.reverse_v2(x_np, [2]) - with self.assertRaisesRegexp(ValueError, - "axis 0 specified more than once"): - array_ops.reverse_v2(x_np, [0, -2]) - # This is the version of reverse that uses axis indices rather than # bool tensors # TODO(b/32254538): Change this test to use array_ops.reverse - # - # Note: this test passes placeholder as constant axis is validated - # in shape function (see testInvalidAxis) def testInvalid(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) - axis = array_ops.placeholder(dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]}) + array_ops.reverse_v2(x_np, [-30]).eval() with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]}) + array_ops.reverse_v2(x_np, [2]).eval() with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis 0 specified more than once"): - array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]}) + array_ops.reverse_v2(x_np, [0, -2]).eval() def testReverse1DimAuto(self): for dtype in [ @@ -908,7 +890,7 @@ class StridedSliceAssignChecker(object): var = resource_variable_ops.ResourceVariable(self.x) else: var = variables.Variable(self.x) - sess.run(variables.variables_initializer([var])) + sess.run(variables.initialize_variables([var])) val = sess.run(var[index].assign(value)) # val_copy is used to check that tf.assign works equivalently to the # assign method above. diff --git a/tensorflow/python/kernel_tests/testdata/BUILD b/tensorflow/python/kernel_tests/testdata/BUILD index 45264c773a..a4a0dfc139 100644 --- a/tensorflow/python/kernel_tests/testdata/BUILD +++ b/tensorflow/python/kernel_tests/testdata/BUILD @@ -1,7 +1,7 @@ # Data files for kernel tests. package( - default_visibility = ["//visibility:public"], + default_visibility = ["//tensorflow:internal"], ) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 60c726d54c..e3e120a4eb 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -18,16 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import itertools -import sys - import numpy as np -from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl @@ -94,7 +88,7 @@ class XentTest(test.TestCase): 4.]]]).astype(dtype) np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) - self.assertRaisesRegexp(ValueError, "rank 2, but is rank 3", + self.assertRaisesRegexp(ValueError, "must be rank 2", gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) @@ -134,24 +128,6 @@ class XentTest(test.TestCase): self.assertAllClose( np.array([1.3862, 1.9401]), np_loss, rtol=1.e-3, atol=1.e-3) - def testShapeBroadcast(self): - np_f = np.array([[1., 2., 3., 4.], - [1., 2., 3., 4.]]).astype(np.float32) - np_l = np.array([[0., 0., 0., 1.], - [0., .5, .5, 0.]]).astype(np.float32) - np_loss, np_backprop = self._npXent(np_f, np_l) - tf_f = constant_op.constant( - np.array([[1., 2., 3., 4.]]).astype(np.float32)) - tf_l = constant_op.constant( - np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float32)) - for use_gpu in [False, True]: - with self.test_session(use_gpu=use_gpu) as sess: - loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( - tf_f, tf_l) - tf_loss, tf_backprop = sess.run([loss, backprop]) - self.assertAllCloseAccordingToType(np_loss, tf_loss) - self.assertAllCloseAccordingToType(np_backprop, tf_backprop) - def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): @@ -284,60 +260,5 @@ class XentTest(test.TestCase): self.assertAllEqual(np_loss, tf_loss) -class XentBenchmark(test.Benchmark): - - def benchmarkZeroDimension(self): - for (m, n, p, use_gpu) in itertools.product( - [128], - [10, 100, 1000, 10000, 100000], - [0.001, 0.01, 0.5, 0.99, 1.0], - [False]): - k = int(p * n) - if k == 0: - continue - name = "zero_dimension_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) - device = "/%s:0" % ("gpu" if use_gpu else "cpu") - with ops.Graph().as_default(): - with ops.device(device): - labels = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) - logits = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) - op = nn_ops.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - with session.Session() as sess: - r = self.run_op_benchmark(sess, op, min_iters=100, name=name) - gb_processed_input = m * n / 1.0e9 - throughput = gb_processed_input / r["wall_time"] - print("Benchmark: %s \t wall_time: %0.03g s \t " - "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) - sys.stdout.flush() - - def benchmarkSingleClass(self): - for (m, n, p, use_gpu) in itertools.product( - [128], - [10, 100, 1000, 10000, 100000], - [0.001, 0.01, 0.5, 0.99, 1.0], - [False]): - k = int(p * n) - if k == 0: - continue - name = "single_class_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) - device = "/%s:0" % ("gpu" if use_gpu else "cpu") - with ops.Graph().as_default(): - with ops.device(device): - labels = constant_op.constant([[1.], [-1.], [0.]], - dtype=dtypes.float32) - logits = constant_op.constant([[-1.], [0.], [1.]], - dtype=dtypes.float32) - op = nn_ops.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - with session.Session() as sess: - r = self.run_op_benchmark(sess, op, min_iters=100, name=name) - gb_processed_input = m * n / 1.0e9 - throughput = gb_processed_input / r["wall_time"] - print("Benchmark: %s \t wall_time: %0.03g s \t " - "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) - sys.stdout.flush() - - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 2d99b1688f..74e7c63fb3 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -180,8 +180,6 @@ class _Conv(base.Layer): # bias_add when computing gradients. To use bias_add, we collapse Z # and Y into a single dimension to obtain a 4D input tensor. outputs_shape = outputs.shape.as_list() - if outputs_shape[0] is None: - outputs_shape[0] = -1 outputs_4d = array_ops.reshape(outputs, [outputs_shape[0], outputs_shape[1], outputs_shape[2] * outputs_shape[3], diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py index cdb42f5bd1..160e732b67 100644 --- a/tensorflow/python/layers/convolutional_test.py +++ b/tensorflow/python/layers/convolutional_test.py @@ -325,12 +325,6 @@ class ConvTest(test.TestCase): self.assertEqual(conv3d.kernel_constraint, k_constraint) self.assertEqual(conv3d.bias_constraint, b_constraint) - def testConv3DChannelsFirst(self): - # Test case for GitHub issue 15655 - images = array_ops.placeholder( - dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) - conv_layers.conv3d(images, 32, 9, data_format='channels_first') - @test_util.with_c_api class SeparableConv1DTest(test.TestCase): diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 170861b43f..5b4fb4f7c8 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -429,7 +429,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): u, s, v_adj = np.linalg.svd(a, full_matrices=False) np_a_approx = np.dot(u, np.dot(np.diag(s), v_adj)) # tf_a_approx and np_a_approx should be numerically close. - ``` + ```` @end_compatibility """ s, u, v = gen_linalg_ops.svd( diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 4ce6f6d002..6c5c9e01a7 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -281,14 +281,13 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold=None, hooks=None, chief_only_hooks=None, - save_checkpoint_secs=USE_DEFAULT, + save_checkpoint_secs=600, save_summaries_steps=USE_DEFAULT, save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, log_step_count_steps=100, - max_wait_secs=7200, - save_checkpoint_steps=USE_DEFAULT): + max_wait_secs=7200): """Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -311,10 +310,8 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if `is_chief==True`, ignore otherwise. save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If both `save_checkpoint_steps` and - `save_checkpoint_secs` are set to `None`, then the default checkpoint - saver isn't used. If both are provided, then only `save_checkpoint_secs` - is used. Default 600. + using a default checkpoint saver. If `save_checkpoint_secs` is set to + `None`, then the default checkpoint saver isn't used. save_summaries_steps: The frequency, in number of global steps, that the summaries are written to disk using a default summary saver. If both `save_summaries_steps` and `save_summaries_secs` are set to `None`, then @@ -333,11 +330,6 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name become available. This should be kept relatively short to help detect incorrect code, but sometimes may need to be increased if the chief takes a while to start up. - save_checkpoint_steps: The frequency, in number of global steps, that a - checkpoint is saved using a default checkpoint saver. If both - `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then - the default checkpoint saver isn't used. If both are provided, then only - `save_checkpoint_secs` is used. Default not enabled. Returns: A `MonitoredSession` object. @@ -350,15 +342,6 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name elif save_summaries_steps == USE_DEFAULT: save_summaries_steps = None - if (save_checkpoint_steps == USE_DEFAULT and - save_checkpoint_secs == USE_DEFAULT): - save_checkpoint_steps = None - save_checkpoint_secs = 600 - elif save_checkpoint_secs == USE_DEFAULT: - save_checkpoint_secs = None - elif save_checkpoint_steps == USE_DEFAULT: - save_checkpoint_steps = None - scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( @@ -391,13 +374,9 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_steps=save_summaries_steps, save_secs=save_summaries_secs, output_dir=checkpoint_dir)) - if (save_checkpoint_secs and save_checkpoint_secs > 0) or ( - save_checkpoint_steps and save_checkpoint_steps > 0): + if save_checkpoint_secs and save_checkpoint_secs > 0: all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( - checkpoint_dir, - save_steps=save_checkpoint_steps, - save_secs=save_checkpoint_secs, - scaffold=scaffold)) + checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold)) if hooks: all_hooks.extend(hooks) diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 3806056f01..159b2d5c16 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -282,42 +282,6 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) - def test_save_checkpoint_steps(self): - logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps') - with ops.Graph().as_default(): - gstep = variables_lib.get_or_create_global_step() - new_gstep = state_ops.assign_add(gstep, 1) - with monitored_session.MonitoredTrainingSession( - is_chief=True, - checkpoint_dir=logdir, - save_checkpoint_steps=100, - log_step_count_steps=10) as session: - for _ in range(100): - session.run(new_gstep) - # A restart will find the checkpoint and recover automatically. - with monitored_session.MonitoredTrainingSession( - is_chief=True, checkpoint_dir=logdir) as session: - self.assertEqual(100, session.run(gstep)) - - def test_save_checkpoint_secs(self): - logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs') - with ops.Graph().as_default(): - gstep = variables_lib.get_or_create_global_step() - new_gstep = state_ops.assign_add(gstep, 1) - with monitored_session.MonitoredTrainingSession( - is_chief=True, - checkpoint_dir=logdir, - save_checkpoint_secs=0.1, - log_step_count_steps=10) as session: - session.run(new_gstep) - time.sleep(0.2) - for _ in range(10): - session.run(new_gstep) - # A restart will find the checkpoint and recover automatically. - with monitored_session.MonitoredTrainingSession( - is_chief=True, checkpoint_dir=logdir) as session: - self.assertEqual(11, session.run(gstep)) - def test_summaries_steps(self): logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index fcc57d506e..2d3cb415fe 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -22,7 +22,6 @@ load( load( "//third_party/mkl:build_defs.bzl", "if_mkl", - "if_mkl_lnx_x64" ) def register_extension_info(**kwargs): @@ -203,8 +202,7 @@ def tf_copts(android_optimization_level_override="-O2", is_external=False): "-ftemplate-depth=900"]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) - + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) - + if_mkl_lnx_x64(["-fopenmp"]) + + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",]) + if_android_arm(["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + if_ios_x86_64(["-msse4.1"]) diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index bec72e1e60..c75ee474aa 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -238,7 +238,7 @@ tf_module { } member_method { name: "MonitoredTrainingSession" - argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\', \'save_checkpoint_steps\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\', \'\'], " + argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\'], " } member_method { name: "NewCheckpointReader" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index 7d471b4703..e1b56b9a25 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -31,5 +31,5 @@ export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" -export PATH="$PATH:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" build_libtensorflow_tarball "-cpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 11f476d12c..22c73c3fe1 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 037d13116e..3690e7dfe5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.7 +ARG TF_BRANCH=r1.6 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 1fcb6428b2..69ba340f92 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 0ede8c6370..3fbdb5cacd 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -138,6 +138,7 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", + "@mkl//:LICENSE", ]), outs = ["include/tensorflow/c/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", @@ -175,6 +176,7 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", + "@mkl//:LICENSE", ]), outs = ["include/tensorflow/jni/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 62fec2c402..dd75eda231 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -127,6 +127,7 @@ filegroup( "@org_python_pypi_backports_weakref//:LICENSE", ] + if_mkl([ "//third_party/mkl:LICENSE", + "@mkl//:LICENSE", ]) + if_not_windows([ "@nccl_archive//:LICENSE.txt", ]) + tf_additional_license_deps(), diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 365e8d6b08..e0152da4df 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.7.0-rc1' +_VERSION = '1.6.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.7.0, < 1.8.0', + 'tensorboard >= 1.6.0, < 1.7.0', 'termcolor >= 1.1.0', ] @@ -62,7 +62,7 @@ else: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.8.0a0, < 1.9.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.7.0a0, < 1.8.0a0' break # weakref.finalize and enum were introduced in Python 3.4 diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5f6e717532..9fcbfb664b 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -15,11 +15,6 @@ load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_ load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -# Sanitize a dependency so that it works correctly from code that includes -# TensorFlow as a submodule. -def clean_dep(dep): - return str(Label(dep)) - # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -37,37 +32,17 @@ def tf_workspace(path_prefix="", tf_repo_name=""): arm_compiler_configure( name="local_config_arm_compiler", remote_config_repo="../arm_compiler", - build_file = clean_dep("//third_party/toolchains/cpus/arm:BUILD")) + build_file = str(Label("//third_party/toolchains/cpus/arm:BUILD"))) mkl_repository( - name = "mkl_linux", - urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", - ], - sha256 = "feacc3d82565c1231470359b42c696236fae873704e0b013436afba5fd4fd30f", - strip_prefix = "mklml_lnx_2018.0.1.20171227", - build_file = clean_dep("//third_party/mkl:mkl.BUILD") - ) - mkl_repository( - name = "mkl_windows", - urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip" - ], - sha256 = "24bae8d7b22b431a654acadea43f2243c46ae6b1e5a73a4a936825f31d284ee4", - strip_prefix = "mklml_win_2018.0.1.20171227", - build_file = clean_dep("//third_party/mkl:mkl.BUILD") - ) - mkl_repository( - name = "mkl_darwin", + name = "mkl", urls = [ - "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz", - "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz" + "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", + "https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", ], - sha256 = "0e954ec6fd3dc5e37f64c4043f6b5613dd687558da3df1028b3b7c29ff5cf77f", - strip_prefix = "mklml_mac_2018.0.1.20171227", - build_file = clean_dep("//third_party/mkl:mkl.BUILD") + sha256 = "6b07cb7e5451db67c2e31e785ae458b18f7f363c60a61685488f69e9ae7199d4", + strip_prefix = "mklml_lnx_2018.0.1.20171007", + build_file = str(Label("//third_party/mkl:mkl.BUILD")), ) if path_prefix: @@ -77,12 +52,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.12.tar.gz", - "https://github.com/intel/mkl-dnn/archive/v0.12.tar.gz", + "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", + "https://github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", ], - sha256 = "86fa2a8c12a56e3b725945acedeaa82492746be02545aba6d710f097e013e19e", - strip_prefix = "mkl-dnn-0.12", - build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), + sha256 = "02e244f63dd95402691a361392504c143eede9a89043426f174836638a9cbf09", + strip_prefix = "mkl-dnn-e0bfcaa7fcb2b1e1558f5f0676933c1db807a729", + build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")), ) tf_http_archive( @@ -93,7 +68,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478", strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f", - build_file = clean_dep("//third_party:com_google_absl.BUILD"), + build_file = str(Label("//third_party:com_google_absl.BUILD")), ) tf_http_archive( @@ -104,8 +79,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "0cadb31a35b514bf2dfd6b5d38205da94ef326ec6908fc3fd7c269948467214f", strip_prefix = "eigen-eigen-2355b229ea4c", - build_file = clean_dep("//third_party:eigen.BUILD"), - patch_file = clean_dep("//third_party:eigen_fix_cuda_compilation.patch") + build_file = str(Label("//third_party:eigen.BUILD")), + patch_file = str(Label("//third_party:eigen_fix_cuda_compilation.patch")) ) tf_http_archive( @@ -118,7 +93,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # remove the whitelist entry in third_party/repo.bzl. # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz", ], - build_file = clean_dep("//:arm_compiler.BUILD"), + build_file = str(Label("//:arm_compiler.BUILD")), ) tf_http_archive( @@ -129,7 +104,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ade869c3f42f23b5263c7d594aa3c7e5e61ac6a3afcaf5d6e42899d2a7986ce", strip_prefix = "libxsmm-1.8.1", - build_file = clean_dep("//third_party:libxsmm.BUILD"), + build_file = str(Label("//third_party:libxsmm.BUILD")), ) tf_http_archive( @@ -142,7 +117,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "932075525642b04ac6f1b50589f1df5cd72ec2f448b721fd32234cf183f0e755", strip_prefix = "or-tools-253f7955c6a1fd805408fba2e42ac6d45b312d15/src", - build_file = clean_dep("//third_party:ortools.BUILD"), + build_file = str(Label("//third_party:ortools.BUILD")), ) tf_http_archive( @@ -174,7 +149,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", - build_file = clean_dep("//third_party:farmhash.BUILD"), + build_file = str(Label("//third_party:farmhash.BUILD")), ) tf_http_archive( @@ -185,7 +160,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", - build_file = clean_dep("//third_party:highwayhash.BUILD"), + build_file = str(Label("//third_party:highwayhash.BUILD")), ) tf_http_archive( @@ -196,7 +171,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", strip_prefix = "nasm-2.12.02", - build_file = clean_dep("//third_party:nasm.BUILD"), + build_file = str(Label("//third_party:nasm.BUILD")), ) tf_http_archive( @@ -207,7 +182,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", - build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), + build_file = str(Label("//third_party/jpeg:jpeg.BUILD")), ) tf_http_archive( @@ -218,7 +193,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "716c59c7dfc808a4c368f8ada526932be72b2fcea11dd85dc9d88b1df1dfe9c2", strip_prefix = "libpng-1.2.53", - build_file = clean_dep("//third_party:png.BUILD"), + build_file = str(Label("//third_party:png.BUILD")), ) tf_http_archive( @@ -229,7 +204,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", strip_prefix = "sqlite-amalgamation-3200000", - build_file = clean_dep("//third_party:sqlite.BUILD"), + build_file = str(Label("//third_party:sqlite.BUILD")), ) tf_http_archive( @@ -240,7 +215,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1", strip_prefix = "giflib-5.1.4", - build_file = clean_dep("//third_party:gif.BUILD"), + build_file = str(Label("//third_party:gif.BUILD")), ) tf_http_archive( @@ -251,7 +226,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", strip_prefix = "six-1.10.0", - build_file = clean_dep("//third_party:six.BUILD"), + build_file = str(Label("//third_party:six.BUILD")), ) tf_http_archive( @@ -262,7 +237,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d", strip_prefix = "astor-0.6.2", - build_file = clean_dep("//third_party:astor.BUILD"), + build_file = str(Label("//third_party:astor.BUILD")), ) tf_http_archive( @@ -273,7 +248,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930", strip_prefix = "gast-0.2.0", - build_file = clean_dep("//third_party:gast.BUILD"), + build_file = str(Label("//third_party:gast.BUILD")), ) tf_http_archive( @@ -284,7 +259,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b", strip_prefix = "termcolor-1.1.0", - build_file = clean_dep("//third_party:termcolor.BUILD"), + build_file = str(Label("//third_party:termcolor.BUILD")), ) tf_http_archive( @@ -305,7 +280,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892", strip_prefix = "backports.weakref-1.0rc1/src", - build_file = clean_dep("//third_party:backports_weakref.BUILD"), + build_file = str(Label("//third_party:backports_weakref.BUILD")), ) tf_http_archive( @@ -316,7 +291,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", strip_prefix = "codegen-1.0", - build_file = clean_dep("//third_party:codegen.BUILD"), + build_file = str(Label("//third_party:codegen.BUILD")), ) filegroup_external( @@ -401,7 +376,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://ftp.exim.org/pub/pcre/pcre-8.39.tar.gz", ], strip_prefix = "pcre-8.39", - build_file = clean_dep("//third_party:pcre.BUILD"), + build_file = str(Label("//third_party:pcre.BUILD")), ) tf_http_archive( @@ -413,7 +388,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://pilotfiber.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", ], strip_prefix = "swig-3.0.8", - build_file = clean_dep("//third_party:swig.BUILD"), + build_file = str(Label("//third_party:swig.BUILD")), ) tf_http_archive( @@ -424,7 +399,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://curl.haxx.se/download/curl-7.49.1.tar.gz", ], strip_prefix = "curl-7.49.1", - build_file = clean_dep("//third_party:curl.BUILD"), + build_file = str(Label("//third_party:curl.BUILD")), ) tf_http_archive( @@ -446,7 +421,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", ], strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", - build_file = clean_dep("//third_party:linenoise.BUILD"), + build_file = str(Label("//third_party:linenoise.BUILD")), ) # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. @@ -459,7 +434,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1efbb9b05af88368be984d2f6526061d4a857181ef10f8841889a3a46869bb01", strip_prefix = "llvm-1c3cdea2f181d8e14ee184466c5fb237f1b4cda8", - build_file = clean_dep("//third_party/llvm:llvm.BUILD"), + build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) tf_http_archive( @@ -470,7 +445,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "108532fb94c6f227558d45be3f3347b52539f0f58290a7bb31ec06c462d05326", strip_prefix = "lmdb-LMDB_0.9.19/libraries/liblmdb", - build_file = clean_dep("//third_party:lmdb.BUILD"), + build_file = str(Label("//third_party:lmdb.BUILD")), ) tf_http_archive( @@ -481,7 +456,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "07d34db40593d257324ec5fb9debc4dc33f29f8fb44e33a2eeb35503e61d0fe2", strip_prefix = "jsoncpp-11086dd6a7eba04289944367ca82cea71299ed70", - build_file = clean_dep("//third_party:jsoncpp.BUILD"), + build_file = str(Label("//third_party:jsoncpp.BUILD")), ) tf_http_archive( @@ -502,7 +477,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d", strip_prefix = "zlib-1.2.8", - build_file = clean_dep("//third_party:zlib.BUILD"), + build_file = str(Label("//third_party:zlib.BUILD")), ) tf_http_archive( @@ -512,7 +487,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz", ], sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296", - build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"), + build_file = str(Label("//third_party/fft2d:fft2d.BUILD")), ) tf_http_archive( @@ -523,7 +498,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", strip_prefix = "snappy-1.1.4", - build_file = clean_dep("//third_party:snappy.BUILD"), + build_file = str(Label("//third_party:snappy.BUILD")), ) tf_http_archive( @@ -534,7 +509,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", - build_file = clean_dep("//third_party:nccl.BUILD"), + build_file = str(Label("//third_party:nccl.BUILD")), ) tf_http_archive( @@ -545,8 +520,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e", strip_prefix = "librdkafka-0.11.1", - build_file = clean_dep("//third_party:kafka/BUILD"), - patch_file = clean_dep("//third_party/kafka:config.patch"), + build_file = str(Label("//third_party:kafka/BUILD")), + patch_file = str(Label("//third_party/kafka:config.patch")), ) tf_http_archive( @@ -557,7 +532,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", strip_prefix = "aws-sdk-cpp-1.3.15", - build_file = clean_dep("//third_party:aws.BUILD"), + build_file = str(Label("//third_party:aws.BUILD")), ) java_import_external( @@ -593,7 +568,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", - build_file = clean_dep("//third_party:jemalloc.BUILD"), + build_file = str(Label("//third_party:jemalloc.BUILD")), ) java_import_external( @@ -638,7 +613,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", - build_file = clean_dep("//third_party:pprof.BUILD"), + build_file = str(Label("//third_party:pprof.BUILD")), ) tf_http_archive( @@ -649,7 +624,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", strip_prefix = "cub-1.8.0", - build_file = clean_dep("//third_party:cub.BUILD"), + build_file = str(Label("//third_party:cub.BUILD")), ) tf_http_archive( @@ -660,7 +635,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/cython/cython/archive/3732784c45cfb040a5b0936951d196f83a12ea17.tar.gz", ], strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17", - build_file = clean_dep("//third_party:cython.BUILD"), + build_file = str(Label("//third_party:cython.BUILD")), delete = ["BUILD.bazel"], ) @@ -682,7 +657,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", ], - build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), + build_file = str(Label("//third_party:arm_neon_2_x86_sse.BUILD")), ) tf_http_archive( @@ -693,7 +668,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], - build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), + build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")), ) tf_http_archive( @@ -703,7 +678,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", ], - build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"), + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), ) tf_http_archive( @@ -713,7 +688,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip" ], - build_file = clean_dep("//third_party:tflite_smartreply.BUILD"), + build_file = str(Label("//third_party:tflite_smartreply.BUILD")), ) ############################################################################## @@ -777,7 +752,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # Needed by Protobuf native.bind( name = "python_headers", - actual = clean_dep("//util/python:python_headers"), + actual = str(Label("//util/python:python_headers")), ) # Needed by Protobuf diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index c2adf578c7..b27d341404 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -1,17 +1,10 @@ licenses(["notice"]) # 3-Clause BSD -config_setting( - name = "using_mkl", - values = { - "define": "using_mkl=true", - }, - visibility = ["//visibility:public"], -) +exports_files(["LICENSE"]) config_setting( - name = "using_mkl_lnx_x64", + name = "using_mkl", values = { - "cpu": "k8", "define": "using_mkl=true", }, visibility = ["//visibility:public"], @@ -22,37 +15,12 @@ load( "if_mkl", ) -filegroup( - name = "LICENSE", - srcs = ["MKL_LICENSE"] + select({ - "@org_tensorflow//tensorflow:linux_x86_64": [ - "@mkl_linux//:LICENSE", - ], - "@org_tensorflow//tensorflow:darwin": [ - "@mkl_darwin//:LICENSE", - ], - "@org_tensorflow//tensorflow:windows": [ - "@mkl_windows//:LICENSE", - ], - }), - visibility = ["//visibility:public"], -) - cc_library( name = "intel_binary_blob", + srcs = if_mkl([ + "@mkl//:libmklml_intel.so", + "@mkl//:libiomp5.so", + ]), visibility = ["//visibility:public"], - deps = select({ - "@org_tensorflow//tensorflow:linux_x86_64": [ - "@mkl_linux//:mkl_headers", - "@mkl_linux//:mkl_libs_linux", - ], - "@org_tensorflow//tensorflow:darwin": [ - "@mkl_darwin//:mkl_headers", - "@mkl_darwin//:mkl_libs_darwin", - ], - "@org_tensorflow//tensorflow:windows": [ - "@mkl_windows//:mkl_headers", - "@mkl_windows//:mkl_libs_windows", - ], - }), + deps = ["@mkl//:mkl_headers"], ) diff --git a/third_party/mkl/MKL_LICENSE b/third_party/mkl/MKL_LICENSE deleted file mode 100644 index 9c8f3ea087..0000000000 --- a/third_party/mkl/MKL_LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 53e02769da..8b73ddabdd 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -24,18 +24,6 @@ def if_mkl(if_true, if_false = []): "//conditions:default": if_false }) -def if_mkl_lnx_x64(if_true, if_false = []): - """Shorthand for select()'ing on whether we're building with MKL. - - Returns a select statement which evaluates to if_true if we're building - with MKL enabled. Otherwise, the select statement evaluates to if_false. - - """ - return select({ - str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true, - "//conditions:default": if_false - }) - def _enable_local_mkl(repository_ctx): return _TF_MKL_ROOT in repository_ctx.os.environ diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD index c3a71e4ff9..8db97232e1 100644 --- a/third_party/mkl/mkl.BUILD +++ b/third_party/mkl/mkl.BUILD @@ -17,29 +17,14 @@ cc_library( visibility = ["//visibility:public"], ) -cc_library( - name = "mkl_libs_linux", - srcs = [ - "lib/libiomp5.so", - "lib/libmklml_intel.so", - ], - visibility = ["//visibility:public"], -) - -cc_library( - name = "mkl_libs_darwin", - srcs = [ - "lib/libiomp5.dylib", - "lib/libmklml.dylib", - ], +filegroup( + name = "libmklml_intel.so", + srcs = ["lib/libmklml_intel.so"], visibility = ["//visibility:public"], ) -cc_library( - name = "mkl_libs_windows", - srcs = [ - "lib/libiomp5md.lib", - "lib/mklml.lib", - ], +filegroup( + name = "libiomp5.so", + srcs = ["lib/libiomp5.so"], visibility = ["//visibility:public"], ) -- GitLab From a0e0685ca974e484de9200caf8c414dcb55277bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 17:06:44 -0700 Subject: [PATCH 647/960] Collective Ops Part 1 The basic interface definitions, local-only versions of remote-access, param-resolution, device-resolution and mgr. A collective op is able to execute synchronously across devices and across separate graphs. Collective ops to be introduced eventually include broadcast and all-reduce. This change is part of a series of changes that will introduce the necessary infrastructure then the initial op implementations. PiperOrigin-RevId: 190860248 --- tensorflow/core/BUILD | 16 + .../core/common_runtime/buf_rendezvous.cc | 166 +++++ .../core/common_runtime/buf_rendezvous.h | 103 +++ .../common_runtime/buf_rendezvous_test.cc | 197 ++++++ .../common_runtime/collective_executor_mgr.cc | 114 +++ .../common_runtime/collective_executor_mgr.h | 70 ++ .../collective_executor_mgr_test.cc | 98 +++ .../collective_param_resolver_local.cc | 666 ++++++++++++++++++ .../collective_param_resolver_local.h | 209 ++++++ .../collective_param_resolver_local_test.cc | 151 ++++ .../common_runtime/collective_rma_local.cc | 108 +++ .../common_runtime/collective_rma_local.h | 88 +++ .../collective_rma_local_test.cc | 148 ++++ .../common_runtime/device_resolver_local.cc | 49 ++ .../common_runtime/device_resolver_local.h | 48 ++ .../device_resolver_local_test.cc | 87 +++ tensorflow/core/framework/collective.cc | 120 ++++ tensorflow/core/framework/collective.h | 308 ++++++++ tensorflow/core/framework/op_kernel.h | 1 + 19 files changed, 2747 insertions(+) create mode 100644 tensorflow/core/common_runtime/buf_rendezvous.cc create mode 100644 tensorflow/core/common_runtime/buf_rendezvous.h create mode 100644 tensorflow/core/common_runtime/buf_rendezvous_test.cc create mode 100644 tensorflow/core/common_runtime/collective_executor_mgr.cc create mode 100644 tensorflow/core/common_runtime/collective_executor_mgr.h create mode 100644 tensorflow/core/common_runtime/collective_executor_mgr_test.cc create mode 100644 tensorflow/core/common_runtime/collective_param_resolver_local.cc create mode 100644 tensorflow/core/common_runtime/collective_param_resolver_local.h create mode 100644 tensorflow/core/common_runtime/collective_param_resolver_local_test.cc create mode 100644 tensorflow/core/common_runtime/collective_rma_local.cc create mode 100644 tensorflow/core/common_runtime/collective_rma_local.h create mode 100644 tensorflow/core/common_runtime/collective_rma_local_test.cc create mode 100644 tensorflow/core/common_runtime/device_resolver_local.cc create mode 100644 tensorflow/core/common_runtime/device_resolver_local.h create mode 100644 tensorflow/core/common_runtime/device_resolver_local_test.cc create mode 100644 tensorflow/core/framework/collective.cc create mode 100644 tensorflow/core/framework/collective.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 4726946277..712106492b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -455,6 +455,7 @@ tf_cuda_library( "framework/attr_value_util.h", "framework/bfloat16.h", "framework/cancellation.h", + "framework/collective.h", "framework/common_shape_fns.h", "framework/control_flow.h", # TODO(josh11b): Make internal? "framework/dataset.h", @@ -2172,6 +2173,11 @@ tf_cuda_library( CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/allocator_retry.h", "common_runtime/bfc_allocator.h", + "common_runtime/collective_executor_mgr.h", + "common_runtime/collective_param_resolver_local.h", + "common_runtime/collective_rma_local.h", + "common_runtime/device_resolver_local.h", + "common_runtime/buf_rendezvous.h", "common_runtime/build_graph_options.h", "common_runtime/constant_folding.h", "common_runtime/copy_tensor.h", @@ -2210,7 +2216,11 @@ tf_cuda_library( "common_runtime/accumulate_n_optimizer.cc", "common_runtime/allocator_retry.cc", "common_runtime/bfc_allocator.cc", + "common_runtime/buf_rendezvous.cc", "common_runtime/build_graph_options.cc", + "common_runtime/collective_executor_mgr.cc", + "common_runtime/collective_param_resolver_local.cc", + "common_runtime/collective_rma_local.cc", "common_runtime/constant_folding.cc", "common_runtime/copy_tensor.cc", "common_runtime/costmodel_manager.cc", @@ -2218,6 +2228,7 @@ tf_cuda_library( "common_runtime/device.cc", "common_runtime/device_factory.cc", "common_runtime/device_mgr.cc", + "common_runtime/device_resolver_local.cc", "common_runtime/device_set.cc", "common_runtime/executor.cc", "common_runtime/function.cc", @@ -2825,6 +2836,11 @@ tf_cc_tests( name = "higher_level_tests", size = "small", srcs = [ + "common_runtime/buf_rendezvous_test.cc", + "common_runtime/collective_executor_mgr_test.cc", + "common_runtime/collective_param_resolver_local_test.cc", + "common_runtime/collective_rma_local_test.cc", + "common_runtime/device_resolver_local_test.cc", "common_runtime/device_set_test.cc", "common_runtime/optimization_registry_test.cc", "common_runtime/pending_counts_test.cc", diff --git a/tensorflow/core/common_runtime/buf_rendezvous.cc b/tensorflow/core/common_runtime/buf_rendezvous.cc new file mode 100644 index 0000000000..b57eb2943a --- /dev/null +++ b/tensorflow/core/common_runtime/buf_rendezvous.cc @@ -0,0 +1,166 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/buf_rendezvous.h" + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/notification.h" + +namespace tensorflow { + +BufRendezvous::~BufRendezvous() { + mutex_lock l(mu_); + if (!hook_table_.empty()) { + PurgeTable(errors::Internal("Delete called on non-empty BufRendezvous"), + &hook_table_); + } +} + +void BufRendezvous::StartAbort(const Status& s) { + CHECK(!s.ok()); + HookTable dummy_table; + { + mutex_lock l(mu_); + status_.Update(s); + hook_table_.swap(dummy_table); + } + PurgeTable(s, &dummy_table); +} + +void BufRendezvous::PurgeTable(const Status& s, HookTable* table) { + for (auto& it : *table) { + Hook* h = it.second; + if (h->cons_cb != nullptr) { + h->cons_cb(s, nullptr); + } + if (h->prod_cb != nullptr) { + h->prod_cb(s); + } + delete h; + } + table->clear(); +} + +string BufRendezvous::Hook::DebugString() const { + return strings::StrCat("[dev:", (prod_dev ? prod_dev->name() : "none"), + ", ctx:", reinterpret_cast(prod_ctx), + ", val:", reinterpret_cast(prod_value), + ", pcb:", reinterpret_cast(&prod_cb), + ", ccb:", reinterpret_cast(&cons_cb), "]"); +} + +void BufRendezvous::ProvideBuf(const string& key, Device* dev, + DeviceContext* dev_ctx, const Tensor* v, + const AllocatorAttributes& attr, + const ProducerCallback& done) { + Hook* h = nullptr; + Status providebuf_status; + do { + mutex_lock l(mu_); + if (!status_.ok()) { + providebuf_status = status_; + break; + } else { + auto it = hook_table_.find(key); + if (it == hook_table_.end()) { + h = new Hook; + it = hook_table_.insert(std::make_pair(key, h)).first; + } else { + if (it->second->prod_cb != nullptr) { + providebuf_status = errors::Internal( + "BufRendezvous::ProvideBuf already called for key ", key); + break; + } + h = it->second; + } + // Populate Hook with all of the prod values. + h->prod_dev = dev; + h->prod_ctx = dev_ctx; + h->prod_value = v; + h->prod_attr = attr; + h->prod_cb = done; + // If consumer is waiting, kick off right away, removing Hook from table. + if (h->cons_cb != nullptr) { + hook_table_.erase(it); + } else { + h = nullptr; + } + } + } while (false); + if (h) { + h->cons_cb(Status::OK(), h); + } + if (!providebuf_status.ok()) { + done(providebuf_status); + } +} + +void BufRendezvous::ConsumeBuf(const string& key, + const ConsumerCallback& done) { + Hook* existing_hook = nullptr; + Status consumebuf_status; + do { + mutex_lock l(mu_); + if (!status_.ok()) { + consumebuf_status = status_; + break; + } + auto it = hook_table_.find(key); + if (it != hook_table_.end()) { + // Prepare to consume immediately. + if (it->second->cons_cb) { + consumebuf_status = + errors::Internal("Second consumer arrived for key ", key); + break; + } + existing_hook = it->second; + hook_table_.erase(it); + existing_hook->cons_cb = done; + } else { + // Hang consumer callback on the Hook. + Hook* h = new Hook; + hook_table_[key] = h; + h->cons_cb = done; + return; + } + } while (false); + if (existing_hook) { + existing_hook->cons_cb(Status::OK(), existing_hook); + return; + } + if (!consumebuf_status.ok()) { + done(consumebuf_status, nullptr); + return; + } +} + +/*static*/ +void BufRendezvous::DoneWithHook(Hook* h) { + h->prod_cb(Status::OK()); + delete h; +} + +void BufRendezvous::LogContents() { + mutex_lock l(mu_); + LOG(INFO) << strings::StrCat("BufRendezvous ", + strings::Hex(reinterpret_cast(this)), + " step_id=", step_id_, " current contents:"); + for (auto it : hook_table_) { + LOG(INFO) << it.first << ":" << it.second->DebugString(); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/buf_rendezvous.h b/tensorflow/core/common_runtime/buf_rendezvous.h new file mode 100644 index 0000000000..e94e88b323 --- /dev/null +++ b/tensorflow/core/common_runtime/buf_rendezvous.h @@ -0,0 +1,103 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMMON_RUNTIME_BUF_RENDEZVOUS_H_ +#define TENSORFLOW_COMMON_RUNTIME_BUF_RENDEZVOUS_H_ + +#include +#include + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +class Device; +class DeviceContext; +class Tensor; + +// EXPERIMENTAL: RDMA oriented producer/consumer rendezvous on a local +// Tensor value for which DMAHelper::CanUseDMA() is true, i.e. dense +// numeric types. Similar to Rendezvous but never owns a Ref on the +// tensor, instead it uses an explicit callback to the producer when +// the consumer side is finished with the value. This allows the +// producer to perform in-place updates on the source buffer or to take +// other actions that depend on knowing the consumer has passed a certain +// execution point. +class BufRendezvous { + public: + explicit BufRendezvous(uint64 step_id) : step_id_(step_id) {} + + ~BufRendezvous(); + + // Inform all all waiting parties that this BufRendezvous is defunct + // because of an error Status interrupting the Step. + void StartAbort(const Status& s); + + struct Hook; + // Provided by the consumer to be called when access to the buffer + // is available. If the Status arg is not OK, then hook will not + // be populated. Ownership of Hook passes to consumer with the + // callback. + typedef std::function ConsumerCallback; + // Provided by the producer to be called when the consumer has finished + // reading the buffer and will no longer access it. + typedef std::function ProducerCallback; + + struct Hook { + Device* prod_dev; + DeviceContext* prod_ctx; + const Tensor* prod_value; + AllocatorAttributes prod_attr; + ProducerCallback prod_cb; + ConsumerCallback cons_cb; + Hook() + : prod_dev(nullptr), + prod_ctx(nullptr), + prod_value(nullptr), + prod_cb(nullptr), + cons_cb(nullptr) {} + string DebugString() const; + }; + + // Called to advertise availability of a Tensor value corresponding + // to key. That value must stay valid until done is called. + void ProvideBuf(const string& key, Device* dev, DeviceContext* dev_ctx, + const Tensor* v, const AllocatorAttributes& attr, + const ProducerCallback& done); + + // Called to request access to a Tensor value corresponding to key. + // Consumer is provide with a Hook as soon as availble. + void ConsumeBuf(const string& key, const ConsumerCallback& done); + + // Consumer must call this function when it's done reading the Hook provided + // by the ConsumerCallback. This function will invoke the producer callback + // and then delete h. + static void DoneWithHook(Hook* h); + + // Write the current contents of the table to the INFO log. + void LogContents(); + + protected: + const uint64 step_id_; + mutex mu_; + Status status_ GUARDED_BY(mu_); + typedef gtl::FlatMap HookTable; + HookTable hook_table_ GUARDED_BY(mu_); + + void PurgeTable(const Status& s, HookTable* table); +}; +} // namespace tensorflow +#endif // TENSORFLOW_COMMON_RUNTIME_BUF_RENDEZVOUS_H_ diff --git a/tensorflow/core/common_runtime/buf_rendezvous_test.cc b/tensorflow/core/common_runtime/buf_rendezvous_test.cc new file mode 100644 index 0000000000..0e798235bf --- /dev/null +++ b/tensorflow/core/common_runtime/buf_rendezvous_test.cc @@ -0,0 +1,197 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/buf_rendezvous.h" + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +#define NUM_DEVS 3 + +class BufRendezvousTest : public ::testing::Test { + protected: + BufRendezvousTest() { + br_.reset(new BufRendezvous(123)); + fake_dev_ptr_ = reinterpret_cast(512LLU); + fake_dev_ctx_ = reinterpret_cast(1024LLU); + a_ = Tensor(DT_FLOAT, TensorShape({24})); + b_ = Tensor(DT_FLOAT, TensorShape({24})); + } + + Device* fake_dev_ptr_ = nullptr; + DeviceContext* fake_dev_ctx_ = nullptr; + Tensor a_; + Tensor b_; + AllocatorAttributes aa_; + std::unique_ptr br_; +}; + +TEST_F(BufRendezvousTest, CorrectUseProducerFirst) { + Status prod_status; + Status cons_status; + bool prod_callback_called = false; + bool cons_callback_called = false; + Notification note; + br_->ProvideBuf( + "key0", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [¬e, &prod_status, &prod_callback_called](const Status& s) { + prod_status = s; + prod_callback_called = true; + note.Notify(); + }); + EXPECT_FALSE(prod_callback_called); + br_->ConsumeBuf("key0", [this, &cons_status, &cons_callback_called]( + const Status& s, BufRendezvous::Hook* h) { + cons_status = s; + cons_callback_called = true; + ASSERT_TRUE(h != nullptr); + EXPECT_EQ(h->prod_dev, fake_dev_ptr_); + EXPECT_EQ(h->prod_ctx, fake_dev_ctx_); + EXPECT_EQ(h->prod_value, &a_); + br_->DoneWithHook(h); + }); + EXPECT_TRUE(cons_callback_called); + note.WaitForNotification(); + EXPECT_TRUE(prod_callback_called); + TF_EXPECT_OK(cons_status); + TF_EXPECT_OK(prod_status); +} + +TEST_F(BufRendezvousTest, CorrectUseConsumerFirst) { + Status prod_status; + Status cons_status; + bool prod_callback_called = false; + bool cons_callback_called = false; + Notification note; + br_->ConsumeBuf("key0", [this, &cons_status, &cons_callback_called]( + const Status& s, BufRendezvous::Hook* h) { + cons_status = s; + cons_callback_called = true; + ASSERT_TRUE(h != nullptr); + EXPECT_EQ(h->prod_dev, fake_dev_ptr_); + EXPECT_EQ(h->prod_ctx, fake_dev_ctx_); + EXPECT_EQ(h->prod_value, &a_); + br_->DoneWithHook(h); + }); + EXPECT_FALSE(cons_callback_called); + br_->ProvideBuf( + "key0", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [¬e, &prod_status, &prod_callback_called](const Status& s) { + prod_status = s; + prod_callback_called = true; + note.Notify(); + }); + EXPECT_TRUE(cons_callback_called); + note.WaitForNotification(); + EXPECT_TRUE(prod_callback_called); + TF_EXPECT_OK(cons_status); + TF_EXPECT_OK(prod_status); +} + +TEST_F(BufRendezvousTest, ErrorDuplicatePut) { + bool prod_callback_called = false; + br_->ProvideBuf("key0", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [this, &prod_callback_called](const Status& s) { + prod_callback_called = true; + }); + Status bad_status; + Notification note; + br_->ProvideBuf("key0", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [&bad_status, ¬e](const Status& s) { + bad_status = s; + note.Notify(); + }); + note.WaitForNotification(); + EXPECT_FALSE(bad_status.ok()); + EXPECT_EQ("BufRendezvous::ProvideBuf already called for key key0", + bad_status.error_message()); + EXPECT_FALSE(prod_callback_called); + br_.reset(); +} + +TEST_F(BufRendezvousTest, ErrorDeleteNonEmpty) { + Status cons_status; + br_->ConsumeBuf( + "key0", [this, &cons_status](const Status& s, BufRendezvous::Hook* h) { + cons_status = s; + EXPECT_EQ(h, nullptr); + }); + EXPECT_TRUE(cons_status.ok()); + br_.reset(); + EXPECT_FALSE(cons_status.ok()); + EXPECT_EQ("Delete called on non-empty BufRendezvous", + cons_status.error_message()); +} + +TEST_F(BufRendezvousTest, AbortNonEmpty) { + Status cons_status; + Status prod_status; + Notification prod_note; + Notification cons_note; + br_->ConsumeBuf("key0", [this, &cons_note, &cons_status]( + const Status& s, BufRendezvous::Hook* h) { + cons_status = s; + cons_note.Notify(); + }); + br_->ProvideBuf("key1", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [this, &prod_note, &prod_status](const Status& s) { + prod_status = s; + prod_note.Notify(); + }); + br_->StartAbort(errors::Internal("Falling sky detected")); + prod_note.WaitForNotification(); + cons_note.WaitForNotification(); + EXPECT_FALSE(prod_status.ok()); + EXPECT_EQ(prod_status.error_message(), "Falling sky detected"); + EXPECT_FALSE(cons_status.ok()); + EXPECT_EQ(cons_status.error_message(), "Falling sky detected"); +} + +TEST_F(BufRendezvousTest, AbortEmpty) { + br_->StartAbort(errors::Internal("Falling sky detected")); +} + +TEST_F(BufRendezvousTest, UseAfterAbort) { + br_->StartAbort(errors::Internal("Falling sky detected")); + Status cons_status; + Status prod_status; + Notification prod_note; + Notification cons_note; + br_->ConsumeBuf("key0", [this, &cons_note, &cons_status]( + const Status& s, BufRendezvous::Hook* h) { + cons_status = s; + cons_note.Notify(); + }); + br_->ProvideBuf("key1", fake_dev_ptr_, fake_dev_ctx_, &a_, aa_, + [this, &prod_note, &prod_status](const Status& s) { + prod_status = s; + prod_note.Notify(); + }); + prod_note.WaitForNotification(); + cons_note.WaitForNotification(); + EXPECT_FALSE(prod_status.ok()); + EXPECT_EQ(prod_status.error_message(), "Falling sky detected"); + EXPECT_FALSE(cons_status.ok()); + EXPECT_EQ(cons_status.error_message(), "Falling sky detected"); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_executor_mgr.cc b/tensorflow/core/common_runtime/collective_executor_mgr.cc new file mode 100644 index 0000000000..a5c4946e58 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_executor_mgr.cc @@ -0,0 +1,114 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_executor_mgr.h" + +#include "tensorflow/core/common_runtime/build_graph_options.h" +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/protobuf/config.pb.h" + +namespace tensorflow { +namespace { +// TODO(tucker): Temporary class just until a real CollectiveExecutor +// implementation is submitted in a later CL. +class DummyCollectiveExecutor : public CollectiveExecutor { + public: + explicit DummyCollectiveExecutor(CollectiveExecutorMgr* ce_mgr) + : CollectiveExecutor(ce_mgr) {} + + ~DummyCollectiveExecutor() override {} + + void RecvFromPeer(const string& peer_device, const string& peer_task, + bool peer_is_local, const string& key, Device* to_device, + DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override { + done(errors::Internal("Unimplemented")); + } + + void PostToPeer(const string& peer_device, const string& peer_task, + const string& key, Device* from_device, + DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, + const Tensor* from_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override { + done(errors::Internal("Unimplemented")); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(DummyCollectiveExecutor); +}; +} // namespace + +CollectiveExecutorMgr::CollectiveExecutorMgr( + const ConfigProto& config, const DeviceMgr* dev_mgr, + DeviceResolverInterface* dev_resolver, + ParamResolverInterface* param_resolver) + : dev_mgr_(dev_mgr), + dev_resolver_(dev_resolver), + param_resolver_(param_resolver) {} + +CollectiveExecutorMgr::~CollectiveExecutorMgr() { + for (auto iter : executor_table_) { + iter.second->Unref(); + } +} + +CollectiveExecutor* CollectiveExecutorMgr::FindOrCreate(int64 step_id) { + CollectiveExecutor* ce = nullptr; + { + mutex_lock l(exec_mu_); + auto it = executor_table_.find(step_id); + if (it != executor_table_.end()) { + ce = it->second; + } else { + ce = new DummyCollectiveExecutor(this); + executor_table_[step_id] = ce; + } + ce->Ref(); + } + return ce; +} + +void CollectiveExecutorMgr::Cleanup(int64 step_id) { + CollectiveExecutor* ce = nullptr; + { + mutex_lock l(exec_mu_); + auto it = executor_table_.find(step_id); + if (it != executor_table_.end()) { + ce = it->second; + executor_table_.erase(it); + } + } + if (ce) ce->Unref(); +} + +void CollectiveExecutorMgr::GetStepSequenceAsync( + const GetStepSequenceRequest* request, GetStepSequenceResponse* response, + const StatusCallback& done) { + done(errors::Internal( + "CollectiveExecutorMgr does not implement GetStepSequence.")); +} + +void CollectiveExecutorMgr::RefreshStepIdSequenceAsync( + int64 graph_key, const StatusCallback& done) { + done(errors::Internal( + "CollectiveExecutorMgr does not implement RefreshStepIdSequence.")); +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_executor_mgr.h b/tensorflow/core/common_runtime/collective_executor_mgr.h new file mode 100644 index 0000000000..4b42e2b4d1 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_executor_mgr.h @@ -0,0 +1,70 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_EXECUTOR_MGR_H_ +#define TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_EXECUTOR_MGR_H_ + +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/lib/gtl/flatmap.h" + +namespace tensorflow { +class ConfigProto; +class DeviceMgr; + +class CollectiveExecutorMgr : public CollectiveExecutorMgrInterface { + public: + CollectiveExecutorMgr(const ConfigProto& config, const DeviceMgr* dev_mgr, + DeviceResolverInterface* dev_resolver, + ParamResolverInterface* param_resolver); + + virtual ~CollectiveExecutorMgr(); + + CollectiveExecutor* FindOrCreate(int64 step_id) override; + + void Cleanup(int64 step_id) override; + + ParamResolverInterface* GetParamResolver() const override { + return param_resolver_.get(); + } + + DeviceResolverInterface* GetDeviceResolver() const override { + return dev_resolver_.get(); + } + + void GetStepSequenceAsync(const GetStepSequenceRequest* request, + GetStepSequenceResponse* response, + const StatusCallback& done) override; + + void RefreshStepIdSequenceAsync(int64 graph_key, + const StatusCallback& done) override; + + int64 NextStepId(int64 graph_key) override { + return CollectiveExecutor::kInvalidId; + } + + void RetireStepId(int64 graph_key, int64 step_id) override {} + + protected: + const DeviceMgr* dev_mgr_; + std::unique_ptr dev_resolver_; + std::unique_ptr param_resolver_; + CollectiveRemoteAccess* remote_access_; + string task_name_; + mutex exec_mu_; + // Map from step_id to CollectiveExecutor + gtl::FlatMap executor_table_ GUARDED_BY(exec_mu_); +}; + +} // namespace tensorflow +#endif // TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_EXECUTOR_MGR_H_ diff --git a/tensorflow/core/common_runtime/collective_executor_mgr_test.cc b/tensorflow/core/common_runtime/collective_executor_mgr_test.cc new file mode 100644 index 0000000000..34c9163d6a --- /dev/null +++ b/tensorflow/core/common_runtime/collective_executor_mgr_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_executor_mgr.h" + +#include "tensorflow/core/common_runtime/collective_param_resolver_local.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { +namespace { + +#define NUM_DEVS 3 + +class CollectiveExecutorMgrTest : public ::testing::Test { + protected: + CollectiveExecutorMgrTest() { + ConfigProto cp; + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + string task_name = "/job:localhost/replica:0/task:0"; + device_count->insert({"CPU", NUM_DEVS}); + TF_CHECK_OK(DeviceFactory::AddDevices(options, task_name, &devices_)); + device_mgr_.reset(new DeviceMgr(devices_)); + DeviceResolverLocal* drl = new DeviceResolverLocal(device_mgr_.get()); + cme_.reset(new CollectiveExecutorMgr( + cp, device_mgr_.get(), drl, + new CollectiveParamResolverLocal(device_mgr_.get(), drl, task_name))); + } + + std::unique_ptr cme_; + std::vector devices_; + std::unique_ptr device_mgr_; +}; + +TEST_F(CollectiveExecutorMgrTest, FindOrCreate) { + CollectiveExecutor::Handle* h = + new CollectiveExecutor::Handle(cme_->FindOrCreate(1), true); + EXPECT_TRUE(h->get()); + CollectiveExecutor::Handle* h2 = + new CollectiveExecutor::Handle(cme_->FindOrCreate(1), true); + EXPECT_EQ(h->get(), h2->get()); + CollectiveExecutor* ce = h->get(); + delete h; + delete h2; + CollectiveExecutor::Handle h3(cme_->FindOrCreate(1), true); + EXPECT_EQ(ce, h3.get()); + cme_->Cleanup(1); +} + +TEST_F(CollectiveExecutorMgrTest, StepSequenceRelated) { + EXPECT_EQ(CollectiveExecutor::kInvalidId, cme_->NextStepId(123)); + Notification ss_note; + Status ss_status; + cme_->RefreshStepIdSequenceAsync( + 123, [this, &ss_status, &ss_note](const Status& s) { + ss_status = s; + ss_note.Notify(); + }); + ss_note.WaitForNotification(); + EXPECT_FALSE(ss_status.ok()); + EXPECT_EQ(ss_status.error_message(), + "CollectiveExecutorMgr does not implement RefreshStepIdSequence."); + Notification gs_note; + Status gs_status; + GetStepSequenceRequest* req = nullptr; + GetStepSequenceResponse* resp = nullptr; + cme_->GetStepSequenceAsync(req, resp, + [this, &gs_status, &gs_note](const Status& s) { + gs_status = s; + gs_note.Notify(); + }); + gs_note.WaitForNotification(); + EXPECT_FALSE(gs_status.ok()); + EXPECT_EQ(gs_status.error_message(), + "CollectiveExecutorMgr does not implement GetStepSequence."); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc new file mode 100644 index 0000000000..b34950b2f4 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc @@ -0,0 +1,666 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_param_resolver_local.h" + +#include "tensorflow/core/common_runtime/device_mgr.h" + +namespace tensorflow { + +CollectiveParamResolverLocal::CollectiveParamResolverLocal( + const DeviceMgr* dev_mgr, DeviceResolverInterface* dev_resolver, + const string& task_name) + : dev_mgr_(dev_mgr), dev_resolver_(dev_resolver), task_name_(task_name) {} + +void CollectiveParamResolverLocal::CompleteGroupAsync( + const CompleteGroupRequest* request, CompleteGroupResponse* response, + CancellationManager* cancel_mgr, const StatusCallback& done) { + done( + errors::Internal("CompleteGroup is not implemented by " + "CollectiveParamResolverLocal which is " + "intended only for non-distributed deployment.")); +} + +void CollectiveParamResolverLocal::CompleteGroupLocal( + const string& device, CollectiveParams* cp, const GroupRecCallback& done) { + VLOG(1) << "CompleteGroupLocal " << cp << ": " << cp->ToString(); + std::vector to_be_called; + GroupRec* gr = nullptr; + { + mutex_lock l(group_mu_); + auto it = group_table_.find(cp->group.group_key); + if (it == group_table_.end()) { + gr = new GroupRec; + gr->group.group_key = cp->group.group_key; + gr->group.group_size = cp->group.group_size; + gr->group.device_type = cp->group.device_type; + group_table_[gr->group.group_key].reset(gr); + VLOG(2) << "New group_key=" << gr->group.group_key + << " group_size=" << gr->group.group_size; + } else { + gr = it->second.get(); + } + } + Status status; + { + mutex_lock gr_lock(gr->mu); + if (!gr->device_set.empty()) { + // Check for consistency with existing GroupRec. + if (cp->group.device_type != gr->group.device_type) { + status = errors::Internal( + "Collective Op ", cp->name, " is assigned to device ", device, + " with type ", cp->group.device_type.type_string(), + " and group_key ", cp->group.group_key, " but that group has type ", + gr->group.device_type.type_string()); + } else if (cp->group.group_size != gr->group.group_size) { + status = errors::Internal( + "Collective Op ", cp->name, " has group_size ", + cp->group.group_size, " and group_key", cp->group.group_key, + " but that group has size ", gr->group.group_size); + } + } + if (status.ok()) { + // Insert device if not already present. + auto it = gr->device_set.find(device); + if (it == gr->device_set.end()) { + if (gr->device_set.size() == gr->group.group_size) { + // The group is already full. + status = errors::Internal( + "Collective Op ", cp->name, " is assigned to device ", device, + " and group_key ", cp->group.group_key, + " but that group doesn't contain that device."); + } else { + // This is a new device that has not yet joined the group. + gr->device_set.insert(device); + gr->device_list.push_back(device); + DeviceNameUtils::ParsedName parsed_device; + DeviceNameUtils::ParseFullName(device, &parsed_device); + string task_name = strings::StrCat("/job:", parsed_device.job, + "/replica:", parsed_device.replica, + "/task:", parsed_device.task); + gr->task_set.insert(task_name); + gr->task_list.push_back(task_name); + gr->group.num_tasks = static_cast(gr->task_set.size()); + VLOG(1) << "group_key=" << gr->group.group_key + << " group_size=" << gr->group.group_size + << " dev_set=" << gr->device_set.size(); + } + } + } + + if (status.ok()) { + // If the group is not yet complete, queue to wait for it. + VLOG(2) << "group_size " << gr->group.group_size << " set size " + << gr->device_set.size() << " gr " << gr; + + if (gr->device_set.size() < gr->group.group_size) { + gr->waiting.push_back(std::bind(done, std::placeholders::_1, gr)); + return; + } + CHECK_EQ(gr->device_set.size(), gr->group.group_size); + if (!gr->waiting.empty()) { + std::swap(to_be_called, gr->waiting); + } + } + } + done(status, gr); + for (int i = 0; i < to_be_called.size(); ++i) { + to_be_called[i](Status::OK()); + } +} + +namespace { + +struct DevRec { + string task; + string device; + int original_rank; + int local_rank; + int global_rank; + const DeviceLocality* locality; +}; +typedef std::unordered_map TaskDeviceMap; +typedef std::unordered_map GlobalDeviceMap; + +// Create a populated GlobalDeviceMap from CollInstanceParams and localities. +GlobalDeviceMap BuildDevRecs(const CollInstanceParams& ip, + const std::vector& localities) { + GlobalDeviceMap gdm; + CHECK_EQ(ip.device_names.size(), ip.task_names.size()); + CHECK_EQ(ip.device_names.size(), localities.size()); + for (int i = 0; i < ip.device_names.size(); ++i) { + TaskDeviceMap& tdm = gdm[ip.task_names[i]]; + DevRec* dr = &tdm[ip.device_names[i]]; + dr->task = ip.task_names[i]; + dr->device = ip.device_names[i]; + dr->original_rank = i; + dr->local_rank = 0; // Will be populated later by OrderTaskDeviceMap. + dr->global_rank = 0; // Will be populated later by EstablishGlobalRank. + dr->locality = &localities[i]; + } + return gdm; +} + +void OrderTaskDeviceMap(TaskDeviceMap* tdm) { + CHECK_GT(tdm->size(), 0); // Should never be called with 0 devices + int least_rank = -1; + string next_device; + std::set selected; + // Starting device is one with the least initial rank. + for (const auto& it : *tdm) { + if (least_rank < 0 || it.second.original_rank < least_rank) { + least_rank = it.second.original_rank; + next_device = it.second.device; + } + } + CHECK_GE(least_rank, 0); + DeviceNameUtils::ParsedName parsed_name; + CHECK(DeviceNameUtils::ParseFullName(next_device, &parsed_name)); + // NOTE: InterconnectLink has only a device_id, nothing more, so for + // the time being if there's more than one device at a task we + // assume they're all GPUs. + + int next_rank = 0; + while (true) { + selected.insert(next_device); + DevRec* dr = &(*tdm)[next_device]; + dr->local_rank = next_rank; + ++next_rank; + if (selected.size() == tdm->size()) { + break; + } + // For the present time we assume Locality links only cover GPUs. + // For multiple CPUs, just take them in order. + const InterconnectLink* best_link = nullptr; + if (parsed_name.type == "GPU") { + for (const InterconnectLink& il : dr->locality->links().link()) { + parsed_name.id = il.device_id(); + string endpoint_device = + DeviceNameUtils::ParsedNameToString(parsed_name); + if (selected.find(endpoint_device) != selected.end()) { + continue; + } + if (best_link == nullptr || il.strength() > best_link->strength()) { + best_link = &il; + } + } + } + if (best_link != nullptr) { + // Follow the best edge + parsed_name.id = best_link->device_id(); + next_device = DeviceNameUtils::ParsedNameToString(parsed_name); + } else { + // No good edges, alas. Pick the lowest initial rank among remaining + // devices. + least_rank = -1; + for (const auto& it : *tdm) { + if (selected.find(it.second.device) != selected.end()) { + continue; + } + if (least_rank < 0 || it.second.original_rank < least_rank) { + least_rank = it.second.original_rank; + next_device = it.second.device; + } + } + CHECK_GE(least_rank, 0); + } + } +} + +// The first time a shared CollectiveParams is established for a +// shared set of instances we compute a good rank order for all the +// devices in the group, that is appropriate for a ring algorithm. +// This order need not be the same across different instance groups +// sharing the same device group where there is more than one good +// order. +GlobalDeviceMap EstablishGlobalRank( + CollectiveParams* cp, const std::vector& localities) { + VLOG(1) << "EstablishGlobalRank"; + GlobalDeviceMap gdm = BuildDevRecs(cp->instance, localities); + for (auto& iter : gdm) { + TaskDeviceMap& tdm = iter.second; + OrderTaskDeviceMap(&tdm); + } + // Connect the global rank order by the order in which tasks first appear. + std::set ordered_tasks; + int next_rank = 0; + for (int i = 0; i < cp->instance.task_names.size(); ++i) { + const string& task_name = cp->instance.task_names[i]; + if (ordered_tasks.find(task_name) != ordered_tasks.end()) { + continue; + } + ordered_tasks.insert(task_name); + TaskDeviceMap* tdm = &gdm[task_name]; + for (auto& it : *tdm) { + it.second.global_rank = it.second.local_rank + next_rank; + } + next_rank += tdm->size(); + } + return gdm; +} + +// Sort cp->instance.device_names lexicographically, but do by first +// computing a reordering permutation so we can keep cp->instance.task_names +// in corresponding order. +void SortDevicesAndTasks(CollectiveParams* cp) { + VLOG(1) << "SortDevicesAndTasks " << cp << " instance " << &cp->instance; + CHECK(cp); + CHECK_EQ(cp->group.group_size, cp->instance.device_names.size()); + CHECK_EQ(cp->group.group_size, cp->instance.task_names.size()); + std::vector perm(cp->group.group_size); + // TODO(tucker): substitute std::iota when the windows build supports it. + // std::iota(perm.begin(), perm.end(), 0); + for (int i = 0; i < perm.size(); ++i) { + perm[i] = i; + } + std::sort(perm.begin(), perm.end(), [cp](const int& a, const int& b) { + return cp->instance.device_names[a] < cp->instance.device_names[b]; + }); + std::vector new_devs; + std::vector new_tasks; + new_devs.reserve(cp->group.group_size); + new_tasks.reserve(cp->group.group_size); + for (int pi : perm) { + new_devs.push_back(cp->instance.device_names[pi]); + new_tasks.push_back(cp->instance.task_names[pi]); + } + cp->instance.device_names = std::move(new_devs); + cp->instance.task_names = std::move(new_tasks); + VLOG(1) << "Modified device_names on " << cp; +} + +// Establish the requested number of subdivision permutations based on the +// ring order implicit in the device order. +void GenerateSubdivPerms(const string& device, int source_rank, + CollectiveParams* cp) { + CHECK_GT(cp->instance.impl_details.subdiv_offsets.size(), 0); + cp->instance.impl_details.subdiv_permutations.resize( + cp->instance.impl_details.subdiv_offsets.size()); + // Each subdiv permutation is a ring formed by rotating each + // single-task subsequence of devices by an offset. This makes most + // sense when each task has the same number of devices but we can't + // depend on that being the case so we'll compute something that + // works in any case. + + // Start by counting the devices in each task. + // Precondition: device_names must be sorted so that all devices in + // the same task are adjacent. + VLOG(2) << "Sorted task names: " + << str_util::Join(cp->instance.task_names, ", "); + std::vector dev_per_task; + const string* prior_task_name = &cp->instance.task_names[0]; + int dev_count = 1; + for (int di = 1; di < cp->group.group_size; ++di) { + if (cp->instance.task_names[di] != *prior_task_name) { + dev_per_task.push_back(dev_count); + dev_count = 1; + prior_task_name = &cp->instance.task_names[di]; + } else { + ++dev_count; + } + } + dev_per_task.push_back(dev_count); + CHECK_EQ(cp->group.num_tasks, dev_per_task.size()); + + // Generate a ring permutation for each requested offset. + CHECK_GT(cp->instance.impl_details.subdiv_offsets.size(), 0); + VLOG(2) << "Setting up perms for cp " << cp << " subdiv_permutations " + << &cp->instance.impl_details.subdiv_permutations; + cp->instance.impl_details.subdiv_permutations.resize( + cp->instance.impl_details.subdiv_offsets.size()); + cp->subdiv_rank.resize(cp->instance.impl_details.subdiv_offsets.size(), -1); + for (int sdi = 0; sdi < cp->instance.impl_details.subdiv_offsets.size(); + ++sdi) { + std::vector& perm = cp->instance.impl_details.subdiv_permutations[sdi]; + CHECK_EQ(perm.size(), 0); + int offset = cp->instance.impl_details.subdiv_offsets[sdi]; + int prior_dev_count = 0; + for (int ti = 0; ti < cp->group.num_tasks; ++ti) { + for (int di = 0; di < dev_per_task[ti]; ++di) { + int offset_di = (di + offset) % dev_per_task[ti]; + int permuted_di = prior_dev_count + offset_di; + perm.push_back(permuted_di); + if (cp->instance.device_names[prior_dev_count + di] == device) { + CHECK_EQ(prior_dev_count + di, cp->default_rank); + cp->subdiv_rank[sdi] = permuted_di; + } + } + prior_dev_count += dev_per_task[ti]; + } + CHECK_EQ(cp->group.group_size, perm.size()); + } + + if (cp->instance.type == BROADCAST_COLLECTIVE) { + CHECK_GE(source_rank, 0); + cp->subdiv_source_rank.resize( + cp->instance.impl_details.subdiv_offsets.size(), -1); + for (int sdi = 0; sdi < cp->subdiv_source_rank.size(); ++sdi) { + for (int j = 0; j < cp->group.group_size; ++j) { + if (cp->instance.impl_details.subdiv_permutations[sdi][j] == + source_rank) { + cp->subdiv_source_rank[sdi] = j; + break; + } + } + CHECK_GE(cp->subdiv_source_rank[sdi], 0); + } + } + + if (VLOG_IS_ON(1)) { + // Log the computed ring order for each subdiv. + string buf; + for (int sdi = 0; + sdi < cp->instance.impl_details.subdiv_permutations.size(); ++sdi) { + buf = strings::StrCat("Subdiv ", sdi, " device order:\n"); + for (int di = 0; + di < cp->instance.impl_details.subdiv_permutations[sdi].size(); + ++di) { + int idx = cp->instance.impl_details.subdiv_permutations[sdi][di]; + strings::StrAppend(&buf, cp->instance.device_names[idx], "\n"); + } + strings::StrAppend(&buf, " subdiv_offsets: "); + for (auto o : cp->instance.impl_details.subdiv_offsets) + strings::StrAppend(&buf, o, " "); + strings::StrAppend(&buf, " SubdivRank: "); + for (auto d : cp->subdiv_rank) strings::StrAppend(&buf, d, " "); + VLOG(1) << buf; + } + } +} + +} // namespace + +void CollectiveParamResolverLocal::CompleteTaskIsLocal(const string& task_name, + CollectiveParams* cp) { + cp->task.is_local.resize(cp->group.group_size, false); + for (int i = 0; i < cp->group.group_size; ++i) { + cp->task.is_local[i] = (cp->instance.task_names[i] == task_name); + } +} + +void CollectiveParamResolverLocal::SetDefaultRank(const string& device, + CollectiveParams* cp) { + CHECK_EQ(cp->group.group_size, cp->instance.device_names.size()) << cp; + for (int i = 0; i < cp->group.group_size; ++i) { + if (cp->instance.device_names[i] == device) { + cp->default_rank = i; + break; + } + } +} + +Status CollectiveParamResolverLocal::InitInstanceSharedParams( + GroupRec* gr, const CollectiveParams* cp, InstanceRec* ir) { + VLOG(1) << "InitInstanceSharedParams " << ir; + ir->shared.instance = cp->instance; + { + mutex_lock gl(gr->mu); + ir->shared.group = gr->group; + ir->shared.instance.device_names.assign(gr->device_list.begin(), + gr->device_list.end()); + ir->shared.instance.task_names.assign(gr->task_list.begin(), + gr->task_list.end()); + VLOG(2) << "Initialized names for instance: " + << ir->shared.instance.ToString(); + } + ir->shared.default_rank = -1; + + // Sort devce_names lexicographcally, keeping task_names in + // corresponding order. + SortDevicesAndTasks(&ir->shared); + + // Get Locality data for all devices. + + // Set is_local and task_names in *shared prior to invoking + // GetDeviceLocalitiesAsync. In a distributed context this function can be + // called by a derived class, some of the devices may be non-local and + // GetDeviceLocalitiesAsync will use those fields to launch RPCs. + CompleteTaskIsLocal(task_name_, &ir->shared); + std::vector localities; + Notification note; + Status status; + dev_resolver_->GetDeviceLocalitiesAsync(ir->shared.instance, &localities, + [¬e, &status](const Status& s) { + status = s; + note.Notify(); + }); + note.WaitForNotification(); + if (status.ok()) { + CompleteDefaultRanking(gr, cp, ir, localities); + } + return status; +} + +void CollectiveParamResolverLocal::CompleteDefaultRanking( + GroupRec* gr, const CollectiveParams* cp, InstanceRec* ir, + const std::vector& localities) { + // Establish an instance-specific default rank order for devices + // based on localities. This rank order should be a good ring + // order, if possible. + GlobalDeviceMap gdm = EstablishGlobalRank(&ir->shared, localities); + // Reflect the new global ranking on shared + size_t num_devices = ir->shared.group.group_size; + std::vector new_device_names(num_devices, ""); + std::vector new_task_names(num_devices, ""); + for (const auto& git : gdm) { + const TaskDeviceMap& tdm = git.second; + for (const auto& tit : tdm) { + const DevRec& dr = tit.second; + new_device_names[dr.global_rank] = + ir->shared.instance.device_names[dr.original_rank]; + new_task_names[dr.global_rank] = + ir->shared.instance.task_names[dr.original_rank]; + } + } + + ir->shared.instance.device_names = new_device_names; + ir->shared.instance.task_names = new_task_names; + if (VLOG_IS_ON(2)) { + string buf; + for (const auto& d : cp->instance.device_names) + strings::StrAppend(&buf, "\n", d); + VLOG(2) << "Optimized device order for " << ir->shared.name << ": " << buf; + } +} + +void CollectiveParamResolverLocal::CallbackWithStatus( + const InstanceRecCallback& done, InstanceRec* irec) { + Status s; + { + mutex_lock l(irec->out_mu); + s = irec->status; + } + done(s, irec); +} + +void CollectiveParamResolverLocal::FindInstanceRec( + GroupRec* gr, CollectiveParams* cp, const InstanceRecCallback& done) { + InstanceRec* irec = nullptr; + bool exit_outside_locks = false; + { + mutex_lock l(instance_mu_); + auto it = instance_table_.find(cp->instance.instance_key); + if (it != instance_table_.end()) { + irec = it->second.get(); + { + mutex_lock l(irec->in_mu); + if (irec->is_init) { + exit_outside_locks = true; + } else { + irec->init_waiters.push_back([this, gr, cp, done](InstanceRec* irec) { + CallbackWithStatus(done, irec); + }); + return; + } + } + } else { + // Create new InstanceRec. + irec = new InstanceRec; + instance_table_[cp->instance.instance_key].reset(irec); + } + } + if (exit_outside_locks) { + CallbackWithStatus(done, irec); + return; + } + // Initialize the new InstanceRec while holding out_mu. + { + mutex_lock il(irec->out_mu); + irec->known.resize(cp->group.group_size, false); + irec->status = InitInstanceSharedParams(gr, cp, irec); + } + // Prepare to invoke any waiters that accumlated during initialization. + std::vector init_waiters; + { + mutex_lock tl(instance_mu_); + { + mutex_lock l(irec->in_mu); + irec->is_init = true; + if (!irec->init_waiters.empty()) { + std::swap(init_waiters, irec->init_waiters); + } + } + } + CallbackWithStatus(done, irec); + for (auto& f : init_waiters) { + f(irec); + } +} + +void CollectiveParamResolverLocal::CompleteParamsAsync( + const string& device, CollectiveParams* cp, CancellationManager* cancel_mgr, + const StatusCallback& done) { + VLOG(1) << "CompleteParams " << device << " for " << cp << ": " + << cp->ToString(); + CompleteGroupLocal( + device, cp, [this, device, cp, done](const Status& s, GroupRec* gr) { + if (s.ok()) { + CompleteInstanceLocal(device, gr, cp, cp->is_source, done); + } else { + done(s); + } + }); +} + +void CollectiveParamResolverLocal::CompleteInstanceAsync( + const CompleteInstanceRequest* request, CompleteInstanceResponse* response, + CancellationManager* cancel_mgr, const StatusCallback& done) { + done( + errors::Internal("CompleteInstance is not implemented by " + "CollectiveParamResolverLocal which is " + "intended only for non-distributed deployment.")); +} + +void CollectiveParamResolverLocal::CompleteInstanceLocal( + const string& device, GroupRec* gr, CollectiveParams* cp, bool is_source, + const StatusCallback& done) { + VLOG(1) << "CompleteInstanceLocal " << device + << " instance_key: " << cp->instance.instance_key << " gr " << gr; + + // Populate the group portion of *cp from *gr. Most of it should already + // match. + DCHECK_EQ(cp->group.group_key, gr->group.group_key); + DCHECK_EQ(cp->group.group_size, gr->group.group_size); + DCHECK_EQ(cp->group.device_type, gr->group.device_type); + cp->group = gr->group; + + // Get the shared InstanceRec for this instance. + FindInstanceRec(gr, cp, + [this, device, gr, cp, is_source, done](const Status& s, + InstanceRec* ir) { + if (s.ok()) { + CompleteInstanceFromInitializedIRec(device, gr, cp, ir, + is_source, done); + } else { + done(s); + } + }); +} + +void CollectiveParamResolverLocal::CompleteInstanceFromInitializedIRec( + const string& device, GroupRec* gr, CollectiveParams* cp, InstanceRec* ir, + bool is_source, const StatusCallback& done) { + // Populate the fields common across instance. + { + mutex_lock l(ir->out_mu); + // custom operator= does a deep copy. + cp->instance = ir->shared.instance; + } + // Populate the fields common across task, also default_rank. + SetDefaultRank(device, cp); + CompleteTaskIsLocal(task_name_, cp); + // If broadcast, may need to wait for source discovery. + if (cp->instance.type == BROADCAST_COLLECTIVE) { + CompleteInstanceSource(ir, cp, is_source, + [this, ir, device, cp, done](InstanceRec* irec) { + CHECK_EQ(ir, irec); + Status s; + int source_rank; + { + mutex_lock l(irec->out_mu); + s = irec->status; + source_rank = ir->source_rank; + } + if (s.ok()) { + GenerateSubdivPerms(device, source_rank, cp); + } + done(s); + }); + return; + } else { + GenerateSubdivPerms(device, 0, cp); + } + done(Status::OK()); +} + +void CollectiveParamResolverLocal::CompleteInstanceSource(InstanceRec* ir, + CollectiveParams* cp, + bool is_source, + const IRConsumer& f) { + std::vector ready_waiters; + { + mutex_lock l(ir->out_mu); + CHECK_EQ(cp->group.group_size, ir->known.size()); + CHECK_GE(cp->default_rank, 0); + if (!ir->known[cp->default_rank]) { + ir->known[cp->default_rank] = true; + ++ir->known_count; + if (is_source) { + if (ir->source_rank >= 0) { + ir->status = errors::Internal("Instance ", cp->instance.instance_key, + " already has source ", ir->source_rank, + ", recevied second claim from ", + cp->default_rank); + } else { + ir->source_rank = cp->default_rank; + } + } + } + if (ir->known_count < ir->shared.group.group_size) { + ir->known_waiters.push_back(f); + return; + } + CHECK_EQ(ir->known_count, ir->shared.group.group_size); + CHECK_GE(ir->source_rank, 0); + if (!ir->known_waiters.empty()) { + ready_waiters = std::move(ir->known_waiters); + } + } + f(ir); + for (auto& f : ready_waiters) { + f(ir); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.h b/tensorflow/core/common_runtime/collective_param_resolver_local.h new file mode 100644 index 0000000000..ff3415b0a9 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.h @@ -0,0 +1,209 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_PARAM_RESOLVER_LOCAL_H_ +#define TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_PARAM_RESOLVER_LOCAL_H_ + +#include + +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/lib/gtl/flatmap.h" + +namespace tensorflow { +class CompleteGroupRequest; +class CompleteGroupResponse; +class CompleteInstanceRequest; +class CompleteInstanceResponse; +class DeviceMgr; + +// Implements ParamResolverInterface for a single-task context. +// It also implements the functionality necessary to serve as the +// group leader for param resolution in a multi-task context. +class CollectiveParamResolverLocal : public ParamResolverInterface { + public: + CollectiveParamResolverLocal(const DeviceMgr* dev_mgr, + DeviceResolverInterface* dev_resolver, + const string& task_name); + + ~CollectiveParamResolverLocal() override {} + + void CompleteParamsAsync(const string& device, CollectiveParams* cp, + CancellationManager* cancel_mgr, + const StatusCallback& done) override; + + void CompleteGroupAsync(const CompleteGroupRequest* request, + CompleteGroupResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) override; + + void CompleteInstanceAsync(const CompleteInstanceRequest* request, + CompleteInstanceResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) override; + + protected: + // Used to complete/verify CollGroup. + struct GroupRec { + CollGroupParams group; + mutex mu; + Status status GUARDED_BY(mu); + std::set device_set GUARDED_BY(mu); + std::vector device_list GUARDED_BY(mu); + std::set task_set GUARDED_BY(mu); + std::vector task_list GUARDED_BY(mu); + std::vector waiting GUARDED_BY(mu); + }; + + // Finds the GroupRec that corresponds to cp->group_key. + // Also populates cp->group from that group_rec. + // Will wait until GroupRec is fully populated or an error arises before + // calling done. Callback GroupRec* arg is only valid if status is ok. + // Ownership of GroupRec stays with this object and does not pass to the + // callback. + typedef std::function GroupRecCallback; + void CompleteGroupLocal(const string& device, CollectiveParams* cp, + const GroupRecCallback& done) + LOCKS_EXCLUDED(group_mu_); + + // Used to complete/verify CollInstance. + struct InstanceRec; + typedef std::function IRConsumer; + struct InstanceRec { + // This structure has two mutexes so that a possibly long + // initialization can be done without holding the instance_mu_ + // table lock the whole time (which can cause an excessive number + // of threads to block on it), and because the compiler may not + // permit mutex locks to be taken in more than one order. + // + // out_mu guards access to most of the fields. + // in_mu guards access to a queue of comsumer callbacks wanting to + // read the fields guarded by out_mu. + // + // The in_mu should be locked only while holding instance_mu_; the + // out_mu should be locked only while not holding + // instance_mu_. + // + // When is_init is false (the initial value) any potential user + // other than the creator should queue a callback on init_waiters. + // As soon as the shared member of this structure is fully + // initialized is_init will be set true and those callbacks will + // be invoked. + // + // Once inserted in the table this structure will never be replaced + // so users can capture the pointer while holding instance_mu_, + // drop that lock, then take a lock on out_mu before + // reading/modifying its values. + mutex in_mu; + bool is_init GUARDED_BY(in_mu); + std::vector init_waiters GUARDED_BY(in_mu); + + // Values to be shared by all instances, constant after initialization. + mutex out_mu; + CollectiveParams shared GUARDED_BY(out_mu); + // If an error occurs during initialization this structure stays in + // the table with a non-OK status. Purging the table and restarting + // needs to be done at a higher level. + Status status GUARDED_BY(out_mu); + + // These fields are used to count the instances that have called + // in and become known while resolving broadcast source identity. + int source_rank GUARDED_BY(out_mu); + int known_count GUARDED_BY(out_mu); + std::vector known GUARDED_BY(out_mu); + std::vector known_waiters GUARDED_BY(out_mu); + + InstanceRec() : is_init(false), source_rank(-1), known_count(0) {} + }; + + // Find the InstanceRec with the same instance_key as cp. If it doesn't + // already exist, create and initialize from gr and cp. + // + // Precondition: *gr must be a complete GroupRec, i.e. the value set + // by CompleteGroupLocal. *cp must be populated with all the fields + // required by InitInstanceSharedParams. Ownership of InstanceRec stays + // with this object and does not pass to the callback. + typedef std::function + InstanceRecCallback; + void FindInstanceRec(GroupRec* gr, CollectiveParams* cp, + const InstanceRecCallback& done) + LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_); + + // Populate *ir with device membership from gr, then initialize to be specific + // to cp->instance_key, i.e. order the devices and tasks. + // + // Preconditions: + // cp is populated with all DeviceLocalities + Status InitInstanceSharedParams(GroupRec* gr, const CollectiveParams* cp, + InstanceRec* ir) + EXCLUSIVE_LOCKS_REQUIRED(ir->out_mu) LOCKS_EXCLUDED(gr->mu); + + // Establishes the final order of ir->shared.instance.device_names and + // ir->shared.instance.task_names by considering localities of all devices. + void CompleteDefaultRanking(GroupRec* gr, const CollectiveParams* cp, + InstanceRec* ir, + const std::vector& localities) + EXCLUSIVE_LOCKS_REQUIRED(ir->out_mu); + + // Finish populating *cp. + // Precondition: *gr has been fully populated by CompleteGroupLocal. + void CompleteInstanceLocal(const string& device, GroupRec* gr, + CollectiveParams* cp, bool is_source, + const StatusCallback& done) + LOCKS_EXCLUDED(instance_mu_, gr->mu, group_mu_); + + // Finish populating *cp from fully initialized *ir. + // Precondition: *gr and *ir are fully populated. + void CompleteInstanceFromInitializedIRec(const string& device, GroupRec* gr, + CollectiveParams* cp, + InstanceRec* ir, bool is_source, + const StatusCallback& done) + LOCKS_EXCLUDED(ir->out_mu); + + // Complete source data for a broadcast instance. + // Precondition: *cp has complete group data and default_rank. + void CompleteInstanceSource(InstanceRec* ir, CollectiveParams* cp, + bool is_source, const IRConsumer& f) + LOCKS_EXCLUDED(ir->out_mu); + + // If cp.device_names contains only devices local to this process + // populates *localities, else returns an error. + Status GetLocalDeviceLocalities(const CollectiveParams& cp, + std::vector* localities); + + // Sets CollTaskParams.is_local and CollectiveParams.default_rank. + // Precondition: cp->device_names is fully populated and in final order. + void CompleteTaskIsLocal(const string& task_name, CollectiveParams* cp); + + // Sets cp->instance_default_rank according to location of device in + // current ordering of cp->instance.device_names. + void SetDefaultRank(const string& device, CollectiveParams* cp); + + // Helper to grab status under lock, invoke callback out of lock. + void CallbackWithStatus(const InstanceRecCallback& done, InstanceRec* irec) + LOCKS_EXCLUDED(irec->out_mu); + + const DeviceMgr* dev_mgr_; + DeviceResolverInterface* dev_resolver_; + string task_name_; + mutex group_mu_; + gtl::FlatMap> group_table_ + GUARDED_BY(group_mu_); + mutex instance_mu_; + gtl::FlatMap> instance_table_ + GUARDED_BY(instance_mu_); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_PARAM_RESOLVER_LOCAL_H_ diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc new file mode 100644 index 0000000000..4e3c7125f2 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc @@ -0,0 +1,151 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_executor_mgr.h" + +#include "tensorflow/core/common_runtime/collective_param_resolver_local.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { +namespace { + +#define NUM_DEVS 3 + +class CollectiveParamResolverLocalTest : public ::testing::Test { + protected: + CollectiveParamResolverLocalTest() { + ConfigProto cp; + SessionOptions options; + string task_name = "/job:localhost/replica:0/task:0"; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", NUM_DEVS}); + TF_CHECK_OK(DeviceFactory::AddDevices(options, task_name, &devices_)); + device_mgr_.reset(new DeviceMgr(devices_)); + drl_.reset(new DeviceResolverLocal(device_mgr_.get())); + prl_.reset(new CollectiveParamResolverLocal(device_mgr_.get(), drl_.get(), + task_name)); + } + + std::vector devices_; + std::unique_ptr device_mgr_; + std::unique_ptr drl_; + std::unique_ptr prl_; +}; + +TEST_F(CollectiveParamResolverLocalTest, CompleteParamsReduction1Task) { + CollectiveParams cps[NUM_DEVS]; + Status statuses[NUM_DEVS]; + Notification note[NUM_DEVS]; + for (int i = 0; i < NUM_DEVS; ++i) { + CollectiveParams* cp = &cps[i]; + cp->group.group_key = 1; + cp->group.group_size = 3; + cp->group.device_type = DeviceType("CPU"); + cp->group.num_tasks = 1; + cp->instance.instance_key = 7; + cp->instance.type = REDUCTION_COLLECTIVE; + cp->instance.data_type = DataType(DT_FLOAT); + cp->instance.shape = TensorShape({5}); + cp->instance.device_names.push_back( + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i)); + cp->instance.impl_details.subdiv_offsets.push_back(0); + cp->is_source = false; + Env::Default()->SchedClosure([this, i, cp, ¬e, &statuses]() { + prl_->CompleteParamsAsync(cp->instance.device_names[0], cp, + nullptr /*CancellationManager*/, + [this, &statuses, ¬e, i](const Status& s) { + statuses[i] = s; + note[i].Notify(); + }); + }); + } + for (int i = 0; i < NUM_DEVS; ++i) { + note[i].WaitForNotification(); + } + for (int i = 0; i < NUM_DEVS; ++i) { + TF_ASSERT_OK(statuses[i]); + ASSERT_EQ(cps[i].instance.device_names.size(), 3); + for (int j = 0; j < NUM_DEVS; ++j) { + EXPECT_EQ( + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", j), + cps[i].instance.device_names[j]); + EXPECT_TRUE(cps[i].task.is_local[j]); + } + EXPECT_EQ(cps[i].subdiv_rank[0], i); + EXPECT_EQ(cps[i].subdiv_source_rank.size(), 0); + EXPECT_FALSE(cps[i].is_source); + EXPECT_EQ(cps[i].default_rank, i); + } +} + +TEST_F(CollectiveParamResolverLocalTest, CompleteParamsBroadcast1Task) { + CollectiveParams cps[NUM_DEVS]; + Status statuses[NUM_DEVS]; + Notification note[NUM_DEVS]; + for (int i = 0; i < NUM_DEVS; ++i) { + CollectiveParams* cp = &cps[i]; + cp->group.group_key = 1; + cp->group.group_size = 3; + cp->group.device_type = DeviceType("CPU"); + cp->group.num_tasks = 1; + cp->instance.instance_key = 3; + cp->instance.type = BROADCAST_COLLECTIVE; + cp->instance.data_type = DataType(DT_FLOAT); + cp->instance.shape = TensorShape({5}); + cp->instance.device_names.push_back( + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i)); + cp->instance.impl_details.subdiv_offsets.push_back(0); + cp->is_source = (i == 1); + Env::Default()->SchedClosure([this, i, cp, ¬e, &statuses]() { + prl_->CompleteParamsAsync(cp->instance.device_names[0], cp, + nullptr /*CancellationManager*/, + [this, &statuses, ¬e, i](const Status& s) { + statuses[i] = s; + note[i].Notify(); + }); + }); + } + for (int i = 0; i < NUM_DEVS; ++i) { + note[i].WaitForNotification(); + } + for (int i = 0; i < NUM_DEVS; ++i) { + TF_ASSERT_OK(statuses[i]); + ASSERT_EQ(cps[i].instance.device_names.size(), 3); + for (int j = 0; j < NUM_DEVS; ++j) { + EXPECT_EQ( + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", j), + cps[i].instance.device_names[j]); + EXPECT_TRUE(cps[i].task.is_local[j]); + } + ASSERT_GT(cps[i].subdiv_rank.size(), 0); + EXPECT_EQ(cps[i].subdiv_rank[0], i); + ASSERT_GT(cps[i].subdiv_source_rank.size(), 0); + EXPECT_EQ(cps[i].subdiv_source_rank[0], 1); + EXPECT_EQ(cps[i].is_source, (i == 1)); + EXPECT_EQ(cps[i].default_rank, i); + } +} + +// TEST_F(CollectiveParamResolverLocalTest, + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_rma_local.cc b/tensorflow/core/common_runtime/collective_rma_local.cc new file mode 100644 index 0000000000..ad9b32ce35 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_rma_local.cc @@ -0,0 +1,108 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_rma_local.h" + +#include "tensorflow/core/common_runtime/copy_tensor.h" +#include "tensorflow/core/common_runtime/dma_helper.h" + +namespace tensorflow { + +void CollectiveRemoteAccessLocal::StartAbort(const Status& s) { + buf_rendezvous_.StartAbort(s); +} + +void CollectiveRemoteAccessLocal::RecvFromPeer( + const string& peer_device, const string& peer_task, bool peer_is_local, + const string& key, Device* to_device, DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, + const DeviceLocality& client_locality, const StatusCallback& done) { + VLOG(1) << "RecvFromPeer " << this << " from " << peer_device << " key " + << key; + if (!peer_is_local) { + done( + errors::Internal("CollectiveRemoteAccessLocal::RecvFromPeer " + "called with peer_is_local=false")); + return; + } + buf_rendezvous_.ConsumeBuf( + key, [this, to_tensor, to_device_ctx, to_device, to_alloc_attr, done]( + const Status& s, BufRendezvous::Hook* hook) { + if (!s.ok()) { + done(s); + delete hook; + } else { + int64 recv_bytes = to_tensor->TotalBytes(); + CHECK_EQ(recv_bytes, hook->prod_value->TotalBytes()); + MemCpyAsync(hook->prod_ctx, // src DeviceContext + to_device_ctx, // dst DeviceContext + hook->prod_dev, // src Device + to_device, // dst Device + hook->prod_attr, // src AllocatorAttributes + to_alloc_attr, // dst AllocatorAttributes + hook->prod_value, // src Tensor* + to_tensor, // dst Tensor* + [hook, done](const Status& s) { + done(s); + hook->prod_cb(s); + delete hook; + }); + } + }); +} + +void CollectiveRemoteAccessLocal::PostToPeer( + const string& peer_device, const string& peer_task, const string& key, + Device* from_device, DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, const Tensor* from_tensor, + const DeviceLocality& client_locality, const StatusCallback& done) { + VLOG(1) << "PostToPeer " << this << " key " << key + << " step_id_=" << step_id_; + buf_rendezvous_.ProvideBuf(key, from_device, from_device_ctx, from_tensor, + from_alloc_attr, done); +} + +/*static*/ +void CollectiveRemoteAccessLocal::MemCpyAsync( + DeviceContext* src_dev_ctx, DeviceContext* dst_dev_ctx, Device* src_dev, + Device* dst_dev, const AllocatorAttributes& src_attr, + const AllocatorAttributes& dst_attr, const Tensor* src, Tensor* dst, + const StatusCallback& done) { + // We want a real copy to happen, i.e. the bytes inside of src should be + // transferred to the buffer backing dst. If src and dst are on different + // devices then CopyTensor::ViaDMA will do just that. But if they're both + // the same CPU, then it will actually just reset dst to point to src. + // Since this routine is used for copying between devices and within a + // device, we need to detect and bypass the wrong-semantics case. + const DeviceType src_device_type( + src_attr.on_host() ? DEVICE_CPU : src_dev->attributes().device_type()); + const DeviceType dst_device_type( + dst_attr.on_host() ? DEVICE_CPU : dst_dev->attributes().device_type()); + const bool non_cpu_src = src_device_type != DeviceType(DEVICE_CPU); + const bool non_cpu_dst = dst_device_type != DeviceType(DEVICE_CPU); + if (non_cpu_src) CHECK(src_dev_ctx); + if (non_cpu_dst) CHECK(dst_dev_ctx); + if (non_cpu_src || non_cpu_dst) { + CopyTensor::ViaDMA("", // edge name (non-existent) + src_dev_ctx, dst_dev_ctx, src_dev, dst_dev, src_attr, + dst_attr, src, dst, done); + } else { + int64 bytes = src->TotalBytes(); + DCHECK_EQ(dst->TotalBytes(), bytes); + memcpy(DMAHelper::base(dst), DMAHelper::base(src), bytes); + done(Status::OK()); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_rma_local.h b/tensorflow/core/common_runtime/collective_rma_local.h new file mode 100644 index 0000000000..d25dd5f04a --- /dev/null +++ b/tensorflow/core/common_runtime/collective_rma_local.h @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_RMA_LOCAL_ACCESS_H_ +#define TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_RMA_LOCAL_ACCESS_H_ +#include "tensorflow/core/common_runtime/buf_rendezvous.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/framework/rendezvous.h" + +namespace tensorflow { + +// Basic implementation of PerStepCollectiveRemoteAccess. +class CollectiveRemoteAccessLocal : public PerStepCollectiveRemoteAccess { + public: + CollectiveRemoteAccessLocal(const DeviceMgr* dev_mgr, + DeviceResolverInterface* dev_resolver, + int64 step_id) + : dev_mgr_(dev_mgr), + dev_resolver_(dev_resolver), + buf_rendezvous_(step_id), + step_id_(step_id) {} + + virtual ~CollectiveRemoteAccessLocal() {} + + void StartAbort(const Status& s); + + void RecvFromPeer(const string& peer_device, const string& peer_task, + bool peer_is_local, const string& key, Device* to_device, + DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override; + + void PostToPeer(const string& peer_device, const string& peer_task, + const string& key, Device* from_device, + DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, + const Tensor* from_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override; + + void GetDeviceLocalitiesAsync(const CollInstanceParams& ci_params, + std::vector* localities, + const StatusCallback& done) override { + dev_resolver_->GetDeviceLocalitiesAsync(ci_params, localities, done); + } + + void GetLocalityAsync(const string& device, const string& task, + DeviceLocality* locality, + const StatusCallback& done) override { + dev_resolver_->GetLocalityAsync(device, task, locality, done); + } + + void ClearTask(const string& task) override { + dev_resolver_->ClearTask(task); + } + + // Copy utility that always copies bytes from src to dst even if + // they are on the same device, unlike CopyTensor::ViaDMA which will + // just change the dst buffer pointer in that case. + static void MemCpyAsync(DeviceContext* src_dev_ctx, + DeviceContext* dst_dev_ctx, Device* src_dev, + Device* dst_dev, const AllocatorAttributes& src_attr, + const AllocatorAttributes& dst_attr, + const Tensor* src, Tensor* dst, + const StatusCallback& done); + + protected: + const DeviceMgr* dev_mgr_; // not owned + DeviceResolverInterface* dev_resolver_; // not owned + BufRendezvous buf_rendezvous_; + int64 step_id_; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_COMMON_RUNTIME_COLLECTIVE_RMA_LOCAL_ACCESS_H_ diff --git a/tensorflow/core/common_runtime/collective_rma_local_test.cc b/tensorflow/core/common_runtime/collective_rma_local_test.cc new file mode 100644 index 0000000000..dcd4272d96 --- /dev/null +++ b/tensorflow/core/common_runtime/collective_rma_local_test.cc @@ -0,0 +1,148 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/collective_rma_local.h" + +#include "tensorflow/core/common_runtime/buf_rendezvous.h" +#include "tensorflow/core/common_runtime/collective_param_resolver_local.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { +namespace { + +#define NUM_DEVS 3 +static const int kStepId = 123; + +class CollectiveRemoteAccessLocalTest : public ::testing::Test { + protected: + const string kTaskName = "/job:localhost/replica:0/task:0"; + + CollectiveRemoteAccessLocalTest() { + ConfigProto cp; + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", NUM_DEVS}); + TF_CHECK_OK(DeviceFactory::AddDevices(options, kTaskName, &devices_)); + device_mgr_.reset(new DeviceMgr(devices_)); + drl_.reset(new DeviceResolverLocal(device_mgr_.get())); + prl_.reset(new CollectiveParamResolverLocal(device_mgr_.get(), drl_.get(), + kTaskName)); + rma_.reset(new CollectiveRemoteAccessLocal(device_mgr_.get(), drl_.get(), + kStepId)); + } + + std::vector devices_; + std::unique_ptr device_mgr_; + std::unique_ptr drl_; + std::unique_ptr prl_; + std::unique_ptr rma_; +}; + +TEST_F(CollectiveRemoteAccessLocalTest, PostRecvCPU0) { + Device* cpu0 = nullptr; + AllocatorAttributes attr; + DeviceLocality dev_locality; + TF_ASSERT_OK(device_mgr_->LookupDevice(kTaskName + "/device:CPU:0", &cpu0)); + Tensor sink_tensor(DT_FLOAT, TensorShape({8})); + Notification recv_note; + Status recv_status; + rma_->RecvFromPeer(kTaskName + "/device:CPU:0", kTaskName, true /*is_local*/, + "key_0", cpu0 /*to_device*/, nullptr /*to_device_ctx*/, + attr /*to_alloc_attr*/, &sink_tensor, dev_locality, + [this, &recv_note, &recv_status](const Status& s) { + recv_status = s; + recv_note.Notify(); + }); + Tensor source_tensor(DT_FLOAT, TensorShape({8})); + for (int i = 0; i < 8; ++i) { + source_tensor.flat()(i) = i / 2; + } + // Tensors have distinct storage. + EXPECT_NE(DMAHelper::base(&source_tensor), DMAHelper::base(&sink_tensor)); + Notification send_note; + Status send_status; + rma_->PostToPeer(kTaskName + "/device:CPU:0", kTaskName, "key_0", + cpu0 /*from_device*/, nullptr /*from_device_ctx*/, + attr /*to_alloc_attr*/, &source_tensor, dev_locality, + [this, &send_note, &send_status](const Status& s) { + send_status = s; + send_note.Notify(); + }); + recv_note.WaitForNotification(); + send_note.WaitForNotification(); + TF_EXPECT_OK(recv_status); + TF_EXPECT_OK(send_status); + // Sink tensor gets the source tensor values. + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(sink_tensor.flat()(i), i / 2); + } + // And still has distinct storage. + EXPECT_NE(DMAHelper::base(&source_tensor), DMAHelper::base(&sink_tensor)); +} + +TEST_F(CollectiveRemoteAccessLocalTest, PostRecvCPU1_2) { + Device* cpu2 = nullptr; + AllocatorAttributes attr; + DeviceLocality dev_locality; + TF_ASSERT_OK(device_mgr_->LookupDevice(kTaskName + "/device:CPU:2", &cpu2)); + Tensor sink_tensor(DT_FLOAT, TensorShape({8})); + Notification recv_note; + Status recv_status; + rma_->RecvFromPeer(kTaskName + "/device:CPU:1", kTaskName, true /*is_local*/, + "key_0", cpu2 /*to_device*/, nullptr /*to_device_ctx*/, + attr /*to_alloc_attr*/, &sink_tensor, dev_locality, + [this, &recv_note, &recv_status](const Status& s) { + recv_status = s; + recv_note.Notify(); + }); + Tensor source_tensor(DT_FLOAT, TensorShape({8})); + for (int i = 0; i < 8; ++i) { + source_tensor.flat()(i) = i / 2; + } + // Tensors have distinct storage. + EXPECT_NE(DMAHelper::base(&source_tensor), DMAHelper::base(&sink_tensor)); + Device* cpu1 = nullptr; + TF_ASSERT_OK(device_mgr_->LookupDevice(kTaskName + "/device:CPU:1", &cpu1)); + Notification send_note; + Status send_status; + rma_->PostToPeer(kTaskName + "/device:CPU:2", kTaskName, "key_0", + cpu1 /*from_device*/, nullptr /*from_device_ctx*/, + attr /*to_alloc_attr*/, &source_tensor, dev_locality, + [this, &send_note, &send_status](const Status& s) { + send_status = s; + send_note.Notify(); + }); + recv_note.WaitForNotification(); + send_note.WaitForNotification(); + TF_EXPECT_OK(recv_status); + TF_EXPECT_OK(send_status); + // Sink tensor gets the source tensor values. + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(sink_tensor.flat()(i), i / 2); + } + // And still has distinct storage. + EXPECT_NE(DMAHelper::base(&source_tensor), DMAHelper::base(&sink_tensor)); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/device_resolver_local.cc b/tensorflow/core/common_runtime/device_resolver_local.cc new file mode 100644 index 0000000000..17ef4a2284 --- /dev/null +++ b/tensorflow/core/common_runtime/device_resolver_local.cc @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/device_resolver_local.h" + +#include "tensorflow/core/common_runtime/device_mgr.h" + +namespace tensorflow { + +void DeviceResolverLocal::GetDeviceLocalitiesAsync( + const CollInstanceParams& ci_params, + std::vector* localities, const StatusCallback& done) { + localities->clear(); + for (const string& device_name : ci_params.device_names) { + Device* dev; + Status s = dev_mgr_->LookupDevice(device_name, &dev); + if (!s.ok()) { + done(s); + return; + } + localities->push_back(dev->attributes().locality()); + } + done(Status::OK()); +} + +void DeviceResolverLocal::GetLocalityAsync(const string& device, + const string& task, + DeviceLocality* locality, + const StatusCallback& done) { + Device* dev; + Status s = dev_mgr_->LookupDevice(device, &dev); + if (s.ok()) { + *locality = dev->attributes().locality(); + } + done(s); +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/device_resolver_local.h b/tensorflow/core/common_runtime/device_resolver_local.h new file mode 100644 index 0000000000..098eccdf84 --- /dev/null +++ b/tensorflow/core/common_runtime/device_resolver_local.h @@ -0,0 +1,48 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMMON_RUNTIME_DEVICE_RESOLVER_LOCAL_H_ +#define TENSORFLOW_COMMON_RUNTIME_DEVICE_RESOLVER_LOCAL_H_ + +#include + +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/framework/device_attributes.pb.h" + +namespace tensorflow { +class DeviceMgr; + +// Implements DeviceResolverInterface in a single-task context. +class DeviceResolverLocal : public DeviceResolverInterface { + public: + DeviceResolverLocal(const DeviceMgr* dev_mgr) : dev_mgr_(dev_mgr) {} + + virtual ~DeviceResolverLocal() {} + + void GetDeviceLocalitiesAsync(const CollInstanceParams& ci_params, + std::vector* localities, + const StatusCallback& done) override; + + void GetLocalityAsync(const string& device, const string& task, + DeviceLocality* locality, + const StatusCallback& done) override; + + void ClearTask(const string& task) override {} + + protected: + const DeviceMgr* dev_mgr_; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_COMMON_RUNTIME_DEVICE_RESOLVER_LOCAL_H_ diff --git a/tensorflow/core/common_runtime/device_resolver_local_test.cc b/tensorflow/core/common_runtime/device_resolver_local_test.cc new file mode 100644 index 0000000000..f5a6471ff7 --- /dev/null +++ b/tensorflow/core/common_runtime/device_resolver_local_test.cc @@ -0,0 +1,87 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/device_resolver_local.h" + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { +namespace { + +#define NUM_DEVS 3 + +class DeviceResolverLocalTest : public ::testing::Test { + protected: + DeviceResolverLocalTest() { + ConfigProto cp; + SessionOptions options; + string task_name = "/job:localhost/replica:0/task:0"; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", NUM_DEVS}); + TF_CHECK_OK(DeviceFactory::AddDevices(options, task_name, &devices_)); + device_mgr_.reset(new DeviceMgr(devices_)); + drl_.reset(new DeviceResolverLocal(device_mgr_.get())); + } + + std::vector devices_; + std::unique_ptr device_mgr_; + std::unique_ptr drl_; +}; + +TEST_F(DeviceResolverLocalTest, GetDeviceLocalitiesKnown) { + CollectiveParams cp; + std::vector localities; + cp.instance.device_names.push_back( + "/job:localhost/replica:0/task:0/device:CPU:1"); + cp.instance.device_names.push_back( + "/job:localhost/replica:0/task:0/device:CPU:2"); + Notification note; + Status status; + drl_->GetDeviceLocalitiesAsync(cp.instance, &localities, + [this, ¬e, &status](const Status& s) { + status = s; + note.Notify(); + }); + note.WaitForNotification(); + TF_EXPECT_OK(status); + EXPECT_EQ(2, localities.size()); +} + +TEST_F(DeviceResolverLocalTest, GetDeviceLocalitiesUnknown) { + CollectiveParams cp; + std::vector localities; + // In some builds there may be 1 GPU, but there should never be 9. + cp.instance.device_names.push_back( + "/job:localhost/replica:0/task:0/device:GPU:9"); + Notification note; + Status status; + drl_->GetDeviceLocalitiesAsync(cp.instance, &localities, + [this, ¬e, &status](const Status& s) { + status = s; + note.Notify(); + }); + note.WaitForNotification(); + EXPECT_FALSE(status.ok()); + EXPECT_EQ(0, localities.size()); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/framework/collective.cc b/tensorflow/core/framework/collective.cc new file mode 100644 index 0000000000..a26f2c2f31 --- /dev/null +++ b/tensorflow/core/framework/collective.cc @@ -0,0 +1,120 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/collective.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace tensorflow { + +string CollGroupParams::ToString() const { + return strings::StrCat("CollGroupParams {group_key=", group_key, + " group_size=", group_size, + " device_type=", device_type.type_string(), + " num_tasks=", num_tasks, "}"); +} + +CollInstanceParams& CollInstanceParams::operator=( + const CollInstanceParams& other) { + if (this != &other) { + instance_key = other.instance_key; + type = other.type; + data_type = other.data_type; + shape = other.shape; + device_names.clear(); + device_names.assign(other.device_names.begin(), other.device_names.end()); + task_names.assign(other.task_names.begin(), other.task_names.end()); + impl_details.subdiv_offsets.assign( + other.impl_details.subdiv_offsets.begin(), + other.impl_details.subdiv_offsets.end()); + impl_details.subdiv_permutations.clear(); + for (auto p : other.impl_details.subdiv_permutations) { + impl_details.subdiv_permutations.push_back( + std::vector(p.begin(), p.end())); + } + impl_details.subdiv_source_rank.assign( + other.impl_details.subdiv_source_rank.begin(), + other.impl_details.subdiv_source_rank.end()); + } + return *this; +} + +string CollInstanceParams::ToString() const { + string v = strings::StrCat("CollInstanceParams { instance_key=", instance_key, + " type=", type, " data_type=", data_type, + " shape=", shape.DebugString(), " devices {"); + for (const auto& d : device_names) { + strings::StrAppend(&v, d, ","); + } + strings::StrAppend(&v, "} task_names={"); + for (const auto& n : task_names) { + strings::StrAppend(&v, n, ", "); + } + strings::StrAppend(&v, "}, subdiv_offsets={"); + for (const auto& d : impl_details.subdiv_offsets) { + strings::StrAppend(&v, d, ","); + } + strings::StrAppend(&v, "}, subdiv_perms={"); + for (const auto& p : impl_details.subdiv_permutations) { + strings::StrAppend(&v, "{"); + for (const auto& i : p) { + strings::StrAppend(&v, i, ","); + } + strings::StrAppend(&v, "}"); // one subdiv + } + strings::StrAppend(&v, "}"); // all subdivs + return v; +} + +string CollTaskParams::ToString() const { + string v = strings::StrCat("CollTaskParams {is_local={"); + for (const auto& b : is_local) { + strings::StrAppend(&v, static_cast(b), ","); + } + strings::StrAppend(&v, "}}"); + return v; +} + +string CollectiveParams::ToString() const { + string v = strings::StrCat("CollectiveParams ", name, " {", group.ToString()); + strings::StrAppend(&v, " ", instance.ToString()); + strings::StrAppend(&v, " ", task.ToString()); + strings::StrAppend(&v, " default_rank=", default_rank, + " is_source=", is_source, " subdiv_rank={"); + for (const auto& r : subdiv_rank) { + strings::StrAppend(&v, r, ","); + } + if (!subdiv_source_rank.empty()) { + strings::StrAppend(&v, " subdiv_rank={"); + for (const auto& r : subdiv_source_rank) { + strings::StrAppend(&v, r, ","); + } + strings::StrAppend(&v, "}"); + } + strings::StrAppend(&v, "}}"); + return v; +} + +/*static*/ OpKernelContext::Params* CollectiveExecutor::CtxParams( + OpKernelContext* ctx) { + return ctx->params_; +} + +/*static*/ +int64 CollectiveExecutor::kInvalidId = -1; + +} // namespace tensorflow diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h new file mode 100644 index 0000000000..362d345133 --- /dev/null +++ b/tensorflow/core/framework/collective.h @@ -0,0 +1,308 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_FRAMEWORK_COLLECTIVE_EXECUTOR_H_ +#define TENSORFLOW_FRAMEWORK_COLLECTIVE_EXECUTOR_H_ + +#include +#include + +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +class BufRendezvous; +class CancellationManager; +class CompleteGroupRequest; +class CompleteGroupResponse; +class CompleteInstanceRequest; +class CompleteInstanceResponse; +class DeviceLocality; +class GetStepSequenceRequest; +class GetStepSequenceResponse; +class Op; +class Tensor; + +// Types of supported collective operations. +enum CollectiveType { + REDUCTION_COLLECTIVE = 0, + BROADCAST_COLLECTIVE, + UNDEFINED_COLLECTIVE, +}; + +// Data common to all members of a device group. +// All members share the same device set but its order is +// particular to an instance so it is stored there. +struct CollGroupParams { + int32 group_key; + int32 group_size; + DeviceType device_type; + int32 num_tasks; // number of distinct tasks in group + string ToString() const; + CollGroupParams() : device_type(DEVICE_CPU) {} +}; + +// The best implementation of a collective op depends on many factors +// including the number of devices involved, the topology of +// interconnects between them and the sizes of inputs. This structure +// is used in generating and representing data movement choreography +// for each specific algorithm, hence it does not have a single, fixed +// interpretation. On first execution the runtime will update this +// structure with decisions that will guide all subsequent executions. +struct CollImplDetails { + std::vector> subdiv_permutations; + std::vector subdiv_offsets; + // broadcast only: rank of source in each subdiv + std::vector subdiv_source_rank; +}; + +// Data common to all members of a collective instance. +struct CollInstanceParams { + int32 instance_key; // Identifies all participating graph nodes. + CollectiveType type; + DataType data_type; + TensorShape shape; + // Fully qualified name of device for each member, in default rank order. + std::vector device_names; + // Task name prefix of corresponding device name. + std::vector task_names; + CollImplDetails impl_details; + string ToString() const; + CollInstanceParams& operator=(const struct CollInstanceParams& other); +}; + +// Data common to all instance members in the same task. +struct CollTaskParams { + // True for devices that are local to the process, i.e. no RPC needed. + std::vector is_local; + string ToString() const; +}; + +// Unique to a single CollectiveOp node. +struct CollectiveParams { + CollGroupParams group; + CollInstanceParams instance; + CollTaskParams task; + + string name; // node name used only for log or error messages + int default_rank; // index of this op within device_names + bool is_source; // broadcast only + // Rank of this device in each subdivision permutation. + std::vector subdiv_rank; + std::vector subdiv_source_rank; + const Tensor* in_tensor; // kernel input + Tensor* out_tensor; // kernel output + std::unique_ptr merge_op; // reduction only + std::unique_ptr final_op; // reduction only + OpKernelContext* op_context; + string ToString() const; +}; + +class CollectiveExecutor; + +// Interface that provides resolution of device localities. +class DeviceResolverInterface { + public: + virtual ~DeviceResolverInterface() {} + + // Collects DeviceLocality protobufs from all of the devices identified + // in 'col_params'. + virtual void GetDeviceLocalitiesAsync(const CollInstanceParams& inst_params, + std::vector* localities, + const StatusCallback& done) = 0; + + // Populate *locality with the DeviceLocality of the specified + // device. + virtual void GetLocalityAsync(const string& device, const string& task, + DeviceLocality* locality, + const StatusCallback& done) = 0; + + // Clear the cache of device data belonging + // to the specified task. + virtual void ClearTask(const string& task) = 0; +}; + +// Interface that provides resolution of shared CollectiveParams fields. +class ParamResolverInterface { + public: + virtual ~ParamResolverInterface() {} + + // Called by each collective op at first execution in order to fill out + // the CollectiveParams structure with data gathered from the full + // (maybe distributed) collection of peer nodes. + virtual void CompleteParamsAsync(const string& device, CollectiveParams* cp, + CancellationManager* cancel_mgr, + const StatusCallback& done) = 0; + + // Used within a distributed implementation to discover/verify + // data shared across a device group. + virtual void CompleteGroupAsync(const CompleteGroupRequest* request, + CompleteGroupResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) = 0; + + // Used within a distributed implementation to discover/verify data + // shared across an instance group. + virtual void CompleteInstanceAsync(const CompleteInstanceRequest* request, + CompleteInstanceResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) = 0; +}; + +// Graphs which utilize Collective Ops in a common instance must +// execute with identical step_ids even if they are disjoint graphs +// run by otherwise independent tasks. This interface supplies +// coordinated step_ids to use in such cases. +class StepSequenceInterface { + public: + virtual ~StepSequenceInterface() {} + + // Used with a distributed implementation to coordinate step_id + // sequences across tasks. + virtual void GetStepSequenceAsync(const GetStepSequenceRequest* request, + GetStepSequenceResponse* response, + const StatusCallback& done) = 0; + + // Refresh the local per-graph_key step_id sequence from collective + // group leader, if applicable. + virtual void RefreshStepIdSequenceAsync(int64 graph_key, + const StatusCallback& done) = 0; + + // Returns the the step_id that should be used for initiating a new execution + // on the specified graph. May return the same step_id multiple times if + // RetireStepId or RefreshStepIdReservation is not called. + virtual int64 NextStepId(int64 graph_key) = 0; + + // Reports that execution of the given step has completed successfully. + // Should be called immediately after a step completes with OK status, + // prior to calling NextStepId(). If the step fails, don't call. + virtual void RetireStepId(int64 graph_key, int64 step_id) = 0; +}; + +// Interface that provides access to per-step CollectiveExecutor +// instances and various distributed resolution capabilities. +class CollectiveExecutorMgrInterface : public StepSequenceInterface { + public: + virtual ~CollectiveExecutorMgrInterface() {} + + // Returns the step-specific CollectiveExecutor, creating if one does not + // already exist. The caller assumes ownership of one Ref on the object. + virtual CollectiveExecutor* FindOrCreate(int64 step_id) = 0; + + // If there is a CollectiveExecutor for step_id, remove it from the + // table. + virtual void Cleanup(int64 step_id) = 0; + + virtual ParamResolverInterface* GetParamResolver() const = 0; + + virtual DeviceResolverInterface* GetDeviceResolver() const = 0; +}; + +// Interface that a Collective Op implementation uses to exchange data +// with peers. Note that data exchange is currently limited to types +// for which DMAHelper::CanUseDMA() returns true, i.e. dense numeric +// types. +class PeerAccessInterface { + public: + virtual ~PeerAccessInterface() {} + + virtual void RecvFromPeer(const string& peer_device, const string& peer_task, + bool peer_is_local, const string& key, + Device* to_device, DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, + Tensor* to_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) = 0; + + virtual void PostToPeer(const string& peer_device, const string& peer_task, + const string& key, Device* from_device, + DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, + const Tensor* from_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) = 0; +}; + +class PerStepCollectiveRemoteAccess; + +// A step-specific object that can execute a collective operation completely +// described by a CollectiveParams object. +class CollectiveExecutor : public PeerAccessInterface, public core::RefCounted { + public: + virtual void StartAbort(const Status& s) {} + + virtual void ExecuteAsync(OpKernelContext* ctx, + const CollectiveParams& col_params, + const string& exec_key, StatusCallback done) { + done(errors::Internal( + "A collective Op has been called in a context in which " + "a CollectiveExecutor has not been provided.")); + } + + virtual void CompleteParamsAsync(const string& device, CollectiveParams* cp, + CancellationManager* cancel_mgr, + StatusCallback done) { + cem_->GetParamResolver()->CompleteParamsAsync(device, cp, cancel_mgr, done); + } + + virtual PerStepCollectiveRemoteAccess* remote_access() { return nullptr; } + + // Used to designate an invalid group or instance key. + static int64 kInvalidId; + + // Lexically scoped handle for Ref. + class Handle { + public: + explicit Handle(CollectiveExecutor* ce, bool inherit_ref) : ce_(ce) { + if (!inherit_ref) ce->Ref(); + } + ~Handle() { ce_->Unref(); } + CollectiveExecutor* get() const { return ce_; } + + private: + CollectiveExecutor* ce_; + }; + + protected: + explicit CollectiveExecutor(CollectiveExecutorMgrInterface* cem) + : cem_(cem) {} + + // For use only by derived classes + static OpKernelContext::Params* CtxParams(OpKernelContext* ctx); + CollectiveExecutorMgrInterface* cem_; + + TF_DISALLOW_COPY_AND_ASSIGN(CollectiveExecutor); +}; + +// Interface of a helper object that provices a CollectiveExecutor with +// all of the remote access it needs. +class CollectiveRemoteAccess : public PeerAccessInterface, + public DeviceResolverInterface { + public: + virtual ~CollectiveRemoteAccess() {} +}; + +// A per-step version of CollectiveRemoteAccess that cleans up outstanding +// communications in case step execution is abandoned. +class PerStepCollectiveRemoteAccess : public CollectiveRemoteAccess { + public: + virtual ~PerStepCollectiveRemoteAccess() {} + virtual void StartAbort(const Status& s) = 0; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_FRAMEWORK_COLLECTIVE_EXECUTOR_H_ diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 5ccd45efc9..2d97160830 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -1101,6 +1101,7 @@ class OpKernelContext { void NotifyUseOfPersistentTensor(const Tensor& tensor); Status status_; + friend class CollectiveExecutor; // for access to params_ Params* params_; // not owned mutable mutex mu_; // mutable so const accessors can acquire the lock gtl::InlinedVector wrapped_allocators_ GUARDED_BY(mu_); -- GitLab From 4be2f41f30554d71ba48eb03b44d05a424bf41af Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 28 Mar 2018 17:16:10 -0700 Subject: [PATCH 648/960] Missed ScopedUnref in ResourceGather PiperOrigin-RevId: 190861558 --- tensorflow/core/kernels/resource_variable_ops.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index e134e476f6..d1675f27dd 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -503,6 +503,7 @@ class ResourceGatherOp : public OpKernel { void Compute(OpKernelContext* c) override { Var* v = nullptr; OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v)); + core::ScopedUnref su(v); // NOTE: We hold the lock for the whole gather operation instead // of increasing the reference count of v->tensor() to avoid a // situation where a write to the same variable will see a -- GitLab From 74949ee09b0ff48a2ff1ca7a27475ec6c2583d43 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 17:36:30 -0700 Subject: [PATCH 649/960] Further speed up statistical_testing_test by breaking up DKWM test. PiperOrigin-RevId: 190863893 --- .../kernel_tests/statistical_testing_test.py | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py index c0e7bdd259..0400c80c29 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -141,16 +141,16 @@ class StatisticalTestingTest(test.TestCase): def test_dkwm_mean_two_sample_assertion(self): rng = np.random.RandomState(seed=0) - num_samples = 15000 + num_samples = 4000 - # 15000 samples is chosen to be enough to find discrepancies of - # size 0.1 or more with assurance 1e-6, as confirmed here: + # 4000 samples is chosen to be enough to find discrepancies of + # size 0.2 or more with assurance 1e-6, as confirmed here: with self.test_session() as sess: d = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( num_samples, 0., 1., num_samples, 0., 1., false_fail_rate=1e-6, false_pass_rate=1e-6) d = sess.run(d) - self.assertLess(d, 0.1) + self.assertLess(d, 0.2) # Test that the test assertion agrees that the standard # uniform distribution has the same mean as itself. @@ -160,6 +160,15 @@ class StatisticalTestingTest(test.TestCase): sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)) + def test_dkwm_mean_two_sample_assertion_beta_2_1_false(self): + rng = np.random.RandomState(seed=0) + num_samples = 4000 + samples1 = rng.uniform(size=num_samples).astype(np.float32) + + # As established above, 4000 samples is enough to find discrepancies + # of size 0.2 or more with assurance 1e-6. + + with self.test_session() as sess: # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(2, 1). beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) @@ -169,6 +178,15 @@ class StatisticalTestingTest(test.TestCase): beta_high_samples, 0., 1., false_fail_rate=1e-6)) + def test_dkwm_mean_two_sample_assertion_beta_1_2_false(self): + rng = np.random.RandomState(seed=0) + num_samples = 4000 + samples1 = rng.uniform(size=num_samples).astype(np.float32) + + # As established above, 4000 samples is enough to find discrepancies + # of size 0.2 or more with assurance 1e-6. + + with self.test_session() as sess: # Test that the test assertion confirms that the mean of the # standard uniform distribution is different from the mean of beta(1, 2). beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) -- GitLab From 628552228c76d2ee7f2eef4d56175a89941e3e1d Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 28 Mar 2018 17:54:01 -0700 Subject: [PATCH 650/960] TPU: Implement 3rd gen input pipeline config. In this new configuration, we are able to drive a Cloud TPU at full device performance, and achieve over 3k images/sec on ResNet-50. The previous bottleneck was the un-pipeline-able split that occurred after the iterator.get_next() call. This split (when not splitting on the batch-major dimension) caused the training job to be single-threaded-CPU-bottlenecked, resulting in a performance of only ~2650 images/sec on ResNet-50. This latest input pipeline configuration requires the use of datasets. By requiring datasets, we gain the ability to call get_next() num_replicas times per host, and avoid the expensive split op. (Note: this also opens up potential future avenues for further optimization.) Despite this, we retain a lot of nice usability properties that per_host_v1 (aka input pipeline config v2) gave us. PiperOrigin-RevId: 190865741 --- .../contrib/tpu/python/tpu/tpu_config.py | 32 ++++++-- .../contrib/tpu/python/tpu/tpu_context.py | 12 ++- .../contrib/tpu/python/tpu/tpu_estimator.py | 79 +++++++++++++++++-- 3 files changed, 107 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 38b5ea2310..cc1a7fd801 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -35,10 +35,16 @@ _TF_CONFIG_ENV = run_config_lib._TF_CONFIG_ENV _SERVICE_KEY = run_config_lib._SERVICE_KEY _TPU_WORKER_JOB_NAME = 'tpu_worker_job_name' _NUM_CORES_PER_HOST = 8 - # pylint: enable=protected-access +class InputPipelineConfig(object): + r"""Please see the definition of these values in TPUConfig.""" + PER_SHARD_V1 = 1 + PER_HOST_V1 = 2 + PER_HOST_V2 = 3 + + # TODO(b/72511246) Provide a simplified api to configure model parallelism. class TPUConfig( collections.namedtuple('TPUConfig', [ @@ -68,13 +74,16 @@ class TPUConfig( partitioned across 4 cores which span two cores in both x and y coordinates. Please refer to @{tf.contrib.tpu.Topology} for the geometry of a TPU mesh. - per_host_input_for_training: If `True`, `input_fn` is invoked Per-Host - rather than Per-Core. With Per-Host input pipeline deployment, `input_fn` - is invoked once on each host. With Per-Core input pipeline deployment, it - is invoked once for each core. To be precise, with a global batch size - `train_batch_size` in `TPUEstimator` constructor, the batch size for each - shard is `train_batch_size` // #hosts. With Per-Core input pipeline - deployment, the shard batch size is `train_batch_size` // #cores. + per_host_input_for_training: If `True`, `PER_HOST_V1`, or `PER_HOST_V2`, + `input_fn` is invoked per-host rather than per-core. With per-host input + pipeline configuration, `input_fn` is invoked once on each host. With the + per-core input pipeline configuration, it is invoked once for each core. + With a global batch size `train_batch_size` in `TPUEstimator` constructor, + the batch size for each shard is `train_batch_size` // #hosts in the + `True` or `PER_HOST_V1` mode. In `PER_HOST_V2` mode, it is + `train_batch_size` // #cores. With the per-core input pipeline + configuration, the shard batch size is also `train_batch_size` // #cores. + Note: per_host_input_for_training==PER_SHARD_V1 only supports mode.TRAIN. tpu_job_name: The name of the TPU job. Typically, this name is auto-inferred within TPUEstimator, however when using ClusterSpec propagation in more esoteric cluster configurations, you may need to specify the job name as a @@ -117,6 +126,13 @@ class TPUConfig( raise ValueError('computation_shape elements can only be 1 or 2; got ' 'computation_shape={}'.format(computation_shape)) + # per_host_input_for_training may be True, False, or integer in [1..3]. + # Map legacy values (True, False) to numeric values. + if per_host_input_for_training is False: + per_host_input_for_training = InputPipelineConfig.PER_SHARD_V1 + elif per_host_input_for_training is True: + per_host_input_for_training = InputPipelineConfig.PER_HOST_V1 + # Check initial_infeed_sleep_secs. if initial_infeed_sleep_secs: util_lib.check_positive_integer(initial_infeed_sleep_secs, diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index 3bac2db77e..fbc1173e49 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -24,6 +24,7 @@ import copy import numpy as np from tensorflow.contrib.tpu.python.tpu import device_assignment as tpu_device_assignment +from tensorflow.contrib.tpu.python.tpu import tpu_config from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.platform import tf_logging as logging @@ -205,7 +206,13 @@ class _TPUContext(object): """Return true if input_fn is invoked per-core (other than per-host).""" mode = self._assert_mode() return (mode == model_fn_lib.ModeKeys.TRAIN and - not self._config.tpu_config.per_host_input_for_training) + (self._config.tpu_config.per_host_input_for_training is + tpu_config.InputPipelineConfig.PER_SHARD_V1)) + + def is_input_per_host_with_iterators(self): + """Return true if input_fn should be run in the per-host v2 config.""" + return (self._config.tpu_config.per_host_input_for_training is + tpu_config.InputPipelineConfig.PER_HOST_V2) def is_running_on_cpu(self, is_export_mode=False): """Determines whether the input_fn and model_fn should be invoked on CPU. @@ -271,7 +278,8 @@ class _TPUContext(object): return global_batch_size # On TPU - if self.is_input_sharded_per_core(): + if self.is_input_sharded_per_core() or ( + self.is_input_per_host_with_iterators()): # We prohibit per core input sharding for the model parallelism case, # therefore it is safe to use num_cores here. return global_batch_size // self.num_cores diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 152f8c8c69..fa56708f44 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -740,6 +740,61 @@ def generate_per_host_enqueue_ops_fn_for_host( return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset +def generate_per_host_v2_enqueue_ops_fn_for_host( + ctx, input_fn, inputs_structure_recorder, device, host_id): + """Generates infeed enqueue ops for per-host input_fn on a single host.""" + del host_id # unused + captured_infeed_queue = _CapturedObject() + hooks = [] + + with ops.device(device): + inputs = _Inputs.from_input_fn(input_fn()) + + is_dataset = inputs.is_dataset + if not is_dataset: + raise TypeError('`input_fn` must return a `Dataset` for the PER_HOST_V2 ' + 'input pipeline configuration.') + if ctx.mode == model_fn_lib.ModeKeys.PREDICT: + # TODO(b/XXX): Add predict support for PER_HOST_V2 + raise TypeError('Most PREDICT not yet supported in PER_HOST_V2 mode.') + + hooks.append(inputs.dataset_initializer_hook()) + + def enqueue_ops_fn(): + """Generates the per_host enqueue ops.""" + control_deps = [] + per_host_sharded_inputs = [] + num_replicas_per_host = ctx.num_of_replicas_per_host + with ops.device(device): + if not inputs.is_dataset: + raise TypeError('`input_fn` must return a `Dataset` for this mode.') + for _ in range(num_replicas_per_host): + # Use control dependencies to ensure a deterministic ordering. + with ops.control_dependencies(control_deps): + features, labels = inputs.features_and_labels() # Calls get_next() + + inputs_structure_recorder.validate_and_record_structure( + features, labels) + flattened_inputs = ( + inputs_structure_recorder.flatten_features_and_labels( + features, labels)) + + control_deps.extend(flattened_inputs) + per_host_sharded_inputs.append(flattened_inputs) + + infeed_queue = tpu_feed.InfeedQueue( + number_of_tuple_elements=len(per_host_sharded_inputs[0])) + captured_infeed_queue.capture(infeed_queue) + infeed_queue.set_configuration_from_sharded_input_tensors( + per_host_sharded_inputs) + + per_host_enqueue_ops = infeed_queue.generate_enqueue_ops( + per_host_sharded_inputs, tpu_ordinal_function=ctx.tpu_ordinal_function) + return per_host_enqueue_ops + + return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset + + class _InputPipeline(object): """`_InputPipeline` handles invoking `input_fn` and piping to infeed queue. @@ -975,10 +1030,17 @@ class _InputPipeline(object): host_device = tpu_host_placement_fn(host_id=host_id) with ops.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): - enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( - generate_per_host_enqueue_ops_fn_for_host( - self._ctx, self._input_fn, self._inputs_structure_recorder, - self._batch_axis, host_device, host_id)) + if self._ctx.is_input_per_host_with_iterators(): + enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( + generate_per_host_v2_enqueue_ops_fn_for_host( + self._ctx, self._input_fn, + self._inputs_structure_recorder, host_device, host_id)) + else: + enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( + generate_per_host_enqueue_ops_fn_for_host( + self._ctx, self._input_fn, + self._inputs_structure_recorder, self._batch_axis, + host_device, host_id)) all_hooks.extend(hooks) # NOTE(xiejw): We dispatch here based on the return type of the @@ -1724,7 +1786,7 @@ class TPUEstimator(estimator_lib.Estimator): labels to match up with the corresponding images. If None is supplied, and per_host_input_for_training is True, batches will be sharded based on the major dimension. If tpu_config.per_host_input_for_training is - False, batch_axis is ignored. + False or `PER_HOST_V2`, batch_axis is ignored. Raises: ValueError: `params` has reserved keys already. @@ -1744,7 +1806,8 @@ class TPUEstimator(estimator_lib.Estimator): raise ValueError('`train_batch_size` cannot be `None`') util_lib.check_positive_integer(train_batch_size, 'train_batch_size') - if (not config.tpu_config.per_host_input_for_training and + if (config.tpu_config.per_host_input_for_training is + tpu_config.InputPipelineConfig.PER_SHARD_V1 and config.tpu_config.computation_shape): raise ValueError( 'Model parallelism only supports per host input for training. ' @@ -2362,6 +2425,10 @@ class _Inputs(object): def features_and_labels(self): """Gets `features` and `labels`.""" if self.is_dataset: + if self._iterator is None: + raise RuntimeError('Internal error: Must call dataset_initializer_hook ' + 'before calling features_and_labels(). Please file ' + 'a bug!') return _Inputs._parse_inputs(self._iterator.get_next()) return (self._features, self._labels) -- GitLab From 17dfe3ed7db7fb4d41f8933adead4737c30a92c9 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 28 Mar 2018 18:26:30 -0700 Subject: [PATCH 651/960] Implement assert_same_structure in C++ Also implements helper functions nest._is_namedtuple nest._same_namedtuple. Also, fix a bug in FlattenHelper where error from recursive calls were not propagated up immediately. This change implements a good chunk of machinery that will allow us to move map_structure to C++. Before: entry { name: "NestBenchmark.assert_same_structure_6_elem" iters: 30000 wall_time: 4.79532718658e-05 } entry { name: "NestBenchmark.assert_same_structure_60_elem" iters: 30000 wall_time: 0.000403008667628 } After: entry { name: "NestBenchmark.assert_same_structure_6_elem" iters: 30000 wall_time: 1.65301720301e-05 } entry { name: "NestBenchmark.assert_same_structure_60_elem" iters: 30000 wall_time: 0.000147621099154 } PiperOrigin-RevId: 190869007 --- tensorflow/python/BUILD | 1 + tensorflow/python/framework/test_util.py | 8 +- .../kernel_tests/functional_ops_test.py | 4 +- tensorflow/python/util/nest.py | 90 +---- tensorflow/python/util/nest_test.py | 156 +++++--- tensorflow/python/util/util.cc | 374 +++++++++++++++++- tensorflow/python/util/util.h | 51 +++ tensorflow/python/util/util.i | 9 + 8 files changed, 545 insertions(+), 148 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4f61c01f65..09c1965d7e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -298,6 +298,7 @@ cc_library( srcs = ["util/util.cc"], hdrs = ["util/util.h"], deps = [ + ":safe_ptr", "//tensorflow/core:framework", "//tensorflow/core:lib", "//util/python:python_headers", diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 4192a27f65..bf00fa6439 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -487,7 +487,13 @@ def assert_no_new_pyobjects_executing_eagerly(f): gc.collect() # There should be no new Python objects hanging around. new_count = len(gc.get_objects()) - self.assertEqual(previous_count, new_count) + # In some cases (specifacally on MacOS), new_count is somehow + # smaller than previous_count. + # Using plain assert because not all classes using this decorator + # have assertLessEqual + assert new_count <= previous_count, ( + "new_count(%d) is not less than or equal to previous_count(%d)" % ( + new_count, previous_count)) gc.enable() return decorator diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index f5717a5a21..1301ef9d19 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -229,7 +229,7 @@ class FunctionalOpsTest(test.TestCase): with self.test_session(): nums = np.array([1, 2, 3, 4, 5, 6]) with self.assertRaisesRegexp( - TypeError, r"two structures don't have the same sequence type."): + TypeError, r"two structures don't have the same nested structure"): # lambda emits tuple, but dtype is a list functional_ops.map_fn( lambda x: ((x + 3) * 2, -(x + 3) * 2), @@ -316,7 +316,7 @@ class FunctionalOpsTest(test.TestCase): initializer = np.array(1.0) # Multiply a * 1 each time with self.assertRaisesRegexp( - ValueError, "two structures don't have the same number of elements"): + ValueError, "two structures don't have the same nested structure"): functional_ops.scan(lambda a, x: (a, -a), elems, initializer) def testScan_Scoped(self): diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 23c2c48f4b..5622431bc9 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -60,15 +60,7 @@ def _is_namedtuple(instance, strict=False): Returns: True if `instance` is a `namedtuple`. """ - # Attemp to limit the test to plain namedtuple (not stuff inheriting from it). - if not isinstance(instance, tuple): - return False - if strict and instance.__class__.__base__ != tuple: - return False - return ( - hasattr(instance, "_fields") and - isinstance(instance._fields, _collections.Sequence) and - all(isinstance(f, _six.string_types) for f in instance._fields)) + return _pywrap_tensorflow.IsNamedtuple(instance, strict) def _sequence_like(instance, args): @@ -157,76 +149,7 @@ def flatten(nest): def _same_namedtuples(nest1, nest2): """Returns True if the two namedtuples have the same name and fields.""" - if nest1._fields != nest2._fields: - return False - if nest1.__class__.__name__ != nest2.__class__.__name__: - return False - return True - - -def _recursive_assert_same_structure(nest1, nest2, check_types): - """Helper function for `assert_same_structure`. - - See `assert_same_structure` for further information about namedtuples. - - Args: - nest1: An arbitrarily nested structure. - nest2: An arbitrarily nested structure. - check_types: If `True` (default) types of sequences are checked as - well, including the keys of dictionaries. If set to `False`, for example - a list and a tuple of objects will look the same if they have the same - size. Note that namedtuples with identical name and fields are always - considered to have the same shallow structure. - - Returns: - True if `nest1` and `nest2` have the same structure. - - Raises: - ValueError: If the two structure don't have the same nested structre. - TypeError: If the two structure don't have the same sequence type. - ValueError: If the two dictionaries don't have the same set of keys. - """ - is_sequence_nest1 = is_sequence(nest1) - if is_sequence_nest1 != is_sequence(nest2): - raise ValueError( - "The two structures don't have the same nested structure.\n\n" - "First structure: %s\n\nSecond structure: %s." % (nest1, nest2)) - - if not is_sequence_nest1: - return # finished checking - - if check_types: - type_nest1 = type(nest1) - type_nest2 = type(nest2) - - # Duck-typing means that nest should be fine with two different namedtuples - # with identical name and fields. - if _is_namedtuple(nest1, True) and _is_namedtuple(nest2, True): - if not _same_namedtuples(nest1, nest2): - raise TypeError( - "The two namedtuples don't have the same sequence type. First " - "structure has type %s, while second structure has type %s." - % (type_nest1, type_nest2)) - else: - if type_nest1 != type_nest2: - raise TypeError( - "The two structures don't have the same sequence type. First " - "structure has type %s, while second structure has type %s." - % (type_nest1, type_nest2)) - - if isinstance(nest1, dict): - keys1 = set(_six.iterkeys(nest1)) - keys2 = set(_six.iterkeys(nest2)) - if keys1 != keys2: - raise ValueError( - "The two dictionaries don't have the same set of keys. First " - "structure has keys {}, while second structure has keys {}." - .format(keys1, keys2)) - - nest1_as_sequence = [n for n in _yield_value(nest1)] - nest2_as_sequence = [n for n in _yield_value(nest2)] - for n1, n2 in zip(nest1_as_sequence, nest2_as_sequence): - _recursive_assert_same_structure(n1, n2, check_types) + return _pywrap_tensorflow.SameNamedtuples(nest1, nest2) def assert_same_structure(nest1, nest2, check_types=True): @@ -257,14 +180,7 @@ def assert_same_structure(nest1, nest2, check_types=True): TypeError: If the two structures differ in the type of sequence in any of their substructures. Only possible if `check_types` is `True`. """ - len_nest1 = len(flatten(nest1)) if is_sequence(nest1) else 1 - len_nest2 = len(flatten(nest2)) if is_sequence(nest2) else 1 - if len_nest1 != len_nest2: - raise ValueError("The two structures don't have the same number of " - "elements.\n\nFirst structure (%i elements): %s\n\n" - "Second structure (%i elements): %s" - % (len_nest1, nest1, len_nest2, nest2)) - _recursive_assert_same_structure(nest1, nest2, check_types) + _pywrap_tensorflow.AssertSameStructure(nest1, nest2, check_types) def flatten_dict_items(dictionary): diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index 4439d6241e..2f12b25354 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -19,11 +19,14 @@ from __future__ import division from __future__ import print_function import collections +import time import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -32,6 +35,9 @@ from tensorflow.python.util import nest class NestTest(test.TestCase): + PointXY = collections.namedtuple("Point", ["x", "y"]) # pylint: disable=invalid-name + + @test_util.assert_no_new_pyobjects_executing_eagerly def testFlattenAndPack(self): structure = ((3, 4), 5, (6, 7, (9, 10), 8)) flat = ["a", "b", "c", "d", "e", "f", "g", "h"] @@ -39,8 +45,8 @@ class NestTest(test.TestCase): self.assertEqual( nest.pack_sequence_as(structure, flat), (("a", "b"), "c", ("d", "e", ("f", "g"), "h"))) - point = collections.namedtuple("Point", ["x", "y"]) - structure = (point(x=4, y=2), ((point(x=1, y=0),),)) + structure = (NestTest.PointXY(x=4, y=2), + ((NestTest.PointXY(x=1, y=0),),)) flat = [4, 2, 1, 0] self.assertEqual(nest.flatten(structure), flat) restructured_from_flat = nest.pack_sequence_as(structure, flat) @@ -66,6 +72,7 @@ class NestTest(test.TestCase): with self.assertRaises(ValueError): nest.pack_sequence_as([5, 6, [7, 8]], ["a", "b", "c"]) + @test_util.assert_no_new_pyobjects_executing_eagerly def testFlattenDictOrder(self): """`flatten` orders dicts by key, including OrderedDicts.""" ordered = collections.OrderedDict([("d", 3), ("b", 1), ("a", 0), ("c", 2)]) @@ -87,12 +94,14 @@ class NestTest(test.TestCase): ordered_reconstruction) self.assertEqual({"d": 3, "b": 1, "a": 0, "c": 2}, plain_reconstruction) + Abc = collections.namedtuple("A", ("b", "c")) # pylint: disable=invalid-name + + @test_util.assert_no_new_pyobjects_executing_eagerly def testFlattenAndPack_withDicts(self): # A nice messy mix of tuples, lists, dicts, and `OrderedDict`s. - named_tuple = collections.namedtuple("A", ("b", "c")) mess = [ "z", - named_tuple(3, 4), + NestTest.Abc(3, 4), { "c": [ 1, @@ -111,7 +120,7 @@ class NestTest(test.TestCase): structure_of_mess = [ 14, - named_tuple("a", True), + NestTest.Abc("a", True), { "c": [ 0, @@ -157,6 +166,7 @@ class NestTest(test.TestCase): nest.pack_sequence_as(["hello", "world"], ["and", "goodbye", "again"]) + @test_util.assert_no_new_pyobjects_executing_eagerly def testIsSequence(self): self.assertFalse(nest.is_sequence("1234")) self.assertTrue(nest.is_sequence([1, 3, [4, 5]])) @@ -186,6 +196,23 @@ class NestTest(test.TestCase): ValueError, "Key had [0-9]* elements, but value had [0-9]* elements"): nest.flatten_dict_items(another_bad_dictionary) + # pylint does not correctly recognize these as class names and + # suggests to use variable style under_score naming. + # pylint: disable=invalid-name + Named0ab = collections.namedtuple("named_0", ("a", "b")) + Named1ab = collections.namedtuple("named_1", ("a", "b")) + SameNameab = collections.namedtuple("same_name", ("a", "b")) + SameNameab2 = collections.namedtuple("same_name", ("a", "b")) + SameNamexy = collections.namedtuple("same_name", ("x", "y")) + SameName1xy = collections.namedtuple("same_name_1", ("x", "y")) + SameName1xy2 = collections.namedtuple("same_name_1", ("x", "y")) + NotSameName = collections.namedtuple("not_same_name", ("a", "b")) + # pylint: enable=invalid-name + + class SameNamedType1(SameNameab): + pass + + @test_util.assert_no_new_pyobjects_executing_eagerly def testAssertSameStructure(self): structure1 = (((1, 2), 3), 4, (5, 6)) structure2 = ((("foo1", "foo2"), "foo3"), "foo4", ("foo5", "foo6")) @@ -198,23 +225,32 @@ class NestTest(test.TestCase): with self.assertRaisesRegexp( ValueError, - ("don't have the same number of elements\\.\n\n" - "First structure \\(6 elements\\):.*?" - "\n\nSecond structure \\(2 elements\\):")): + ("The two structures don't have the same nested structure\\.\n\n" + "First structure:.*?\n\n" + "Second structure:.*\n\n" + "More specifically: Substructure " + r'"type=tuple str=\(\(1, 2\), 3\)" is a sequence, while ' + 'substructure "type=str str=spam" is not')): nest.assert_same_structure(structure1, structure_different_num_elements) with self.assertRaisesRegexp( ValueError, - ("don't have the same number of elements\\.\n\n" - "First structure \\(2 elements\\):.*?" - "\n\nSecond structure \\(1 elements\\):")): + ("The two structures don't have the same nested structure\\.\n\n" + "First structure:.*?\n\n" + "Second structure:.*\n\n" + r'More specifically: Substructure "type=list str=\[0, 1\]" ' + r'is a sequence, while substructure "type=ndarray str=\[0 1\]" ' + "is not")): nest.assert_same_structure([0, 1], np.array([0, 1])) with self.assertRaisesRegexp( ValueError, - ("don't have the same number of elements\\.\n\n" - "First structure \\(1 elements\\):.*" - "\n\nSecond structure \\(2 elements\\):")): + ("The two structures don't have the same nested structure\\.\n\n" + "First structure:.*?\n\n" + "Second structure:.*\n\n" + r'More specifically: Substructure "type=list str=\[0, 1\]" ' + 'is a sequence, while substructure "type=int str=0" ' + "is not")): nest.assert_same_structure(0, [0, 1]) self.assertRaises(TypeError, nest.assert_same_structure, (0, 1), [0, 1]) @@ -225,21 +261,21 @@ class NestTest(test.TestCase): "First structure: .*?\n\nSecond structure: ")): nest.assert_same_structure(structure1, structure_different_nesting) - named_type_0 = collections.namedtuple("named_0", ("a", "b")) - named_type_1 = collections.namedtuple("named_1", ("a", "b")) self.assertRaises(TypeError, nest.assert_same_structure, (0, 1), - named_type_0("a", "b")) + NestTest.Named0ab("a", "b")) - nest.assert_same_structure(named_type_0(3, 4), named_type_0("a", "b")) + nest.assert_same_structure(NestTest.Named0ab(3, 4), + NestTest.Named0ab("a", "b")) self.assertRaises(TypeError, nest.assert_same_structure, - named_type_0(3, 4), named_type_1(3, 4)) + NestTest.Named0ab(3, 4), NestTest.Named1ab(3, 4)) with self.assertRaisesRegexp( ValueError, ("don't have the same nested structure\\.\n\n" "First structure: .*?\n\nSecond structure: ")): - nest.assert_same_structure(named_type_0(3, 4), named_type_0([3], 4)) + nest.assert_same_structure(NestTest.Named0ab(3, 4), + NestTest.Named0ab([3], 4)) with self.assertRaisesRegexp( ValueError, @@ -258,36 +294,33 @@ class NestTest(test.TestCase): "don't have the same set of keys"): nest.assert_same_structure({"a": 1}, {"b": 1}) - same_name_type_0 = collections.namedtuple("same_name", ("a", "b")) - same_name_type_1 = collections.namedtuple("same_name", ("a", "b")) - nest.assert_same_structure(same_name_type_0(0, 1), same_name_type_1(2, 3)) + nest.assert_same_structure(NestTest.SameNameab(0, 1), + NestTest.SameNameab2(2, 3)) # This assertion is expected to pass: two namedtuples with the same # name and field names are considered to be identical. - same_name_type_2 = collections.namedtuple("same_name_1", ("x", "y")) - same_name_type_3 = collections.namedtuple("same_name_1", ("x", "y")) nest.assert_same_structure( - same_name_type_0(same_name_type_2(0, 1), 2), - same_name_type_1(same_name_type_3(2, 3), 4)) + NestTest.SameNameab(NestTest.SameName1xy(0, 1), 2), + NestTest.SameNameab2(NestTest.SameName1xy2(2, 3), 4)) expected_message = "The two structures don't have the same.*" with self.assertRaisesRegexp(ValueError, expected_message): - nest.assert_same_structure(same_name_type_0(0, same_name_type_1(1, 2)), - same_name_type_1(same_name_type_0(0, 1), 2)) + nest.assert_same_structure( + NestTest.SameNameab(0, NestTest.SameNameab2(1, 2)), + NestTest.SameNameab2(NestTest.SameNameab(0, 1), 2)) - same_name_type_1 = collections.namedtuple("not_same_name", ("a", "b")) self.assertRaises(TypeError, nest.assert_same_structure, - same_name_type_0(0, 1), same_name_type_1(2, 3)) + NestTest.SameNameab(0, 1), NestTest.NotSameName(2, 3)) - same_name_type_1 = collections.namedtuple("same_name", ("x", "y")) self.assertRaises(TypeError, nest.assert_same_structure, - same_name_type_0(0, 1), same_name_type_1(2, 3)) + NestTest.SameNameab(0, 1), NestTest.SameNamexy(2, 3)) - class SameNamedType1(collections.namedtuple("same_name", ("a", "b"))): - pass self.assertRaises(TypeError, nest.assert_same_structure, - same_name_type_0(0, 1), SameNamedType1(2, 3)) + NestTest.SameNameab(0, 1), NestTest.SameNamedType1(2, 3)) + EmptyNT = collections.namedtuple("empty_nt", "") # pylint: disable=invalid-name + + @test_util.assert_no_new_pyobjects_executing_eagerly def testMapStructure(self): structure1 = (((1, 2), 3), 4, (5, 6)) structure2 = (((7, 8), 9), 10, (11, 12)) @@ -310,9 +343,8 @@ class NestTest(test.TestCase): self.assertEqual((), nest.map_structure(lambda x: x + 1, ())) self.assertEqual([], nest.map_structure(lambda x: x + 1, [])) self.assertEqual({}, nest.map_structure(lambda x: x + 1, {})) - empty_nt = collections.namedtuple("empty_nt", "") - self.assertEqual(empty_nt(), nest.map_structure(lambda x: x + 1, - empty_nt())) + self.assertEqual(NestTest.EmptyNT(), nest.map_structure(lambda x: x + 1, + NestTest.EmptyNT())) # This is checking actual equality of types, empty list != empty tuple self.assertNotEqual((), nest.map_structure(lambda x: x + 1, [])) @@ -352,10 +384,12 @@ class NestTest(test.TestCase): with self.assertRaisesRegexp(ValueError, "Only valid keyword argument"): nest.map_structure(lambda x: None, structure1, check_types=False, foo="a") + ABTuple = collections.namedtuple("ab_tuple", "a, b") # pylint: disable=invalid-name + + @test_util.assert_no_new_pyobjects_executing_eagerly def testMapStructureWithStrings(self): - ab_tuple = collections.namedtuple("ab_tuple", "a, b") - inp_a = ab_tuple(a="foo", b=("bar", "baz")) - inp_b = ab_tuple(a=2, b=(1, 3)) + inp_a = NestTest.ABTuple(a="foo", b=("bar", "baz")) + inp_b = NestTest.ABTuple(a=2, b=(1, 3)) out = nest.map_structure(lambda string, repeats: string * repeats, inp_a, inp_b) @@ -363,8 +397,8 @@ class NestTest(test.TestCase): self.assertEqual("bar", out.b[0]) self.assertEqual("bazbazbaz", out.b[1]) - nt = ab_tuple(a=("something", "something_else"), - b="yet another thing") + nt = NestTest.ABTuple(a=("something", "something_else"), + b="yet another thing") rev_nt = nest.map_structure(lambda x: x[::-1], nt) # Check the output is the correct structure, and all strings are reversed. nest.assert_same_structure(nt, rev_nt) @@ -431,10 +465,8 @@ class NestTest(test.TestCase): # This assertion is expected to pass: two namedtuples with the same # name and field names are considered to be identical. - same_name_type_0 = collections.namedtuple("same_name", ("a", "b")) - same_name_type_1 = collections.namedtuple("same_name", ("a", "b")) - inp_shallow = same_name_type_0(1, 2) - inp_deep = same_name_type_1(1, [1, 2, 3]) + inp_shallow = NestTest.SameNameab(1, 2) + inp_deep = NestTest.SameNameab2(1, [1, 2, 3]) nest.assert_shallow_structure(inp_shallow, inp_deep, check_types=False) nest.assert_shallow_structure(inp_shallow, inp_deep, check_types=True) @@ -466,7 +498,7 @@ class NestTest(test.TestCase): [1, {"c": 2}, 3, (4, 5)]) # Namedtuples. - ab_tuple = collections.namedtuple("ab_tuple", "a, b") + ab_tuple = NestTest.ABTuple input_tree = ab_tuple(a=[0, 1], b=2) shallow_tree = ab_tuple(a=0, b=1) input_tree_flattened_as_shallow_tree = nest.flatten_up_to(shallow_tree, @@ -681,5 +713,31 @@ class NestTest(test.TestCase): list(nest.flatten_with_joined_string_paths(inputs)), expected) +class NestBenchmark(test.Benchmark): + + def run_and_report(self, s1, s2, name): + burn_iter, test_iter = 100, 30000 + + for _ in xrange(burn_iter): + nest.assert_same_structure(s1, s2) + + t0 = time.time() + for _ in xrange(test_iter): + nest.assert_same_structure(s1, s2) + t1 = time.time() + + self.report_benchmark(iters=test_iter, wall_time=(t1 - t0) / test_iter, + name=name) + + def benchmark_assert_structure(self): + s1 = (((1, 2), 3), 4, (5, 6)) + s2 = ((("foo1", "foo2"), "foo3"), "foo4", ("foo5", "foo6")) + self.run_and_report(s1, s2, "assert_same_structure_6_elem") + + s1 = (((1, 2), 3), 4, (5, 6)) * 10 + s2 = ((("foo1", "foo2"), "foo3"), "foo4", ("foo5", "foo6")) * 10 + self.run_and_report(s1, s2, "assert_same_structure_60_elem") + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index a41fa7df25..70aee4a3f6 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/python/lib/core/safe_ptr.h" namespace tensorflow { namespace swig { @@ -27,6 +28,113 @@ PyObject* CollectionsSequenceType = nullptr; bool WarnedThatSetIsNotSequence = false; +bool IsString(PyObject* o) { + return PyBytes_Check(o) || +#if PY_MAJOR_VERSION < 3 + PyString_Check(o) || +#endif + PyUnicode_Check(o); +} + +// Equivalent to Python's 'o.__class__.__name__' +// Note that '__class__' attribute is set only in new-style classes. +// A lot of tensorflow code uses __class__ without checks, so it seems like +// we only support new-style classes. +StringPiece GetClassName(PyObject* o) { + // __class__ is equivalent to type() for new style classes. + // type() is equivalent to PyObject_Type() + // (https://docs.python.org/3.5/c-api/object.html#c.PyObject_Type) + // PyObject_Type() is equivalent to o->ob_type except for Py_INCREF, which + // we don't need here. + PyTypeObject* type = o->ob_type; + + // __name__ is the value of `tp_name` after the last '.' + // (https://docs.python.org/2/c-api/typeobj.html#c.PyTypeObject.tp_name) + StringPiece name(type->tp_name); + size_t pos = name.rfind('.'); + if (pos != StringPiece::npos) { + name.remove_prefix(pos + 1); + } + return name; +} + +string PyObjectToString(PyObject* o) { + if (o == nullptr) { + return ""; + } + PyObject* str = PyObject_Str(o); + if (str) { +#if PY_MAJOR_VERSION < 3 + string s(PyString_AS_STRING(str)); +#else + string s(PyUnicode_AsUTF8(str)); +#endif + Py_DECREF(str); + return tensorflow::strings::StrCat("type=", GetClassName(o), " str=", s); + } else { + return ""; + } +} + +// Implements the same idea as tensorflow.util.nest._yield_value +// During construction we check if the iterable is a dictionary. +// If so, we construct a sequence from its sorted keys that will be used +// for iteration. +// If not, we construct a sequence directly from the iterable. +// At each step, we get the next element from the sequence and use it +// either as a key or return it directly. +// +// 'iterable' must not be modified while ValIterator is used. +class ValIterator { + public: + explicit ValIterator(PyObject* iterable) : dict_(nullptr), index_(0) { + if (PyDict_Check(iterable)) { + dict_ = iterable; + // PyDict_Keys returns a list, which can be used with + // PySequence_Fast_GET_ITEM. + seq_ = PyDict_Keys(iterable); + // Iterate through dictionaries in a deterministic order by sorting the + // keys. Notice this means that we ignore the original order of + // `OrderedDict` instances. This is intentional, to avoid potential + // bugs caused by mixing ordered and plain dicts (e.g., flattening + // a dict but using a corresponding `OrderedDict` to pack it back). + PyList_Sort(seq_); + } else { + seq_ = PySequence_Fast(iterable, ""); + } + size_ = PySequence_Fast_GET_SIZE(seq_); + } + + ~ValIterator() { Py_DECREF(seq_); } + + // Return a borrowed reference to the next element from iterable. + // Return nullptr when iteration is over. + PyObject* next() { + PyObject* element = nullptr; + if (index_ < size_) { + // Both PySequence_Fast_GET_ITEM and PyDict_GetItem return borrowed + // references. + element = PySequence_Fast_GET_ITEM(seq_, index_); + ++index_; + if (dict_ != nullptr) { + element = PyDict_GetItem(dict_, element); + if (element == nullptr) { + PyErr_SetString(PyExc_RuntimeError, + "Dictionary was modified during iteration over it"); + return nullptr; + } + } + } + return element; + } + + private: + PyObject* seq_; + PyObject* dict_; + Py_ssize_t size_; + Py_ssize_t index_; +}; + // Returns 1 if `o` is considered a sequence for the purposes of Flatten(). // Returns 0 otherwise. // Returns -1 if an error occurred. @@ -38,7 +146,7 @@ int IsSequenceHelper(PyObject* o) { "so consider avoiding using them."; WarnedThatSetIsNotSequence = true; } - if (CollectionsSequenceType == nullptr) { + if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) { PyErr_SetString( PyExc_RuntimeError, tensorflow::strings::StrCat( @@ -49,11 +157,7 @@ int IsSequenceHelper(PyObject* o) { } int is_instance = PyObject_IsInstance(o, CollectionsSequenceType); if (is_instance == -1) return -1; - return static_cast(is_instance != 0 && !PyBytes_Check(o) && -#if PY_MAJOR_VERSION < 3 - !PyString_Check(o) && -#endif - !PyUnicode_Check(o)); + return static_cast(is_instance != 0 && !IsString(o)); } bool FlattenHelper(PyObject* nested, PyObject* list) { @@ -75,12 +179,16 @@ bool FlattenHelper(PyObject* nested, PyObject* list) { // while the method is running. PyObject* key = PyList_GET_ITEM(keys, i); PyObject* val = PyDict_GetItem(nested, key); - if (Py_EnterRecursiveCall(" in Flatten")) { + if (Py_EnterRecursiveCall(" in flatten")) { Py_DECREF(keys); return false; } - FlattenHelper(val, list); + const bool success = FlattenHelper(val, list); Py_LeaveRecursiveCall(); + if (!success) { + Py_DECREF(keys); + return false; + } } Py_DECREF(keys); return true; @@ -90,13 +198,159 @@ bool FlattenHelper(PyObject* nested, PyObject* list) { PyObject* item; PyObject* iterator = PyObject_GetIter(nested); while ((item = PyIter_Next(iterator)) != nullptr) { - FlattenHelper(item, list); + if (Py_EnterRecursiveCall(" in flatten")) { + Py_DECREF(iterator); + Py_DECREF(item); + return false; + } + bool success = FlattenHelper(item, list); + Py_LeaveRecursiveCall(); + if (!success) { + Py_DECREF(iterator); + Py_DECREF(item); + return false; + } Py_DECREF(item); } Py_DECREF(iterator); return true; } +// Sets error using keys of 'dict1' and 'dict2'. +// 'dict1' and 'dict2' are assumed to be Python dictionaries. +void SetDifferentKeysError(PyObject* dict1, PyObject* dict2, string* error_msg, + bool* is_type_error) { + PyObject* k1 = PyDict_Keys(dict1); + PyObject* k2 = PyDict_Keys(dict2); + *is_type_error = false; + *error_msg = tensorflow::strings::StrCat( + "The two dictionaries don't have the same set of keys. " + "First structure has keys ", + PyObjectToString(k1), ", while second structure has keys ", + PyObjectToString(k2)); + Py_DECREF(k1); + Py_DECREF(k2); +} + +// Returns true iff there were no "internal" errors. In other words, +// errors that has nothing to do with structure checking. +// If an "internal" error occured, the appropriate Python error will be +// set and the caller can propage it directly to the user. +// +// Both `error_msg` and `is_type_error` must be non-null. `error_msg` must +// be empty. +// Leaves `error_msg` empty if structures matched. Else, fills `error_msg` +// with appropriate error and sets `is_type_error` to true iff +// the error to be raised should be TypeError. +bool AssertSameStructureHelper(PyObject* o1, PyObject* o2, bool check_types, + string* error_msg, bool* is_type_error) { + DCHECK(error_msg); + DCHECK(is_type_error); + const bool is_seq1 = IsSequence(o1); + const bool is_seq2 = IsSequence(o2); + if (PyErr_Occurred()) return false; + if (is_seq1 != is_seq2) { + string seq_str = is_seq1 ? PyObjectToString(o1) : PyObjectToString(o2); + string non_seq_str = is_seq1 ? PyObjectToString(o2) : PyObjectToString(o1); + *is_type_error = false; + *error_msg = tensorflow::strings::StrCat( + "Substructure \"", seq_str, "\" is a sequence, while substructure \"", + non_seq_str, "\" is not"); + return true; + } + + // Got to scalars, so finished checking. Structures are the same. + if (!is_seq1) return true; + + if (check_types) { + const PyTypeObject* type1 = o1->ob_type; + const PyTypeObject* type2 = o2->ob_type; + + // We treat two different namedtuples with identical name and fields + // as having the same type. + const PyObject* o1_tuple = IsNamedtuple(o1, true); + if (o1_tuple == nullptr) return false; + const PyObject* o2_tuple = IsNamedtuple(o2, true); + if (o2_tuple == nullptr) { + Py_DECREF(o1_tuple); + return false; + } + bool both_tuples = o1_tuple == Py_True && o2_tuple == Py_True; + Py_DECREF(o1_tuple); + Py_DECREF(o2_tuple); + + if (both_tuples) { + const PyObject* same_tuples = SameNamedtuples(o1, o2); + if (same_tuples == nullptr) return false; + bool not_same_tuples = same_tuples != Py_True; + Py_DECREF(same_tuples); + if (not_same_tuples) { + *is_type_error = true; + *error_msg = tensorflow::strings::StrCat( + "The two namedtuples don't have the same sequence type. " + "First structure ", + PyObjectToString(o1), " has type ", type1->tp_name, + ", while second structure ", PyObjectToString(o2), " has type ", + type2->tp_name); + return true; + } + } else if (type1 != type2) { + *is_type_error = true; + *error_msg = tensorflow::strings::StrCat( + "The two namedtuples don't have the same sequence type. " + "First structure ", + PyObjectToString(o1), " has type ", type1->tp_name, + ", while second structure ", PyObjectToString(o2), " has type ", + type2->tp_name); + return true; + } + + if (PyDict_Check(o1)) { + if (PyDict_Size(o1) != PyDict_Size(o2)) { + SetDifferentKeysError(o1, o2, error_msg, is_type_error); + return true; + } + + PyObject* key; + Py_ssize_t pos = 0; + while (PyDict_Next(o1, &pos, &key, nullptr)) { + if (PyDict_GetItem(o2, key) == nullptr) { + SetDifferentKeysError(o1, o2, error_msg, is_type_error); + return true; + } + } + } + } + + ValIterator iter1(o1); + ValIterator iter2(o2); + + while (true) { + PyObject* v1 = iter1.next(); + PyObject* v2 = iter2.next(); + if (v1 != nullptr && v2 != nullptr) { + if (Py_EnterRecursiveCall(" in assert_same_structure")) { + return false; + } + bool no_internal_errors = AssertSameStructureHelper( + v1, v2, check_types, error_msg, is_type_error); + Py_LeaveRecursiveCall(); + if (!no_internal_errors) return false; + if (!error_msg->empty()) return true; + } else if (v1 == nullptr && v2 == nullptr) { + // Done with all recursive calls. Structure matched. + return true; + } else { + *is_type_error = false; + *error_msg = tensorflow::strings::StrCat( + "The two structures don't have the same number of elements. ", + "First structure: ", PyObjectToString(o1), + ". Second structure: ", PyObjectToString(o2)); + return true; + } + } +} + } // anonymous namespace void RegisterSequenceClass(PyObject* sequence_class) { @@ -123,5 +377,107 @@ PyObject* Flatten(PyObject* nested) { return nullptr; } } + +PyObject* IsNamedtuple(PyObject* o, bool strict) { + // Must be subclass of tuple + if (!PyTuple_Check(o)) { + Py_RETURN_FALSE; + } + + // If strict, o.__class__.__base__ must be tuple + if (strict) { + PyObject* klass = PyObject_GetAttrString(o, "__class__"); + if (klass == nullptr) return nullptr; + PyObject* base = PyObject_GetAttrString(klass, "__base__"); + Py_DECREF(klass); + if (base == nullptr) return nullptr; + + const PyTypeObject* base_type = reinterpret_cast(base); + // built-in object types are singletons + bool tuple_base = base_type == &PyTuple_Type; + Py_DECREF(base); + if (!tuple_base) { + Py_RETURN_FALSE; + } + } + + if (TF_PREDICT_FALSE(CollectionsSequenceType == nullptr)) { + PyErr_SetString( + PyExc_RuntimeError, + tensorflow::strings::StrCat( + "collections.Sequence type has not been set. " + "Please call RegisterSequenceClass before using this module") + .c_str()); + return nullptr; + } + + // o must have attribute '_fields' and every element in + // '_fields' must be a string. + int has_fields = PyObject_HasAttrString(o, "_fields"); + if (!has_fields) { + Py_RETURN_FALSE; + } + + Safe_PyObjectPtr fields = make_safe(PyObject_GetAttrString(o, "_fields")); + int is_instance = PyObject_IsInstance(fields.get(), CollectionsSequenceType); + if (is_instance == 0) { + Py_RETURN_FALSE; + } else if (is_instance == -1) { + return nullptr; + } + + Safe_PyObjectPtr seq = make_safe(PySequence_Fast(fields.get(), "")); + const Py_ssize_t s = PySequence_Fast_GET_SIZE(seq.get()); + for (Py_ssize_t i = 0; i < s; ++i) { + // PySequence_Fast_GET_ITEM returns borrowed ref + PyObject* elem = PySequence_Fast_GET_ITEM(seq.get(), i); + if (!IsString(elem)) { + Py_RETURN_FALSE; + } + } + + Py_RETURN_TRUE; +} + +PyObject* SameNamedtuples(PyObject* o1, PyObject* o2) { + PyObject* f1 = PyObject_GetAttrString(o1, "_fields"); + PyObject* f2 = PyObject_GetAttrString(o2, "_fields"); + if (f1 == nullptr || f2 == nullptr) { + Py_XDECREF(f1); + Py_XDECREF(f2); + PyErr_SetString( + PyExc_RuntimeError, + "Expected namedtuple-like objects (that have _fields attr)"); + return nullptr; + } + + if (PyObject_RichCompareBool(f1, f2, Py_NE)) { + Py_RETURN_FALSE; + } + + if (GetClassName(o1).compare(GetClassName(o2)) == 0) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types) { + string error_msg; + bool is_type_error = false; + AssertSameStructureHelper(o1, o2, check_types, &error_msg, &is_type_error); + if (!error_msg.empty()) { + PyErr_SetString( + is_type_error ? PyExc_TypeError : PyExc_ValueError, + tensorflow::strings::StrCat( + "The two structures don't have the same nested structure.\n\n", + "First structure: ", PyObjectToString(o1), "\n\nSecond structure: ", + PyObjectToString(o2), "\n\nMore specifically: ", error_msg) + .c_str()); + return nullptr; + } + Py_RETURN_NONE; +} + } // namespace swig } // namespace tensorflow diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index 2af71dc753..c325baa5f8 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -33,6 +33,57 @@ namespace swig { // dict. bool IsSequence(PyObject* o); +// Implements the same interface as tensorflow.util.nest._is_namedtuple +// Returns Py_True iff `instance` should be considered a `namedtuple`. +// +// Args: +// instance: An instance of a Python object. +// strict: If True, `instance` is considered to be a `namedtuple` only if +// it is a "plain" namedtuple. For instance, a class inheriting +// from a `namedtuple` will be considered to be a `namedtuple` +// iff `strict=False`. +// +// Returns: +// True if `instance` is a `namedtuple`. +PyObject* IsNamedtuple(PyObject* o, bool strict); + +// Implements the same interface as tensorflow.util.nest._same_namedtuples +// Returns Py_True iff the two namedtuples have the same name and fields. +// Raises RuntimeError if `o1` or `o2` don't look like namedtuples (don't have +// '_fields' attribute). +PyObject* SameNamedtuples(PyObject* o1, PyObject* o2); + +// Asserts that two structures are nested in the same way. +// +// Note that namedtuples with identical name and fields are always considered +// to have the same shallow structure (even with `check_types=True`). +// For intance, this code will print `True`: +// +// ```python +// def nt(a, b): +// return collections.namedtuple('foo', 'a b')(a, b) +// print(assert_same_structure(nt(0, 1), nt(2, 3))) +// ``` +// +// Args: +// nest1: an arbitrarily nested structure. +// nest2: an arbitrarily nested structure. +// check_types: if `true`, types of sequences are checked as +// well, including the keys of dictionaries. If set to `false`, for example +// a list and a tuple of objects will look the same if they have the same +// size. Note that namedtuples with identical name and fields are always +// considered to have the same shallow structure. +// +// Raises: +// ValueError: If the two structures do not have the same number of elements or +// if the two structures are not nested in the same way. +// TypeError: If the two structures differ in the type of sequence in any of +// their substructures. Only possible if `check_types` is `True`. +// +// Returns: +// Py_None on success, nullptr on error. +PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types); + // Implements the same interface as tensorflow.util.nest.flatten // // Returns a flat list from a given nested structure. diff --git a/tensorflow/python/util/util.i b/tensorflow/python/util/util.i index d69084fc00..b7f201b6fe 100644 --- a/tensorflow/python/util/util.i +++ b/tensorflow/python/util/util.i @@ -34,6 +34,15 @@ limitations under the License. %unignore tensorflow::swig::IsSequence; %noexception tensorflow::swig::IsSequence; +%unignore tensorflow::swig::IsNamedtuple; +%noexception tensorflow::swig::IsNamedtuple; + +%unignore tensorflow::swig::SameNamedtuples; +%noexception tensorflow::swig::SameNamedtuples; + +%unignore tensorflow::swig::AssertSameStructure; +%noexception tensorflow::swig::AssertSameStructure; + %unignore tensorflow::swig::Flatten; %noexception tensorflow::swig::Flatten; -- GitLab From 59a12553545c3d8f957a1a6e618561d4228f7f59 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Wed, 28 Mar 2018 18:26:46 -0700 Subject: [PATCH 652/960] Relax CuDNN version requirements because CuDNN is backwards compatible within a major release starting with CuDNN 7.0 PiperOrigin-RevId: 190869028 --- tensorflow/stream_executor/BUILD | 6 +- tensorflow/stream_executor/cuda/cuda_dnn.cc | 87 +++++++++++++------ .../stream_executor/cuda/cudnn_version.cc | 42 +++++++++ .../stream_executor/cuda/cudnn_version.h | 51 +++++++++++ .../cuda/cudnn_version_test.cc | 75 ++++++++++++++++ 5 files changed, 233 insertions(+), 28 deletions(-) create mode 100644 tensorflow/stream_executor/cuda/cudnn_version.cc create mode 100644 tensorflow/stream_executor/cuda/cudnn_version.h create mode 100644 tensorflow/stream_executor/cuda/cudnn_version_test.cc diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD index 1865240014..27cdb860fe 100644 --- a/tensorflow/stream_executor/BUILD +++ b/tensorflow/stream_executor/BUILD @@ -56,7 +56,10 @@ cc_library( [ "cuda/*.cc", ], - exclude = ["cuda/cuda_platform_id.cc"], + exclude = [ + "cuda/*_test.cc", + "cuda/cuda_platform_id.cc", + ], ), ), copts = select({ @@ -72,6 +75,7 @@ cc_library( ":stream_executor", "//tensorflow/core:lib", "//tensorflow/core/kernels:ops_util", + "@com_google_absl//absl/strings", "@local_config_cuda//cuda:cuda_headers", ] + if_cuda_is_configured([ "//tensorflow/core:cuda", diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index ab5e6590e0..1aea0485fd 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -18,7 +18,9 @@ limitations under the License. #include #include +#include "absl/strings/str_cat.h" #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/env_var.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" @@ -27,6 +29,7 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_platform_id.h" #include "tensorflow/stream_executor/cuda/cuda_stream.h" #include "tensorflow/stream_executor/cuda/cuda_timer.h" +#include "tensorflow/stream_executor/cuda/cudnn_version.h" #include "tensorflow/stream_executor/dnn.h" #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/lib/error.h" @@ -55,15 +58,6 @@ NarrowT CheckedNarrowing(const WideT& wide) { return narrow; } -// Returns the "Compatibility" version number from the CuDNN version number. -// This is the number that tries to indicate ABI compatibility. -// -// For example, if cudnn_version is 5107, the compatibility version -// number will be 5100. -size_t cudnnCompatibilityVersion(size_t cudnn_version) { - return (cudnn_version / 100) * 100; -} - } // namespace namespace perftools { @@ -109,6 +103,22 @@ string ToString(cudnnStatus_t status) { } } +#if CUDNN_VERSION >= 6000 +string ToString(libraryPropertyType type) { + switch (type) { + case MAJOR_VERSION: + return "MAJOR_VERSION"; + case MINOR_VERSION: + return "MINOR_VERSION"; + case PATCH_LEVEL: + return "PATCH_LEVEL"; + default: + return absl::StrCat( + "(type), ">"); + } +} +#endif + template cudnnDataType_t GetCudnnDataType(); @@ -360,6 +370,34 @@ cudnnConvolutionBwdFilterAlgo_t ToConvBackwardFilterAlgo( } } +#if CUDNN_VERSION >= 6000 +port::Status GetCudnnProperty(libraryPropertyType type, int* value) { + cudnnStatus_t status = cudnnGetProperty(type, value); + if (status != CUDNN_STATUS_SUCCESS) { + const string error = + absl::StrCat("cudnnGetProperty failed for type: ", ToString(type), + " with status: ", ToString(status)); + LOG(ERROR) << error; + return port::Status{port::error::INTERNAL, error}; + } + return port::Status::OK(); +} +#endif + +port::Status GetLoadedCudnnVersion(CudnnVersion* version) { +#if CUDNN_VERSION >= 6000 + TF_RETURN_IF_ERROR(GetCudnnProperty(MAJOR_VERSION, &version->major_version)); + TF_RETURN_IF_ERROR(GetCudnnProperty(MINOR_VERSION, &version->minor_version)); + TF_RETURN_IF_ERROR(GetCudnnProperty(PATCH_LEVEL, &version->patch_level)); +#else + size_t loaded_version = ::cudnnGetVersion(); + version->major_version = loaded_version / 1000; + version->minor_version = (loaded_version / 100) % 10; + version->patch_level = loaded_version % 100; +#endif + return port::Status::OK(); +} + } // namespace CudnnSupport::CudnnSupport(CUDAExecutor* parent) @@ -376,24 +414,19 @@ port::Status CudnnSupport::Init() { auto status = wrap::cudnnCreate( parent_, reinterpret_cast(&dnn_handle_)); if (status == CUDNN_STATUS_SUCCESS) { - // Check whether loaded version of CuDNN matches what the source - // was built with. - size_t loaded_version = ::cudnnGetVersion(); - size_t loaded_compat_version = cudnnCompatibilityVersion(loaded_version); - size_t compiled_compat_version = cudnnCompatibilityVersion(CUDNN_VERSION); - bool library_loaded_matches_source = - (loaded_compat_version == compiled_compat_version); - if (!library_loaded_matches_source) { - const string error = - port::StrCat("Loaded runtime CuDNN library: ", loaded_version, - " (compatibility version ", loaded_compat_version, - ") but source was compiled with ", CUDNN_VERSION, - " (compatibility version ", compiled_compat_version, - "). If using a binary install, upgrade your CuDNN " - "library to match. If building from sources, " - "make sure the library loaded at runtime matches a " - "compatible version specified during compile " - "configuration."); + CudnnVersion source_version(CUDNN_MAJOR, CUDNN_MINOR, CUDNN_PATCHLEVEL); + + CudnnVersion loaded_version; + TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&loaded_version)); + if (!IsSourceCompatibleWithCudnnLibrary(source_version, loaded_version)) { + const tensorflow::string error = absl::StrCat( + "Loaded runtime CuDNN library: ", loaded_version.ToString(), + " but source was compiled with: ", source_version.ToString(), + ". CuDNN library major and minor version needs to match or have " + "higher minor version in case of CuDNN 7.0 or later version. If " + "using a binary install, upgrade your CuDNN library. If building " + "from sources, make sure the library loaded at runtime is compatible " + "with the version specified during compile configuration."); LOG(ERROR) << error; return port::Status{port::error::INTERNAL, error}; } diff --git a/tensorflow/stream_executor/cuda/cudnn_version.cc b/tensorflow/stream_executor/cuda/cudnn_version.cc new file mode 100644 index 0000000000..5591801aae --- /dev/null +++ b/tensorflow/stream_executor/cuda/cudnn_version.cc @@ -0,0 +1,42 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/cuda/cudnn_version.h" + +namespace perftools { +namespace gputools { +namespace cuda { + +bool IsSourceCompatibleWithCudnnLibrary(CudnnVersion source_version, + CudnnVersion loaded_version) { + // Major version is neither forward or backward compatible and therefore major + // versions needs to match between source and library. + // + // Minor version is backward-compatible beginning with CuDNN 7 and therefore + // minor version of library needs to be same or higher. + // + // Patch releases are always forward and backward compatible and therefore + // need not match. + if (loaded_version.major_version != source_version.major_version) { + return false; + } + return ((loaded_version.minor_version == source_version.minor_version) || + (source_version.major_version >= 7 && + loaded_version.minor_version >= source_version.minor_version)); +} + +} // namespace cuda +} // namespace gputools +} // namespace perftools diff --git a/tensorflow/stream_executor/cuda/cudnn_version.h b/tensorflow/stream_executor/cuda/cudnn_version.h new file mode 100644 index 0000000000..058cc87bfa --- /dev/null +++ b/tensorflow/stream_executor/cuda/cudnn_version.h @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDNN_VERSION_H_ +#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDNN_VERSION_H_ + +#include + +#include "absl/strings/str_join.h" + +namespace perftools { +namespace gputools { +namespace cuda { + +struct CudnnVersion { + CudnnVersion() = default; + + CudnnVersion(int major, int minor, int patch) + : major_version(major), minor_version(minor), patch_level(patch) {} + + std::string ToString() const { + return absl::StrJoin({major_version, minor_version, patch_level}, "."); + } + + int major_version; + int minor_version; + int patch_level; +}; + +// Returns true if the given source CuDNN version is compatible with the given +// loaded version. +bool IsSourceCompatibleWithCudnnLibrary(CudnnVersion source_version, + CudnnVersion loaded_version); + +} // namespace cuda +} // namespace gputools +} // namespace perftools + +#endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDNN_VERSION_H_ diff --git a/tensorflow/stream_executor/cuda/cudnn_version_test.cc b/tensorflow/stream_executor/cuda/cudnn_version_test.cc new file mode 100644 index 0000000000..230adafeb1 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cudnn_version_test.cc @@ -0,0 +1,75 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/cuda/cudnn_version.h" + +#include "testing/base/public/gunit.h" +#include "tensorflow/core/platform/test.h" + +namespace perftools { +namespace gputools { +namespace cuda { +namespace { + +TEST(CuDNNVersion, ToString) { + CudnnVersion version(7, 0, 12); + EXPECT_EQ(version.ToString(), "7.0.12"); +} + +TEST(IsSourceCompatibleWithCudnnLibraryTest, Basic) { + // Returns true if both major and minor versions are matching and even if the + // patch versions are not matching. + EXPECT_TRUE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(7, 0, 12), + /*loaded_version=*/CudnnVersion(7, 0, 14))); + EXPECT_TRUE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(6, 1, 14), + /*loaded_version=*/CudnnVersion(6, 1, 00))); + + // Returns false if major versions are not matching as they are neither + // forward or backward compatible. + EXPECT_FALSE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(7, 0, 12), + /*loaded_version=*/CudnnVersion(6, 1, 14))); + EXPECT_FALSE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(8, 1, 15), + /*loaded_version=*/CudnnVersion(7, 0, 14))); + + // Returns true if the loaded version is equal or higher because minor version + // are backward compatible with CuDNN version 7. + EXPECT_TRUE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(7, 0, 14), + /*loaded_version=*/CudnnVersion(7, 1, 14))); + EXPECT_TRUE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(7, 0, 14), + /*loaded_version=*/CudnnVersion(7, 1, 15))); + EXPECT_FALSE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(7, 1, 15), + /*loaded_version=*/CudnnVersion(7, 0, 14))); + + // Returns false if minor versions are not matching for version 6. Before + // version 7, minor versions are also neither forward or backward compatible. + EXPECT_FALSE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(6, 0, 14), + /*loaded_version=*/CudnnVersion(6, 1, 15))); + EXPECT_FALSE(IsSourceCompatibleWithCudnnLibrary( + /*source_version=*/CudnnVersion(6, 1, 14), + /*loaded_version=*/CudnnVersion(6, 0, 14))); +} + +} // namespace +} // namespace cuda +} // namespace gputools +} // namespace perftools -- GitLab From 2b41d75654012f917cda1b54aee090d73086ab84 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 18:54:09 -0700 Subject: [PATCH 653/960] [XLA] Redesign: implement GetComputationStats. PiperOrigin-RevId: 190871262 --- tensorflow/compiler/xla/client/client.cc | 47 ++++++++++++++++++++-- tensorflow/compiler/xla/client/client.h | 2 + tensorflow/compiler/xla/service/service.cc | 20 ++++++++- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index a857c4ff0b..c4c8894374 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -276,7 +276,12 @@ StatusOr> Client::Execute( if (execution_profile != nullptr) { *execution_profile = response.profile(); - // TODO(b/74197823): Get execution stats for the graph and VLOG(1) them. + if (VLOG_IS_ON(1)) { + TF_ASSIGN_OR_RETURN( + auto execution_stats, + ExecutionStatsAsString(computation, response.profile())); + VLOG(1) << execution_stats; + } } return MakeUnique(stub_, response.output()); @@ -402,8 +407,22 @@ StatusOr Client::GetComputationStats( StatusOr Client::GetComputationStats( const XlaComputation& computation, const DebugOptions& debug_options) const { - return Unimplemented( - "GetComputationStats is not yet implemented for XlaComputation"); + ComputationGraphStatsRequest request; + + // TODO(b/74197823): Find a way to avoid the copy of the hlo proto. + *request.mutable_computation() = computation.proto(); + *request.mutable_debug_options() = debug_options; + ComputationStatsResponse response; + + VLOG(1) << "making computation graph stats request"; + Status s = stub_->GetComputationGraphStats(&request, &response); + VLOG(1) << "done with request"; + + if (!s.ok()) { + return s; + } + CHECK(response.has_stats()); + return response.stats(); } StatusOr> Client::GetComputationShape( @@ -467,6 +486,28 @@ StatusOr Client::ExecutionStatsAsString( return string("[Execution Statistics] not available."); } +StatusOr Client::ExecutionStatsAsString( + const XlaComputation& computation, const ExecutionProfile& profile) { + TF_ASSIGN_OR_RETURN( + auto computation_stats, + GetComputationStats(computation, + legacy_flags::GetDebugOptionsFromFlags())); + int64 total_flops = + computation_stats.flop_count() + computation_stats.transcendental_count(); + if (profile.compute_time_ns() > 0) { + int64 nanoseconds = profile.compute_time_ns(); + int64 cycle_count = profile.compute_cycle_count(); + double gflops = total_flops / nanoseconds; + return tensorflow::strings::StrCat( + "[Execution Statistics] flop count: ", computation_stats.flop_count(), + ", transcendental count: ", computation_stats.transcendental_count(), + ", compute execution time: ", nanoseconds, " nsec", + ", compute cycles: ", cycle_count, ", performance: ", gflops, + "gflop/s"); + } + return string("[Execution Statistics] not available."); +} + StatusOr Client::CreateChannelHandle() { CreateChannelHandleRequest request; CreateChannelHandleResponse response; diff --git a/tensorflow/compiler/xla/client/client.h b/tensorflow/compiler/xla/client/client.h index 226b788d54..05d707dab1 100644 --- a/tensorflow/compiler/xla/client/client.h +++ b/tensorflow/compiler/xla/client/client.h @@ -241,6 +241,8 @@ class Client { // ExecutionProfile returned from an execution of the computation. StatusOr ExecutionStatsAsString(const Computation& computation, const ExecutionProfile& profile); + StatusOr ExecutionStatsAsString(const XlaComputation& computation, + const ExecutionProfile& profile); ServiceInterface* stub_; // Stub that this client is connected on. diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index af05e3f516..ca8071b7bb 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -1452,7 +1452,25 @@ tensorflow::Status Service::GetComputationStats( tensorflow::Status Service::GetComputationGraphStats( const ComputationGraphStatsRequest* arg, ComputationStatsResponse* result) { - return Unimplemented("get-computation-graph-stats is not yet implemented"); + HloModuleConfig config; + config.set_debug_options(arg->debug_options()); + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(arg->computation(), config)); + + hlo_graph_dumper::MaybeDumpHloModule(*module, + "computation statistics subject"); + + // Run HLO analysis to get the computation statistics. + HloCostAnalysis analysis( + execute_backend_->compiler()->ShapeSizeBytesFunction()); + + TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&analysis)); + + ComputationStats stats; + stats.set_flop_count(analysis.flop_count()); + stats.set_transcendental_count(analysis.transcendental_count()); + *result->mutable_stats() = stats; + return tensorflow::Status::OK(); } template -- GitLab From 3e51f9ede54bc61a8d4f7797992ab78140467d08 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 28 Mar 2018 18:59:13 -0700 Subject: [PATCH 654/960] Default to disable including the coordinator in the job --- .../cluster_resolver/python/training/tpu_cluster_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 300b19733e..95c5c920aa 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -73,7 +73,7 @@ class TPUClusterResolver(ClusterResolver): zone=None, project=None, job_name='worker', - coordinator_name='coordinator', + coordinator_name=None, coordinator_address=None, credentials='default', service=None): -- GitLab From 991e205a78f67ce21b0918613a45cfd7c3e348fd Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 29 Mar 2018 10:05:43 +0800 Subject: [PATCH 655/960] Fix the incorect format of math equation in factorization_ops (#18054) * Fix the incorect format of math equation in factorization_ops * Fix minor intent format * Fix pylint issues * Fix serveral minor intent --- .../python/ops/factorization_ops.py | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/factorization/python/ops/factorization_ops.py b/tensorflow/contrib/factorization/python/ops/factorization_ops.py index 8e0ed1d80e..3f3e3e0f25 100644 --- a/tensorflow/contrib/factorization/python/ops/factorization_ops.py +++ b/tensorflow/contrib/factorization/python/ops/factorization_ops.py @@ -51,9 +51,9 @@ class WALSModel(object): r"""A model for Weighted Alternating Least Squares matrix factorization. It minimizes the following loss function over U, V: - \\( - \|\sqrt W \odot (A - U V^T) \|_F^2 + \lambda (\|U\|_F^2 + \|V\|_F^2) - )\\ + $$ + \|\sqrt W \odot (A - U V^T)\|_F^2 + \lambda (\|U\|_F^2 + \|V\|_F^2) + $$ where, A: input matrix, W: weight matrix. Note that the (element-wise) square root of the weights @@ -61,12 +61,12 @@ class WALSModel(object): U, V: row_factors and column_factors matrices, \\(\lambda)\\: regularization. Also we assume that W is of the following special form: - \\( W_{ij} = W_0 + R_i * C_j )\\ if \\(A_{ij} \ne 0)\\, - \\(W_{ij} = W_0)\\ otherwise. + \\( W_{ij} = W_0 + R_i * C_j \\) if \\(A_{ij} \ne 0\\), + \\(W_{ij} = W_0\\) otherwise. where, - \\(W_0)\\: unobserved_weight, - \\(R_i)\\: row_weights, - \\(C_j)\\: col_weights. + \\(W_0\\): unobserved_weight, + \\(R_i\\): row_weights, + \\(C_j\\): col_weights. Note that the current implementation supports two operation modes: The default mode is for the condition where row_factors and col_factors can individually @@ -82,14 +82,15 @@ class WALSModel(object): normalized as follows: _, _, unregularized_loss, regularization, sum_weights = update_row_factors(sp_input) - if sp_input contains the rows {A_i, i \in I}, and the input matrix A has n - total rows, then the minibatch loss = unregularized_loss + regularization is - \\( + if sp_input contains the rows \\({A_i, i \in I}\\), and the input matrix A + has n total rows, then the minibatch loss = unregularized_loss + + regularization is + $$ (\|\sqrt W_I \odot (A_I - U_I V^T)\|_F^2 + \lambda \|U_I\|_F^2) * n / |I| + \lambda \|V\|_F^2 - )\\ + $$ The sum_weights tensor contains the normalized sum of weights - sum(W_I) * n / |I|. + \\(sum(W_I) * n / |I|\\). A typical usage example (pseudocode): @@ -217,13 +218,13 @@ class WALSModel(object): - When set to None, w_ij = unobserved_weight, which simplifies to ALS. Note that col_weights must also be set to "None" in this case. - If it is a list of lists of non-negative real numbers, it needs to be - in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of - inner lists matching the number of row factor shards and the elements in - each inner list are the weights for the rows of the corresponding row - factor shard. In this case, w_ij = unobserved_weight + - row_weights[i] * col_weights[j]. + in the form of \\([[w_0, w_1, ...], [w_k, ... ], [...]]\\), with the + number of inner lists matching the number of row factor shards and the + elements in each inner list are the weights for the rows of the + corresponding row factor shard. In this case, \\(w_ij\\) = + unobserved_weight + row_weights[i] * col_weights[j]. - If this is a single non-negative real number, this value is used for - all row weights and w_ij = unobserved_weight + row_weights * + all row weights and \\(w_ij\\) = unobserved_weight + row_weights * col_weights[j]. Note that it is allowed to have row_weights as a list while col_weights a single number or vice versa. @@ -665,18 +666,18 @@ class WALSModel(object): factors. unregularized_loss: A tensor (scalar) that contains the normalized minibatch loss corresponding to sp_input, without the regularization - term. If sp_input contains the rows {A_{i, :}, i \in I}, and the input - matrix A has n total rows, then the unregularized loss is: - (\|\sqrt W_I \odot (A_I - U_I V^T)\|_F^2 * n / |I| + term. If sp_input contains the rows \\({A_{i, :}, i \in I}\\), and the + input matrix A has n total rows, then the unregularized loss is: + \\(\|\sqrt W_I \odot (A_I - U_I V^T)\|_F^2 * n / |I|\\) The total loss is unregularized_loss + regularization. regularization: A tensor (scalar) that contains the normalized regularization term for the minibatch loss corresponding to sp_input. - If sp_input contains the rows {A_{i, :}, i \in I}, and the input matrix - A has n total rows, then the regularization term is: - \lambda \|U_I\|_F^2) * n / |I| + \lambda \|V\|_F^2. + If sp_input contains the rows \\({A_{i, :}, i \in I}\\), and the input + matrix A has n total rows, then the regularization term is: + \\(\lambda \|U_I\|_F^2) * n / |I| + \lambda \|V\|_F^2\\). sum_weights: The sum of the weights W_I corresponding to sp_input, - normalized by a factor of n / |I|. The root weighted squared error is: - \sqrt(unregularized_loss / sum_weights). + normalized by a factor of \\(n / |I|\\). The root weighted squared + error is: \sqrt(unregularized_loss / sum_weights). """ return self._process_input_helper( True, sp_input=sp_input, transpose_input=transpose_input) @@ -698,18 +699,18 @@ class WALSModel(object): factors. unregularized_loss: A tensor (scalar) that contains the normalized minibatch loss corresponding to sp_input, without the regularization - term. If sp_input contains the columns {A_{:, j}, j \in J}, and the - input matrix A has m total columns, then the unregularized loss is: - (\|\sqrt W_J \odot (A_J - U V_J^T)\|_F^2 * m / |I| + term. If sp_input contains the columns \\({A_{:, j}, j \in J}\\), and + the input matrix A has m total columns, then the unregularized loss is: + \\(\|\sqrt W_J \odot (A_J - U V_J^T)\|_F^2 * m / |I|\\) The total loss is unregularized_loss + regularization. regularization: A tensor (scalar) that contains the normalized regularization term for the minibatch loss corresponding to sp_input. - If sp_input contains the columns {A_{:, j}, j \in J}, and the input - matrix A has m total columns, then the regularization term is: - \lambda \|V_J\|_F^2) * m / |J| + \lambda \|U\|_F^2. + If sp_input contains the columns \\({A_{:, j}, j \in J}\\), and the + input matrix A has m total columns, then the regularization term is: + \\(\lambda \|V_J\|_F^2) * m / |J| + \lambda \|U\|_F^2\\). sum_weights: The sum of the weights W_J corresponding to sp_input, - normalized by a factor of m / |J|. The root weighted squared error is: - \sqrt(unregularized_loss / sum_weights). + normalized by a factor of \\(m / |J|\\). The root weighted squared + error is: \sqrt(unregularized_loss / sum_weights). """ return self._process_input_helper( False, sp_input=sp_input, transpose_input=transpose_input) @@ -720,8 +721,8 @@ class WALSModel(object): projection_weights=None): """Projects the row factors. - This computes the row embedding u_i for an observed row a_i by solving - one iteration of the update equations. + This computes the row embedding \\(u_i\\) for an observed row \\(a_i\\) by + solving one iteration of the update equations. Args: sp_input: A SparseTensor representing a set of rows. Please note that the @@ -753,8 +754,8 @@ class WALSModel(object): projection_weights=None): """Projects the column factors. - This computes the column embedding v_j for an observed column a_j by solving - one iteration of the update equations. + This computes the column embedding \\(v_j\\) for an observed column + \\(a_j\\) by solving one iteration of the update equations. Args: sp_input: A SparseTensor representing a set of columns. Please note that @@ -938,7 +939,7 @@ class WALSModel(object): loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input) if transpose_input else new_sp_input) # sp_approx is the low rank estimate of the input matrix, formed by - # computing the product for (i, j) in loss_sp_input.indices. + # computing the product <\\(u_i, v_j\\)> for (i, j) in loss_sp_input.indices. sp_approx_vals = gen_factorization_ops.masked_matmul( new_left_values, right, -- GitLab From a5a90e6b55c19bd14d5effa5cb1695ddbe31026f Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Wed, 28 Mar 2018 19:21:08 -0700 Subject: [PATCH 656/960] Relax limitations on rerouting graph outputs. - Allow multiple outputs of output_tensors in fold_batch_norms. - Allow duplicate consumers in quantize. - I also quick a fix issue for matching final layers that have batch norm. PiperOrigin-RevId: 190873003 --- .../quantize/python/fold_batch_norms.py | 6 +++--- tensorflow/contrib/quantize/python/quantize.py | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 5750be6f4c..4a8f8a04cc 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -134,9 +134,9 @@ def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay): nodes_modified_count = graph_editor.reroute_ts(bias_add_tensor, match.output_tensor) - if nodes_modified_count != 1: - raise ValueError( - 'Unexpected inputs to op: %s' % match.output_tensor.name) + if nodes_modified_count == 0: + raise ValueError('Folding batch norms failed, %s had no outputs.' % + match.output_tensor.name) def _FindFusedBatchNorms(graph): diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 019d123a68..2889016a84 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -305,7 +305,8 @@ def _FindLayersToQuantize(graph): # the output of the final BiasAdd must be quantized. So we treat the BiasAdd # as the 'activation_op' in the _LayerMatch, to ensure that it's output is # quantized. - final_layer_matcher = graph_matcher.GraphMatcher(bias_add_pattern) + final_layer_matcher = graph_matcher.GraphMatcher( + graph_matcher.OneofPattern([bias_add_pattern, folded_bias_add_pattern])) for match_result in final_layer_matcher.match_graph(graph): layer_op = match_result.get_op(layer_pattern) weight_tensor = match_result.get_tensor(weight_identity_pattern) @@ -463,11 +464,16 @@ def _InsertQuantOp(context, lambda: inputs, name=name_prefix + '/delayed_quant') - nodes_modified_count = graph_editor.reroute_ts( - [quant], [inputs], can_modify=consumers) - if nodes_modified_count != len(consumers): - raise ValueError('Some inputs not quantized for ops: [%s]' % ', '.join( - [consumer.name for consumer in consumers])) + if consumers: + tensors_modified_count = graph_editor.reroute_ts( + [quant], [inputs], can_modify=consumers) + # Some operations can have multiple output tensors going to the same + # consumer. Since consumers is a set, we need to ensure that + # tensors_modified_count is greater than or equal to the length of the set + # of consumers. + if tensors_modified_count < len(consumers): + raise ValueError('No inputs quantized for ops: [%s]' % ', '.join( + [consumer.name for consumer in consumers])) def _GetContextFromOp(op): -- GitLab From aef7d8b3e877924973e3d8d8e6266ba7b8322a66 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 28 Mar 2018 19:27:36 -0700 Subject: [PATCH 657/960] Fix the test --- .../python/training/tpu_cluster_resolver_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 48c3f6bb4f..e1e3e6867a 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -117,7 +117,8 @@ class TPUClusterResolverTest(test.TestCase): zone=None, tpu=['test-tpu-1'], credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) + service=self.mock_service_client(tpu_map=tpu_map), + coordinator_name='coordinator') actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ @@ -170,6 +171,7 @@ class TPUClusterResolverTest(test.TestCase): project='test-project', zone='us-central1-c', tpu=['test-tpu-1'], + coordinator_name='coordinator', coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) @@ -196,6 +198,7 @@ class TPUClusterResolverTest(test.TestCase): project='test-project', zone='us-central1-c', tpu='test-tpu-1', + coordinator_name='coordinator', coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) @@ -239,7 +242,8 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( tpu='test-tpu-1', credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) + service=self.mock_service_client(tpu_map=tpu_map), + coordinator_name='coordinator') actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ -- GitLab From 789e442513e85ab1caeb1e03997b0aafa3cd76d7 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Mar 2018 20:44:51 -0700 Subject: [PATCH 658/960] [tf.data] Maintain a reference on the FunctionBufferingResource while a get-next operation is active. Previously, the reference count on a FunctionBufferingResource could drop to 0 and it could be deleted (e.g. by a DestroyResourceOp) while a get-next operation is active on it. This would lead to use-after-free errors. PiperOrigin-RevId: 190878208 --- tensorflow/contrib/data/kernels/prefetching_kernels.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index f51570db85..2afb8dbbf4 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -374,25 +374,27 @@ class FunctionBufferingResourceGetNextOp : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC( ctx, LookupResource(ctx, handle, &buffer), done); - core::ScopedUnref s(buffer); if (buffer->Finished()) { + buffer->Unref(); ctx->SetStatus(errors::OutOfRange("end_of_sequence")); done(); return; } FunctionBufferCallback callback = - [ctx, done](const BufferElement& buffer_element) { + [ctx, buffer, done](const BufferElement& buffer_element) { Status s = buffer_element.status; if (!s.ok()) { ctx->SetStatus(s); + buffer->Unref(); done(); return; } for (size_t i = 0; i < buffer_element.value.size(); ++i) { ctx->set_output(i, buffer_element.value[i]); } + buffer->Unref(); done(); }; buffer->MaybeGet(std::move(callback)); -- GitLab From bb582f1b6fad474bc446c78a6683247a8eb6048e Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Wed, 28 Mar 2018 20:46:14 -0700 Subject: [PATCH 659/960] Remove all_opensource_files. It's not needed any more. PiperOrigin-RevId: 190878279 --- tensorflow/BUILD | 298 ------------------ tensorflow/c/BUILD | 15 - tensorflow/cc/BUILD | 12 - tensorflow/cc/saved_model/BUILD | 15 - tensorflow/cc/saved_model/python/BUILD | 12 - tensorflow/cc/tools/BUILD | 15 - tensorflow/compiler/aot/BUILD | 14 - tensorflow/compiler/aot/tests/BUILD | 14 - tensorflow/compiler/jit/BUILD | 14 - tensorflow/compiler/jit/graphcycles/BUILD | 14 - tensorflow/compiler/jit/kernels/BUILD | 14 - tensorflow/compiler/jit/legacy_flags/BUILD | 14 - tensorflow/compiler/jit/ops/BUILD | 14 - tensorflow/compiler/plugin/BUILD | 14 - tensorflow/compiler/tests/BUILD | 14 - tensorflow/compiler/tf2xla/BUILD | 14 - tensorflow/compiler/tf2xla/cc/BUILD | 14 - tensorflow/compiler/tf2xla/kernels/BUILD | 14 - tensorflow/compiler/tf2xla/lib/BUILD | 14 - tensorflow/compiler/tf2xla/ops/BUILD | 14 - tensorflow/compiler/xla/BUILD | 12 - tensorflow/compiler/xla/client/BUILD | 14 - tensorflow/compiler/xla/client/lib/BUILD | 14 - .../compiler/xla/client/xla_client/BUILD | 14 - tensorflow/compiler/xla/legacy_flags/BUILD | 14 - tensorflow/compiler/xla/python/BUILD | 12 - tensorflow/compiler/xla/service/BUILD | 14 - tensorflow/compiler/xla/service/cpu/BUILD | 14 - tensorflow/compiler/xla/service/gpu/BUILD | 14 - .../xla/service/gpu/llvm_gpu_backend/BUILD | 14 - .../compiler/xla/service/interpreter/BUILD | 11 - tensorflow/compiler/xla/service/llvm_ir/BUILD | 14 - tensorflow/compiler/xla/tests/BUILD | 14 - tensorflow/compiler/xla/tools/BUILD | 14 - tensorflow/compiler/xla/tools/parser/BUILD | 14 - tensorflow/contrib/BUILD | 12 - tensorflow/contrib/all_reduce/BUILD | 13 - tensorflow/contrib/android/BUILD | 14 - tensorflow/contrib/batching/BUILD | 11 - tensorflow/contrib/batching/test_util/BUILD | 11 - tensorflow/contrib/batching/util/BUILD | 12 - tensorflow/contrib/bayesflow/BUILD | 12 - tensorflow/contrib/boosted_trees/BUILD | 9 - .../boosted_trees/estimator_batch/BUILD | 9 - tensorflow/contrib/boosted_trees/lib/BUILD | 11 - tensorflow/contrib/boosted_trees/proto/BUILD | 11 - .../contrib/boosted_trees/resources/BUILD | 11 - tensorflow/contrib/cloud/BUILD | 12 - tensorflow/contrib/cloud/kernels/BUILD | 14 - tensorflow/contrib/cluster_resolver/BUILD | 13 - tensorflow/contrib/coder/BUILD | 11 - tensorflow/contrib/compiler/BUILD | 12 - tensorflow/contrib/copy_graph/BUILD | 12 - tensorflow/contrib/crf/BUILD | 12 - tensorflow/contrib/cudnn_rnn/BUILD | 12 - tensorflow/contrib/data/BUILD | 14 - tensorflow/contrib/data/kernels/BUILD | 11 - .../contrib/data/python/kernel_tests/BUILD | 14 - tensorflow/contrib/data/python/ops/BUILD | 12 - tensorflow/contrib/decision_trees/proto/BUILD | 8 - tensorflow/contrib/deprecated/BUILD | 12 - tensorflow/contrib/distributions/BUILD | 12 - tensorflow/contrib/eager/proto/BUILD | 11 - tensorflow/contrib/eager/python/BUILD | 13 - tensorflow/contrib/estimator/BUILD | 12 - tensorflow/contrib/factorization/BUILD | 13 - .../contrib/factorization/examples/BUILD | 11 - .../contrib/factorization/kernels/BUILD | 11 - tensorflow/contrib/feature_column/BUILD | 12 - tensorflow/contrib/ffmpeg/BUILD | 12 - tensorflow/contrib/ffmpeg/default/BUILD | 12 - tensorflow/contrib/framework/BUILD | 12 - tensorflow/contrib/fused_conv/BUILD | 12 - tensorflow/contrib/gan/BUILD | 12 - tensorflow/contrib/gdr/BUILD | 12 - tensorflow/contrib/graph_editor/BUILD | 12 - tensorflow/contrib/grid_rnn/BUILD | 12 - tensorflow/contrib/hooks/BUILD | 11 - .../contrib/hvx/clock_cycle_profiling/BUILD | 12 - .../contrib/hvx/hvx_ops_support_checker/BUILD | 11 - tensorflow/contrib/image/BUILD | 12 - tensorflow/contrib/input_pipeline/BUILD | 11 - .../contrib/input_pipeline/kernels/BUILD | 11 - tensorflow/contrib/integrate/BUILD | 11 - tensorflow/contrib/kafka/BUILD | 14 - tensorflow/contrib/keras/BUILD | 12 - tensorflow/contrib/kernel_methods/BUILD | 12 - tensorflow/contrib/kfac/BUILD | 12 - tensorflow/contrib/kfac/examples/BUILD | 12 - tensorflow/contrib/kfac/examples/tests/BUILD | 12 - .../contrib/kfac/python/kernel_tests/BUILD | 12 - tensorflow/contrib/kfac/python/ops/BUILD | 12 - tensorflow/contrib/labeled_tensor/BUILD | 11 - tensorflow/contrib/layers/BUILD | 12 - tensorflow/contrib/layers/kernels/BUILD | 11 - tensorflow/contrib/learn/BUILD | 12 - .../contrib/learn/python/learn/datasets/BUILD | 12 - tensorflow/contrib/legacy_seq2seq/BUILD | 12 - tensorflow/contrib/libsvm/BUILD | 12 - tensorflow/contrib/linalg/BUILD | 12 - tensorflow/contrib/linear_optimizer/BUILD | 11 - tensorflow/contrib/lite/BUILD | 15 - .../contrib/lite/examples/label_image/BUILD | 12 - tensorflow/contrib/lite/java/BUILD | 12 - .../contrib/lite/java/demo/app/src/main/BUILD | 12 - .../lite/java/demo/app/src/main/assets/BUILD | 12 - .../contrib/lite/java/src/main/native/BUILD | 12 - .../testhelper/java/org/tensorflow/lite/BUILD | 12 - tensorflow/contrib/lite/kernels/BUILD | 12 - .../contrib/lite/kernels/internal/BUILD | 12 - tensorflow/contrib/lite/models/BUILD | 12 - .../contrib/lite/models/smartreply/BUILD | 12 - tensorflow/contrib/lite/nnapi/BUILD | 12 - tensorflow/contrib/lite/python/BUILD | 12 - tensorflow/contrib/lite/schema/BUILD | 12 - tensorflow/contrib/lite/testing/BUILD | 12 - tensorflow/contrib/lite/toco/BUILD | 12 - .../toco/graph_transformations/tests/BUILD | 12 - tensorflow/contrib/lite/toco/python/BUILD | 12 - .../lite/toco/tensorflow_graph_matching/BUILD | 12 - tensorflow/contrib/lite/toco/tflite/BUILD | 12 - tensorflow/contrib/lite/tools/BUILD | 12 - tensorflow/contrib/lookup/BUILD | 12 - tensorflow/contrib/losses/BUILD | 12 - tensorflow/contrib/makefile/BUILD | 9 - tensorflow/contrib/memory_stats/BUILD | 12 - tensorflow/contrib/meta_graph_transform/BUILD | 12 - tensorflow/contrib/metrics/BUILD | 11 - tensorflow/contrib/model_pruning/BUILD | 12 - .../model_pruning/examples/cifar10/BUILD | 12 - tensorflow/contrib/mpi_collectives/BUILD | 12 - tensorflow/contrib/nccl/BUILD | 12 - tensorflow/contrib/nearest_neighbor/BUILD | 12 - tensorflow/contrib/nn/BUILD | 11 - tensorflow/contrib/opt/BUILD | 11 - tensorflow/contrib/periodic_resample/BUILD | 12 - tensorflow/contrib/predictor/BUILD | 12 - tensorflow/contrib/quantization/BUILD | 12 - tensorflow/contrib/quantize/BUILD | 12 - tensorflow/contrib/receptive_field/BUILD | 12 - tensorflow/contrib/reduce_slice_ops/BUILD | 12 - .../contrib/remote_fused_graph/pylib/BUILD | 12 - tensorflow/contrib/resampler/BUILD | 11 - tensorflow/contrib/rnn/BUILD | 13 - tensorflow/contrib/saved_model/BUILD | 12 - .../contrib/saved_model/cc/saved_model/BUILD | 6 - tensorflow/contrib/seq2seq/BUILD | 12 - tensorflow/contrib/session_bundle/BUILD | 12 - .../contrib/session_bundle/example/BUILD | 13 - tensorflow/contrib/signal/BUILD | 12 - tensorflow/contrib/slim/BUILD | 12 - .../contrib/slim/python/slim/data/BUILD | 12 - .../contrib/slim/python/slim/nets/BUILD | 12 - tensorflow/contrib/solvers/BUILD | 13 - tensorflow/contrib/sparsemax/BUILD | 12 - tensorflow/contrib/specs/BUILD | 12 - tensorflow/contrib/staging/BUILD | 12 - tensorflow/contrib/stat_summarizer/BUILD | 12 - tensorflow/contrib/stateless/BUILD | 12 - tensorflow/contrib/summary/BUILD | 12 - tensorflow/contrib/tensor_forest/BUILD | 14 - tensorflow/contrib/tensor_forest/hybrid/BUILD | 12 - .../contrib/tensor_forest/kernels/v4/BUILD | 5 - tensorflow/contrib/tensor_forest/proto/BUILD | 8 - tensorflow/contrib/tensorboard/BUILD | 12 - tensorflow/contrib/tensorboard/db/BUILD | 6 - tensorflow/contrib/tensorrt/BUILD | 12 - tensorflow/contrib/testing/BUILD | 12 - tensorflow/contrib/text/BUILD | 11 - tensorflow/contrib/tfprof/BUILD | 12 - tensorflow/contrib/timeseries/BUILD | 12 - tensorflow/contrib/timeseries/examples/BUILD | 12 - .../timeseries/python/timeseries/BUILD | 12 - .../timeseries/state_space_models/BUILD | 12 - tensorflow/contrib/tpu/BUILD | 13 - tensorflow/contrib/tpu/profiler/BUILD | 12 - tensorflow/contrib/tpu/proto/BUILD | 11 - tensorflow/contrib/training/BUILD | 12 - tensorflow/contrib/util/BUILD | 12 - tensorflow/contrib/verbs/BUILD | 12 - tensorflow/core/BUILD | 14 +- tensorflow/core/api_def/BUILD | 12 - tensorflow/core/common_runtime/eager/BUILD | 15 - tensorflow/core/debug/BUILD | 15 - tensorflow/core/distributed_runtime/BUILD | 12 - tensorflow/core/distributed_runtime/rpc/BUILD | 12 - tensorflow/core/grappler/BUILD | 12 - tensorflow/core/grappler/clusters/BUILD | 12 - tensorflow/core/grappler/costs/BUILD | 12 - tensorflow/core/grappler/inputs/BUILD | 12 - tensorflow/core/grappler/optimizers/BUILD | 12 - tensorflow/core/grappler/utils/BUILD | 12 - tensorflow/core/kernels/BUILD | 12 - tensorflow/core/kernels/batching_util/BUILD | 12 - tensorflow/core/kernels/data/BUILD | 12 - tensorflow/core/kernels/data/sql/BUILD | 12 - tensorflow/core/kernels/fuzzing/BUILD | 12 - tensorflow/core/kernels/hexagon/BUILD | 12 - tensorflow/core/kernels/neon/BUILD | 12 - tensorflow/core/lib/db/BUILD | 6 - tensorflow/core/ops/compat/BUILD | 15 - tensorflow/core/platform/cloud/BUILD | 14 - .../core/platform/default/build_config/BUILD | 12 - tensorflow/core/platform/hadoop/BUILD | 12 - tensorflow/core/platform/s3/BUILD | 12 - tensorflow/core/profiler/BUILD | 15 - tensorflow/core/profiler/internal/BUILD | 14 - .../core/profiler/internal/advisor/BUILD | 15 - tensorflow/core/util/ctc/BUILD | 12 - tensorflow/core/util/tensor_bundle/BUILD | 15 - tensorflow/examples/adding_an_op/BUILD | 12 - tensorflow/examples/android/BUILD | 16 - tensorflow/examples/benchmark/BUILD | 6 - .../examples/get_started/regression/BUILD | 12 - .../examples/how_tos/reading_data/BUILD | 12 - tensorflow/examples/image_retraining/BUILD | 12 - tensorflow/examples/label_image/BUILD | 16 +- tensorflow/examples/learn/BUILD | 12 - tensorflow/examples/multibox_detector/BUILD | 14 - tensorflow/examples/saved_model/BUILD | 13 - tensorflow/examples/speech_commands/BUILD | 12 - .../examples/tutorials/estimators/BUILD | 12 - tensorflow/examples/tutorials/layers/BUILD | 12 - tensorflow/examples/tutorials/mnist/BUILD | 12 - tensorflow/examples/tutorials/monitors/BUILD | 12 - tensorflow/examples/tutorials/word2vec/BUILD | 11 - tensorflow/examples/wav_to_spectrogram/BUILD | 14 - tensorflow/java/BUILD | 12 - tensorflow/python/BUILD | 12 - tensorflow/python/data/BUILD | 12 - tensorflow/python/data/kernel_tests/BUILD | 12 - tensorflow/python/data/ops/BUILD | 12 - tensorflow/python/data/util/BUILD | 12 - tensorflow/python/debug/BUILD | 12 - tensorflow/python/eager/BUILD | 15 - tensorflow/python/estimator/BUILD | 12 - tensorflow/python/feature_column/BUILD | 12 - tensorflow/python/keras/BUILD | 12 - tensorflow/python/kernel_tests/BUILD | 12 - .../python/kernel_tests/distributions/BUILD | 12 - tensorflow/python/kernel_tests/linalg/BUILD | 12 - tensorflow/python/kernel_tests/random/BUILD | 12 - tensorflow/python/ops/distributions/BUILD | 12 - tensorflow/python/ops/linalg/BUILD | 12 - tensorflow/python/ops/losses/BUILD | 12 - tensorflow/python/profiler/BUILD | 15 - tensorflow/python/profiler/internal/BUILD | 15 - tensorflow/python/saved_model/BUILD | 12 - tensorflow/python/tools/BUILD | 14 - tensorflow/tools/api/generator/BUILD | 12 - tensorflow/tools/api/golden/BUILD | 12 - tensorflow/tools/api/lib/BUILD | 12 - tensorflow/tools/api/tests/BUILD | 12 - tensorflow/tools/benchmark/BUILD | 9 - tensorflow/tools/build_info/BUILD | 15 - tensorflow/tools/common/BUILD | 11 - tensorflow/tools/compatibility/BUILD | 15 - tensorflow/tools/dist_test/server/BUILD | 12 - tensorflow/tools/docker/BUILD | 12 - tensorflow/tools/docker/notebooks/BUILD | 12 - tensorflow/tools/docs/BUILD | 11 - tensorflow/tools/git/BUILD | 15 - tensorflow/tools/graph_transforms/BUILD | 11 - tensorflow/tools/mlpbtxt/BUILD | 12 - tensorflow/tools/proto_text/BUILD | 15 - tensorflow/tools/quantization/BUILD | 12 - tensorflow/tools/test/BUILD | 12 - tensorflow/user_ops/BUILD | 12 - third_party/hadoop/BUILD | 12 - third_party/mpi/BUILD | 12 - third_party/sycl/BUILD | 12 - third_party/sycl/sycl/BUILD | 12 - 272 files changed, 4 insertions(+), 3610 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 6ab43638ba..0021b657d8 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -394,304 +394,6 @@ package_group( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - -filegroup( - name = "all_opensource_files", - data = [ - ":all_files", - "//tensorflow/c:all_files", - "//tensorflow/cc:all_files", - "//tensorflow/cc/saved_model:all_files", - "//tensorflow/cc/saved_model/python:all_files", - "//tensorflow/cc/tools:all_files", - "//tensorflow/compiler/aot:all_files", - "//tensorflow/compiler/aot/tests:all_files", - "//tensorflow/compiler/jit:all_files", - "//tensorflow/compiler/jit/graphcycles:all_files", - "//tensorflow/compiler/jit/kernels:all_files", - "//tensorflow/compiler/jit/legacy_flags:all_files", - "//tensorflow/compiler/jit/ops:all_files", - "//tensorflow/compiler/plugin:all_files", - "//tensorflow/compiler/tests:all_files", - "//tensorflow/compiler/tf2xla:all_files", - "//tensorflow/compiler/tf2xla/cc:all_files", - "//tensorflow/compiler/tf2xla/kernels:all_files", - "//tensorflow/compiler/tf2xla/lib:all_files", - "//tensorflow/compiler/tf2xla/ops:all_files", - "//tensorflow/compiler/xla:all_files", - "//tensorflow/compiler/xla/client:all_files", - "//tensorflow/compiler/xla/client/lib:all_files", - "//tensorflow/compiler/xla/client/xla_client:all_files", - "//tensorflow/compiler/xla/legacy_flags:all_files", - "//tensorflow/compiler/xla/python:all_files", - "//tensorflow/compiler/xla/service:all_files", - "//tensorflow/compiler/xla/service/cpu:all_files", - "//tensorflow/compiler/xla/service/gpu:all_files", - "//tensorflow/compiler/xla/service/gpu/llvm_gpu_backend:all_files", - "//tensorflow/compiler/xla/service/interpreter:all_files", - "//tensorflow/compiler/xla/service/llvm_ir:all_files", - "//tensorflow/compiler/xla/tests:all_files", - "//tensorflow/compiler/xla/tools:all_files", - "//tensorflow/compiler/xla/tools/parser:all_files", - "//tensorflow/contrib:all_files", - "//tensorflow/contrib/all_reduce:all_files", - "//tensorflow/contrib/android:all_files", - "//tensorflow/contrib/autograph:all_files", - "//tensorflow/contrib/autograph/converters:all_files", - "//tensorflow/contrib/autograph/impl:all_files", - "//tensorflow/contrib/autograph/pyct:all_files", - "//tensorflow/contrib/autograph/pyct/static_analysis:all_files", - "//tensorflow/contrib/autograph/utils:all_files", - "//tensorflow/contrib/batching:all_files", - "//tensorflow/contrib/bayesflow:all_files", - "//tensorflow/contrib/boosted_trees:all_files", - "//tensorflow/contrib/boosted_trees/estimator_batch:all_files", - "//tensorflow/contrib/boosted_trees/lib:all_files", - "//tensorflow/contrib/boosted_trees/proto:all_files", - "//tensorflow/contrib/boosted_trees/resources:all_files", - "//tensorflow/contrib/cloud:all_files", - "//tensorflow/contrib/cloud/kernels:all_files", - "//tensorflow/contrib/cluster_resolver:all_files", - "//tensorflow/contrib/coder:all_files", - "//tensorflow/contrib/compiler:all_files", - "//tensorflow/contrib/copy_graph:all_files", - "//tensorflow/contrib/crf:all_files", - "//tensorflow/contrib/cudnn_rnn:all_files", - "//tensorflow/contrib/data:all_files", - "//tensorflow/contrib/data/kernels:all_files", - "//tensorflow/contrib/data/python/kernel_tests:all_files", - "//tensorflow/contrib/data/python/ops:all_files", - "//tensorflow/contrib/decision_trees/proto:all_files", - "//tensorflow/contrib/deprecated:all_files", - "//tensorflow/contrib/distributions:all_files", - "//tensorflow/contrib/eager/proto:all_files", - "//tensorflow/contrib/eager/python:all_files", - "//tensorflow/contrib/estimator:all_files", - "//tensorflow/contrib/factorization:all_files", - "//tensorflow/contrib/factorization/examples:all_files", - "//tensorflow/contrib/factorization/kernels:all_files", - "//tensorflow/contrib/feature_column:all_files", - "//tensorflow/contrib/ffmpeg:all_files", - "//tensorflow/contrib/ffmpeg/default:all_files", - "//tensorflow/contrib/framework:all_files", - "//tensorflow/contrib/fused_conv:all_files", - "//tensorflow/contrib/gan:all_files", - "//tensorflow/contrib/gdr:all_files", - "//tensorflow/contrib/graph_editor:all_files", - "//tensorflow/contrib/grid_rnn:all_files", - "//tensorflow/contrib/hooks:all_files", - "//tensorflow/contrib/hvx/clock_cycle_profiling:all_files", - "//tensorflow/contrib/hvx/hvx_ops_support_checker:all_files", - "//tensorflow/contrib/image:all_files", - "//tensorflow/contrib/input_pipeline:all_files", - "//tensorflow/contrib/input_pipeline/kernels:all_files", - "//tensorflow/contrib/integrate:all_files", - "//tensorflow/contrib/keras:all_files", - "//tensorflow/contrib/kernel_methods:all_files", - "//tensorflow/contrib/kfac:all_files", - "//tensorflow/contrib/kfac/examples:all_files", - "//tensorflow/contrib/kfac/examples/tests:all_files", - "//tensorflow/contrib/kfac/python/kernel_tests:all_files", - "//tensorflow/contrib/kfac/python/ops:all_files", - "//tensorflow/contrib/labeled_tensor:all_files", - "//tensorflow/contrib/layers:all_files", - "//tensorflow/contrib/layers/kernels:all_files", - "//tensorflow/contrib/learn:all_files", - "//tensorflow/contrib/learn/python/learn/datasets:all_files", - "//tensorflow/contrib/legacy_seq2seq:all_files", - "//tensorflow/contrib/libsvm:all_files", - "//tensorflow/contrib/linalg:all_files", - "//tensorflow/contrib/linear_optimizer:all_files", - "//tensorflow/contrib/lite:all_files", - "//tensorflow/contrib/lite/java:all_files", - "//tensorflow/contrib/lite/java/demo/app/src/main:all_files", - "//tensorflow/contrib/lite/java/demo/app/src/main/assets:all_files", - "//tensorflow/contrib/lite/java/src/main/native:all_files", - "//tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite:all_files", - "//tensorflow/contrib/lite/kernels:all_files", - "//tensorflow/contrib/lite/kernels/internal:all_files", - "//tensorflow/contrib/lite/models/smartreply:all_files", - "//tensorflow/contrib/lite/nnapi:all_files", - "//tensorflow/contrib/lite/python:all_files", - "//tensorflow/contrib/lite/schema:all_files", - "//tensorflow/contrib/lite/testing:all_files", - "//tensorflow/contrib/lite/toco:all_files", - "//tensorflow/contrib/lite/toco/graph_transformations/tests:all_files", - "//tensorflow/contrib/lite/toco/python:all_files", - "//tensorflow/contrib/lite/toco/tensorflow_graph_matching:all_files", - "//tensorflow/contrib/lite/toco/tflite:all_files", - "//tensorflow/contrib/lite/tools:all_files", - "//tensorflow/contrib/lookup:all_files", - "//tensorflow/contrib/losses:all_files", - "//tensorflow/contrib/makefile:all_files", - "//tensorflow/contrib/memory_stats:all_files", - "//tensorflow/contrib/meta_graph_transform:all_files", - "//tensorflow/contrib/metrics:all_files", - "//tensorflow/contrib/model_pruning:all_files", - "//tensorflow/contrib/model_pruning/examples/cifar10:all_files", - "//tensorflow/contrib/nccl:all_files", - "//tensorflow/contrib/nearest_neighbor:all_files", - "//tensorflow/contrib/nn:all_files", - "//tensorflow/contrib/opt:all_files", - "//tensorflow/contrib/periodic_resample:all_files", - "//tensorflow/contrib/predictor:all_files", - "//tensorflow/contrib/quantize:all_files", - "//tensorflow/contrib/receptive_field:all_files", - "//tensorflow/contrib/reduce_slice_ops:all_files", - "//tensorflow/contrib/remote_fused_graph/pylib:all_files", - "//tensorflow/contrib/resampler:all_files", - "//tensorflow/contrib/rnn:all_files", - "//tensorflow/contrib/saved_model:all_files", - "//tensorflow/contrib/saved_model/cc/saved_model:all_files", - "//tensorflow/contrib/seq2seq:all_files", - "//tensorflow/contrib/session_bundle:all_files", - "//tensorflow/contrib/session_bundle/example:all_files", - "//tensorflow/contrib/signal:all_files", - "//tensorflow/contrib/slim:all_files", - "//tensorflow/contrib/slim/python/slim/data:all_files", - "//tensorflow/contrib/slim/python/slim/nets:all_files", - "//tensorflow/contrib/solvers:all_files", - "//tensorflow/contrib/sparsemax:all_files", - "//tensorflow/contrib/specs:all_files", - "//tensorflow/contrib/staging:all_files", - "//tensorflow/contrib/stat_summarizer:all_files", - "//tensorflow/contrib/stateless:all_files", - "//tensorflow/contrib/summary:all_files", - "//tensorflow/contrib/tensor_forest:all_files", - "//tensorflow/contrib/tensor_forest/hybrid:all_files", - "//tensorflow/contrib/tensor_forest/kernels/v4:all_files", - "//tensorflow/contrib/tensor_forest/proto:all_files", - "//tensorflow/contrib/tensorboard:all_files", - "//tensorflow/contrib/tensorboard/db:all_files", - "//tensorflow/contrib/tensorrt:all_files", - "//tensorflow/contrib/testing:all_files", - "//tensorflow/contrib/text:all_files", - "//tensorflow/contrib/tfprof:all_files", - "//tensorflow/contrib/timeseries:all_files", - "//tensorflow/contrib/timeseries/examples:all_files", - "//tensorflow/contrib/timeseries/python/timeseries:all_files", - "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:all_files", - "//tensorflow/contrib/tpu:all_files", - "//tensorflow/contrib/tpu/profiler:all_files", - "//tensorflow/contrib/tpu/proto:all_files", - "//tensorflow/contrib/training:all_files", - "//tensorflow/contrib/util:all_files", - "//tensorflow/contrib/verbs:all_files", - "//tensorflow/core:all_files", - "//tensorflow/core/api_def:all_files", - "//tensorflow/core/common_runtime/eager:all_files", - "//tensorflow/core/debug:all_files", - "//tensorflow/core/distributed_runtime:all_files", - "//tensorflow/core/distributed_runtime/rpc:all_files", - "//tensorflow/core/grappler:all_files", - "//tensorflow/core/grappler/clusters:all_files", - "//tensorflow/core/grappler/costs:all_files", - "//tensorflow/core/grappler/inputs:all_files", - "//tensorflow/core/grappler/optimizers:all_files", - "//tensorflow/core/grappler/utils:all_files", - "//tensorflow/core/kernels:all_files", - "//tensorflow/core/kernels/batching_util:all_files", - "//tensorflow/core/kernels/data:all_files", - "//tensorflow/core/kernels/data/sql:all_files", - "//tensorflow/core/kernels/fuzzing:all_files", - "//tensorflow/core/kernels/hexagon:all_files", - "//tensorflow/core/kernels/neon:all_files", - "//tensorflow/core/lib/db:all_files", - "//tensorflow/core/ops/compat:all_files", - "//tensorflow/core/platform/cloud:all_files", - "//tensorflow/core/platform/default/build_config:all_files", - "//tensorflow/core/platform/hadoop:all_files", - "//tensorflow/core/platform/s3:all_files", - "//tensorflow/core/profiler:all_files", - "//tensorflow/core/profiler/internal:all_files", - "//tensorflow/core/profiler/internal/advisor:all_files", - "//tensorflow/core/util/ctc:all_files", - "//tensorflow/core/util/tensor_bundle:all_files", - "//tensorflow/examples/adding_an_op:all_files", - "//tensorflow/examples/android:all_files", - "//tensorflow/examples/benchmark:all_files", - "//tensorflow/examples/get_started/regression:all_files", - "//tensorflow/examples/how_tos/reading_data:all_files", - "//tensorflow/examples/image_retraining:all_files", - "//tensorflow/examples/label_image:all_files", - "//tensorflow/examples/learn:all_files", - "//tensorflow/examples/multibox_detector:all_files", - "//tensorflow/examples/saved_model:all_files", - "//tensorflow/examples/speech_commands:all_files", - "//tensorflow/examples/tutorials/estimators:all_files", - "//tensorflow/examples/tutorials/layers:all_files", - "//tensorflow/examples/tutorials/mnist:all_files", - "//tensorflow/examples/tutorials/monitors:all_files", - "//tensorflow/examples/tutorials/word2vec:all_files", - "//tensorflow/examples/wav_to_spectrogram:all_files", - "//tensorflow/go:all_files", - "//tensorflow/java:all_files", - "//tensorflow/java/src/main/java/org/tensorflow/examples:all_files", - "//tensorflow/java/src/main/native:all_files", - "//tensorflow/python:all_files", - "//tensorflow/python/data:all_files", - "//tensorflow/python/data/kernel_tests:all_files", - "//tensorflow/python/data/ops:all_files", - "//tensorflow/python/data/util:all_files", - "//tensorflow/python/debug:all_files", - "//tensorflow/python/eager:all_files", - "//tensorflow/python/estimator:all_files", - "//tensorflow/python/feature_column:all_files", - "//tensorflow/python/keras:all_files", - "//tensorflow/python/kernel_tests:all_files", - "//tensorflow/python/kernel_tests/distributions:all_files", - "//tensorflow/python/kernel_tests/linalg:all_files", - "//tensorflow/python/kernel_tests/random:all_files", - "//tensorflow/python/kernel_tests/testdata:all_files", - "//tensorflow/python/ops/distributions:all_files", - "//tensorflow/python/ops/linalg:all_files", - "//tensorflow/python/ops/losses:all_files", - "//tensorflow/python/profiler:all_files", - "//tensorflow/python/profiler/internal:all_files", - "//tensorflow/python/saved_model:all_files", - "//tensorflow/python/tools:all_files", - "//tensorflow/tools/api/generator:all_files", - "//tensorflow/tools/api/golden:all_files", - "//tensorflow/tools/api/lib:all_files", - "//tensorflow/tools/api/tests:all_files", - "//tensorflow/tools/benchmark:all_files", - "//tensorflow/tools/build_info:all_files", - "//tensorflow/tools/ci_build/gpu_build:all_files", - "//tensorflow/tools/common:all_files", - "//tensorflow/tools/compatibility:all_files", - "//tensorflow/tools/dist_test/server:all_files", - "//tensorflow/tools/docker:all_files", - "//tensorflow/tools/docker/notebooks:all_files", - "//tensorflow/tools/docs:all_files", - "//tensorflow/tools/git:all_files", - "//tensorflow/tools/graph_transforms:all_files", - "//tensorflow/tools/mlpbtxt:all_files", - "//tensorflow/tools/proto_text:all_files", - "//tensorflow/tools/quantization:all_files", - "//tensorflow/tools/test:all_files", - "//tensorflow/user_ops:all_files", - "//third_party/eigen3:all_files", - "//third_party/fft2d:all_files", - "//third_party/flatbuffers:all_files", - "//third_party/hadoop:all_files", - "//third_party/sycl:all_files", - "//third_party/sycl/sycl:all_files", - ], - visibility = ["//visibility:public"], -) - load( "//third_party/mkl:build_defs.bzl", "if_mkl", diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 249135f728..2367014cd0 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -287,18 +287,3 @@ tf_cuda_library( "//tensorflow/python:cpp_shape_inference_proto_cc", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 9060c19e9d..079e063d3e 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -620,18 +620,6 @@ tf_cc_binary( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "queue_runner", srcs = ["training/queue_runner.cc"], diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index d29ad3ebcb..06a3be18e0 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -94,18 +94,3 @@ filegroup( "testdata/half_plus_two/**", ]), ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/cc/saved_model/python/BUILD b/tensorflow/cc/saved_model/python/BUILD index f5fbc75edc..6f04ebdc55 100644 --- a/tensorflow/cc/saved_model/python/BUILD +++ b/tensorflow/cc/saved_model/python/BUILD @@ -7,18 +7,6 @@ package( default_visibility = ["//visibility:public"], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - load("//tensorflow/core:platform/default/build_config.bzl", "tf_py_clif_cc") tf_py_clif_cc( diff --git a/tensorflow/cc/tools/BUILD b/tensorflow/cc/tools/BUILD index f413a5cc52..6f1c873540 100644 --- a/tensorflow/cc/tools/BUILD +++ b/tensorflow/cc/tools/BUILD @@ -41,18 +41,3 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index ffa2d08829..fa03b1f3c2 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -250,17 +250,3 @@ exports_files([ "benchmark_main.template", # used by tf_library(...,gen_benchmark=True) "test.cc", # used by tf_library(...,gen_test=True) ]) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/aot/tests/BUILD b/tensorflow/compiler/aot/tests/BUILD index 28aab6eb61..b053dad1b5 100644 --- a/tensorflow/compiler/aot/tests/BUILD +++ b/tensorflow/compiler/aot/tests/BUILD @@ -182,17 +182,3 @@ tf_cc_test( "//third_party/eigen3", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 8e505da622..9ea246ffdc 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -365,20 +365,6 @@ tf_cc_test( ], ) -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library. cc_header_only_library( name = "xla_jit_headers_lib", diff --git a/tensorflow/compiler/jit/graphcycles/BUILD b/tensorflow/compiler/jit/graphcycles/BUILD index 15507b3851..676f71a75a 100644 --- a/tensorflow/compiler/jit/graphcycles/BUILD +++ b/tensorflow/compiler/jit/graphcycles/BUILD @@ -27,17 +27,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD index 616a7f8f15..00a6f4075f 100644 --- a/tensorflow/compiler/jit/kernels/BUILD +++ b/tensorflow/compiler/jit/kernels/BUILD @@ -41,17 +41,3 @@ cc_library( ], alwayslink = 1, ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/jit/legacy_flags/BUILD b/tensorflow/compiler/jit/legacy_flags/BUILD index 9cd66fc13c..5d211f4d73 100644 --- a/tensorflow/compiler/jit/legacy_flags/BUILD +++ b/tensorflow/compiler/jit/legacy_flags/BUILD @@ -63,17 +63,3 @@ cc_library( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/jit/ops/BUILD b/tensorflow/compiler/jit/ops/BUILD index e5787ca4c8..c9e46bc147 100644 --- a/tensorflow/compiler/jit/ops/BUILD +++ b/tensorflow/compiler/jit/ops/BUILD @@ -17,17 +17,3 @@ cc_library( deps = ["//tensorflow/core:framework"], alwayslink = 1, ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/plugin/BUILD b/tensorflow/compiler/plugin/BUILD index da4bc44c7a..238fd15166 100644 --- a/tensorflow/compiler/plugin/BUILD +++ b/tensorflow/compiler/plugin/BUILD @@ -49,17 +49,3 @@ cc_library( "//tensorflow/compiler/jit:xla_device", ], ) - -#----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 1c5a8f8e69..edabdc218a 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -835,17 +835,3 @@ tf_xla_py_test( "//tensorflow/python:platform_test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index eb20ca501c..8c33bf179c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -462,17 +462,3 @@ cc_library( "//tensorflow/core:protos_all_cc", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD index 311dddca94..c30bb9cacd 100644 --- a/tensorflow/compiler/tf2xla/cc/BUILD +++ b/tensorflow/compiler/tf2xla/cc/BUILD @@ -51,17 +51,3 @@ cc_library( "//tensorflow/core:protos_all_cc", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 0bbfe86de3..f1bc7d6af4 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -217,17 +217,3 @@ cc_library( ], alwayslink = 1, ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index 488fda74bf..344773c8c5 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -140,17 +140,3 @@ cc_library( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/tf2xla/ops/BUILD b/tensorflow/compiler/tf2xla/ops/BUILD index 98f72b3792..aeb743a663 100644 --- a/tensorflow/compiler/tf2xla/ops/BUILD +++ b/tensorflow/compiler/tf2xla/ops/BUILD @@ -39,17 +39,3 @@ tf_gen_op_wrapper_py( ":sendrecv_ops", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index cd13db4d30..751777222f 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -654,18 +654,6 @@ tf_cc_test( # ----------------------------------------------------------------------------- -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # This is a headers target that extra XLA devices can use to prevent circular dependencies. Devices that are compiled as separate shared objects can also use it to prevent linking of library code. cc_header_only_library( name = "xla_headers_lib", diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index 5094e5ce67..a299c2afd4 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -214,17 +214,3 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index fca2bf2688..d02972f2c0 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -48,17 +48,3 @@ cc_library( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/client/xla_client/BUILD b/tensorflow/compiler/xla/client/xla_client/BUILD index 60f13e04cb..b1dba16856 100644 --- a/tensorflow/compiler/xla/client/xla_client/BUILD +++ b/tensorflow/compiler/xla/client/xla_client/BUILD @@ -76,17 +76,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/legacy_flags/BUILD b/tensorflow/compiler/xla/legacy_flags/BUILD index 0a9725db0a..89353448e2 100644 --- a/tensorflow/compiler/xla/legacy_flags/BUILD +++ b/tensorflow/compiler/xla/legacy_flags/BUILD @@ -75,17 +75,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index e2972f0601..0517a5502e 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -72,15 +72,3 @@ tf_py_wrap_cc( "//tensorflow/compiler/xla/service:cpu_plugin", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index bde749d317..b7d1bf64d0 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2651,17 +2651,3 @@ cc_library( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 0faa9e9c41..966e2d0fc5 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -916,17 +916,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 93b2f2a474..f1707442fe 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -700,17 +700,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD index f4c4dcdafd..86c4ac18b0 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD @@ -68,17 +68,3 @@ tf_cc_test( "@llvm//:support", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD index 0db3863f24..4550548495 100644 --- a/tensorflow/compiler/xla/service/interpreter/BUILD +++ b/tensorflow/compiler/xla/service/interpreter/BUILD @@ -120,14 +120,3 @@ cc_library( "//tensorflow/core:stream_executor_no_cuda", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 37261ed1e6..f1e7fc2953 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -169,17 +169,3 @@ cc_library( "@llvm//:core", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 2fd97fa38e..e337669aeb 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1960,17 +1960,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index 2e55f609d1..0bc4045a54 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -223,17 +223,3 @@ tf_cc_binary( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/tools/parser/BUILD b/tensorflow/compiler/xla/tools/parser/BUILD index 97aacf6b39..0fa4b98d0a 100644 --- a/tensorflow/compiler/xla/tools/parser/BUILD +++ b/tensorflow/compiler/xla/tools/parser/BUILD @@ -70,17 +70,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index bdbd738906..1ca70e7122 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -159,15 +159,3 @@ cc_library( "//tensorflow/contrib/tpu:all_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/all_reduce/BUILD b/tensorflow/contrib/all_reduce/BUILD index 8dff93b4f8..62d1b1cf07 100644 --- a/tensorflow/contrib/all_reduce/BUILD +++ b/tensorflow/contrib/all_reduce/BUILD @@ -45,16 +45,3 @@ tf_py_test( "//tensorflow/python:state_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/android/BUILD b/tensorflow/contrib/android/BUILD index 4bff3c27d2..60306ebdc6 100644 --- a/tensorflow/contrib/android/BUILD +++ b/tensorflow/contrib/android/BUILD @@ -38,20 +38,6 @@ cc_library( alwayslink = 1, ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # JAR with Java bindings to TF. android_library( name = "android_tensorflow_inference_java", diff --git a/tensorflow/contrib/batching/BUILD b/tensorflow/contrib/batching/BUILD index ee67909133..d65c990c87 100644 --- a/tensorflow/contrib/batching/BUILD +++ b/tensorflow/contrib/batching/BUILD @@ -112,14 +112,3 @@ py_test( "//tensorflow/python:script_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/batching/test_util/BUILD b/tensorflow/contrib/batching/test_util/BUILD index 6db627faad..7cb2d8079b 100644 --- a/tensorflow/contrib/batching/test_util/BUILD +++ b/tensorflow/contrib/batching/test_util/BUILD @@ -8,17 +8,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) - cc_library( name = "fake_clock_env", testonly = 1, diff --git a/tensorflow/contrib/batching/util/BUILD b/tensorflow/contrib/batching/util/BUILD index 2a84a7712a..8f81b6702f 100644 --- a/tensorflow/contrib/batching/util/BUILD +++ b/tensorflow/contrib/batching/util/BUILD @@ -8,18 +8,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "**/google_*", - ], - ), -) - cc_library( name = "periodic_function_dynamic", hdrs = ["periodic_function.h"], diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index a55029b314..5a2d7f6a3c 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -57,15 +57,3 @@ cuda_py_test( "//tensorflow/python:random_seed", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD index 6fdcd0f996..ddeda0079c 100644 --- a/tensorflow/contrib/boosted_trees/BUILD +++ b/tensorflow/contrib/boosted_trees/BUILD @@ -14,15 +14,6 @@ load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = ["**/OWNERS"], - ), - visibility = ["//tensorflow:__subpackages__"], -) - package_group(name = "friends") cc_library( diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index dcd235f876..17e20c4b31 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -10,15 +10,6 @@ package( load("//tensorflow:tensorflow.bzl", "py_test") -filegroup( - name = "all_files", - srcs = glob( - include = ["**/*"], - exclude = ["**/OWNERS"], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "init_py", srcs = ["__init__.py"], diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD index 131bd48562..3028c22817 100644 --- a/tensorflow/contrib/boosted_trees/lib/BUILD +++ b/tensorflow/contrib/boosted_trees/lib/BUILD @@ -15,17 +15,6 @@ load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_binary") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # Utils cc_library( diff --git a/tensorflow/contrib/boosted_trees/proto/BUILD b/tensorflow/contrib/boosted_trees/proto/BUILD index 9a61e163eb..b07f0a4314 100644 --- a/tensorflow/contrib/boosted_trees/proto/BUILD +++ b/tensorflow/contrib/boosted_trees/proto/BUILD @@ -4,17 +4,6 @@ exports_files(["LICENSE"]) load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "learner_proto", srcs = [ diff --git a/tensorflow/contrib/boosted_trees/resources/BUILD b/tensorflow/contrib/boosted_trees/resources/BUILD index 9fc101612f..c065186845 100644 --- a/tensorflow/contrib/boosted_trees/resources/BUILD +++ b/tensorflow/contrib/boosted_trees/resources/BUILD @@ -9,17 +9,6 @@ package( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "stamped_resource", hdrs = ["stamped_resource.h"], diff --git a/tensorflow/contrib/cloud/BUILD b/tensorflow/contrib/cloud/BUILD index fe8bd072af..f3a75e8688 100644 --- a/tensorflow/contrib/cloud/BUILD +++ b/tensorflow/contrib/cloud/BUILD @@ -14,18 +14,6 @@ load( "tf_py_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_gen_op_libs( op_lib_names = ["bigquery_reader_ops"], deps = [ diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD index d5fc604de9..ff46f0daa8 100644 --- a/tensorflow/contrib/cloud/kernels/BUILD +++ b/tensorflow/contrib/cloud/kernels/BUILD @@ -20,20 +20,6 @@ load( "tf_proto_library", ) -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_kernel_library( name = "bigquery_reader_ops", srcs = ["bigquery_reader_ops.cc"], diff --git a/tensorflow/contrib/cluster_resolver/BUILD b/tensorflow/contrib/cluster_resolver/BUILD index 1a124eca36..c239e6f8f9 100644 --- a/tensorflow/contrib/cluster_resolver/BUILD +++ b/tensorflow/contrib/cluster_resolver/BUILD @@ -10,19 +10,6 @@ package( licenses(["notice"]) # Apache 2.0 -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) - py_library( name = "cluster_resolver_pip", srcs = [ diff --git a/tensorflow/contrib/coder/BUILD b/tensorflow/contrib/coder/BUILD index ec3d550b70..ce12e38248 100644 --- a/tensorflow/contrib/coder/BUILD +++ b/tensorflow/contrib/coder/BUILD @@ -154,14 +154,3 @@ tf_py_test( ], main = "python/ops/coder_ops_test.py", ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD index 388d8e6ed6..bcee0b04c8 100644 --- a/tensorflow/contrib/compiler/BUILD +++ b/tensorflow/contrib/compiler/BUILD @@ -46,15 +46,3 @@ cuda_py_test( ], xla_enabled = True, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/copy_graph/BUILD b/tensorflow/contrib/copy_graph/BUILD index 8ec706df74..fa44c4d54e 100644 --- a/tensorflow/contrib/copy_graph/BUILD +++ b/tensorflow/contrib/copy_graph/BUILD @@ -41,15 +41,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/crf/BUILD b/tensorflow/contrib/crf/BUILD index 7aad4abdb9..5c1a17df4f 100644 --- a/tensorflow/contrib/crf/BUILD +++ b/tensorflow/contrib/crf/BUILD @@ -40,15 +40,3 @@ cuda_py_tests( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index fa86ad38c9..8b5d13f725 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -123,15 +123,3 @@ cuda_py_test( "requires_cudnn5", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 9e25a77d9f..35312f06b3 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -44,17 +44,3 @@ tf_custom_op_library( tf_gen_op_libs( op_lib_names = ["dataset_ops"], ) - -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/contrib/data/kernels/BUILD index c87da7dfaa..83ada6fb67 100644 --- a/tensorflow/contrib/data/kernels/BUILD +++ b/tensorflow/contrib/data/kernels/BUILD @@ -61,14 +61,3 @@ cc_library( "@protobuf_archive//:protobuf_headers", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 0b3bf63f79..0f4c9e48cf 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -513,17 +513,3 @@ tf_py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 647620eb84..236792bb98 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -183,15 +183,3 @@ py_library( "//tensorflow/python/data/util:sparse", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/decision_trees/proto/BUILD b/tensorflow/contrib/decision_trees/proto/BUILD index ae3847b8b6..3b50a48336 100644 --- a/tensorflow/contrib/decision_trees/proto/BUILD +++ b/tensorflow/contrib/decision_trees/proto/BUILD @@ -13,14 +13,6 @@ load( "tf_pyclif_proto_library", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "generic_tree_model", srcs = ["generic_tree_model.proto"], diff --git a/tensorflow/contrib/deprecated/BUILD b/tensorflow/contrib/deprecated/BUILD index 3dfbbf5527..401527f1e7 100644 --- a/tensorflow/contrib/deprecated/BUILD +++ b/tensorflow/contrib/deprecated/BUILD @@ -30,15 +30,3 @@ py_test( "//tensorflow/python:logging_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 682448b84b..231abaa2f3 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -746,18 +746,6 @@ cuda_py_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # === Bijector Tests ========================================================== cuda_py_test( diff --git a/tensorflow/contrib/eager/proto/BUILD b/tensorflow/contrib/eager/proto/BUILD index aedfec8924..b016d2dcb5 100644 --- a/tensorflow/contrib/eager/proto/BUILD +++ b/tensorflow/contrib/eager/proto/BUILD @@ -4,17 +4,6 @@ exports_files(["LICENSE"]) load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "checkpointable_object_graph_proto", srcs = [ diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 4fba014d6f..7a8c11e3bb 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -272,16 +272,3 @@ cuda_py_test( ], tags = ["notsan"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index c846343d6d..d125e40f6c 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -9,18 +9,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "estimator_py", srcs = ["__init__.py"], diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index ad8568ad44..0a648d5d40 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -347,16 +347,3 @@ cuda_py_test( ], main = "python/kernel_tests/masked_matmul_benchmark.py", ) - -# All files -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/factorization/examples/BUILD b/tensorflow/contrib/factorization/examples/BUILD index bbe842bd5c..363baa121a 100644 --- a/tensorflow/contrib/factorization/examples/BUILD +++ b/tensorflow/contrib/factorization/examples/BUILD @@ -21,14 +21,3 @@ tf_py_test( ], tags = ["notsan"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/factorization/kernels/BUILD b/tensorflow/contrib/factorization/kernels/BUILD index 44eab56011..ea8b9a17a2 100644 --- a/tensorflow/contrib/factorization/kernels/BUILD +++ b/tensorflow/contrib/factorization/kernels/BUILD @@ -67,14 +67,3 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 3614b2b15a..aab7d0c9e8 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -8,18 +8,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "feature_column_py", srcs = ["__init__.py"], diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD index eccce99071..f7b3273a4d 100644 --- a/tensorflow/contrib/ffmpeg/BUILD +++ b/tensorflow/contrib/ffmpeg/BUILD @@ -180,15 +180,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD index 6b455567d7..59bad8982d 100644 --- a/tensorflow/contrib/ffmpeg/default/BUILD +++ b/tensorflow/contrib/ffmpeg/default/BUILD @@ -74,15 +74,3 @@ tf_cc_test( "//tensorflow/core:test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index ac043fda06..b1c8ad49ea 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -321,15 +321,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD index ce37672895..0eb6889db1 100644 --- a/tensorflow/contrib/fused_conv/BUILD +++ b/tensorflow/contrib/fused_conv/BUILD @@ -157,15 +157,3 @@ cuda_py_test( "requires_cudnn6", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 0eb0e3cbe2..9e56d3c039 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -544,15 +544,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD index 707ae25d48..e534fdc177 100644 --- a/tensorflow/contrib/gdr/BUILD +++ b/tensorflow/contrib/gdr/BUILD @@ -9,18 +9,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "c_srcs", data = glob([ diff --git a/tensorflow/contrib/graph_editor/BUILD b/tensorflow/contrib/graph_editor/BUILD index 967ad2fc09..1711100e3a 100644 --- a/tensorflow/contrib/graph_editor/BUILD +++ b/tensorflow/contrib/graph_editor/BUILD @@ -39,18 +39,6 @@ py_library( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "match", srcs = ["tests/match.py"], diff --git a/tensorflow/contrib/grid_rnn/BUILD b/tensorflow/contrib/grid_rnn/BUILD index d601a1ec6f..d0b4464066 100644 --- a/tensorflow/contrib/grid_rnn/BUILD +++ b/tensorflow/contrib/grid_rnn/BUILD @@ -41,15 +41,3 @@ cuda_py_tests( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/hooks/BUILD b/tensorflow/contrib/hooks/BUILD index 1b528d7afc..d65b2d6026 100644 --- a/tensorflow/contrib/hooks/BUILD +++ b/tensorflow/contrib/hooks/BUILD @@ -23,14 +23,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD index 324035100d..e39c60b252 100644 --- a/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD +++ b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD @@ -13,18 +13,6 @@ exports_files(["LICENSE"]) package(default_visibility = ["//visibility:public"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_cc_binary( name = "clock_cycle_profiling", testonly = 1, diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD index 909dc396a3..0081fb6177 100644 --- a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD +++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD @@ -10,17 +10,6 @@ exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "tf_cc_binary") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) - tf_cc_binary( name = "hvx_ops_support_checker", testonly = 1, diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index 79eb3762ed..da450480b3 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -384,15 +384,3 @@ cuda_py_test( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/input_pipeline/BUILD b/tensorflow/contrib/input_pipeline/BUILD index 9d6b4d5d87..0e34315db4 100644 --- a/tensorflow/contrib/input_pipeline/BUILD +++ b/tensorflow/contrib/input_pipeline/BUILD @@ -114,14 +114,3 @@ tf_cc_tests( "//tensorflow/core:testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/input_pipeline/kernels/BUILD b/tensorflow/contrib/input_pipeline/kernels/BUILD index f20a6e38d4..797605b8fe 100644 --- a/tensorflow/contrib/input_pipeline/kernels/BUILD +++ b/tensorflow/contrib/input_pipeline/kernels/BUILD @@ -17,14 +17,3 @@ cc_library( ], alwayslink = 1, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/integrate/BUILD b/tensorflow/contrib/integrate/BUILD index 66948c1ea1..0b7d64f4ed 100644 --- a/tensorflow/contrib/integrate/BUILD +++ b/tensorflow/contrib/integrate/BUILD @@ -42,14 +42,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index 1c3974871c..3913c9dc7a 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -119,17 +119,3 @@ tf_py_test( "notap", ], ) - -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD index 7e0019ce4a..7a4cab20d1 100644 --- a/tensorflow/contrib/keras/BUILD +++ b/tensorflow/contrib/keras/BUILD @@ -52,15 +52,3 @@ py_library( "//tensorflow/python/keras", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kernel_methods/BUILD b/tensorflow/contrib/kernel_methods/BUILD index eff7dfeb4c..87c2dcd89b 100644 --- a/tensorflow/contrib/kernel_methods/BUILD +++ b/tensorflow/contrib/kernel_methods/BUILD @@ -90,15 +90,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kfac/BUILD b/tensorflow/contrib/kfac/BUILD index 9a5759bf14..b719046b37 100644 --- a/tensorflow/contrib/kfac/BUILD +++ b/tensorflow/contrib/kfac/BUILD @@ -24,15 +24,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kfac/examples/BUILD b/tensorflow/contrib/kfac/examples/BUILD index 89965eda37..7dd40c19c5 100644 --- a/tensorflow/contrib/kfac/examples/BUILD +++ b/tensorflow/contrib/kfac/examples/BUILD @@ -58,15 +58,3 @@ py_library( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kfac/examples/tests/BUILD b/tensorflow/contrib/kfac/examples/tests/BUILD index ce7da95c12..ede7f183fe 100644 --- a/tensorflow/contrib/kfac/examples/tests/BUILD +++ b/tensorflow/contrib/kfac/examples/tests/BUILD @@ -50,15 +50,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index 146ae8b7e2..f73c24f8fb 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -155,15 +155,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD index d721ad08af..b897fd68a0 100644 --- a/tensorflow/contrib/kfac/python/ops/BUILD +++ b/tensorflow/contrib/kfac/python/ops/BUILD @@ -244,15 +244,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 894e6f6946..18b265ae80 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -213,14 +213,3 @@ py_test( "//tensorflow/python:math_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index 852d06e1e3..4be55468db 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -390,15 +390,3 @@ py_test( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/layers/kernels/BUILD b/tensorflow/contrib/layers/kernels/BUILD index e407a9ce01..7aae09ff3e 100644 --- a/tensorflow/contrib/layers/kernels/BUILD +++ b/tensorflow/contrib/layers/kernels/BUILD @@ -18,14 +18,3 @@ cc_library( ], alwayslink = 1, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 9c59150580..924918be4f 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -873,15 +873,3 @@ py_binary( "//tensorflow/python:platform", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/learn/python/learn/datasets/BUILD b/tensorflow/contrib/learn/python/learn/datasets/BUILD index 8bf372841d..2c7215bba3 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/BUILD +++ b/tensorflow/contrib/learn/python/learn/datasets/BUILD @@ -44,18 +44,6 @@ py_binary( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_test( name = "base_test", size = "small", diff --git a/tensorflow/contrib/legacy_seq2seq/BUILD b/tensorflow/contrib/legacy_seq2seq/BUILD index 1fa55132b1..8c2c4fd29c 100644 --- a/tensorflow/contrib/legacy_seq2seq/BUILD +++ b/tensorflow/contrib/legacy_seq2seq/BUILD @@ -60,15 +60,3 @@ cuda_py_tests( ], tags = ["noasan"], # times out b/63678675 ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/libsvm/BUILD b/tensorflow/contrib/libsvm/BUILD index df96402a4f..4dccb9be7c 100644 --- a/tensorflow/contrib/libsvm/BUILD +++ b/tensorflow/contrib/libsvm/BUILD @@ -88,15 +88,3 @@ tf_py_test( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD index 359255374d..a7812f74d1 100644 --- a/tensorflow/contrib/linalg/BUILD +++ b/tensorflow/contrib/linalg/BUILD @@ -61,15 +61,3 @@ cuda_py_test( shard_count = 4, tags = ["noasan"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/linear_optimizer/BUILD b/tensorflow/contrib/linear_optimizer/BUILD index cea3627ed5..5b89c6cef9 100644 --- a/tensorflow/contrib/linear_optimizer/BUILD +++ b/tensorflow/contrib/linear_optimizer/BUILD @@ -138,14 +138,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 18efa64507..ac269d540a 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -271,18 +271,3 @@ cc_test( # ], # }), #) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "downloads", - "examples", - "gen", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/examples/label_image/BUILD b/tensorflow/contrib/lite/examples/label_image/BUILD index 959347b549..9322e186a2 100644 --- a/tensorflow/contrib/lite/examples/label_image/BUILD +++ b/tensorflow/contrib/lite/examples/label_image/BUILD @@ -69,15 +69,3 @@ cc_library( # "//testing/base/public:gunit", # ], # ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index f52d6ba6c5..7f7a2632dd 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -167,15 +167,3 @@ tflite_jni_binary( "//tensorflow/contrib/lite/java/src/main/native", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD index 5eb749aae6..d6fbef9cc9 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/BUILD @@ -27,15 +27,3 @@ android_binary( "@androidsdk//com.android.support:support-v4-25.2.0", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD index dd0cd6c98f..ce68160b68 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/BUILD @@ -10,15 +10,3 @@ exports_files( ], ), ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD index 3571182ca9..4399ed2025 100644 --- a/tensorflow/contrib/lite/java/src/main/native/BUILD +++ b/tensorflow/contrib/lite/java/src/main/native/BUILD @@ -95,15 +95,3 @@ exports_files( "version_script.lds", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD index 2b4f37bc6c..b524246d43 100644 --- a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/BUILD @@ -16,15 +16,3 @@ android_library( "//tensorflow/contrib/lite/java:tensorflowlite_java", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 1450c1e14b..058f995d75 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -911,16 +911,4 @@ tf_cc_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index aa3957bee1..167c0f1fde 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -431,15 +431,3 @@ cc_library( ) exports_files(["optimized/eigen_tensor_reduced_instantiations_oss.h"]) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/models/BUILD b/tensorflow/contrib/lite/models/BUILD index 6a1255b586..efa47b06fa 100644 --- a/tensorflow/contrib/lite/models/BUILD +++ b/tensorflow/contrib/lite/models/BUILD @@ -12,15 +12,3 @@ load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") exports_files(glob([ "testdata/*", ])) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/models/smartreply/BUILD b/tensorflow/contrib/lite/models/smartreply/BUILD index 733c3f4c7f..a82d1f2eb6 100644 --- a/tensorflow/contrib/lite/models/smartreply/BUILD +++ b/tensorflow/contrib/lite/models/smartreply/BUILD @@ -86,15 +86,3 @@ cc_test( "@com_google_googletest//:gtest", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/nnapi/BUILD b/tensorflow/contrib/lite/nnapi/BUILD index 402f1e949b..467a2b7a7b 100644 --- a/tensorflow/contrib/lite/nnapi/BUILD +++ b/tensorflow/contrib/lite/nnapi/BUILD @@ -11,15 +11,3 @@ cc_library( ], linkopts = ["-ldl"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 411d5c0d27..e70aa51298 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -118,15 +118,3 @@ py_library( ":convert_saved_model", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index da65ec659c..246ec85fe4 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -70,16 +70,4 @@ cc_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 555ea90034..10e810a6e0 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -373,16 +373,4 @@ tf_cc_test( }), ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 8ed3e0e14e..bba61627f9 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -420,15 +420,3 @@ tf_cc_test( "@com_google_googletest//:gtest_main", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index b975cc996b..a2008ddbdb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -39,15 +39,3 @@ tf_cc_test( "@com_google_googletest//:gtest_main", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 17115047d2..5a40451b3a 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -63,15 +63,3 @@ tf_py_test( ], tags = ["no_pip"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD index 0c1a1141fc..336e94de1e 100644 --- a/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD +++ b/tensorflow/contrib/lite/toco/tensorflow_graph_matching/BUILD @@ -88,15 +88,3 @@ cc_library( "//tensorflow/core:protos_all_cc", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/toco/tflite/BUILD b/tensorflow/contrib/lite/toco/tflite/BUILD index 9d3e1daf12..e0191801a0 100644 --- a/tensorflow/contrib/lite/toco/tflite/BUILD +++ b/tensorflow/contrib/lite/toco/tflite/BUILD @@ -137,15 +137,3 @@ tf_cc_test( "@flatbuffers", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index b5abbc0712..44fde69a1e 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -91,18 +91,6 @@ cc_library( deps = ["//tensorflow/contrib/lite:framework"], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "verifier", srcs = ["verifier.cc"], diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 8ca03f4193..02b4f80252 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -47,15 +47,3 @@ tf_py_test( ], grpc_enabled = True, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/losses/BUILD b/tensorflow/contrib/losses/BUILD index 5694211521..728f75f8ef 100644 --- a/tensorflow/contrib/losses/BUILD +++ b/tensorflow/contrib/losses/BUILD @@ -97,15 +97,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/makefile/BUILD b/tensorflow/contrib/makefile/BUILD index 701eeb44fe..1abb46f4d4 100644 --- a/tensorflow/contrib/makefile/BUILD +++ b/tensorflow/contrib/makefile/BUILD @@ -3,12 +3,3 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = ["**/OWNERS"], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/memory_stats/BUILD b/tensorflow/contrib/memory_stats/BUILD index 72424c32e7..63843b993c 100644 --- a/tensorflow/contrib/memory_stats/BUILD +++ b/tensorflow/contrib/memory_stats/BUILD @@ -79,15 +79,3 @@ cuda_py_test( "//tensorflow/python:random_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/meta_graph_transform/BUILD b/tensorflow/contrib/meta_graph_transform/BUILD index 4b5b1c3e15..24400789f8 100644 --- a/tensorflow/contrib/meta_graph_transform/BUILD +++ b/tensorflow/contrib/meta_graph_transform/BUILD @@ -59,15 +59,3 @@ filegroup( "**/*.py", ]), ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/metrics/BUILD b/tensorflow/contrib/metrics/BUILD index e90c525113..5ca42f41c1 100644 --- a/tensorflow/contrib/metrics/BUILD +++ b/tensorflow/contrib/metrics/BUILD @@ -97,14 +97,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/model_pruning/BUILD b/tensorflow/contrib/model_pruning/BUILD index ca3f13479e..f50575b2cf 100644 --- a/tensorflow/contrib/model_pruning/BUILD +++ b/tensorflow/contrib/model_pruning/BUILD @@ -125,15 +125,3 @@ py_library( ":rnn_cells", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/model_pruning/examples/cifar10/BUILD b/tensorflow/contrib/model_pruning/examples/cifar10/BUILD index e7848adcc5..30ea912222 100644 --- a/tensorflow/contrib/model_pruning/examples/cifar10/BUILD +++ b/tensorflow/contrib/model_pruning/examples/cifar10/BUILD @@ -68,15 +68,3 @@ py_binary( "//tensorflow/contrib/model_pruning:pruning", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/mpi_collectives/BUILD b/tensorflow/contrib/mpi_collectives/BUILD index 9f9802b8fe..a7be92a35e 100644 --- a/tensorflow/contrib/mpi_collectives/BUILD +++ b/tensorflow/contrib/mpi_collectives/BUILD @@ -126,15 +126,3 @@ tf_py_test( ], tags = ["manual"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 94d01efee1..6cbfd03881 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -141,15 +141,3 @@ cuda_py_test( "notap", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/nearest_neighbor/BUILD b/tensorflow/contrib/nearest_neighbor/BUILD index 9500c18b1d..6fa7624467 100644 --- a/tensorflow/contrib/nearest_neighbor/BUILD +++ b/tensorflow/contrib/nearest_neighbor/BUILD @@ -111,15 +111,3 @@ tf_py_test( "//tensorflow/python:client_testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/nn/BUILD b/tensorflow/contrib/nn/BUILD index 5543eb6c6e..ef7ab22646 100644 --- a/tensorflow/contrib/nn/BUILD +++ b/tensorflow/contrib/nn/BUILD @@ -98,14 +98,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index bacf15bbd6..c57c5e3f29 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -265,14 +265,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/periodic_resample/BUILD b/tensorflow/contrib/periodic_resample/BUILD index bd9078ae76..6ca7fe8b6e 100644 --- a/tensorflow/contrib/periodic_resample/BUILD +++ b/tensorflow/contrib/periodic_resample/BUILD @@ -94,18 +94,6 @@ py_test( # srcs_version = "PY2AND3", # ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "custom_op_sources", srcs = glob( diff --git a/tensorflow/contrib/predictor/BUILD b/tensorflow/contrib/predictor/BUILD index a80f060b91..36e21af618 100644 --- a/tensorflow/contrib/predictor/BUILD +++ b/tensorflow/contrib/predictor/BUILD @@ -8,18 +8,6 @@ exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "py_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "predictor", srcs = ["__init__.py"], diff --git a/tensorflow/contrib/quantization/BUILD b/tensorflow/contrib/quantization/BUILD index c19a31afb2..2de10e8fae 100644 --- a/tensorflow/contrib/quantization/BUILD +++ b/tensorflow/contrib/quantization/BUILD @@ -49,15 +49,3 @@ filegroup( "**/*.py", ]), ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index 0b76296204..b9918fdee1 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -246,15 +246,3 @@ py_test( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD index e975aeaea7..9325a14745 100644 --- a/tensorflow/contrib/receptive_field/BUILD +++ b/tensorflow/contrib/receptive_field/BUILD @@ -106,15 +106,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/reduce_slice_ops/BUILD b/tensorflow/contrib/reduce_slice_ops/BUILD index b31f4488f5..02b3d66e46 100644 --- a/tensorflow/contrib/reduce_slice_ops/BUILD +++ b/tensorflow/contrib/reduce_slice_ops/BUILD @@ -101,15 +101,3 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 27f0a7f58f..996b55f9b8 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -48,15 +48,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/resampler/BUILD b/tensorflow/contrib/resampler/BUILD index f0ecc8b85a..48345d7030 100644 --- a/tensorflow/contrib/resampler/BUILD +++ b/tensorflow/contrib/resampler/BUILD @@ -85,14 +85,3 @@ cuda_py_test( "//tensorflow/python:array_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD index 7e5e35d0b5..43c0f75955 100644 --- a/tensorflow/contrib/rnn/BUILD +++ b/tensorflow/contrib/rnn/BUILD @@ -321,19 +321,6 @@ tf_cc_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "tools/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_gen_op_libs( op_lib_names = [ "lstm_ops", diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index 245fe07f2b..faad40d335 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -81,15 +81,3 @@ py_test( "//tensorflow/python/saved_model:utils", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/saved_model/cc/saved_model/BUILD b/tensorflow/contrib/saved_model/cc/saved_model/BUILD index ea4da80ba3..3c616c555b 100644 --- a/tensorflow/contrib/saved_model/cc/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/cc/saved_model/BUILD @@ -49,9 +49,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -filegroup( - name = "all_files", - srcs = glob(["*"]), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/seq2seq/BUILD b/tensorflow/contrib/seq2seq/BUILD index ab80c68b1a..a62069a252 100644 --- a/tensorflow/contrib/seq2seq/BUILD +++ b/tensorflow/contrib/seq2seq/BUILD @@ -211,15 +211,3 @@ cuda_py_test( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 75a753ed89..31717305e7 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -17,18 +17,6 @@ load( "tf_cc_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), -) - # TODO(b/32673259): add a test to continuously validate these files. filegroup( name = "session_bundle_half_plus_two", diff --git a/tensorflow/contrib/session_bundle/example/BUILD b/tensorflow/contrib/session_bundle/example/BUILD index dbbae01f36..9a56eab431 100644 --- a/tensorflow/contrib/session_bundle/example/BUILD +++ b/tensorflow/contrib/session_bundle/example/BUILD @@ -10,19 +10,6 @@ exports_files(["LICENSE"]) # vardef("PYTHON_BIN_PATH", "/usr/bin/python") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//visibility:public"], -) - py_binary( name = "export_half_plus_two", srcs = [ diff --git a/tensorflow/contrib/signal/BUILD b/tensorflow/contrib/signal/BUILD index a83fc20596..fdecceff52 100644 --- a/tensorflow/contrib/signal/BUILD +++ b/tensorflow/contrib/signal/BUILD @@ -130,15 +130,3 @@ cuda_py_tests( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/slim/BUILD b/tensorflow/contrib/slim/BUILD index c2f106c2b2..516e3ea073 100644 --- a/tensorflow/contrib/slim/BUILD +++ b/tensorflow/contrib/slim/BUILD @@ -178,15 +178,3 @@ py_test( "//tensorflow/python:summary", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 5daabbd62e..dc12e67fc6 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -193,15 +193,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/slim/python/slim/nets/BUILD b/tensorflow/contrib/slim/python/slim/nets/BUILD index 7f03aaf085..8bbdf96384 100644 --- a/tensorflow/contrib/slim/python/slim/nets/BUILD +++ b/tensorflow/contrib/slim/python/slim/nets/BUILD @@ -317,15 +317,3 @@ py_test( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/solvers/BUILD b/tensorflow/contrib/solvers/BUILD index 87b67486ad..5247288d54 100644 --- a/tensorflow/contrib/solvers/BUILD +++ b/tensorflow/contrib/solvers/BUILD @@ -93,16 +93,3 @@ cuda_py_test( "//tensorflow/python:platform_test", ], ) - -# All files -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/sparsemax/BUILD b/tensorflow/contrib/sparsemax/BUILD index fcfaa2aba4..b729fff261 100644 --- a/tensorflow/contrib/sparsemax/BUILD +++ b/tensorflow/contrib/sparsemax/BUILD @@ -65,15 +65,3 @@ cuda_py_tests( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/specs/BUILD b/tensorflow/contrib/specs/BUILD index 084953a0a2..055b04db8a 100644 --- a/tensorflow/contrib/specs/BUILD +++ b/tensorflow/contrib/specs/BUILD @@ -60,15 +60,3 @@ tf_py_test( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/staging/BUILD b/tensorflow/contrib/staging/BUILD index bc4a289468..0c86f3db1d 100644 --- a/tensorflow/contrib/staging/BUILD +++ b/tensorflow/contrib/staging/BUILD @@ -6,18 +6,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "staging", srcs = ["__init__.py"], diff --git a/tensorflow/contrib/stat_summarizer/BUILD b/tensorflow/contrib/stat_summarizer/BUILD index 5fd02efbf6..d4096751c4 100644 --- a/tensorflow/contrib/stat_summarizer/BUILD +++ b/tensorflow/contrib/stat_summarizer/BUILD @@ -32,15 +32,3 @@ tf_py_test( "//tensorflow/python:variables", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD index 6e259e1d32..dcbef2881d 100644 --- a/tensorflow/contrib/stateless/BUILD +++ b/tensorflow/contrib/stateless/BUILD @@ -38,15 +38,3 @@ cuda_py_test( "//tensorflow/python:random_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD index 80563c5e15..fda1367b15 100644 --- a/tensorflow/contrib/summary/BUILD +++ b/tensorflow/contrib/summary/BUILD @@ -83,18 +83,6 @@ py_library( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # NOTE: target cannot be testonly because it needs to be in the pip # package. Sigh. py_library( diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 1e4cc3f095..11a59ec22b 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -16,20 +16,6 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "kernels/v4/*", - "proto/*", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # ---------------------------------- V2 ops ------------------------------------------# filegroup( name = "v2_op_sources", diff --git a/tensorflow/contrib/tensor_forest/hybrid/BUILD b/tensorflow/contrib/tensor_forest/hybrid/BUILD index a2a3b485f6..b7185e09c7 100644 --- a/tensorflow/contrib/tensor_forest/hybrid/BUILD +++ b/tensorflow/contrib/tensor_forest/hybrid/BUILD @@ -11,18 +11,6 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "custom_op_sources", srcs = glob( diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/BUILD b/tensorflow/contrib/tensor_forest/kernels/v4/BUILD index 794b76d858..b1b1559383 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/BUILD +++ b/tensorflow/contrib/tensor_forest/kernels/v4/BUILD @@ -11,11 +11,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob(["**/*"]), -) - DECISION_TREE_RESOURCE_DEPS = [ ":decision_node_evaluator", ":input_data", diff --git a/tensorflow/contrib/tensor_forest/proto/BUILD b/tensorflow/contrib/tensor_forest/proto/BUILD index 1cfef44af1..04fd6a9839 100644 --- a/tensorflow/contrib/tensor_forest/proto/BUILD +++ b/tensorflow/contrib/tensor_forest/proto/BUILD @@ -6,14 +6,6 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") package(default_visibility = ["//visibility:public"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "fertile_stats_proto", srcs = ["fertile_stats.proto"], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index d833744d0c..f4efd9717d 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -88,15 +88,3 @@ py_test( "//tensorflow/python:platform", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index 4175d8adb5..3f6b4cdc9a 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -135,9 +135,3 @@ tf_cc_binary( "//tensorflow/core/lib/db:sqlite", ], ) - -filegroup( - name = "all_files", - srcs = glob(["*"]), - visibility = ["//tensorflow:__pkg__"], -) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 906cc3f034..2f316767b3 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -272,15 +272,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/testing/BUILD b/tensorflow/contrib/testing/BUILD index 0be6aa755b..8a40e111d7 100644 --- a/tensorflow/contrib/testing/BUILD +++ b/tensorflow/contrib/testing/BUILD @@ -22,15 +22,3 @@ py_library( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/text/BUILD b/tensorflow/contrib/text/BUILD index 698fdd830f..38d91f7e49 100644 --- a/tensorflow/contrib/text/BUILD +++ b/tensorflow/contrib/text/BUILD @@ -111,14 +111,3 @@ py_test( "//tensorflow/python:training", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/tfprof/BUILD b/tensorflow/contrib/tfprof/BUILD index 28adce71d4..e7f4ebdd36 100644 --- a/tensorflow/contrib/tfprof/BUILD +++ b/tensorflow/contrib/tfprof/BUILD @@ -20,15 +20,3 @@ py_library( "//tensorflow/python/profiler:tfprof_logger", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/timeseries/BUILD b/tensorflow/contrib/timeseries/BUILD index 6ba069778c..f2b8786a52 100644 --- a/tensorflow/contrib/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/BUILD @@ -31,15 +31,3 @@ py_library( "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:test_utils", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index bb86ecb220..40cf9147b3 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -106,15 +106,3 @@ py_test( "//tensorflow/python/estimator:estimator_py", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index ed3ed4c0e1..55a25e39fe 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -442,15 +442,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index c86d06e923..ca25ccd2b8 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -268,15 +268,3 @@ py_library( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 95dc6f5ced..3e32a7a85c 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -283,16 +283,3 @@ tf_py_test( "//tensorflow/python:framework_test_lib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 56ddd7eff1..1c32993e8e 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -6,18 +6,6 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library_cc") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "tpu_profiler_proto", srcs = ["tpu_profiler.proto"], diff --git a/tensorflow/contrib/tpu/proto/BUILD b/tensorflow/contrib/tpu/proto/BUILD index e166098567..fcfbbe1a21 100644 --- a/tensorflow/contrib/tpu/proto/BUILD +++ b/tensorflow/contrib/tpu/proto/BUILD @@ -4,17 +4,6 @@ exports_files(["LICENSE"]) load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "tpu_embedding_config_proto", srcs = [ diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD index 6ae2f38252..4d2bfd3e43 100644 --- a/tensorflow/contrib/training/BUILD +++ b/tensorflow/contrib/training/BUILD @@ -308,18 +308,6 @@ py_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_proto_library( name = "protos_all", srcs = glob(["**/*.proto"]), diff --git a/tensorflow/contrib/util/BUILD b/tensorflow/contrib/util/BUILD index 6c766e4f1c..d9ccda8e89 100644 --- a/tensorflow/contrib/util/BUILD +++ b/tensorflow/contrib/util/BUILD @@ -75,15 +75,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 80a5d07ea4..9720fd6e86 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -11,18 +11,6 @@ load("//tensorflow:tensorflow.bzl", "tf_cuda_library") exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "c_srcs", data = glob([ diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 712106492b..d46241450c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -149,6 +149,8 @@ load( "if_mkl", ) +exports_files(["ops/ops.pbtxt"]) + # ----------------------------------------------------------------------------- # Public targets @@ -3851,18 +3853,6 @@ cc_library( # ----------------------------------------------------------------------------- # Google-internal targets go here (must be at the end). -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - alias( name = "android_srcs_no_runtime", actual = ":mobile_srcs_no_runtime", diff --git a/tensorflow/core/api_def/BUILD b/tensorflow/core/api_def/BUILD index 58dbac4e8e..19d6438809 100644 --- a/tensorflow/core/api_def/BUILD +++ b/tensorflow/core/api_def/BUILD @@ -17,18 +17,6 @@ load( "tf_cc_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "base_api_def", srcs = glob(["base_api/*"]), diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 9e8baab618..941a0e61c7 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -135,21 +135,6 @@ tf_cc_test( ], ) -# ----------------------------------------------------------------------------- -# Google-internal targets. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "execute", srcs = ["execute.cc"], diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index f6fe9edb02..5fab740e92 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -339,18 +339,3 @@ cc_library( # ], # visibility = ["//visibility:public"], # ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 434626bd2d..b07cb8cdcb 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -7,18 +7,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "c_srcs", data = glob([ diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index 9dae1b9859..9c655bfa31 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -5,18 +5,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "c_srcs", data = glob([ diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 2ca9b720ee..9dcc6765f5 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -3,18 +3,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "op_types", srcs = ["op_types.cc"], diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index b653f902e8..9ecf5a6cf7 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -8,18 +8,6 @@ load( "tf_cuda_tests_tags", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - config_setting( name = "xsmm", licenses = ["notice"], diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index df5a26f475..33949319d5 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -6,18 +6,6 @@ load( "tf_protos_grappler", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "graph_properties_testdata", srcs = glob([ diff --git a/tensorflow/core/grappler/inputs/BUILD b/tensorflow/core/grappler/inputs/BUILD index b683216590..ffa204028c 100644 --- a/tensorflow/core/grappler/inputs/BUILD +++ b/tensorflow/core/grappler/inputs/BUILD @@ -2,18 +2,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "utils", srcs = [ diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 19ff788aba..0d3a488f85 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -12,18 +12,6 @@ load( "tf_protos_grappler", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "static_schedule", srcs = ["static_schedule.cc"], diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 939031c44b..baf24c2505 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -2,18 +2,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "scc", srcs = ["scc.cc"], diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b469c01881..ca54978421 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6147,18 +6147,6 @@ tf_kernel_library( # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - # Library to link with when compiling the cwise_op kernels directly, # e.g. for selective registration. # should not be linked by projects that also link the cwise_op library. diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD index 4397410a5c..de05c647d6 100644 --- a/tensorflow/core/kernels/batching_util/BUILD +++ b/tensorflow/core/kernels/batching_util/BUILD @@ -8,18 +8,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "**/google_*", - ], - ), -) - cc_library( name = "periodic_function_dynamic", srcs = ["periodic_function.cc"], diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index a8784e3656..8c4f0218ee 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -13,18 +13,6 @@ load( "tf_cc_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "stats_aggregator", hdrs = ["stats_aggregator.h"], diff --git a/tensorflow/core/kernels/data/sql/BUILD b/tensorflow/core/kernels/data/sql/BUILD index f4698bdaf7..dc59120875 100644 --- a/tensorflow/core/kernels/data/sql/BUILD +++ b/tensorflow/core/kernels/data/sql/BUILD @@ -7,18 +7,6 @@ package( licenses(["notice"]) # Apache 2.0 -filegroup( - name = "all_files", - srcs = glob( - include = ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "sql", srcs = [ diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD index 9a7eca03ce..aab4b009b5 100644 --- a/tensorflow/core/kernels/fuzzing/BUILD +++ b/tensorflow/core/kernels/fuzzing/BUILD @@ -17,18 +17,6 @@ cc_library( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - load("//tensorflow/core/kernels/fuzzing:tf_ops_fuzz_target_lib.bzl", "tf_ops_fuzz_target_lib") tf_ops_fuzz_target_lib("identity") diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 7688305019..4870d9ae20 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ b/tensorflow/core/kernels/hexagon/BUILD @@ -13,18 +13,6 @@ load( "tf_kernel_library", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_cc_test( name = "graph_transferer_test", size = "small", diff --git a/tensorflow/core/kernels/neon/BUILD b/tensorflow/core/kernels/neon/BUILD index c3d24e50ef..313d40c082 100644 --- a/tensorflow/core/kernels/neon/BUILD +++ b/tensorflow/core/kernels/neon/BUILD @@ -12,18 +12,6 @@ load( "tf_kernel_library", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_kernel_library( name = "neon_depthwise_conv_op", hdrs = [ diff --git a/tensorflow/core/lib/db/BUILD b/tensorflow/core/lib/db/BUILD index 9ff87e8d66..ce09c2009a 100644 --- a/tensorflow/core/lib/db/BUILD +++ b/tensorflow/core/lib/db/BUILD @@ -42,9 +42,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -filegroup( - name = "all_files", - srcs = glob(["*"]), - visibility = ["//tensorflow:__pkg__"], -) diff --git a/tensorflow/core/ops/compat/BUILD b/tensorflow/core/ops/compat/BUILD index 6cdb1586bc..c613ab144f 100644 --- a/tensorflow/core/ops/compat/BUILD +++ b/tensorflow/core/ops/compat/BUILD @@ -57,18 +57,3 @@ tf_cc_binary( "//tensorflow/core:lib", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 21636641e7..3ee7be3c4e 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -14,20 +14,6 @@ load( "if_windows", ) -filegroup( - name = "all_files", - srcs = glob( - include = [ - "**/*", - ], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "expiring_lru_cache", hdrs = ["expiring_lru_cache.h"], diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index afb1d84d14..447056eb4b 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -223,15 +223,3 @@ alias( actual = ":mobile_srcs", visibility = ["//visibility:public"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/core/platform/hadoop/BUILD b/tensorflow/core/platform/hadoop/BUILD index 774a439855..7c38c399bd 100644 --- a/tensorflow/core/platform/hadoop/BUILD +++ b/tensorflow/core/platform/hadoop/BUILD @@ -12,18 +12,6 @@ load( "tf_cc_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "hadoop_file_system", srcs = ["hadoop_file_system.cc"], diff --git a/tensorflow/core/platform/s3/BUILD b/tensorflow/core/platform/s3/BUILD index 3a0ad2e9bd..21038cfeb1 100644 --- a/tensorflow/core/platform/s3/BUILD +++ b/tensorflow/core/platform/s3/BUILD @@ -13,18 +13,6 @@ load( "tf_cc_test", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - tf_cc_binary( name = "s3_file_system.so", srcs = [ diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD index 5ce6f1046d..3d3203cdaa 100644 --- a/tensorflow/core/profiler/BUILD +++ b/tensorflow/core/profiler/BUILD @@ -4,21 +4,6 @@ package( licenses(["notice"]) # Apache 2.0 -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - load("//tensorflow:tensorflow.bzl", "tf_cc_binary") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos") diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index 05a798bff8..8dcfde9a2a 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -365,17 +365,3 @@ cc_library( "//tensorflow/core:regexp_internal", ], ) -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/core/profiler/internal/advisor/BUILD b/tensorflow/core/profiler/internal/advisor/BUILD index 40cfd1e12e..1fedb05ae3 100644 --- a/tensorflow/core/profiler/internal/advisor/BUILD +++ b/tensorflow/core/profiler/internal/advisor/BUILD @@ -73,18 +73,3 @@ tf_cc_test( "//tensorflow/core/profiler/internal:tfprof_tf_testlib", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/core/util/ctc/BUILD b/tensorflow/core/util/ctc/BUILD index 1521349e4d..317420204e 100644 --- a/tensorflow/core/util/ctc/BUILD +++ b/tensorflow/core/util/ctc/BUILD @@ -26,18 +26,6 @@ alias( actual = ":mobile_srcs", ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "ctc", deps = [ diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD index 166bd0f659..648358606c 100644 --- a/tensorflow/core/util/tensor_bundle/BUILD +++ b/tensorflow/core/util/tensor_bundle/BUILD @@ -75,18 +75,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/adding_an_op/BUILD b/tensorflow/examples/adding_an_op/BUILD index b3ed6589ed..cf8054be6a 100644 --- a/tensorflow/examples/adding_an_op/BUILD +++ b/tensorflow/examples/adding_an_op/BUILD @@ -139,15 +139,3 @@ tf_cc_binary( "//tensorflow/core:framework", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 1214647797..a088d7cf2f 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -100,22 +100,6 @@ filegroup( ) # LINT.ThenChange(//tensorflow/examples/android/download-models.gradle) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - "gradleBuild/**", - "libs/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - filegroup( name = "java_files", srcs = glob(["src/**/*.java"]), diff --git a/tensorflow/examples/benchmark/BUILD b/tensorflow/examples/benchmark/BUILD index c4bb0a5bd9..98611a9aad 100644 --- a/tensorflow/examples/benchmark/BUILD +++ b/tensorflow/examples/benchmark/BUILD @@ -23,9 +23,3 @@ tf_py_logged_benchmark( name = "sample_logged_benchmark", target = "//tensorflow/examples/benchmark:sample_benchmark", ) - -filegroup( - name = "all_files", - srcs = glob(["**/*"]), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/get_started/regression/BUILD b/tensorflow/examples/get_started/regression/BUILD index 577b970c90..bee94d7d90 100644 --- a/tensorflow/examples/get_started/regression/BUILD +++ b/tensorflow/examples/get_started/regression/BUILD @@ -2,18 +2,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_test( name = "test", size = "medium", diff --git a/tensorflow/examples/how_tos/reading_data/BUILD b/tensorflow/examples/how_tos/reading_data/BUILD index 4a43585d53..64a054d371 100644 --- a/tensorflow/examples/how_tos/reading_data/BUILD +++ b/tensorflow/examples/how_tos/reading_data/BUILD @@ -54,15 +54,3 @@ py_binary( "//tensorflow/examples/tutorials/mnist:input_data", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/image_retraining/BUILD b/tensorflow/examples/image_retraining/BUILD index 9f9244a74c..ecd79a3b00 100644 --- a/tensorflow/examples/image_retraining/BUILD +++ b/tensorflow/examples/image_retraining/BUILD @@ -49,15 +49,3 @@ py_test( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD index 2abbe9dacc..c50fd93d03 100644 --- a/tensorflow/examples/label_image/BUILD +++ b/tensorflow/examples/label_image/BUILD @@ -9,6 +9,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +exports_files(["data/grace_hopper.jpg"]) + load("//tensorflow:tensorflow.bzl", "tf_cc_binary") tf_cc_binary( @@ -60,17 +62,3 @@ py_binary( "//tensorflow:tensorflow_py", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/learn/BUILD b/tensorflow/examples/learn/BUILD index aba7f600b5..bdbcb0b163 100644 --- a/tensorflow/examples/learn/BUILD +++ b/tensorflow/examples/learn/BUILD @@ -152,15 +152,3 @@ sh_test( "notap", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/multibox_detector/BUILD b/tensorflow/examples/multibox_detector/BUILD index 91a5bfa51c..4f9908cd52 100644 --- a/tensorflow/examples/multibox_detector/BUILD +++ b/tensorflow/examples/multibox_detector/BUILD @@ -27,17 +27,3 @@ tf_cc_binary( "//tensorflow/core:tensorflow", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/saved_model/BUILD b/tensorflow/examples/saved_model/BUILD index 1cdf5ec6e1..ebefc6576d 100644 --- a/tensorflow/examples/saved_model/BUILD +++ b/tensorflow/examples/saved_model/BUILD @@ -8,19 +8,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "g3doc/sitemap.md", - ], - ), - visibility = ["//visibility:public"], -) - py_binary( name = "saved_model_half_plus_two", srcs = [ diff --git a/tensorflow/examples/speech_commands/BUILD b/tensorflow/examples/speech_commands/BUILD index 12479211c3..13bca34a86 100644 --- a/tensorflow/examples/speech_commands/BUILD +++ b/tensorflow/examples/speech_commands/BUILD @@ -245,15 +245,3 @@ tf_cc_binary( "//tensorflow/core:protos_all_cc", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/estimators/BUILD b/tensorflow/examples/tutorials/estimators/BUILD index ecbc1a431d..bab609f208 100644 --- a/tensorflow/examples/tutorials/estimators/BUILD +++ b/tensorflow/examples/tutorials/estimators/BUILD @@ -20,15 +20,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/layers/BUILD b/tensorflow/examples/tutorials/layers/BUILD index f8a29c79c6..aad78b1840 100644 --- a/tensorflow/examples/tutorials/layers/BUILD +++ b/tensorflow/examples/tutorials/layers/BUILD @@ -19,15 +19,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index 6d4e67063d..aa1b2ec2db 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -132,15 +132,3 @@ py_test( "//tensorflow:tensorflow_py", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/monitors/BUILD b/tensorflow/examples/tutorials/monitors/BUILD index 4220e8144d..1c49e3fe53 100644 --- a/tensorflow/examples/tutorials/monitors/BUILD +++ b/tensorflow/examples/tutorials/monitors/BUILD @@ -23,15 +23,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/examples/tutorials/word2vec/BUILD b/tensorflow/examples/tutorials/word2vec/BUILD index bfcf459269..2e19c038bd 100644 --- a/tensorflow/examples/tutorials/word2vec/BUILD +++ b/tensorflow/examples/tutorials/word2vec/BUILD @@ -21,14 +21,3 @@ py_binary( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/examples/wav_to_spectrogram/BUILD b/tensorflow/examples/wav_to_spectrogram/BUILD index c99870c686..cc8835728d 100644 --- a/tensorflow/examples/wav_to_spectrogram/BUILD +++ b/tensorflow/examples/wav_to_spectrogram/BUILD @@ -49,17 +49,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 5a533e3b60..acaf1a44eb 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -388,15 +388,3 @@ genrule( cmd = "cp $< $@", output_to_bindir = 1, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 09c1965d7e..0c3c3c4e06 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4389,18 +4389,6 @@ py_test( ], ) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cuda_py_test( name = "accumulate_n_benchmark", size = "large", diff --git a/tensorflow/python/data/BUILD b/tensorflow/python/data/BUILD index b5bee36dcd..3e08c1587e 100644 --- a/tensorflow/python/data/BUILD +++ b/tensorflow/python/data/BUILD @@ -15,15 +15,3 @@ py_library( "//tensorflow/python/data/ops:readers", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index 8b8adefa65..ed0c11e6c1 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -367,15 +367,3 @@ tf_py_test( "no_windows", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index 3119ab0037..fa2e86eab1 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -59,15 +59,3 @@ py_library( "//tensorflow/python/eager:context", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index b1bdbdab37..0fc32d51b9 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -109,15 +109,3 @@ py_test( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 512d292ee2..4195586313 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -1095,15 +1095,3 @@ sh_test( ":offline_analyzer", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 0e089a26eb..8c0d3feece 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -398,21 +398,6 @@ py_test( ], ) -# ----------------------------------------------------------------------------- -# Google-internal targets. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "imperative_grad", srcs = ["imperative_grad.py"], diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 5afb5a7dd5..1fcff18a3a 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -9,18 +9,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "estimator_py", srcs = ["estimator_lib.py"], diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index 238a90b67d..0ae9900a1d 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -6,18 +6,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_library( name = "feature_column_py", srcs = ["feature_column_lib.py"], diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 16033e9b8f..2a06907f49 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -868,15 +868,3 @@ py_library( "//third_party/py/numpy", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 05f34db14b..ea210346c1 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2945,15 +2945,3 @@ tf_py_test( "//tensorflow/python/eager:tape", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index e220d05692..f3cc9636f9 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -280,15 +280,3 @@ cuda_py_test( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD index fd1b5bab6f..9555e51099 100644 --- a/tensorflow/python/kernel_tests/linalg/BUILD +++ b/tensorflow/python/kernel_tests/linalg/BUILD @@ -140,15 +140,3 @@ cuda_py_test( ], shard_count = 5, ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD index 88a4ddf7f2..acd7566eec 100644 --- a/tensorflow/python/kernel_tests/random/BUILD +++ b/tensorflow/python/kernel_tests/random/BUILD @@ -121,15 +121,3 @@ cuda_py_test( "//tensorflow/python:random_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/ops/distributions/BUILD b/tensorflow/python/ops/distributions/BUILD index 50b956a267..9d9ede7ad7 100644 --- a/tensorflow/python/ops/distributions/BUILD +++ b/tensorflow/python/ops/distributions/BUILD @@ -26,15 +26,3 @@ py_library( "@six_archive//:six", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/ops/linalg/BUILD b/tensorflow/python/ops/linalg/BUILD index ce8c1580fe..07659ef44c 100644 --- a/tensorflow/python/ops/linalg/BUILD +++ b/tensorflow/python/ops/linalg/BUILD @@ -34,15 +34,3 @@ py_library( "//tensorflow/python:special_math_ops", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/ops/losses/BUILD b/tensorflow/python/ops/losses/BUILD index 07741e0c3c..4aea0265a7 100644 --- a/tensorflow/python/ops/losses/BUILD +++ b/tensorflow/python/ops/losses/BUILD @@ -43,15 +43,3 @@ py_test( "//tensorflow/python:framework_for_generated_wrappers", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index c815aad0a0..0654104a34 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -156,18 +156,3 @@ py_test( "@com_google_pprof//:pprof_proto_py", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD index 362a1c49e6..994206cd63 100644 --- a/tensorflow/python/profiler/internal/BUILD +++ b/tensorflow/python/profiler/internal/BUILD @@ -70,18 +70,3 @@ cuda_py_test( "no_pip", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 30e0a099d8..2609a5d222 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -235,15 +235,3 @@ py_test( # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD index 1de1adcfbc..6e39ce8c80 100644 --- a/tensorflow/python/tools/BUILD +++ b/tensorflow/python/tools/BUILD @@ -258,17 +258,3 @@ py_test( "//tensorflow/core:protos_all_py", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - "bin/**", - "gen/**", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index d9b0260c9f..6722536358 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -5,18 +5,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - py_binary( name = "create_python_api", srcs = ["create_python_api.py"], diff --git a/tensorflow/tools/api/golden/BUILD b/tensorflow/tools/api/golden/BUILD index 08436396a6..ebdf42df2c 100644 --- a/tensorflow/tools/api/golden/BUILD +++ b/tensorflow/tools/api/golden/BUILD @@ -10,15 +10,3 @@ filegroup( name = "api_golden", srcs = glob(["*.pbtxt"]), ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/api/lib/BUILD b/tensorflow/tools/api/lib/BUILD index 2d3b838957..3f4fb91042 100644 --- a/tensorflow/tools/api/lib/BUILD +++ b/tensorflow/tools/api/lib/BUILD @@ -26,15 +26,3 @@ py_library( "//tensorflow/python:util", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 15bf1abb5f..0dc154b6d2 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -42,15 +42,3 @@ tf_cc_binary( "//tensorflow/core:op_gen_lib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/benchmark/BUILD b/tensorflow/tools/benchmark/BUILD index 6ed2594e6a..566a172ea7 100644 --- a/tensorflow/tools/benchmark/BUILD +++ b/tensorflow/tools/benchmark/BUILD @@ -90,12 +90,3 @@ tf_cc_binary( visibility = ["//visibility:public"], deps = [":benchmark_model_lib"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = ["**/OWNERS"], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/build_info/BUILD b/tensorflow/tools/build_info/BUILD index cdc47076ce..7307417805 100644 --- a/tensorflow/tools/build_info/BUILD +++ b/tensorflow/tools/build_info/BUILD @@ -9,18 +9,3 @@ exports_files( "gen_build_info.py", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/common/BUILD b/tensorflow/tools/common/BUILD index 316e5469e7..b9032c046e 100644 --- a/tensorflow/tools/common/BUILD +++ b/tensorflow/tools/common/BUILD @@ -44,14 +44,3 @@ py_test( "//tensorflow/python:platform_test", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/tools/compatibility/BUILD b/tensorflow/tools/compatibility/BUILD index 4f90c4d940..b7bfb29aae 100644 --- a/tensorflow/tools/compatibility/BUILD +++ b/tensorflow/tools/compatibility/BUILD @@ -68,18 +68,3 @@ exports_files( "testdata/test_file_v0_11.py", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/dist_test/server/BUILD b/tensorflow/tools/dist_test/server/BUILD index 865af8dd7b..003a19a9ab 100644 --- a/tensorflow/tools/dist_test/server/BUILD +++ b/tensorflow/tools/dist_test/server/BUILD @@ -37,15 +37,3 @@ py_test( "//tensorflow/python:client_testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/docker/BUILD b/tensorflow/tools/docker/BUILD index 7d5ae0a94d..849ba49f71 100644 --- a/tensorflow/tools/docker/BUILD +++ b/tensorflow/tools/docker/BUILD @@ -13,15 +13,3 @@ py_binary( srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/docker/notebooks/BUILD b/tensorflow/tools/docker/notebooks/BUILD index 89f473df4b..e9f26899c9 100644 --- a/tensorflow/tools/docker/notebooks/BUILD +++ b/tensorflow/tools/docker/notebooks/BUILD @@ -3,15 +3,3 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 8f10bc9e0c..d370fbd246 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -142,14 +142,3 @@ py_test( "//tensorflow/python:client_testlib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/tools/git/BUILD b/tensorflow/tools/git/BUILD index 942ceab85f..daa17fbd50 100644 --- a/tensorflow/tools/git/BUILD +++ b/tensorflow/tools/git/BUILD @@ -9,18 +9,3 @@ licenses(["notice"]) # Apache 2.0 exports_files( ["gen_git_source.py"], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index 6e21aa2846..1ad1895269 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -313,14 +313,3 @@ tf_py_test( ], main = "python/transform_graph_test.py", ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/tools/mlpbtxt/BUILD b/tensorflow/tools/mlpbtxt/BUILD index f9f48c6500..89c683c8c4 100644 --- a/tensorflow/tools/mlpbtxt/BUILD +++ b/tensorflow/tools/mlpbtxt/BUILD @@ -32,15 +32,3 @@ tf_cc_binary( "//tensorflow/core:op_gen_lib", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/proto_text/BUILD b/tensorflow/tools/proto_text/BUILD index 39c4aac1e8..ef7bfdd3c9 100644 --- a/tensorflow/tools/proto_text/BUILD +++ b/tensorflow/tools/proto_text/BUILD @@ -96,18 +96,3 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) - -# ----------------------------------------------------------------------------- -# Google-internal targets. These must be at the end for syncrepo. - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/quantization/BUILD b/tensorflow/tools/quantization/BUILD index e99ad06a06..17443a8617 100644 --- a/tensorflow/tools/quantization/BUILD +++ b/tensorflow/tools/quantization/BUILD @@ -76,15 +76,3 @@ py_binary( "//tensorflow/python:platform", ], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/tools/test/BUILD b/tensorflow/tools/test/BUILD index 159a8c1cfb..4b2026b947 100644 --- a/tensorflow/tools/test/BUILD +++ b/tensorflow/tools/test/BUILD @@ -92,15 +92,3 @@ tf_py_logged_benchmark( name = "rnn_op_benchmark", target = "//tensorflow/python/kernel_tests:rnn_test", ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/user_ops/BUILD b/tensorflow/user_ops/BUILD index e8198efe2e..71443cc41e 100644 --- a/tensorflow/user_ops/BUILD +++ b/tensorflow/user_ops/BUILD @@ -50,15 +50,3 @@ tf_py_test( additional_deps = ["//tensorflow:tensorflow_py"], data = [":invalid_op.so"], ) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/third_party/hadoop/BUILD b/third_party/hadoop/BUILD index 9e98154400..c3c5e428be 100644 --- a/third_party/hadoop/BUILD +++ b/third_party/hadoop/BUILD @@ -4,18 +4,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE.txt"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - cc_library( name = "hdfs", hdrs = ["hdfs.h"], diff --git a/third_party/mpi/BUILD b/third_party/mpi/BUILD index ff3f437e92..1d6ac2fceb 100644 --- a/third_party/mpi/BUILD +++ b/third_party/mpi/BUILD @@ -1,17 +1,5 @@ licenses(["restricted"]) -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) - load("//third_party/mpi:mpi.bzl", "mpi_hdr") load("//third_party/mpi:mpi.bzl", "if_mpi") diff --git a/third_party/sycl/BUILD b/third_party/sycl/BUILD index fbdf19f205..f631b6df06 100644 --- a/third_party/sycl/BUILD +++ b/third_party/sycl/BUILD @@ -1,15 +1,3 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/third_party/sycl/sycl/BUILD b/third_party/sycl/sycl/BUILD index bc1d18b7b5..b045609954 100644 --- a/third_party/sycl/sycl/BUILD +++ b/third_party/sycl/sycl/BUILD @@ -5,15 +5,3 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache 2.0 - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) -- GitLab From 3f7adc710495e1160acd956c482779247ef1f101 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Wed, 28 Mar 2018 20:51:01 -0700 Subject: [PATCH 660/960] Support structured source in GradientTape.gradient Before this change, it was easy to forget [] around the source tensor. This mistake lead to GradientTape.gradient(), returning a list of Nones. Nones normally tell to the user that the source and the target are not connected via differentiable operations, which is not the source of the error in this case. Instead of adding a check that `sources` is a list of tensors, this CL adds ability to handle structured source (which includes a lone tensor), similarly to many existing TensorFlow APIs. Also, with Alex's help, it fixes a bug where repeated tensors in `sources` were not handled correctly. PiperOrigin-RevId: 190878583 --- tensorflow/c/eager/tape.h | 21 ++++++---- tensorflow/python/eager/backprop.py | 33 ++++++++++------ tensorflow/python/eager/backprop_test.py | 47 +++++++++++++++++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 4 ++ 4 files changed, 85 insertions(+), 20 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index c7bd3bdafd..97c323b872 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -601,23 +601,28 @@ Status GradientTape::ComputeGradient( } CHECK(state.op_tape.empty()); result->reserve(source_tensor_ids.size()); + gtl::FlatSet used_gradient_ids(source_tensor_ids.size()); for (auto is : source_tensor_ids) { auto grad_it = gradients.find(is); if (grad_it == gradients.end()) { result->push_back(nullptr); } else { - if (grad_it->second.size() == 1) { - result->push_back(grad_it->second[0]); - } else { - result->push_back(vspace.AggregateGradients(grad_it->second)); + if (grad_it->second.size() > 1) { + Gradient* grad = vspace.AggregateGradients(grad_it->second); + grad_it->second.clear(); + grad_it->second.push_back(grad); } - gradients.erase(grad_it); + result->push_back(grad_it->second[0]); + used_gradient_ids.insert(is); } } - VLOG(1) << "Final gradients size: " << gradients.size(); + VLOG(1) << "Final gradients size: " + << gradients.size() - used_gradient_ids.size(); for (auto grad_pair : gradients) { - for (const auto& g : grad_pair.second) { - vspace.DeleteGradient(g); + if (used_gradient_ids.find(grad_pair.first) == used_gradient_ids.end()) { + for (const auto& g : grad_pair.second) { + vspace.DeleteGradient(g); + } } } return Status::OK(); diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index c54a5a1445..209b012621 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -646,6 +646,13 @@ _default_vspace = imperative_grad.VSpace( ones=_ones) +def _handle_or_self(x): + """If x is ResourceVariable, return its handle, else x.""" + if isinstance(x, resource_variable_ops.ResourceVariable): + x = x.handle + return x + + @tf_export("GradientTape") class GradientTape(object): """Record operations for automatic differentiation. @@ -723,9 +730,7 @@ class GradientTape(object): tensor: a Tensor or list of Tensors. """ for t in nest.flatten(tensor): - if isinstance(t, resource_variable_ops.ResourceVariable): - t = t.handle - tape.watch(t) + tape.watch(_handle_or_self(t)) def watched_variables(self): # Sorting variables by id, which is monotonically increasing in construction @@ -739,14 +744,15 @@ class GradientTape(object): Args: target: Tensor to be differentiated. - sources: a list of Tensors or Variables. `target` will be differentiated - against elements in `sources`. + sources: a list or nested structure of Tensors or Variables. `target` + will be differentiated against elements in `sources`. output_gradients: a list of gradients, one for each element of target. Defaults to None. Returns: - a list of Tensors (or IndexedSlices, or None), one for each element in - `sources`. + a list or nested structure of Tensors (or IndexedSlices, or None), + one for each element in `sources`. Returned structure is the same as + the structure of `sources`. Raises: RuntimeError: if called inside the context of the tape, or if called more @@ -756,12 +762,15 @@ class GradientTape(object): raise RuntimeError("GradientTape.gradient can only be called once " "on non-persistent tapes, and " "only when the context manager has exited.") - sources = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable) - else x - for x in sources] - grad = imperative_grad.imperative_grad( - _default_vspace, self._tape, [target], sources, + flat_sources = nest.flatten(sources) + flat_sources = [_handle_or_self(x) for x in flat_sources] + + flat_grad = imperative_grad.imperative_grad( + _default_vspace, self._tape, [target], flat_sources, output_gradients=output_gradients) + if not self._persistent: self._tape = None + + grad = nest.pack_sequence_as(sources, flat_grad) return grad diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index f04d89a6d9..991b4dbe7a 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -369,6 +369,53 @@ class BackpropTest(test.TestCase): self.assertEqual(backprop.implicit_grad(f)()[0][0], None) + @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() + def testGradientTapeRepeatedSource(self): + with backprop.GradientTape(persistent=False) as g: + x = constant_op.constant(3.0) + g.watch(x) + y = 2 * x + grad = g.gradient(target=y, sources=[x, x]) + self.assertEqual(self.evaluate(grad), [2.0, 2.0]) + + @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() + def testPersistentGradientTapeRepeatedSource(self): + with backprop.GradientTape(persistent=True) as g: + x = constant_op.constant(3.0) + y = constant_op.constant(5.0) + g.watch(x) + g.watch(y) + z = x * x + x * y + grad = g.gradient(target=z, sources=[x, x]) + self.assertEqual(self.evaluate(grad), [11.0, 11.0]) + grad = g.gradient(target=z, sources=[y, x]) + self.assertEqual(self.evaluate(grad), [3.0, 11.0]) + + @test_util.assert_no_new_tensors + @test_util.run_in_graph_and_eager_modes() + def testGradientTapeStructure(self): + with backprop.GradientTape(persistent=True) as g: + # Using different constant values because constant tensors are + # cached, leading to a different gradient then what one might expect. + x1 = constant_op.constant(3.0) + x2 = constant_op.constant(3.1) + x3 = constant_op.constant(3.2) + g.watch(x1) + g.watch(x2) + g.watch(x3) + y = x1 + 2 * x2 + 3 * x3 + self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0]) + self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,)) + self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0)) + self.assertEqual(self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), + [(1.0, 2.0), (2.0, 3.0)]) + self.assertEqual(self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))), + (1.0, 2.0, [1.0, 3.0])) + self.assertEqual(self.evaluate(g.gradient(y, [x1, {'x2': x2, 'x3': x3}])), + [1.0, {'x2': 2.0, 'x3': 3.0}]) + @test_util.assert_no_new_tensors @test_util.run_in_graph_and_eager_modes() def testGradientTape(self): diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 73482792d5..8a398f6447 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1372,11 +1372,15 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, } if (!result.empty()) { PyObject* py_result = PyList_New(result.size()); + tensorflow::gtl::FlatSet seen_results(result.size()); for (int i = 0; i < result.size(); ++i) { if (result[i] == nullptr) { Py_INCREF(Py_None); result[i] = Py_None; + } else if (seen_results.find(result[i]) != seen_results.end()) { + Py_INCREF(result[i]); } + seen_results.insert(result[i]); PyList_SET_ITEM(py_result, i, reinterpret_cast(result[i])); } return py_result; -- GitLab From 5bc7c510fd99dd6f887eb2c5834ae8297891dea7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 28 Mar 2018 21:07:02 -0700 Subject: [PATCH 661/960] Fixed the shape function of the SplitV op that incorrectly often assumed that the shape of all the outputs is the same. PiperOrigin-RevId: 190879600 --- .../python/kernel_tests/shape_ops_test.py | 5 +---- .../contrib/signal/python/ops/shape_ops.py | 2 -- tensorflow/core/ops/array_ops.cc | 17 +++++++++-------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py index bc4663fbb0..64cc8c7ea5 100644 --- a/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/contrib/signal/python/kernel_tests/shape_ops_test.py @@ -338,10 +338,7 @@ class FrameTest(test.TestCase): def test_constant_folding(self): """frame should be constant foldable for constant inputs.""" - # Padding is incorrectly defined in shape_ops.py (the rank of the padding - # tensor should be equal to the rank of the input tensor + 1): only test - # with padding set to False to avoid this. - for pad_end in [False]: + for pad_end in [True, False]: g = ops.Graph() with g.as_default(): frame_length, frame_step = 32, 16 diff --git a/tensorflow/contrib/signal/python/ops/shape_ops.py b/tensorflow/contrib/signal/python/ops/shape_ops.py index 97fe20866b..1ddc2941ec 100644 --- a/tensorflow/contrib/signal/python/ops/shape_ops.py +++ b/tensorflow/contrib/signal/python/ops/shape_ops.py @@ -139,8 +139,6 @@ def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0, axis=-1, [[0, pad_samples]], array_ops.zeros([num_inner_dimensions, 2], dtype=pad_samples.dtype)], 0) - # TODO(rjryan): the paddings tensor must of rank tf.rank(signal) + 1. This - # isn't the case here and should be fixed. signal = array_ops.pad(signal, paddings, constant_values=pad_value) signal_shape = array_ops.shape(signal) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 88d2aa3f41..af8afc90f5 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -494,9 +494,9 @@ REGISTER_OP("SplitV") const Tensor* size_splits = c->input_tensor(1); if (rank == InferenceContext::kUnknownRank) { // If the rank of input tensor is unknown, then return unknown shapes. - output_shape = c->UnknownShape(); + // Note that the shape of each output can be different. for (int i = 0; i < num_outputs; ++i) { - c->set_output(i, output_shape); + c->set_output(i, c->UnknownShape()); } } else if (rank == 0) { // Throw error if input is a scalar. @@ -505,18 +505,19 @@ REGISTER_OP("SplitV") // If split dimension is known, but the sizes are unknown, then // only the split dimension is unknown output_shape = input; - TF_RETURN_IF_ERROR(c->ReplaceDim(output_shape, - c->Value(split_dimension), - c->UnknownDim(), &output_shape)); for (int i = 0; i < num_outputs; ++i) { + TF_RETURN_IF_ERROR(c->ReplaceDim(output_shape, + c->Value(split_dimension), + c->UnknownDim(), &output_shape)); c->set_output(i, output_shape); } } else if (size_splits == nullptr && !c->ValueKnown(split_dimension)) { // If split dimension or tensor containing the split sizes is unknown, - // then return unknown shapes of same rank as input. - output_shape = c->UnknownShapeOfRank(rank); + // then return unknown shapes of same rank as input. Note that each + // output shape can be different since splitv doesn't always split + // tensors evenly. for (int i = 0; i < num_outputs; ++i) { - c->set_output(i, output_shape); + c->set_output(i, c->UnknownShapeOfRank(rank)); } } else { // Determine the output shape if split dimension and split sizes are -- GitLab From aeaec465f2f08e32c524e23fb7b0ac016f3dc6a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 21:11:16 -0700 Subject: [PATCH 662/960] Fix TensorList decoding bug. Thanks to Alexandre Passos for finding this. PiperOrigin-RevId: 190879840 --- tensorflow/core/kernels/list_kernels.cc | 1 + tensorflow/core/kernels/list_kernels.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index baf0a4abe4..9e7786f25e 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -112,6 +112,7 @@ bool TensorList::Decode(const VariantTensorData& data) { dims.push_back(scratch); } } + element_shape = PartialTensorShape(dims); return true; } diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 9733883001..8af48f0a67 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -83,7 +83,8 @@ class TensorListStack : public OpKernel { DataTypeString(l->element_dtype))); OP_REQUIRES(c, l->element_shape.IsFullyDefined(), errors::InvalidArgument("Tried to stack elements from a list " - "with non-fully-defined shape.")); + "with non-fully-defined shape: ", + l->element_shape.DebugString())); if (num_elements_ != -1) { OP_REQUIRES(c, l->tensors.size() == num_elements_, errors::InvalidArgument("Operation expected a list with ", -- GitLab From 163bf8d0620a08d186c1315b0789e898f09759f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Mar 2018 21:52:30 -0700 Subject: [PATCH 663/960] DistributionStrategy-enable Estimator. PiperOrigin-RevId: 190882152 --- .../python/learn/estimators/run_config.py | 11 +- tensorflow/python/estimator/estimator.py | 341 +++++++++++++----- tensorflow/python/estimator/run_config.py | 3 +- 3 files changed, 271 insertions(+), 84 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index 1d161093de..f3500bf56f 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -290,8 +290,15 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig): Note - using this argument, it is easy to provide settings which break otherwise perfectly good models. Use with care. """ - super(RunConfig, self).__init__( - master=master, evaluation_master=evaluation_master) + # Neither parent class calls super().__init__(), so here we have to + # manually call their __init__() methods. + ClusterConfig.__init__( + self, master=master, evaluation_master=evaluation_master) + # For too long this code didn't call: + # core_run_config.RunConfig.__init__(self) + # so instead of breaking compatibility with that assumption, we + # just manually initialize this field: + self._distribute = None gpu_options = config_pb2.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 6a4132bca2..2fe521b063 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -41,8 +41,11 @@ from tensorflow.python.estimator.export.export import get_temp_export_dir from tensorflow.python.estimator.export.export import get_timestamped_export_dir from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import metrics as metrics_lib +from tensorflow.python.ops import resources +from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import builder as saved_model_builder @@ -50,6 +53,7 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.summary import summary from tensorflow.python.summary.writer import writer_cache from tensorflow.python.training import device_setter +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import evaluation from tensorflow.python.training import monitored_session from tensorflow.python.training import saver @@ -183,6 +187,9 @@ class Estimator(object): config) self._config = config + # The distribute field contains an instance of DistributionStrategy. + self._distribution = self._config.distribute + # Model directory. model_dir = compat_internal.path_to_str(model_dir) if (model_dir is not None) and (self._config.model_dir is not None): @@ -682,11 +689,25 @@ class Estimator(object): def _get_features_and_labels_from_input_fn(self, input_fn, mode): """Extracts the `features` and labels from return values of `input_fn`.""" result = self._call_input_fn(input_fn, mode) + # TODO(anjalisridhar): What about the default DistributionStrategy? Perhaps + # using any input is alright in that case. There is also a + # has_dataset_or_queue_runner function that we may want to extend and use. + if (self._distribution is not None and + not isinstance(result, dataset_ops.Dataset)): + raise ValueError('input_fn() must return a tf.data.Dataset when using a ' + 'DistributionStrategy.') input_hooks = [] if isinstance(result, dataset_ops.Dataset): - iterator = result.make_initializable_iterator() - input_hooks.append(_DatasetInitializerHook(iterator)) - result = iterator.get_next() + if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN: + # TODO(josh11b): This is currently using a one-shot iterator, we + # will update this to an initializeable iterator once the + # necessory support for creating an initializable iterator is + # available. + result = self._distribution.distribute_dataset(result).get_next() + else: + iterator = result.make_initializable_iterator() + input_hooks.append(_DatasetInitializerHook(iterator)) + result = iterator.get_next() if isinstance(result, (list, tuple)): if len(result) != 2: raise ValueError( @@ -815,6 +836,12 @@ class Estimator(object): return model_fn_results def _train_model(self, input_fn, hooks, saving_listeners): + if self._distribution: + return self._train_model_distributed(input_fn, hooks, saving_listeners) + else: + return self._train_model_default(input_fn, hooks, saving_listeners) + + def _train_model_default(self, input_fn, hooks, saving_listeners): worker_hooks = [] with ops.Graph().as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) @@ -826,86 +853,209 @@ class Estimator(object): worker_hooks.extend(input_hooks) estimator_spec = self._call_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.config) + return self._train_with_estimator_spec(estimator_spec, worker_hooks, + hooks, global_step_tensor, + saving_listeners) - if self._warm_start_settings: - logging.info('Warm-starting with WarmStartSettings: %s' % - (self._warm_start_settings,)) - # pylint: disable=protected-access - warm_starting_util.warm_start(*self._warm_start_settings) - # pylint: enable=protected-access - # Check if the user created a loss summary, and add one if they didn't. - # We assume here that the summary is called 'loss'. If it is not, we will - # make another one with the name 'loss' to ensure it shows up in the right - # graph in TensorBoard. - if not any([x.op.name == 'loss' - for x in ops.get_collection(ops.GraphKeys.SUMMARIES)]): - summary.scalar('loss', estimator_spec.loss) - ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss) - worker_hooks.extend(hooks) - worker_hooks.extend([ - training.NanTensorHook(estimator_spec.loss), - training.LoggingTensorHook( - { - 'loss': estimator_spec.loss, - 'step': global_step_tensor - }, - every_n_iter=self._config.log_step_count_steps) - ]) - worker_hooks.extend(estimator_spec.training_hooks) - - if not (estimator_spec.scaffold.saver or - ops.get_collection(ops.GraphKeys.SAVERS)): - ops.add_to_collection( - ops.GraphKeys.SAVERS, - training.Saver( - sharded=True, - max_to_keep=self._config.keep_checkpoint_max, - keep_checkpoint_every_n_hours=( - self._config.keep_checkpoint_every_n_hours), - defer_build=True, - save_relative_paths=True)) - - chief_hooks = [] - all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks) - saver_hooks = [ - h for h in all_hooks if isinstance(h, training.CheckpointSaverHook)] - if (self._config.save_checkpoints_secs or - self._config.save_checkpoints_steps): - if not saver_hooks: - chief_hooks = [ - training.CheckpointSaverHook( - self._model_dir, - save_secs=self._config.save_checkpoints_secs, - save_steps=self._config.save_checkpoints_steps, - scaffold=estimator_spec.scaffold) - ] - saver_hooks = [chief_hooks[0]] - if saving_listeners: - if not saver_hooks: - raise ValueError( - 'There should be a CheckpointSaverHook to use saving_listeners. ' - 'Please set one of the RunConfig.save_checkpoints_steps or ' - 'RunConfig.save_checkpoints_secs.') + def _train_model_distributed(self, input_fn, hooks, saving_listeners): + worker_hooks = [] + with ops.Graph().as_default() as g: + with self._distribution.scope(): + random_seed.set_random_seed(self._config.tf_random_seed) + features, labels, input_hooks = ( + self._get_features_and_labels_from_input_fn( + input_fn, model_fn_lib.ModeKeys.TRAIN)) + worker_hooks.extend(input_hooks) + global_step_tensor = self._create_and_assert_global_step(g) + # The default destination for the global_step_tensor fetch call is the + # CPU. + global_step_read_tensor = self._distribution.fetch(global_step_tensor) + # we want to add to the global collection in the main thread not the + # tower threads. + ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY, + global_step_read_tensor) + grouped_estimator_spec = self._distribution.call_for_each_tower( + self._call_model_fn, + features, + labels, # although this will be None it seems + model_fn_lib.ModeKeys.TRAIN, + self.config) + + # TODO(anjalisridhar): Figure out how to resolve the folowing scaffold + # parameters: init_feed_dict, init_fn. + scaffold_list = self._distribution.unwrap( + grouped_estimator_spec.scaffold) + init_feed_dict = [ + s.init_feed_dict + for s in scaffold_list + if s.init_feed_dict is not None + ] + if init_feed_dict: + init_feed_dict = self._distribution.group(init_feed_dict) else: - # It is expected to have one CheckpointSaverHook. If multiple, we pick - # up the first one to add listener. - saver_hooks[0]._listeners.extend(saving_listeners) # pylint: disable=protected-access - with training.MonitoredTrainingSession( - master=self._config.master, - is_chief=self._config.is_chief, - checkpoint_dir=self._model_dir, - scaffold=estimator_spec.scaffold, - hooks=worker_hooks, - chief_only_hooks=( - tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)), - save_checkpoint_secs=0, # Saving is handled by a hook. - save_summaries_steps=self._config.save_summary_steps, - config=self._session_config, - log_step_count_steps=self._config.log_step_count_steps) as mon_sess: - loss = None - while not mon_sess.should_stop(): - _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) - return loss + init_feed_dict = None + + init_fn = [s.init_fn for s in scaffold_list if s.init_fn is not None] + if init_fn: + init_fn = self._distribution.group(init_fn) + else: + init_fn = None + + init_op = [s.init_op for s in scaffold_list if s.init_op is not None] + if init_op: + init_op = self._distribution.group(init_op) + else: + init_op = None + + ready_op = self._distribution.call_for_each_tower( + create_per_tower_ready_op, grouped_estimator_spec.scaffold) + if ready_op is not None: + ready_op = self._distribution.group(ready_op) + else: + ready_op = None + + ready_for_local_init_op = self._distribution.call_for_each_tower( + create_per_tower_ready_for_local_init_op, + grouped_estimator_spec.scaffold) + if ready_for_local_init_op is not None: + ready_for_local_init_op = self._distribution.group( + ready_for_local_init_op) + else: + ready_for_local_init_op = None + + local_init_op = [ + s.local_init_op + for s in scaffold_list + if s.local_init_op is not None + ] + if local_init_op: + local_init_op = self._distribution.group(local_init_op) + else: + local_init_op = None + + summary_op = [ + s.summary_op for s in scaffold_list if s.summary_op is not None + ] + if summary_op: + summary_op = self._distribution.group(summary_op) + else: + summary_op = None + + scaffold = monitored_session.Scaffold( + init_op=init_op, + ready_op=ready_op, + ready_for_local_init_op=ready_for_local_init_op, + local_init_op=local_init_op, + summary_op=summary_op, + init_feed_dict=init_feed_dict, + init_fn=init_fn) + + def get_hooks_from_the_first_device(per_device_hooks): + hooks_list = self._distribution.unwrap(per_device_hooks) + assert hooks_list + return hooks_list[0] + + training_hooks = get_hooks_from_the_first_device( + grouped_estimator_spec.training_hooks) + training_chief_hooks = get_hooks_from_the_first_device( + grouped_estimator_spec.training_chief_hooks) + + estimator_spec = model_fn_lib.EstimatorSpec( + mode=grouped_estimator_spec.mode, + loss=self._distribution.unwrap( + self._distribution.reduce(distribute_lib.get_loss_reduction(), + grouped_estimator_spec.loss, + destinations='/device:CPU:0'))[0], + train_op=self._distribution.group(grouped_estimator_spec.train_op), + training_hooks=training_hooks, + training_chief_hooks=training_chief_hooks, + scaffold=scaffold) + return self._train_with_estimator_spec(estimator_spec, worker_hooks, + hooks, global_step_read_tensor, + saving_listeners) + + def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks, + global_step_tensor, saving_listeners): + """Train a model with the given Estimator Spec.""" + if self._warm_start_settings: + logging.info('Warm-starting with WarmStartSettings: %s' % + (self._warm_start_settings,)) + # pylint: disable=protected-access + warm_starting_util.warm_start(*self._warm_start_settings) + # pylint: enable=protected-access + # Check if the user created a loss summary, and add one if they didn't. + # We assume here that the summary is called 'loss'. If it is not, we will + # make another one with the name 'loss' to ensure it shows up in the right + # graph in TensorBoard. + if not any([x.op.name == 'loss' + for x in ops.get_collection(ops.GraphKeys.SUMMARIES)]): + summary.scalar('loss', estimator_spec.loss) + ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss) + worker_hooks.extend(hooks) + worker_hooks.extend([ + training.NanTensorHook(estimator_spec.loss), + training.LoggingTensorHook( + { + 'loss': estimator_spec.loss, + 'step': global_step_tensor + }, + every_n_iter=self._config.log_step_count_steps) + ]) + worker_hooks.extend(estimator_spec.training_hooks) + + if not (estimator_spec.scaffold.saver or + ops.get_collection(ops.GraphKeys.SAVERS)): + ops.add_to_collection( + ops.GraphKeys.SAVERS, + training.Saver( + sharded=True, + max_to_keep=self._config.keep_checkpoint_max, + keep_checkpoint_every_n_hours=( + self._config.keep_checkpoint_every_n_hours), + defer_build=True, + save_relative_paths=True)) + + chief_hooks = [] + all_hooks = worker_hooks + list(estimator_spec.training_chief_hooks) + saver_hooks = [ + h for h in all_hooks if isinstance(h, training.CheckpointSaverHook)] + if (self._config.save_checkpoints_secs or + self._config.save_checkpoints_steps): + if not saver_hooks: + chief_hooks = [ + training.CheckpointSaverHook( + self._model_dir, + save_secs=self._config.save_checkpoints_secs, + save_steps=self._config.save_checkpoints_steps, + scaffold=estimator_spec.scaffold) + ] + saver_hooks = [chief_hooks[0]] + if saving_listeners: + if not saver_hooks: + raise ValueError( + 'There should be a CheckpointSaverHook to use saving_listeners. ' + 'Please set one of the RunConfig.save_checkpoints_steps or ' + 'RunConfig.save_checkpoints_secs.') + else: + # It is expected to have one CheckpointSaverHook. If multiple, we pick + # up the first one to add listener. + saver_hooks[0]._listeners.extend(saving_listeners) # pylint: disable=protected-access + with training.MonitoredTrainingSession( + master=self._config.master, + is_chief=self._config.is_chief, + checkpoint_dir=self._model_dir, + scaffold=estimator_spec.scaffold, + hooks=worker_hooks, + chief_only_hooks=( + tuple(chief_hooks) + tuple(estimator_spec.training_chief_hooks)), + save_checkpoint_secs=0, # Saving is handled by a hook. + save_summaries_steps=self._config.save_summary_steps, + config=self._session_config, + log_step_count_steps=self._config.log_step_count_steps) as mon_sess: + loss = None + while not mon_sess.should_stop(): + _, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss]) + return loss def _evaluate_model(self, input_fn, @@ -972,6 +1122,35 @@ class Estimator(object): return eval_results +def create_per_tower_ready_op(scaffold): + """Create a Scaffold.ready_op inside a tower.""" + if scaffold.ready_op: + return scaffold.ready_op + + def default_ready_op(): + return array_ops.concat([ + variables.report_uninitialized_variables(), + resources.report_uninitialized_resources() + ], 0) + + return monitored_session.Scaffold.get_or_default( + 'ready_op', ops.GraphKeys.READY_OP, default_ready_op) + + +def create_per_tower_ready_for_local_init_op(scaffold): + """Create a Scaffold.ready_for_local_init_op inside a tower.""" + if scaffold.ready_for_local_init_op: + return scaffold.ready_for_local_init_op + + def default_ready_for_local_init_op(): + return variables.report_uninitialized_variables( + variables.global_variables()) + + return monitored_session.Scaffold.get_or_default( + 'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP, + default_ready_for_local_init_op) + + def _check_checkpoint_available(model_dir): latest_path = saver.latest_checkpoint(model_dir) if not latest_path: diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 141eaeff64..41415b89e9 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -688,7 +688,7 @@ class RunConfig(object): Only the properties in the following list are allowed to be replaced: - - `model_dir`. + - `model_dir`, - `tf_random_seed`, - `save_summary_steps`, - `save_checkpoints_steps`, @@ -697,6 +697,7 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, + - `distribute`. In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). -- GitLab From 695aa649da752315596934319dd601854495dec5 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 28 Mar 2018 22:46:25 -0700 Subject: [PATCH 664/960] Add --announce_rc Bazel arg to several of our builds. This will help to... - Refactor the build scripts without accidently adding functional changes. - Help debug several issues where some options aren't being added correctly by configure script. PiperOrigin-RevId: 190884531 --- tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh | 1 + tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh | 1 + tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh index 338066131b..c7cc16e669 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh @@ -33,6 +33,7 @@ yes "" | $PYTHON_BIN_PATH configure.py which bazel bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac,-no_mac \ --test_timeout 300,450,1200,3600 --config=opt \ + --announce_rc \ --test_size_filters=small,medium \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh index 920a261ae3..7e0e81a1eb 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh @@ -31,6 +31,7 @@ export PYTHON_BIN_PATH=$(which python3) yes "" | $PYTHON_BIN_PATH configure.py which bazel bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac,-no_mac \ + --announce_rc \ --test_timeout 300,450,1200,3600 \ --test_size_filters=small,medium \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh index 94276c6c5c..7dfee8f371 100644 --- a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh @@ -41,7 +41,7 @@ run_configure_for_gpu_build # build_libtensorflow_tarball in ../builds/libtensorflow.sh # cannot be used on Windows since it relies on pkg_tar rules. # So we do something special here -bazel build -c opt --copt=/arch:AVX \ +bazel build -c opt --copt=/arch:AVX --announce_rc \ tensorflow:libtensorflow.so \ tensorflow/tools/lib_package:clicenses_generate \ tensorflow/java:libtensorflow_jni.so \ -- GitLab From 8df77178a8d41b392928ec17e6ca4867698407ff Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 28 Mar 2018 23:31:26 -0700 Subject: [PATCH 665/960] Move the swapping kernels to the all_kernels library to avoid registering them more than once from tensorflow/contrib. PiperOrigin-RevId: 190887394 --- tensorflow/core/BUILD | 4 ++++ tensorflow/core/grappler/optimizers/BUILD | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d46241450c..b8dbd90ab8 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -144,6 +144,7 @@ load( "tf_cuda_tests_tags", "if_static", ) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "//third_party/mkl:build_defs.bzl", "if_mkl", @@ -939,6 +940,9 @@ cc_library( "//tensorflow/core/kernels:mkl_softmax_op", "//tensorflow/core/kernels:mkl_tfconv_op", "//tensorflow/core/kernels:mkl_aggregate_ops", + ]) + if_cuda([ + "//tensorflow/core/grappler/optimizers:gpu_swapping_kernels", + "//tensorflow/core/grappler/optimizers:gpu_swapping_ops", ]), ) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0d3a488f85..2c365c467c 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -361,6 +361,7 @@ tf_kernel_library( srcs = [ "gpu_swapping_kernels.cc", ], + visibility = ["//tensorflow:__subpackages__"], deps = [ "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", @@ -373,6 +374,7 @@ cc_library( srcs = [ "gpu_swapping_ops.cc", ], + visibility = ["//tensorflow:__subpackages__"], deps = [ "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", @@ -406,10 +408,7 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", - ] + if_cuda([ - ":gpu_swapping_kernels", - ":gpu_swapping_ops", - ]), + ], ) tf_cuda_only_cc_test( @@ -417,6 +416,8 @@ tf_cuda_only_cc_test( srcs = ["memory_optimizer_test.cc"], tags = ["no_cuda_on_cpu_tap"], # Do not re-enable again without actually testing. deps = [ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", ":memory_optimizer", "//tensorflow/cc:cc_ops", "//tensorflow/core:ops", -- GitLab From fd25620e80d628d77c5e9a03e87d6a4e10eccd27 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 29 Mar 2018 04:34:29 -0700 Subject: [PATCH 666/960] Internal change. PiperOrigin-RevId: 190913047 --- tensorflow/contrib/eager/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 7a8c11e3bb..48372d7ae0 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -80,6 +80,7 @@ cuda_py_test( "//tensorflow/python/data", "//tensorflow/python/eager:test", ], + tags = ["noguitar"], ) py_library( -- GitLab From 93cf42ac3530d24009179c45c88a444383719c9b Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 29 Mar 2018 22:54:53 +0800 Subject: [PATCH 667/960] Fix math equation format in layers (#18069) --- tensorflow/contrib/layers/python/layers/layers.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 350bcb3bca..10d7f6d076 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -3045,16 +3045,16 @@ def legacy_fully_connected(x, `activation_fn` is `None`, the result of `y = w * x + b` is returned. - If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)] - with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix + If `x` has shape [\\(\text{dim}_0, \text{dim}_1, ..., \text{dim}_n\\)] + with more than 2 dimensions (\\(n > 1\\)), then we repeat the matrix multiply along the first dimensions. The result r is a tensor of shape - [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`], - where \\\( r_{i_0, ..., i_{n-1}, k} = - \\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\). + [\\(\text{dim}_0, ..., \text{dim}_{n-1},\\) `num_output_units`], + where \\( r_{i_0, ..., i_{n-1}, k} = + \sum_{0 \leq j < \text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\). This is accomplished by reshaping `x` to 2-D - [\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)] + [\\(\text{dim}_0 \cdot ... \cdot \text{dim}_{n-1}, \text{dim}_n\\)] before the matrix multiply and afterwards reshaping it to - [\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`]. + [\\(\text{dim}_0, ..., \text{dim}_{n-1},\\) `num_output_units`]. This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting `bias_init` to `None`. -- GitLab From 481dca1987e030f9986ce16ae05142617d631641 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 29 Mar 2018 07:55:23 -0700 Subject: [PATCH 668/960] Default disable including the coordinator in the TPU job (#18073) * Default disable including the coordinator in the TPU job * Fix the test --- .../python/training/tpu_cluster_resolver.py | 2 +- .../python/training/tpu_cluster_resolver_test.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 300b19733e..95c5c920aa 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -73,7 +73,7 @@ class TPUClusterResolver(ClusterResolver): zone=None, project=None, job_name='worker', - coordinator_name='coordinator', + coordinator_name=None, coordinator_address=None, credentials='default', service=None): diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 48c3f6bb4f..e1e3e6867a 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -117,7 +117,8 @@ class TPUClusterResolverTest(test.TestCase): zone=None, tpu=['test-tpu-1'], credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) + service=self.mock_service_client(tpu_map=tpu_map), + coordinator_name='coordinator') actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ @@ -170,6 +171,7 @@ class TPUClusterResolverTest(test.TestCase): project='test-project', zone='us-central1-c', tpu=['test-tpu-1'], + coordinator_name='coordinator', coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) @@ -196,6 +198,7 @@ class TPUClusterResolverTest(test.TestCase): project='test-project', zone='us-central1-c', tpu='test-tpu-1', + coordinator_name='coordinator', coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) @@ -239,7 +242,8 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( tpu='test-tpu-1', credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) + service=self.mock_service_client(tpu_map=tpu_map), + coordinator_name='coordinator') actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ -- GitLab From 76c569a29ec33d1965757eeed1bdc317f2fb5e87 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Thu, 29 Mar 2018 07:56:15 -0700 Subject: [PATCH 669/960] Add meta-distribution which reshapes batch dims. PiperOrigin-RevId: 190930846 --- tensorflow/contrib/distributions/BUILD | 14 + tensorflow/contrib/distributions/__init__.py | 4 +- .../python/kernel_tests/batch_reshape_test.py | 531 ++++++++++++++++++ .../distributions/python/ops/batch_reshape.py | 333 +++++++++++ 4 files changed, 881 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/batch_reshape.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 231abaa2f3..de08eb491b 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -456,6 +456,20 @@ cuda_py_test( ], ) +cuda_py_test( + name = "batch_reshape_test", + size = "small", + srcs = ["python/kernel_tests/batch_reshape_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "sample_stats_test", size = "medium", diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 61c411271d..4d4489468d 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -24,6 +24,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops.autoregressive import * +from tensorflow.contrib.distributions.python.ops.batch_reshape import * from tensorflow.contrib.distributions.python.ops.binomial import * from tensorflow.contrib.distributions.python.ops.cauchy import * from tensorflow.contrib.distributions.python.ops.chi2 import * @@ -96,9 +97,10 @@ _allowed_symbols = [ 'ReparameterizationType', 'Distribution', 'Autoregressive', - 'Binomial', + 'BatchReshape', 'Bernoulli', 'Beta', + 'Binomial', 'BetaWithSoftplusConcentration', 'Categorical', 'Chi2', diff --git a/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py b/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py new file mode 100644 index 0000000000..4d2f40e27f --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/batch_reshape_test.py @@ -0,0 +1,531 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for BatchReshape.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import batch_reshape as batch_reshape_lib +from tensorflow.contrib.distributions.python.ops import mvn_diag as mvn_lib +from tensorflow.contrib.distributions.python.ops import wishart as wishart_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.platform import test + + +class _BatchReshapeTest(object): + + def make_wishart(self, dims, new_batch_shape, old_batch_shape): + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = self.dtype([ + [[1., 0.5], + [0.5, 1.]], + [[0.5, 0.25], + [0.25, 0.75]], + ]) + scale = np.reshape(np.concatenate([scale, scale], axis=0), + old_batch_shape + [dims, dims]) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + wishart = wishart_lib.WishartFull(df=5, scale=scale_ph) + reshape_wishart = batch_reshape_lib.BatchReshape( + distribution=wishart, + batch_shape=new_batch_shape_ph, + validate_args=True) + + return wishart, reshape_wishart + + def test_matrix_variate_sample_and_log_prob(self): + dims = 2 + new_batch_shape = [4] + old_batch_shape = [2, 2] + wishart, reshape_wishart = self.make_wishart( + dims, new_batch_shape, old_batch_shape) + + batch_shape = reshape_wishart.batch_shape_tensor() + event_shape = reshape_wishart.event_shape_tensor() + + expected_sample_shape = [3, 1] + new_batch_shape + [dims, dims] + x = wishart.sample([3, 1], seed=42) + expected_sample = array_ops.reshape(x, expected_sample_shape) + actual_sample = reshape_wishart.sample([3, 1], seed=42) + + expected_log_prob_shape = [3, 1] + new_batch_shape + expected_log_prob = array_ops.reshape( + wishart.log_prob(x), expected_log_prob_shape) + actual_log_prob = reshape_wishart.log_prob(expected_sample) + + with self.test_session() as sess: + [ + batch_shape_, + event_shape_, + expected_sample_, actual_sample_, + expected_log_prob_, actual_log_prob_, + ] = sess.run([ + batch_shape, + event_shape, + expected_sample, actual_sample, + expected_log_prob, actual_log_prob, + ]) + + self.assertAllEqual(new_batch_shape, batch_shape_) + self.assertAllEqual([dims, dims], event_shape_) + self.assertAllClose(expected_sample_, actual_sample_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_log_prob_, actual_log_prob_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(new_batch_shape, reshape_wishart.batch_shape) + self.assertAllEqual([dims, dims], reshape_wishart.event_shape) + self.assertAllEqual(expected_sample_shape, actual_sample.shape) + self.assertAllEqual(expected_log_prob_shape, actual_log_prob.shape) + + def test_matrix_variate_stats(self): + dims = 2 + new_batch_shape = [4] + old_batch_shape = [2, 2] + wishart, reshape_wishart = self.make_wishart( + dims, new_batch_shape, old_batch_shape) + + expected_scalar_stat_shape = new_batch_shape + expected_matrix_stat_shape = new_batch_shape + [dims, dims] + + expected_entropy = array_ops.reshape( + wishart.entropy(), expected_scalar_stat_shape) + actual_entropy = reshape_wishart.entropy() + + expected_mean = array_ops.reshape( + wishart.mean(), expected_matrix_stat_shape) + actual_mean = reshape_wishart.mean() + + expected_mode = array_ops.reshape( + wishart.mode(), expected_matrix_stat_shape) + actual_mode = reshape_wishart.mode() + + expected_stddev = array_ops.reshape( + wishart.stddev(), expected_matrix_stat_shape) + actual_stddev = reshape_wishart.stddev() + + expected_variance = array_ops.reshape( + wishart.variance(), expected_matrix_stat_shape) + actual_variance = reshape_wishart.variance() + + with self.test_session() as sess: + [ + expected_entropy_, actual_entropy_, + expected_mean_, actual_mean_, + expected_mode_, actual_mode_, + expected_stddev_, actual_stddev_, + expected_variance_, actual_variance_, + ] = sess.run([ + expected_entropy, actual_entropy, + expected_mean, actual_mean, + expected_mode, actual_mode, + expected_stddev, actual_stddev, + expected_variance, actual_variance, + ]) + + self.assertAllClose(expected_entropy_, actual_entropy_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mean_, actual_mean_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mode_, actual_mode_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_stddev_, actual_stddev_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_variance_, actual_variance_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(expected_scalar_stat_shape, actual_entropy.shape) + self.assertAllEqual(expected_matrix_stat_shape, actual_mean.shape) + self.assertAllEqual(expected_matrix_stat_shape, actual_mode.shape) + self.assertAllEqual(expected_matrix_stat_shape, actual_stddev.shape) + self.assertAllEqual(expected_matrix_stat_shape, actual_variance.shape) + + def make_normal(self, new_batch_shape, old_batch_shape): + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = self.dtype(0.5 + np.arange( + np.prod(old_batch_shape)).reshape(old_batch_shape)) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + normal = normal_lib.Normal(loc=self.dtype(0), scale=scale_ph) + reshape_normal = batch_reshape_lib.BatchReshape( + distribution=normal, + batch_shape=new_batch_shape_ph, + validate_args=True) + return normal, reshape_normal + + def test_scalar_variate_sample_and_log_prob(self): + new_batch_shape = [2, 2] + old_batch_shape = [4] + + normal, reshape_normal = self.make_normal( + new_batch_shape, old_batch_shape) + + batch_shape = reshape_normal.batch_shape_tensor() + event_shape = reshape_normal.event_shape_tensor() + + expected_sample_shape = new_batch_shape + x = normal.sample(seed=52) + expected_sample = array_ops.reshape(x, expected_sample_shape) + actual_sample = reshape_normal.sample(seed=52) + + expected_log_prob_shape = new_batch_shape + expected_log_prob = array_ops.reshape( + normal.log_prob(x), expected_log_prob_shape) + actual_log_prob = reshape_normal.log_prob(expected_sample) + + with self.test_session() as sess: + [ + batch_shape_, + event_shape_, + expected_sample_, actual_sample_, + expected_log_prob_, actual_log_prob_, + ] = sess.run([ + batch_shape, + event_shape, + expected_sample, actual_sample, + expected_log_prob, actual_log_prob, + ]) + self.assertAllEqual(new_batch_shape, batch_shape_) + self.assertAllEqual([], event_shape_) + self.assertAllClose(expected_sample_, actual_sample_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_log_prob_, actual_log_prob_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(new_batch_shape, reshape_normal.batch_shape) + self.assertAllEqual([], reshape_normal.event_shape) + self.assertAllEqual(expected_sample_shape, actual_sample.shape) + self.assertAllEqual(expected_log_prob_shape, actual_log_prob.shape) + + def test_scalar_variate_stats(self): + new_batch_shape = [2, 2] + old_batch_shape = [4] + + normal, reshape_normal = self.make_normal(new_batch_shape, old_batch_shape) + + expected_scalar_stat_shape = new_batch_shape + + expected_entropy = array_ops.reshape( + normal.entropy(), expected_scalar_stat_shape) + actual_entropy = reshape_normal.entropy() + + expected_mean = array_ops.reshape( + normal.mean(), expected_scalar_stat_shape) + actual_mean = reshape_normal.mean() + + expected_mode = array_ops.reshape( + normal.mode(), expected_scalar_stat_shape) + actual_mode = reshape_normal.mode() + + expected_stddev = array_ops.reshape( + normal.stddev(), expected_scalar_stat_shape) + actual_stddev = reshape_normal.stddev() + + expected_variance = array_ops.reshape( + normal.variance(), expected_scalar_stat_shape) + actual_variance = reshape_normal.variance() + + with self.test_session() as sess: + [ + expected_entropy_, actual_entropy_, + expected_mean_, actual_mean_, + expected_mode_, actual_mode_, + expected_stddev_, actual_stddev_, + expected_variance_, actual_variance_, + ] = sess.run([ + expected_entropy, actual_entropy, + expected_mean, actual_mean, + expected_mode, actual_mode, + expected_stddev, actual_stddev, + expected_variance, actual_variance, + ]) + self.assertAllClose(expected_entropy_, actual_entropy_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mean_, actual_mean_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mode_, actual_mode_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_stddev_, actual_stddev_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_variance_, actual_variance_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(expected_scalar_stat_shape, actual_entropy.shape) + self.assertAllEqual(expected_scalar_stat_shape, actual_mean.shape) + self.assertAllEqual(expected_scalar_stat_shape, actual_mode.shape) + self.assertAllEqual(expected_scalar_stat_shape, actual_stddev.shape) + self.assertAllEqual(expected_scalar_stat_shape, actual_variance.shape) + + def make_mvn(self, dims, new_batch_shape, old_batch_shape): + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = np.ones(old_batch_shape + [dims], self.dtype) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + mvn = mvn_lib.MultivariateNormalDiag(scale_diag=scale_ph) + reshape_mvn = batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True) + return mvn, reshape_mvn + + def test_vector_variate_sample_and_log_prob(self): + dims = 3 + new_batch_shape = [2, 1] + old_batch_shape = [2] + mvn, reshape_mvn = self.make_mvn( + dims, new_batch_shape, old_batch_shape) + + batch_shape = reshape_mvn.batch_shape_tensor() + event_shape = reshape_mvn.event_shape_tensor() + + expected_sample_shape = [3] + new_batch_shape + [dims] + x = mvn.sample(3, seed=62) + expected_sample = array_ops.reshape(x, expected_sample_shape) + actual_sample = reshape_mvn.sample(3, seed=62) + + expected_log_prob_shape = [3] + new_batch_shape + expected_log_prob = array_ops.reshape( + mvn.log_prob(x), expected_log_prob_shape) + actual_log_prob = reshape_mvn.log_prob(expected_sample) + + with self.test_session() as sess: + [ + batch_shape_, + event_shape_, + expected_sample_, actual_sample_, + expected_log_prob_, actual_log_prob_, + ] = sess.run([ + batch_shape, + event_shape, + expected_sample, actual_sample, + expected_log_prob, actual_log_prob, + ]) + self.assertAllEqual(new_batch_shape, batch_shape_) + self.assertAllEqual([dims], event_shape_) + self.assertAllClose(expected_sample_, actual_sample_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_log_prob_, actual_log_prob_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(new_batch_shape, reshape_mvn.batch_shape) + self.assertAllEqual([dims], reshape_mvn.event_shape) + self.assertAllEqual(expected_sample_shape, actual_sample.shape) + self.assertAllEqual(expected_log_prob_shape, actual_log_prob.shape) + + def test_vector_variate_stats(self): + dims = 3 + new_batch_shape = [2, 1] + old_batch_shape = [2] + mvn, reshape_mvn = self.make_mvn( + dims, new_batch_shape, old_batch_shape) + + expected_scalar_stat_shape = new_batch_shape + + expected_entropy = array_ops.reshape( + mvn.entropy(), expected_scalar_stat_shape) + actual_entropy = reshape_mvn.entropy() + + expected_vector_stat_shape = new_batch_shape + [dims] + + expected_mean = array_ops.reshape( + mvn.mean(), expected_vector_stat_shape) + actual_mean = reshape_mvn.mean() + + expected_mode = array_ops.reshape( + mvn.mode(), expected_vector_stat_shape) + actual_mode = reshape_mvn.mode() + + expected_stddev = array_ops.reshape( + mvn.stddev(), expected_vector_stat_shape) + actual_stddev = reshape_mvn.stddev() + + expected_variance = array_ops.reshape( + mvn.variance(), expected_vector_stat_shape) + actual_variance = reshape_mvn.variance() + + expected_matrix_stat_shape = new_batch_shape + [dims, dims] + + expected_covariance = array_ops.reshape( + mvn.covariance(), expected_matrix_stat_shape) + actual_covariance = reshape_mvn.covariance() + + with self.test_session() as sess: + [ + expected_entropy_, actual_entropy_, + expected_mean_, actual_mean_, + expected_mode_, actual_mode_, + expected_stddev_, actual_stddev_, + expected_variance_, actual_variance_, + expected_covariance_, actual_covariance_, + ] = sess.run([ + expected_entropy, actual_entropy, + expected_mean, actual_mean, + expected_mode, actual_mode, + expected_stddev, actual_stddev, + expected_variance, actual_variance, + expected_covariance, actual_covariance, + ]) + self.assertAllClose(expected_entropy_, actual_entropy_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mean_, actual_mean_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_mode_, actual_mode_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_stddev_, actual_stddev_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_variance_, actual_variance_, + atol=0., rtol=1e-6) + self.assertAllClose(expected_covariance_, actual_covariance_, + atol=0., rtol=1e-6) + if not self.is_static_shape: + return + self.assertAllEqual(expected_scalar_stat_shape, actual_entropy.shape) + self.assertAllEqual(expected_vector_stat_shape, actual_mean.shape) + self.assertAllEqual(expected_vector_stat_shape, actual_mode.shape) + self.assertAllEqual(expected_vector_stat_shape, actual_stddev.shape) + self.assertAllEqual(expected_vector_stat_shape, actual_variance.shape) + self.assertAllEqual(expected_matrix_stat_shape, actual_covariance.shape) + + def test_bad_reshape_size(self): + dims = 2 + new_batch_shape = [2, 3] + old_batch_shape = [2] # 2 != 2*3 + + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = np.ones(old_batch_shape + [dims], self.dtype) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + mvn = mvn_lib.MultivariateNormalDiag(scale_diag=scale_ph) + + if self.is_static_shape: + with self.assertRaisesRegexp( + ValueError, (r"`batch_shape` size \(6\) must match " + r"`distribution\.batch_shape` size \(2\)")): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True) + + else: + with self.test_session(): + with self.assertRaisesOpError(r"`batch_shape` size must match " + r"`distributions.batch_shape` size"): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True).sample().eval() + + def test_non_positive_shape(self): + dims = 2 + new_batch_shape = [-1, -2] # -1*-2=2 so will pass size check. + old_batch_shape = [2] + + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = np.ones(old_batch_shape + [dims], self.dtype) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + mvn = mvn_lib.MultivariateNormalDiag(scale_diag=scale_ph) + + if self.is_static_shape: + with self.assertRaisesRegexp(ValueError, r".*must be positive.*"): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True) + + else: + with self.test_session(): + with self.assertRaisesOpError(r".*must be positive.*"): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True).sample().eval() + + def test_non_vector_shape(self): + dims = 2 + new_batch_shape = 2 + old_batch_shape = [2] + + new_batch_shape_ph = ( + constant_op.constant(np.int32(new_batch_shape)) if self.is_static_shape + else array_ops.placeholder_with_default( + np.int32(new_batch_shape), shape=None)) + + scale = np.ones(old_batch_shape + [dims], self.dtype) + scale_ph = array_ops.placeholder_with_default( + scale, shape=scale.shape if self.is_static_shape else None) + mvn = mvn_lib.MultivariateNormalDiag(scale_diag=scale_ph) + + if self.is_static_shape: + with self.assertRaisesRegexp(ValueError, r".*must be a vector.*"): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True) + + else: + with self.test_session(): + with self.assertRaisesOpError(r".*must be a vector.*"): + batch_reshape_lib.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape_ph, + validate_args=True).sample().eval() + + +class BatchReshapeStaticTest(_BatchReshapeTest, test.TestCase): + + dtype = np.float32 + is_static_shape = True + + +class BatchReshapeDynamicTest(_BatchReshapeTest, test.TestCase): + + dtype = np.float64 + is_static_shape = False + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/batch_reshape.py b/tensorflow/contrib/distributions/python/ops/batch_reshape.py new file mode 100644 index 0000000000..c7ee9b2117 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/batch_reshape.py @@ -0,0 +1,333 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The BatchReshape distribution.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import distribution as distribution_lib + + +__all__ = [ + "BatchReshape", +] + + +class BatchReshape(distribution_lib.Distribution): + """The Batch-Reshaping distribution. + + This "meta-distribution" reshapes the batch dimensions of another + distribution. + + Note: Unlike `tf.reshape`, the `BatchReshape` distribution does not support + `-1` for flattening. + + #### Examples + + ```python + tfd = tf.contrib.distributions + + dtype = np.float32 + dims = 2 + new_batch_shape = [1, 2, 3] + old_batch_shape = [6] + + scale = np.ones(old_batch_shape + [dims], dtype) + mvn = tfd.MultivariateNormalDiag(scale_diag=scale) + reshape_mvn = tfd.BatchReshape( + distribution=mvn, + batch_shape=new_batch_shape, + validate_args=True) + + reshape_mvn.batch_shape + # ==> [1, 2, 3] + + x = reshape_mvn.sample(sample_shape=[4, 5]) + x.shape + # ==> [4, 5, 1, 2, 3, 2] == sample_shape + new_batch_shape + [dims] + + reshape_mvn.log_prob(x).shape + # ==> [4, 5, 1, 2, 3] == sample_shape + new_batch_shape + ``` + + """ + + def __init__(self, + distribution, + batch_shape, + validate_args=False, + allow_nan_stats=True, + name=None): + """Construct BatchReshape distribution. + + Args: + distribution: The base distribution instance to reshape. Typically an + instance of `Distribution`. + batch_shape: Positive `int`-like vector-shaped `Tensor` representing the + new shape of the batch dimensions. + validate_args: Python `bool`, default `False`. When `True` distribution + parameters are checked for validity despite possibly degrading runtime + performance. When `False` invalid inputs may silently render incorrect + outputs. + allow_nan_stats: Python `bool`, default `True`. When `True`, statistics + (e.g., mean, mode, variance) use the value "`NaN`" to indicate the + result is undefined. When `False`, an exception is raised if one or + more of the statistic's batch members are undefined. + name: The name to give Ops created by the initializer. + Default value: `"BatchReshape" + distribution.name`. + + Raises: + ValueError: if `batch_shape` is not a vector. + ValueError: if `batch_shape` has non-positive elements. + ValueError: if `batch_shape` size is not the same as a + `distribution.batch_shape` size. + """ + parameters = locals() + name = name or "BatchReshape" + distribution.name + self._distribution = distribution + with ops.name_scope(name, values=[batch_shape]) as name: + self._batch_shape_ = ops.convert_to_tensor( + batch_shape, + dtype=dtypes.int32, + name="batch_shape") + self._batch_shape_static = tensor_util.constant_value(self._batch_shape_) + if self._batch_shape_static is not None: + self._batch_shape_static = np.int32(self._batch_shape_static) + self._runtime_assertions = make_runtime_assertions( + self._distribution, + self._batch_shape_, + validate_args, + self._batch_shape_static) + super(BatchReshape, self).__init__( + dtype=self._distribution.dtype, + reparameterization_type=self._distribution.reparameterization_type, + validate_args=validate_args, + allow_nan_stats=allow_nan_stats, + parameters=parameters, + graph_parents=( + [self._batch_shape_] + + self._distribution._graph_parents), # pylint: disable=protected-access + name=name) + + @property + def distribution(self): + return self._distribution + + def _batch_shape_tensor(self): + with ops.control_dependencies(self._runtime_assertions): + return array_ops.identity(self._batch_shape_) + + def _batch_shape(self): + return tensor_shape.TensorShape(self._batch_shape_static) + + def _event_shape_tensor(self): + with ops.control_dependencies(self._runtime_assertions): + return array_ops.identity(self.distribution.event_shape_tensor()) + + def _event_shape(self): + return self.distribution.event_shape + + def _sample_n(self, n, seed=None): + with ops.control_dependencies(self._runtime_assertions): + x = self.distribution.sample(sample_shape=n, seed=seed) + new_shape = array_ops.concat([ + [n], + self.batch_shape_tensor(), + self.event_shape_tensor(), + ], axis=0) + return array_ops.reshape(x, new_shape) + + def _log_prob(self, x): + return self._call_reshape_input_output( + self.distribution.log_prob, x) + + def _prob(self, x): + return self._call_reshape_input_output( + self.distribution.prob, x) + + def _log_cdf(self, x): + return self._call_reshape_input_output( + self.distribution.log_cdf, x) + + def _cdf(self, x): + return self._call_reshape_input_output( + self.distribution.cdf, x) + + def _log_survival_function(self, x): + return self._call_reshape_input_output( + self.distribution.log_survival_function, x) + + def _survival_function(self, x): + return self._call_reshape_input_output( + self.distribution.survival_function, x) + + def _entropy(self): + return self._call_and_reshape_output( + self.distribution.entropy, + [], + [tensor_shape.scalar()]) + + def _mean(self): + return self._call_and_reshape_output(self.distribution.mean) + + def _mode(self): + return self._call_and_reshape_output(self.distribution.mode) + + def _stddev(self): + return self._call_and_reshape_output(self.distribution.stddev) + + def _variance(self): + return self._call_and_reshape_output(self.distribution.variance) + + def _covariance(self): + return self._call_and_reshape_output( + self.distribution.covariance, + [self.event_shape_tensor()]*2, + [self.event_shape]*2) + + def _sample_shape(self, x): + """Computes graph and static `sample_shape`.""" + x_ndims = (array_ops.rank(x) if x.shape.ndims is None else x.shape.ndims) + event_ndims = (array_ops.size(self.event_shape_tensor()) + if self.event_shape.ndims is None + else self.event_shape.ndims) + batch_ndims = (array_ops.size(self.batch_shape_tensor()) + if self.batch_shape.ndims is None + else self.batch_shape.ndims) + sample_ndims = x_ndims - batch_ndims - event_ndims + if isinstance(sample_ndims, int): + static_sample_shape = x.shape[:sample_ndims] + else: + static_sample_shape = tensor_shape.TensorShape(None) + if static_sample_shape.is_fully_defined(): + sample_shape = np.int32(static_sample_shape.as_list()) + else: + sample_shape = array_ops.shape(x)[:sample_ndims] + return sample_shape, static_sample_shape + + def _call_reshape_input_output(self, fn, x): + """Calls `fn`, appropriately reshaping its input `x` and output.""" + with ops.control_dependencies(self._runtime_assertions): + sample_shape, static_sample_shape = self._sample_shape(x) + old_shape = array_ops.concat([ + sample_shape, + self.distribution.batch_shape_tensor(), + self.event_shape_tensor(), + ], axis=0) + result = fn(array_ops.reshape(x, old_shape)) + new_shape = array_ops.concat([ + sample_shape, + self.batch_shape_tensor(), + ], axis=0) + result = array_ops.reshape(result, new_shape) + if (static_sample_shape.ndims is not None and + self.batch_shape.ndims is not None): + new_shape = static_sample_shape.concatenate(self.batch_shape) + result.set_shape(result.shape.merge_with(new_shape)) + return result + + def _call_and_reshape_output( + self, + fn, + event_shape_list=None, + static_event_shape_list=None): + """Calls `fn` and appropriately reshapes its output.""" + with ops.control_dependencies(self._runtime_assertions): + if event_shape_list is None: + event_shape_list = [self._event_shape_tensor()] + if static_event_shape_list is None: + static_event_shape_list = [self.event_shape] + new_shape = array_ops.concat( + [self.batch_shape_tensor()] + event_shape_list, + axis=0) + result = array_ops.reshape(fn(), new_shape) + if (self.batch_shape.ndims is not None and + self.event_shape.ndims is not None): + event_shape = tensor_shape.TensorShape([]) + for rss in static_event_shape_list: + event_shape = event_shape.concatenate(rss) + static_shape = result.shape.merge_with( + self.batch_shape.concatenate(event_shape)) + result.set_shape(static_shape) + return result + + +def make_runtime_assertions( + distribution, + batch_shape, + validate_args, + batch_shape_static): + """Helper to __init__ which makes or raises assertions.""" + runtime_assertions = [] + + if batch_shape.shape.ndims is not None: + if batch_shape.shape.ndims != 1: + raise ValueError("`batch_shape` must be a vector " + "(saw rank: {}).".format( + batch_shape.shape.ndims)) + elif validate_args: + runtime_assertions += [ + check_ops.assert_rank( + batch_shape, + 1, + message="`batch_shape` must be a vector.", + name="assert_batch_shape_is_vector"), + ] + + batch_size_static = np.prod(batch_shape_static) + dist_batch_size_static = ( + None if not distribution.batch_shape.is_fully_defined() + else np.prod(distribution.batch_shape).value) + + if batch_size_static is not None and dist_batch_size_static is not None: + if batch_size_static != dist_batch_size_static: + raise ValueError("`batch_shape` size ({}) must match " + "`distribution.batch_shape` size ({}).".format( + batch_size_static, + dist_batch_size_static)) + elif validate_args: + runtime_assertions += [ + check_ops.assert_equal( + math_ops.reduce_prod(batch_shape), + math_ops.reduce_prod(distribution.batch_shape_tensor()), + message=("`batch_shape` size must match " + "`distributions.batch_shape` size."), + name="assert_batch_size"), + ] + + if batch_shape_static is not None: + if np.any(batch_shape_static < 1): + raise ValueError("`batch_shape` elements must be positive " + "(i.e., larger than zero).") + elif validate_args: + runtime_assertions += [ + check_ops.assert_positive( + batch_shape, + message=("`batch_shape` elements must be positive " + "(i.e., larger than zero)."), + name="assert_batch_shape_positive") + ] + + return runtime_assertions -- GitLab From 86868a156860877fc6e8c3393baf4942b6b7dbd4 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Thu, 29 Mar 2018 07:59:46 -0700 Subject: [PATCH 670/960] Disable the toco binary in pip feature until it can used shared libs (#18061) * Disable the toco binary in pip feature until it can used shared libraries. The binary size was doubled by the saved model change. Since to process saved models most of the TensorFlow runtime is needed. A workaround to this is in the works and should be submitted in the next couple weeks. * Fix linter errors with unused tensorflow libs * Mollify the linter by removing os. --- tensorflow/contrib/lite/toco/python/BUILD | 3 --- tensorflow/contrib/lite/toco/python/toco_wrapper.py | 13 +++++++++---- tensorflow/tools/pip_package/build_pip_package.sh | 4 +++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/toco/python/BUILD b/tensorflow/contrib/lite/toco/python/BUILD index 17115047d2..86d91bd3be 100644 --- a/tensorflow/contrib/lite/toco/python/BUILD +++ b/tensorflow/contrib/lite/toco/python/BUILD @@ -45,9 +45,6 @@ py_binary( name = "toco_wrapper", srcs = ["toco_wrapper.py"], srcs_version = "PY2AND3", - deps = [ - "//tensorflow:tensorflow_py", - ], ) tf_py_test( diff --git a/tensorflow/contrib/lite/toco/python/toco_wrapper.py b/tensorflow/contrib/lite/toco/python/toco_wrapper.py index e39b5f22c7..6d6b500d7e 100644 --- a/tensorflow/contrib/lite/toco/python/toco_wrapper.py +++ b/tensorflow/contrib/lite/toco/python/toco_wrapper.py @@ -22,14 +22,19 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import sys -import tensorflow as tf def main(): # Pip installs the binary in aux-bin off of main site-package install. # Just find it and exec, passing all arguments in the process. # TODO(aselle): it is unfortunate to use all of tensorflow to lookup binary. - binary = os.path.join(tf.__path__[0], 'aux-bin/toco') - os.execvp(binary, sys.argv) + print("""TOCO from pip install is currently not working on command line. +Please use the python TOCO API or use +bazel run tensorflow/contrib/lite:toco -- from a TensorFlow source dir. +""") + sys.exit(1) + # TODO(aselle): Replace this when we find a way to run toco without + # blowing up executable size. + # binary = os.path.join(tf.__path__[0], 'aux-bin/toco') + # os.execvp(binary, sys.argv) diff --git a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index dc31e4c5f7..feb3114bde 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -139,7 +139,9 @@ function main() { fi mkdir "${TMPDIR}/tensorflow/aux-bin" # Install toco as a binary in aux-bin. - cp bazel-bin/tensorflow/contrib/lite/toco/toco ${TMPDIR}/tensorflow/aux-bin/ + # TODO(aselle): Re-enable this when we find a way to do it without doubling + # the whl size (over the limit). + # cp bazel-bin/tensorflow/contrib/lite/toco/toco ${TMPDIR}/tensorflow/aux-bin/ fi # protobuf pip package doesn't ship with header files. Copy the headers -- GitLab From 608fb59318ca0a1f2a05fae4d23b06cf6e162300 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Thu, 29 Mar 2018 08:19:17 -0700 Subject: [PATCH 671/960] [tf.data] Optimizations on make_csv_dataset internals. PiperOrigin-RevId: 190933143 --- tensorflow/contrib/data/python/ops/readers.py | 61 ++++++++++++++----- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 95edca6cdd..9a48aa02fb 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -18,9 +18,11 @@ from __future__ import division from __future__ import print_function import csv +from math import ceil import numpy as np +from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.python.data.ops import dataset_ops @@ -176,6 +178,9 @@ def make_csv_dataset( shuffle_buffer_size=10000, shuffle_seed=None, prefetch_buffer_size=1, + num_parallel_reads=1, + num_parallel_parser_calls=2, + sloppy=False, default_float_type=dtypes.float32, num_rows_for_inference=100, ): @@ -231,6 +236,15 @@ def make_csv_dataset( prefetch_buffer_size: An int specifying the number of feature batches to prefetch for performance improvement. Recommended value is the number of batches consumed per training step. + num_parallel_reads: Number of threads used to read CSV records from files. + If >1, the results will be interleaved. + num_parallel_parser_calls: Number of parallel invocations of the CSV parsing + function on CSV records. + sloppy: If `True`, reading performance will be improved at + the cost of non-deterministic ordering. If `False`, the order of elements + produced is deterministic prior to shuffling (elements are still + randomized if `shuffle=True`. Note that if the seed is set, then order + of elements after shuffling is deterministic). Defaults to `False`. default_float_type: Either `tf.float32` or `tf.float64`. If defaults are not provided, float-like strings are interpreted to be this type. num_rows_for_inference: Number of rows of a file to use for type inference @@ -247,11 +261,16 @@ def make_csv_dataset( Raises: ValueError: If any of the arguments is malformed. """ - filenames = _get_file_names(file_pattern, shuffle) + # Create dataset of all matching filenames + filenames = _get_file_names(file_pattern, False) + dataset = dataset_ops.Dataset.from_tensor_slices(filenames) + if shuffle: + dataset = dataset.shuffle(len(filenames), shuffle_seed) + + # Clean arguments; figure out column names and defaults if comment is not None and len(comment) != 1: raise ValueError("`comment` arg must be a single-character string or None") - # Clean arguments; figure out column names and defaults if column_names is None: if not header: raise ValueError("Cannot infer column names without a header line.") @@ -272,7 +291,6 @@ def make_csv_dataset( filenames, len(column_names), field_delim, use_quote_delim, na_value, header, comment, default_float_type, num_rows_for_inference) - dataset = dataset_ops.Dataset.from_tensor_slices(filenames) if label_name is not None and label_name not in column_names: raise ValueError("`label_name` provided must be one of the columns.") @@ -311,16 +329,31 @@ def make_csv_dataset( return features, label return features - # TODO(rachelim): interleave records from files for better shuffling - dataset = dataset.flat_map(filename_to_dataset) - # TODO(rachelim): use fused shuffle_and_repeat for perf - if shuffle: + # Read files sequentially or in parallel + dataset = dataset.apply( + interleave_ops.parallel_interleave( + filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy)) + + if num_epochs != 1 and shuffle: + # Use shuffle_and_repeat for perf + dataset = dataset.apply( + shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs, + shuffle_seed)) + elif shuffle: dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed) - if num_epochs != 1: + elif num_epochs != 1: dataset = dataset.repeat(num_epochs) - dataset = dataset.batch(batch_size) - dataset = dataset.map(decode_csv) + # Use map_and_batch for perf + # TODO(b/76425672): use num_parallel_calls for better performance tuning when + # that is added + dataset = dataset.apply( + batching.map_and_batch( + map_func=decode_csv, + batch_size=batch_size, + num_parallel_batches=int( + ceil(num_parallel_parser_calls / batch_size)))) + dataset = dataset.prefetch(prefetch_buffer_size) return dataset @@ -416,12 +449,10 @@ def make_batched_features_dataset(file_pattern, `Tensor` or `SparseTensor` objects. """ # Create dataset of all matching filenames + filenames = _get_file_names(file_pattern, False) + dataset = dataset_ops.Dataset.from_tensor_slices(filenames) if shuffle: - dataset = dataset_ops.Dataset.list_files(file_pattern, shuffle=True) - else: - # TODO(b/73959787): Use Dataset.list_files() once ordering is deterministic. - filenames = _get_file_names(file_pattern, shuffle) - dataset = dataset_ops.Dataset.from_tensor_slices(filenames) + dataset = dataset.shuffle(len(filenames), shuffle_seed) # Read `Example` records from files as tensor objects. if reader_args is None: -- GitLab From a98351b9c6c691b6873ef5a5c3e8e48bf42bd14c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 09:41:53 -0700 Subject: [PATCH 672/960] Upgrade Eigen version. PiperOrigin-RevId: 190942370 --- tensorflow/core/kernels/cwise_ops.h | 21 --------------------- tensorflow/workspace.bzl | 8 ++++---- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 06918075a4..a80905d145 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -27,27 +27,6 @@ limitations under the License. #include "tensorflow/core/kernels/bounds_check.h" namespace Eigen { -namespace numext { -#if GOOGLE_CUDA -template <> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp( - const std::complex& x) { - auto com = ::expf(x.real()); - auto res_real = com * ::cosf(x.imag()); - auto res_imag = com * ::sinf(x.imag()); - return std::complex(res_real, res_imag); -} -template <> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp( - const std::complex& x) { - auto com = ::exp(x.real()); - auto res_real = com * ::cos(x.imag()); - auto res_imag = com * ::sin(x.imag()); - return std::complex(res_real, res_imag); -} -#endif -} // namespace numext - namespace internal { template diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9fcbfb664b..0e31358236 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -74,11 +74,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "eigen_archive", urls = [ - "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz", - "https://bitbucket.org/eigen/eigen/get/2355b229ea4c.tar.gz", + "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/6913f0cf7d06.tar.gz", + "https://bitbucket.org/eigen/eigen/get/6913f0cf7d06.tar.gz", ], - sha256 = "0cadb31a35b514bf2dfd6b5d38205da94ef326ec6908fc3fd7c269948467214f", - strip_prefix = "eigen-eigen-2355b229ea4c", + sha256 = "791b836cacd03e20bae5bdd25f1c4a5505a0a9975ba94a61eb4e2631fbd1d53a", + strip_prefix = "eigen-eigen-6913f0cf7d06", build_file = str(Label("//third_party:eigen.BUILD")), patch_file = str(Label("//third_party:eigen_fix_cuda_compilation.patch")) ) -- GitLab From ae3d20f9aef78554f0d0f5eec13982e9802a45d2 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 29 Mar 2018 09:42:05 -0700 Subject: [PATCH 673/960] Add bitcast for equal bitwidth casts. Map bitcasts to XLA bitcast HLO if the bitwidth of the elementtype is the same. PiperOrigin-RevId: 190942398 --- tensorflow/compiler/tests/unary_ops_test.py | 14 ++++++ tensorflow/compiler/tf2xla/kernels/cast_op.cc | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 3d3e112f48..a8ab235378 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -600,6 +600,20 @@ class UnaryOpsTest(XLATestCase): src, expected=dst) + def testBitcast(self): + self._assertOpOutputMatchesExpected( + lambda x: array_ops.bitcast(x, dtypes.int32), + np.array([1, 0x3f800000], np.int32), + expected=np.array([1, 0x3f800000], np.int32)) + self._assertOpOutputMatchesExpected( + lambda x: array_ops.bitcast(x, dtypes.float32), + np.array([1, 0x3f800000], np.int32), + expected=np.array([1e-45, 1.0], np.float32)) + self._assertOpOutputMatchesExpected( + lambda x: array_ops.bitcast(x, dtypes.int32), + np.array([1e-45, 1.0], np.float32), + expected=np.array([1, 0x3f800000], np.int32)) + def testInvertPermutation(self): self._assertOpOutputMatchesExpected( array_ops.invert_permutation, diff --git a/tensorflow/compiler/tf2xla/kernels/cast_op.cc b/tensorflow/compiler/tf2xla/kernels/cast_op.cc index 43a6a747c6..c52b2dcb7e 100644 --- a/tensorflow/compiler/tf2xla/kernels/cast_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/cast_op.cc @@ -62,5 +62,50 @@ class CastOp : public XlaOpKernel { REGISTER_XLA_OP(Name("Cast"), CastOp); +class BitcastOp : public XlaOpKernel { + public: + explicit BitcastOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &src_dtype_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("type", &dst_dtype_)); + OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(src_dtype_, &src_type_)); + OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(dst_dtype_, &dst_type_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationBuilder* builder = ctx->builder(); + xla::ComputationDataHandle input = ctx->Input(0); + xla::ComputationDataHandle output; + + if (src_dtype_ == dst_dtype_) { + output = input; + } else { + // The only complex type in XLA is C64, so error out if the bitcast has a + // complex source or destination type and the bitcast is not trivial. + OP_REQUIRES(ctx, + !xla::primitive_util::IsComplexType(src_type_) && + !xla::primitive_util::IsComplexType(dst_type_), + errors::Unimplemented("Complex types not supported.")); + // XLA bitcast requires that the bit-width of the source and destination + // matches, and currently only the simple lowering is performed. + OP_REQUIRES(ctx, + xla::primitive_util::BitWidth(src_type_) == + xla::primitive_util::BitWidth(dst_type_), + errors::Unimplemented( + "Only bitcasts between equally sized types supported.")); + output = builder->BitcastConvertType(input, dst_type_); + } + + ctx->SetOutput(0, output); + } + + protected: + DataType src_dtype_, dst_dtype_; + xla::PrimitiveType src_type_, dst_type_; + + TF_DISALLOW_COPY_AND_ASSIGN(BitcastOp); +}; + +REGISTER_XLA_OP(Name("Bitcast"), BitcastOp); + } // anonymous namespace } // namespace tensorflow -- GitLab From a2b6c3c124664d682094a1ecfa9cc00cca8ada85 Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Thu, 29 Mar 2018 09:43:19 -0700 Subject: [PATCH 674/960] Added kernels and estimators for Gradient Boosting Trees algorithm. BoostedTreesClassifier and BoostedTreesRegressor are added to tf.estimator. Also some training utility functions are added to tf.contrib.estimator. PiperOrigin-RevId: 190942599 --- tensorflow/contrib/cmake/python_modules.txt | 1 + tensorflow/contrib/cmake/python_protos.txt | 1 + tensorflow/contrib/cmake/tf_core_ops.cmake | 7 +- tensorflow/contrib/cmake/tf_python.cmake | 1 + tensorflow/contrib/estimator/BUILD | 31 + tensorflow/contrib/estimator/__init__.py | 3 + .../python/estimator/boosted_trees.py | 323 ++++ .../python/estimator/boosted_trees_test.py | 207 +++ tensorflow/contrib/makefile/tf_op_files.txt | 6 + .../contrib/makefile/tf_proto_files.txt | 1 + tensorflow/core/BUILD | 3 + ...tedTreesCalculateBestGainsPerFeature.pbtxt | 87 + .../api_def_BoostedTreesCreateEnsemble.pbtxt | 23 + ..._def_BoostedTreesDeserializeEnsemble.pbtxt | 26 + ...BoostedTreesEnsembleResourceHandleOp.pbtxt | 5 + ...pi_def_BoostedTreesGetEnsembleStates.pbtxt | 35 + ...api_def_BoostedTreesMakeStatsSummary.pbtxt | 56 + .../api_def_BoostedTreesPredict.pbtxt | 41 + ...pi_def_BoostedTreesSerializeEnsemble.pbtxt | 23 + .../api_def_BoostedTreesTrainingPredict.pbtxt | 69 + .../api_def_BoostedTreesUpdateEnsemble.pbtxt | 82 + ...ef_IsBoostedTreesEnsembleInitialized.pbtxt | 17 + tensorflow/core/kernels/BUILD | 7 + tensorflow/core/kernels/boosted_trees/BUILD | 89 + .../kernels/boosted_trees/boosted_trees.proto | 113 ++ .../kernels/boosted_trees/prediction_ops.cc | 263 +++ .../kernels/boosted_trees/resource_ops.cc | 189 +++ .../core/kernels/boosted_trees/resources.cc | 301 ++++ .../core/kernels/boosted_trees/resources.h | 221 +++ .../core/kernels/boosted_trees/stats_ops.cc | 296 ++++ .../kernels/boosted_trees/training_ops.cc | 219 +++ tensorflow/core/ops/boosted_trees_ops.cc | 319 ++++ tensorflow/python/BUILD | 22 + tensorflow/python/__init__.py | 2 + tensorflow/python/estimator/BUILD | 48 + .../python/estimator/canned/boosted_trees.py | 736 +++++++++ .../estimator/canned/boosted_trees_test.py | 799 +++++++++ tensorflow/python/estimator/estimator_lib.py | 4 + .../python/kernel_tests/boosted_trees/BUILD | 76 + .../kernel_tests/boosted_trees/__init__.py | 0 .../boosted_trees/prediction_ops_test.py | 926 +++++++++++ .../boosted_trees/resource_ops_test.py | 228 +++ .../boosted_trees/stats_ops_test.py | 289 ++++ .../boosted_trees/training_ops_test.py | 1465 +++++++++++++++++ tensorflow/python/ops/boosted_trees_ops.py | 160 ++ tensorflow/python/training/device_setter.py | 13 +- ....estimator.-boosted-trees-classifier.pbtxt | 54 + ...w.estimator.-boosted-trees-regressor.pbtxt | 54 + .../api/golden/tensorflow.estimator.pbtxt | 8 + 49 files changed, 7939 insertions(+), 10 deletions(-) create mode 100644 tensorflow/contrib/estimator/python/estimator/boosted_trees.py create mode 100644 tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesCreateEnsemble.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesGetEnsembleStates.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesMakeStatsSummary.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesPredict.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesSerializeEnsemble.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesTrainingPredict.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesUpdateEnsemble.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_IsBoostedTreesEnsembleInitialized.pbtxt create mode 100644 tensorflow/core/kernels/boosted_trees/BUILD create mode 100644 tensorflow/core/kernels/boosted_trees/boosted_trees.proto create mode 100644 tensorflow/core/kernels/boosted_trees/prediction_ops.cc create mode 100644 tensorflow/core/kernels/boosted_trees/resource_ops.cc create mode 100644 tensorflow/core/kernels/boosted_trees/resources.cc create mode 100644 tensorflow/core/kernels/boosted_trees/resources.h create mode 100644 tensorflow/core/kernels/boosted_trees/stats_ops.cc create mode 100644 tensorflow/core/kernels/boosted_trees/training_ops.cc create mode 100644 tensorflow/core/ops/boosted_trees_ops.cc create mode 100644 tensorflow/python/estimator/canned/boosted_trees.py create mode 100644 tensorflow/python/estimator/canned/boosted_trees_test.py create mode 100644 tensorflow/python/kernel_tests/boosted_trees/BUILD create mode 100644 tensorflow/python/kernel_tests/boosted_trees/__init__.py create mode 100644 tensorflow/python/kernel_tests/boosted_trees/prediction_ops_test.py create mode 100644 tensorflow/python/kernel_tests/boosted_trees/resource_ops_test.py create mode 100644 tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py create mode 100644 tensorflow/python/kernel_tests/boosted_trees/training_ops_test.py create mode 100644 tensorflow/python/ops/boosted_trees_ops.py create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-classifier.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-boosted-trees-regressor.pbtxt diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index 112b690511..cc7d791042 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -79,6 +79,7 @@ tensorflow/python/keras/_impl/keras/preprocessing tensorflow/python/keras/_impl/keras/utils tensorflow/python/keras/_impl/keras/wrappers tensorflow/python/kernel_tests +tensorflow/python/kernel_tests/boosted_trees tensorflow/python/kernel_tests/distributions tensorflow/python/kernel_tests/linalg tensorflow/python/kernel_tests/random diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt index c03c0c80fe..0c80d529af 100644 --- a/tensorflow/contrib/cmake/python_protos.txt +++ b/tensorflow/contrib/cmake/python_protos.txt @@ -1,4 +1,5 @@ tensorflow/core +tensorflow/core/kernels/boosted_trees tensorflow/core/profiler tensorflow/python tensorflow/contrib/boosted_trees/proto diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index d6712aa2b4..092a48bc6b 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -15,8 +15,9 @@ set(tf_op_lib_names "audio_ops" "array_ops" - "batch_ops" + "batch_ops" "bitwise_ops" + "boosted_trees_ops" "candidate_sampling_ops" "checkpoint_ops" "control_flow_ops" @@ -28,7 +29,7 @@ set(tf_op_lib_names "image_ops" "io_ops" "linalg_ops" - "list_ops" + "list_ops" "lookup_ops" "logging_ops" "manip_ops" @@ -48,7 +49,7 @@ set(tf_op_lib_names "state_ops" "stateless_random_ops" "string_ops" - "summary_ops" + "summary_ops" "training_ops" ) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 31e715b654..b776307924 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -319,6 +319,7 @@ GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") GENERATE_PYTHON_OP_LIB("batch_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") +GENERATE_PYTHON_OP_LIB("boosted_trees_ops") GENERATE_PYTHON_OP_LIB("math_ops") GENERATE_PYTHON_OP_LIB("functional_ops") GENERATE_PYTHON_OP_LIB("candidate_sampling_ops") diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index d125e40f6c..2be62c9438 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -14,6 +14,7 @@ py_library( srcs = ["__init__.py"], srcs_version = "PY2AND3", deps = [ + ":boosted_trees", ":dnn", ":dnn_linear_combined", ":extenders", @@ -26,6 +27,36 @@ py_library( ], ) +py_library( + name = "boosted_trees", + srcs = ["python/estimator/boosted_trees.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python/estimator", + "//tensorflow/python/estimator:boosted_trees", + ], +) + +py_test( + name = "boosted_trees_test", + size = "medium", + srcs = ["python/estimator/boosted_trees_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + "notsan", + ], + deps = [ + ":boosted_trees", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:training", + "//tensorflow/python/estimator:numpy_io", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], +) + py_library( name = "dnn", srcs = ["python/estimator/dnn.py"], diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index 6b9f9575b6..d2fc2c4bfa 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import +from tensorflow.contrib.estimator.python.estimator.boosted_trees import * from tensorflow.contrib.estimator.python.estimator.dnn import * from tensorflow.contrib.estimator.python.estimator.dnn_linear_combined import * from tensorflow.contrib.estimator.python.estimator.extenders import * @@ -44,6 +45,8 @@ _allowed_symbols = [ 'DNNEstimator', 'DNNLinearCombinedEstimator', 'LinearEstimator', + 'boosted_trees_classifier_train_in_memory', + 'boosted_trees_regressor_train_in_memory', 'call_logit_fn', 'dnn_logit_fn_builder', 'linear_logit_fn_builder', diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py new file mode 100644 index 0000000000..5880164519 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -0,0 +1,323 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Boosted Trees estimators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees + + +class _BoostedTreesEstimator(estimator.Estimator): + """An Estimator for Tensorflow Boosted Trees models.""" + + def __init__(self, + feature_columns, + n_batches_per_layer, + head, + model_dir=None, + weight_column=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + config=None): + """Initializes a `BoostedTreesEstimator` instance. + + Args: + feature_columns: An iterable containing all the feature columns used by + the model. All items in the set should be instances of classes derived + from `FeatureColumn`. + n_batches_per_layer: the number of batches to collect statistics per + layer. + head: the `Head` instance defined for Estimator. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to downweight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + n_trees: number trees to be created. + max_depth: maximum depth of the tree to grow. + learning_rate: shrinkage parameter to be used when a tree added to the + model. + l1_regularization: regularization multiplier applied to the absolute + weights of the tree leafs. + l2_regularization: regularization multiplier applied to the square weights + of the tree leafs. + tree_complexity: regularization factor to penalize trees with more leaves. + config: `RunConfig` object to configure the runtime settings. + """ + # TODO(youngheek): param validations. + + # HParams for the model. + tree_hparams = canned_boosted_trees.TreeHParams( + n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, + tree_complexity) + + def _model_fn(features, labels, mode, config): + return canned_boosted_trees._bt_model_fn( # pylint: disable=protected-access + features, labels, mode, head, feature_columns, tree_hparams, + n_batches_per_layer, config) + + super(_BoostedTreesEstimator, self).__init__( + model_fn=_model_fn, model_dir=model_dir, config=config) + + +def boosted_trees_classifier_train_in_memory( + train_input_fn, + feature_columns, + model_dir=None, + n_classes=canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT, + weight_column=None, + label_vocabulary=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + config=None, + train_hooks=None): + """Trains a boosted tree classifier with in memory dataset. + + Example: + + ```python + bucketized_feature_1 = bucketized_column( + numeric_column('feature_1'), BUCKET_BOUNDARIES_1) + bucketized_feature_2 = bucketized_column( + numeric_column('feature_2'), BUCKET_BOUNDARIES_2) + + def input_fn_train(): + dataset = create-dataset-from-training-data + # Don't use repeat or cache, since it is assumed to be one epoch + # This is either tf.data.Dataset, or a tuple of feature dict and label. + return dataset + + classifier = boosted_trees_classifier_train_in_memory( + train_input_fn, + feature_columns=[bucketized_feature_1, bucketized_feature_2], + n_trees=100, + ... + ) + + def input_fn_eval(): + ... + return dataset + + metrics = classifier.evaluate(input_fn=input_fn_eval, steps=10) + ``` + + Args: + train_input_fn: the input function returns a dataset containing a single + epoch of *unbatched* features and labels. + feature_columns: An iterable containing all the feature columns used by + the model. All items in the set should be instances of classes derived + from `FeatureColumn`. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + n_classes: number of label classes. Default is binary classification. + Multiclass support is not yet implemented. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to downweight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + label_vocabulary: A list of strings represents possible label values. If + given, labels must be string type and have any value in + `label_vocabulary`. If it is not given, that means labels are + already encoded as integer or float within [0, 1] for `n_classes=2` and + encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . + Also there will be errors if vocabulary is not provided and labels are + string. + n_trees: number trees to be created. + max_depth: maximum depth of the tree to grow. + learning_rate: shrinkage parameter to be used when a tree added to the + model. + l1_regularization: regularization multiplier applied to the absolute + weights of the tree leafs. + l2_regularization: regularization multiplier applied to the square weights + of the tree leafs. + tree_complexity: regularization factor to penalize trees with more leaves. + config: `RunConfig` object to configure the runtime settings. + train_hooks: a list of Hook instances to be passed to estimator.train(). + + Returns: + a `BoostedTreesClassifier` instance created with the given arguments and + trained with the data loaded up on memory from the input_fn. + + Raises: + ValueError: when wrong arguments are given or unsupported functionalities + are requested. + """ + # pylint: disable=protected-access + # TODO(nponomareva): Support multi-class cases. + if n_classes == canned_boosted_trees._HOLD_FOR_MULTI_CLASS_SUPPORT: + n_classes = 2 + head, closed_form = ( + canned_boosted_trees._create_classification_head_and_closed_form( + n_classes, weight_column, label_vocabulary=label_vocabulary)) + + # HParams for the model. + tree_hparams = canned_boosted_trees.TreeHParams( + n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, + tree_complexity) + + def _model_fn(features, labels, mode, config): + return canned_boosted_trees._bt_model_fn( + features, + labels, + mode, + head, + feature_columns, + tree_hparams, + n_batches_per_layer=1, + config=config, + closed_form_grad_and_hess_fn=closed_form, + train_in_memory=True) + + in_memory_classifier = estimator.Estimator( + model_fn=_model_fn, model_dir=model_dir, config=config) + + in_memory_classifier.train(input_fn=train_input_fn, hooks=train_hooks) + + return in_memory_classifier + # pylint: enable=protected-access + + +def boosted_trees_regressor_train_in_memory( + train_input_fn, + feature_columns, + model_dir=None, + label_dimension=canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT, + weight_column=None, + n_trees=100, + max_depth=6, + learning_rate=0.1, + l1_regularization=0., + l2_regularization=0., + tree_complexity=0., + config=None, + train_hooks=None): + """Trains a boosted tree regressor with in memory dataset. + + Example: + + ```python + bucketized_feature_1 = bucketized_column( + numeric_column('feature_1'), BUCKET_BOUNDARIES_1) + bucketized_feature_2 = bucketized_column( + numeric_column('feature_2'), BUCKET_BOUNDARIES_2) + + def input_fn_train(): + dataset = create-dataset-from-training-data + # Don't use repeat or cache, since it is assumed to be one epoch + # This is either tf.data.Dataset, or a tuple of feature dict and label. + return dataset + + regressor = boosted_trees_regressor_train_in_memory( + train_input_fn, + feature_columns=[bucketized_feature_1, bucketized_feature_2], + n_trees=100, + ... + ) + + def input_fn_eval(): + ... + return dataset + + metrics = regressor.evaluate(input_fn=input_fn_eval, steps=10) + ``` + + Args: + train_input_fn: the input function returns a dataset containing a single + epoch of *unbatched* features and labels. + feature_columns: An iterable containing all the feature columns used by + the model. All items in the set should be instances of classes derived + from `FeatureColumn`. + model_dir: Directory to save model parameters, graph and etc. This can + also be used to load checkpoints from the directory into a estimator + to continue training a previously saved model. + label_dimension: Number of regression targets per example. + Multi-dimensional support is not yet implemented. + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to downweight or boost examples during training. It + will be multiplied by the loss of the example. If it is a string, it is + used as a key to fetch weight tensor from the `features`. If it is a + `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, + then weight_column.normalizer_fn is applied on it to get weight tensor. + n_trees: number trees to be created. + max_depth: maximum depth of the tree to grow. + learning_rate: shrinkage parameter to be used when a tree added to the + model. + l1_regularization: regularization multiplier applied to the absolute + weights of the tree leafs. + l2_regularization: regularization multiplier applied to the square weights + of the tree leafs. + tree_complexity: regularization factor to penalize trees with more leaves. + config: `RunConfig` object to configure the runtime settings. + train_hooks: a list of Hook instances to be passed to estimator.train(). + + Returns: + a `BoostedTreesClassifier` instance created with the given arguments and + trained with the data loaded up on memory from the input_fn. + + Raises: + ValueError: when wrong arguments are given or unsupported functionalities + are requested. + """ + # pylint: disable=protected-access + # TODO(nponomareva): Extend it to multi-dimension cases. + if label_dimension == canned_boosted_trees._HOLD_FOR_MULTI_DIM_SUPPORT: + label_dimension = 1 + head = canned_boosted_trees._create_regression_head(label_dimension, + weight_column) + + # HParams for the model. + tree_hparams = canned_boosted_trees.TreeHParams( + n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, + tree_complexity) + + def _model_fn(features, labels, mode, config): + return canned_boosted_trees._bt_model_fn( + features, + labels, + mode, + head, + feature_columns, + tree_hparams, + n_batches_per_layer=1, + config=config, + train_in_memory=True) + + in_memory_regressor = estimator.Estimator( + model_fn=_model_fn, model_dir=model_dir, config=config) + + in_memory_regressor.train(input_fn=train_input_fn, hooks=train_hooks) + + return in_memory_regressor + # pylint: enable=protected-access diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py new file mode 100644 index 0000000000..e99a87f3b3 --- /dev/null +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees_test.py @@ -0,0 +1,207 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests boosted_trees estimators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.estimator.python.estimator import boosted_trees +from tensorflow.python.estimator.canned import boosted_trees as canned_boosted_trees +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest +from tensorflow.python.training import checkpoint_utils + +NUM_FEATURES = 3 + +BUCKET_BOUNDARIES = [-2., .5, 12.] # Boundaries for all the features. +INPUT_FEATURES = np.array( + [ + [12.5, 1.0, -2.001, -2.0001, -1.999], # feature_0 quantized:[3,2,0,0,1] + [2.0, -3.0, 0.5, 0.0, 0.4995], # feature_1 quantized:[2,0,2,1,1] + [3.0, 20.0, 50.0, -100.0, 102.75], # feature_2 quantized:[2,3,3,0,3] + ], + dtype=np.float32) +CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]] +REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]] +FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)} + + +def _make_train_input_fn(is_classification): + """Makes train input_fn for classification/regression.""" + + def _input_fn(): + features = dict(FEATURES_DICT) + if is_classification: + labels = CLASSIFICATION_LABELS + else: + labels = REGRESSION_LABELS + return features, labels + + return _input_fn + + +class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._head = canned_boosted_trees._create_regression_head(label_dimension=1) + self._feature_columns = { + feature_column.bucketized_column( + feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), + BUCKET_BOUNDARIES) + for i in range(NUM_FEATURES) + } + + def _assert_checkpoint(self, model_dir, expected_global_step): + self.assertEqual(expected_global_step, + checkpoint_utils.load_variable(model_dir, + ops.GraphKeys.GLOBAL_STEP)) + + def testTrainAndEvaluateEstimator(self): + input_fn = _make_train_input_fn(is_classification=False) + + est = boosted_trees._BoostedTreesEstimator( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + head=self._head, + max_depth=5) + + # It will stop after 10 steps because of the max depth and num trees. + num_steps = 100 + # Train for a few steps, and validate final checkpoint. + est.train(input_fn, steps=num_steps) + self._assert_checkpoint(est.model_dir, 11) + eval_res = est.evaluate(input_fn=input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 0.913176) + + def testInferEstimator(self): + train_input_fn = _make_train_input_fn(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees._BoostedTreesEstimator( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5, + head=self._head) + + # It will stop after 5 steps because of the max depth and num trees. + num_steps = 100 + # Train for a few steps, and validate final checkpoint. + est.train(train_input_fn, steps=num_steps) + self._assert_checkpoint(est.model_dir, 6) + + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertEquals(5, len(predictions)) + self.assertAllClose([0.703549], predictions[0]['predictions']) + self.assertAllClose([0.266539], predictions[1]['predictions']) + self.assertAllClose([0.256479], predictions[2]['predictions']) + self.assertAllClose([1.088732], predictions[3]['predictions']) + self.assertAllClose([1.901732], predictions[4]['predictions']) + + +class BoostedTreesClassifierTrainInMemoryTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._feature_columns = { + feature_column.bucketized_column( + feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), + BUCKET_BOUNDARIES) + for i in range(NUM_FEATURES) + } + + def _assert_checkpoint(self, model_dir, expected_global_step): + self.assertEqual(expected_global_step, + checkpoint_utils.load_variable(model_dir, + ops.GraphKeys.GLOBAL_STEP)) + + def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self): + train_input_fn = _make_train_input_fn(is_classification=True) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.boosted_trees_classifier_train_in_memory( + train_input_fn=train_input_fn, + feature_columns=self._feature_columns, + n_trees=1, + max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint(est.model_dir, 6) + + # Check eval. + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['accuracy'], 1.0) + + # Check predict that all labels are correct. + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertEquals(5, len(predictions)) + self.assertAllClose([0], predictions[0]['class_ids']) + self.assertAllClose([1], predictions[1]['class_ids']) + self.assertAllClose([1], predictions[2]['class_ids']) + self.assertAllClose([0], predictions[3]['class_ids']) + self.assertAllClose([0], predictions[4]['class_ids']) + + +class BoostedTreesRegressorTrainInMemoryTest(test_util.TensorFlowTestCase): + + def setUp(self): + self._feature_columns = { + feature_column.bucketized_column( + feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), + BUCKET_BOUNDARIES) + for i in range(NUM_FEATURES) + } + + def _assert_checkpoint(self, model_dir, expected_global_step): + self.assertEqual(expected_global_step, + checkpoint_utils.load_variable(model_dir, + ops.GraphKeys.GLOBAL_STEP)) + + def testRegressorTrainInMemoryAndEvalAndInfer(self): + train_input_fn = _make_train_input_fn(is_classification=False) + predict_input_fn = numpy_io.numpy_input_fn( + x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False) + + est = boosted_trees.boosted_trees_regressor_train_in_memory( + train_input_fn=train_input_fn, + feature_columns=self._feature_columns, + n_trees=1, + max_depth=5) + # It will stop after 5 steps because of the max depth and num trees. + self._assert_checkpoint(est.model_dir, 6) + + # Check eval. + eval_res = est.evaluate(input_fn=train_input_fn, steps=1) + self.assertAllClose(eval_res['average_loss'], 2.2136638) + + # Validate predictions. + predictions = list(est.predict(input_fn=predict_input_fn)) + self.assertEquals(5, len(predictions)) + self.assertAllClose([0.703549], predictions[0]['predictions']) + self.assertAllClose([0.266539], predictions[1]['predictions']) + self.assertAllClose([0.256479], predictions[2]['predictions']) + self.assertAllClose([1.088732], predictions[3]['predictions']) + self.assertAllClose([1.901732], predictions[4]['predictions']) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index 5a812af4e9..15786291ed 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -228,6 +228,11 @@ tensorflow/core/kernels/cast_op_impl_int64.cc tensorflow/core/kernels/cast_op_impl_int8.cc tensorflow/core/kernels/cast_op_impl_uint16.cc tensorflow/core/kernels/cast_op_impl_uint8.cc +tensorflow/core/kernels/boosted_trees/prediction_ops.cc +tensorflow/core/kernels/boosted_trees/resource_ops.cc +tensorflow/core/kernels/boosted_trees/resources.cc +tensorflow/core/kernels/boosted_trees/stats_ops.cc +tensorflow/core/kernels/boosted_trees/training_ops.cc tensorflow/core/kernels/bias_op.cc tensorflow/core/kernels/bcast_ops.cc tensorflow/core/kernels/batch_norm_op.cc @@ -285,6 +290,7 @@ tensorflow/core/ops/data_flow_ops.cc tensorflow/core/ops/ctc_ops.cc tensorflow/core/ops/control_flow_ops.cc tensorflow/core/ops/candidate_sampling_ops.cc +tensorflow/core/ops/boosted_trees_ops.cc tensorflow/core/ops/array_ops.cc tensorflow/core/ops/array_grad.cc tensorflow/core/kernels/spacetobatch_functor.cc diff --git a/tensorflow/contrib/makefile/tf_proto_files.txt b/tensorflow/contrib/makefile/tf_proto_files.txt index d569bde637..1f254692d7 100644 --- a/tensorflow/contrib/makefile/tf_proto_files.txt +++ b/tensorflow/contrib/makefile/tf_proto_files.txt @@ -18,6 +18,7 @@ tensorflow/core/protobuf/device_properties.proto tensorflow/core/protobuf/rewriter_config.proto tensorflow/core/protobuf/tensor_bundle.proto tensorflow/core/lib/core/error_codes.proto +tensorflow/core/kernels/boosted_trees/boosted_trees.proto tensorflow/core/framework/versions.proto tensorflow/core/framework/variable.proto tensorflow/core/framework/types.proto diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b8dbd90ab8..614e06cf83 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -629,6 +629,7 @@ tf_gen_op_libs( op_lib_names = [ "batch_ops", "bitwise_ops", + "boosted_trees_ops", "candidate_sampling_ops", "checkpoint_ops", "control_flow_ops", @@ -741,6 +742,7 @@ cc_library( ":audio_ops_op_lib", ":batch_ops_op_lib", ":bitwise_ops_op_lib", + ":boosted_trees_ops_op_lib", ":candidate_sampling_ops_op_lib", ":checkpoint_ops_op_lib", ":control_flow_ops_op_lib", @@ -882,6 +884,7 @@ cc_library( "//tensorflow/core/kernels:audio", "//tensorflow/core/kernels:batch_kernels", "//tensorflow/core/kernels:bincount_op", + "//tensorflow/core/kernels:boosted_trees_ops", "//tensorflow/core/kernels:candidate_sampler_ops", "//tensorflow/core/kernels:checkpoint_ops", "//tensorflow/core/kernels:control_flow_ops", diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt new file mode 100644 index 0000000000..b1921e3507 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt @@ -0,0 +1,87 @@ +op { + graph_op_name: "BoostedTreesCalculateBestGainsPerFeature" + visibility: HIDDEN + in_arg { + name: "node_id_range" + description: <
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
+ + @@ -469,6 +471,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.10.0N/AN/A
tensorflow_gpu-1.7.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
+ @@ -483,6 +486,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.10.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
+ + diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index ca9cb043e9..778e4d3a62 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -233,6 +233,8 @@ order by how long they took. From left to right, the columns are: - The cumulative total time of this and the previous ops in the table. This is handy for understanding what the distribution of work is across the layers, to see if just a few of the nodes are taking up most of the time. + +- The amount of memory consumed by outputs of this type of op. - Name of the node. diff --git a/tensorflow/docs_src/mobile/prepare_models.md b/tensorflow/docs_src/mobile/prepare_models.md index 360ee302aa..8b22c04d87 100644 --- a/tensorflow/docs_src/mobile/prepare_models.md +++ b/tensorflow/docs_src/mobile/prepare_models.md @@ -60,7 +60,7 @@ and serialized as protocol buffers: the `NodeDef`, so if all the `Variable` weights are converted to `Const` nodes, then we only need a single `GraphDef` file to hold the model architecture and the weights. Freezing the graph handles the process of loading the - checkpoints, and then converts all Consts to Variables. You can then load the + checkpoints, and then converts all Variables to Consts. You can then load the resulting file in a single call, without having to restore variable values from checkpoints. One thing to watch out for with `GraphDef` files is that sometimes they’re stored in text format for easy inspection. These versions diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 2cc3c48c3c..b05d87635f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1067,7 +1067,7 @@ py_test( py_test( name = "framework_importer_test", - size = "medium", + size = "large", srcs = ["framework/importer_test.py"], main = "framework/importer_test.py", srcs_version = "PY2AND3", diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index d0ba8020c1..64c1760d5e 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -315,21 +315,39 @@ class ReverseV2Test(test_util.TensorFlowTestCase): self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1]) self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1]) + # This test covers the axis validation in the shape function + # (no eval()) + def testInvalidAxis(self): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [-30]) + with self.assertRaisesRegexp(ValueError, + "is out of valid range"): + array_ops.reverse_v2(x_np, [2]) + with self.assertRaisesRegexp(ValueError, + "axis 0 specified more than once"): + array_ops.reverse_v2(x_np, [0, -2]) + # This is the version of reverse that uses axis indices rather than # bool tensors # TODO(b/32254538): Change this test to use array_ops.reverse + # + # Note: this test passes placeholder as constant axis is validated + # in shape function (see testInvalidAxis) def testInvalid(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) + axis = array_ops.placeholder(dtypes.int32) with self.test_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [-30]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): - array_ops.reverse_v2(x_np, [2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]}) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis 0 specified more than once"): - array_ops.reverse_v2(x_np, [0, -2]).eval() + array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]}) def testReverse1DimAuto(self): for dtype in [ @@ -890,7 +908,7 @@ class StridedSliceAssignChecker(object): var = resource_variable_ops.ResourceVariable(self.x) else: var = variables.Variable(self.x) - sess.run(variables.initialize_variables([var])) + sess.run(variables.variables_initializer([var])) val = sess.run(var[index].assign(value)) # val_copy is used to check that tf.assign works equivalently to the # assign method above. diff --git a/tensorflow/python/kernel_tests/testdata/BUILD b/tensorflow/python/kernel_tests/testdata/BUILD index a4a0dfc139..45264c773a 100644 --- a/tensorflow/python/kernel_tests/testdata/BUILD +++ b/tensorflow/python/kernel_tests/testdata/BUILD @@ -1,7 +1,7 @@ # Data files for kernel tests. package( - default_visibility = ["//tensorflow:internal"], + default_visibility = ["//visibility:public"], ) licenses(["notice"]) # Apache 2.0 diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index e3e120a4eb..60c726d54c 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -18,10 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools +import sys + import numpy as np +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl @@ -88,7 +94,7 @@ class XentTest(test.TestCase): 4.]]]).astype(dtype) np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) - self.assertRaisesRegexp(ValueError, "must be rank 2", + self.assertRaisesRegexp(ValueError, "rank 2, but is rank 3", gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) @@ -128,6 +134,24 @@ class XentTest(test.TestCase): self.assertAllClose( np.array([1.3862, 1.9401]), np_loss, rtol=1.e-3, atol=1.e-3) + def testShapeBroadcast(self): + np_f = np.array([[1., 2., 3., 4.], + [1., 2., 3., 4.]]).astype(np.float32) + np_l = np.array([[0., 0., 0., 1.], + [0., .5, .5, 0.]]).astype(np.float32) + np_loss, np_backprop = self._npXent(np_f, np_l) + tf_f = constant_op.constant( + np.array([[1., 2., 3., 4.]]).astype(np.float32)) + tf_l = constant_op.constant( + np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float32)) + for use_gpu in [False, True]: + with self.test_session(use_gpu=use_gpu) as sess: + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( + tf_f, tf_l) + tf_loss, tf_backprop = sess.run([loss, backprop]) + self.assertAllCloseAccordingToType(np_loss, tf_loss) + self.assertAllCloseAccordingToType(np_backprop, tf_backprop) + def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): @@ -260,5 +284,60 @@ class XentTest(test.TestCase): self.assertAllEqual(np_loss, tf_loss) +class XentBenchmark(test.Benchmark): + + def benchmarkZeroDimension(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "zero_dimension_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + logits = array_ops.zeros([0, 2, 4], dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + def benchmarkSingleClass(self): + for (m, n, p, use_gpu) in itertools.product( + [128], + [10, 100, 1000, 10000, 100000], + [0.001, 0.01, 0.5, 0.99, 1.0], + [False]): + k = int(p * n) + if k == 0: + continue + name = "single_class_m_%d_n_%d_k_%g_use_gpu_%s" % (m, n, k, use_gpu) + device = "/%s:0" % ("gpu" if use_gpu else "cpu") + with ops.Graph().as_default(): + with ops.device(device): + labels = constant_op.constant([[1.], [-1.], [0.]], + dtype=dtypes.float32) + logits = constant_op.constant([[-1.], [0.], [1.]], + dtype=dtypes.float32) + op = nn_ops.softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + with session.Session() as sess: + r = self.run_op_benchmark(sess, op, min_iters=100, name=name) + gb_processed_input = m * n / 1.0e9 + throughput = gb_processed_input / r["wall_time"] + print("Benchmark: %s \t wall_time: %0.03g s \t " + "Throughput: %0.03g GB/s" % (name, r["wall_time"], throughput)) + sys.stdout.flush() + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 74e7c63fb3..2d99b1688f 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -180,6 +180,8 @@ class _Conv(base.Layer): # bias_add when computing gradients. To use bias_add, we collapse Z # and Y into a single dimension to obtain a 4D input tensor. outputs_shape = outputs.shape.as_list() + if outputs_shape[0] is None: + outputs_shape[0] = -1 outputs_4d = array_ops.reshape(outputs, [outputs_shape[0], outputs_shape[1], outputs_shape[2] * outputs_shape[3], diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py index 160e732b67..cdb42f5bd1 100644 --- a/tensorflow/python/layers/convolutional_test.py +++ b/tensorflow/python/layers/convolutional_test.py @@ -325,6 +325,12 @@ class ConvTest(test.TestCase): self.assertEqual(conv3d.kernel_constraint, k_constraint) self.assertEqual(conv3d.bias_constraint, b_constraint) + def testConv3DChannelsFirst(self): + # Test case for GitHub issue 15655 + images = array_ops.placeholder( + dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) + conv_layers.conv3d(images, 32, 9, data_format='channels_first') + @test_util.with_c_api class SeparableConv1DTest(test.TestCase): diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 5b4fb4f7c8..170861b43f 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -429,7 +429,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): u, s, v_adj = np.linalg.svd(a, full_matrices=False) np_a_approx = np.dot(u, np.dot(np.diag(s), v_adj)) # tf_a_approx and np_a_approx should be numerically close. - ```` + ``` @end_compatibility """ s, u, v = gen_linalg_ops.svd( diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 6c5c9e01a7..4ce6f6d002 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -281,13 +281,14 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name scaffold=None, hooks=None, chief_only_hooks=None, - save_checkpoint_secs=600, + save_checkpoint_secs=USE_DEFAULT, save_summaries_steps=USE_DEFAULT, save_summaries_secs=USE_DEFAULT, config=None, stop_grace_period_secs=120, log_step_count_steps=100, - max_wait_secs=7200): + max_wait_secs=7200, + save_checkpoint_steps=USE_DEFAULT): """Creates a `MonitoredSession` for training. For a chief, this utility sets proper session initializer/restorer. It also @@ -310,8 +311,10 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if `is_chief==True`, ignore otherwise. save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. + using a default checkpoint saver. If both `save_checkpoint_steps` and + `save_checkpoint_secs` are set to `None`, then the default checkpoint + saver isn't used. If both are provided, then only `save_checkpoint_secs` + is used. Default 600. save_summaries_steps: The frequency, in number of global steps, that the summaries are written to disk using a default summary saver. If both `save_summaries_steps` and `save_summaries_secs` are set to `None`, then @@ -330,6 +333,11 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name become available. This should be kept relatively short to help detect incorrect code, but sometimes may need to be increased if the chief takes a while to start up. + save_checkpoint_steps: The frequency, in number of global steps, that a + checkpoint is saved using a default checkpoint saver. If both + `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then + the default checkpoint saver isn't used. If both are provided, then only + `save_checkpoint_secs` is used. Default not enabled. Returns: A `MonitoredSession` object. @@ -342,6 +350,15 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name elif save_summaries_steps == USE_DEFAULT: save_summaries_steps = None + if (save_checkpoint_steps == USE_DEFAULT and + save_checkpoint_secs == USE_DEFAULT): + save_checkpoint_steps = None + save_checkpoint_secs = 600 + elif save_checkpoint_secs == USE_DEFAULT: + save_checkpoint_secs = None + elif save_checkpoint_steps == USE_DEFAULT: + save_checkpoint_steps = None + scaffold = scaffold or Scaffold() if not is_chief: session_creator = WorkerSessionCreator( @@ -374,9 +391,13 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name save_steps=save_summaries_steps, save_secs=save_summaries_secs, output_dir=checkpoint_dir)) - if save_checkpoint_secs and save_checkpoint_secs > 0: + if (save_checkpoint_secs and save_checkpoint_secs > 0) or ( + save_checkpoint_steps and save_checkpoint_steps > 0): all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( - checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold)) + checkpoint_dir, + save_steps=save_checkpoint_steps, + save_secs=save_checkpoint_secs, + scaffold=scaffold)) if hooks: all_hooks.extend(hooks) diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 159b2d5c16..3806056f01 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -282,6 +282,42 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) + def test_save_checkpoint_steps(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_steps=100, + log_step_count_steps=10) as session: + for _ in range(100): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(100, session.run(gstep)) + + def test_save_checkpoint_secs(self): + logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_checkpoint_secs=0.1, + log_step_count_steps=10) as session: + session.run(new_gstep) + time.sleep(0.2) + for _ in range(10): + session.run(new_gstep) + # A restart will find the checkpoint and recover automatically. + with monitored_session.MonitoredTrainingSession( + is_chief=True, checkpoint_dir=logdir) as session: + self.assertEqual(11, session.run(gstep)) + def test_summaries_steps(self): logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2d3cb415fe..fcc57d506e 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -22,6 +22,7 @@ load( load( "//third_party/mkl:build_defs.bzl", "if_mkl", + "if_mkl_lnx_x64" ) def register_extension_info(**kwargs): @@ -202,7 +203,8 @@ def tf_copts(android_optimization_level_override="-O2", is_external=False): "-ftemplate-depth=900"]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) - + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML", "-fopenmp",]) + + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + + if_mkl_lnx_x64(["-fopenmp"]) + if_android_arm(["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + if_ios_x86_64(["-msse4.1"]) diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index c75ee474aa..bec72e1e60 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -238,7 +238,7 @@ tf_module { } member_method { name: "MonitoredTrainingSession" - argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'600\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\'], " + argspec: "args=[\'master\', \'is_chief\', \'checkpoint_dir\', \'scaffold\', \'hooks\', \'chief_only_hooks\', \'save_checkpoint_secs\', \'save_summaries_steps\', \'save_summaries_secs\', \'config\', \'stop_grace_period_secs\', \'log_step_count_steps\', \'max_wait_secs\', \'save_checkpoint_steps\'], varargs=None, keywords=None, defaults=[\'\', \'True\', \'None\', \'None\', \'None\', \'None\', \'\', \'\', \'\', \'None\', \'120\', \'100\', \'7200\', \'\'], " } member_method { name: "NewCheckpointReader" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index e1b56b9a25..7d471b4703 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -31,5 +31,5 @@ export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" -export PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +export PATH="$PATH:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" build_libtensorflow_tarball "-cpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 22c73c3fe1..11f476d12c 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 3690e7dfe5..037d13116e 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.6 +ARG TF_BRANCH=r1.7 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 69ba340f92..1fcb6428b2 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.7 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 3fbdb5cacd..0ede8c6370 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -138,7 +138,6 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]), outs = ["include/tensorflow/c/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", @@ -176,7 +175,6 @@ genrule( "@zlib_archive//:zlib.h", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]), outs = ["include/tensorflow/jni/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index dd75eda231..62fec2c402 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -127,7 +127,6 @@ filegroup( "@org_python_pypi_backports_weakref//:LICENSE", ] + if_mkl([ "//third_party/mkl:LICENSE", - "@mkl//:LICENSE", ]) + if_not_windows([ "@nccl_archive//:LICENSE.txt", ]) + tf_additional_license_deps(), diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e0152da4df..365e8d6b08 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0' +_VERSION = '1.7.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorboard >= 1.6.0, < 1.7.0', + 'tensorboard >= 1.7.0, < 1.8.0', 'termcolor >= 1.1.0', ] @@ -62,7 +62,7 @@ else: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.7.0a0, < 1.8.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.8.0a0, < 1.9.0a0' break # weakref.finalize and enum were introduced in Python 3.4 diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0e31358236..ac6380dd3e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -15,6 +15,11 @@ load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_ load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +# Sanitize a dependency so that it works correctly from code that includes +# TensorFlow as a submodule. +def clean_dep(dep): + return str(Label(dep)) + # If TensorFlow is linked as a submodule. # path_prefix is no longer used. # tf_repo_name is thought to be under consideration. @@ -32,17 +37,37 @@ def tf_workspace(path_prefix="", tf_repo_name=""): arm_compiler_configure( name="local_config_arm_compiler", remote_config_repo="../arm_compiler", - build_file = str(Label("//third_party/toolchains/cpus/arm:BUILD"))) + build_file = clean_dep("//third_party/toolchains/cpus/arm:BUILD")) mkl_repository( - name = "mkl", + name = "mkl_linux", + urls = [ + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_lnx_2018.0.1.20171227.tgz", + ], + sha256 = "feacc3d82565c1231470359b42c696236fae873704e0b013436afba5fd4fd30f", + strip_prefix = "mklml_lnx_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") + ) + mkl_repository( + name = "mkl_windows", + urls = [ + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_win_2018.0.1.20171227.zip" + ], + sha256 = "24bae8d7b22b431a654acadea43f2243c46ae6b1e5a73a4a936825f31d284ee4", + strip_prefix = "mklml_win_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") + ) + mkl_repository( + name = "mkl_darwin", urls = [ - "https://mirror.bazel.build/github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", - "https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz", + "https://mirror.bazel.build/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz", + "https://github.com/intel/mkl-dnn/releases/download/v0.12/mklml_mac_2018.0.1.20171227.tgz" ], - sha256 = "6b07cb7e5451db67c2e31e785ae458b18f7f363c60a61685488f69e9ae7199d4", - strip_prefix = "mklml_lnx_2018.0.1.20171007", - build_file = str(Label("//third_party/mkl:mkl.BUILD")), + sha256 = "0e954ec6fd3dc5e37f64c4043f6b5613dd687558da3df1028b3b7c29ff5cf77f", + strip_prefix = "mklml_mac_2018.0.1.20171227", + build_file = clean_dep("//third_party/mkl:mkl.BUILD") ) if path_prefix: @@ -52,12 +77,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "mkl_dnn", urls = [ - "https://mirror.bazel.build/github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", - "https://github.com/01org/mkl-dnn/archive/e0bfcaa7fcb2b1e1558f5f0676933c1db807a729.tar.gz", + "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/v0.12.tar.gz", + "https://github.com/intel/mkl-dnn/archive/v0.12.tar.gz", ], - sha256 = "02e244f63dd95402691a361392504c143eede9a89043426f174836638a9cbf09", - strip_prefix = "mkl-dnn-e0bfcaa7fcb2b1e1558f5f0676933c1db807a729", - build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")), + sha256 = "86fa2a8c12a56e3b725945acedeaa82492746be02545aba6d710f097e013e19e", + strip_prefix = "mkl-dnn-0.12", + build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) tf_http_archive( @@ -68,7 +93,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "5996380e3e8b981f55d1c8d58e709c00dbb4806ba367be75d0925a68cc2f6478", strip_prefix = "abseil-cpp-720c017e30339fd1786ce4aac68bc8559736e53f", - build_file = str(Label("//third_party:com_google_absl.BUILD")), + build_file = clean_dep("//third_party:com_google_absl.BUILD"), ) tf_http_archive( @@ -79,8 +104,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "791b836cacd03e20bae5bdd25f1c4a5505a0a9975ba94a61eb4e2631fbd1d53a", strip_prefix = "eigen-eigen-6913f0cf7d06", - build_file = str(Label("//third_party:eigen.BUILD")), - patch_file = str(Label("//third_party:eigen_fix_cuda_compilation.patch")) + build_file = clean_dep("//third_party:eigen.BUILD"), + patch_file = clean_dep("//third_party:eigen_fix_cuda_compilation.patch") ) tf_http_archive( @@ -93,7 +118,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # remove the whitelist entry in third_party/repo.bzl. # "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz", ], - build_file = str(Label("//:arm_compiler.BUILD")), + build_file = clean_dep("//:arm_compiler.BUILD"), ) tf_http_archive( @@ -104,7 +129,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ade869c3f42f23b5263c7d594aa3c7e5e61ac6a3afcaf5d6e42899d2a7986ce", strip_prefix = "libxsmm-1.8.1", - build_file = str(Label("//third_party:libxsmm.BUILD")), + build_file = clean_dep("//third_party:libxsmm.BUILD"), ) tf_http_archive( @@ -117,7 +142,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "932075525642b04ac6f1b50589f1df5cd72ec2f448b721fd32234cf183f0e755", strip_prefix = "or-tools-253f7955c6a1fd805408fba2e42ac6d45b312d15/src", - build_file = str(Label("//third_party:ortools.BUILD")), + build_file = clean_dep("//third_party:ortools.BUILD"), ) tf_http_archive( @@ -149,7 +174,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", - build_file = str(Label("//third_party:farmhash.BUILD")), + build_file = clean_dep("//third_party:farmhash.BUILD"), ) tf_http_archive( @@ -160,7 +185,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "0f30a15b1566d93f146c8d149878a06e91d9bb7ec2cfd76906df62a82be4aac9", strip_prefix = "highwayhash-dfcb97ca4fe9277bf9dc1802dd979b071896453b", - build_file = str(Label("//third_party:highwayhash.BUILD")), + build_file = clean_dep("//third_party:highwayhash.BUILD"), ) tf_http_archive( @@ -171,7 +196,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", strip_prefix = "nasm-2.12.02", - build_file = str(Label("//third_party:nasm.BUILD")), + build_file = clean_dep("//third_party:nasm.BUILD"), ) tf_http_archive( @@ -182,7 +207,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", - build_file = str(Label("//third_party/jpeg:jpeg.BUILD")), + build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), ) tf_http_archive( @@ -193,7 +218,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "716c59c7dfc808a4c368f8ada526932be72b2fcea11dd85dc9d88b1df1dfe9c2", strip_prefix = "libpng-1.2.53", - build_file = str(Label("//third_party:png.BUILD")), + build_file = clean_dep("//third_party:png.BUILD"), ) tf_http_archive( @@ -204,7 +229,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", strip_prefix = "sqlite-amalgamation-3200000", - build_file = str(Label("//third_party:sqlite.BUILD")), + build_file = clean_dep("//third_party:sqlite.BUILD"), ) tf_http_archive( @@ -215,7 +240,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1", strip_prefix = "giflib-5.1.4", - build_file = str(Label("//third_party:gif.BUILD")), + build_file = clean_dep("//third_party:gif.BUILD"), ) tf_http_archive( @@ -226,7 +251,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", strip_prefix = "six-1.10.0", - build_file = str(Label("//third_party:six.BUILD")), + build_file = clean_dep("//third_party:six.BUILD"), ) tf_http_archive( @@ -237,7 +262,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d", strip_prefix = "astor-0.6.2", - build_file = str(Label("//third_party:astor.BUILD")), + build_file = clean_dep("//third_party:astor.BUILD"), ) tf_http_archive( @@ -248,7 +273,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930", strip_prefix = "gast-0.2.0", - build_file = str(Label("//third_party:gast.BUILD")), + build_file = clean_dep("//third_party:gast.BUILD"), ) tf_http_archive( @@ -259,7 +284,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b", strip_prefix = "termcolor-1.1.0", - build_file = str(Label("//third_party:termcolor.BUILD")), + build_file = clean_dep("//third_party:termcolor.BUILD"), ) tf_http_archive( @@ -280,7 +305,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892", strip_prefix = "backports.weakref-1.0rc1/src", - build_file = str(Label("//third_party:backports_weakref.BUILD")), + build_file = clean_dep("//third_party:backports_weakref.BUILD"), ) tf_http_archive( @@ -291,7 +316,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", strip_prefix = "codegen-1.0", - build_file = str(Label("//third_party:codegen.BUILD")), + build_file = clean_dep("//third_party:codegen.BUILD"), ) filegroup_external( @@ -376,7 +401,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://ftp.exim.org/pub/pcre/pcre-8.39.tar.gz", ], strip_prefix = "pcre-8.39", - build_file = str(Label("//third_party:pcre.BUILD")), + build_file = clean_dep("//third_party:pcre.BUILD"), ) tf_http_archive( @@ -388,7 +413,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://pilotfiber.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", ], strip_prefix = "swig-3.0.8", - build_file = str(Label("//third_party:swig.BUILD")), + build_file = clean_dep("//third_party:swig.BUILD"), ) tf_http_archive( @@ -399,7 +424,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://curl.haxx.se/download/curl-7.49.1.tar.gz", ], strip_prefix = "curl-7.49.1", - build_file = str(Label("//third_party:curl.BUILD")), + build_file = clean_dep("//third_party:curl.BUILD"), ) tf_http_archive( @@ -421,7 +446,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", ], strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", - build_file = str(Label("//third_party:linenoise.BUILD")), + build_file = clean_dep("//third_party:linenoise.BUILD"), ) # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. @@ -434,7 +459,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "1efbb9b05af88368be984d2f6526061d4a857181ef10f8841889a3a46869bb01", strip_prefix = "llvm-1c3cdea2f181d8e14ee184466c5fb237f1b4cda8", - build_file = str(Label("//third_party/llvm:llvm.BUILD")), + build_file = clean_dep("//third_party/llvm:llvm.BUILD"), ) tf_http_archive( @@ -445,7 +470,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "108532fb94c6f227558d45be3f3347b52539f0f58290a7bb31ec06c462d05326", strip_prefix = "lmdb-LMDB_0.9.19/libraries/liblmdb", - build_file = str(Label("//third_party:lmdb.BUILD")), + build_file = clean_dep("//third_party:lmdb.BUILD"), ) tf_http_archive( @@ -456,7 +481,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "07d34db40593d257324ec5fb9debc4dc33f29f8fb44e33a2eeb35503e61d0fe2", strip_prefix = "jsoncpp-11086dd6a7eba04289944367ca82cea71299ed70", - build_file = str(Label("//third_party:jsoncpp.BUILD")), + build_file = clean_dep("//third_party:jsoncpp.BUILD"), ) tf_http_archive( @@ -477,7 +502,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d", strip_prefix = "zlib-1.2.8", - build_file = str(Label("//third_party:zlib.BUILD")), + build_file = clean_dep("//third_party:zlib.BUILD"), ) tf_http_archive( @@ -487,7 +512,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz", ], sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296", - build_file = str(Label("//third_party/fft2d:fft2d.BUILD")), + build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"), ) tf_http_archive( @@ -498,7 +523,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2f7504c73d85bac842e893340333be8cb8561710642fc9562fccdd9d2c3fcc94", strip_prefix = "snappy-1.1.4", - build_file = str(Label("//third_party:snappy.BUILD")), + build_file = clean_dep("//third_party:snappy.BUILD"), ) tf_http_archive( @@ -509,7 +534,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", - build_file = str(Label("//third_party:nccl.BUILD")), + build_file = clean_dep("//third_party:nccl.BUILD"), ) tf_http_archive( @@ -520,8 +545,8 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e", strip_prefix = "librdkafka-0.11.1", - build_file = str(Label("//third_party:kafka/BUILD")), - patch_file = str(Label("//third_party/kafka:config.patch")), + build_file = clean_dep("//third_party:kafka/BUILD"), + patch_file = clean_dep("//third_party/kafka:config.patch"), ) tf_http_archive( @@ -532,7 +557,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", strip_prefix = "aws-sdk-cpp-1.3.15", - build_file = str(Label("//third_party:aws.BUILD")), + build_file = clean_dep("//third_party:aws.BUILD"), ) java_import_external( @@ -568,7 +593,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", - build_file = str(Label("//third_party:jemalloc.BUILD")), + build_file = clean_dep("//third_party:jemalloc.BUILD"), ) java_import_external( @@ -613,7 +638,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", - build_file = str(Label("//third_party:pprof.BUILD")), + build_file = clean_dep("//third_party:pprof.BUILD"), ) tf_http_archive( @@ -624,7 +649,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): ], sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", strip_prefix = "cub-1.8.0", - build_file = str(Label("//third_party:cub.BUILD")), + build_file = clean_dep("//third_party:cub.BUILD"), ) tf_http_archive( @@ -635,7 +660,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://github.com/cython/cython/archive/3732784c45cfb040a5b0936951d196f83a12ea17.tar.gz", ], strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17", - build_file = str(Label("//third_party:cython.BUILD")), + build_file = clean_dep("//third_party:cython.BUILD"), delete = ["BUILD.bazel"], ) @@ -657,7 +682,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", ], - build_file = str(Label("//third_party:arm_neon_2_x86_sse.BUILD")), + build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), ) tf_http_archive( @@ -668,7 +693,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", "https://github.com/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz", ], - build_file = str(Label("//third_party/flatbuffers:flatbuffers.BUILD")), + build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), ) tf_http_archive( @@ -678,7 +703,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", ], - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), + build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"), ) tf_http_archive( @@ -688,7 +713,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip" ], - build_file = str(Label("//third_party:tflite_smartreply.BUILD")), + build_file = clean_dep("//third_party:tflite_smartreply.BUILD"), ) ############################################################################## @@ -752,7 +777,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # Needed by Protobuf native.bind( name = "python_headers", - actual = str(Label("//util/python:python_headers")), + actual = clean_dep("//util/python:python_headers"), ) # Needed by Protobuf diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index b27d341404..c2adf578c7 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -1,7 +1,5 @@ licenses(["notice"]) # 3-Clause BSD -exports_files(["LICENSE"]) - config_setting( name = "using_mkl", values = { @@ -10,17 +8,51 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "using_mkl_lnx_x64", + values = { + "cpu": "k8", + "define": "using_mkl=true", + }, + visibility = ["//visibility:public"], +) + load( "//third_party/mkl:build_defs.bzl", "if_mkl", ) +filegroup( + name = "LICENSE", + srcs = ["MKL_LICENSE"] + select({ + "@org_tensorflow//tensorflow:linux_x86_64": [ + "@mkl_linux//:LICENSE", + ], + "@org_tensorflow//tensorflow:darwin": [ + "@mkl_darwin//:LICENSE", + ], + "@org_tensorflow//tensorflow:windows": [ + "@mkl_windows//:LICENSE", + ], + }), + visibility = ["//visibility:public"], +) + cc_library( name = "intel_binary_blob", - srcs = if_mkl([ - "@mkl//:libmklml_intel.so", - "@mkl//:libiomp5.so", - ]), visibility = ["//visibility:public"], - deps = ["@mkl//:mkl_headers"], + deps = select({ + "@org_tensorflow//tensorflow:linux_x86_64": [ + "@mkl_linux//:mkl_headers", + "@mkl_linux//:mkl_libs_linux", + ], + "@org_tensorflow//tensorflow:darwin": [ + "@mkl_darwin//:mkl_headers", + "@mkl_darwin//:mkl_libs_darwin", + ], + "@org_tensorflow//tensorflow:windows": [ + "@mkl_windows//:mkl_headers", + "@mkl_windows//:mkl_libs_windows", + ], + }), ) diff --git a/third_party/mkl/MKL_LICENSE b/third_party/mkl/MKL_LICENSE new file mode 100644 index 0000000000..9c8f3ea087 --- /dev/null +++ b/third_party/mkl/MKL_LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 8b73ddabdd..53e02769da 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -24,6 +24,18 @@ def if_mkl(if_true, if_false = []): "//conditions:default": if_false }) +def if_mkl_lnx_x64(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with MKL. + + Returns a select statement which evaluates to if_true if we're building + with MKL enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + str(Label("//third_party/mkl:using_mkl_lnx_x64")): if_true, + "//conditions:default": if_false + }) + def _enable_local_mkl(repository_ctx): return _TF_MKL_ROOT in repository_ctx.os.environ diff --git a/third_party/mkl/mkl.BUILD b/third_party/mkl/mkl.BUILD index 8db97232e1..c3a71e4ff9 100644 --- a/third_party/mkl/mkl.BUILD +++ b/third_party/mkl/mkl.BUILD @@ -17,14 +17,29 @@ cc_library( visibility = ["//visibility:public"], ) -filegroup( - name = "libmklml_intel.so", - srcs = ["lib/libmklml_intel.so"], +cc_library( + name = "mkl_libs_linux", + srcs = [ + "lib/libiomp5.so", + "lib/libmklml_intel.so", + ], visibility = ["//visibility:public"], ) -filegroup( - name = "libiomp5.so", - srcs = ["lib/libiomp5.so"], +cc_library( + name = "mkl_libs_darwin", + srcs = [ + "lib/libiomp5.dylib", + "lib/libmklml.dylib", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "mkl_libs_windows", + srcs = [ + "lib/libiomp5md.lib", + "lib/mklml.lib", + ], visibility = ["//visibility:public"], ) -- GitLab From c194a0ea67a6fb61bd23b39fbb2a49b664e2dba1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 11:02:56 -0700 Subject: [PATCH 682/960] Automated g4 rollback of changelist 190808678 PiperOrigin-RevId: 190955400 --- tensorflow/contrib/lite/toco/BUILD | 1 - .../graph_transformations.h | 1 - .../swap_elementwise_binary.cc | 175 ------------------ .../toco/graph_transformations/tests/BUILD | 11 -- .../tests/swap_elementwise_binary_test.cc | 89 --------- tensorflow/contrib/lite/toco/toco_tooling.cc | 1 - 6 files changed, 278 deletions(-) delete mode 100644 tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc delete mode 100644 tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index bba61627f9..d552de313c 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -280,7 +280,6 @@ cc_library( "graph_transformations/resolve_tensorflow_switch.cc", "graph_transformations/resolve_tensorflow_tile.cc", "graph_transformations/resolve_transpose_attributes.cc", - "graph_transformations/swap_elementwise_binary.cc", "graph_transformations/unfuse_activation_functions.cc", "graph_transformations/unpartition_embedding_lookup.cc", "graph_transformations/unroll_batch_matmul.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 1291825c8e..640afc7c74 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -180,7 +180,6 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantGather) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) -DECLARE_GRAPH_TRANSFORMATION(SwapElementwiseBinary) DECLARE_GRAPH_TRANSFORMATION(Dequantize) DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc deleted file mode 100644 index ecbce58d16..0000000000 --- a/tensorflow/contrib/lite/toco/graph_transformations/swap_elementwise_binary.cc +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include -#include -#include - -#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" -#include "tensorflow/contrib/lite/toco/model.h" -#include "tensorflow/contrib/lite/toco/runtime/types.h" -#include "tensorflow/contrib/lite/toco/tooling_util.h" -#include "tensorflow/core/platform/logging.h" - -namespace toco { - -namespace { - -bool ShapesAllowSwapping(const string& input_array_name, - const string& const_array_name, Model* model) { - const Array& input_array = model->GetOrCreateArray(input_array_name); - const Array& const_array = model->GetOrCreateArray(const_array_name); - // Wait until these shapes have been resolved. - if (!input_array.has_shape() || !const_array.has_shape()) { - return false; - } - - // Currently swapping is not handled for scalar const_array, though that could - // be done once there is a test model. - if (RequiredBufferSizeForShape(input_array.shape()) != - RequiredBufferSizeForShape(const_array.shape())) { - return false; - } - - return true; -} - -} // namespace - -// Swaps: -// Input -// \ -// (Reshape Op) Const -// \ / -// (Add/Sub/Mul/Div op) -// | -// Output -// -// To: -// -// Input Const -// \ / -// (Add/Sub/Mul/Div op) -// | -// (Reshape Op) -// | -// Output -// -// This can allow Add/Mul ops from batch normalization to be folded into an -// Input op from a FullyConnected layer. -bool SwapElementwiseBinary::Run(Model* model, std::size_t op_index) { - const auto element_wise_op_it = model->operators.begin() + op_index; - std::unique_ptr& element_wise_op = *element_wise_op_it; - DCHECK(element_wise_op); - - switch (element_wise_op->type) { - case OperatorType::kAdd: - case OperatorType::kSub: - case OperatorType::kMul: - case OperatorType::kDiv: - break; - default: - return false; - } - - int reshape_input = -1; - Operator* op = GetOpWithOutput(*model, element_wise_op->inputs[0]); - if (!op) { - return false; - } - - if (op->type == OperatorType::kTensorFlowReshape) { - reshape_input = 0; - } else { - op = GetOpWithOutput(*model, element_wise_op->inputs[1]); - if (!op || op->type != OperatorType::kTensorFlowReshape) { - return false; - } - reshape_input = 1; - } - - int const_input = (reshape_input == 0) ? 1 : 0; - const string& const_input_array = element_wise_op->inputs[const_input]; - if (!IsConstantParameterArray(*model, const_input_array)) { - return false; - } - - // Do not fold division if denominator is not constant. - if (element_wise_op->type != OperatorType::kDiv && const_input != 1) { - return false; - } - - const auto reshape_it = - FindOpWithOutput(*model, element_wise_op->inputs[reshape_input]); - // Note: we take copies of the tensor names here, instead of const-refs as we - // may overwrite the original names. - const string reshape_input_name = (*reshape_it)->inputs[0]; - const string intermediate_name = (*reshape_it)->outputs[0]; - const string element_wise_output_name = element_wise_op->outputs[0]; - - // Check the reshape op input and const op have their shapes resolved. - if (!ShapesAllowSwapping(reshape_input_name, const_input_array, model)) { - return false; - } - - int count_ops_consuming_output = CountOpsWithInput(*model, intermediate_name); - DCHECK_GE(count_ops_consuming_output, 1); - if (count_ops_consuming_output > 1) { - AddMessageF( - "Not exchanging element-wise function with %s because it is " - "consumed by more than 1 other operator", - LogName(**reshape_it)); - return false; - } - - // If the element_wise_op was originally producing an output_array we can't - // swap as otherwise the output array would change. It'd be nice to still be - // able to swap but if code is relying on the fetch names instead of array - // indices this won't work. - for (int i = 0; i < model->flags.output_arrays_size(); ++i) { - if (model->flags.output_arrays(i) == element_wise_op->outputs[0]) { - AddMessageF( - "Not exchanging activation function with %s to preserve output array " - "name %s", - LogName(**reshape_it), element_wise_op->outputs[0]); - return false; - } - } - - // Rewire by changing inputs, including all consumers. - // TODO(b/76086261): Replace with new utility function. - Operator* consumer = GetFirstOpWithInput(*model, element_wise_output_name); - while (consumer) { - for (int i = 0; i < consumer->inputs.size(); ++i) { - if (consumer->inputs[i] == element_wise_output_name) { - consumer->inputs[i] = intermediate_name; - } - } - consumer = GetFirstOpWithInput(*model, element_wise_output_name); - } - element_wise_op->inputs[reshape_input] = reshape_input_name; - (*reshape_it)->inputs[0] = element_wise_output_name; - - // Clear shapes; this will allow shape propagation to fix the sizes for us. - model->GetOrCreateArray(element_wise_output_name).clear_shape(); - - // Finally, swap operators. Note that this only works when there are no other - // direct descendents of the reshape operator. - element_wise_op.swap(*reshape_it); - - return true; -} - -} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index a2008ddbdb..8dcd4adc90 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -18,17 +18,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "swap_elementwise_binary_test", - srcs = ["swap_elementwise_binary_test.cc"], - deps = [ - "//tensorflow/contrib/lite/toco:graph_transformations", - "//tensorflow/contrib/lite/toco:model", - "//tensorflow/contrib/lite/toco:tooling_util", - "@com_google_googletest//:gtest_main", - ], -) - tf_cc_test( name = "lstm_utils_test", srcs = ["lstm_utils_test.cc"], diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc deleted file mode 100644 index c3778017f3..0000000000 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/swap_elementwise_binary_test.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include -#include -#include - -#include -#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" -#include "tensorflow/contrib/lite/toco/model.h" -#include "tensorflow/contrib/lite/toco/tooling_util.h" - -namespace toco { - -namespace { - -int ShapeCount(const std::vector& size) { - CHECK(size.size()); - int count = 1; - for (int dim : size) { - count *= dim; - } - return count; -} - -// Adds a new parameter array to the model. -void AddConstArray(const string& name, const float* data, - const std::vector& size, Model* model) { - Array& array = model->GetOrCreateArray(name); - array.data_type = ArrayDataType::kFloat; - Shape* shape = array.mutable_shape(); - *(shape->mutable_dims()) = size; - - auto& buffer = array.GetMutableBuffer(); - buffer.data.resize(ShapeCount(size)); - std::copy(data, data + ShapeCount(size), buffer.data.data()); -} - -} // namespace - -TEST(SwapElementwiseBinaryTest, SwapsReshape) { - Model model; - const float parameters[2][4] = {{0., 1., 2., 3.}, {10., 11., 12., 13.}}; - - AddConstArray("before_reshape", parameters[0], {2, 2}, &model); - AddConstArray("add_vector", parameters[1], {1, 4}, &model); - - auto reshape_op = absl::make_unique(); - reshape_op->shape = {1, 4}; - reshape_op->inputs = {"before_reshape"}; - reshape_op->outputs = {"after_reshape"}; - Array& reshape_array = model.GetOrCreateArray("after_reshape"); - *(reshape_array.mutable_shape()) = {1, 4}; - - auto add_op = absl::make_unique(); - add_op->inputs = {"after_reshape", "add_vector"}; - add_op->outputs = {"add"}; - Array& add_array = model.GetOrCreateArray("add"); - *(add_array.mutable_shape()) = {1, 4}; - - model.operators.push_back(std::move(reshape_op)); - model.operators.push_back(std::move(add_op)); - - auto transformation = absl::make_unique(); - ASSERT_TRUE(transformation->Run(&model, 1)); - - Operator* op = GetOpWithOutput(model, "add"); - ASSERT_NE(nullptr, op); - ASSERT_EQ(OperatorType::kAdd, op->type); - ASSERT_EQ(2, op->inputs.size()); - for (const string& input : op->inputs) { - EXPECT_TRUE(IsConstantParameterArray(model, input)) - << input << " is not const input"; - } -} - -} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 41ea1481bc..30dd6fab9e 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -90,7 +90,6 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveTensorFlowTile); transformations->Add(new ResolveTensorFlowConcat); transformations->Add(new ResolveMultiplyByZero); - transformations->Add(new SwapElementwiseBinary); transformations->Add(new IdentifyDilatedConv); transformations->Add(new IdentifyL2Normalization); transformations->Add(new IdentifyL2Pool); -- GitLab From 1d7c2fa60f717dea7239970d96f7d4bf96842039 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Fri, 30 Mar 2018 02:11:55 +0800 Subject: [PATCH 683/960] Raise a nicer error message when trying to call gradients with while loop (#18052) * Produce a nicer error message when trying to call gradients on a while loop without properly serializing graph via MetaGraphDef * Fix syntax and lint error * Fix minor intent: Wrong continued indentation (add 2 spaces) --- tensorflow/python/ops/control_flow_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 1278768d8b..710287012e 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -833,6 +833,9 @@ class GradLoopState(object): if outer_grad_state: outer_forward_ctxt = outer_grad_state.forward_context else: + if not hasattr(forward_ctxt, 'outer_context'): + raise ValueError("Failed to call gradients on a while loop without" + "properly serializing graph via MetaGraphDef") outer_forward_ctxt = forward_ctxt.outer_context # Add the forward loop counter. -- GitLab From d044a1ffa87e772076a14ace7a16bb97886a0804 Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Thu, 29 Mar 2018 11:12:07 -0700 Subject: [PATCH 684/960] Update README.md (#18076) Add YouTube channel --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0a309ebe2d..c66f7e3f3f 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ The TensorFlow project strives to abide by generally accepted best practices in * [TensorFlow Website](https://www.tensorflow.org) * [TensorFlow White Papers](https://www.tensorflow.org/about/bib) +* [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ) * [TensorFlow Model Zoo](https://github.com/tensorflow/models) * [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730) * [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si) -- GitLab From f2c3b869d354c05e497a79118a13a599dfc256bc Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Thu, 29 Mar 2018 11:12:36 -0700 Subject: [PATCH 685/960] [INTEL MKL] utilize test_util.IsMklEnabled() to check if the MKL support is turned on or not (#18062) * Fixed issue #92, timeline_test unit test fails, changed the test so that it can take cpu name changed with MKLDNN naming conversion * [INTEL MKL] utilize test_util.IsMklEnabled() to check if the MKL support is turned on or not --- tensorflow/python/client/timeline_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index 5e6b5acdb0..c046e9cfd4 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -24,6 +24,7 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.client import timeline from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_util from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables @@ -155,9 +156,7 @@ class TimelineTest(test.TestCase): ctf = step_analysis.chrome_trace.format_to_string() self._validateTrace(ctf) maximums = step_analysis.allocator_maximums - cpuname = 'cpu' - if 'mklcpu' in maximums: - cpuname = 'mkl' + cpuname + cpuname = 'mklcpu' if test_util.IsMklEnabled() else 'cpu' self.assertTrue(cpuname in maximums) cpu_max = maximums[ 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname] -- GitLab From 7c7350dfb35276eff2b8039bfa2def13bb736a4b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 11:24:44 -0700 Subject: [PATCH 686/960] Update ops-related pbtxt files. PiperOrigin-RevId: 190959179 --- .../core/ops/compat/ops_history.v1.pbtxt | 348 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 348 ++++++++++++++++++ 2 files changed, 696 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 05d6e02281..7cdf36f423 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10340,6 +10340,342 @@ op { } is_commutative: true } +op { + name: "BoostedTreesCalculateBestGainsPerFeature" + input_arg { + name: "node_id_range" + type: DT_INT32 + } + input_arg { + name: "stats_summary_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "node_ids_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "gains_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "thresholds_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "left_node_contribs_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "right_node_contribs_list" + type: DT_FLOAT + number_attr: "num_features" + } + attr { + name: "l1" + type: "float" + } + attr { + name: "l2" + type: "float" + } + attr { + name: "tree_complexity" + type: "float" + } + attr { + name: "max_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_features" + type: "int" + has_minimum: true + minimum: 1 + } +} +op { + name: "BoostedTreesCreateEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "stamp_token" + type: DT_INT64 + } + input_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesDeserializeEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "stamp_token" + type: DT_INT64 + } + input_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesEnsembleResourceHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "BoostedTreesGetEnsembleStates" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "stamp_token" + type: DT_INT64 + } + output_arg { + name: "num_trees" + type: DT_INT32 + } + output_arg { + name: "num_finalized_trees" + type: DT_INT32 + } + output_arg { + name: "num_attempted_layers" + type: DT_INT32 + } + is_stateful: true +} +op { + name: "BoostedTreesMakeStatsSummary" + input_arg { + name: "node_ids" + type: DT_INT32 + } + input_arg { + name: "gradients" + type: DT_FLOAT + } + input_arg { + name: "hessians" + type: DT_FLOAT + } + input_arg { + name: "bucketized_features_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "stats_summary" + type: DT_FLOAT + } + attr { + name: "max_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_buckets" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_features" + type: "int" + has_minimum: true + minimum: 1 + } +} +op { + name: "BoostedTreesPredict" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "bucketized_features" + type: DT_INT32 + number_attr: "num_bucketized_features" + } + output_arg { + name: "logits" + type: DT_FLOAT + } + attr { + name: "num_bucketized_features" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "logits_dimension" + type: "int" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "BoostedTreesSerializeEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "stamp_token" + type: DT_INT64 + } + output_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesTrainingPredict" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "cached_tree_ids" + type: DT_INT32 + } + input_arg { + name: "cached_node_ids" + type: DT_INT32 + } + input_arg { + name: "bucketized_features" + type: DT_INT32 + number_attr: "num_bucketized_features" + } + output_arg { + name: "partial_logits" + type: DT_FLOAT + } + output_arg { + name: "tree_ids" + type: DT_INT32 + } + output_arg { + name: "node_ids" + type: DT_INT32 + } + attr { + name: "num_bucketized_features" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "logits_dimension" + type: "int" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "BoostedTreesUpdateEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "feature_ids" + type: DT_INT32 + } + input_arg { + name: "node_ids" + type: DT_INT32 + number_attr: "num_features" + } + input_arg { + name: "gains" + type: DT_FLOAT + number_attr: "num_features" + } + input_arg { + name: "thresholds" + type: DT_INT32 + number_attr: "num_features" + } + input_arg { + name: "left_node_contribs" + type: DT_FLOAT + number_attr: "num_features" + } + input_arg { + name: "right_node_contribs" + type: DT_FLOAT + number_attr: "num_features" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "learning_rate" + type: "float" + } + attr { + name: "pruning_mode" + type: "int" + has_minimum: true + } + attr { + name: "num_features" + type: "int" + has_minimum: true + } + is_stateful: true +} op { name: "BroadcastArgs" input_arg { @@ -23333,6 +23669,18 @@ op { } } } +op { + name: "IsBoostedTreesEnsembleInitialized" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "is_initialized" + type: DT_BOOL + } + is_stateful: true +} op { name: "IsFinite" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 274a7fbf75..42a68cb712 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -3995,6 +3995,342 @@ op { } is_commutative: true } +op { + name: "BoostedTreesCalculateBestGainsPerFeature" + input_arg { + name: "node_id_range" + type: DT_INT32 + } + input_arg { + name: "stats_summary_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "node_ids_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "gains_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "thresholds_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "left_node_contribs_list" + type: DT_FLOAT + number_attr: "num_features" + } + output_arg { + name: "right_node_contribs_list" + type: DT_FLOAT + number_attr: "num_features" + } + attr { + name: "l1" + type: "float" + } + attr { + name: "l2" + type: "float" + } + attr { + name: "tree_complexity" + type: "float" + } + attr { + name: "max_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_features" + type: "int" + has_minimum: true + minimum: 1 + } +} +op { + name: "BoostedTreesCreateEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "stamp_token" + type: DT_INT64 + } + input_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesDeserializeEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "stamp_token" + type: DT_INT64 + } + input_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesEnsembleResourceHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "BoostedTreesGetEnsembleStates" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "stamp_token" + type: DT_INT64 + } + output_arg { + name: "num_trees" + type: DT_INT32 + } + output_arg { + name: "num_finalized_trees" + type: DT_INT32 + } + output_arg { + name: "num_attempted_layers" + type: DT_INT32 + } + is_stateful: true +} +op { + name: "BoostedTreesMakeStatsSummary" + input_arg { + name: "node_ids" + type: DT_INT32 + } + input_arg { + name: "gradients" + type: DT_FLOAT + } + input_arg { + name: "hessians" + type: DT_FLOAT + } + input_arg { + name: "bucketized_features_list" + type: DT_INT32 + number_attr: "num_features" + } + output_arg { + name: "stats_summary" + type: DT_FLOAT + } + attr { + name: "max_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_buckets" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_features" + type: "int" + has_minimum: true + minimum: 1 + } +} +op { + name: "BoostedTreesPredict" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "bucketized_features" + type: DT_INT32 + number_attr: "num_bucketized_features" + } + output_arg { + name: "logits" + type: DT_FLOAT + } + attr { + name: "num_bucketized_features" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "logits_dimension" + type: "int" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "BoostedTreesSerializeEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "stamp_token" + type: DT_INT64 + } + output_arg { + name: "tree_ensemble_serialized" + type: DT_STRING + } + is_stateful: true +} +op { + name: "BoostedTreesTrainingPredict" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "cached_tree_ids" + type: DT_INT32 + } + input_arg { + name: "cached_node_ids" + type: DT_INT32 + } + input_arg { + name: "bucketized_features" + type: DT_INT32 + number_attr: "num_bucketized_features" + } + output_arg { + name: "partial_logits" + type: DT_FLOAT + } + output_arg { + name: "tree_ids" + type: DT_INT32 + } + output_arg { + name: "node_ids" + type: DT_INT32 + } + attr { + name: "num_bucketized_features" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "logits_dimension" + type: "int" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "BoostedTreesUpdateEnsemble" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + input_arg { + name: "feature_ids" + type: DT_INT32 + } + input_arg { + name: "node_ids" + type: DT_INT32 + number_attr: "num_features" + } + input_arg { + name: "gains" + type: DT_FLOAT + number_attr: "num_features" + } + input_arg { + name: "thresholds" + type: DT_INT32 + number_attr: "num_features" + } + input_arg { + name: "left_node_contribs" + type: DT_FLOAT + number_attr: "num_features" + } + input_arg { + name: "right_node_contribs" + type: DT_FLOAT + number_attr: "num_features" + } + attr { + name: "max_depth" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "learning_rate" + type: "float" + } + attr { + name: "pruning_mode" + type: "int" + has_minimum: true + } + attr { + name: "num_features" + type: "int" + has_minimum: true + } + is_stateful: true +} op { name: "BroadcastArgs" input_arg { @@ -11365,6 +11701,18 @@ op { } } } +op { + name: "IsBoostedTreesEnsembleInitialized" + input_arg { + name: "tree_ensemble_handle" + type: DT_RESOURCE + } + output_arg { + name: "is_initialized" + type: DT_BOOL + } + is_stateful: true +} op { name: "IsFinite" input_arg { -- GitLab From 1c6e292e7cc348218db2048b241a7330cacbbef6 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Thu, 29 Mar 2018 11:54:55 -0700 Subject: [PATCH 687/960] Initialize pointer to ScopedAllocatorMgr in BaseGPUDevice. PiperOrigin-RevId: 190964008 --- tensorflow/core/common_runtime/gpu/gpu_device.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 52fd20e479..0b9e8f9cc2 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -257,6 +257,7 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name, physical_device_desc)), gpu_allocator_(gpu_allocator), cpu_allocator_(cpu_allocator), + scoped_allocator_mgr_(new ScopedAllocatorMgr(name)), tf_gpu_id_(tf_gpu_id), sync_every_op_(sync_every_op), max_streams_(max_streams) { -- GitLab From 4ebb2eac303a22b06597facd07793595e105169b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 12:02:50 -0700 Subject: [PATCH 688/960] Leaves attributes on outside_compilation nodes so they can be replicated in a later pass. PiperOrigin-RevId: 190965218 --- .../jit/encapsulate_subgraphs_pass.cc | 51 ++-- .../jit/encapsulate_subgraphs_pass_test.cc | 279 ++++++++++-------- tensorflow/contrib/tpu/ops/replication_ops.cc | 2 + 3 files changed, 187 insertions(+), 145 deletions(-) diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 7fc43fb263..53ec6c1e60 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -254,7 +254,8 @@ class Encapsulator { // Adds _RecvAtHost and _SendFromHost nodes, where needed, to graph_out. Status AddOutsideCompilationHostIONodes( - const string& subgraph_name, + const string& group_attribute, const string& subgraph_name, + const string& outside_compilation_attribute, const std::unordered_map& node_images, Graph* graph_out); @@ -405,7 +406,9 @@ class Encapsulator { // Builds a _RecvAtHost node producing all the inputs of an // outside_compilation subgraph and stores it in oc_subgraph.recv_at_host. - Status AddRecvAtHostNode(const string& subgraph_name, + Status AddRecvAtHostNode(const string& group_attribute, + const string& subgraph_name, + const string& outside_compilation_attribute, const string& oc_subgraph_name, OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out); @@ -414,8 +417,10 @@ class Encapsulator { // outside_compilation subgraph and stores it in oc_subgraph.send_from_host. Status AddSendFromHostNode( const std::unordered_map& node_images, - const string& subgraph_name, const string& oc_subgraph_name, - OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out); + const string& group_attribute, const string& subgraph_name, + const string& outside_compilation_attribute, + const string& oc_subgraph_name, OutsideCompilationSubgraph* oc_subgraph, + Graph* graph_out); // The subgraph extracted from the input graph, suitable for being turned // into a FunctionDef. Inputs are fed by _Arg nodes, and outputs are @@ -1114,7 +1119,8 @@ Status Encapsulator::Subgraph::AddHostComputeKeyPlaceholder( } Status Encapsulator::Subgraph::AddRecvAtHostNode( - const string& subgraph_name, const string& oc_subgraph_name, + const string& group_attribute, const string& subgraph_name, + const string& outside_compilation_attribute, const string& oc_subgraph_name, OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) { if (host_compute_key_placeholder_ == nullptr) { TF_RETURN_IF_ERROR(AddHostComputeKeyPlaceholder(oc_subgraph, graph_out)); @@ -1135,14 +1141,15 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_recv"), kRecvAtHostOp); - // TODO(misard) When we add replication the device placement will have to be - // redone. builder.Device(device_); builder.Attr("Toutputs", dtypes); - // TODO(misard) For now we only support TPU device 0. + // The correct device_ordinal will be inserted during replication in a + // subsequent rewrite. builder.Attr("device_ordinal", 0); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); + builder.Attr(group_attribute, subgraph_name); + builder.Attr(outside_compilation_attribute, oc_subgraph_name); builder.Input(host_compute_key_placeholder_->name(), 0, DT_STRING); Status s = builder.Finalize(&recv_def); if (!s.ok()) return s; @@ -1163,7 +1170,8 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode( Status Encapsulator::Subgraph::AddSendFromHostNode( const std::unordered_map& node_images, - const string& subgraph_name, const string& oc_subgraph_name, + const string& group_attribute, const string& subgraph_name, + const string& outside_compilation_attribute, const string& oc_subgraph_name, OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) { if (host_compute_key_placeholder_ == nullptr) { TF_RETURN_IF_ERROR(AddHostComputeKeyPlaceholder(oc_subgraph, graph_out)); @@ -1188,14 +1196,15 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_send"), kSendFromHostOp); - // TODO(misard) When we add replication the device placement will have to be - // redone. builder.Device(device_); builder.Attr("Tinputs", dtypes); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); - // TODO(misard) For now we only support TPU device 0. + // The correct device_ordinal will be inserted during replication in a + // subsequent rewrite. builder.Attr("device_ordinal", 0); + builder.Attr(group_attribute, subgraph_name); + builder.Attr(outside_compilation_attribute, oc_subgraph_name); builder.Input(inputs); builder.Input(host_compute_key_placeholder_->name(), 0, DT_STRING); Status s = builder.Finalize(&send_def); @@ -1216,7 +1225,8 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( } Status Encapsulator::Subgraph::AddOutsideCompilationHostIONodes( - const string& subgraph_name, + const string& group_attribute, const string& subgraph_name, + const string& outside_compilation_attribute, const std::unordered_map& node_images, Graph* graph_out) { for (auto& outside_compilation_subgraph_entry : @@ -1226,14 +1236,16 @@ Status Encapsulator::Subgraph::AddOutsideCompilationHostIONodes( outside_compilation_subgraph_entry.second; if (!oc_subgraph.inputs.empty() || !oc_subgraph.control_inputs.empty()) { - TF_RETURN_IF_ERROR( - AddRecvAtHostNode(subgraph_name, oc_name, &oc_subgraph, graph_out)); + TF_RETURN_IF_ERROR(AddRecvAtHostNode(group_attribute, subgraph_name, + outside_compilation_attribute, + oc_name, &oc_subgraph, graph_out)); } if (!oc_subgraph.outputs_by_src.empty() || !oc_subgraph.control_outputs.empty()) { - TF_RETURN_IF_ERROR(AddSendFromHostNode(node_images, subgraph_name, - oc_name, &oc_subgraph, graph_out)); + TF_RETURN_IF_ERROR(AddSendFromHostNode( + node_images, group_attribute, subgraph_name, + outside_compilation_attribute, oc_name, &oc_subgraph, graph_out)); } } return Status::OK(); @@ -1450,8 +1462,6 @@ Status Encapsulator::CopyNodesToOutputGraph( "Parallel checking is not supported when outside_compilation " "clusters are present."); } - image->ClearAttr(group_attribute_); - image->ClearAttr(outside_compilation_attribute_); } (*node_images)[node] = image; } @@ -1477,7 +1487,8 @@ Status Encapsulator::AddOutsideCompilationHostIONodes( const string& subgraph_name = subgraph_entry.first; Subgraph& subgraph = subgraph_entry.second; TF_RETURN_IF_ERROR(subgraph.AddOutsideCompilationHostIONodes( - subgraph_name, node_images, graph_out)); + group_attribute_, subgraph_name, outside_compilation_attribute_, + node_images, graph_out)); } return Status::OK(); } diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 94481a1fde..7899b5d72d 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -382,24 +382,36 @@ Node* KeyPlaceholder(const string& call_node, .FinalizeBuilder(&node_builder); } -Node* RecvAtHost(ops::NodeOut key_input, const string& key, +Node* RecvAtHost(ops::NodeOut key_input, const string& cluster, + const string& oc_cluster, const gtl::ArraySlice& dtypes, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; - NodeBuilder node_builder(opts.GetNameForOp("_XlaRecvAtHost"), + string key = + strings::StrCat("host_compute_channel_", cluster, "_", oc_cluster); + string name = strings::StrCat("outside_compilation_", cluster, "_", + oc_cluster, "_recv"); + NodeBuilder node_builder(opts.WithName(name).GetNameForOp("_XlaRecvAtHost"), "_XlaRecvAtHost", opts.op_registry()); node_builder.Input(std::move(key_input)); return opts.WithAttr("Toutputs", dtypes) .WithAttr("key", key) .WithAttr("device_ordinal", 0) + .WithAttr("_encapsulate", cluster) + .WithAttr("_outside", oc_cluster) .FinalizeBuilder(&node_builder); } -Node* SendFromHost(ops::NodeOut key_input, const string& key, +Node* SendFromHost(ops::NodeOut key_input, const string& cluster, + const string& oc_cluster, const std::vector& inputs, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; - NodeBuilder node_builder(opts.GetNameForOp("_XlaSendFromHost"), + string key = + strings::StrCat("host_compute_channel_", cluster, "_", oc_cluster); + string name = strings::StrCat("outside_compilation_", cluster, "_", + oc_cluster, "_send"); + NodeBuilder node_builder(opts.WithName(name).GetNameForOp("_XlaSendFromHost"), "_XlaSendFromHost", opts.op_registry()); node_builder.Input(inputs); node_builder.Input(std::move(key_input)); @@ -410,6 +422,8 @@ Node* SendFromHost(ops::NodeOut key_input, const string& key, return opts.WithAttr("Tinputs", dtypes) .WithAttr("key", key) .WithAttr("device_ordinal", 0) + .WithAttr("_encapsulate", cluster) + .WithAttr("_outside", oc_cluster) .FinalizeBuilder(&node_builder); } @@ -856,14 +870,14 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); Node* key_constant = KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); - Node* recv = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - shape.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape.opts()); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), - shape.opts().WithName("E")); - SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); + shape.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts()); TF_EXPECT_OK( AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } @@ -901,17 +915,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, b2.opts()); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), - b2.opts().WithName("E").WithControlInputs({recv, b})); - Node* send = SendFromHost(ops::NodeOut(key_constant, 0), - "host_compute_channel_F1_O1", {e}, - b2.opts() - .WithName("outside_compilation_F1_O1_send") - .WithControlInput(e)); + b2.opts() + .WithName("E") + .WithControlInputs({recv, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); Node* s = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), @@ -976,14 +989,14 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); Node* key_constant = KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); - Node* recv = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - shape1.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape1.opts()); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), - shape1.opts().WithName("E")); - SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {e}, shape1.opts().WithName("outside_compilation_F1_O1_send")); + shape1.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts()); TF_EXPECT_OK( AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected)); } @@ -992,19 +1005,21 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately); Node* key_constant = KeyPlaceholderShape(shape2.opts().WithName("KnownShape/_0")); - Node* recv1 = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - shape2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape2.opts()); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), - shape2.opts().WithName("E")); - Node* recv2 = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", - {DT_FLOAT, DT_FLOAT}, - shape2.opts().WithName("outside_compilation_F1_O2_recv")); - Node* h = Binary(ops::NodeOut(recv2, 0), e, shape2.opts().WithName("H")); - SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", - {h}, shape2.opts().WithName("outside_compilation_F1_O2_send")); + shape2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT, DT_FLOAT}, shape2.opts()); + Node* h = Binary(ops::NodeOut(recv2, 0), e, + shape2.opts() + .WithName("H") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h}, shape2.opts()); TF_EXPECT_OK( AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected)); } @@ -1054,28 +1069,32 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv1 = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, b2.opts()); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), - b2.opts().WithName("E").WithControlInputs({recv1, b})); - Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), - "host_compute_channel_F1_O1", {e}, - b2.opts() - .WithName("outside_compilation_F1_O1_send") - .WithControlInput(e)); - - Node* recv2 = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", - {DT_FLOAT, DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O2_recv")); + b2.opts() + .WithName("E") + .WithControlInputs({recv1, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); + + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", + {DT_FLOAT, DT_FLOAT}, b2.opts()); Node* g = Binary(e, ops::NodeOut(recv2, 1), - b2.opts().WithName("G").WithControlInputs({recv2, e})); - Node* h = Binary(ops::NodeOut(recv2, 0), e, b2.opts().WithName("H")); - Node* send2 = SendFromHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", {h}, - b2.opts().WithName("outside_compilation_F1_O2_send")); + b2.opts() + .WithName("G") + .WithControlInputs({recv2, e}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2")); + Node* h = Binary(ops::NodeOut(recv2, 0), e, + b2.opts() + .WithName("H") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O2")); + Node* send2 = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h}, b2.opts()); Node* s = Sequencer(b2.opts() .WithName("F1_sequencer") @@ -1139,14 +1158,14 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); Node* key_constant = KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); - Node* recv = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - shape.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, shape.opts()); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), - shape.opts().WithName("E")); - SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); + shape.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts()); TF_EXPECT_OK( AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } @@ -1207,17 +1226,16 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { Node* key_constant1 = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv1 = - RecvAtHost(ops::NodeOut(key_constant1, 0), "host_compute_channel_F1_O1", - {DT_FLOAT, DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant1, 0), "F1", "O1", + {DT_FLOAT, DT_FLOAT}, b2.opts()); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), - b2.opts().WithName("E").WithControlInputs({recv1, b})); - Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), - "host_compute_channel_F1_O1", {e}, - b2.opts() - .WithName("outside_compilation_F1_O1_send") - .WithControlInput(e)); + b2.opts() + .WithName("E") + .WithControlInputs({recv1, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); Node* s1 = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), "F1"); @@ -1229,13 +1247,15 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { Node* key_constant2 = KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder")); - Node* recv2 = RecvAtHost( - ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", - {DT_FLOAT}, b2.opts().WithName("outside_compilation_F2_O1_recv")); - Node* h = Binary(ops::NodeOut(call1, 1), recv2, b2.opts().WithName("H")); - Node* send2 = SendFromHost( - ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", {h}, - b2.opts().WithName("outside_compilation_F2_O1_send")); + Node* recv2 = RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1", + {DT_FLOAT}, b2.opts()); + Node* h = Binary(ops::NodeOut(call1, 1), recv2, + b2.opts() + .WithName("H") + .WithAttr("_encapsulate", "F2") + .WithAttr("_outside", "O1")); + Node* send2 = SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h}, + b2.opts()); Node* s2 = Sequencer( b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}), @@ -1311,12 +1331,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { Node* a = InputShaped(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); - Node* e = Unary(a, b2.opts().WithName("E")); + Node* e = Unary(a, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* send1 = SendFromHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, - b2.opts().WithName("outside_compilation_F1_O1_send")); + Node* send1 = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); Node* s1 = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInput(send1), "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); @@ -1395,12 +1417,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv1 = - RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {}, b2.opts().WithName("outside_compilation_F1_O1_recv")); - Node* e = Unary(a, b2.opts().WithName("E").WithControlInput(recv1)); - Node* send1 = SendFromHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, - b2.opts().WithName("outside_compilation_F1_O1_send")); + RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {}, b2.opts()); + Node* e = Unary(a, b2.opts() + .WithName("E") + .WithControlInput(recv1) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts()); Node* s1 = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), "F1"); @@ -1470,10 +1494,12 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv1 = RecvAtHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); - Node* e = Unary(recv1, b2.opts().WithName("E")); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv1, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); Node* s1 = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInput(recv1), "F1"); NodeBuilder node_builder1("F1", "F1", lib_def.get()); @@ -1547,15 +1573,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv1 = RecvAtHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); - Node* e = Unary(recv1, b2.opts().WithName("E")); - Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), - "host_compute_channel_F1_O1", {}, - b2.opts() - .WithName("outside_compilation_F1_O1_send") - .WithControlInput(e)); + Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = Unary(recv1, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {}, + b2.opts().WithControlInput(e)); Node* s1 = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}), "F1"); @@ -1615,7 +1640,10 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) { Node* a = Input(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); - Node* e = Unary(a, b2.opts().WithName("E")); + Node* e = Unary(a, b2.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); @@ -1666,12 +1694,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { Node* key_constant = KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); Node* known = KnownShape({2}, shape.opts().WithName("KnownShape/_1")); - Node* recv = RecvAtHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, - shape.opts().WithName("outside_compilation_F1_O1_recv")); - Node* e = BinaryUnknownShape(known, recv, shape.opts().WithName("E")); - SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", - {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, shape.opts()); + Node* e = BinaryUnknownShape(known, recv, + shape.opts() + .WithName("E") + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts()); TF_EXPECT_OK( AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected)); } @@ -1709,17 +1739,16 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { Node* key_constant = KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); - Node* recv = RecvAtHost( - ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); - Node* e = BinaryUnknownShape( - c, ops::NodeOut(recv, 0), - b2.opts().WithName("E").WithControlInputs({recv, b})); - Node* send = SendFromHost(ops::NodeOut(key_constant, 0), - "host_compute_channel_F1_O1", {e}, - b2.opts() - .WithName("outside_compilation_F1_O1_send") - .WithControlInput(e)); + Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", + {DT_FLOAT}, b2.opts()); + Node* e = BinaryUnknownShape(c, ops::NodeOut(recv, 0), + b2.opts() + .WithName("E") + .WithControlInputs({recv, b}) + .WithAttr("_encapsulate", "F1") + .WithAttr("_outside", "O1")); + Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, + b2.opts().WithControlInput(e)); Node* s = Sequencer( b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}), diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc index cba71c6b98..3bdf7c2f83 100644 --- a/tensorflow/contrib/tpu/ops/replication_ops.cc +++ b/tensorflow/contrib/tpu/ops/replication_ops.cc @@ -27,6 +27,7 @@ REGISTER_OP("TPUReplicateMetadata") .Attr("topology: string = \"\"") .Attr("device_assignment: list(int) = []") .Attr("computation_shape: list(int) = []") + .Attr("host_compute_core: list(string) = []") .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("TPUReplicatedInput") @@ -68,6 +69,7 @@ REGISTER_OP("TPUReplicate") .Attr("num_replicas: int >= 1") .Attr("topology: string = \"\"") .Attr("device_assignment: list(int) = []") + .Attr("host_compute_core: list(string) = []") .Attr("computation_shape: list(int) = []") .Attr("Tinputs: list(type) >= 0") .Attr("Tbroadcast_inputs: list(type) >= 0") -- GitLab From b80960d8b7c87a3cf221cdbbb9c68c5970bfd3c7 Mon Sep 17 00:00:00 2001 From: Zhixian Yan Date: Thu, 29 Mar 2018 12:39:33 -0700 Subject: [PATCH 689/960] Add more tflite hosted models like resnet, inception-v4, nasnet. PiperOrigin-RevId: 190970367 --- tensorflow/contrib/lite/g3doc/models.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/models.md b/tensorflow/contrib/lite/g3doc/models.md index 48f43d4fc4..d8134d5a00 100644 --- a/tensorflow/contrib/lite/g3doc/models.md +++ b/tensorflow/contrib/lite/g3doc/models.md @@ -1,7 +1,13 @@ # List of Hosted Models -* [Inception V3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) -* [Inception V3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) +* [NASNet large](https://storage.googleapis.com/download.tensorflow.org/models/tflite/nasnet_large_2018_03_27.zip) +* [NASNet mobile](https://storage.googleapis.com/download.tensorflow.org/models/tflite/nasnet_mobile_2018_03_27.zip) +* [ResNet v2 101](https://storage.googleapis.com/download.tensorflow.org/models/tflite/resnet_v2_101_2018_03_27.zip) +* [ResNet v2 50](https://storage.googleapis.com/download.tensorflow.org/models/tflite/resnet_v2_50_2018_03_27.zip) +* [Inception ResNet v2](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_resnet_v2_2018_03_27.zip) +* [Inception v4](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v4_2018_03_27.zip) +* [Inception v3 2015](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_2015_2017_11_10.zip) +* [Inception v3 Slim 2016](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) * [Mobilenet 0.25 128 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_128_float_2017_11_08.zip) * [Mobilenet 0.25 160 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_160_float_2017_11_08.zip) * [Mobilenet 0.25 192 Float](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_0.25_192_float_2017_11_08.zip) -- GitLab From e58e4c754fa6145af2a411b940d8f7347a071b6f Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 29 Mar 2018 12:54:59 -0700 Subject: [PATCH 690/960] Minor adjustments to an error message. PiperOrigin-RevId: 190972253 --- tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py index 493d1848c0..eea57ed336 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py @@ -72,9 +72,9 @@ def _query_tpu_system_metadata(master_address, run_config, tpu_core_count += 1 break except errors.DeadlineExceededError: - msg = ('Fail to connect Tensorflow master. It could be the TPU worker is ' - 'not ready (still under scheduling) or Tensorflow ' - 'master address is correct: got (%s).' % + msg = ('Failed to connect to the Tensorflow master. The TPU worker may ' + 'not be ready (still scheduling) or the Tensorflow master address ' + 'is incorrect: got (%s).' % (master_address)) # TODO(xiejw): For local or grpc master we might not need retry logic -- GitLab From d9e5f2754cabd9680d5481464a4085e79856eb78 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 29 Mar 2018 12:58:43 -0700 Subject: [PATCH 691/960] Avoid evaluating SaveSpec Tensors multiple times when executing eagerly The Saver now calls a SaveSpec callable once when saving and not at all when restoring. Previously saving evaluated the callable twice and restoring once (copying a variable's value each time). Requires a dtype be passed to a SaveSpec if its tensor is callable. PiperOrigin-RevId: 190972754 --- tensorflow/python/training/saver.py | 40 +++++++++++++++++++----- tensorflow/python/training/saver_test.py | 31 ++++++++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index cec581d997..e40b8d22ed 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -91,17 +91,27 @@ class BaseSaverBuilder(object): class SaveSpec(object): """Class used to describe tensor slices that need to be saved.""" - def __init__(self, tensor, slice_spec, name): + def __init__(self, tensor, slice_spec, name, dtype=None): """Creates a `SaveSpec` object. Args: tensor: the tensor to save or callable that produces a tensor to save. slice_spec: the slice to be saved. See `Variable.SaveSliceInfo`. name: the name to save the tensor under. + dtype: The data type of the Tensor. Required if `tensor` is callable. + Used for error checking in the restore op. """ self._tensor = tensor self.slice_spec = slice_spec self.name = name + if callable(self._tensor): + if dtype is None: + raise AssertionError( + "When passing a callable `tensor` to a SaveSpec, an explicit " + "dtype must be provided.") + self.dtype = dtype + else: + self.dtype = tensor.dtype @property def tensor(self): @@ -117,14 +127,27 @@ class BaseSaverBuilder(object): op: the "producer" object that this class wraps; it produces a list of tensors to save. E.g., a "Variable" object saving its backing tensor. specs: a list of SaveSpec, each element of which describes one tensor to - save under this object. + save under this object. All Tensors must be on the same device. name: the name to save the object under. """ self.op = op self.specs = specs self.name = name - # The device of this saveable. All tensors must be on the same device. - self.device = specs[0].tensor.device + self._device = None + + @property + def device(self): + """The device for SaveSpec Tensors.""" + # Note that SaveSpec.tensor runs Tensor-gathering ops when executing + # eagerly, making this call potentially very expensive. + # + # TODO(allenl): Consider another way to gather device information. Lower + # priority since this property isn't part of the normal save()/restore() + # workflow, but does come up when some alternative builders are passed to + # the Saver. + if self._device is None: + self._device = self.specs[0].tensor.device + return self._device def restore(self, restored_tensors, restored_shapes): """Restores this object from 'restored_tensors'. @@ -148,7 +171,7 @@ class BaseSaverBuilder(object): """SaveableObject implementation that handles Variables.""" def __init__(self, var, slice_spec, name): - spec = BaseSaverBuilder.SaveSpec(var, slice_spec, name) + spec = BaseSaverBuilder.SaveSpec(var, slice_spec, name, dtype=var.dtype) super(BaseSaverBuilder.VariableSaveable, self).__init__(var, [spec], name) def restore(self, restored_tensors, restored_shapes): @@ -186,7 +209,8 @@ class BaseSaverBuilder(object): raise ValueError( "Saveable is neither a resource variable nor a read operation." " Got: %s" % repr(var)) - spec = BaseSaverBuilder.SaveSpec(tensor, slice_spec, name) + spec = BaseSaverBuilder.SaveSpec(tensor, slice_spec, name, + dtype=var.dtype) super(BaseSaverBuilder.ResourceVariableSaveable, self).__init__( var, [spec], name) @@ -295,7 +319,7 @@ class BaseSaverBuilder(object): filename_tensor, [spec.name], [spec.slice_spec], - [spec.tensor.dtype])[0]) + [spec.dtype])[0]) return tensors # pylint: enable=unused-argument @@ -854,7 +878,7 @@ class BulkSaverBuilder(BaseSaverBuilder): restore_specs = [] for saveable in saveables: for spec in saveable.specs: - restore_specs.append((spec.name, spec.slice_spec, spec.tensor.dtype)) + restore_specs.append((spec.name, spec.slice_spec, spec.dtype)) names, slices, dtypes = zip(*restore_specs) # Load all tensors onto CPU 0 for compatibility with existing code. diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index d1c24b3930..14dda79979 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2980,6 +2980,37 @@ class CheckpointableCompatibilityTests(test.TestCase): self.assertEqual(42., self.evaluate(v.non_dep_variable)) self.assertEqual(42., self.evaluate(v.mirrored)) + def testSingleTensorEvaluation(self): + + class _CountingSaveable(saver_module.BaseSaverBuilder.SaveableObject): + + def __init__(self, name): + self.eval_count = 0 + def _tensor(): + self.eval_count += 1 + return constant_op.constant([1.]) + dummy_op = constant_op.constant([2.]) + super(_CountingSaveable, self).__init__( + dummy_op, + [saver_module.BaseSaverBuilder.SaveSpec( + _tensor, "", name, dtype=dummy_op.dtype)], + name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into both variables.""" + pass + + with context.eager_mode(): + v = _CountingSaveable("foo") + saver = saver_module.Saver(var_list=[v]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + with self.test_session() as sess: + save_path = saver.save(sess, prefix) + self.assertEqual(1, v.eval_count) + saver.restore(sess, save_path) + self.assertEqual(1, v.eval_count) + if __name__ == "__main__": test.main() -- GitLab From a259ba951d3af9f62a0f95a881abf9ebaa45782b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 13:18:54 -0700 Subject: [PATCH 692/960] Fix docstring. PiperOrigin-RevId: 190975767 --- tensorflow/contrib/autograph/converters/break_statements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/autograph/converters/break_statements.py b/tensorflow/contrib/autograph/converters/break_statements.py index 721bc0ccd0..48026bccab 100644 --- a/tensorflow/contrib/autograph/converters/break_statements.py +++ b/tensorflow/contrib/autograph/converters/break_statements.py @@ -27,7 +27,7 @@ from tensorflow.contrib.autograph.pyct.static_analysis.annos import NodeAnno class BreakCanonicalizationTransformer(transformer.Base): - """Canonicalizes continue statements into additional conditionals.""" + """Canonicalizes break statements into additional conditionals.""" def __init__(self, context): super(BreakCanonicalizationTransformer, self).__init__(context) -- GitLab From eb2be37c12ae2b6c996f3f4c064e3d10f9565eab Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Thu, 29 Mar 2018 13:22:45 -0700 Subject: [PATCH 693/960] Internal change. PiperOrigin-RevId: 190976338 --- tensorflow/python/layers/normalization.py | 76 +++++++++++------------ tensorflow/python/training/distribute.py | 10 +++ 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 29fb92ccb5..83b201e642 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -32,12 +32,12 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.layers import base from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn -from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import init_ops from tensorflow.python.ops import state_ops +from tensorflow.python.training import distribute as distribute_lib from tensorflow.python.training import moving_averages from tensorflow.python.util.tf_export import tf_export @@ -178,6 +178,11 @@ class BatchNormalization(base.Layer): self.renorm_clipping = renorm_clipping self.renorm_momentum = renorm_momentum + def _add_tower_local_variable(self, *args, **kwargs): + tower_context = distribute_lib.get_tower_context() + with tower_context.tower_local_var_scope('mean'): + return self.add_variable(*args, **kwargs) + def build(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape) if not input_shape.ndims: @@ -305,14 +310,14 @@ class BatchNormalization(base.Layer): self._scope.set_partitioner(None) else: partitioner = None - self.moving_mean = self.add_variable( + self.moving_mean = self._add_tower_local_variable( name='moving_mean', shape=param_shape, dtype=param_dtype, initializer=self.moving_mean_initializer, trainable=False) - self.moving_variance = self.add_variable( + self.moving_variance = self._add_tower_local_variable( name='moving_variance', shape=param_shape, dtype=param_dtype, @@ -328,7 +333,7 @@ class BatchNormalization(base.Layer): # stack to be cleared. The nested ones use a `lambda` to set the desired # device and ignore any devices that may be set by the custom getter. def _renorm_variable(name, shape): - var = self.add_variable( + var = self._add_tower_local_variable( name=name, shape=shape, dtype=param_dtype, @@ -336,24 +341,19 @@ class BatchNormalization(base.Layer): trainable=False) return var - with ops.device(None): - device = ( - self.moving_mean.device if context.executing_eagerly() else - (lambda _: self.moving_mean.device)) - with ops.device(device): - self.renorm_mean = _renorm_variable('renorm_mean', param_shape) - self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) - # We initialize renorm_stddev to 0, and maintain the (0-initialized) - # renorm_stddev_weight. This allows us to (1) mix the average - # stddev with the minibatch stddev early in training, and (2) compute - # the unbiased average stddev by dividing renorm_stddev by the weight. - device = ( - self.moving_variance.device if context.executing_eagerly() else - (lambda _: self.moving_variance.device)) - with ops.device(device): - self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape) - self.renorm_stddev_weight = _renorm_variable( - 'renorm_stddev_weight', ()) + with distribute_lib.get_distribution_strategy().colocate_vars_with( + self.moving_mean): + self.renorm_mean = _renorm_variable('renorm_mean', param_shape) + self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) + # We initialize renorm_stddev to 0, and maintain the (0-initialized) + # renorm_stddev_weight. This allows us to (1) mix the average + # stddev with the minibatch stddev early in training, and (2) compute + # the unbiased average stddev by dividing renorm_stddev by the weight. + with distribute_lib.get_distribution_strategy().colocate_vars_with( + self.moving_variance): + self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape) + self.renorm_stddev_weight = _renorm_variable('renorm_stddev_weight', + ()) finally: if partitioner: self._scope.set_partitioner(partitioner) @@ -362,12 +362,11 @@ class BatchNormalization(base.Layer): def _assign_moving_average(self, variable, value, momentum): with ops.name_scope(None, 'AssignMovingAvg', [variable, value, momentum]) as scope: - with ops.colocate_with(variable): - decay = ops.convert_to_tensor(1.0 - momentum, name='decay') - if decay.dtype != variable.dtype.base_dtype: - decay = math_ops.cast(decay, variable.dtype.base_dtype) - update_delta = (variable - value) * decay - return state_ops.assign_sub(variable, update_delta, name=scope) + decay = ops.convert_to_tensor(1.0 - momentum, name='decay') + if decay.dtype != variable.dtype.base_dtype: + decay = math_ops.cast(decay, variable.dtype.base_dtype) + update_delta = (variable - value) * decay + return state_ops.assign_sub(variable, update_delta, name=scope) def _fused_batch_norm(self, inputs, training): """Returns the output of fused batch norm.""" @@ -473,16 +472,13 @@ class BatchNormalization(base.Layer): return array_ops.identity(var) return utils.smart_cond(training, _do_update, _fake_update) - with ops.colocate_with(self.moving_mean): - new_mean = _update_renorm_variable(self.renorm_mean, - self.renorm_mean_weight, - mean) - with ops.colocate_with(self.moving_variance): - new_stddev = _update_renorm_variable(self.renorm_stddev, - self.renorm_stddev_weight, - stddev) - # Make sqrt(moving_variance + epsilon) = new_stddev. - new_variance = math_ops.square(new_stddev) - self.epsilon + # TODO(yuefengz): colocate the operations + new_mean = _update_renorm_variable(self.renorm_mean, + self.renorm_mean_weight, mean) + new_stddev = _update_renorm_variable(self.renorm_stddev, + self.renorm_stddev_weight, stddev) + # Make sqrt(moving_variance + epsilon) = new_stddev. + new_variance = math_ops.square(new_stddev) - self.epsilon return (r, d, new_mean, new_variance) diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index f98872775a..d5106752dd 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -1082,6 +1082,16 @@ class _DefaultDistributionStrategy(DistributionStrategy): return _CurrentDistributionContext( self, variable_scope.variable_creator_scope(creator)) + def tower_local_var_scope(self, reduce_method): + """Does not set to resource variables.""" + def create_tower_local_variable(next_creator, *args, **kwargs): + _require_distribution_strategy_scope(self) + kwargs["tower_local_reduce_method"] = reduce_method + return next_creator(*args, **kwargs) + + _require_distribution_strategy_scope(self) + return variable_scope.variable_creator_scope(create_tower_local_variable) + def colocate_vars_with(self, colocate_with_variable): """Does not require `self.scope`.""" _require_distribution_strategy_scope(self) -- GitLab From ae94d2caaa713393d9c046f46e1ed7303ecf308c Mon Sep 17 00:00:00 2001 From: Nathan Burnham Date: Thu, 29 Mar 2018 16:26:07 -0400 Subject: [PATCH 694/960] Fixed a spelling error that broke the GANEstimator documentation example (#18097) Fixed a spelling error that broke the tfgan.estimator.GANEstimator documentation example --- .../contrib/gan/python/estimator/python/gan_estimator_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index 082c42eba1..e3fc6bf0f0 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -88,8 +88,8 @@ class GANEstimator(estimator.Estimator): discriminator_fn=discriminator_fn, generator_loss_fn=tfgan.losses.wasserstein_generator_loss, discriminator_loss_fn=tfgan.losses.wasserstein_discriminator_loss, - generator_optimizer=tf.train.AdamOptimizier(0.1, 0.5), - discriminator_optimizer=tf.train.AdamOptimizier(0.1, 0.5)) + generator_optimizer=tf.train.AdamOptimizer(0.1, 0.5), + discriminator_optimizer=tf.train.AdamOptimizer(0.1, 0.5)) # Train estimator. gan_estimator.train(train_input_fn, steps) -- GitLab From 690ecae1f2519ed54693d51af0d28372a02ff31e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 29 Mar 2018 16:26:19 -0400 Subject: [PATCH 695/960] Initial commit for the demo notebook (#18093) * Create touch.txt Dummy file to create the branch and directory structure * Add files via upload Initial commit * Delete touch.txt --- .../notebooks/dev_summit_2018_demo.ipynb | 1970 +++++++++++++++++ 1 file changed, 1970 insertions(+) create mode 100644 tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb diff --git a/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb b/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb new file mode 100644 index 0000000000..3129a39a4b --- /dev/null +++ b/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb @@ -0,0 +1,1970 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Dev Summit 2018 - Autograph", + "version": "0.3.2", + "views": {}, + "default_view": {}, + "provenance": [ + { + "file_id": "1wCZUh73zTNs1jzzYjqoxMIdaBWCdKJ2K", + "timestamp": 1522238054357 + }, + { + "file_id": "1_HpC-RrmIv4lNaqeoslUeWaX8zH5IXaJ", + "timestamp": 1521743157199 + }, + { + "file_id": "1mjO2fQ2F9hxpAzw2mnrrUkcgfb7xSGW-", + "timestamp": 1520522344607 + } + ], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python2", + "display_name": "Python 2" + } + }, + "cells": [ + { + "metadata": { + "id": "g7nGs4mzVUHP", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Experimental: TF Autograph\n", + "**TensorFlow Dev Summit, 2018.**\n", + "\n", + "This interactive notebook demonstrates **autograph**, an experimental source-code transformation library to automatically convert TF.Eager and Python code to TensorFlow graphs.\n", + "\n", + "**Note: this is pre-alpha software!** The notebook works best with Python 2, for now.\n", + "\n", + "> ![alt text](https://lh3.googleusercontent.com/QOvy0clmg7siaVKzwmSPAjicWWNQ0OeyaB16plDjSJMf35WD3vLjF6mz4CGrhSHw60HnlZPJjkyDCBzw5XOI0oBGSewyYw=s688)\n", + "\n", + "### Table of Contents\n", + "1. _Write Eager code that is fast and scalable._\n", + "2. _Case study: complex control flow._\n", + "3. _Case study: training MNIST with Keras._\n", + "4. _Case study: building an RNN._" + ] + }, + { + "metadata": { + "id": "uFcgBENZqkB2", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "# Install TensorFlow; note that Colab notebooks run remotely, on virtual\n", + "# instances provided by Google.\n", + "!pip install -U -q tf-nightly" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Pa2qpEmoVOGe", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "import os\n", + "import time\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow.contrib import autograph\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import six\n", + "\n", + "from google.colab import widgets" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ZVKfj5ttVkqz", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# 1. Write Eager code that is fast and scalable\n", + "\n", + "TF.Eager gives you more flexibility while coding, but at the cost of losing the benefits of TensorFlow graphs. For example, Eager does not currently support distributed training, exporting models, and a variety of memory and computation optimizations.\n", + "\n", + "Autograph gives you the best of both worlds: write your code in an Eager style, and we will automatically transform it into the equivalent TF graph code. The graph code can be executed eagerly (as a single op), included as part of a larger graph, or exported." + ] + }, + { + "metadata": { + "id": "snaZRFdWd9ym", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "For example, autograph can convert a function like this:" + ] + }, + { + "metadata": { + "id": "9__n8cSIeDnD", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def g(x):\n", + " if x > 0:\n", + " x = x * x\n", + " else:\n", + " x = 0\n", + " return x" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "gq0eQcuReHET", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "... into a TF graph-building function:" + ] + }, + { + "metadata": { + "id": "sELSn599ePUF", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 413 + }, + "outputId": "bb0c7216-1ca3-4da1-d1fb-589902cdcd1a", + "executionInfo": { + "status": "ok", + "timestamp": 1522345737505, + "user_tz": 240, + "elapsed": 243, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "print(autograph.to_code(g))" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "from __future__ import print_function\n", + "import tensorflow as tf\n", + "from tensorflow.contrib.autograph.impl import api as autograph_api\n", + "from tensorflow.contrib.autograph import utils as autograph_utils\n", + "\n", + "def tf__g(x):\n", + " with tf.name_scope('g'):\n", + "\n", + " def if_true():\n", + " with tf.name_scope('if_true'):\n", + " x_1, = x,\n", + " x_1 = x_1 * x_1\n", + " return x_1,\n", + "\n", + " def if_false():\n", + " with tf.name_scope('if_false'):\n", + " x_1, = x,\n", + " x_1 = 0\n", + " return x_1,\n", + " x = autograph_utils.run_cond(tf.greater(x, 0), if_true, if_false)\n", + " return x\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "j74n-8hEe6dk", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "You can then use the converted function as you would any regular TF op -- you can pass `Tensor` arguments and it will return `Tensor`s:" + ] + }, + { + "metadata": { + "id": "AkVaY0-dfEbH", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "outputId": "4ffe3757-c44d-424c-c2a8-7ddc973bfcce", + "executionInfo": { + "status": "ok", + "timestamp": 1522345737841, + "user_tz": 240, + "elapsed": 257, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "tf_g = autograph.to_graph(g)\n", + "\n", + "with tf.Graph().as_default(): \n", + "\n", + " g_ops = tf_g(tf.constant(9))\n", + "\n", + " with tf.Session() as sess:\n", + " tf_g_result = sess.run(g_ops)\n", + "\n", + " print('g(9) = %s' % g(9))\n", + " print('tf_g(9) = %s' % tf_g_result)" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "g(9) = 81\n", + "tf_g(9) = 81\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "trrHQBM1VnD0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# 2. Case study: complex control flow\n", + "\n", + "Autograph can convert a large chunk of the Python language into graph-equivalent code, and we're adding new supported language features all the time. In this section, we'll give you a taste of some of the functionality in autograph.\n", + "Autograph will automatically convert most Python control flow statements into their correct graph equivalent.\n", + " " + ] + }, + { + "metadata": { + "id": "u0YG3DPgZxoW", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "We support common statements like `while`, `for`, `if`, `break`, `return` and more. You can even nest them as much as you like. Imagine trying to write the graph version of this code by hand:" + ] + }, + { + "metadata": { + "id": "xJYDzOcrZ8pI", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "outputId": "6c244ee4-b141-4ad6-eefa-cfffa71f33c6", + "executionInfo": { + "status": "ok", + "timestamp": 1522345738402, + "user_tz": 240, + "elapsed": 483, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def sum_even(numbers):\n", + " s = 0\n", + " for n in numbers:\n", + " if n % 2 > 0:\n", + " continue\n", + " s += n\n", + " return s\n", + "\n", + "\n", + "tf_sum_even = autograph.to_graph(sum_even)\n", + "\n", + "with tf.Graph().as_default(): \n", + " with tf.Session() as sess:\n", + " result = sess.run(tf_sum_even(tf.constant([10, 12, 15, 20])))\n", + "\n", + " print('Sum of even numbers: %s' % result)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Sum of even numbers: 42\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "_YXo4KOcbKrn", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Try replacing the `continue` in the above code with `break` -- Autograph supports that as well!" + ] + }, + { + "metadata": { + "id": "xHmC0rBIavW_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "The Python code above is much more readable than the matching graph code. Autograph takes care of tediously converting every piece of Python code into the matching TensorFlow graph version for you, so that you can quickly write maintainable code, but still benefit from the optimizations and deployment benefits of graphs." + ] + }, + { + "metadata": { + "id": "UEHWGpBXbS7g", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Let's try some other useful Python constructs, like `print` and `assert`. We automatically convert Python `assert` statements into the equivalent `tf.Assert` code. " + ] + }, + { + "metadata": { + "id": "qUU57xlEbauI", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "outputId": "add3db4a-2077-4dd5-f7a7-a5b5a4529c26", + "executionInfo": { + "status": "ok", + "timestamp": 1522345738697, + "user_tz": 240, + "elapsed": 253, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def f(x):\n", + " assert x != 0, 'Do not pass zero!'\n", + " return x * x\n", + "\n", + "tf_f = autograph.to_graph(f)\n", + "with tf.Graph().as_default(): \n", + " with tf.Session() as sess:\n", + " try:\n", + " print(sess.run(tf_f(tf.constant(0))))\n", + " except tf.errors.InvalidArgumentError as e:\n", + " print('Got error message: %s' % e.message)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Got error message: assertion failed: [Do not pass zero!]\n", + "\t [[Node: f/Assert/Assert = Assert[T=[DT_STRING], summarize=3, _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"](f/NotEqual, f/Assert/Assert/data_0)]]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "w5hBZaVJbck4", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "You can also use `print` functions in-graph:" + ] + }, + { + "metadata": { + "id": "6NdzRKLEboRv", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "outputId": "fb82dfc3-790f-4127-87f6-361805be9e9b", + "executionInfo": { + "status": "ok", + "timestamp": 1522345739013, + "user_tz": 240, + "elapsed": 247, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def print_sign(n):\n", + " if n >= 0:\n", + " print(n, 'is positive!')\n", + " else:\n", + " print(n, 'is negative!')\n", + " return n\n", + "\n", + "\n", + "tf_print_sign = autograph.to_graph(print_sign)\n", + "with tf.Graph().as_default():\n", + " with tf.Session() as sess:\n", + " sess.run(tf_print_sign(tf.constant(1)))" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "1 is positive!\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "9u_Z3i3AivLA", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "We can convert lists to TensorArray, so appending to lists also works, with a few modifications:" + ] + }, + { + "metadata": { + "id": "MjhCQJVuiTNR", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "outputId": "dc320b87-595b-4392-d29c-994486fd8a0a", + "executionInfo": { + "status": "ok", + "timestamp": 1522345744470, + "user_tz": 240, + "elapsed": 5391, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def f(n):\n", + " numbers = []\n", + " # We ask you to tell us about the element dtype.\n", + " autograph.utils.set_element_type(numbers, tf.int32)\n", + " for i in range(n):\n", + " numbers.append(i)\n", + " return numbers.stack() # Stack the list so that it can be used as a Tensor\n", + "\n", + "\n", + "tf_f = autograph.to_graph(f)\n", + "with tf.Graph().as_default():\n", + " with tf.Session() as sess:\n", + " print(sess.run(tf_f(tf.constant(5))))" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[0 1 2 3 4]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "UdG8ZFrkTAF2", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "And all of these functionalities, and more, can be composed into more complicated code:\n" + ] + }, + { + "metadata": { + "id": "DVs6wt8NKaGQ", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 1 + } + ], + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "cellView": "code", + "outputId": "0a4b8d08-8f65-4bbc-85ba-dc4c60563519", + "executionInfo": { + "status": "ok", + "timestamp": 1522345745186, + "user_tz": 240, + "elapsed": 658, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def print_primes(n):\n", + " \"\"\"Returns all the prime numbers less than n.\"\"\"\n", + " assert n > 0\n", + " \n", + " primes = []\n", + " autograph.utils.set_element_type(primes, tf.int32)\n", + " for i in range(2, n):\n", + " is_prime = True\n", + " for k in range(2, i):\n", + " if i % k == 0:\n", + " is_prime = False\n", + " break\n", + " if not is_prime:\n", + " continue\n", + " primes.append(i)\n", + " all_primes = primes.stack()\n", + "\n", + " print('The prime numbers less than', n, 'are:')\n", + " print(all_primes)\n", + " return tf.no_op()\n", + "\n", + " \n", + "tf_print_primes = autograph.to_graph(print_primes)\n", + "with tf.Graph().as_default(): \n", + " with tf.Session() as sess:\n", + " n = tf.constant(50)\n", + " sess.run(tf_print_primes(n))" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "The prime numbers less than 50 are:\n", + "[ 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47]\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "JQ8kQT99VqDk", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# 3. Case study: training MNIST with Keras\n", + "\n", + "As we've seen, writing control flow in Autograph is easy. So running a training loop in graph should be easy as well!\n", + "\n", + "Here, we show an example of such a training loop for a simple Keras model that trains on MNIST." + ] + }, + { + "metadata": { + "id": "0CrtGWgwuLJr", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "import gzip\n", + "import shutil\n", + "\n", + "from six.moves import urllib\n", + "\n", + "\n", + "def download(directory, filename):\n", + " filepath = os.path.join(directory, filename)\n", + " if tf.gfile.Exists(filepath):\n", + " return filepath\n", + " if not tf.gfile.Exists(directory):\n", + " tf.gfile.MakeDirs(directory)\n", + " url = 'https://storage.googleapis.com/cvdf-datasets/mnist/' + filename + '.gz'\n", + " zipped_filepath = filepath + '.gz'\n", + " print('Downloading %s to %s' % (url, zipped_filepath))\n", + " urllib.request.urlretrieve(url, zipped_filepath)\n", + " with gzip.open(zipped_filepath, 'rb') as f_in, open(filepath, 'wb') as f_out:\n", + " shutil.copyfileobj(f_in, f_out)\n", + " os.remove(zipped_filepath)\n", + " return filepath\n", + "\n", + "\n", + "def dataset(directory, images_file, labels_file):\n", + " images_file = download(directory, images_file)\n", + " labels_file = download(directory, labels_file)\n", + "\n", + " def decode_image(image):\n", + " # Normalize from [0, 255] to [0.0, 1.0]\n", + " image = tf.decode_raw(image, tf.uint8)\n", + " image = tf.cast(image, tf.float32)\n", + " image = tf.reshape(image, [784])\n", + " return image / 255.0\n", + "\n", + " def decode_label(label):\n", + " label = tf.decode_raw(label, tf.uint8)\n", + " label = tf.reshape(label, [])\n", + " return tf.to_int32(label)\n", + "\n", + " images = tf.data.FixedLengthRecordDataset(\n", + " images_file, 28 * 28, header_bytes=16).map(decode_image)\n", + " labels = tf.data.FixedLengthRecordDataset(\n", + " labels_file, 1, header_bytes=8).map(decode_label)\n", + " return tf.data.Dataset.zip((images, labels))\n", + "\n", + "\n", + "def mnist_train(directory):\n", + " return dataset(directory, 'train-images-idx3-ubyte',\n", + " 'train-labels-idx1-ubyte')\n", + "\n", + "def mnist_test(directory):\n", + " return dataset(directory, 't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "2zu1U9Nqir6L", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "First, we'll define a small three-layer neural network using the Keras API" + ] + }, + { + "metadata": { + "id": "x_MU13boiok2", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def mlp_model(input_shape):\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(100, activation='relu', input_shape=input_shape),\n", + " tf.keras.layers.Dense(100, activation='relu'),\n", + " tf.keras.layers.Dense(10, activation='softmax')])\n", + " model.build()\n", + " return model" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Wuqg3H8mi0Xj", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Let's connect the model definition (here abbreviated as `m`) to a loss function, so that we can train our model." + ] + }, + { + "metadata": { + "id": "W51sfbONiz_5", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def predict(m, x, y):\n", + " y_p = m(x)\n", + " losses = tf.keras.losses.categorical_crossentropy(y, y_p)\n", + " l = tf.reduce_mean(losses)\n", + " accuracies = tf.keras.metrics.categorical_accuracy(y, y_p)\n", + " accuracy = tf.reduce_mean(accuracies)\n", + " return l, accuracy" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "035tNWQki9tr", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Now the final piece of the problem specification (before loading data, and clicking everything together) is backpropagating the loss through the model, and optimizing the weights using the gradient." + ] + }, + { + "metadata": { + "id": "CsAD0ajbi9iZ", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def fit(m, x, y, opt):\n", + " l, accuracy = predict(m, x, y)\n", + " opt.minimize(l)\n", + " return l, accuracy" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "PcVRIacKjSwb", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "These are some utility functions to download data and generate batches for training" + ] + }, + { + "metadata": { + "id": "RVw57HdTjPzi", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def setup_mnist_data(is_training, hp, batch_size):\n", + " if is_training:\n", + " ds = mnist_train('/tmp/autograph_mnist_data')\n", + " ds = ds.shuffle(batch_size * 10)\n", + " else:\n", + " ds = mnist_test('/tmp/autograph_mnist_data')\n", + " ds = ds.repeat()\n", + " ds = ds.batch(batch_size)\n", + " return ds\n", + "\n", + "def get_next_batch(ds):\n", + " itr = ds.make_one_shot_iterator()\n", + " image, label = itr.get_next()\n", + " x = tf.to_float(tf.reshape(image, (-1, 28 * 28)))\n", + " y = tf.one_hot(tf.squeeze(label), 10)\n", + " return x, y" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "2zEJH5XNjgFz", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "This function specifies the main training loop. We instantiate the model (using the code above), instantiate an optimizer (here we'll use SGD with momentum, nothing too fancy), and we'll instantiate some lists to keep track of training and test loss and accuracy over time.\n", + "\n", + "In the loop inside this function, we'll grab a batch of data, apply an update to the weights of our model to improve its performance, and then record its current training loss and accuracy. Every so often, we'll log some information about training as well." + ] + }, + { + "metadata": { + "id": "UUI0566FjZPx", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def train(train_ds, test_ds, hp):\n", + " m = mlp_model((28 * 28,))\n", + " opt = tf.train.MomentumOptimizer(hp.learning_rate, 0.9)\n", + " train_losses = []\n", + " train_losses = autograph.utils.set_element_type(train_losses, tf.float32)\n", + " test_losses = []\n", + " test_losses = autograph.utils.set_element_type(test_losses, tf.float32)\n", + " train_accuracies = []\n", + " train_accuracies = autograph.utils.set_element_type(train_accuracies,\n", + " tf.float32)\n", + " test_accuracies = []\n", + " test_accuracies = autograph.utils.set_element_type(test_accuracies,\n", + " tf.float32)\n", + " i = tf.constant(0)\n", + " while i < hp.max_steps:\n", + " train_x, train_y = get_next_batch(train_ds)\n", + " test_x, test_y = get_next_batch(test_ds)\n", + " step_train_loss, step_train_accuracy = fit(m, train_x, train_y, opt)\n", + " step_test_loss, step_test_accuracy = predict(m, test_x, test_y)\n", + " if i % (hp.max_steps // 10) == 0:\n", + " print('Step', i, 'train loss:', step_train_loss, 'test loss:',\n", + " step_test_loss, 'train accuracy:', step_train_accuracy,\n", + " 'test accuracy:', step_test_accuracy)\n", + " train_losses.append(step_train_loss)\n", + " test_losses.append(step_test_loss)\n", + " train_accuracies.append(step_train_accuracy)\n", + " test_accuracies.append(step_test_accuracy)\n", + " i += 1\n", + " return (train_losses.stack(), test_losses.stack(), train_accuracies.stack(),\n", + " test_accuracies.stack())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cYiUQ1ppkHzk", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Everything is ready to go, let's train the model and plot its performance!" + ] + }, + { + "metadata": { + "id": "K1m8TwOKjdNd", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 13 + }, + { + "item_id": 14 + }, + { + "item_id": 15 + } + ], + "base_uri": "https://localhost:8080/", + "height": 988 + }, + "outputId": "f9d3eef3-5bea-45c1-ddf9-4edee73e4436", + "executionInfo": { + "status": "ok", + "timestamp": 1522345800262, + "user_tz": 240, + "elapsed": 52391, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "with tf.Graph().as_default():\n", + " hp = tf.contrib.training.HParams(\n", + " learning_rate=0.05,\n", + " max_steps=500,\n", + " )\n", + " train_ds = setup_mnist_data(True, hp, 50)\n", + " test_ds = setup_mnist_data(False, hp, 1000)\n", + " tf_train = autograph.to_graph(train)\n", + " (train_losses, test_losses, train_accuracies,\n", + " test_accuracies) = tf_train(train_ds, test_ds, hp)\n", + "\n", + " with tf.Session() as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + " (train_losses, test_losses, train_accuracies,\n", + " test_accuracies) = sess.run([train_losses, test_losses, train_accuracies,\n", + " test_accuracies])\n", + " plt.title('MNIST train/test losses')\n", + " plt.plot(train_losses, label='train loss')\n", + " plt.plot(test_losses, label='test loss')\n", + " plt.legend()\n", + " plt.xlabel('Training step')\n", + " plt.ylabel('Loss')\n", + " plt.show()\n", + " plt.title('MNIST train/test accuracies')\n", + " plt.plot(train_accuracies, label='train accuracy')\n", + " plt.plot(test_accuracies, label='test accuracy')\n", + " plt.legend(loc='lower right')\n", + " plt.xlabel('Training step')\n", + " plt.ylabel('Accuracy')\n", + " plt.show()" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz to /tmp/autograph_mnist_data/train-images-idx3-ubyte.gz\n", + "Downloading https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz to /tmp/autograph_mnist_data/train-labels-idx1-ubyte.gz\n", + "Downloading https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz to /tmp/autograph_mnist_data/t10k-images-idx3-ubyte.gz\n", + "Downloading https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz to /tmp/autograph_mnist_data/t10k-labels-idx1-ubyte.gz\n", + "Step 0 train loss: 2.244329 test loss: 2.2499208 train accuracy: 0.12 test accuracy: 0.161\n", + "Step 50 train loss: 0.64771986 test loss: 0.56013924 train accuracy: 0.82 test accuracy: 0.836\n", + "Step 100 train loss: 0.49011207 test loss: 0.42143965 train accuracy: 0.84 test accuracy: 0.879\n", + "Step 150 train loss: 0.3768609 test loss: 0.39319593 train accuracy: 0.88 test accuracy: 0.883\n", + "Step 200 train loss: 0.36007702 test loss: 0.37089333 train accuracy: 0.9 test accuracy: 0.881\n", + "Step 250 train loss: 0.182115 test loss: 0.28543878 train accuracy: 0.94 test accuracy: 0.915\n", + "Step 300 train loss: 0.2119576 test loss: 0.22305593 train accuracy: 0.92 test accuracy: 0.93\n", + "Step 350 train loss: 0.12932214 test loss: 0.29057172 train accuracy: 0.96 test accuracy: 0.906\n", + "Step 400 train loss: 0.22937602 test loss: 0.2200287 train accuracy: 0.92 test accuracy: 0.925\n", + "Step 450 train loss: 0.23444137 test loss: 0.19857481 train accuracy: 0.94 test accuracy: 0.94\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe8AAAFnCAYAAACPasF4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3XmAFNW9Pvynlt5mYdhmQMHggnGN\nS9zCD0ElKug1edUY9ZoQTYze3GuiRk1uYjRqRHNj4n5NrhKjiUYlbihGQFRUFDSoKIvgICAO6+xL\n711V5/2jlq7qZaZnpnumZ3g+/zjTXV1dXSP91PecU+dIQggBIiIiGjLkwT4AIiIi6h2GNxER0RDD\n8CYiIhpiGN5ERERDDMObiIhoiGF4ExERDTEMb6JeOOigg3DllVdmPf6rX/0KBx10kGe766+/3rPN\ne++9h9mzZwMAtm3bhkMPPdR57osvvsCPfvQjzJw5EzNnzsTZZ5+NV199FQBw0003YdasWZg1axYO\nO+wwnHLKKc7v4XDY8x7JZBLz58/v9edavXo1Lr300oK2XbBgAebMmdPn97J19/rZs2fjhRde6PO+\niYY7hjdRL3366aee0Ewmk1izZk3WditXrsQnn3xS0D6vu+46TJs2DYsXL8bixYtxyy234LrrrsPO\nnTtxyy23YNGiRVi0aBHGjRuH3//+987vVVVVnv188sknfQrUI444Ag8//HBB2y5fvhxTpkzp83vZ\n+vt6oj0Zw5uol0444QQsWbLE+f3tt9/GV77ylaztrrnmGtx+++0F7bO+vh5HHnmk8/uRRx6JxYsX\nY/z48QUfV3NzM3784x/jo48+wkUXXQTAbAF48MEHMXPmTOi6jlWrVuHcc8/FrFmzcOaZZ2L58uUA\nzFaB0047DQBw//334ze/+Q2uuOIKfP3rX8d5552HxsZG533ee+89HHzwwVnv9cEHH+Bb3/oWTjvt\nNJx//vloaGgAAOzevRsXX3wxzjzzTJx66qm4++67cx5rPu+99x7OOecczJo1C9/+9redC6Vc++3u\ncSEE/vd//xczZ87EKaecgjlz5kDXdQDAwoULcdZZZ+GMM87AN77xDbz33nsFn3eiwcDwJuqlM844\nAy+99JLz+z//+U/MmjUr53ZCCCxatKjHfU6fPh1XXnkl/va3v2HTpk0AgHHjxkGSpIKPa+zYsbjm\nmmtw1FFH4YknnnAeF0Jg8eLFUBQFv/71r3HppZdi0aJFuPzyy3HTTTfl3NeiRYtw/fXX49VXX8WY\nMWPw7LPPAgA2bdqE2tpaTJgwwfNe4XAY//mf/4lrrrkGS5Yswfe+9z1cddVVAIBHH30Uxx13HF5+\n+WUsWLAADQ0NMAwj57FmikQiuOqqq3DDDTdg0aJF+OEPf4jrrrsOhmHk3G9jY2Pex1944QUsWrQI\nzzzzDJYsWYKGhgY8+eSTAIBbbrkFDz74IBYuXIibbroJr7/+esHnnWgwMLyJeun444/Hxo0b0dLS\nglgshlWrVmHKlCk5t73++uvxhz/8AYlEott9/v73v8d3vvMdLFiwAGeddRZmzJjhBEt/nXzyyc7P\n8+fPxxlnnAEAOOaYY5zqONOxxx6LCRMmQJIkHHLIIdi5cycAYMWKFTk/6wcffIBx48Zh6tSpAICz\nzjoLX3zxBXbs2IExY8bg7bffxvvvvw+/34+77roLdXV1BR376tWrMX78eBxzzDEAgJkzZ6KtrQ3b\nt2/Pu998jy9duhTf+ta3UF1dDVVV8e1vfxuvvPIKAGDMmDF46qmnsH37dhx77LH45S9/WdjJJRok\n6mAfANFQoygKTj/9dCxcuBCjR4/GiSeeCFXN/U/psMMOw3HHHYdHHnkERx99dN59BgIBXHrppbj0\n0kvR2dmJRYsW4fbbb8fEiRMxbdq0fh3vyJEjnZ8XLFiAv/3tb4hEIjAMA/mWNqiurnZ+VhTFaV5+\n5513cMkll2Rt39nZiYaGBk8LhN/vR2trKy655BIYhoFbbrkFjY2N+M53voOf/OQnBR17a2srRowY\nkXVsLS0tefeb7/Guri48/PDDmDdvHgBA13WMHj0aAPCnP/0Jf/rTn3Duuedir732wvXXX4/jjz++\noGMkGgwMb6I+OPPMM3H33Xdj1KhRPfbZ/vSnP8W5556LiRMn5ny+tbUV69evd6rWESNG4Pzzz8ey\nZctQX1/f7/C27d69GzfccAOefvppHHLIIfj8888xc+bMgl+vaRrWrFmT8yKkrq4O+++/P5577rmc\nr7388stx+eWXY8uWLbjsssucSronY8aMQXt7u/O7EAIdHR0YM2YMVFXNud+pU6fmfLyurg4zZszA\nd7/73az3+dKXvoTf/va3MAwD8+fPx7XXXotly5YVeGaIBh6bzYn64Oijj0ZjYyM2btzYY4VWV1eH\n73znO7j//vtzPh+Px3HllVd6wmLr1q34+OOPceyxx/bquFRVRTgczllRt7a2oqKiAvvvvz80TXMq\n0EgkUtC+V69ejYMOOgh+vz/rvY488kg0NTXh448/BgA0NDTgZz/7GYQQ+PWvf4133nkHgBmSY8eO\nhSRJ3R6r7YgjjkBzczNWrVoFwBxfMH78eEycODHvfvM9/vWvfx0vvPACYrEYAOCpp57C888/j9bW\nVnz/+99HOByGLMs48sgjezXWgGgwsPIm6gNJknDaaachFotBlnu+Bv7BD36Ap59+Oudze++9N/70\npz/hvvvuw5w5cyCEQFVVFX75y196RqAX4phjjsEf/vAHTJs2DW+++abnuYMPPhjTp0/HzJkzMWbM\nGPziF7/Ahx9+iNmzZ+O///u/e9y3fYtYvve67777cOuttyISicDn8+Gqq66CJEm48MIL8etf/xq3\n3norhBCYMWMGpkyZgh07dnheryhK1ntWVFTgnnvuwa233opoNIrRo0fjrrvu6na/I0eOzPk4AGzc\nuBHnnHMOADPYb7vtNowePRrTpk3Dt771LSiKAp/Ph9tuu61X551ooElcz5uIiGhoYbM5ERHREMPw\nJiIiGmIY3kREREMMw5uIiGiIYXgTERENMUPmVrGmpq6i7m/UqAq0tUWLus89Ec9j//Ec9h/PYXHw\nPPZfsc9hbW11zsf32MpbVbPvKaXe43nsP57D/uM5LA6ex/4bqHO4x4Y3ERHRUMXwJiIiGmIY3kRE\nREMMw5uIiGiIYXgTERENMQxvIiKiIYbhTURENMQwvImIaNh6443XCt723nvvxI4d23vc7sMP38cN\nN/y8P4fVbwxvIiIalnbu3IFXX11c8PZXXXUt9t57QgmPqHiGzPSoREREvXHXXb/D+vXr8Mgjc2EY\nBnbs2I6dO3fgnnv+iN/+9jdoampELBbDD35wOaZOnYYf//hyXHPNz7F06WuIRML44out2L59G668\n8lpMmTI153u89toSzJv3dyiKgoMOOgS33XYL6us34M47fwefzwe/349bbvktdu7cnvVYdXXuqU8L\nsceGd0c4gfc3NOLYg+sG+1CIiIa9f7z+GVZuaCzqPo87uA7nz5ic9/l///fZeO65f+D7378MDz/8\nIDQthT/+8c9oa2vF8cd/DWeccRa2b9+GG2/8BaZOneZ5bWPjbvzhD/fh3XeX44UXns0Z3tFoFA89\n9AAeeeQJVFRU4Oc//yneffddvPzyyzjnnPMwa9a/4YMPVqK1tQUvv7wg6zGGdx9ceecbaO2M46ZL\njsOk8X0/gURENDQccshhAIDq6hFYv34dXnzxOUiSjM7OjqxtjzjiKABAXV0dwuFwzv01NHyBiRO/\nhIqKCgDA0Ucfg/Xr1+PEE0/CH/7wP2ho+AJf//ppmDRp35yP9cceGd5b23YiPOFNSMnD0dwRZ3gT\nEZXY+TMmd1slDwSfzwcAWLJkETo7O/HAA39GZ2cnfvjD2VnbKkp6gREhRM79SZL3OU1LQZJCOPbY\n4/HnP/8Ny5cvw5w5N+PHP74652Nf/eqxff4se2R4f7ztCyjVbTBG70RLZ3ywD4eIiEpAlmXoup71\neHt7O/baa2/Isow333wdqVSqT/vfZ59J2LbtC0SjEVRUVGLVqg9x1VU/xrPPzsOUKSfi9NPPgBAC\n9fUbsGXLpqzHGN69dPykA7G4CZArO9DSwfAmIhqOJk3aD59+ugH33XcnKiurnMdPPnkGfvGLa/DJ\nJ2vxb//2TdTV1eGRR+b2ev+hUAhXXHEVrr32J5AkGUcccRSOPfZY7NzZghtv/AWqqqrg8/lw/fU3\nob7+06zH+kMS+doDykxTU1dR93fjit+ipTOCQyLn4yfnHlHUfe9Jamuri/632dPwHPYfz2Fx8Dz2\nX7HPYW1t7m7dPfY+7y+P2Q+SL4mmcOtgHwoREVGv7LHhPbFmPACgLdk2yEdCRETUO3tseI8JjQIA\nxBFGPKkN8tEQEREVbs8N74rRAADJH+egNSIiGlL22PAeW2FW3pI/xtvFiIhoSNljw3uME96svImI\naGjZY8M75AvCLwcg+eNoZuVNRDQs9WZJUNtHH32ItjbvnUjlsAyo2x4b3gAwMlDDypuIaJjq7ZKg\ntn/+88Ws8C43e+QMa7a6ijFojDWiqSt7UnoiIhra3EuCXnDBRbj99lvQ1dUFXddx9dU/w+TJB+Lx\nxx/Fm28uhSzLmDp1Gg455FAsW/YGtmzZjDlz7sD48eOz9pu5DOjVV1/nLANaWRkCIJdkGVC3PTy8\nxwItQKfWPtiHQkQ0rD332UtY1bimqPs8uu4rOHfyWXmfdy8J+uijf8YJJ/w/fOMbZ2PLls24994/\n4J57/oinnnoc8+cvgqIomD//WRx33NcwefKXcc01P88Z3LmWAf3ww/fx1ltLcc4552H27AuxaNHr\nJVkG1G2PDu/a0FgAQAysvImIhrM1a1ajvb0Nixe/DABIJMzu0pNP/jquvvq/cNpps3D66bN63E+u\nZUDr6zc4S362tOzClCknlWQZULc9OrzrKszwTildMISALEmDfERERMPTuZPP6rZKLjWfT8VPf/oz\nHH64dy2L6677JbZu/Ryvv74EP/nJf+Chh/7a7X5yLQMaCAScJT/XrFlZsmVA3fboAWt25Y1gFNE4\nZ1kjIhpO3EuCHnro4XjrrTcAAFu2bMZTTz2OcDiMRx6Zi0mT9sX3v38ZqqtrEI1G8i4lCniXAQWA\nVas+xEEHHYpnn52Hzs4OfPOb38QFF1yE+voNzmOnn36G81ix7NGV96hgDSQhQw5EEYmnUBXyDfYh\nERFRkbiXBP3hD3+E2267Gf/1Xz+EYRi4+urrUFVVhfb2Nlx22fcQClXg8MOPwIgRNTjqqK/ihhv+\nG7/97Z3Yf/8DPPvMtQzokUcehVgsihtv/AVGjaoBIJdkGVC3PXZJUHvZtp++fgvicQM/O+pa7L/3\niKK+x56ASwj2H89h//EcFgfPY/9xSdABEpCCkNQUIvHUYB8KERFRQfb48A4qIUiqhs4oJ2ohIqKh\nYY8P7wo1BABoj4YH+UiIiIgKs8eHd6XPvFevIxEZ5CMhIiIqzB4f3iMClQCAjjjDm4iIhoY9PrxH\nVZgj+Xa1tw3ykRARERVmjw/v0RXm7WE7OjrQHk4M8tEQERH1bI8P70qfOWBNUpNYvallkI+GiIio\nZwxvn9nnDSWFpvbY4B4MERFRAUo6Peodd9yBDz74AJqm4T/+4z9w+umnO88tX74cd911FxRFwfTp\n03HFFVeU8lDysm8Vk9QUWjvZbE5EROWvZOH97rvvYuPGjZg3bx7a2tpwzjnneMJ7zpw5ePjhhzFu\n3Dh897vfxcyZMzF58uRSHU5eITVo/qBoaOviRC1ERFT+Shbexx13HI44wlx6bcSIEYjFYtB1HYqi\noKGhATU1Ndhrr70AACeddBJWrFgxKOHtV/wAAJ9foK2NlTcREZW/koW3oijOYuXPPPMMpk+fDkVR\nAABNTU0YPXq0s+3o0aPR0NDQ7f5GjaqAqipFPcba2mqM1M3K2+8XaI8kMXZsFSSu690r+SbOp8Lx\nHPYfz2Fx8Dz230Ccw5IvCfrqq6/imWeewV/+8pd+7aetLVqkIzLZK78IISBLMiTFQCKpY+u2NlQG\nuTRoobgKUf/xHPYfz2Fx8Dz237BYVWzZsmX4v//7P8ydOxfV1ekDqKurQ3Nzs/P77t27UVdXV8pD\nyUuSJPhlP2TVXHi9jYPWiIiozJUsvLu6unDHHXfgwQcfxMiRIz3PTZw4EeFwGNu2bYOmaVi6dCmm\nTp1aqkPpkV/xAbIZ3h2R5KAdBxERUSFK1mz+8ssvo62tDVdffbXz2AknnICDDjoIp512Gm6++WZc\ne+21AIAzzzwT++23X6kOpUd+xY9kyhxpHo5xXW8iIipvJQvvCy64ABdccEHe54877jjMmzevVG/f\nKwHFjw6YS4IyvImIqNzt8TOsAYBf9kMXZmhHGN5ERFTmGN4w+7wNGIBksPImIqKyx/BGeqIWyDrC\ncYY3ERGVN4Y3zD5vAGZ4s/ImIqIyx/CG2ecNAKrPYJ83ERGVPYY3rPu8AYRCEitvIiIqewxvpPu8\nQyEgHNMG+WiIiIi6x/BGus87GABiCQ26YQzyEREREeXH8Ea68g5YS3tH46y+iYiofDG8Afhls89b\nVc2KO57UB/NwiIiIusXwRrryllUBwGw6JyIiKlcMbwABJQAAzrKgrLyJiKicMbwBhFQzvCXVrLjj\nSVbeRERUvhjeAIKKNVJNNu/xjiVYeRMRUflieAMIWpW3IZkVd4yVNxERlTGGN4CQGgIAGJJZecdZ\neRMRURljeAMIWgPWdCQBsM+biIjKG8MbgCqrUCQFmhXe7PMmIqJyxvAGIEkSgmoAKWGFNytvIiIq\nYwxvS1AJImkkAABxTtJCRERljOFtCaoBJHQrvDlJCxERlTGGtyWkBpHQk1BkNpsTEVF5Y3hbgkoQ\nAgKBoOCtYkREVNYY3hZ7opZgCIiyz5uIiMoYw9sSVM0pUitCQCSWGuSjISIiyo/hbQlZ85sHQwJJ\nzUAixaZzIiIqTwxvi115B4IGAFbfRERUvhjeFrvP2+c3wzvM8CYiojLF8LbYzeYqw5uIiMocw9ti\nV96yz+zrZngTEVG5YnhbglblLavmbWIMbyIiKlcMb4tdeUNheBMRUXljeFtC1mhzQzJDm+FNRETl\niuFtCTK8iYhoiGB4W+w+b3tNb85vTkRE5YrhbfHJKmRJdtb0TunGIB8RERFRbgxviyRJCClBZ01v\nneFNRERliuHtElQDiGlxKLLEypuIiMoWw9slqAYR1xJQFRmaJgb7cIiIiHJieLsErWZzRQE0Vt5E\nRFSmGN4uITUAAQHVLxjeRERUthjeLj7Fb/5XNRjeRERUthjeLn7ZBwCQVYGUzj5vIiIqTwxvF5+s\nAgAU1YCm9b/ybutK4MEX16G5I9bvfREREdkY3i4+xay8FaU4fd5PvFqP9z7Zjb8u3NDvfREREdkY\n3i4+u9ncZ0ArQrN5PKl7/ktERFQMDG8Xu89bUQwYQsAw2O9NRETlh+HtYjebS4rZZM5Z1oiIqBwx\nvF2c0eayGdq8XYyIiMoRw9vF7vO2K+9i9HsTEREVG8PbxWk2tyvvItwuRkREVGwlDe/6+nqceuqp\nePzxx7OemzFjBi666CLMnj0bs2fPxu7du0t5KAWxK2/I5ujwfjebC1buRERUfGqpdhyNRnHrrbdi\nypQpebeZO3cuKisrS3UIvebPCG8OWCMionJUssrb7/dj7ty5qKurK9VbFF1Ws3mxwlsqzm6IiIiA\nElbeqqpCVbvf/U033YTt27fjmGOOwbXXXgtJGtyUs6dHFZLdbM5mbyIiKj8lC++eXHnllZg2bRpq\nampwxRVXYPHixZg1a1be7UeNqoCqKkU9htraas/vcf9IAIBqLi6Gqqpg1ja94fObp9enKv3aT7kb\nzp9toPAc9h/PYXHwPPbfQJzDQQvvs88+2/l5+vTpqK+v7za829qiRX3/2tpqNDV1eR4Lx1IAgJSW\nBAA0t4TRVBPo83ukkpq1Pz3rvYaLXOeReofnsP94DouD57H/in0O810IDMqtYl1dXbj00kuRTJoh\nuXLlShx44IGDcSge9mhzQ+KANSIiKl8lq7zXrl2L3/3ud9i+fTtUVcXixYsxY8YMTJw4Eaeddhqm\nT5+OCy64AIFAAIceemi3VfdA8St2n7dZMevs8yYiojJUsvA+/PDD8dhjj+V9/uKLL8bFF19cqrfv\nE6fyBitvIiIqX5xhzUWRFEiQYMCsvDnDGhERlSOGt4skSfApPqfy7uk+7x3hXXjsk38grsUH4vCI\niIgADOJo83Lll33QhTVKvIc+7/s+eghdyTDGVdTi9H1PGYjDIyIiYuWdKagEkDQSAAC9m8p7W2MY\nXckwACBpJAfk2IiIiACGd5bairGIGREEv/oqtic3593ulfcbnJ8lzn9KREQDiOGdYXyFORe7pGpY\nrb2af0N3i/ogT+tKRER7FoZ3hnGV6YVUVPjzbifAe8CJiGhwMLwzjK+oTf8iCquoZTabExHRAGJ4\nZxhfOc75OYEINEPLvaGn8GZ4ExHRwGF4Z6j2V+EHX/4h9I4xgCTQGm/r8TXs8iYiooHE8M5h/5pJ\nMLpGAQCaYq05t/GMV8tTebNXnIiISoHhnYOqSBApc7BaLJV7KVLhSmbeKkZERAOJ4Z2DqsiAYU4+\nl8g7AYsnvYmIiAYMwzsHVZEhDAUAkNBzh3chzeZERESlwPDOQVUkQDfDO5knvHuD4U5ERMXE8M5B\nkiQo1pot21o6cm8kvNsTERENFIZ3Hgp8AICV9TuwsyWS9TxHkhMR0WBheOdhhzdkHZ2R7pvO2SxO\nREQDieGdhyqlwzsX4bpXzBD5lw4lIiIqNoZ3HnZ4S0qe6VFd3EFORERUagzvPHyKz5yIRdaR1Lqv\nrA2w8iYiooHD8M5DlRXAUCApOpKp7KZzd7HNZnMiIhpIDO88fKp1r7esI5nqofJmszkREQ0ghnce\n5ixrKiRFQ0LLUXm7f2blTUREA4jhnYeqyAVX3nqe8GZBTkREpcDwzkOWJXN+c1lHIpljxLn7VjEO\nWCMiogHE8M7DMIQ5YE0WSGiprOe9zeY9lNicw4WIiIqI4Z2HYQhAN+c3j2mJ7rdlnzcREQ0ghnce\nuiGcZUFjqXj2Bp5bxdi5TUREA6eg8F67di2WLl0KALj77rtx8cUX4/333y/pgQ023RAQyQAAIKKH\nu92Wfd5ERDSQCgrvOXPmYL/99sP777+PNWvW4MYbb8R9991X6mMbVIYhIBIhAEBMdGU9z1vFiIho\nsBQU3oFAAPvuuy9ee+01nH/++Zg8eTJkeXi3uJuVdzfh7VmYhM3mREQ0cApK4FgshoULF+LVV1/F\niSeeiPb2dnR2dpb62AaVIQREMggASCLXet7pwK7f1pZzxDkXLCEiolIoKLyvueYaLFiwAD/96U9R\nVVWFxx57DJdcckmJD21w6a5m86Scq8873VS+uy2CpvZY9hZ2djPDiYioiNRCNvra176Gww8/HFVV\nVWhubsaUKVPw1a9+tdTHNqgMwwAMFUJToSvRrOfdlTckkQ5q9zZW5c0KnIiIiqmgyvvWW2/FwoUL\n0d7ejgsvvBCPP/44br755hIf2uD60rhqAIBIhKCrkawAzry3O3ezub1taY6RiIj2TAWF9yeffIJv\nf/vbWLhwIc455xzcc8892Lp1a6mPbVBdcsbB+N7Mg+DTqwFZR0cyo49fSieyxMqbiIgGUEHhbYfP\nG2+8gRkzZgAAkslk6Y6qDFQGfTj56AkIiBEAgMZok+d54b63WxI5VyGxA53ZTURExVRQeO+33344\n88wzEYlEcMghh2D+/Pmoqakp9bGVhZAwP+fOsDe8vROziJwBzcqbiIhKoaABa3PmzEF9fT0OOOAA\nAMDkyZNxxx13lPTAykW1MgotALZ37fY8nll557rXO+lrARSJfd5ERFRUBYV3PB7H66+/jnvvvReS\nJOGoo47C5MmTS31sZWGkbzSAXM3mmaPNvQm9qf1ztO31OvwVYyBaTy71YRIR0R6koGbzG2+8EeFw\nGBdeeCHOP/98NDc344Ybbij1sZWFmmAVhACi1uIkH3zaiBfe3gJkNJvruje869s2AQCUmhb2eRMR\nUVEVVHk3Nzfjrrvucn4/5ZRTMHv27JIdVDmpCKpAVIJm6ACAB55fCwA4cLLrukcS6EqGcdt7D+Oc\nyf+GQ8cchNZ4KwBApHzs8yYioqIqeHrUWCw9g1g0GkUi0f0a18NFZVAFhAxN1z2Pp9y/S8DqjlXY\nEdmFBz5+GADQEm8DAIhkiH3eRERUVAVV3hdccAHOOOMMHH744QCAdevW4aqrrirpgZWLiqAPEBI0\n4Q3vpK65fhMQGQndaoe3prLyJiKioioovM877zxMnToV69atgyRJuPHGG/HYY4+V+tjKgll5S9AN\n74xqKS0d3lLGgDUhhFN5QzYY3kREVFQFhTcA7LXXXthrr72c31evXl2SAyo3duWti+6azYUnoBN6\n0pk+VZJ1DlgjIqKi6vOi3HtKNVkZVCGEbC5U4pIy3GEunAFtABDX4+mnFH2POVdERDQw+hzekiQV\n8zjKVkVQBSBBhze8tYzKO2GkB/DFtHR4S7LOAWtERFRU3Tabn3TSSTlDWgiBtra2kh1UOamw+ryF\n8PZdp3QNAdd2yTzhzT5vIiIqtm7D+4knnhio4yhbiixDEjIMpKC5J2KR3TOsGXkrb7DPm4iIiqzb\n8J4wYcJAHUdZkyUJAgZSWrqpXJJdt4pJQMod3qmoazsDBpjeRERUPH3u8y5EfX09Tj31VDz++ONZ\nzy1fvhznnXceLrjgAjzwwAOlPIx+kyADEEhprn5v1R3eAkmRDu+2RIfn9ULSQEREVCwlC+9oNIpb\nb70VU6ZMyfn8nDlzcP/99+PJJ5/EO++8g88++6xUh9JviiRDSAaSrvD2VN4QSBnp9c2d8DbM0ysk\n721mRERE/VGy8Pb7/Zg7dy7q6uqynmtoaEBNTQ322msvyLKMk046CStWrCjVofSbLCkABOJJVwgr\n3klaUsIV3vF28wctCAAQYOVNRETFU7LwVlUVwWAw53NNTU0YPXq08/vo0aPR1NSUc9tyoMgyJFmg\nI5JuGpdUb+Wtwd1sboV3yhwf+2w3AAAgAElEQVSPzsqbiIiKqeAZ1gbbqFEVUFWlqPusra0uaDtV\nMU+TUFzXOlblLTQVkj/puQu8PWk1m1vhDVkv+L2GouH82QYKz2H/8RwWB89j/w3EORyU8K6rq0Nz\nc7Pz++7du3M2r7u1tUW7fb63amur0dTUVdC2spABCdi2M31vu2SHt+5zqvB9qvZGQ3gHuhJh87lU\nABIAQ9IKfq+hpjfnkXLjOew/nsPi4Hnsv2Kfw3wXAiUdbZ7PxIkTEQ6HsW3bNmiahqVLl2Lq1KmD\ncSgFUWTzNHVE0/3akDXz/m3NvP6pwlhMm+AdnCecZnP2eRMRUfGUrPJeu3Ytfve732H79u1QVRWL\nFy/GjBkzMHHiRJx22mm4+eabce211wIAzjzzTOy3336lOpR+UxUF0IHOqGvaU1UDdBWQzHu4fQhA\nldOn0y/7ENet5nb2eRMRURGVLLwPP/zwbpcNPe644zBv3rxSvX1RqbIV3rH0oDQoGoSuOn3fighA\nkdN98j7Fh6iumE0bDG8iIiqiQWk2H2pUK5S7oq7R5opZeUtOePuhSunwViU1fZ+3zGZzIiIqHoZ3\nAXyKGcrhuN3nLZzK2x6spghvs7kqqxCaFeYyK28iIioehncB/NatYl0xK7xlA5IkzD5v2A/5PM3m\nqqxA6NbvisaVxYiIqGgY3gXwWfeX68KqoJ3bxNzh7Tebyi2qpMIwrN9lnUuTEBFR0TC8C1Dh91k/\nmRHszGvuCm9J+Jy+cQBmFW5V3pJVeXdGk7j/2dXY1hgekOMmIqLhieFdAL/PCm/JmkdNsSpwwzXj\nm65mNJur6cpcMdf0/ufyrVi1sRn3Pbt6AI6aiIiGK4Z3ARTJOk2SgCSlK293szkMNavZ3A53STYr\nb3s98GSKA9iIiKjvGN4FUOxbwCSByqAPvoDVg+2qvCXd22yuuprNoegw2OlNRERFMmQWJhlMslV5\nS5JAZciHsGrAACB0BYmNR0EZ2QRZqvbcKqZIKgAZQpedypuIiKgYWHkXIN1sbuDEr4yHL2D1fRsq\njLbxSG35CoRhB7b9GsXZxu7zdkjSwBw4ERENSwzvAshWEP/7qZNx5tcmweczk9i5jxuArouMZnPV\n2UbKvM+bVTgREfUDw7sAduU9fmwIkiRB8dmVtyu8hchoNndV3jL7vImIqHgY3gWQrSVBDWGGtqxa\no8Vdo811XXjmNreb0IWuAIoGwzDSO2SzORER9QPDuwB2Fa1b4S1Z93m7m80NQzgD2wCkg1xXIUlA\nyuDiJEREVBwcbV4AO5S/6NyGz9o3A0rKfMJwVd6GAclVUctOs7n537iWXguceuetj3dgQm0lDti7\nZrAPhYioLDC8C2D3eS/e+joAQLZWGfMMWDPSk7CYr/Fuc+fqe3AELhqQ4x1OYgkNjy7cAAD4yy9m\nDPLREBGVBzabF0B29WUDgJCyJ2nRDYHHXql3frfDW1LNKj2hJyBghntnJIkHnlsDg6POe6TpRs8b\nERHtYRjeBVAk72kSMMy7vQxvn/fGhnbXa8zntN2T0tsgXZl/UN+Enc2REh0xERENZwzvAmSGNwAr\nuNN93PGk7unztkebG51jobWMN3+Gd05znfeP9YhniIgoG8O7ALKsZD+oe4cLRGIpz+8KXK+xKnQD\n3hHnDO+esWeBiCgbw7sAco7KWxgZ/eAwB1c5r5HdK45Z94lL3srbYHj3iOdoePvX+t247I6l2N0a\nHexDIRpSGN4FyN9s7hV2Vd+K69TaQS/YbN5rDO/h7c8vfQLdEFi2eudgHwrRkMLwLkDmaHMATjXt\n5g7j5vZk+glhbtssbYLkT1cYKY6k7hFH5A9v/PMS9Q3DuwC5Ku9RVaFuX7P4vW3pX6yg362uQ/Co\nt5yHUymGd08Y3kRE2RjeBcjV5z1+VBWqQj4AQCjQw1w3OZrYAVbehWCzORFRNoZ3AZQczeaqrDrL\nfFZX+Lp9vcjRxA4AyZSe83FKY3jvGbhWD1HvMLwLIOf4ZnEv/1kdyg7vQyeNxrUXHoWvHTouu/KW\nNQACb3e8jHe2vwcAWPDOFsxd8ElRj3s4YHYTEWVjeBcgksq+jcW9/Gd1hT/r+ZHVfhy272jzOeE9\nzZI/DskfxxfJT/HEp88CAJ5ftgUr1u0q8pEPnLWbW7BibfGPn5U3EVE2hncBJo3YBwDw5VGTncfM\nZnPz5xGV6fAWmlmFj6kYCQCQ5exmcykQg+RPrzJmrxMOwGmKz2f+ss34+LPmPnyK0rrrHx9j7kvF\nbznggDUiomwM7wJU+6vwwIw7cOa+pzqPqa5Z1/yq7Axei6+ZisTGo3DUhP0BABUBNavZXArEIAVi\nzu+NkXQYdxdWndEkXnznc9z7zOr+faAS6unio7fKObxffGeLs+IZEdFAYnj3guIKbFVWPfNu71NX\nBQCoCYzAdbPOwKTx1QCAiqAv655wSUlB8qfD+4GP/+KsEa7p3YR3JJn3uXKR1Io7gr6cm83nL9uC\ntz7eMdiHMaSV8bUZUVljePeCu59blVQ4y2ZIwMGTRgEAamtCzs8AUBlSs/q8IQlP5d2aaIW692YA\ngN7N7WPhaCrvc+Wi2CPoyzm8iYgGC8O7F7Iqbye7JZxxwpfwzan74rJvHOp5TWXQlzUPOmTDCe+v\n7zPd3F9tAyBrWZV3Uk9i4ZZX0Z7oQGe0/CvvRLHD23U6/ufvH6KxPZZ/40HCC4y+4y1iRH3D8O4F\n9/3e7j5vSQJURcbZ0/ZH7UjvzGuVOZrNIRmQ/HEoIoBzDzwLB4a+AknVIPnj0DIq73/Uv4CXtryC\nFzctQke4/MM7WeRZ49x93vUN7Zj32sai7r8YONlO37HZnKhvGN69oHbT551PZUjN7vOWDEi+JFQj\nCAAwdOt5SUBzVXHtiQ6s2LnS+b0jo897xY6VWLBpUS8/RWkVu/IWGVVtOS7mknnBRURUagzvXvBU\n3pKCQtK7MuiDENnN5lBSkI0AAEDT0o+7+7zvWzU3/RJJdgashQLm/h7f8DQWbX0dulE+M7UVu887\nM6zLsYk6VeRBekREPWF490L2aHMzSLrrtzNvFcucpCUBSQIk3QzvlDUOTZIM6Fafd2ckieZYi/Oa\nqBZzKu+qjBndolr59AMnSthsnuv3cqAxvPuNfd9EvcPw7gXPaHO5h8VILLIsZd/n7TMnaJE0c3IX\n3S5WJQOaYQbBtX98C7rQceDIAwAAsVQMHZEEAMCvKp77qbuS4d5/mCJyH0vxR5tn/l4e4a27Dox9\n3n0nCup8IqJMDO9eUFyBrcqq606xHsoG4X1e8pshbM/GJgzreUkgkdTxxqrt0GWzyq5UK+BX/Ihq\nMcQTZjDqhkBcT8/QFklF+vyZisHdtF30Pu/MyrtMwlvT0sfR3b35ROXglZUNWL+1bbAPg4qI4d0L\n7nW9PQPWemzyywhvnxnMwqq8Dd16Xjbw7Fub8bfFn0JSzI5wvxxEhRpCTIs5FZ4hBLqS6cAO55h7\nfSC5w9tdeacMDXd/+Ces2LEy18sKkt1s3uddFZW72uaANSpniZSOp17biN8/uWqwD4WKiOHdC1kD\n1iw9ZXfdqFDOx42kWXk74S0Z2N5kNoFLqtkRHrDCO6rFnYFRhiEQTrnDe5Arb1d4ufu8t3Y24LP2\nLXh8w9N933eZjjZ3BzYHrFE5K5fWKiouhncvSK5RNYprkpae3Pz94zC+60QkPj3G83gqYTbD233e\nkiTgXApY06X65QBCaghxLY6UZm5oCOFpKh/sZnMtT+WtGVquzXsl84unXAasuQepsc+7H6w/Z+bY\nBiLqHsO7j3yyAvf0qN0J+lVMCnwZRkcthKv/OxaVYRgCuqvytkmqGXw+KYAKXxACAklh9pUbRmaz\n+WBX3q4+by0d3rmWUu2tzLDOvO97sLgvWDjavP/K5aJsOOK5HZ4Y3n2UOT1qTxTFOtVGeluR8iMS\nT0HX0n3e6RdYlbcUQIVaYT1vPmYIIJxKjzAPJ7NDMvMfbCKl4911u5zqvZjczebJZPrnLtcxLnrv\niz7tO/N7Rx/gLyJNN7BkZQOice+88u7AZp93/7Fpt3TKpauJiovh3UfmwiSmQu5R9dnh7VqkROgq\nWjsTcPLUGm0OpPu8VRFASA1ab2pW45l93pnN5l/s7sIPf7cUb3603XnsuTc346EFn2D+si0FfT63\ndVta8doH2/I+7xlt7ro4CLtuYVv4Xu/fF8jRbD7AX0TPv7UZT762EX9fUu953N1Uzmbz/mN4l065\ntFZRcTG8+6jQ+7xtimIlvHuFMV3BLY+uRDhid3obTsVsh7cCPypUc8CbZFXjhiE8TdI7Irs8s6wt\nX7sLAPDU6585jzU0dgEANu3o7NVxA8Cd8z7C35fU521+y9fn7b7/3JD7tiJa1gxrA/w9tHFbBwCg\ntTPheZwD1orD/nOyabd0mN3DE8O7j1RZ6dWiCnblLQz7vxKc028FuiS5m83NKlsRfgTUgPVYesBa\nXDPv8z669itoT3Rgbct656V2FSP7EqhvMwPcp5qj4/vTbJ6vOvKMNk+6wtvVIiAUb/gV/J5Z93kP\nbFC2dZnHPbI64Hnc22wuEI2nsO7z1gE9tuGEAVM6bNUYnhjefaRIhU2PalNVO6itjQ1X5S7Sk7TY\n7CpbNgLpJnopfatYzArv0yedAgB4a9sK57VOv/A+a3DvqofwcdNa+K33T/ajSszXt5tvkhZ35d3X\n8M5s8hvoUcntYfO4R1T4PY+nXIP0NM3AnfM+xp1PfYT6hvYBPb6hzv6nM9AXZW5CCCz9cBt2tQ7u\nfAmlwlaN4Ynh3UeqrOLkoycAAA760qgCtvc2m8vCHd7Wn8E1YE3yJyAMCbLwwWc10UtyepKWmBaD\nLBQ89sIuHDhyf2xo24hdkd3m7uzAC5lBMn/Ty/D5zPdI9WPu8XwDX9yjzd39v+5BdYbct+VMM9/S\nEAKabmTNvFYq9mfOfD8tY5KWLTvN7oimMlxvvJw5zeaD2POweWcnHnulHr+a++7gHUQJsfIenhje\nfeSTFXzntC/jjh9NwWH7ju5xe6fytprNZeSqvN3hHYNIhqDrrv512Wo2N4CYFofQfdi8vRMnjDfv\nH/+0bZP5vN1vnjJHqTdGmyGpZgWZ7EezuZ5jGlAhhGeeb/e0oRHXKHhD6Wt4Zw9Yu/z3b2DO3z7o\n0/56w90FkDkoTcszYI0LbPRNb6vD9zc04sEX1xWlqozEzC6q4VqgsvIenhjefaTKKmRJwtiRuWdP\ny9o+Y7S5e7S63Q9uN5srqg7Jn4RIhKDpBnyKtYqY5K6844BuTtEaUioBAM+/vRHN7bF0FaOkB4nF\nVXOFMvfgqriW7hMvRGbl3RZvx/ee+ylWtb0PqGY4u0MtYbgCW+1bs3n2gDXzd7vSLaXWrvT88cmM\nFgv3eXT/LGekdyyhIZbo/2Q1A03TDWzd1TVg79fb6vCP89fivU92Y3cBTd2vf7itV5/FMATunPeR\n526NoYyV9/DE8O4j91SphVCt0eb2JC2q5FrW0x6wZjWLT5xo7lskQkhpRlazOWCGt6GZj8swt4+m\n4nhx+efpK20lHRqblXcANenp835iwzO4d9VD+KhpbUGfQc+oPjd1fI6ElsCyliUIHvkG4Is74W0I\nA5qhQYHVV9zHyjuzz3sgFwGx108Huq+8NU/l7Q3vK+5+C1fc/VaJjrB0HlrwCW55dOWA9eH3tTp0\n5k/Io7E9hsdfqcctj+afXz+ztaSxPYZ1W1rx10Wf9umYyg2ze3gqaXjffvvtuOCCC3DhhRdi9erV\nnudmzJiBiy66CLNnz8bs2bOxe/fuUh5K0Vz+le/hrP1metb2LoRdeUtWda1KKn71PWu61IxmcyVo\n9puKRAU03Ug3m9vN6rIBXegwUnZ4p5/fvKPTudIWcgp1FWMBAElEoY773FMl2iPUN1rN7T3pbrIH\nSTGgjGx0giypm8EXRJV1Avp2q1jSSHpaEIq95Gh3uqLp982cRU3zDFhzDTTsY7N5S6wVN7xzOza0\nbuzbDors/Q2NAFBQZVsMfa0OMy8oMxXy/0vmn2y4dX2w8h6eShbe//rXv7B161bMmzcPt912G267\n7basbebOnYvHHnsMjz32GMaNG1eqQymqI2sPxxn7fb3Xr3Oaza3wliDjgL1roMiS0w/ujDb3m1+Y\nduWdsjPErrytMBO6VZFb64VLio4dzRGzipEMQNYxOjAKFx30LfP5jACt9JnN7YVOr6plfAnYI95t\nysgmZxR2QrdmiDMqrffuW+W9UnseoWNegz20qbezRdW3fYZH1j2BVB/mWe+KuirvjLECqTxzm/e1\ngnz1izfRlmjH3DWPFbS9EAJPLKnHui2lvT2tKuTreaMi6Gu+9HSPfUE5LHX765DHPu/hqWThvWLF\nCpx66qkAgAMOOAAdHR0Ih8M9vGr4UuzR5s7tZVbftyJD2KPN7T5t1aq8k0GkdAML3ramFrUGrNnL\nhcIKbwjF83wkrjkBH1KDOGj0gZ7nbVU+c0BbOJk7vNvi7Xh03ZOQrIuJzCrHvtf8lJFnw4hXQK5u\ncyrUlNXfrYgghC47y6D2VhhmOMk1zX16/b2rHsL7uz/CxwV2Dbh1uirvzJDwDNJzN6FrfWz+tVpy\nNFHYRcb2pghe/WAb7pz3UZ/erzvukfWZF2yl4q4OOyNJrN3ckndbz/H11I3ShzJ6uGXdnlh5G0Lg\nd3//EP9c8flgH0rJ9G6asF5obm7GYYcd5vw+evRoNDU1oaqqynnspptuwvbt23HMMcfg2muvzeov\ndBs1qgKq2rum6p7U1lYXdX/dGdlsNT/azeaKitraavhUGYmUt887FJKBlFlZ+/wqWtpTwCjXJC5W\neAvdrIpGjrA+hxXOmiGchU1GVY/AXnXWrWzW/u3PXREIAl1AzIjmPBfLPnkbK3evQuAIGfH3T0f1\niJBnu+QXZrhVh6ogkgHIwSg0w0BtbTVi7eaAMkX2QST9gJrM+R7vbVuFUcEafHns/p7Ho/EUKoLp\nqk8Zux1GR61nm978/Xyh3v+9U64vPUOSPK/3B9LHJrv6XYMVfmc7dytBT+8dCJj/FDVDK+g4O+Lp\nC7Fi/3/c1pluUQm5Pk8pqT7FeZ9fPLQEja1R3Hftydhv75qsbSOx9EVVVXXQeV2u44y7rrnyfY6a\n1phnm5he+N9tKGgKpy+cC/k8w+Ezh6NJfNrQjk8b2nHJN78y4O8/IP9mSv4Olsz7ZK+88kpMmzYN\nNTU1uOKKK7B48WLMmjUr7+vb2orb91ZbW42mpoEbTdvVZX1BWAEqdKCpqQuyLOWYpMX6YhYyOrvi\nUCUFCddrneZva8BaW6s5ktsO/65Iup9Y0hR0tVnPWzO0NTZ2QpIkdMbMintXuAlf7GyCX/Z5+vI7\nwzFnv1IwjJaWCJpC5nsmkjpefOdTqOOARFQ4rQApI4mmpi7s6jAHOukpCdD8kIKRrPNtCAN3vvMQ\nAOCBGXc4j2/Y2oY7nlyF807e32yokAClphkpyfBML9ubv19LR5dn+22NYUgSMKG2yrPdZ9s7cO/T\nH+Pq849EY0u6RSIWT3le3+EKuIireb2tPepsF0+mq+jujvWtxmVY9Nkbzu/23ydTMqXj3U9247iD\n69DWnv73UKz/j1es24VdLVEctl/61sfWtuiA/DuJu85vo9XPvnFLC6p82Y2Dja576Ztawmiq9uf9\n99zSkm7ty/c5OjLOZVNzz68ZSlpb0/8f9/R5Bvp7sVQiroWEBvrzFPsc5rsQKFmzeV1dHZqb002d\njY2NqK1NV05nn302xowZA1VVMX36dNTX1+fazbDh3ELk6vMGkNHnbQ1YU+1FjmVougG/Yo3Ylg0E\nfIrTbG73ecPwNpvHk5qzTYUagk/2eZ5/7q3NeGfNTqfPOqEncd1bv8b1Cx/CZ9s7nGN2z58uBSOe\npuKuWNJpAZAMn3MsQtagG4bTbA5dgdB8kBQdCc3bdJ7Qc98+ttIaLLVw5RanA1JSNchVfR/5HE15\nJ0/59V/+hRsf/lfWds8s/QyRuIZnlm5yms1HVPg8zea6YeCL3el/nO4+b/e98O6R/d01Xc5bu8Dz\ne0TLfaG6YPnneHThBjz56saSNO3OXfAJFiz/HM0d6XM1UPO25+qXzddk7668e1qOtZAxEpnvM9xW\n4ervx+mIJPHBp03FOZgBMtz+hrmULLynTp2KxYsXAwDWrVuHuro6p8m8q6sLl156KZJJ88t85cqV\nOPDAA0t1KGUhff+vNe847D5vKV1NWuEtK1Z1bshIaQYCavo+74BPTt8CZjWb6xrM6t0K51hCd6rz\nkBqCIiuQhAzJev6fK7bi4X+uzxpwFg5twd3/SPehulcFU0Y1oiWRHhxlGMK5QJCFz6m8JUWDpgkk\nrNHmwlAgUubFR1vcezUaTaXf372wiv1FLqvmY0I3L07kEd5+UPMiQcsK5lw6k7mvhA1hYHc0/cVk\nT6aj6Qa6oklUhXwI+BVPiK1YuxtrXQPFtDyD19yz2el5phAzRPbjLbHcg9B2tZih/vmu0t7j3tKR\n/ruUMrzdrXG5Lm7yjST3hHcPo80LGayV+d7D7YvffQ76MjPh//z9Qzzw/JohNfVvrgmlhpuShfdX\nv/pVHHbYYbjwwgsxZ84c3HTTTXjuueewZMkSVFdXY/r06c5tZKNHj+62yXw4kK0Ba3a/tV15T6yt\nAmA1nctWVW6FNwwFKV0gqKbv8w74FSek7cldNN2AJBSn2Tye0JyAt5cTlaB41wuHQEJPoNJeKxyA\n0FTPVbq78lZrt+PvDQ8imdJR39BuTlpih7er8oaiIaUbSOr2iHgZ0Mzwrm/x3pIW1dKh61772/4y\ntS8OjC6zGVcOeQc8aprAw2sfw8+W3YRIKuoJccMQePHtLc5kOJ3J3IG3cMur+M27v3fudbfvCtB0\nga5oCtUVPvhUb3hv3tHh2UfKM2DNtba5PUJd0pHQcg9Ei2vZrQ/5Rv/b/w/phijpl1OLq0uglMud\nunMkZ3jnCdGwK7x7Or5CgjgrvHvY519eXo///r/lPe63XLg/X19Gntu3Cw6lqX/zXSwPJyXt877u\nuus8vx988MHOzxdffDEuvvjiUr59WXEqbzugrdHml5xxMPbfewdeiSsw7AFp9n+FDE0zYOj23Oe6\n2Wwup8MdsKoj4Qp1pG/NspcTlYXqHW2uaBAQ2K9mknO/t4hXpudgR+4Q+cvL6/Gv9Y046/9NgqRo\nELoC3YCn8tZ1A0nDWr5UV2AkzGOYt+kZHD/hSAStVdJirvDuSHRiZMAcnGR/v8jWoDsRr4DQFUhB\n7/GkdANrms1j//mymwEAPzjsIhwz7ij8a/1uzH97C0LHCEABOhPp4HdXa+/sMCfvWLV7DY6qPdwJ\n70RKRziWwoSxlYgndU94B/2utdxlAy0j/gUlPgJSMIIW3Q/AHHyXTBmAZCB45FuYv6kT3z3sW1nn\nM7P1AzAHreVi37FgGKLHirM/8lXeiZSOrmgSY2sKm1WwJ+4gyZWx+YI3Ek+fn54uYgoZaZ35Pj0F\n/turd1rbGVDk8p/nyhPehkAP89rkNZRaJIbSsfZV+f+fN0yMqQlaP3mbzasr/Pi3KftClc1wkkc2\nIpwKQ7Kq8ZRuIJGy/keUzD5vJ9ytyjulGea93q5wloLm1fLY0Bjzd6E4zeZAetWyoBLElV/5sfmg\nrHtmrAqnIhgd9C668q/1Zn/0xoYOc1CcrkI3BISRWXlbzea6DL3xSxBJM7Cjrv5cd3gv3rrUqdad\nudntixFdhYhXWp/JfZtQdoDtipjHZ37BC2cZ1Q5X5e2e6tReS317s9msbs+E12atJmZW3rInxGLW\nQLTbLjsBoTFtSFR/Dv8Bq+GbsAmrxItOU3hS0yH545D8CWzsyD0NbVw3gzKoBHDKPicCQM570qOp\nmPP31Q0BrciVhbs5tdm1drm7JeGOJ1bh539a4Zl5rj/0HirCfBcoUdd0sz1V3oWFd+ZtgIV98Wd2\nKQgh8PFnzWU3Ha773PYn1IbSLWdsNqeiGVUdwG2XneBUywq8k18okgJJ1RD48ofYEdllzaomIaUZ\nSKYEhCFDkg34fa6QFq5lPo2McA5EASFhTMhscpZyVN4AsGZjJ3738GdQjRAgG051J4RAOBVBlTWR\nS6akpluVt2p++en2RDEaNF044W1oMiBk6O3mYEU7oAHvILKPm9bipS3mGIms6V11FUas0hz17k+/\nRtMMyJL3f2FP8Lmmh+1IdDrv51621J4AJ2m9zl533V6UpLrSD58qw3AtwBK3ngv61Zwz7dmfMakZ\nkHxmOLfEWz2f3WZX3idNnIqJVXtnfwaYs9XNee9ObAq+CkBYK6sV98vJHUSeytsVjvZ88u5m6/5w\nh0GuUMn3GfU83RQ5t+1L5a27jyv//jOPb/naXbj3mdX466INPb7nQHJ/hP5c8w2lanYoXWj0FcN7\nAO01phIHpk6D3joOU+r+n+e5zBHGftkHVTFHmyeSulllywZURXaazYWr2VwYCiRfylkARA5GIeuh\n9LzoIqMyt5qk7XlzzIsD3ak8E3oSmqGhUq1AcrN5n6Q9hzoAJDTdDEddhaYJT5/3X15ej664GZS6\nZq+mZr7WDnXAW3kDwEeNa8xNnT5vb+UNAFIo3XSe0DSnYjyq1jzGlGEHp56ezAaAgMD7uz92nks/\nYU9ba430z2hTHFFhhjeQDji7sgr6FShq9rdh0khCNwxsbwxD8iec998VzZ4C2J7oJqQGnb9VKiPk\nP2hcjY5kJ8LyLsjVbVafd3Er77hrBTXPimnWZ3avsFasL3F3tZ85h735Prk/o2dq2j40my98dytW\nb2rJu02+VfIyZVbe9iDGTdtLv2hOb3gGBvbjNoVi/z9XSkPpQqOvGN4D7IpZU3H1cT/AtMO+1O12\nqqzCp5qVdyJlhndNtYq6kaH0wDO72Vw3zIFhAEJfXQqoCUj+BJRUFYQQ+OPzaxCNCUiygNPsbM8X\n7p5iVU734dn93RVqJTsCVIYAACAASURBVPTmCdA7R8OADsBuEk5Bks3Qjic1T5/3Z9s6sGqTGVS6\nZlXyVmVu94UDQDSjv7cl3obGaHO6/9OpvBWIlNns7p5mNZyMQkDgyNrD8c39Z5rnwqpaw7GU83q9\nrQ5CAG82LEdjW8QTRPY99kKyBt9l3F49wmo2B9Jf1vGEBglAwK84I+LdknoKTy/dhKde/wzwpZug\nd4R3ZW1rV95BNQjVuqVPM7zhvXLXh87PytjtVp934V9OWzq+wJ/XPo5wMpJ3tHE8zxzg9mduaDSv\n8tQJG/HytpcKfu/u9NRsnm+kuztce2w2z9hvNK7h6Tc24Z6nP05v002fd+b+3ecvc8rcZmtA11in\ni6w86D20cBS8nyE09Vyxu5XKEcN7gPl9Cg6eNKrb2eQAwCer8Cmy1WyuQ5FVCDkJQ04BkvWl4fR5\n604VDgDKSPP+eilZiVhCw/ufNmXdCy75zdCwQ1FYfeZ25b0zYgbNCL81QYDzeiu8EXFeH4lrnsob\nSFfYuqZ4Xp9wVd6fN5mVynXH/Bhn7GtOpdueaE9/Qcqu+9l17/EDQJc1rWuVr8IJPrvyjsRS6dHq\nsSroLXthV2wXfvXss3jh7S3pE23tLwnzizfzy7raVXl/vH0zVu/YjFhSRzCgQJYkyEqu8E5imTWo\nya68ge7DO6QE0pV3RrN5S7wN1b4qyEKFXNmRNWBtV2Q33tuZf33zf9TPx6rG1bjx5b/i9sdzbxfP\n009rn4+GRnNMgG/CJqxu/xDhWApPLKnvVxO6O0dyZUrmMqw276IwvWs2D8ey++u7azbPvIBwH1Pm\nc01Wd8OoEYFuj2mgGT3cklfwfoZQNbsn9HkP2Axr1DuqYt5fHEtqSGoGAlAQTnXhXflvgGwu4iKE\nq9lcl5wFFeRqs0lQSoUQtkfmWkHv+9IGpD4/DFLAXrnMHDls6GZzvH070spdq8ztIxMAtDmVM2Qd\nMFTEpU4oMEeoR42Uq/K2mrqtCwwtZVW2OZrN12zdBXUsMMJf5dzSFtPi6S8J2T52NT0JnSss7TnZ\nK32V8Cne4AvHNE+fubbjAKhjd0Ie0YpVG9OTB9n3w8eMMO54/37ElNEA9nKe19ROrK94BsrY/fBk\nwyLz/RJnOyPOJVflbUSrIFeEkdCTqAgoiCU0p88bAHZEssM77qq884V3OBnBmNAoSMlKdIR2Q0fK\nEzi3vncnAKAmMAL71UxCwJ7Uxz4uawBdomIbNm34ctYxAN5mczc7HKMJLf33ADD3pbVYs6kNmiHw\nvZkH5XxtT/L1ecuSBEOIvCuCefq8ezlgzT1ffa73BrxVW+b+3bPmuS/0hBDOQL5yC7nM0eZ9NZQC\nkc3mNGh8soqAT0VXxPyySfc3Cydw7C/7ZMqAcC2bKVeYVZIwZIStLys7PNW6bZBrmiFb4W3Ezfu8\nDatvWlENGMLA6uZPEMIIvPCKNWGIYc+/bo149pnNqCJe4am81boGyKN2Oc3Qeko2mxFzhLd7xLs7\nvJ2Kxj2TnD0Lnavytu9Dr/RVOLPI2f3F4VjK2b/QVAgtPdGNh7WNhhS2djag0f+x5+k2YxeSUgT+\n/dMLm8STOkKBdDcBACTWHwe9dbzzGYP281blXaFU5q68dbvPO2Teiw/vrWIpQ0Ncj6PaV4VqqRaS\nBBiBzpxNyvd/NBd3rLwPQgi8/uE2ayY2gdZ4m3ksqubchZApka/Z3AooTRee127aaf5/YfSjedId\nJO4+b7v1J6nluaDoplk7U+aXeFeOkfLdNptrmeGt53zOfftavhYDIQSefXOTZxbDgeAZbd6Ppu+B\nWqSmGPaE+7wZ3mXi9Emn4OBRBzr3OvtkFUG/4vzDE3L6S8eerGREyAy8aELzNM/KlWZ4G7qcbtbU\nXaOiJQEpEDOraWsCFfteclk2oBk6UkYKWjQEZ35Su9ncqnxl64vciFeiPZxIr3AGwDfhMwgrZFMp\nCaOqA1AlMzyT1rSpumFO8iKEOUNb0BXedsUl5HSfd2azPwBENKvyViucCxk7+CKxFKCmK3e4lk1N\nnwcjPSFOPkp2c3IslUDQnx5dDwBC8zvvsaO1AxV2ePviEJoPtYFx6Eh2eia+AbwD1pZ+YIb79tb0\ngCd7lrsqfyWqYN72JwKdeQcP7Yo2YmP7Jjz+Sj2WvN+AjmSXZzIcuTJ7MFVcS2Bly/KsVeeAdEBp\nugHZdZ99NGVdlAT7vmSonmcglWKHd54Q9FbehQ9YE0KgM9q7ZvPsyts1sM89IY/r4iffRcfnu7rw\nzxVbcftj+bs4SsGd17kGBvbEPb/AUFGs1oZyxvAuE//fAWfgJ0dfZt0iBqiy2WxuS0npL2A7qGsq\nrfCOa5B82TN1CUN2+vjcfeKSrEMKRK0mc8nZFgBk1XACUNcl1768fd72hCkiXoH2cDIdrjCb0u3K\nW+gKVEXGiKDZPG/fLhWOpszPofmh6cKpvONaHAnNACCQ8rUDwgxGu9nefTucHUqVvgrzVjtIzoC4\ncCzlDG4TKX+Oyl3Af9BKz2fPJQVr/vdPj4HeYYYnanYh4LcvqlyD6qxz8MTrG8zKXElBCkZhRKsw\n2mfeKpdZfcdc4b16o1khN7anJ5SxZ56r9lVBMsygFJLebRW0rvlT5+ftHebAwUlV5gBJKZQ9Teyz\nG1/EB13L4Nvn06zn7PBKaYZ5+6HFvmjpzz3NIk+zuT1oMpmnP7uvfd66Yc6alykz4LuvvHM3m7tb\nLqJGFz5sXJ31Pok8XROl1t8Ba3Z4r9ncUvKpeYuluwuw7vx10QZnEp5yx/AuM4p137JPVhH0eatl\nN2HIGGmFdyyhQW8dl7Uvs/K2vmxEOoilQBSSqkEkXTNluSpbu8/VHinuft4OTykQMydesSZnCfpV\nnDzCmkFMNiAk3ZqaVIJPlVFTYb5XJGmGVWs4BikQgxGvQDJleJrNkykdck0z9EA7KhP7mHO4Z1T+\nABC1Ku8qfyUkSYJPVhFJxLHgnS1WeNvN5n4AMoQhpcPfl4AywgxLvWW8s09ZT48U3m+vaucWPpEM\nQsTMufn9B6xG20irerIH1bmqe8jmjGxyVbvZzN01GmN85t9nc8fnnr+RHd6bGiLOHPbuPm+7X7/K\nX+lZtz39hZT9ZRxOpPvZd0fN/v0vjzBnN5QrO7NGnO+KmhPb5Ap2O7xSuuFtclfSa8c/99YmvPbB\ntqzX9iTfaPN05e0Nu2ff3ISXln+OsNTsdH/0ps9b13NX3lpGuOVbqx0AYnmazd2tBLvHvYSH1z6O\n19au97z2b1/8H/wHZy+GUyhDCM/FQ8Gv62cVav89GhrD+M2j72PTjg7c98xqRON9v3ArplxjI/py\nwZJM6Xjzox34y8vre964DDC8y4w9baoqKZ7KO4sho6bKbPKOJjSkPj8MX459A0YsPamK0CWn8naW\nEQUgWc3u9qxn9v4As9nZvlXJXXlnjVZXXCPMAVQEVewd2MfaRoMOzemHVhUZFX4zFCNWsOzobIYk\nCYh4BZKajpCSEd6VZr9gZXQ/81hzNJt3psyFEsYEzYrYp/iwszWM55dtMf/B+lyVN2BeaNjH75rn\nXa4I43jpAnNbpB8/cOJIp5lbaH7rIsDU5WswH5fTg+Ls1gF17834rKUBcrV5cWCER2Kczzw3G9q8\nM63t6mqF0GX88dkNCPnM/bv7vO1b9qp9Va7WAyNdfarZYbR5d5vzc7u1GEyNOhpGIgg51JX1ZaZI\n9oWZAXWfDVDqvjB/l1zN5prhad2RrM8djafw0vKt+PuSeuyM7Ma8T5/POV97Lkae+7ztqYTXbmnF\nR5+lBxf+c8VWvLBuGT6vfhnq3uY8+T32eXtmFzN6rLx1XXQ72txTeWu5K2+7p+mJ1z51LpQMYaAj\n1QZlRO5FZwpx3zOr8V93vdVta4emG57lMM337t993plTwP7+yVX46LNmvPnR9l7vq9iefXMTfnTn\nm9jZ4p06ubtBh24bt7Wjrcv8/zVfyLd0xEs6HXFfMbzLjF15GxCe8E6PJbcYCmoqrfCOpwChoEau\n9TRf64bkDFjzfPFat4l5mrqtn9uqV6fvv3Y1J2eFp6x7Xl8RUOFTFXMOckWH4QlvCRX/P3vfGW9H\nVa/9TN/19H5OzknvIR0SEjpEulIFiShYLyI2BEQR9PpD5aJX5d5XQbHAtYAIypULWABpIXRIg5De\nc0pO3XXKej+sMmv2npOQkJAE5vlAOHvKXrNm9jzr356/wfTMWax0xyDt5EUKSdiOhxjTYM+5eRSY\nJjgdgxEYq+w2H3D7YGkmKkxqERuqESB33eR9z/k51OD4GZztI2F5FehIjxDu/mRMxylzRvgxascA\nsf34rgGLndIG8VhnODZGNZaFPvlpkZvgZSphKnG0pVqwrm+DiGl7xENPoQskT5vT8LwDuXXqoBTz\nlhdQPO6rsAXKmMqRmBc/EwCwTYqZ9xeY7CuJg2QroJhF9GT7AxKnQo42MQijeQN0Rt5xU4fteMg5\nebylPwa10idSYXnnfCL58Su348mtS/D0tucwHAghIg8j2DDD30f+/Cf3Bd3PWgO18DU2Fsfx8Pra\nnmFL1uRzOR4JlXYtzXrfXZ33cAlroXFuhYhEtqCG/b4RAReWkRvHlOJbv34Bn//RUwGyGS488Xah\nlogfcC/Du9Uudnd4aMlGAMCK9cFFUVAlL/yaO3uz+O7/vIyb734RQDjJ9/Tn8dWfPov/vPe1sm0H\nGxF5H2Lgcp8e8QJu85OSl+CyKR8RWeeEqEjFDWiqItxXhq4G4rfE8RPWvLxvkYsabznWy+uwY9vx\n5zUPBT4D4Mufqi4AAqjB2vKERevS4eo0EU3xyds0NCTMIHl35ujLl1reXiDmXXRcEVsnpCRhTvXd\nxUNuH+pitejpz+P7v30Zjh20qDXTptYwczcTT2rqwv51drbD3dXC+qYbgOohldBx2xePRW1lDBk7\nA0MxAaIGLG9DYeSt+AI1gfkC/GQ3x4DjemhPt8EhDr551xMAgK5cD4jiwcumEDM1DGR4jbwjXLfC\n8jZTUtzft7x5XH989RjUaC1iO8cga4X6qwfXw8vSmv2/L1+BL972NJ5bSePv/YX+wHGinaylwXE9\nPLVlCfr1jZClCbjl3ZcpAIoL64h/iYXGa10rUIqubA8Gi0P4zSNv4KofP4Wt3ZlhNbdLX7YD2SJW\n71oLrWETVOba91gI47W1PfjRH1/Dd38d7o4W51IdvLTzFQxk/UUst4qD3+1hlf009DYa/y+zvAsS\nebthbnP/M0X1hFUnt9flHqF9xe6M561d9HmRPQHDLYzeLrRS5aJDEKW6GYFF2zBW86ad9J70MC3/\nsORH3tt+1cbesm0HGxF5H2KQyduSyLs53YA5jTNgKiwm66mIWzoqkqZI7DE0FfUVPkm7riLI29ky\nDsUNkwFIwiEy2Uj//0bvWwDoAsHfTv/fHLNMxHJlyzwRM6DrKrW8VQdEcaEpPB6uIcXc5isHX8fD\n6/+B3iJzKRcSKNgutnXmoCoqNnX3omh7Qq5UfAdhMWtOiEYBLhxk+k1c87MleHNzHwaGXMhtTxW9\n6LvMAboA4W5zVodOHJal7hIYGvMU6P6POGNnYaksN0Amb7Yw8VAU4QNSQt6KXqTKdVDgekR0U4Pq\nghCC7Sx5jeTSKBRdZLJ+jTx/6QvL20j6fd+lmDe3vFNGCiopDy2IVquOKch7xY4NAIA7HlwJQgj6\nCiWlSxqXf6WWt0tCrErNRUXSxMBQEUosCzVGX3KqomJd/4aApekRD//x4m345Yrf4cnXaDLQ+m0D\nw8a8S8l73dYB/PjV22GOXClkfVHiiXpdcq/L4C9xo/0N3Lf+TxhM+fFM/rIujXFvJstgtKwXf3O8\n2rUcAwU/L6DUba5YGcSP/Jv/5ap/H4ekKgO5MmRf8HZ6cpcuSDj25DbfuGMA/3X/soDrfTjyPpRy\nuEuH+HZi3p0lLU7DKjhMQyv77FBBRN6HGHi3MZd4Abd5OkGJQybvmKkFpBh1XUVrbYX4m3gaeofY\nKp9ocDtHiAYn/Bz+viGPQpjbHIA1eSn9n5KYNz1GBzQXRHVE85WYqSNp+eP86/q/+brmtonnV+7E\nt3/zIlxbw2CBveTYGF23xDvAPuelal2d0o+LaDSBTKUdxVy1ECBcriInn58vWlzPg6XSfXVDJu8M\nYiodu6gVB+ApLC9AsUXSXqAcDzSWbjHCdlxPiKcomgvXI9gyRInMy6XYi1AR96efuXeHbE7eKSGB\nC9XzrT5meadNSu6EoKScboiGDYgKkqXPhpyYtq2/F45EzqpnsnI6D6ahwnY9IfIiQzc8NFbHA+1n\nTxt5MmY3TAfgl8ABtAFNxsliTd86keCn6wrk05JhyAYA1m4rr4tWNV8NcHcQOvkshGEnt4ltnHxl\nK01uHSvvs7p3LX6+7C68YD8ItXon9Kb1cFwPHgsDFG0XetPGkkF6tIwS/n0EEBDuKRsv8fZIzm8n\nbC1n4e+N5X3lfzyOl1d34bkV5Tr85eMoP9ef1/zfbtX+9hV7Gnep5e0GLO/wY3mf8mSMl5mW73co\nl5lF5H2IgVvepJS848wFzcmbqIgZQfI2NEVYhAAATxUPKIXix39RalmHrDBD3OoyFATd5lTpTYei\nuVAU1gwFQNzUUBEP9oC2XZ6lreI1Fssjju5b1oxcbVt+80iWM3P9J9W0fz2uCkUliM/5BxQzD6J4\nAVc37bxGaDy9pDOb6xKYjFw1g373uv4NsD0HMS3BxufPnY0C8k4BLmxhvYfNkaHQc7oegcoFDVUX\nRdtFX44nDkpa2GyBMsAWXZt6ekA8BU7RD4koiivkTLmLO2kkqTEqhwYA5NwMVI/OPfdCFIlv+e0c\npLHCZHYU8svno4KwzHvdEfK8A5LL18vTcyUTCpK8xpuPQU+IOZRlcLn17xFPJPFpqor/emCZf97d\nSHgOZotlOR+q7ore67sDf4nzygoS8/MBuFXtegSKlYU17Um83hOMsfMXf3+BHpdVemGNewVG+5so\n2g4efHo9rvrxU1i5cRfUip7AsUrA8vYTqoazvPNOAf/+3K349crf7/aa9qTbrVZ24U3mPQPefsxb\nnvdk3F+YD7eYKP246Nr4+6YncNeqe3Y7vr2F63n42h1LcO9j4W11AZQ6YgJW9HCaCNt76LuxtoL+\n/sLc6283Uc0jBKs29r6jxi97i4i8DzHwhDW3JObNLW9D5a5XD5apo7bSJ0VdV8vIuxQyAQXIhoQ8\nCnsgd/m7EjFK3rL1yWO0MVNDMhaU7LSJLb6DJxEpngHD8jCiIeWXAknhQSK3PWX/5vNAR2MaJ8xs\nDYxXYdnqPMnMMjV/PjSnrDOb43rCbc47hf34lTsAUPUzOugY7E1UCtT2CljeQ12w3mBN4FwyTGbN\nP7NsOx54YpMYe9HxROa9fJ9UaIDioj9TxKadg+jNDQKOibVbB0RCG1RPlCxx8t64Nc+6z0neBcVD\nkRRgEDZ+fq3En1SejY5CEqZdA5PF8hXNgc403Tlx0fmk280YEd4WbnnHtLjwLshKev1539LnBNfd\nnwsmj9VtwD82/QuATzAXnzyOnstxxaLW3jyOnch5W+QtkvGYkp6iEtF5T7a89Za1UONZPLL1kcDx\nolQupMd63inikefpPX3hra1Q48GMZyhyzFsi72Es78c2P4nOXDde3PkqVvS8OSxp7l6m1IM14SX8\nz9q7sXGAVkS83WzzjTv9+/R2Er5K8wGyUmfEMG/NviKTc9DVl8fGnYPIOTlsGiwvS1R3Y3kPN/4u\nFs/mW12XAEYe8SMfwWObnsTTr2/H7//xVuixpXjxjU6ahf9WePjmQCAi70MMgZh3wG1OicVQfOvN\nKnWba6qwfADfsm6o9gl+WPIOURIjw7jNOUbUVfqHqwomtlcFysc42cRMXciJcnDxEz6GuKVjYmsD\nHGLjcxeNRnUFHWdBTiJ2Zbc3V3BTkU4YaKyOB65HJOUxy7syYYpriM96TKjQnbuQkoHjEphsMdLX\n9Dh6cr2iZGt2zZHivM6OUXD7a2ATGy/upPrvXBa11G0OACZbbK3fPuhnzGsOirYrVMrkudVVHVA9\nPL9qJ2761QtQDBq394h0P1TXLxdi5L1lexH5okv34d4JVmGQGeT3UQFxNbjwJ7UvT4nZKRiIW5oY\nLzQbpk7H1S/FefkCSTdcn7wNej5L8cm74PrWZU/Od3trjLxL1dOMjlV4YM1DcDwXhAAT26tw1GRa\nG19winCJC7evDs72MaKigTeMCUPfUAGPv7JVkJDcjc4cuRJqRXfA8pZDQNLFis5hgYQzBqphH1zA\naH3t0gLDldzmEnlb4eQtJ/r9v9fuxLLulaH7DVceV3RtaHV+WODxzc8AKPdqEELws9d/jf9dG1yo\n9PTnWRc8EiDm4cgvb7t4+LmNWL2ZlmxmbT+GvCvfF3rMvoCX5xUdF//96p34/gs/KRM7KvXWBGr3\nh1ns8NACfw4c1xNVDH9a81f86onnsKnTv++7s8K3sERBfr/fDUTkfYhBVcOzzbmVoTGZUUV1ETM0\n1ErkPba1UsiE0pPQ40c2paXPJHeYbJmXan5Lx7O9yzaPb6kVC4yBrI3KlIXjjmgX23lTkpipiZcc\nh6uxlxnLJm+pTeDoFkqSd6+6V4xHdpvLMWthgbsa0gkDDdWJwPWUlsNVJM3A9ei1NN5cnaIuccfz\nhCeBqDbuXf0AvcaqMWhPjQheuEv3W9e3EaYSA8nR+f33y+eXzRG3vAH4Cxtmeedt+sKvTib8/TUD\niurhjU19gOJC0VwQx4TrefA8iJg4J29uUaowaRmT7DYXjVmkBZur0wx5Bp4QZ+cNxC1dkLeiOeLe\n9hcly7tAnzdV84TbnBOXqcZD3ea9WXo8IUy6Vy+KEkYK/9nryXK3ugKTkXPe4wsxQ1wDNCcQ81ZT\nvbjnzT8LBb8f3PMq7n70TSxdxWK3hpSAVbMT1sQXBQm6HvGrGmRIuQWDdjl5F11byMNyD4ilxv3K\nDtUT3gWZvLmGQSl6MoMwSAIN8ToAwxPgcKpyj21+MqDBz/NKZPL9xV9XYe2urVjWvRKPbHxMfH7/\nk2tx9zPPIT7zceitawLqdjIxquke4bnY3p3BH59Yi+/9lraslWV4d2a7Qse4L8ixDP9C0cP6Aerp\nKG3y44sJ2VjbtwHLi0/AGE1DII7r4anXt+FP/1obOIaHRGQJYPkdEZu6BErCf/YzuxGl6WFW/HCS\nvgcCEXkfYjihbSEA4JSO42GZ5daAcFUzy7u1jr4oJnVUY1RzhbAeAQh3LHe5A74rm26XasI7R8Dp\nbIPT2eZvl9zQJJeCN1SJRNHvuGVqJs4/kVoZU0ZS17HIqAZQLPjkHbf0gIAMjAIjW7pPU20Ccxpn\nYFrdJKzr34AhbTs7h3TxcsyaK615GtIJk1qBsopcSUb9rPH1qEun/HMxxboYUzVzXSL01wFfxtXQ\nDJhG8GfCSSTjZGEp/uKptT6FUpiaLITj66sXbQ95FhNorvGTDGO6IVnOvshMNu/QlzBbwMiWNyGA\n6rG+6l65d0K27ImniVp2wE+kKuboPbKYWA50h1U7EAzZQ1DsONA5GvYW1pVMc0SiD0+aMxCDxa5X\nJm/umvcGqJiOVrsNg3JrTql0atsQJVtVVYVlXfAYKTAvCl3EObAMSU9/1HI8ufVZPLLhn3A9V5RM\n9bA2nUqImI3sNlfCyrcUV7yMB/dgeXMPSEz1PUCK6olqD368O1gFNZ4RLm0ZWTuHQk7DB0efAcDv\nA5ArOPjt31eL/Rw3PKntjV1BFy8PXcge7KGcjd89/1TZsX99diNyBiVEo3VtoFc5J38lNgRr0guw\nJtM6/lK1uqyUUf+/ax8WvyEZL+54Bc9L/elLUXSL5fr/kuXN8asVv8OPX75dhMe4Vfzwhn/ihy//\nP2zxVkKv2waoDhzPw6/+7w08tGRjwAshpH+55e2RMg+kKpP3btrfdrPnbLgGPwcCEXkfYphcOwE/\nOf67mNVwBCyj/PYYnGBUDzFTQ1XKwq1XHI0vf5hm+fK4LQBBvgH3ouwelC1vosHeMBVeVs5WD24v\nrJyPKUnfhWxqBj588nh89zPzMGMctRZiElnlmfEbs6jbvLBsIYobJ4rtinR+njRycvvx9OsUjyXE\nlMfdrSnPCsubeNTytgzNT3aDH1fk1xC3NMwd7y88eDcxUzOggCa1aFKHXMI8Dbyvugw5NGCqscC2\ns0afiqq+2eLvQHvOgHyqi6JXBCEKWup8z0jcNH0vCCccx0Qmb9MsbE+lMe+Cr3QH1wAhCLjNdU3y\nTngl91+aJy7/6hQMxE0NMS3OzmvT5iu6DZe4UPKViO+a5hOo6kiWN53LJ1/y+8bLMW+e8ObsGAli\nGzDa30Rfwbcqq6sly3Dlb2BNfRquPgRNVaEqCoqk3PImqitCSfLcPrrxMVz1xNegN9FSL9cjwoPh\nZYOLq0DCWgi5Q/WEBSqTN/GYfKtr+6ED9jwljARkHf3BnA3Xc7FhYBNMLyUWMLe8eBs2D/ou7oJt\nU8liR0c2xyxCRn6PLN0kyc8SbM9tx5cfvwlPrH8hMNwEy81wulqhQRM6/4E4t+KhR6WJX2rp619a\n5PXYdBHVP1TwNQXYgpiXBZaSmWx5bx7ahqU7yrPOf7Xy9/jNyj+Iv4u2G9B8v/Wl/8Y1T92EXfle\n/HrFH7Az0yme9VIZ1NV9a6HX00UQJ+A1fesD+6iJwYDbnH+XJ2nYc0+G63riPnLIev6lynUyIvKO\nAADQVPYjUspdedzyVlRPuNJrKmJCwjBgeTOrNpDMEaKqxlGZNANxW0teCDA0VfrkbmoGFEVBY7Xv\n9pXJm7+EYqbGXKBKILNazlavTtPjWpK+znhpkhx3hauJId8t7lLL2zTUACnpsWLgHKauBRc2jNhM\nzYSmqXC8oOXNyVtX9fLYqpQ3YKlWYNOpI09ERW6cv13zr5d7PbTa7XijawOKjg14KtobZcvbpN4F\nkDLL2/OISNrjpMFnvQAAIABJREFULwlFt0EcAx4jb3gaFIVlC3P3eWkSIRfaAc1GB2huQNzS/fun\nOYiZmp87UIhTS5yoILaBIjJSzLsI4mpYsqwbdz9MXZM524/r8nixl6mE09kORSEYcCh5X/Ghqaiu\nCU6vmhhCwaRuV8NQy8ibXoODdFJ+PoOWqNEuNVlhiwsu7AJQFz63vF3J8k5qKcQd2kRGUT3YbJ4H\n7SHEtBgaN58HZ/toAIBDbL8Gmn1Hykj4vyvFQ6Ho4q3e9cg5eaSdVnj9dWIMXUO+KtiWHuZKdw0M\nZei4RJMdRhp6y1rE5vwdD+/6LYrI4c8rngxc85Cdpde1fip01RALKNntrbe+BcegnhAufyyseOn3\ns9T5E17dvAFf+q9nxCLHigWJqbQ3Oq9l93rpb3ht34bAdtlb4HgObNfG13/+HP7th/8Sn29l5ZM3\nPPtdvLDzZfxr6xLkmOVdCHNJMw8cH2NNrDqwWUkMBkrF1vZuxuceuwbLu94Qn8ltb6EHr4mrJAI0\ncY4QgnvefACvdvnhCdvx0McSE4frQX8gEJH3IYzm2gROmNWKL15whPgskE0eAqOEcDVVCcgbkuEs\nbwC1lbEAoafiQWICgNYaP0lNjudyWJLb3M821/06TEk0hYu4AEBVih4XsFRLM+Cl97OaYGVWsuWt\ny+QddJsbuio0vGWYqgFNU6h2t7SY4Mk3pmqUkbec9CeTsxibNN+xEMtbjWXxt/7f04Q1TxPudtNQ\nYeq+Z0V0RXNMZPIOtSCY5e1fqA04BmzH893mAGJxlLnNP3bqBEHufFvOy9JnytMRs3RhvampPjqn\njLy9giU8EKQYR8YblFzGRX9O2Hf15XwrLONkaEzZMcR+3EqLWRo8zd83zix/T6X3z9BUOGD3UnwH\n/d5kXAqTlHTVcwerpG1sIWeb8DIVYpz8he95HqAX4RXi+FjHlTAddqzqsg531PJOm0nYDoSHpugV\nxQKAex8qrKT4XXHPx7Iu6vKOF5vhDVXD3joGAPDcm742+NZeupghjo6BQWZpMsubX6Wa3hUoAyxk\ng7+/jJ1hc6RAgyFCF778bT/05vVAMQEvkxZiQaVqfRxLNvtlc+NHVOH8U/zcDzW1C/IP8ub/eQld\ng9TFbO8YgYQex/qSJjy25xPj7ct+gy8/eQN2ubQpztbuTKjVammmkKQNI0au9Oc4dCxyxjtA3d6y\nbsCDq/8BAPjjW38Wn/FjHdcLvEMAQElI5J230Vvow5Nbl+Dny+4Sn+8azIuZiCzvCACoxfzRRRNw\nxBh/tT4hdQTcwSoU3pwdekwpucdMLaiQJJM3CZKZkDdlkBOpONpqq6T9yxcSlaZvRfKXWMyULT//\n/LpE3tzy1lTNJ9mSxUVx3RHwWMIUb0nKY96moQlXOOCX9nDi0jU1IBwiX4OuKtjUOYTfPbpOfM7L\no/RQ8vYXKLy0SoYWIG95MVOSw6C6UIkuLNiKhCnlNDjCCiCOQd3mhJM3LwVzoai0tj5XcFDgbnMA\nlim7zTUoACxDCyTNAUCB5JDQaC5CwtIRN+j86rU70KmsgWKypKd8TMwDKcThEgdEp5nJil70xXDY\ngi3LuscRQtDv7PLbz7Lvz7t0e6ezCTuTVNr0kxMvw1ktF9Ahs/71pqHCUYKVA2JRKB5PAhhFjEx3\n4OjmuXRqpC588iKosHIezGKNyDsAaLKiYhQB2/QXSACgUMvbIx6G7AzSZgq27Sc2Op4jkTf9jsp4\nWlLCo9v6cixbv0jnmeTpwAekBc6OPhZGcA08vIS6yGWyo99B5X7zrx1L50hx8M+XttA+5ZkiuocG\nxBzpii5i5tzw1iq7oShAYeN4EMeEogDZYkHEkkvj/tttP8FLU5WA0Iw1+XmYE18AJ/A1W/rx6rrt\nbJ4NjKrsQHd+FwaKfqWCHMte2fMmrftnv+MbfrEUP/uLb81yFN2i0DRwPSJCFv7AWNWJ68sJK0RF\n7oVF9JqsbMBtvnEbnfNdhV6YE16A3voWc6F71I3O3iFpvQLENpnbnC0M8g5eWeu3C/WIhy2dQ3h+\nVac/3ihhLcJwiGtxFFfNg9dfH7q9lLwtUyuxvOWENXr7501uxPc/O5+2/pMs7/GtJf5MMMuCn1sr\nt7wbEv5Cg1tIPGv5e5+Zh7PmjQ0dK7e8AYiM5dIMYJJP4SOTzg1+oWR5u92tZePxX8R+9q0MQzWk\nemH/+3gs2ND0snri0fX+NRoh5C3PtxJI6C8JA6gudMUQZXQVSRNtKRqX16q6AuSbZZa3r89OAuSe\nLTjCbQ4Ahkl8kvc0aBpLAJOS5gACm+TAeSNmakjr/uIrhz5R1uQWLDEPXoH1ZlcGac9yzRPhEL5Y\n4q1fu3O7YKMgLF5ueXsKJYo3B/3yqE1bbdz10Dq2ncdXM7Br3gKgCNLjC4BYjL2U9SIUBUjoSVw0\n4TwoTgyKbqOphu4vXP/FGEBUmAodP7f+O6uepIsgT4Preb4YDot5d2W74REP9fE62K4nFp02sSWl\nO2Z5mwkpt4Fu4yEEl1Vf8DnK2QUUbRe/fGgVVm1hjXpcXWyX8wb4dRLHFGI7ikoT2VZv7sMdf10B\nWymI+VWhS25ztsBgdegkmxZz2DkwhKLtQW9eC60mqKrW7/o1y6qqBKRhAdAOaZK1nnPZ78s10Jyg\nZX49OT80kA35/cmu+tfWlau6FdyicJtD8VhIyQfPc+GLqEwxA89mioKuCkVzgqV10m9Qq+wRTXgc\nhzDLm97Hj7Z/Bl6mIuClsl0Pv3/CL9+7c/n/4DtLbsMDT/niMZHlHWFYqHu4Y2aJNWwZWlD3N1Aq\nRh/kptoE6qvi0FQ1QO6TO2rF/08fU4svXzjdj8ejNL5OURvzCZ94Kl08MJd5Q3UCR030CVa4iAGk\nErIrmrfwLL/YMXV+TJx386pImNA1Bc7WsdQqIeUxfuIRVFoVpaeDqRmi5EgJqXU3VCMgvfjZD07B\n5R+YIf7Ww8hb2t+TpEcntNUFd9RtqIqOuGR5z2+eCxAFesNmP6Pe1ZHJ2zR2yaw6a+ozfptXx0Au\nHyTvbGIjI2h6nzVNoeTLXtqJlEvdpooHz6afxS0dST2Jwlv0+lzFFpa3V4gL0RauVpbxBkUSk8hl\nYN+/dscu/Owvy/HEm5ScSYaFW3jZGrdwTN+789yrg4J8XOY2R7IXUF2MVuaCFBOB73hsgJYUcosx\nriawoycLt6hDt1xRiREgb0BkxOftIjziIR9jFmM+IfIKAFoOt2pjL/64lNbzt6aaqQyqwlu32uVu\n81hSkD/XyOdVBbativtJPy/gsZe34ull27F5Fy2Rq02m/aQ/j7vNFYgcCFsqeWT3dzBrY8122mKX\ne0Dyeep2J4SAe43VWAbEU0AKcXGN37l7KdZvH4AxgvUzcHTkXliEpNMIBzZ4GZ+mKgErmkP+zdhM\nuY84hig5zEreroxdImID3+1N57A8abDgFkTCGkqSyVJGUhCrIyzvrK+q6OqA7gT7juvhSWe268F1\nCfNuqMgXJE8Zu8ai7QbG8GrXcmjpXvEbEfu8S4jI+z2GcLe5VPIVEvMOWJYSudek/Bfr/KlNmDra\nJ3MAAUEYDpnc4WkBlzk9p2+5VyV88RiZ8MQChBGVSGarS6IuLnkDPA26ptDEKkUBoIAUEtAhuarZ\nS8ojBIs6TkBV/wwa72OQyZk37pARqJsHVZKrkhYBCb085i27zT2pfj5VojKnKABcFQ3VCUwbXYu5\nExtQHauC5VZBiQ8FMuppqZhfh6omhkQmLHENDOVtFGzfbd6XXI7p09l99VToqsK6zrH5GPMMVNZb\nmj8TBduFoijwhmhoxEYOipmHQhTAlmLezPLut/tgxIPkzc/Vn83h+VWd+PsKSt6lljePLTpMMCb/\n+kLs7CmIuHavthHfff5HUHU6B5br51qIlynJQzFzIt5tKQlk8g6Ia8BVCjBYtUYpefNcjbybD1iD\nzrYxrByPC9FQ8n19K81gbk01U8ubu80JJe+EpQuXdtKK+d4PI0jeXPdAdPDzCsiKen3672lzx4rf\nnS2XWqksROKYoGI7qng+sgUnQJwAkMl6ICBwPMePeceyIIUEAFXyDri49wnfclR0ByAqFM8MzLWm\nKqHlcmD3UYkPQq3sogtqT4NC6Dhkb1fGLre8lVjWJz+jnFjzbkGUivE5cnc14gOpy2CqlvjMcT04\nnoO8mxeqisTVoaiOmGN6fSXfwd3ujkcXAJoNuAZts+zySgJ/n7LjpTkChkmqO0CIyPsww5566IZa\n3oGYd3mdNycbz/MCCWvyQqBUfrB0eygUglhJrbpsrVt6+PG8QQh/iR49tQkfP20irr5oBkzNpCtu\nNv50wixrSiCTN38RVqUs6KqO6sKkQHtUUzVEOdCYmjZ888jrMK5q9LDXaGhqoJZ9XHMdPnDkCNx0\n2Vz/slUFhVVzoearsKDZF27hgh4yKhMJ6JqKL104HfOnUq+CiQR9YRh+0h0tFSOB8h7enAWOgf6h\nYHY9APSxzm10kaMGLG8A0OtYwhT7bOG0ZurZYQRAyTEPnSQAKNA1BQumNWFEFQ3Z7Mr3wUqweHKJ\n5a0YBVimImWrJ6Brip/YptlQFF+qlQghGVVoxW8Z2ibmwHN8qV23zw8ZKUZRWN4WErSch32HbrJY\nrsVkMJnHIGXRf9/Y0oVfPkL7NDudbSDFOAtNcPJmI0pQi7Ml2URj3sxtTsnbRdzSka70UBmjrV1L\nyZ+7r4u8RxD7DRb1XRhwugHFhcZEgyqsBGrScRAiuc0VOW4vJe0x0ti4Y1C4r0lJ7kHRsyl560W6\nwGChB+Fh01ykkiE04AYXWSqzvHUvjuKGSbC3jaLbmSWqN9MFDsnR3vSKS8chk3e2pH4bAPSGLYjN\noNnmhuWTYFKpggIFBafot2FlY/EKcShOnC7CJLc5j6kHLG/NoUTMUGrdK6oHKJS4HZewcj0ahvIX\nOPQ7MnlbkLfT3QJnRzubA7o9bmmR5R1heBT3QN56iaVoGcGENeLJ2xXpv1wmskSqkyGsLWCY5Q0A\nY6voD5vYFsa0BF3VMtEamoFvf+JI3HrF0SXnpeTI5V1jpoZjp7eIuHiVVcmuRRMNW2QYEnlfcfZ0\nfP7caRjTSo/RNZW9YPh1aSJO1VSTQGOqBhWmbJkH57M0/p00E/jwiePQ3ugfo6kKvMFaJDedgOqY\n/7nc7IGjpabc2rcU+oJVOem4GnIFF7miCy3mZ1VziyWQxyBZ+l051vCFuc0NPRgWEYlVnobPnD0F\nNRUxen+IBuJqKHg5KLotXsS6ruITZ0zGNefT+9WT3wU9zsnbEucCAK1yF5qmv+W77l0dLbVJsVDQ\n67Yj1rLJF3MJkZYFAM9gCnBF+twkYjq83iZUDbIKDL0o9NKTajWyeceP+zJLTjGZNeZpuOXf5iPN\nyHv11h68vtFPsgJo8ppX4vZWrBwsNYZ7/rYJBP4C1CU0YU2zCsg4Q+iobGGeJhWEKFA1Rt6eDVM1\nYLOsZrHAqejBC7gPWsNmaJXsGow4aiuo0EvOLhey4fFuIkkFr9vZi9gUKpzCFy5y3NzziEgMEwtX\nISTjoChJ2Y6qYhnlJeENVSXoLw4grqThdnb4izUtaBUX3jgyMA5ZMjUjZYIHcmMAQPEEeRc3TMIM\n71xYmomiWxAxb1GD7eooFF0YqinKHh2X+Cp2IrFRh6J5yBSkeWTkrQ42wB1gZWWqC9vxqKdDs0Fc\ng3lwuOVNv38wa4v5cLvaUB1jybvsGY9behTzjjA89mR5lwovWKaGie30ITthVmvoS5KngHgEQQlR\nibiUEPIu7fTEccX0T6Bi2wkgmUqcefTIYcdqqDra6lOoqYiVfS6j1Hr33dYkKNTBj5dUz2pSScwc\n71tqhqYG6n0BX7iBZ30nDD9cUGZ5l2Sex8JKxbgbnhDELR1nzO/AvMmNWDituWxfSyuPmXPyVrhl\nzd2sRRfElPpCM3KXQx2Vdf6LWGQrexp0VaVubzlhx2KuVtfXntfZfSaOgSFniMqzshc5d5vH9TgS\nehy7cr2iJI+/zD+4YIw4f6eyRri947qJz35oqug0BwBoXYmCW4ACJZDMJ5frOBq1evM5Rt5snKpD\nCXj2tCTMup3wCnGkvUYqYcleuq/iL4DqUPJm46urjPueE83P6OcvfM8jovWqxvu6aw5yWQVLWJtM\ngy1aHTh0MZ2gNdod6XamSgfAU6FoLJud2DA1U2RNBxfQ/iKNz21N2gI8Ddtz23HHsrsgMvqlccLT\nxMKoM+tnO8ulcIBP3rwlqli4Sm5z3oa3Wm3CN46/KuCh4ffC1bLwiIdxDS04YVYrxrXUse22P5eA\nOG57Fx1vgLyZZXzVjE/jqhmfDswBvRd8gRLDUMaFpZk0YU3EvNn8cfJWTJFQ5rgeduWpp8kX86H/\nDhX8MSi6DS+TRmbVLH8Bwo7f5qyHogDeUGXAbS5yC3K2mA/iGJg7voWek2kimHpkeUfYDfbU67fU\nhZyMGWitT+G2Lx6DxaeMLy9XAkRrPyrmIFnGEonK33v6yJNRH68NxH5lWJqJq886ATd8bI7I+JXB\nm6+UeglKt/NyH8sILjgqmeWt6DbSyZBac0XOXA9+h6YpActbBpf7TOp+LH5PlndY9yQeo+eqcecd\nNwafPnsKmmuTqDQqA/uGldvFWekWfzEeN82vr1WkF7/O483SgqxdnVZ2PuL6lrdcIy7I3/W158e3\nV2H+lEY0pqtEaRBPaNOlhUtNrBo9+V7U1zOyZy/CU+YEdeCJ4oB4KmaOa0RTTQLfuuyowPa8W4Cl\nmaiuKF8EAUBRoyV7OUbeosENK9dzk53wFAfurkbs7M0hm7eF29RGHlrtdroAkcSBYixPQdHcMnf0\n8nW70DfAXMUa70jmBBZIwuOkuFTVLk5Jo6OizV9oen5M2iU2DCk8M5yXAQDqE3WoqYgJ1/1rXcuR\nJf2wJlBJUd/y1oU7l3sv7O0j4Q0wi5aR8zPbn4dLiBAb4QtX/syYY19FhtDx1+ktSFspGLoKj7e5\nZZamrdHjm5J1+OiiCWirpgaBYmVhjnsZWsUudk56n555lWaqb+rpFdfG3ea5IR1JI/heUHQbnsEs\nZ9tAf6YIS7NQCIl5wzGQLzq+qJLqoug6uH/NX+k1MhU7fo0i1q7QOm6xuJcaBdmOh60OFW5xu1tD\nLe+hrB2o8EjH6Hvig8eOwHc/PQ+WoUUx7wjDY97kJswaX4+vLZ4Vup1nVNdZ9Zg2uhZnL6Qu7GSM\nJWaFkTezvUvbBcqiJvKmM0Yvwk3zrw0mp5WgtjKGUc3h5M7JebiYOd8u9MdLkt7SJn0BKbqDdLyc\nvGXLu1RIxtBUv+SoBJwYZMtbLyFXTmAfnXQhxlaNwsiKkqYlAM6Y34HT5rXjU2dNLtv2hWlXBWr0\nwzL2E5rk1lc0jGzyCb+m+xhfI54n+kj3tEFvxw+O/ffgCT1VinlL99RgbnfPz3jXVBWfOmsK6lL+\nvXOKLAFLWrjUxqphezZ67R7qvuS92y3//GkjhYq0CsXTce6xNI9A04KLy135PliaiY+dOhFnzO8o\nmwvCwgCdPVRHnN8jTmI7MszqtC1s685Qy1tWA+WhBdvCSbOobn/CYG1Nx7wu+otzwn9pdZcIJ2ga\nK8nTnMACSSgPcnI26QJjRLoVlsmS+jwNHmhfe9uz0dMnJToNoxxYvXURLM2kSoeyVCk2+vMhW95C\n598RcyD2Y8f/c9OT6NPXQIlTD0ap5a1oHvQxNJuee5Fk8halWCo9vi5OibE6Sc9jtKyHVs3ugfQc\ncm8N11bo7suhM0sJ/Sf3vFn221fTPSC1G0CKFrxsBdZvH0BPn4OcUxAiLVzbgdgx5G0XGgwxxgIZ\nQme2G9PrpooWvdzyzhTZ74Qt1OIaj/v7mgeO6yGDXpCiBZJL0wx1fs/ZImkoZ0uuewOVTGggFgcq\nUxZMQ0XRdvdoYO0vhJs+EQ5ZWKaGK88tt644UkYS35p/HSrMVHhMOqS1J3/Zlbb+k6340pZ77wS+\n5R1O/pogb/riLiXvuJThHeY214gpvqd0gUHJV8XE+Cx0NFQFtnELf3duc+5Wntc8B/Oa54SOP27p\nuOD4saHbUrFYwPIPu0dJPQk4/vbqtH+9llcJe/0UWJOfB1GZFSBZhZahIaZbSOoJP8bo6dBUBTFL\ng9vTgqJuw+zw5SHh6mVd32TLyCkyYpeItyZO44V9hX5U6JXg7RsURUFh9UxY41+B7dlIWAZqrKQI\njWglnouck0M6UYdpo2sxbXQtHlqyEYWVR6FuyhoMEhazJ4Bjq+hoTQjPByfvXqaRbqoxbO/JImbq\nouc44MtbLpzSjounUNnadExanNWzpL1Aq1z6Ha45CKj1rCpAmmON11mzeD57oSeNhL/wJCo8uEhY\nOlzVLbG2gwsYTt5xRp7phBH4neaJn+XtDXBi8suYeLlVIJ9B+v+COgjVGoKXjwsPguyB4z9z/rsy\nNBW9/QRWo+/9Kap0DPUsVl2TKM/VCHw/I/8iyeP1tT348f8+g9gR6+EO1ACuIQiZQ6vugqIAxS3j\nAU9HvujCLChQzaLoC6ym+kEIdWsXii7i4HF5FzZT4RvoKxeEGsxnAZjQ0rS6okKvQhcQ0DywHQ8O\n8ZUCs3lb/K54eCJrboNVxWrfPRWVcbqIzjssVBUbgDbiDeSKp5XNzYFAZHm/B1EXrxk2mexblx9V\n9hmnZbIbgi61yt8JRFx+mFOqSnncXkZCcmvL7U55rJlnm4dZtfzlf3TNSTh7zKmh35PYjdv8ncLU\ntYALN8z7kNb9a7JUU7jhAZoMN29C0NqX3eomW4BUxZi1ThSAKNBUBcmYgfqqONydI6ES2UrSAhYz\nAD+jHxAvYpng5Xr+ilgSs8bXi0XloglzUUmakXcLyDm5gJiPripCHpQjVhL394aqMdM8xf/A1QEo\naK5Jipp8txict7pUBTp7cxjMFuF2t+KktuMB+PKWDekKUXWRNMt/G7LkbYJtH4qvg9FOFzky2QkJ\nYJ6Mp9iIaVbwuXV05L0cYpVDVMSmpLJDBifvGHvuUnED8o8jD2r1FlbPFORbEaf7ajXbpQ575RoO\nAPWsKUYRhCWr3fjxuThnQfnikqvrmYYq7jl3mxdUujyrZ5Z3Q0U5eZeqNxJPRc7JY9naHiEA43bS\nZ3XZup7gHHDPgOwV83QoCi+1I1CT/SC5FFRCyZ9b3takpbAVapWv3iDVkrt8AcF6rTdSQZZRFvOI\nSZZ10XHhoCg8BnLuBPds8GeBjRhxg3fQo+SdSa6F0bwBm3qD7UoPFCLyfp9hREMKN867BjcvuEF8\nxt08Lvt3XP+5uOGoqwPH7U/y5pa1S8KTO9QSy7s05t2UpOpN9bHaQO05fzlzyzuMGPnLP6xjG/+e\nZIjlffHJ4zB+RFVACW5fQL9fETHN/kJ5b2ceFgAASw+St6oquPC4oDu+QrIkeQ9s/pKloQefMK5f\nPBvzpjQibUrk7Oplcyxn3E8f2YxPnDEJx83wBXZ4xj9A5+vKc6dhFksMvPCEsRjZQL8/5+QDSXma\npsLZOg7FdVP9awxJ2otp/vg4cTbX+Za36ypI6v51N1VVwiME67cPQFVUnNi+AIBfTiff0zE1I/zs\neAauugbQOefQG1g3L4mYDFWjiyJmeXuqLeLoHPa2MSDwUGxaRj/gmvNmubdJMQsgRKEd5cDIW/N/\nG0VlqGwMHfXU82GOXAU11ReYJ/n7AKDIiJfY1LXb0ZTG+NagZgPgaxYQ4ru9eYJWERkYqiEWdfXp\n8pBYaSIeHAMFL49ETIfCeoDzkM+ytUHyVpmSn+w14Za8Yuap7oHmwstUwjI1arm7vuVcTG1mx0jN\nhQIxawIt3Y8RqTbUWLWB8Sqai6ydp78VtmjpzxQDx1tSCaCzg4Z3+D1/bPNTeHzz037mPQn3KO5v\nROT9PkRDog6VVvnKmbvGLSWFpmRDYFtIXtY+QxXkHX5SlcfaWcy71FoZXdmBzx7xcXxp9hXB47ik\nNCPv0pp3gFs1VIq0FMJtHmJ5nzJnBK67ZFawZn4foCgKrr5oBmbWURWz0sQdAEiYMVHrbGkmkjFd\nkLKmBUkLAJKmFONn19DMFjgcfOlVmbLw6bOmoDImddjytLJEx3qplOeoiW1YMK1ZzB2AwPOTCLsG\naQ5ly5vfS/klHUbeimv6nhNO3jVJcbzreUhLY2itoZ6ATN5BIqajwkoHqiHkMVZYaehvLkL+9YXi\nM/k+xIzdh5scj8BQLKiJQSjxAXhKOXl7fY1I6Wm4Zl/g+NLKCQFXF2JKybgRVC5TWaxXImdu9QF+\nXH+4RLiiTscwsq4ON1xKQz1hiZIJk96zwWxRsjqZ2xw5VJgp8ZzIz5x8DYCfsEk8DXllANvct0SN\nNo9Db9hRrtYG0JJD/qyLOTviaRH+IPkkYqaGfNHBUM6fI0/3NQ9Kx2N0rGJzRFATr0KMe5mE5e0K\nsR5O/rmCFPPWHDom1QOxTdibJtFxSc/tfW89KMJYils+twcCEXm/j/GlC6ejrT6JY46gJQ+cvGWC\nGsvqoxtq4uUn2EfwOHRYpjbgW+Ya++2kQmq5p9VNLluAcLe5RuiPKszyPml2G7568UyMaPDJ6+On\nTUR7QwpjWqk1EbS8939ayOSRNbjsiPNxycTzsajjxLLtluRaNzUqQiMatygKNFULvDiC5E3nroy8\nSxwnSSNoeZeiPu6Tt0zEHBVSA5rQBUiAvP2xcs+HHDoI08h3XCI8LPwl3taQFDK6iZiBasn676j3\n3fiJmA5VUZHQ/WssXfAYugqST6Hw5mwU101FXaU/3ngIecvEWSi6mKgfDUVzoTdtgIci4iELkLRR\nCaIEO7uVhif880slmpoaIG9P9ZOkOJRA1QDvXS/FsaUua45OiW/e+A7RwS6szDNlMNlbqVaeyt8S\n2MghLXljShd79BqY5rzJa8jpta8k//TVANl5t3aXS6USxwCIhoZqdi8kmeOWDno9Zx45HjGTajP0\nZf2ySRKKQnq/AAAbb0lEQVQbCJxfHo+i29CbN7BrTIjx8Xtijl6OdblV9CBXFzkAckKbodN7Egif\nlNxzy6I/suaaYEXJgUJE3u9jTBtdi29/4ihhhXLXuKym9qULp+Nri2dhTMv+eyC55T0cefPt6YSO\nb19+JCrfpqv6iDHUHdZWzVyKIdZFzNQxqaM68PI5dnoLbrr8SBh6iOUdco79AV3VcXTLkaFjNAzV\n713NSJeTN19Y8aoCUzUQtyTVOmF5S33R6ZkCf6UMyfIOJW/frZowysm7UnqRlxJj6THyNXLrkkus\nAggo1vE6esfxfCEPRkS1FTGcMa8DC6Y14XPnTEW15Sccjm32a/m5cE9ausbSaxC1+P31cLvbxLMD\nhJO3vMAp2C7aY+MB0HI7opAyyxsAKqQmL9yKa2sIL1NEqbWmlv825PvUW/Sbhvglf/52N6RxkRyO\naU+34YNjTgsQZMqS480avEwaWsUuaPVbQBQvcDwAjKroQEqpFp2+xjbV4rR57SK0YW8eL/bV0n1C\nOnU48FBGQzUTKUr6IaW8QRu3jKqvQ8LSkc07yG0ZAQyy6xS96/05mDLC9x7yTPWkkfS9H9K+y7LP\n0jE4BkawBQ4kt7mha9QL4egY1ZzGly+cXrbodPUcTM1EOvHOQmtvFxF5RxDglrfspo5bOsa1VQ13\nyD5B3UPMW1jm8IZ/2YXg46dNxFXnHYGFU6hs4R7lW4eBoRnCZbuv53gnMHVNvMy5vCTPOOfZ2jUx\npg6lKIhJ8WruNi9VsCq1vNvSkmBMyAtVJtQwy1te1ISRe3wYy1tkrEtWolySyMvRbMcT18jjoYqi\nIBEz8IkzJqO5NonqmL+gbK3zn9EjxtJrr5LIPVGywCgNxUwf689XmEuYuJqwVfNFF0nTAnE1EVMP\nI++URN7cyjv32NE4//gxZfsSV99ziZFENos6ThL/z/UQZOudZCqRe/HkQGy/QiJfRVGwqOMEJGzf\nQ5OWyRsKiutoAqLesLnseAC4es7nME+7UCwARjdX4YLjx4pyPrenFflXj5PGT3uNDwfujeGWt73N\nn6ch0Bh52kxhVEsFXI+gp9dD88CxwXOwRe8lp4zHF844DpW5CQAANUkt85SRRJznHYQtJFzd98rx\nksGqbrg1a2jioWvgklMmYOro2jLvQ09uV6gH5kAhIu8IAsfPpAlJs8aHtxvdXxhTORJAmHVIMb1u\nCh1P24K9Om/M1DFjXB3SZhKWZgaSrvYWPEZ6INzme4IpWd5claqmgvc7py8MTmxFtwhLiqNazHug\nqzqumvFptPfRspVSWhhd6ddUf2NxeQWCjHgIecsItbyHiXnLXp20Qq1dXv8L+Ja37bpoSNDnkBBg\nzsRgDgYAVPMFDILiOVzJri3V4o+x1PKWyPu4GS1oqfWvIWUl8K3510IfkhY4ro6PnEItyUVzR9De\n6LYpuqrFJaW9tnrqrm9IBpvoANSDcvq8Dnz9yC/jxBHHBM4vo3YoqONAXE2QCQBMrh+Lr5TkfJSF\nPzxdlNQBCP09aNLzLSc+AsBXPngs4GqC+NIhxxMoYlw8h+Wy0yYKWWRSjAnLXHZpA0DlllNwztgz\n/HOxPIiKBPME9jYF8hIA6k2Z2O7f95baCmiuf2/5d+SLDlRFRYfH+ruzkreUZHmHClY5BkYIqWMF\nbj99RovJbfQjVy+rfvHnItwDc6AQ1XlHEDh9XgcWTmt+227qfcWF4z+ICdVjMatxeuj2SbXj8b2F\n3wyWK+0FNFXDNXM+H3AN7y0Sehx9hf6DYnkbuubXm7JabRHzZqRTKxGX/DIxpSz6CTVjYbkZAD1l\n7D0i5WeOj24O96x8ceZnsHFwS5m7tBRhZYmyNR6WkAYA7eZErCg8g+aUb/0J8nY8zG2cgXV9GxDL\njMTZx00qO77GCo77psvmYiBbFHM1qmoEsCV8DHweZ42vx8dOnRgcu6WjLl6LOSNH47lupn3u6Zg/\npQknzaZCL6+s7mJSpdQzInsqvrZ4Nrr6cuhVN4nvT5oWvvrJo0TYoCXVhIUtR+GxzU/R87s6IHHC\nCGUatrxYg/icf9APSohGVYKJi6RE2lh87vj3Jox8PdZ61elpQoKFX266bC5Wb+7D5JE1UFdUwovv\nYseXPwfHTm/GP5dyOWBK3o01CXz90jlYuWEX/vPe10DsGHXtl1xDtVGHk9tnYkzlKNz94t+xsZMu\ndqaMqsH0TbUY0ZjCX5esByGK8C6kzBTGj/AXXifPbsPmt6rQyYVYuDgMlzw2LZCiJRZZKTOJmDK8\n5U1cHUdPbcIf/klbpBbfnIPYnL/BNQbE9pgxvOt/uGf9QCCyvCMIKIpywIkboC7Go5pn79aqTUuZ\nrfuCpmQjUua+kT9A3c5pI1VWc/5uwNJV1roRqGSJYaUxb9EUAcGyt9KSLz6DpIS9Dc3AhOqxGF9V\n7sLlGFc9Bie3Hzfsdo6w8IdcBx6WkAYAU5Nz8NkjPo6zR/v19kdNpkQ+sb0auqrjkknn47w5c0Q+\nggzZbQ4A7Y1pTB3lx65HVUqysiXPkio66ZW7qrkVP76+zf/Q1f0sZQCmqQUy5mXhoLilo70xHcgb\nOG56G1rqgs9jXPYGuDpOnOV/XzpBVdZ4YlmYlRhIOvR8adLW+iTOOYY1B5LqpsPCHzWDM+H2NqCt\nOE/McXtjGiczmVut6Lv+wyz3uso4KhL02r2S52DyyBpceMJYv1lKyTV091PCHVXZjqnG8aLne1NN\nAl+4YDqV2iWqyI8wVB2WZiIVN/CJMybhyxdOR3tjGpMa/C6AHz91Mlrrk2KR1TdUCCRHUstbCx0P\nAMAxkIobuP6js3Hy7DYACohtwVN5A52g5X3JxPOFpxAID58cKESWd4QIIfjY5ItQcIvvaAGxr9B1\nFW5nG2yjgCvPOx8AUJP21a+AYDKWXH5klpK3SJ0t/56rZn66/MO9wHnjzsIDax7CxJpxZdsaE37o\nZTjytkwd0+qCNevnHDMacyc2BKoBhgOPaZdm1nPwpD7e31lGW30SmzuHUFc1/MtWvi7iagGXv6Vr\nAZd02Eu7OdmI5mQjtmd2oqmy3LshW84nzhyB8SP8fToa0wAUmEocBZIVVutpR7ULyeOEEYcChS7M\nJCI6YWYrTpzVhhNnt+Hz/1WEmuqDSozQZ/mCo+bi0aWNWPyhCaFzoBerwIVd08N4ssZVjcZLna/5\n1QESqtIWyFZekkUt81HNVP50guT+lhdn3PuSZImH3kAN1FgWtudn4C+QmvzMbZyJf215BgBNPj12\nuh8uaapJYHl3HGqKJr8ljSRivN+BY+L85o/h3rcegJryLWuAVtmMba3EkZMace+W5diapS4U4hgB\nsaKjW47ElNqJeK2b9q1/N2PeEXlHiBACUzOHVak70DCYhKuzdRzqE9R6a29M4ZxjRokOaTweXB+v\nDVjbZoj4DDCsmN07wokjjgnGbSXIRCFaNZYgbKyqqgTaq+4Opmbg5gXfGHZxAAC5l06iL9sPBD//\n6AcmoK0hxayr0rHTfyulxjtfuXB2YB/TUAPkHQ/pLqcoCq6Z83m81rUC0+unlm3XVA03zbsWf1n3\nMOY1B88/aSQlNiPXgEJsA5QEJRdNU8X9VhUVcT2GrJMrkTv1O7DpJIbCiqNRlQ4nlTEtlbjinOHl\nluOZDgwZW2FU9pZpP3AsnnQBptVNxuyQMFgypkvtR6llfvyMFpxz7KhABYvrlmfX88WS21/ni+WE\nYGTFCDQmGtCSKs+h+dAxo5B5eRJeGKKqZykjAVPKjxhd3Q4vlxbkXZr1P7atEvW91YK8DcUs03pI\nmynoqg7HcyLLO0KE9zOSMQOXfmBCwPpUFAVnLRgl/q6NV+PauVehxqoWtdMAy1QPwbvUKyGARR0n\n4G8bH0d7upwggXIvwb6gcpjOdhy3f+kUhDlPYqaO044qb4RSio9Nvggv7HwFExpaA5/HTC0QTx7u\npW1qJuY2zRz2/PWJWnxy6uKyz6tSFlrrkujcUgN97Aa/R3XJjeTiIh111Th/8Ww8+vwmHD2Fkpii\nKHBcD4CCUU27n6fhoCkGiqtnY+KY6mFzH3Z3jRPaq1D7Vgp96BYKZJapBcIbANA7SGPSFSG9Cnij\nkXFVo8u2AfQ6bzjqK6GeBUPXcMmcU/DCE4/T79aswH6WqQXaKB83pTyMJHdPTJrhXRJrrCp05roD\nuQ8HGhF5R4hwCIJn/u8OYaSol3Tt8t9T7z57nz36VBzVNCvUnQr4mfEHEqX913eHay6eiQeeWheY\n+yObZuHIpvIOfg3VCZw4bQyeHqB61/EDYHGdd/wY/PTPWRQ3TII3SC3x0kXYpJrxWLVrNU4ffRLG\n1ldibFu4FT1hxL6Ve/L5U/YxPUpTVZw//Rj8YvkGuF10XsMWmJUpujiZPKqmbBscE1+c/CWMqKsu\n38awu/CWoer4ztHXI+fky/bTVQWktxVesg+jnWNw6YfKEyPlBWKlVU7eAK3+6Mx1v6sJaxF5R4jw\nHkLpy4n/fRAMbyiKMixxA8O7+A8WJnZU42sds/e8I8MHpx8JrO2GSzyMZuWP+xMzxtZhbGsVVm30\nPQSliYeXT/kICm4xkMAYhpHDtOfdEy47bRLufvRNXHRSeV7D28XMhmn45lFfxdeefx1AeF+Bs44e\nicqkhWOOaA58fvnpk/Dy6i6MaWh6R9LE1bEqhFG/rqtQMrUoLF+A6qnhYYFKSU2wKhHufeClm2Hh\nkwOFiLwjRHg/4GCw9x5Qmhl/uCFhxHHxxPMO6Hc01iSwamOv+LvU8k4YiVBteY5vXDoHb27uxbi2\nfVNIbKlL4tpLyj0Pe4vGZD14NnxYuMTQNZEhLmPhEc1YWELo+xO6poqSjPgwuvNT6yahVZuADVuK\n+MAJ4eWtnLwjt3mECBH2Cj/43AK4XnnSz26SzQ869kfM+72OxupgedecCeHW4XAY3VKB0S37ZnUf\nKBxKizZFAbhBP5znPWkkcN2xl9Me4lY4ZY6pot6RltSBW2iUIiLvCBHeA6geJpt4yqgavPRmF2aN\nrwvdfjBxqLnND0U0VvtW9e1XH79XMfxDFeYwCmXvJj555iS8sbGPlX3tObSkKsqwxA0A46vH4gfH\n/ntkeUeIEGH/4NjpLRjVVPG26qbfbZjvASI60JgyqhqTOqpx9NSm9wRxA76lezBx9NRmHD2VWsn7\nK6fz3SRuICLvCBHe01AVBR1N+67xfiDwqbMmY/32gVDVtAhBGLqGr148fKnZ4YTT53Xg2eXbUfUu\nqDjuDfzQ0qEYXBoeB3Qpd/PNN+PDH/4wLrroIrz++uuBbc8++yzOP/98fPjDH8Z///d/H8hhRIgQ\n4RDC/ClN+MjJ4/e8Y4T3FM4/fgx+eOXCQBOZQwEXn0wz6WVltsMBB8zyfv7557Fx40bcc889WLt2\nLa6//nrcc889Yvt3vvMd3HnnnWhsbMTixYvxgQ98AGPHjj1Qw4kQIUKECBHKILvQDyccsCXQkiVL\ncPLJJwMAxowZg/7+fgwNDQEANm/ejMrKSjQ3N0NVVRx33HFYsmTJgRpKhAgRIkSI8J7CAbO8u7u7\nMWWK322lpqYGXV1dSKVS6OrqQk1NTWDb5s2bd3u+6uoE9P0cI6uvP7RigYcronl854jm8J0jmsP9\ng2ge3znejTl81xLWSjV59xa9vdn9NBKK+vo0uroG9+s534+I5vGdI5rDd45oDvcPonl859jfczjc\nQuCAuc0bGhrQ3d0t/u7s7ER9fX3otp07d6KhYe/EByJEiBAhQoT3Kw4YeS9YsACPPvooAGDFihVo\naGhAKkVrTdva2jA0NIQtW7bAcRw8/vjjWLBgwYEaSoQIESJEiPCewgFzm8+aNQtTpkzBRRddBEVR\ncOONN+L+++9HOp3GKaecgptuuglf+cpXAACnn346Ro0atYczRogQIUKECBEAQCHvNBj9LmF/x2Gi\n2M7+QTSP7xzRHL5zRHO4fxDN4zvHYR/zjhAhQoQIESIcGETkHSFChAgRIhxmiMg7QoQIESJEOMwQ\nkXeECBEiRIhwmCEi7wgRIkSIEOEww2GTbR4hQoQIESJEoIgs7wgRIkSIEOEwQ0TeESJEiBAhwmGG\niLwjRIgQIUKEwwwReUeIECFChAiHGSLyjhAhQoQIEQ4zROQdIUKECBEiHGY4YF3FDmXcfPPNeO21\n16AoCq6//nocccQRB3tIhzRWr16NK664Ah//+MexePFibN++Hddccw1c10V9fT3+4z/+A6Zp4sEH\nH8RvfvMbqKqKCy+8EBdccMHBHvohg1tuuQUvvfQSHMfBZz7zGUybNi2aw71ALpfDddddh56eHhQK\nBVxxxRWYOHFiNIf7iHw+jzPPPBNXXHEF5s+fH83jXmDp0qX4whe+gHHjxgEAxo8fj09+8pPv/hyS\n9xmWLl1KPv3pTxNCCFmzZg258MILD/KIDm1kMhmyePFi8o1vfIPcfffdhBBCrrvuOvJ///d/hBBC\nfvCDH5Df/va3JJPJkEWLFpGBgQGSy+XIGWecQXp7ew/m0A8ZLFmyhHzyk58khBCya9cuctxxx0Vz\nuJd46KGHyB133EEIIWTLli1k0aJF0Ry+A/zwhz8k5557LvnTn/4UzeNe4rnnniOf//znA58djDl8\n37nNlyxZgpNPPhkAMGbMGPT392NoaOggj+rQhWma+PnPf46Ghgbx2dKlS3HSSScBAE444QQsWbIE\nr732GqZNm4Z0Oo1YLIZZs2bh5ZdfPljDPqQwd+5c/PjHPwYAVFRUIJfLRXO4lzj99NPxqU99CgCw\nfft2NDY2RnO4j1i7di3WrFmD448/HkD0e94fOBhz+L4j7+7ublRXV4u/a2pq0NXVdRBHdGhD13XE\nYrHAZ7lcDqZpAgBqa2vR1dWF7u5u1NTUiH2iefWhaRoSiQQA4L777sOxxx4bzeE+4qKLLsLVV1+N\n66+/PprDfcT3v/99XHfddeLvaB73HmvWrMFnP/tZXHzxxXjmmWcOyhy+L2PeMkikDvuOMNz8RfNa\njn/84x+477778Mtf/hKLFi0Sn0dz+Pbxhz/8AatWrcJXv/rVwPxEc/j28Oc//xkzZszAiBEjQrdH\n87hnjBw5EldeeSVOO+00bN68GZdeeilc1xXb3605fN+Rd0NDA7q7u8XfnZ2dqK+vP4gjOvyQSCSQ\nz+cRi8Wwc+dONDQ0hM7rjBkzDuIoDy089dRT+NnPfoZf/OIXSKfT0RzuJZYvX47a2lo0Nzdj0qRJ\ncF0XyWQymsO9xBNPPIHNmzfjiSeewI4dO2CaZvQs7iUaGxtx+umnAwDa29tRV1eHZcuWvetz+L5z\nmy9YsACPPvooAGDFihVoaGhAKpU6yKM6vHD00UeLOfzb3/6GY445BtOnT8eyZcswMDCATCaDl19+\nGXPmzDnIIz00MDg4iFtuuQW33347qqqqAERzuLd48cUX8ctf/hIADX1ls9loDvcBP/rRj/CnP/0J\n9957Ly644AJcccUV0TzuJR588EHceeedAICuri709PTg3HPPfdfn8H3ZVezWW2/Fiy++CEVRcOON\nN2LixIkHe0iHLJYvX47vf//72Lp1K3RdR2NjI2699VZcd911KBQKaGlpwXe/+10YhoFHHnkEd955\nJxRFweLFi3H22Wcf7OEfErjnnntw2223YdSoUeKz733ve/jGN74RzeHbRD6fx9e//nVs374d+Xwe\nV155JaZOnYprr702msN9xG233YbW1lYsXLgwmse9wNDQEK6++moMDAzAtm1ceeWVmDRp0rs+h+9L\n8o4QIUKECBEOZ7zv3OYRIkSIECHC4Y6IvCNEiBAhQoTDDBF5R4gQIUKECIcZIvKOECFChAgRDjNE\n5B0hQoQIESIcZnjfibREiHC44ZZbbsGyZctQKBSwcuVKzJw5EwBw3nnn4UMf+tDbOscdd9yB8ePH\nCz3rMHz0ox/Fr3/9a2iatj+GHcDOnTuxbt06zJ8/f7+fO0KE9yOiUrEIEQ4TbNmyBR/5yEfw5JNP\nHuyh7DUefPBBrF27Fl/60pcO9lAiRHhPILK8I0Q4jHHbbbdhy5Yt2LZtG6699lrk83nceuutME0T\n+XweN954I6ZMmYLrrrsOs2fPxvz58/Fv//ZvWLhwIV5//XVkMhncfvvtaGxsxIQJE7BixQr89Kc/\nRV9fH3bs2IGNGzfiqKOOwg033IBCoYBrr70WW7duRVNTEzRNw4IFCwI9ijOZDL7yla9gYGAAjuPg\nhBNOwJlnnokf/ehHIISgqqoKl1xyCb797W9j48aNyGQyOPPMM3H55Zfj/vvvx9///ncoioKdO3di\n9OjRuPnmm2EYxkGc4QgRDk1EMe8IEQ5zbNmyBXfddRemTp2Kvr4+3HTTTbjrrrtw6aWX4vbbby/b\nf+3atTj33HPx29/+FpMmTcLDDz9cts/KlSvxk5/8BPfddx/uv/9+9Pf348EHH4TjOPjjH/+Ib37z\nm3jmmWfKjnv22WfhOA5+97vf4Q9/+AMSiQRaW1txzjnn4Oyzz8Zll12Gu+66Cw0NDbj77rvxxz/+\nEQ899BDeeOMNAMCyZctw66234r777sO2bdsOSy9DhAjvBiLLO0KEwxzTp0+HoigAgLq6Otxyyy0o\nFAoYHBxEZWVl2f7V1dUYN24cAKClpQV9fX1l+8yePRuapkHTNFRXV6O/vx+rVq3CkUceCQCor6/H\n7Nmzy46bNWsWfvKTn+ALX/gCjjvuOFxwwQVQ1aCNsHTpUuzYsQMvvPACAKBYLGLTpk3ieN4+debM\nmVi7dq3okxwhQgQfEXlHiHCYQ3YrX3PNNfjWt76F+fPn4/HHHxfNPGSUJqSFpb2E7eN5XoCIS0kZ\noL2M//KXv+CVV17BP//5T5x33nl44IEHAvuYponPfe5zOPXUUwOf33///fA8b7fjihAhAkXkNo8Q\n4T2E7u5ujBs3Dq7r4pFHHkGxWNxv5x49ejReeeUVAEBPTw9eeun/t3eHOAoDYRTHHyGYJlwAMAjg\nAFROSC0STCWCIJCYBhwOwxEqegIkuqLBbRN0LQaBxkBZsdkaDJutmeb/05PJ517eZCbz9bYmSRLF\ncazhcKggCOQ4jm63m2q1mh6Ph6SfVv97VJ/nuXa7XdH+z+ez7ve7Xq+X0jTVYDAobX6gSmjeQIUs\nFgvNZjO1Wi3N53MFQaAoikrZezqdKo5j+b6vTqcj13XfGnq329V6vVYYhqrX6zLGqN1uy3VdrVYr\nNRoNLZdLZVkm3/f1fD7leV7xVWq/39dms9HlclGv15MxppTZgarhqRiAj1yvV6VpqvF4rDzPNZlM\ntN1ui3fn/3U4HHQ6nbTf70vZD6gymjeAjzSbTR2Px+J/4tFoVFpwA/gbmjcAAJbhwhoAAJYhvAEA\nsAzhDQCAZQhvAAAsQ3gDAGAZwhsAAMt8AxJ5C+54P8QOAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe8AAAFnCAYAAACPasF4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsvXe8XVWZ///e5dTba3pCQiAJCSWE\nIJGmoSSgjsg4gmCb4Tf+dCwURUdEQXGs41gYFQvDiIyIiKIIJIAgEBJCgJBKertpt59z76m7fv9Y\nu55zboiQBCL783rllXt2WXvttfden6et55Fs27aJECFChAgRIhw1kF/vDkSIECFChAgR/jZE5B0h\nQoQIESIcZYjIO0KECBEiRDjKEJF3hAgRIkSIcJQhIu8IESJEiBDhKENE3hEiRIgQIcJRhoi8I7yp\nMW3aND796U9Xbf/iF7/ItGnTQsfdcMMNoWOWL1/OBz/4QQB2797NCSec4O3btWsXH/vYx1iwYAEL\nFizgkksu4bHHHgPgpptuYuHChSxcuJCZM2fy9re/3fudy+VC19A0jfvvv/9vvq/Vq1dz1VVXHdSx\nDzzwAF/72tde9bVcvNbz3wi46667+P73v/96dyNChFeE+np3IEKE1xsbN24kl8tRX18PCBJas2ZN\n1XErVqxg/fr1IZIeCZ/97Gd597vfzW233QbAqlWr+PCHP8zDDz/MV77yFe+4+fPn8+1vf5vTTjut\nZjvr16/n/vvv55JLLvmb7umkk07i9ttvP6hjly5dyvnnn/+qr+XitZ7/RsAHPvCB17sLESIcFCLN\nO8KbHm95y1t49NFHvd9LlizhxBNPrDruuuuu4+tf//pBtblp0yZOPvlk7/fJJ5/M4sWLGT169EH3\nq6+vj09+8pO89NJLXHHFFYCwAPz0pz9lwYIFmKbJypUrufTSS1m4cCEXX3wxS5cuBYRV4IILLgDg\n1ltv5atf/Sqf+MQnOO+883jve99LT0+Pd53ly5czffr0qmu98MIL/OM//iMXXHAB73vf++jq6gKg\nu7ubD3/4w1x88cWcf/75fO9736vZ18p7ueqqq1i4cCHz58/njjvu8PatXbuWSy+9lAULFvCBD3zA\nu85I26dNm8b+/fu9893fy5cv5/LLL+fqq6/mM5/5DAD33nsvF110ERdeeCFXXnkle/bsAcC2bb7x\njW8wf/58FixYwC9+8QtvrL74xS8CsH///pD15MknnwTAMAy++MUvsmDBAi644AI++clPVllMIkQ4\n3IjIO8KbHhdddBF//vOfvd8PPvggCxcurHmcbdssWrToFds855xz+PSnP82dd97J1q1bARg1ahSS\nJB10v9rb27nuuus45ZRT+PWvf+1tt22bxYsXoygKX/7yl7nqqqtYtGgRH/3oR7nppptqtrVo0SJu\nuOEGHnvsMdra2rjvvvsA2Lp1Kx0dHYwbNy50rVwux8c//nGuu+46Hn30UT70oQ9x9dVXA/C///u/\nzJ07l4ceeogHHniArq4uLMuq2VcXP/nJTxg/fjyLFi3il7/8Jd/97nfZt28fIISiq6++msWLF3P+\n+edzyy23HHD7gbB+/Xouv/xyvvvd79Lf389Xv/pV7rjjDh555BEmTpzIj3/8YwD+9Kc/sXr1ahYv\nXsx9993HXXfdxerVq0Ntff7zn2f69OksXryYn/3sZ3zuc59jcHCQJUuWsHv3bhYtWsQjjzzC1KlT\nWbly5Sv2LUKEQ4mIvCO86XH66aezefNm+vv7KRaLrFy5knnz5tU89oYbbuA///M/KZfLB2zzO9/5\nDldeeSUPPPAA73znO5k/fz533333Ienv2972Nu/v+++/n4suugiAOXPmeNppJU477TTGjRuHJEnM\nmDHDI85ly5bVvNcXXniBUaNGceaZZwLwzne+k127drF3717a2tpYsmQJzz//PPF4nP/6r/+is7Pz\ngH2+8cYb+dKXvgTAhAkT6OjoYPfu3Wzfvp3BwUHOPfdcQJitb7311hG3vxKSyaR3P21tbbzwwgue\nteO0007zxuepp55iwYIFxGIx6uvreeihh0LWlkKhwPLly/nIRz4CwKRJk5gzZw5PPvkkra2tbN26\nlUcffZRiscg111zD2Wef/Yp9ixDhUCLyeUd400NRFC688EIefvhhWltbOeuss1DV2p/GzJkzmTt3\nLnfccQezZ88esc1EIsFVV13FVVddxdDQEIsWLeLrX/8648ePf80TfXNzs/f3Aw88wJ133kk+n8ey\nLEYqVdDQ0OD9rSgKpmkC8Mwzz3gEFcTQ0BBdXV0hC0Q8HmdgYICPfOQjWJbFV77yFXp6erjyyiv5\n1Kc+dcA+r1mzxtO2ZVmmt7cXy7IYHBwM9U1VVVRVHXH7K6Gpqcn72zRNfvjDH/L4449jmib5fJ7J\nkycDMDg4SGNjo3dsOp0OtTM8PIxt21x++eXetkKhwBlnnMFJJ53EjTfeyK9+9Ss+//nPM3/+fG66\n6aZQexEiHG5E5B0hAnDxxRfzve99j5aWlpo+2yCuvfZaLr30UsaPH19z/8DAAC+//LKntTY2NvK+\n972Pp59+mk2bNh0yLa27u5sbb7yRe++9lxkzZrBjxw4WLFhw0OcbhsGaNWtqCiGdnZ1MmTKF3//+\n9zXP/ehHP8pHP/pRtm/fzr/+678yZ86cA17r+uuv58Mf/jDvf//7kSTJG4OWlhYymQyWZSHLMrqu\n093dPeL28ePHI8uyJ3xks9kRr/nQQw/x+OOPc9ddd9Ha2spvf/tbHnjgAe+6g4OD3rF9fX0kk0nv\nd1tbG4qicN9991FXV1fVtrs6IJPJcMMNN3D77bdz7bXXHnAMIkQ4lIjM5hEiALNnz6anp4fNmzdz\n+umnH/DYzs5OrrzyyhHNuKVSiU9/+tM8/fTT3radO3eyatWqEaPKR4KqquRyuZoa9cDAAOl0milT\npmAYBvfccw8A+Xz+oNpevXo106ZNIx6PV13r5JNPpre3l1WrVgHQ1dXF9ddfj23bfPnLX+aZZ54B\nYOLEibS3tyNJ0gH72t/fz6xZs5AkiT/84Q8Ui0UKhQLHHHMMo0eP5pFHHgHgd7/7HV/+8pdH3A7Q\n0dHBhg0bALjvvvuQ5drTWH9/P+PGjaO1tZXBwUEefvhhb2zmz5/Pgw8+iKZpFAoFrrjiCjZt2hQa\n93PPPZff/OY3ABSLRb7whS+wb98+7rvvPn70ox8BwgoyZcqUgxrvCBEOJSLyjhABkCSJCy64gLe+\n9a0jkkEQ//Iv/4Ku6zX3jR07lp/85CdeVPiFF17Itddeyxe+8IVQBPrBYM6cOfT09HD22Wd72qaL\n6dOnc84557BgwQIuu+wy5s+fzymnnOKtPX8lLF26NOTvDl4rFovxwx/+kFtuuYWLLrqIT3ziEyxc\nuBBJkrj88sv53ve+50W4z549m3nz5h2wr1dffTWf+MQneNe73kWhUOCyyy7jS1/6El1dXfzgBz/g\ntttu48ILL+TPf/4zN998M5Ik1dwOwvJx88038+53v5tUKuUt8avEO9/5TjKZDBdccAGf+cxnuOaa\na9i/fz/f/OY3ufjiiznrrLO48MILec973sN73/teTj311ND5N998MytWrGDhwoW85z3vYcKECYwZ\nM4bzzjuPdevWceGFF3LRRRexZcsW/vmf//mgxjxChEMFKarnHSFChAgRIhxdiDTvCBEiRIgQ4ShD\nRN4RIkSIECHCUYaIvCNEiBAhQoSjDBF5R4gQIUKECEcZIvKOECFChAgRjjIcNUlaenuHD2l7LS1p\nBgcLh7TNNyOicXztiMbwtSMaw0ODaBxfOw71GHZ0NNTc/qbVvFVVeb278HeBaBxfO6IxfO2IxvDQ\nIBrH144jNYZvWvKOECFChAgRjlZE5B0hQoQIESIcZYjIO0KECBEiRDjKEJF3hAgRIkSIcJQhIu8I\nESJEiBDhKENE3hEiRIgQIcJRhoi8I0SIECFChKMMEXlHiBAhQoQIRxkOK3lv2rSJ888/n7vuuqtq\n39KlS3nve9/LZZddxo9+9KPD2Y0IESJEiBDh7wqHjbwLhQK33HIL8+bNq7n/a1/7Grfeeit33303\nzzzzDFu2bDlcXYkQIUKECBH+rnDYyDsej/Pzn/+czs7Oqn1dXV00NTUxZswYZFnm3HPPZdmyZYer\nKxEivGmhGxZL1+6jWDZe76542NuXZ822/te7G0cNXtjYy879wyxduw/Lsl/v7rxq9GWKrN8x8Hp3\nA4D9AwVWbekDoKyZPPdyN7Y98tjmSzovbOw54DFHGoetMImqqqhq7eZ7e3tpbW31fre2ttLV1XXA\n9lpa0oc8Z+xICd8j/G2IxvG143CN4d2PbOTXizdw3twc11x+6mG5xt+Kf/nm4wDc/+13oSiHTn/4\ne3wP9/Tm+NEf1ni/48k4F8075rBe83CNo/vcf3XzQpobEoflGn9rX+79+jv4+d0vsmzNPmRV4aK3\nTq55/I9/8SzPv9zNdVecytvnTHjF9o/Eu3jUVBU71JVuOjoaDnmlsjcjonF87TicY7hhu9BwN+wY\neMM9p737syTjh2YK+nt9D7dWaKobt/dz2tS2w3a9IzGOXXsz6K3pw3qNg0V3zzArN/YAsGnnAKcd\n117zuA3Oc3hh/X5mTWw+YJuHegzfUFXFOjs76evr8353d3fXNK9HiBDhtcE180lIr3NPqqEZ1uvd\nhTc8SroZ+m2aR/+YvZFcOJZtY5jiG1EPYAVqrheWgsHh8hHp18HgdSHv8ePHk8vl2L17N4Zh8MQT\nT3DmmWe+Hl2JEOHvGq6LTnrjcTdGRN6viHIFeRtHsc/bRb6kv95d8GBaticQqcrIH0mLY+bP5N44\n5H3YzOZr167lW9/6Fnv27EFVVRYvXsz8+fMZP348F1xwATfffDOf+cxnALj44ouZPLm2ryFChAiv\nHW9E8tYj8n5FaHp4jEzz6CfvQun11byDQWeWZeP+UuSRddn6VAyAzAE072x5iKZE4yHp48HgsJH3\nrFmz+NWvfjXi/rlz53LPPfccrstHiPCGwf6BAo3pOOmk+Nx6MkXSCdWbEGqhe6BAQzpGOukf0z1Y\noLk+QSJWHbiZzZUxLZvWxmRou+Wazd+A7H0kzOYDQyUUWaKp/rUHSFm2TVd3jgmj6pEliZ7BAk11\nCRLx8PMoayZ9QyXGtde9pusVSjq7e3OhbYPDJbJ5jaa6uLetN1MkGVdoSMcrm6BYNtiyJ8u49rqq\ndwOEANWXLTKmrbqvA0Ml4jGF/mzJu+dK2LZNV0+Ose11ntnZtm329OUZ116H5IxTXeBdz5cM9vbl\n6WxJeedYts2W3VniMZljRjfSkynSmI6FYiJ2dQ8zpq2OmFqbZGudUwslzbdmmJZV9bdl2WzenSGV\nUEknVOJxxfuOhgo6fZkidakYqYR/naV7V/B/G+7lIye8n4s7zjng9Q8VjpqAtQgRjkaUNZMbfvYs\njXVxvv+pswD499uWIQG3//v8mufohsXNd6xg9nHtfPQfZgLQny1x48+X8455k7jk7ClV51z7388A\n8D8VbbpKhvw6cLdpmWzL7mBq85SawsOR0Lw/++OlQPW4vBosfm4X9z6xlcvPO45Tj2vn33/6LBOm\nZ7A7N3LdnH+jOdEEwDf/70V2dg/z7Y/No7059aqvd/MdK+jLlkLbNuzKcO2tS0L3c8Mf7sYup/nF\nxy6vauOexzezZNdKmuoVvnvlZVX7n1i5h3v+spmvXHU64zvqve2mZXljB3DlBcdz3pzxVeev2TbA\n9+9dxZknjuaqd5wAwOMv7uH/Ht3E+88/jtOmdfLvP32W9iZfcFi5uZdfLd7I208dxwcvnAbAqi19\n3HqfiKq//v2z+c7dK5k+sZnPXSFWSGzcNci3fr2SudM7+fgls6r6kc2V+ffbljF1XBM3fHBOjdH0\nEdT8g0vvXFJ/YVMvP7l/rbddSuZomLUSuXkaVqaTz922jHHtddzy/73FO+avu5cAsKJ7JRefeGTI\nO0qPGiHCYYRmiAlhKK8BYDj+tQMZP4tlg7Juhvxre/pymJb9N/vcfBPhkWfvezf/ie+v/CkrulfW\n3K8bZs3tbyT8cevDfGHJLWimxsrNIsh21ZY+9vYXIFair/FZ+kuDdA3v8c7Z2S0ijQcOMrhJt2qb\nkSuJuxZ2De0hPmkDieNfrLm/qzdH4riXKI15ofY1MkVsoKsnrOFXmrbXjrAuf9veLADPrNnvbXtx\nUy8AK17uYSivIaWHyE+7H7mpx2lLRG4/8aI/Zv2Be3VzAGzYlfHb3LsRKV5gxYaemv3oHiwCsGVP\ntuZ+0zIxLfG+BX3uZoC8NSe+oC9bDJ0rN/Wjy3kxxoo4d09fPnRM2RDPOqkcuSVwEXlHiHAYURlf\ndDDapghSssnGtzGsiUm1NyMmt6DPc1XvWnoLB0524pL366F5P71HJF7al+/2tlkBf+PR4PN+ZOcT\nDGnD9BbD45zJlVEa/WVcRaOaaJUDBEC52DW8m2v+egNL9jxb+wDJIjZlNXJzd2iz+1yf6FpywPZ7\nC/6qHsuuHm83IK43EyasSvIeyVRdy6Li3rdhWpiWTWzsVtHGpA0j9jN4vf4KoaW/OMCSwu+Jz3hu\nxPMrz6nEf734E7723HeBcLR7Tisi1WWIH/8Cw8ZwVV8AJFXz/pbragsHZVMck1CqXReHCxF5R4hw\nGFG5tEc/iKU+Zd1Ead9LpvU5/mfdrwF/cnU1hf35Hn625k7+47n/8jQGoCoDl6d315hkNw5sYU3f\n+oO+l1eLxri/TjVI2G/0pWLuhAygW+EI6d5MESnuE0ZBD5MfgHIQEtOjO/8KwIPbH625X2ndj9q+\nl8TxYeuFu7xpW3YnALZR7QEtaQZFxSfvWn0cibzzAQKT6rIMpNbXJH+5xj2qTuCXYdqifcVpq0Yf\na12vp6Ivq3qFCVtOjEzQwf6XtDD5dg3vYcfQLnoKfWim7l9L0fn+y98mOfNZlOZetiUfreoLgBQr\nB/7WqIWyeeSj0CPyjvC64I2UZvBwwqwgU10/OPKWG4RWtze3D/AnJ3epUG9RTMq6pYcmm0rh4EBW\n8x++9DNuW/2/r9ifkfBiz2q+8dz3KRrVpBCc6IPEFyTv16p5W7bFrSt/7hFg9X4bpW0vcsv+mvtf\nCbuGdnt/l4zw5NybKSLFAuRtVCeRMi2bIW2Y32/+MzktX7U/eI2JDeOq+g4gN4nnbNvhB1jWTWzb\nJlN2TMuKUUWufZkScr2vKeZr9NGNZu/LhImx4JiWpbosyZnL2Bd/gd3De6vOryWfuEuuTMuirJtI\nqmjLNqsDNF3BsxAwZe9zTNKphAgEfMkhb9saWRjakFvtPefKe1m+z3cZ5PScZzavJGJNyZLT86G+\nACEhDTV8zu83/5lfrL0LzXnHa1lgDhci8n6TwPUvWrZdMYGaNY97pW2vBat61/LJJz7PtuyOmvst\n2+Kl3rUU9dIBr23VEAAOVV9/uf433LT0m6/6fLcfQfI2TCtEriMJMJpmIsUFIbYmW4Cg2VycP1jy\n/YHByaaSED2zeeC3bdvkdJ9MKv3ouiGIwbKtmtqWi9vX3sXu3F5W9673zgHxXILm/JAGG+hfppzh\nzvX30F3oxbQsBofL2LZd9QxFX6rHqrfYz4bBzdy/9aGq4yzbxjBMYsesIzaxtrm2ss18SQ+ZTHcN\n++RdOSn3ZkpIcX/cCjUEGNO0+c7z/81fup7imb3Lvevphskftz7MZ578Mn0lIaTJkh+xXiwb7O4f\nRG7sQ2l0yLscDnzTdJMhLYdhi/5KEhR1v49lzWT7/iGkhE/Y+RoCxLDdR+yYtfQMD4W254o6iZlL\nSc70a04MlAZDxzyzdzm7zZeRm3tInb6IPbl92LbtRZAXk7tZMfCMr3lXQB2zleuXfImCXgwJoK5F\nJp2IYds2e/Ou8CXh2pJ0w2JgqESuqDNYyrIz/gyJ414C/JgDwzK4e+PveaFntX9fWt5/xnJ1v7Kl\noWqzeby25m1ZNn/peoqVgfaPJHlH0eZvAnQPFvjCT5/l4jMm8fLOQbbvG+J//n0+Dy7bwX1PbuPm\nf57LxFEN/PWlPdy5aCPXv382MyYJ0vjNXzbzyIouvvWxeXS8hsjZIO7fIibbv3Y9w5SmY6r2L937\nHHdv/D2N+jF0r5zOj649J7QsA2BTV4Zv/t+LfPySWcydLrLzPfp8F3c/tpkbPjgHPdVNS6KZ0XWv\nLnPfc/tFAJBhGajy3/aZvLxzkO/cvZIPLZzGceP9VIr5khEycRumTUyt1iZKuomUEGSwfVeZNW39\nXhCNaxbvCfgyQ5p3gBwf2LaY4eQQ0IYkSZR1k49/90nOOGEU557lB9Zc96On+MAFM5h/6niG8hrX\n3LqEc04eiz3xRV7u38R/nHUjsQOMgaZb/P//+SRnnTiGf3nHDD73k6VkpN0kRCBxyKToE7PFnwZv\nB6A50cSG5Z1s2JUhEVcoayafuewUZk5u5cWe1WzdrPDw091V0dvd+XDw0n/d8xLb9g3xb+85ke/+\n5iWuuGgikmKCbGFaJorsE+SqLX384Herue59JzNrShvb9w1xyy+fB+CbH5tHZ3OKVV27vON/8dBq\nxqnT/WsnXkJp9f3QtUzSmfKgR3j3P72de34Dn79iNt/69UpSpz8ROvalbfvZ2TlMc32cz922DOnY\n5SSm+wKQVKHxLVu3n98/v5LkTH9btpynLp6mpBlc/+Ol5EsGiVk+mQxr1Zp3X/ol1NR+8oqBbvhR\n0kPlAnJdmNAHy2F/76833AdAfLLQqH+y5EFSPafQ0SKeUXncc7yUAzlZ+x5iEzZj2LBpcAu5cnXf\nDNPi9kWrKDrmckm2QBZC4pduX06PE6TWcEwXeJ+5ze0PvsykUQ3stzZXxRIM6znyJdFfqYZQ8Z3f\nPUeb7FtB5MZ+5PQw2BJIdugeilr1+bWEuMOFSPN+E2CjE7X50LM72b5PfJCWbXPfk9sAPzr0waXC\nf7Z07T7v3EdWiIIxm7p8Te+1wtXmRlp77EbuZhFmuv6hamn2ryvFMfc+4ZeSve+vIjBm+cbd/PdL\nv+CW5f/5mvsa1BoPFktWi34/tGxnyOddKOkhzbsye5aLkmYguf492eLhZ3d6y1hcTb7HMZvHlXhI\nU3DJ0bAMFu34CwPNKwDxvIediPdn13fTlfMjfVEML3p2l6O1PLVqN893v0TeKJAp1Q7ScbEvI/Yv\nWbMPy7YZGCqHTI1lI0jezrNP+pO1bulelHDZuc+la/ezYWAzt6+9i8cHBUm8vCus+e2vIO91OwYp\nlk1+/dgG1NHbWbRGmFslSZivg1i0XBDzn5buAMS6eq/d/gLL973Attxmb5tml0NLBOzOTeJ/UwgE\ntczmPaVe729TEtaRxc+NUIBJ0dm0O0NXTw7dsFCawgFykmqA5L87f1ixKqQVA2RLeedehCY7arRN\nLOW/vzm9uo+u8UFp2093xo84HypWa+lBzTtoNZJi4t4yWYOd3TnvGVYimbb454unM2/mKN4+2yfI\nn6/9FXvG/L7KvVHWTZZt3hHaJsU0hgs6PYNFL9+BlvYtJA2NYoy27xtClqvzIeS0PANDzvtYg7yF\nZu5bshLTxfcjmXHn+v67nCtWZ4orRWbzCIcSlZGiUrzAQCHrmbfcCdUNsKn000LtwJRXCzenkSzV\nfv08U63j56t15ZST8CS47MM1t9nqa5N+g6biVxOIogdyJQfHslAyQj5vbQTyHtZySJK7QNsMCS8e\neRd8Yqg1BtlymKzcyF8Xe3P+RCkpJprmulWcyzb4wlqmXE3ewTEKEpebgSpE3gEBSDMsUDXkQKT2\nYDHnBWC5SMQVT4iT04JUKpPT7A1EsQ+X/D70JlcTm7iR4lh/nfJAKSx8uglz+lIr+dnqX4aEqoHC\nEHe+fA92zH+PJMW3mtiYge2Oz7aW5q0NBI4TRDE40lI/xaA3U/RiG1yhIISA1hcb5QsBVkEEBA47\nhNubKSI39DM0cTGmFCDvcjUhm5Lfn64B35ozXK6+n6DmXWt5m6aJ96w0AnnbisbZJ43lX981k7NO\nGlO1X2kJC2NlKUd8imOSdueCWJk9TuKaGcc0ITf1hPz6l54nhILebBHd9L8LN2ZgWM95Yyyp1fcg\nqToZR8gNCktSsQXbkpDrhjyXVqYQHs/GeEOkeUc4tIhVJNxPnvIUNy3/ukfq7oTvEnStmsG1siu9\nWnjBOCO8fqZDDF6QTsW1C3qRVervUNp3UyxXTxSGUjs46GAR/AC1V6F5uzm7K8k7XzIOSvMeChCv\npBj0Z/0J1jQtbNv2JlLN1MgXq33Kg+UwWelGONZhMKhNK4bXF89H3uATT7YGebtL2AAKZoA43Ykx\noKGEzeYWyROXED/Gj3LPFKsrMCVisqfpuYFKlQUt9hd88t7W7U/87uQaxEDRHw/TMulveB65foBy\n0xZW9a1DM/yJfqhUYwJWDIYdTasQ8C3bloRqJ8g774zhPV+bQS2gPTvrg0dapy8pOn2ZkhfbYBuB\n4C6XuFS/j5Ll77cKIrmKaxbvzRRDhKZaooJXvkLz3jS4FSvhH7c34z/zXMDEbmbbkGyZTDDOooal\nwRVkhgvV38yY5Hh0S/e+p2DSFu/8eFhrjU3Y4AluFB33k6qxs1tsax6TJTFNuLdkW4yHkhTj25sp\nUTQDz0lLOPeV9yPTlWrNWVI133IQ0MylvTPBjCHFyyROehqAgYL/DZzVeiGtyRaKRumIBeNG5P1m\nQ0CajKsVmrdyZDXvkczmXiCRM2lVLrfaNbybItmQ9haEJudqbg9ix9CuUDRxELkAMb0as7k7gcdU\nudpsHiDQkTSUIT3Qf8UMBVaZlk1eL2AENJ+hgJbktl+pLZtWOFguUwoICLJBWQ8njwlOpBkt7PuE\nsHBQNH1hyU0sEgzyqQxYq4zyHa6hESZiCrudSHsMYbKsDCQKmnF39PmWCKxqrTWoea8f2EivuoHE\nCc+BLO47GImdr0HekmJ4VoVcYLy1DXNRiFN0iNHVzmPHrmJDYZV/vqPlDeVqvE+2BIpBT7ZAr5sg\nJBCZHdNF/Elw3EzE3+ZQC9aQKBE6rPmad5D8k5YgviB59xT6+MHKn3r3D9CdC0SmOwKKOdSKtuUU\nVCsVGsOagVmOcDGU10LzjLZsY2voAAAgAElEQVRtFi0J0Qc3ULJWamC5gryDEfbGUKM3Bjv2i/dR\nSfnHj5VEPIIuFVBkib5MMWzCdsZzqJwj4zyDWj5vggKSs9/oHYdeSHrjLzlj1p0V42V0T6TdmEZK\nTWLaZkjjP5yIyPvvEAOlQTQzaEoNkETg5Yx55C32u2ZzzSpXSemHMsmHa3IdSZu3bLe/Yn/l8ifX\nZOx+XJU+tqLtE9NI0dI/X/Mrfvly7dz6w4Go3FdjNvfIW5FCVox8yXCehY0yagd7ctVLbwAKhk/e\nUkVErGnZVcQ8FCC/2uQttO6gmX446ANWzCrNO6g51zKbBzX3vOFf39e8S9imgm0qlMxqn3cQtZaa\nxWOyPz6KDtgVS+L00Du6dzAgyFnV01qwv7XKoxYD1oOc5vfX1WpRDE+wcTXvuvxUrFwrshX3rDWu\nEKS2+W4J28bT8mwIERtA0m5Ckm36snl6B4uOUO2/N0nTIe9gwJfzHWtbT8Z2hJu8JvrQmy2FiClh\nC7N60KKU06sF3IGCP0Yll7wHRoMZQzHTDGnDXpayWm4C1zIwVNA9rdUcGIXZN576mMidviWzXRxb\n49sXAl8wsMA/xsoJ8pcSBU/zjiXEOOp7jmVS8ngAslqWtqYkvZli2IK2XUT2DRQDgmhgjLRts5x7\nEGOcSqj+flMV30dIKLTodSL0bSNGb6ZIWhWBevkaY3M4EEWbH4XY1T3ML/68nk9eeiKdLeGi9nm9\nwJeWfoPx9WP5wunX8H+PbOIvL/oaZnACiFVq3g5Db2m+l889bdO89VLv2B/9YS3nzxnPFRccf1B9\nfOCZ7by8c5Dr3z/b+1DveXwz2ZxGLqWBAiDxg3tXsdkpSPCpfzyJyWMasQhr3pWlI7tdf6/zcXUP\nFkKBa3nTn4SeXtPFI8v3ceOHTvMi1otGkUw5S4MzET350h4ef3EPLQ0Jpk9soXNyteb98PKdrHi5\nhxs/dNorWiFcYUOpMJs//uJu9vUXkFI54pM2cHfXBo4ZfQ0dHdNC5xfMvC9WKz7hqopMMbGXb6y4\nN3R8vkLzfu7lbh7ZvAncVNWySV+2xH/+5iVQyySmr6Bo+WSlxk1PAHIzuAXJ+4k1W2nMdHHh3An8\n/qmtDA6VGTfL13SDxPenZ3aI8+MlbC2JpOrsGxxC003iMSUsSOL4CZ3+j++oZ3dvDqV1L08Ul1G2\nXQ1JRBkPFzRu+eXzDAyVuPyiseLWJBnLtugeGgScSHTJH3NbjyPFNJ7esI1ZiX5mTWmrGVQUFCCW\nb9hLYgZItoK2+VSSJz8VIkPN0kgACScVpmzF0S2DsqlVuUJsSwJLDWt5FRpfPpNAaRVBcXv6ZEa1\npukPHJM0WxlmK/Gpqyi91Iytpfz2jJj4B+zoHeCWX66gN1MiMcrC/WpiOLWoy4M8u24/v35sM23j\nstDqdlJEUvfkMlzzvb/S2ZRkz2CWeDNgim9GNtPY2GTKQ7SlWnjmZT8S37+voNbqru0W53em2wG4\nc/09nNwxq3a8i2yKNtzgMMdaUVpzJraewDZU1NE76Fk3FmjwrmFmOmlKNEFJBLt2NI9l3fYBVm7d\nBzEorT4Lu1SHjEJ/UVhrmuvj5J0xHNf/DrYMlGHKWi/4rqkuTlmvuIf++fR0OMl0VIP+vAZpMUZ9\nmRId44UrIK8XSODniT9ciDTvoxD//fs17O7Nc/+S7VX7smUhDe52tJYgcYP/UUE1eXuk5Ex++/rD\n2vdjL4i2Hti6iJuWfjNkuq3EH57ezoZdmdBktvi5Lp5d3+1V7zEsg1Vb+ymUDTI5jXXbhfZkOaTq\nkXeFGd9dJuVOYNv2DbFuh29CzZm+VnnnY2vZ11/w2ga8NchlS5DDLxdtpKsnx+qt/fz2iS0hs7mr\nzdz7xFZ27B9mYFhM/I/tepIvLf1GTbO6YYj+xlQ51Hd3PIPEuLXGWveiJTRZ24gJE51kkUooJOMK\nQx3LveMkXZBVIUBGmmFy2x/XMRQ0dct+pLrascf3IzqIxy3vOXmacUwTpk5bwlSKvLBR+JT/vHQn\nz6zd7yWPUWWVklXh/5QNpJiOrSWxTRXd0ti0W5hcS3p4vNpTbRhogM2oVnE/8amrKdhhbV+KldnX\nX2D7viGyeY2N+4VmO8FJbtJfEM9/zrQOkulAycfhZmxLwlIK/OB3Ivgp6Au1ikIjdCO1g+M1Sj8Z\nu5wS5tsa5OvmsVY1oRWu69/gCUFuxrPyunnYhhr2V1eQt2vilhQD07LpaEqScuSQs8fOo8mY6B3b\n0C76Kak6tiWDrXiad1e+i+37hsgVdU8rbUk0M8Y6EXO4ha58F89u3k6uqLN70DeB1xtCECrbBbbu\nzrJsXbfXx6ljBOm675rrLtm63/fne/0P3CMO8br7zh53BhMbxmNjM1QerhKgZEMoIak5j3uBeZKq\nYVsSdrEejDj67uORZBu5LksqoVK2xHc0a2In582ayrFNk9kwuJlp08XzH8yL91wkh5EYk5jAgN6L\nlMgzujXtPceGRJpPvWc2tiV5yk1bU9IXnB3yTlvtTFBO8PqWLfrfaaGkM7vzJE5sn0FnXTtHAhF5\nH4UYcgJC6pPVfiP7gCUvCJnN3UxIru9VqTRlSbVNzot2Pk5faaAqorkWatbudYSDsiHuo9NZu+ul\nAK2INh9Z8xb34i6BclG2AxOxc7/BW3Ozk2mmVm1Wl02yAZPycCk8ybhc/IctDzJQGqyZdcowLZAN\nCsmuqr7Hj3+exPTnvd+1AuLKtiBDu5T2+pSIKSiyhGwEAn1KQrovB8hINyykZA65MbBGOKC9h9Jo\n6qItJRYgb9MCbKRYmeZEI7aeQIqXqtJn7sntJ6HEmdQwAc0uhd6Vjsni2tZwC5gKyCb5ongPKrN8\n1cXS4n1QjFCZSxcJSbwbUkwjmw8kRTHFxDyrbTqqpNDPDuIxiX+7ZBbHT/K1HqtUJywA8ZJnBXH9\ntbGhiRyfOA2A/ny1sCNZCiCBWaE5O/uTagJFlogPC3J9dt/zvrAqmyT0Nuxio/C31iB/M9sqtErX\nv+1sb29OolllpjQdw+XT30NCrqO8+RQAzjhFVC5D1T2N2y6lMTPtKI0DyM3i25Ad8rzm1I+RUJKY\nvULI2Ws6Firn2zH6RzPJnOe0GXgXnb5ceubxpBMqaOJdcZMDlZx3Ttt6EqUX345VrAuR97hRTh4B\nh/iS8RjHNYtqeHkjXxWVPaV9lD+8DQNCaFUMMGOeddF2+iCpGsm44rXxrxefQjKhcsGkc0UDDb1M\n7KzHkp3+OO/8xLiwcClt+xjVmvb6m5QTzD6uQ5i9nW1j2tJV1gNVkZkxfpTTB92L1bDNGLppM731\nOD520j8TV0Yu9XsoEZH3UQg3pWFDuka6wVcIsAp+YO5yD9eXqCgyECAb+cDZygyrOjBDMzVRLEEO\ntx3KmuWQd8kh77FO3WOXICqXihkBn7duGV6gkrf8JlS9yaZsB5f4iD4G/es9gexfwSUvUqJA6rRH\nWbTjL962XLmCvKtyh9fI8mZaxI9byZ66p9haeDm0T2nuC/2u5VPXKGDbYDlZtSTFEOStSEiaX3fZ\ndLRGzfKfeX+5j8TMZUiq4S83CvrNAwFKdllMikrM94drugmqjiTbNMQbsEsppHiJTD6Q7U6y6Cn2\nMrZuNC1JQSZBa4LWuB3bkjB6JmBbigjGGhSknXeimG1TYczwOYK8Ee9lMmVWvXMtsrOkKFYmGxDS\nipYg77H1Yzix/QSM2BAtHWUkSfKIxb1HW0tCrOwJGG4msobisXTUif5nQwF8jvbs+DhtUw2Rr/ve\nJeQE8ZiCVaynM93OjuwuQd6ShSTbWIZ/vhCgrND5Vq4Fu9jgkYvSIN7rlqYYNjZJ1dHsFckjLtcq\nIyl6IChNwth/DBAonOFcI6UmUWQJa0jYyPP0e+MNYPaOp0FtAFsKPUM1bnrnx2Iylkve5QyWbVGS\nRTu2HgdkQdJObAKAGjP8sUMQn/usl+xZ7uVkd3FSu59tJnHcS6RmPYuk6khmjNaGROBaQEwnHlMo\nODEPKUX0rTMlNN5sOYuqytiyLtwWtqC5MbFjkWwFdew28vWbkRQD25KIqWIcG2PNSIkCclOvqG8e\n8Hm7z8H1a8tNfdhjnRUThloVVHskEJH3UYxaUeFBM26tJQth8hbHFsoOwcmSZ+6CEaIxAyjVIJ4l\ne5fzu81/Iu6UKHQTHlQm+we8oLr6VIzm+rgXqeznwq4OWOst9PmEqRiA7ZVelOoyJE56GjsogDj3\nIAX81K7mDWHylOurE9G4ZnMXlZp0Lf+pYfpJNoaMAye3qWV216WiiLB2J+eA5m1LgQxteYe8bf8e\n9pZ3ICkmetfxGN2TgLDmHXz+linGRFZNz/pSMjSSJz0FQL1aj1VOIUli+ZUXSZ7MY9kWY+pGe+lb\ng9HphprHLtWBkRBaqwQ9Q4Jsi6azpGr/MSQK47wJXU6UeEL/XxLTnwtZB1plx7edyntCK0DRsa40\nJxqZ2ijiMFIteeceAuRddDRvyRcw3ICidCzFqAZB3iUr8Jwd8rZ0550xYk6kcfC9EwlyknGFsm7S\nnmwjbxTIlYre+YYhuwMi/ne/rQpSsDVBCLGJGyFWoqlRXNclJUWWQBcEVrByoh+q7hEj+OlTvWVy\nAdO+KsvYWgoZGSvmm91BmHx1A5JyGjk9jNK2l8TMZ5AdQSKlJokpMmbJ1byz/HHrwxjNwuftWg2E\ni8dGaXfW5scCPnkH7rNetm8Fd738W4KYN2Yu7516iX8/ySFQdFRJCEiiLdcXrpGIyRSMIkkl4WXO\na3LqqWfKQyKHhaO5e5kiTJVYYTSSbLFOe1ospzNVYoo4/8KxFyFJoI7ewdi2tDf/ueMcU2XqnMC7\n2Litfl/N2EEVHDrUiMj7KEN/IUPylCeQW/bXXCccJCOj1gtVg7xdYrVtO+QTr6V5u/5qqC7WAHjL\nJJTGAZANr22fvG1PA3LN5om4Qkdziv6hEoZp+ZHyjoYeLIPZEyBeSRZtuZp3/NhVyE7mLjcgxtMw\nAm0EyTtoqQhOhi4KevgeNcMK+fprJWUIErxsB9s8sLAFIg7AUHJYpTS25ZyrGCTiCrIsYztadHnT\nqZ42plt+H12t08o3CpO1c76LEHk7yT0kxcS0bAzTIqsNeoFCti152rmUKLK/wmffnGyixZkw3XSu\nSBaWpHt+WDdCtzebc8bL0byNGLph0eFoS7HxIpuZXJ8N9bfDnoptg9wUWAoGDNvid0uiBVsT10qk\nxHlFowSWjLZtltBuXXOrI2C4qTjr4knGNAvhQx29E6VjlzceAGXNyXtQSiPJlne+uz+pCGIp6xYt\nSeH3HigP+uTtnO8SnEsGleZYs38Mck6YY+Vkgfp6cZ6vecvYegJsWJ9ZizJqJ5IEiuW7GWwthW0H\nnoOsE1fiKLLiLAGVSMuNSIkCx09o8oPLzBjDeY3ZTWcgqQbxY1cj1w1jJ7NOH5IidqMk+rJjaCeP\n7XrSfxCGK4CIMY5PWRsao+A35Uac10JKTXJK58zQNkm2ScpJLzmPa2lQO/YwNGoJBb1ISvXT5SbV\nBEklSaacJaZIQrM2VRJxcb5hWpT2jQ9dwzZjnvvw2JaJIj4hVqalIeG9h+51Fdm3HoTa0OO159rD\njIi8jzI8vnOZSBRw3Es10xAGyeBHf1hbtT84eWu2+LusmTy8fKfIchUMOlGq28+X/PaD5kkXwQpS\nUsqv4OOlHJRsQbrgVeJJxAR52zbc9sd1dGcdE6ZD8oWSwc/+tI7t+4ZYvlVIvHaAmAbcDGSBicI1\nobkfYFk3uOuRDdz57OPszPjpX3/1WKAkZg0ff7DYA8AdD7/MMxt2+PsDWt7zG3q4/+ltXoY1gBUv\nB7JG1bBkDBULWJbN/zz0Mt/9zUrW7u0CyRZBOs49SorQvFVZwpZ1UnIdVqbTm1SCfmQ3iMc2Yz75\nB4Uwl0D2noDpaeZim6abFAMa6JT6qb5Glyiw28ls5b5DxbzM48sdM6yreTt+U9fE6fZxy0AXS9fu\n8zVcI0ZXT47lS2JYxXqkej/gUJIgZTfz2TmfADOOlWsRVhGnbbmhnyFpPzNaj6cp0YBWEtey4jl+\nt/lPDJYzKHoDZt94QAqR992PbWbzfiG8NSTqGN/a4l03Ptl5FxzXgusxsYvChy4lnVgKR4BKqkkS\nMZmybnoWiKyWDWhsYdLxfMoBUhjVmgYkFIe8pXiJtMNHSVX0W5Yc06/zPONOXeykEqg1YMvCwuCQ\ntyXrYc0dyA6oSDGd3WN+i5zyg62GCjpnj52HbVbTQVJJEFcV8jlQibNreE9ovyuY6Hum+hslv5JY\ncL16LeKb0Xo8lxx7MZIk0ZRoqNqfjqX9zHqm6iXsKSf3M1jOkI6F6y00JxrZm9/PQMOLYi4zVS+W\n4p7Ht1AeaGFy9p3e8dZwi5dpsi6pYhtxJFVHVWTf8uhp3lLoHqxcE+WNp4KexDBtNnVl+PkD6w95\nIaeREJH3UQYvwtGSR9C8fXJdvbWvan9wOYcZINpHV3RhmFY4KrZm1R0/GKyW5h3URCXVr9Lk19AN\nrNX1yFtm1mThk3txU6+fwcohnadW7eXZ9d3c8svneXHnDsDRLBHLSTzLgeFrIp3pDq8PAC9s7OXJ\nrmdZXlgUShm5bmcgM1egb66/2fXLu9jTm+dXj/vJN4JLjH58/1r+9MyOUCYwSxJ/nzdnfDga10Hf\ncJ6+TJElq/exbscgK7YJ4cQq1vtai2z4ZnPZICY596m7+Zb9PnqWF1MNkH+15l3YO9Zr31svr1ue\nyVnfNY2J6SkhzXt3r/PsHRLq2qvRtdsJ7nK1UvcenWdh9ApNRx23lb+8sMeL5G9K1lPWTdZvz2L2\nj64al3pzFJObJqEbFtZwC5IEckoIdW5ynrdPOFvcS05MY7uNjSLeApDxScMn7yKPPt/FcKmIbcP0\n8e00pfzJ2DYVLjhtgvfeubGK9YrQqpV0nqb6uDdeKVVohZpmehaIgVLGF5Zcn7kTeCincsSOXYXa\nIVZttNbVMXl0g9NH8b6NHS15edBd8vVQIfx1NjaGfttaCjlRIjZlFSYaKZf8HfK2HdO7jS0sHDbU\nxVNccf5xjG2vJ2ZWL29SZMVZlSJR6vOjqMubT0HvOs57zu3pZibEnWWkqk5RFXPP5I4OLxVqXQ3N\n+x+mLOSCSW8T/ayxfGx0U6OnOYPvv3aRVivJ2zGdpzYiyRa2odJQkRBmzqQp3t9mpsPLQJlMqCTk\nJHJcFwKPa4Gq4bcHMHomYGU7aUzHMEyLp1ftZdm6/fRnj0x+84i8jzJ4ZGHEvIQQQYQCoCo0ye99\n8kzGjvJfZMM2mDymkUmjGiiUDQzDqliPWi0cZEv+MqNaPu9g6khJMTzy9uoDy7XIW+GMmaOZM80h\nXOe6biajYPUeOT0szLmOyTc4oU0d3eH9PaqCvLsHi6GsX36DZu2/HXPgLv1lnt+/MnxOILCndi7j\ngHnc6d+0Cc186rJpVUdqZjkkhA3oTgnIYr0n8UuqCNBRFAkUHQXXzxj0AYoJzrUE2IbqTToN9YHP\nXNHF0idLEe3bEpYsyLikGb7mbsQo66YnxMjJgle8xBUWTC0WIka3L+75AP/90X9gUuMElPpBhu0e\n9sbEWH78nbO5/v2zAbDyTVXjIluOS8AwPdLxVg441291TNXZrF1V79pSfWuEbz1wBQwDhRjzZo5B\nkiTU3aeKZUKKiT12LRPGiOuVyqKm9LX/cBYA8+c1M3/2uEAwWIJETMEGGmMueQ/6AW+u5u1o7kpr\nN2rbPuQ6IYTc8E9v83IPWI5PefrxKe+7cjVvL6eMHo7GnzVugvf3tz8+jwmdghzV9n0YUtkjb1fz\ndp+Vi3Qsxa1Xn8Ox45qIqTIzx4r2bEOl05jBW8ecDvhLSs0+EX9QrzZgDY5G6fdzPnz742+lNS2+\nydiY7QzYuzm+ZSpffN+5/MvFM4BqzbshVs/ExrAZ28qFBZJxLS2hnPZSxZyUrqHNB2FrqVBFwpnH\ntPD2U8czPjlZXG+oDdW5P1mSOG5MBzYW31/zQz/4L0DeDTFfwDH7x5KMKzTVJzBMS9R4l6Ct6dBU\nX3wlROR9lMElC3dyrUTIh1rxosdUORSJbdg6MVUmnVTRdKeggHJgzXsoQN7lGpp3KFtWgLx9zdvv\nk0fejmTtfaTudR3ydgPzpGQOuW4IK9vmE1egv2ogAdKYOmfpiUMm/dlSyKzuJVEIEHZQcLED2ZTu\nWH936B6DwVmVZnWlfTdKu798zG1TUYTJOwjbktCscMrUYVMEuNmlOo+0pFiZZFxBloVA4+ZxxlKw\nLRkppnnpJstWwIXg3INhB56pE8ErGEEiIdWhSeKZarqFZrmFMWLifdATyCjIyYDP2yFRvaSCGcM2\nFc9c6xKrazaPqTJtyRaQID9KVGhS+o9lcvNEjxRcK0pojB0TsW5YHmkpDYNI8aInILgTaX8mXPEL\nwFQC5K255F1EqsuKwCzbJ8J0cRJG9zEAPLN/GT3SJm98VUVmVLodWZJZ2buGPnmrFxOQiiW9dzet\nOMVB9Kz/jjvjbzlL+jwyQBBZc6LJS0lslcWzzpQy3mqKSh+xvP2tnNJxovd7Zsdx3t8xVWFWy6zQ\n8UmPvMU4G/smc0LsLG99eqXw3ZRwnoNkc4w1jytnvFcc5xatGWrj7JaFvHvM+wHoqCCphrgjPIze\nSVxOcOnUd4b2B8n7golv4wunX0MlyhtOp7R2nve7M91BIjYyTbkCigvTDs95drE+RN5u8NtFoy6l\n+OLbwYyhBoJZ61RxDz3FXuRkwUmyI85RFZn6eB0fPuFyJmbeAbZMQzqGqsjohk1vtkRrQ7KqENTh\nQkTebzDolsEL3atGXPJVMv3JtbbPO1A4voJ8Y6rirSEGQLZEBKVTYSlb0MKm3Rqad1+gwENNzTto\nNlcM8k4ke9Eh76CJ17CdJTfOB5WIK0h1Wc8n7loO3OVZSpvwVZt943yTslJtogY/o5N7P2U9LJgk\nqHP6GPQHB9ZDl/2JqXISDd5DZWrP+JS1xKesCbTpkLcsoVNhTrNUdFsLZR3TLFdzjnvFFKR4mURM\n8QOeLH+JkK3HQfXJW7c1Z3mM4pnNTcIJQkLEJTWiSQWQRIpUL3LdUB3BSyItNSKlh4jPWYSUzHkC\nUbkozKlWoQEplUNp24OccM+Pe/fdFHdIIZHHzHQwpnwasiT7BXMMv7b4jBZhnVBNJ5LesDxBQB29\nk+QpTwrLhy15/s7eTMl7Z+aOOhWAMaXT/HE2Y9iGitLc65fRDFilEjEFK+dr/xa+5qwqMnElzsJj\nzmNYy/F88REUZy11OuYHU6WoR5UUitKQJxDalkIqoYAR9zK9uVAlX5sDMHSFhBJnsJxlq5NCdErT\nJIKQyg28a8qF3u+JjX5lrrgqc+boed56cPDJ0hUQsFSmpWbzDqeNyY1+8hcARXIEViksCfnr6yVa\n9anETfE8O5rDxOmSN8C5o89hQsPY0H41UBP+H45d6AsLQVgqdqGJy6dcwTWzP8bcUbOrqskFMXfU\n7NDv9x1/ifftg1jn71aQA0g6wlZSjXvvnRog2/p4WJMXFj4xfm5g2+mjT0XVxfuSTsaIKRKGKQJn\nK8fkcCIi7zcYFu94nP9Z93/cv/Xhmvu9IDFbqql5B3OaV5KvLNuUrTC5xlWZdDIG2BhNO/xoVaiK\nNpcb+1jc/cfqvgQQIrMKn7c6ertXHxfAQiz18j5OtRSuUSyHydsNGDKHW3yTskNoqiJj4pN3a7KV\nhJIIpYN1NSZzqIVOyzFhBwQc19xp5ZrQd87wtlea+4Jthgs01Fiap7h542VPsDGHm/nQ8R/ANhVM\nWw/lHNesssiFbckhzTsekz1BwF0/DIARR1I16p01/yaaFyTkBqwFyRtVJyb5ZFkvi0lIbhhk7eAa\nj7xtM0beqaLVoDoR5bKN2tnlkVCx4JiF841IEsSPXYM6QQRTeZYRSQpN0ma2jQ4nKU88oFG17lnI\nl97yWT4y/QOUN84hVRJJRXTDCsUyiPHQUOwEsiRjWlaoZOqxzZP477d/i3GcGDonKIyBsxzPQTyu\nYA2OQt80h45Um3+Q5QsYFx9zPh+Y8T5/V66JZEz1a0obNu2pdqz4MI0Nzn2ZKumEeBZuJjcXHzxB\ntOUSgmHatCRb6C8OsGlwK82JJi8ILvhadaTamdQwgYXHnIcs++MXU2WSCVUkxnFwaufJ4hoBzTKm\nysyfcDafnfMJPjDjn0J9mtYqgs7M3rApOzPsv++9mZIXhNpWURmsMemblMc3d3IgjFQO2MVJ7TM4\nrmUKkiQRj/vvu7Z9JlY5yWzlHXz5LZ9leutxofPG1o/m+jmf8n7bxbqQ5u0+r2CKYzVQddHVvF2Y\nw63e30GN2nUDphNqiPzbm4+MyRwi8j6ieOz5Lrp6wqkphwoaDyz1g5y2O8kLdgyJZSuPrOjinsc3\ne8uhvCQNstCUlq3bz12PbOS3T2xhqKCFfd4V5Js3CuGkIgHNW27qJT55HWpnIA96KEDGJhYo4wjV\nAWuWbYfK5EmKEYo2V8eJ7E5mthUz60ySkuV9nHvl1aH2RE1rvw61p7kYcZ/YHD92XVL1lr5JG9/G\njq4yacXPmAR45KdvO9ExHVdq3k7U9daTwIxTeukcZDPBkBZ+ZkENasPgZm596o/c+9ctxBM1stsF\nNG+3kIax5zjmjj0RLAXN0vnLCr82s47uCCaSuE9bglhZWCVc4UMPrO/V40iK5UUoIxu+VcJdR+ya\n6yUTSbZIyP6k26AKv3HsmHX8pe8BhmNOX4yY9+wanWPASTiiashWjGLJoqUhUdNnbet+bEWQvO1S\n2iPvYKnaJI2MruskEVOxsh1YTlSxHtC8XcjJAoolnv/9T2/HtGxithCwOlLtSJJUpa25goxVrMPM\ntnGccoa3TxwrYWY7mBYkA0vxtFZJkpg35jRa4+K9NbonElMV7zovbupF1uqRFJN0Y9k737VqeX57\nQNtyMjNahb/YNWnbNqT0jnwAACAASURBVExrOZaSWaZgFJnaPLlm8Q5FVvjc3E/xrikLKrZLwrxs\nJLC1BAoqJ7YLAVRRwiQPMLlpkhfU6eLE9hOIbTsXfdf00PZgVbvebJGCM1e11CdCxzUHyLsj3Uot\n/MeZX+Rrb72h5r4g4oHnlwz8bfZOoLzqbYxPTmZUXW0BIRiBbpdTNc3mSlCgCZJ3haBuF/0I+CDJ\nu0pJXSoW2t4RkfffH/b05vj1Y5u56X+eC23/5cMb+MNT2/ijk6fc9dmokkJfpshv/rKZxc+JZTa6\nqfumV4e871y0kcdf3MOi5bt4fkNPyOddGdzh1om2yk6QkWx6Pu9ahelD/uBE0VtD7aLSbL5++wAl\no+RPtorOcN5J0lLWQRITsbb5VH8Nsmx6H+cwvdiWRGnVOZhZ5+OXA+StaiKBhy37EbxOn+rTMXRL\nx9bjFLJJfvC71aTUdCi5hr+EJ4ahy1X36Js7nWIMRh2y1iisCZIFWMSnP4fS0uMtWQHYYDzDw8/u\n8pa+ubBtKeTzdjNC2UYMWZbEGnDZ4PHnffK2EMk3Jo6qByTQ48LnHTCbG5r/2bpaaSJtihSwiu6T\ntvMcOjqcNe+uoBPQLprjzc44OlYBxe+jm9K0PuaTr6Tqjt88TqFk0NaYrE3eAW3ZM5sjfPluLedY\nYFJWHRLz/LPOulndsJDNMEkAyGaSkmbw4DIh7F7UcQVXTv8nprUI7TFoKhX37rgjSmm0jXOZnvTN\n6u77Z9swJu2n6cSWQxM7wPunXIm2YwZm/1hiqkzKuc79T29n5y4np3Z6nTjdUkgnVc49ZayXZAXC\na59PmSpMvP9w5jGcMdrv07njzwx0vur2PbQ42cckSfJIpLT2TK4c93FPuw1mF4yrI5ugAS47ay7Y\nMmec4I/DhXP9wLjeTNEjrvGdgqynTxTvUFOAvNuStcm7OdHkrYmvhfEd4t0MCl+1zOYH8oMDTJFP\nQ983GZBFeteKtoKat+dWIOxDNzPtmAP+OAQ17wWnC5fD204ZGyJvNxvckUBUVewIoViuvfZv/4CY\nLF3Tn0veiqzSE8gnPVzQ6Q/UL0a2KGtmyHReLBuUpaDmHSbkYUeDtMtpSJRANompCnXJcO7lifHj\n2aVtCpO/G6S07xhGG7Pon/DnqoC1fFlDUkyxbjemISkG/UMlLMumZJSRZJvpbZP5+GfO56tPbKef\nHpAt74PSEZnF7HLaXx8qW77ZPKb564fLKSRkSAhLREdTim5LCwWaqXZSRKzLplgj6yWmUCmXbVER\nyCHsKWMbSU2qZ1sOT7CoS6pCY0oBqoYkWSL5DAifbkX0uhtjEJfjDL94BvETlgc0b9kb/1s+IqKX\nZTuGpYhc4u4MLSkGthHnuHHNfPby2Vz/2FKkZE5MHE77Wjkwm7sJJGI6LQ0xioqF5Wb0slQScoJU\nyuCmj8zllj8+AAjTopsfqjXRChUp6t1o9JyjeadUn4ileAlUDb2QwrJt0kmVn3ziHfz48TgbSi+i\nNGRoUBspBsgqpHlrqZqatxfxK0tIkh+kqBsWsRqEI5kJT7iYPKaBK+efSl+fbyFprwim0ndNI3Hc\nS+h7BbknAqbYoJY3OqTNSSGTKMC4pk7MHuGLjqkyHQHTsV1hGscU39aHFkyjdetuFu0Sgkaw1vak\n0Q386NpzhGUFeNv4M2lPtYX93QcoV/Ctj83zAh49Td2Ih4g0SE6V91OJd501hVMmt4a01ffNn8q7\nz5rMt3+9kr39ec+d0tqQ4NZrziYVF8cGY0Nqrek+GHz5I3PRdCtErkGzubftAH5wgOPUuazrEgpR\nkLxdn/dImncwT4W2KRA3QVggPPeUsZw+YxTppMpTq/wA1WT8yFFqpHkfIVg1UpUCVaYxw3KLhMih\nYhCFkkFf0c/JLSt+SktX8ivr1oE1b6fghuf/U0zH5+1XParPT+WczvnO/mCqVD/pQn3MCc6p8Hkv\nyQo/va0lhd9WFVWSBoZLXtnIpkQ9qiLTXu8kvohp3sdZtovexGa7NZklV/O2QdWwveAmmTq5Ednx\ng7c3J0WQn+l/1JYernYkMi4JE2nZ4V3h47dJxhUM1zfsCACphIrlraUuh0zwthFnsiYKIXjJLZzx\nPnPs6SiWWKftEroiS2TKQ0hIjKpvce6gVhIVE0yVeFymPhUThUEUC1s2sJVgoJjTDyeoTZcLtLW4\n5nKfHBpiDWS1Idqakshp8fyntvqaVGstDckQZnt3kp7deiqzW+eIcUgNI8m2l9WsLqkSjym0SZPR\nd8yEgQl8aMpVBNXFUGCSLdf0eYeIXJFFwiBElbRa0buSmfDM+lPGNFV9R5WBQ9bgaN7ffg22YyUI\nam5BIvdWKQT6EkRdYAKPq3LIxxn0N4MTsJZUkSSJ9nTAOmGE1x2nEiqyJCFJEv90/Lt5+4Szqu53\nJKiKHCLaWvcUJKr4K5C3JElV7cnOto7mJLphsddZMphOxqhLxjyiDa7jrmXyPxioilxlNamteR+Y\nvINCyiuZzYPHnth+AnVqmium/2NVm3WBQlCSJHn9DL67ifiRo9SIvI8QauUZr7Xd07wlhd6MT475\nUrXm7aIuJV4iTTdDRSrCmrfN5sw28Zdjcg6bzZ3lN6UpXuIKKWg293Ihq6QSKkk1GdK8dctgW2GD\nc7AFZswj/L5Myat85Urnk5pEQJJclyURU9AtQ0RKuyZ3JxmDu9YbVRdm4YD/s0FpEfV3FZ2OppQg\n74DmrZXcQLhAZivHZFly5CK1bT9Kx26x3MPSPHJXFYlEXMEoOwJATAv5usHGGhjDhORkp9604Res\nUBNiQgsUtFBkiaw2RH28zsvFbLqme1dIkiyRWMJUfFOuQ85lCuiKIF+9GCAMxyeXNftobnL8pwGz\nbGO8kbxeYG+xy8vHPGOUr9U1JeqIyeIeZdstpOFkbnPMo+lEgg/NfC9WOemZ122nIlk66QpbNnax\nAXXvKbSkwlHESSXBuPix6LunoioyTfV+JLoLNaAdKrLkFXoQmncN8jYSnvm2crKH2r7HukCyjrBZ\n1m+/MR7O8hXsl/gd9h8Hr2NraYrPBXzRAZ93Q9zXhG0zTN6viFfBg0HNVKkIbHu1cO91pxO3U1dJ\nskqck9pnVvnjXytqEXWlUHWg/alEtQl+pIC1hng93z7nZs4c+5aqNmu9ZxAm/1cSKg4lIvI+QhiB\nu6vglsNUZYW+rK9590gb+O2m+/0DA8TqlgYtaWaIUIOat9zcy7J9ItLbKjnLpGJlYorsmM2dNddy\niua0Y/JSqoO9MEVO6sZ4AwOlQTQnA1le9zOvGb0TsE0VJSau35sp8v/Yu/P4qMqzf/yfs81MJpls\nkAAJ+yabICgo4i5Qt69WWxUXcKlaRVu1daFUpbUPuFT9Wbva1trqQ12hllddeLpp1YLWlcUVtAjI\nkkD2zHaW3x9nmXMmM5mQZCYZ5vP+h8xkZnLmJMx1rvu+7uuOWevL7eA9uXqMeVyl+/CrDx7Gqzv+\nbZ6npJ7Y/knrzKBmNUZxFy/ZhVRCoB0DyvxQDc0zbG533rKDrjkkbZ6rcLsrWJTvRWNwM3a173Iy\nd0kU4VckaBFX8PZUrsdR3xSBz96yUo45vxO/5IMkCOb6Z8mcKxdFc7cjuwMUAGiqfYFiPq+4OLGk\nx+nnbL3fiNaGmGiNnESCieYeVrOaFr0egZB1fK7gXR4wA+nKj58xHx8NYGBxYs4x4JfNoXMAJdoQ\nCLGg01TEzmz9PslsEqO5A5V5UWF/gNt75IiC0GGeWBAEnFJ9DtQvx6KqPODMwbqzMzkp8/YOm4uY\nV3YhYlumIbLpaMS3j4PSNNK5uEgOIgBQXtJx7tGdOaWbUxUEAQtGXYzohzM7HFcyRRZR2mFnP8HZ\nIcuI+Z2LG89FQYoe+r3NfUHiHjbvjeAdjWnmErqkQCUIAr459RKcMvLkbv+MVFLOb2e4oHFfdLmH\nsv0phs2TL9DSCaYY4QAS9RoAg/dByT1s/t6n9dANsxfuftd2llu/bEJMtTJcw0BdY9jJAPeXJ/aA\n1ttLrAIq8zXNDy8De/WtqI/sT/xQV/C1h5dlQYbeWG0uMSpuhqJ4h819QgClwSLo4SDEUKPzGu7M\ne2d9G0q1oYjpcTyx/jWs/2C3s7etumeY9fqJIri6pjBiVqFdsbWOcnRlrbmOdsBufNGyHau2/MU8\nUCt428PmghL3NOZwF0KVyFaLVCWC0lJ7eU7iP09Ts13oZm1VKCUqsdvaBGCH+SErVdShrug980nW\n/2NZMiuW7QsdqWq7N/OW4tjXHIEMa3jWmuMHzEzTzLytD3YljrgRQVxXUe4aQraXfCnDzKYgpSF7\nIwvZmUqwq5TbjVZExCYYutnD2mn5GPfDiPuwH19ik/oP8xQ0JqqI7S077SmX2JbEOmDAXPs/sMgM\n3pE2H9o3HO08xh42d9bhC4lhUfu47OBk/32bc9YdPwztAJuuGtcdJCVR8BSs+WQRgwODoe0fAqO9\nFOquMdBVn7MbXjDFvvbuzMo5Blfm7Q48/qQ51YmV46C3mFXlyRciboospXyvV0y5GPH35gGaL2Xm\nndziMxv8nmJAd/DufnAZ6JqKSHXBlC2ZsuxMz3FPz9gXAuky784Up/g7AwBZTrwWg/dByJ15P7Rq\nA155dyfuXvmOp9HK8sfeRn2zOTcc0+PY1xxFZWkAJQEFQsRd9GP9J7IztiIFUtUObCsyd/s5acDp\nAOBtB2oF4UvGLwIMEXpbGUR/GIZoNfiQzbaZPsmHoF+GVl8LQdQhVe72PB+agoaWKN57y/xD/vN7\nr+PXaz7Axm3m4+zgamgydMEMmvubo4gLVvC2Mm9REJ0Mz3OeUvTrdg9ZuzPvErnEeZ8lQSvwuTJv\nLe7q2Caaeyy7s57wl0M9VePun2suvZGgt1ZgQukkSKFGSFWJZXSxz6bCMIC4nZl7Mm+/uYey9f7E\nYDPaNHsLy0TWO7bYWspTuQcQVZSUJC5A7A+BgUHz8a/sfx5hcb815SGgusIOgmaTFA0xRIw2xHeM\nxfTBiTXqdvAGzPXtRpv5evaccNAvozpoBqq2Fsks7LOCS0u7+Tu3i3wG+BJroO3gbVfXjq01f85h\nYwc6owLuoFhZGoAAYGhVx9854B16lCUhkXlrZuadnDFqmp5YrpNuODPpQzlV4RLQ8QPXPUfaWYGX\nnbFVliay/AGl5haVMsy/02CK4J343XXN6CHm//3Dxg3M8MhERul+T6LYu5k3kH4IORvsn+WTRQyz\nKtyryjpvhuK+6PLMSSuJkbVU3+/KcSSTPXPeuQverDbPESczKWmAPPQTfLjTu7ymTdgLsbzOmeON\najFEYqq5jlY30KaJEACE6meiWbaWFllV1MGA7GzacP74ryKyx9zooXaIjMtPnAXdMPBKXQPW7/3M\nyXy11lKIZXUIi/sQ9I81s1PV3NtWFAV8e958/PKjTzF1qoh3/55ocFLiC6IZifWPdr/o3U1WW0+7\nGMfOOiUVMVWHjihEeCtSJ9YOxsdNiZaR5vPNDz17pACAOd/tt+daXQ1GrOB95IwifN76mfVzXWug\nnUYuGgTr/BiajOKAbA25CmZBmL9jsxnJNSw4o2w2Pmr+wNmJKbLhWHO/agDtLQJQYgV915y3JEWc\nJVRicTNaVfPnuzPvb596PJa/vBX75E8hKDFUlgWxwzpGe8574UmH4uebXI1rrO5XQwYU47yTxiIU\n9OGdLwdgXcM/UOoL4ZRDvo5h1SFcEDYb5OwT/us89fCRI3HWCWbryTsunYn9zebWh9VNZlCwh8JP\nnjEUf39nBzTdQHFAdoYd506agj98bG7KcsnJhyEkDMCU0WbWfszUIRhUUYTRNWaf7GWXzkSFK6hV\nlRfh9kuPwODK1FXInjlvSUQsrsEwDKfaPDljVDU9MSef5kP1vmuPxusbduGZl825fk+Rmiu4JQc0\nd5CXU2Tw9187B63huJN1L7t0JprazIs+ewcrO4ja2Zq7u9hti7xVzJnMnjIYA8sCGF2ToiNZkvsW\nH43G1ljSnHfXC9Y6M6DU3BfdMHIbvAM+GcsunYnykB+KJODLfe2oTXMRaHNfdCkpRlnSFay53X/t\nHLz18V488Tdzu9p0GXqqi4NcYPDOEbswTSzdB6m0Ac2tewAkrh63la6Fuyg3psUQi5vLqEQB2Cuo\nKJaLIDQMhVi1y1xcJOowAAQUGYJsZn2TB0zA3z7ZZ3bv8oedtZhavVWQ5jOvnu250jZhPwRBgCDH\nYaiJhgMTBtdC/FhE3JpntTPvimAJmvfHzbXWmuQUpe1vbzH/muxqcSdwxs0GNIpVze5aQlJRFAK8\nsdvJrGPbJiIweb35GnIMorVlpN6ayFxDivke3t3/Nt7d/7Z5pyvzdobQ5Rj8483vG9EihII+TB0z\nEOs273aCfak+BOMGV2P9f8zzJEuCk50FjUrobaUQi5s9xwgATc0wg7ccd4oI/ZIPoiA4PbvF4ia0\nqOZzy1xz3n6fhOpQGfaFzfqD8jLRXLalJ4bNq0PeavD4DrOJSHFAdrLYE8dNw4mY5nmcX5FQWQpU\nxkc79w0vH4RqK3sqtiqFAWDW4MPx7KsfQ9s/BJNHVngyQ3e2NW5AotBt+qihngsxURBwyPBEtfWI\nwR23dxw5OH3w6ThsbjhD58mZt98nQdONRJerNMOZpUGf50PeHdB8nmHljnP0smQeQ6oP9oqQ31lf\nDQChoA+hoLeRjP1+3BcCK+bcDkkUUaIcWMFa8rntTFmJH2VJ8/2pmrR0hyyJqAz5sa85mnYIOVvc\nf0/2KE9n5DRLwVIWrKW4QAPM3/PIFH/Hydw1BRw2Pwg5w+ZWT+WInmKHKxd7yZdfkcwPJ1GDIvoQ\njWuQkpYYybIASUlsU1jXGIERC6BdS6x7tVtzhvzmB669WUMM7TAMwwrePkSsHbxkUUaFvxx14X0Q\ny/dCHmAPi7u2WlQVZ/mUvduYMyft7GGsojUchVhsZuYhV+FOyrWg9rB7WzmiH1vLk5QoxFAj9EgR\nEHd1B/OluPp27fhlX0BIFXsgKHFIrYOh7jSDn7OUyPp9+EQ/Lp9yEcT95m5DdsEaYFZdq9aOSoYu\neLL7BmsBgFi6z5mXtzd+QDwAI+aHEGzGjlZzyL0maSlSib1LkRxDaYld7Z0YNncXOk0UToTeYI6q\ndDXzce+6lG7tbUD2I/blKECXMLC8KG27R3exXbHcvXW86aQqWItZ65cVyRu8Az4JqmZkHDYHktY4\np8mQUs2P2wVv7u1dD4Q9kuD+PZX5Qx365OdCb2XeQOJiLpeZd3d4Mu8U1eBdybzNx2U+X+6Lg1R/\nS9nC4J0jTsGatYGCs/uTLWlLQ7tHud9ndmkSJA2KYG4Dam9q4ARvSXSGtQNyAPWNYQhqAO1qO1Td\nvD+shiEKIoKKGbTsIdKI0WbuYiQYgKZ4CuiqigagOdYC//h3nPvawq41yZriFGm1WNXmRofMW0Wj\n8hnE4hZUxMd4AkiqYOLeYcoZQg81QJDjHdbRFrn28lWsYUnPPLp1DGKRtca8bZIzn+tklFa2LMIq\nHrP+I8qS4BS6tEfi0PbVmIFb9cFd6mq0l0JvLYNUXg+p2mxp65f8zsWa3h6C6I9g475NKJKLMCxU\n63kPIcVV+e/XneO2P2R8kmvNtpgYdTiQzOeiCeeiSC7C5AET0j4mZm0vW1bs82Qi7vXSgiDg4gnn\n4uyxp3d7HW86SoqlYnbzEZ8ieoJOQJGg6ZmHzYH0WZV7Pa6U4jF2Zt3Y0vlFdjr2h36uM9RUpG4U\nZ6VjX8wV+/v+fXXGezHoyox9nS8VS9aF2J2x8U22MHhnQTiqoqXduyuY0yXMyvTiRlJ3LtVbgOEE\nb8Xa9UtUAV1GLK5Bttbl2vPjih28NRkCBNQ1heEXzMBoN2Zpj4dRJAcgSaK5VCfuh2EIaFWb8ceP\nVgEAtP2DPB9WA4OuTRosEVenOMOqKPcrIiL2Bh3OnLcVOBUVMdnMumvh3bIwOXifNOxYs2DKZjVZ\nEcvMPa6T23C650EXTVqAG2dcA3XXqMTx6e75bxHlYiLrtT+c7WAfMMzXtocYJUl0/qO3RVRA9SH+\n3ymIb0/sya3IImCIiG01h6ztna38kh/2SgB7HXZYi2B8+egOGzKU+q3aASXmbCBiaHLK5TElUuLi\npegAMp+ja2bivuN+2GlbSltxkeL5MEquDp9dMxNzhx/f5Z/dZUnLxlQtfebt90nQNHPY3C4sTCdd\n5uS+v7Pg3dDazeCdIvPuK+5h855edOVL5q2kec+pMu/OCtbc2/Wm09MLou5i8M6C6x96Ddc/9Jrn\nPrt6Fk7w9gZ3Q9CgR4LmGlZRcTbZ8CsSSopkCJKOPfti5iYMgt061GroIgnOhhRtERXhqIZia3/h\npqg51xpWwwhamar5QWj2zd7eth0fNXyKQfIIaPW1GDwgEVA9OyxZhg8yX3dAqd8pShs62Oc0QnEy\nb6dtpwpVtIbsFe/8kXsI8VuHXdlh/99xQ8xWlfb/PfcmAYD3P+DQkhqMLR8FGCnmvGF1RLOqdodV\nlzgZROyzqYhvH4ehhrk8ys4AZVFwisbs4VmtvhbaPnP4fNSQUqdHtxEtcvrFA4Bf9jkdLY32xEhA\nqsy3LJAI3s7fhC55AtL/G30Kjhx8OAJS9pbq2PPcQyqDnkrsQTnaaMHdrEiWBOiG4azEUBTJO2yu\nmHPebRHVHJXqJCBJXVjDW2o1jXFn92NqzIu5IQO6N8ztVyQU+eU++2B3S3Vx0l2DrL+T0mJfhkf2\nrXS/d/v3ka63eTK7F7zYyd9YV9eJ97b+ffmUp+xCG8MwnA+WRPA2/9XQMXhDC0DdNQbV46LYGfkC\nsLbLnDV5IF54C04wcipXreCtSCIgxWFEfE5L1XJ/KRoANESbMArmnLddLFUe8mPP/nZz/tgXRZEc\nwHdnX463yxow3bUcZXz5mMTxqTK+MuJknHTUZLz7aT3iqo6nt5hrzysrJWyPxc1kU1NwzNQhEMsF\n/CeyCZKswfBFYBhCh0zbfXt8xRgIgoAbzp0Gnyxi9/52zJo4CDe/9ifnfert3jluWRKwaOL52NL4\neYcLjbISH5paDRiGGfyrS8px+lEjURr0YfaUwSgOKLj27EPx8z9thLprDIQae7g8kXnbRU32nuTj\nhpbhzGNGYV9TBDPGV+FnqzbA3GFcgN40EKK1I5tn2Nx1wTFj0FQkq7CaqMiDvsCGfebPOe+YyZ6i\no1NGmu1qX3rjC+e+AaW9u2/wLRdMx0dfNGDK6AGIxTV8/YQxB1Qo1VPupZR2sLH/litCfs8oix3I\nW9oT+5ink/yhe+uF0xGNe7OpMTVluPTUCc4GGwBw8hFD4fdJmDHeu/NWV10wd5wzrN/XejN4H35I\nFRbOH4+jJg/utdfMhuRs+vuLDkdTa+Iz1/130dn5GTE4hEtPnYBDhqcfteqrCzQG7yxStcSmCppm\nz3mbHxya4A3eEPREYxIkdtzy+yRnq0l7DbOdeTt7RUsCdCEOQwtiZ521UUdxJT4PA/sjDeZuZLrq\nZN5V5QEzeFsXEjXFQ1CsFOG4ad4sa3jpUAwOVmN3+15EP5yFY4+ag1DQh+Om1WD9B4lK7WDQgICw\ntbm9gLOPHY09cRn/ec8cNheUKBD3IVDi/aAt9lQrm+996hgzCE8YYfX/1v3QxXarUKxjRe+RVYfj\nyCGHdzj35SV+8z+rIQCCgepQGfyKhLlHJPp6H35IFYJ+Ge1R1cmU7Yst93CsnXkfOnoAJo9MVH+7\nq5zj28cDhoihlRVQRNmpcTDCJdAjRThhzHTPHL1znEWJC5KdrealwNETRnV4HODNEFJ1EOuJytIA\njp4yBIBZiX3aUSMyPKN3uTNve5jX3rSnqjzgyYrt77dFVFRXdF44l5xVpbsYOW5ajee2KAgd7jsQ\n44ZmnqLIld4M3pIo4sQZQzM/sI8lz0PbIympZJpKyPR30JWitmzo+zGdg5j7Cl/Tra+tYXNnj2Xz\nljlfavfztpc7WTtuOXt0W8HSZ+/HbDdOkTSn4GxHnVn1XVtqZtANkUa0W/PRRdY+t1X2jkuy+bo1\nJemvom8+4jpEP5wJI1zq3bQ+oCSGyJUwRH8EmpUZK7LobK0nyCoEXxRGzO/Zlxfo2s5DJa3mHLPe\n2DED6uxDKRS06wLMoFDiSz386QzJG4bntuyZ844797l55v00H+LbJmGYPsN6Qet+Q0R0w3E4b/xZ\nKX9+kc/nFA8CZuFdukpud/FVLqtac8Gdedvnede+xI5x7mFz9+890/RBbwaufCX1g6H7XMvlUHa6\nTaeyrfB+qzlkL7sCEsPmguBu2WmxN9+wMm87wxZE1QreiblQAAiIAc9rGFYWb2gydlrBe0SlOV+8\nP9Jo7kcNIGgFVLvNYeyzQzEiNAynjpyb9j0E5IDTKtL9HyIYkJ3g/Z+ItZtYuGPwhq/dXI8eD3To\nhdyV4F0ePgSRjXMQ+++UDt/r7EMped/idEt07Kvu5P9/dntUIJF5JweCVEU79lW49+VStwwFzGVP\n0Q+OcgJ4mb/jDlm2rhTP5Bv7nRquM2af50TmXeQ59+7fe6bCqT76XO1XCvECJpdD2U5ilmMcNu9F\numF45lI8mbfmLVhzb7cJwargtoKz0SHztrqLWXPefsneDMMM3ppgXQioMnZY2/UNG1AJn6hgQ/1m\nJxjYa4ZDRebws948ELfMPK/L7y8580bS7kh24xdFFlFkWM1gfFZns5i/Q1WwLMo4YegcDAqmn1eU\nZbFDoZqts0KT5Cvv9MHb/Dd52FwUEsHbnitLfs1Uy4DsD8p0u8h1PE4RRqzIXG5WuRdiJzsu2Mv4\netJoo78RBAGGYSRl3lbw3tcOvyIhFFSSNjFxZ96dz3l39fdwMGPwzi7nsz3HDp5PgT62e387rrjn\nn/j7267+1/HEsqrkgjVB6ph5G9awuW7vNuWLwO9zZ97mtZZTdWy9hu7KvJvaYigt9iHgk+GTzCD9\nft0mAMCospEA9OeBHQAAIABJREFUgPJQ9ypFk5tcOHtuW/RwCLIkmPv/Wpm37rOat8T9Kfv+njv+\nLBw39Oi0P7OzZRzJnbHcyoq9c8IBOXWBlz13XGQdWyITTPS/brcadSRn+ikzbyl1Jp+JfcEW19MX\nOdmvOXxQ560h84m9JMtd4S675rQHlgc6jES4f++ZMu+DbXqhOwrxHOTygqWvLqaZefeStz7aCwBY\n+ddPnPvcm444QyuiO/M2AAiJPautYFgSrwX8m6AM+wQ++WRnztvOvINyEIh3zLxrKspR7h/gVMi2\nurbpBIDRZWYR0qSRlTh99ghMH9e1Stprzz4UX9a3ej4EKkJ+nHroNLypbcWYkrF4+8P9MNpKofgT\nFfHmkjd7NzJft1oHdjZ3lep7t1wwHe9vrcfXjh8DUQT+ZT9WTP2zrz17Cl5Y/wVOn20VaLlesqqi\nCKOGhPD5LnP0IPkDIdV8qzNsbkVaWRJxwdxxad8DAFx51hSsa9iBrZFdnuHjZGcdMwqqpuOsY1IX\ntOWj75w/Df/3n+04+fBEEdSx02rQ0h6Hbhg4ekqiHuPCueMQ8MnY+mWip26m4D24MohTjhyOKaMq\nO33cwcyvSDhzzshO29MebIr8svmeh6R/zxfPH9/lTUk6M2N8FU6cXotjpw3p8WsdCAbvA7S18b+o\nC9fjqCHezQXswCYEWiGW1UPbMwLRlJm3GagF0XA2FnGG0q3g7YsOQoV/KBqKdwBSvMOcd5EcgBAX\nnNakLaq5DehXpo/F7JpEj+sLJ3wNL3z+NzRGm5znAeaQ8NeOTywDy+TwQ6pw+CEdA/3Xjp6Mq6uO\nwsdb67B+rbkft/sqNCgH0BSzh/SVbgZvMem22XMaSD1sPmFEhVOpfv5J4/Avc5dMKGLq4dXqiiAu\nPTWx/lpAYthbFARcd85UfPfnr6c8lmCKLlPJAf74aTU4cXpth8e5nXncGEzYfiZ+uWEfFhxydtrH\nBQMyFn7lkLTfz0dDBhTjklO869/H1pbh21/vuKzOXimwbXeLc1+mYXNBEHDeiWN74Ujz21ePHZ35\nQQeZTO/5pF6qmpclsU/+XzJ4H6AH3vkFAGDW4Bmebln2XHdgqtmcJdJa4Q3emrdgDYCZfetyIhu3\nhs1jcQ2KHgREQBOirszbqjZXJBTFi9BqBe9P2z+CKIiYPND7ITin5kjMqTkSb+95DwNTNFzpLe4P\nUPeSnqASRFMssZtXd7bLSw6YPlmCqplDy501TrAdWzsbr+5ch9HWlEEmiepz89+yksQUQ3Kmn7pg\nzTts3tWGVhWBciyddWPXHlzg3Bdt7o0/iAoJ//K7SdVVZ04ZAJKnWARRSxo2TypYgznsbcQDEKwm\nJPYcciSmQdB9ZvAWI4iq1lIxe523LCIoF6FNaoXgb8OeyJeYWDnes4mF2+GDDuvRe83Ep4hmP2rd\n8GTe7mpyo7uZd9J8kt8nOXPQXWn1eN74s/DVMachIHdvXbS3mYP3WFIOm9tz3vbwd+FNN2ad5ClY\n40cYFSYWrHWTmlRY1CGQGELSsLm9zjuRedubeiSGzc3gFo1rEKyGJHEjirC1TttemuWTRQSVIkCO\nQRpgNvaYOWh6z99UNwmC4HyIuueQPOuVXZttHIjkbPdAd0USBfGAAnfyum+3mKp5bqfaijIx523/\nfEbv3uYtWOvfG2QQZQuDdzfFda3zBwg6/vi3T7Hps30AXJm36FoTaFecJw2bb9vdgi92mvPcUSOM\ntri53tVu0qLIEkqUIATRgFS1A7IgY2rV5J6/qR6wP0QVxTtsbjNUxbOTU1clD5tne79cZ847xfda\n2uOe26kL1rpXbU5dx8ybiMG725Izb7ufucMKyA88/T6AdMPmKgZVFGFghbWHtWvplb0dZtQIO3tx\n25m3IotOYBT9EQwtHppoitJHjpo0CANKAzh8fLVzX1BJtAOVoXSrjaA7eI8cHMKF88b37EAzSZEo\nf+/iGZgwvByzJ3v34lZkEbMmVnsKopKHzZl4976JIyowqDKISSMrUFHau21iifIFL1u7SdW9WVgs\nufuVNY9tf3inK1i7Y9FMPP3uK3izHYAuosgvIRzVnK0129VE8LaboiiyiGI90XQk5Ov7db9nHjMK\nZyYtYXIPm/vl7q0td+/zfPslR2R9swdnnbfr1zRuaDluuXBGx8cKAq4+y+z89vQ/twBwVZsbicdQ\n7xo3tBx3XXVUXx8GUZ9i5t1NquEdNjdbV7rms63MuzJkZsTJvc0Bs1GLTxFRVGT9GgzRqdy2s+zW\nWBva4+3wiT5nWN0niyj3JdYvhtIUqvU1d8FadyrNgUTBmiSaLUaz3Xwh0XGte+Pe9pJBnfVqRJRF\nWc28V6xYgffffx+CIGDp0qWYOjWxdnPlypVYs2YNRFHElClT8P3vfz+bh9LrkofN46ruZNsAnK8H\nWMN6qea8BVmFJIoIBqzgrZutIOubIs6weVu8De1qGAEpALs1hSKLKJMSwbs0zaYbfc09593duWq7\nOMkO2tnecEBI7pd6gOSkJi3MvIkoG7KWeb/55pvYtm0bnnrqKSxfvhzLly93vtfa2opHHnkEK1eu\nxBNPPIGtW7fivffey9ahZEVyG8u4qnn7lVvBu9Rqv6m72qMaqnnNJPniePHzv6MdjQDMOe9ia39i\nSfdBgIDWeBva42FnO0/ALFgr8yeCd1mgf3ZOch9z8qYkXWVn3nZGm+3t9xLD5t2M3slLBhm7iSgL\nsvZJuG7dOsyda+5WNWbMGDQ1NaG11exzrSgKFEVBe3s7VFVFOBxGWVn6/Vb7Ql1jGI+t/djZDjJZ\nqszbvVOY0/LUCgLujUnsrFoYsAN/+XwtXtn5uvVY0Wk6IUkiipUgGqNNiGgRTxaryKI3ePeDOe9U\nZDExsNPtzFtK7K8N5K5Pc7eLxa0n9tU2gURUGLI2bF5fX4/JkxPLlyorK1FXV4eSkhL4/X5ce+21\nmDt3Lvx+P04//XSMGtV5v+aKiiBkuXeXCVVVpZ8rXrHyHWzZ3oiyUABXnNVxO8rikOJ5viCJiXXb\ngJN5y4qEqqoQJFkCYEAQAD3uAwLtHY+nrBhl1hy5IokYXl6DD+o+BQBUliSC9eDqEAJFiYA9fNAg\nVA3su3nvdOfRCNYC7wB6OIjSEn+n5zudygpzSkCWRef5AZ+E8cMruvV6mVx46kT86JE3cN68Q7r1\n+iWhAKqqQrj6nKn45aoNOHXO6C69TjbeS6HhOewdPI89l4tzmLNqc/cwZGtrKx5++GG89NJLKCkp\nwSWXXIKPPvoIEyZMSPv8hoaOwa4nqqpCqKtrSfv9fY1h69/2lI/b19CCOiVxf2tbzDNsPn54CB/s\nBMLhOOrqWtAeiSWK1TQZhi4msnPLVacfin+tM3+uKAoY5B+ED2AGb9lINKNoaQ5DiyZ+dVq72Ol7\nyabOzqMAH04oPh8vvl0HjDO6dYzhtqj1WnCe/7MbjoMgICvveVRVMX57y4kQRaFbr9/cHEZdXQtm\njhuIw7v4Opn+FikznsPewfPYc719DtNdCGRt2Ly6uhr19fXO7b1796KqytzcYuvWrRg2bBgqKyvh\n8/lwxBFHYNOmTdk6lG6xLzbSjdJ2GDbX9KRtPs3MW7NeJ2q0QvBHrBcXALXjdZMsys7wuiyJqA3V\nON9zL7tSJNFTCFWi9M9hcwCo8g0GNB/8Svf+1Ox13u4qc9GqPM+W3hqaL8StGIkoN7IWvOfMmYO1\na9cCADZv3ozq6mqUlJhBpra2Flu3bkUkYgazTZs2YeTIkdk6lG4xUqzTdY8edChYi2uA7B42N7Nq\nOxjvqFqDwNRXrRcSYcQ7NpdQRBmqtaRMEgUMK0kEb/dabrt/+IiQuctSd/t254IdfANK9wZ5ZDm3\nc91ERPkga8PmM2bMwOTJk7FgwQIIgoBly5Zh9erVCIVCmDdvHr7xjW9g0aJFkCQJ06dPxxFHHJH5\nRXPIvdRH1VX88aNVmO3aBlQ1OmbeYlFiqMQQzO87Veae1xZgtJVBLDYff+GEr+HThs9RVTQQmlYH\nwCxYqykZjONqZ0MWZcypmYUnsN45JgD47uGLu70eOVfsgjNfN1qjAole6WKWq8x7C+vUiCgXsjrn\nfdNNN3luu+e0FyxYgAULFmTzx/eIu8nGxvoP8cbut/HG7red76tJvc1jqg6xsinxfGgQBQGaYeCL\nvc3eFzdE6K3lQPUOAImtO4FEm1VZFCAKIs7vZH9nScxun+/eYGfe3a02l6zny3mSeff3iykiOjjk\nRzrTBxKZd+pGG8lz3jEtBiHYAr3NrArXoEIUBei6gR/8YZ33yboIvS310rhZE83+2ccfVpPy+/mm\nImQO6Q8o7V7v9UTm3b+D9xGHmPUcIwf3zzX3RHRwYW/zDARBgF/s2Jc7ntzbXG6EIBjQWiogBJuh\nG6qzx7Wn8xoAASLu/8ZXsOaLCMaVj/Z878hJgzC2tgyVKTZc+NkNxyY6teWJMbVluPvq2RhY1r3g\nbQ+79/fg/c2zJuO8ligGlhVlfjARUQ8xeKfhDJsLqbt6JQ+bq4K5lE2PBiHpkpN5R2Kad/03zD2m\ny0sCWDTp/JQ/e0CaQJevexdXl3c/oLl7m/dnkigycBNRznDYPI3EUjEBmqF3+H6HLUFFa9vOmB/Q\nRWiGBkkUsLehvUPmDZ2nvavsXuH9PfMmIsolRpE03FXDWlKWDXirzQ3DgC5Za7jjPhi6BNWIQxIF\nGAY6ZN727mCUmZ1550vBGhFRLjCKpJEp845riYC8e387oJidwIy4HzBEqIaayBalpODP4N1lSp7M\neRMR5RKjSBruOW/N6Dzz/vmfNkFQYgCAUn8I0BKZNwAIHQrWGIi6yqdIkCUBRT6WZxAR2Ri803A3\nadFTDZu75rwjMRWCEoUiKvjhJbNRO6AUcT0OwT67ycPmev9fn91fyJKI755/GM4/aWxfHwoRUb/B\ndCYDM/NOVbCWCOiabkDyx1DmC6G02I/yYDF2RXRIkpW+JxesaflZNd5XDhle0deHQETUrzDzTkN3\nNWlJOeftWuet6ToMKWoOmQPwS+YabdGa6xaS57w1XjMREVH3MXin47RHFVLPebuGzXUhBggGQtbu\nXgEreAv2RiVi0rC51rHpCxERUVcxeKdhrxRLV7AW01QnOzdEs9K8WDG37fRbu3wJaTNvDpsTEVH3\nMXhnIKYpWPt8dyN+9Zy5B7kmmpXmQTt4S1ZmbQftDpk3h82JiKj7GLy7INWcN0QNb31sbt9pWMG7\nWDaDtzNszsybiIiygME7A90wUg6bu7umGZJZvNZh2FxUARgQ/OGkF2XmTURE3cfgnYFupMm8reBt\nGAYMKXnY3NoRTFQhDdoGsbjZ05iFTVqIiKgnGLwzMAwj5Zy3ORRuQDcMCLKdeZu7StnD5pBUSKX7\nAQCXTlqQk+MlIqKDH4N3BuaweYrMGwAkFf/e+SaU2q0AgGDSnLchqBCKWmDEFVQFB+bkeImI6ODH\n4J2BYaRYKmavAZdUPPnpaufu5DlvTYpADIRhREKQRc5zExFR72DwzkDXOxasybDntL33FyctFYvI\n9eY3wqEO+38TERF1F4N3BobRcT9vUU/MaeuRIud+RTSXgNnD5mFxHwBAiIYwpHgQJMOH+M4xEFiv\nRkREPcDgnYFhGNCT5rwF3cysBUmFEU0Eb8GKyvawuV1ULmh++CQfZukLoe4cl/2DJiKigxqDdwYp\nC9bsJiuSCgjmBPjo8Hzn285SMYtgPV4wmHITEVHPMXin8NH+TyH4zMYqqQrWjLgVjK3gbRgCSvUa\n5/uKKENxFagJujeYExER9QSDd5KWWCt++t5v4J/2CgAz8/7vnibPY1S79kxSIQgGYAiQRG9WHfKF\nnK9FnbuIERFR72HwTtIWbwcAp6hM1XTsaWjzPMauX7MzbxgCxKQzGfKVOF+LOnuZExFR72HwThLT\nY57bcVV35rVtumadNsnsXW4Gb++pLHUFbwHmELoB7+sQERF1B4N3koga8dyOxXVAMAvWoh8dgaJw\nLbR6c37bnXlLSeu/Qkpi2Dz5e0RERD3B4J2kPSl4x1XNybz15gEI7T0aajQAABCUqBO8haQzWepP\nBG8hKXgn3yYiIjoQ7NmZJBz3bt8Zs4bNDQMABLRH44Dqg6EqEAJt1lruFAVrimvOW2SwJiKi3sPM\nO0lY6zhsLgg6YJinqj1ilprr4WIIgTAEUYNhCB0CtGepGDNtIiLqRQzeSbyZt4G4pjtD4wAQjpql\n5kakGIJgQPBFAUPskHlLouR8bX+L5WpERNQbGLyTeDJvwUAsrgGCDlmUMLqmFLo5fg4jXJx4nCFA\nTMquJw+YABgCYtsmdPgeERFRTzB4JwnHXcFbVJ05b1EQ4VcS2bQeDSYel2LYPOQrwYTGi6DtGclh\ncyIi6lUM3knCqmvYXNSdanMR3uAN3fV1ig5rAKwit8SwORERUW9g8E4Sdi0VEyTVWectQoLf5w7Y\n7lPXMfMG4AyxC4zeRETUixi8k3gzbw2abkBwhs1dp8u9Q1iKOW8gEbyd2M2KNSIi6gUM3kncTVoE\n0W5ibgZvn2vY3NATpy7VUjEAmDKyEgBw2NiBnvs5BU5ERD3BJi1JYpqrt7lkB28doiBBkdJn3qnm\nvOfOHIZDhldgWHVJh+8RERF1F4N3kqh7Y5KkzFv2BG9vIE+VeYuCgBGDQx3uJyIi6gkGbxfDMBDX\n4s7txLC5DkmQkoK3O1innvMmIiLKBs55u8R11bttp5TIvCVBhCy5h8q9WXiqYfNkrFcjIqLewODt\nYs93S4JZmCaIGiCqEATAJ/o9mbe7YC3dsHk6zNGJiKgnGLxdolbwDohW9zRRg+Azq89LlNABF6wl\nG1xpvu7omrLeOWAiIipInPN2iVvFakVSEG1aCyCp5sYjAEqVEGQxdcGakWadd7KTZtSiOCBj+riB\nGR9LRESUDoO3i5N5C2aGLEgaBMXMvEt9pZCTsm33110ZNpclEXMOHdJ7B0xERAWJw+YuMavS3O8M\nm6vOsHmZrzT9UrE07VGJiIiyIWPw3rp1ay6Oo1+IWcPmPqMIgJV5W8Pm5f4yyHLP5ryJiIh6Q8bg\n/e1vfxsXXHABVq1ahXA4nOnhec0eNldgBm9zztvMvCuKyrwFa8jc25yIiCgbMs55P//88/jkk0/w\n4osvYuHChZg4cSLOPfdcTJ06NRfHl1N2gxZBV2BoIgRJBXwRGLqIkFKMqBRJ/URD5LA5ERHlTJfm\nvMePH4/rr78eS5YswdatW7F48WJcdNFF+O9//5vlw8stO/OGIQG6DLG4GWKgHdq+wVCUpA5rbhw2\nJyKiHMqYee/cuRN/+tOf8Je//AVjx47F1VdfjWOPPRYbN27EzTffjGeeeSYXx5kT9py3oUkwNAmC\nYt6v7hwHRfL2Nvd0WwOYeRMRUc5kDN4LFy7E17/+dfzhD3/AoEGDnPunTp2aceh8xYoVeP/99yEI\nApYuXep5/K5du/Cd73wH8XgckyZNwp133tmDt9E77A5rhi4CmnlqDEOAEQtAlrztURXZvc5b5Jw3\nERHlTMZh8zVr1mDkyJFO4H7iiSfQ1tYGALj99tvTPu/NN9/Etm3b8NRTT2H58uVYvny55/t33303\nLr/8cjz77LOQJAlffvllT95Hr7CXihmqBEO39u6OKwAESJLgqTZP7rbGYXMiIsqVjMH7e9/7Hurr\n653bkUgEt9xyS8YXXrduHebOnQsAGDNmDJqamtDa2goA0HUdb7/9Nk466SQAwLJly1BTU9OtN9Cb\n7DlvXXNl3roMSTSryd0BO3nZGIfNiYgoVzIG78bGRixatMi5fdlll6G5uTnjC9fX16OiosK5XVlZ\nibq6OgDA/v37UVxcjLvuugsXXHAB7r///u4ce6+z57x1VYSzFEyTnEDtnvNOzrwZvImIKFcyznnH\n43Fs3boVY8aMAQBs2rQJ8Xg8w7M6MgzD8/WePXuwaNEi1NbW4qqrrsLLL7+ME044Ie3zKyqCkGXp\ngH9uZ6qqQp7bwqfmMUqiD7D28jZ0CQFFQlVVCEUlifddFFDgXMIYAgYOKO7weoWiUN93b+I57Dme\nw97B89hzuTiHGYP39773PSxevBgtLS3QNA2VlZW49957M75wdXW1Z7h97969qKqqAgBUVFSgpqYG\nw4cPBwDMnj0bn376aafBu6GhPePPPBBVVSHU1bV47mtpN39GuN2A4Lf28tYlSKKAuroWxOJa4sGu\nixEYApoa2+EvwOQ71XmkA8Nz2HM8h72D57HnevscprsQyDhsPm3aNKxduxbPP/881q5dixdffLFL\nmfecOXOwdu1aAMDmzZtRXV2NkpISAIAsyxg2bJizTnzz5s0YNWpUV99L1tjV5mocTuYNXXIqy93z\n3JJnqRg7rBERUe5kzLxbW1vx5z//GQ0NDQDMYfRVq1bhtdde6/R5M2bMwOTJk7FgwQIIgoBly5Zh\n9erVCIVCmDdvHpYuXYolS5bAMAyMHz/eKV7rS1E9BlmUoaqAPedtqDJ8VtB2B2jJ9bVhCDBARESU\nGxmD9w033ICamhq89tpr+MpXvoLXX38dP/jBD7r04jfddJPn9oQJE5yvR4wYgSeeeOLAjjbL4loc\nPlFBXNUhfjEdyvCPEd5+CJSqjgMU7gI1QTAYvImIKGcyDptHo1HceeedqK2txa233orHHnsML774\nYi6OLeeiWgw+yYeYqkNRy1C291hA9UNJUSgnJbdKNRi+iYgoNzIG73g8jvb2dui6joaGBpSXl2P7\n9u25OLaci2kx+CQz81ZkCbpuBmR3NzWbuymLJAuoCAVydpxERFTYMg6bn3XWWXj66adx7rnn4rTT\nTkNlZSVGjBiRi2PLuZgeQ7lYigZVQzCgQNV0AHDmvN3cwfvsY0alDPBERETZkDF42wVngLmka9++\nfZg4cWLWDyzXDMNATIvDJ/kQ13Qosoj2iAogc+bNGW8iIsqljOmiu7vaoEGDMGnSJCeYH0xUXYUB\nwwzeqg6fLELVzcxbSbEVqOgJ3kRERLmTMfOeOHEifvKTn2D69OlQFMW5f/bs2Vk9sFyLWq1RFVGB\nqhlQZBGaZs15KykK1kT3rmIM30RElDsZg/eHH34IAHjrrbec+wRBOOiCt92gRbY28VZkyZnzTpV5\ne4fN9RwcIRERkSlj8H788cdzcRx9zt4ONBG8RSd4y3IiUFeVB1DXGPEOmzPzJiKiHMoYvC+88MKU\nc9wrV67MygH1leTM2yeLUK1hc9k1RL78yqMQi2t4+p9bnftYsEZERLnUpQ5rtng8jvXr1yMYDGb1\noPqCvZe3CHN+293HXHb1MZcl0dkaVI8UQQyEUSQX5fBIiYio0GUM3rNmzfLcnjNnDq688sqsHVBf\nienmsLmExLC5rUM3Nfs5Hx+B4NCdOPb4g2v+n4iI+reMwTu5m9quXbvw+eefZ+2A+krMybzNU+Ju\nzCKLqZbGGTCixZD3TIFPUlJ8n4iIKDsyBu9LLrnE+VoQBJSUlOC6667L6kH1BSd4GzIA3ZN5y510\nTzv4VrwTEVF/lzF4/+Mf/4Cu6xCtoq14PO5Z732wiOnu4B3zbEYipxk2JyIi6gsZo9LatWuxePFi\n5/ZFF12El156KasH1RfsgjUY5ilxr+2WUgybc3UYERH1lYzB+9FHH8WPf/xj5/bvfvc7PProo1k9\nqL4Qt9Z5Q7fmvBV3wVr6wfGDsVUsERH1bxmDt2EYCIVCzu2SkpKDMmBFtCgAQLCCtzvzdq/ztjHx\nJiKivpJxznvKlCm44YYbMGvWLBiGgVdffRVTpkzJxbHllB287cxbUdzrvDnnTURE/UfG4H3bbbdh\nzZo12LBhAwRBwJlnnolTTjklF8eWU1HVCt6anXm7C9YOvpEGIiLKXxmDdzgchqIouP322wEATzzx\nBMLhMIqLi7N+cLlkZ96GZgZt91KxUNDX4fFVZQEAQG3VwXUeiIio/8s4Hnzrrbeivr7euR2JRHDL\nLbdk9aD6gp1566oZvH2yiOVXHolLTjkEIwaHOjz+lCOH44KTx+HKMybl9DiJiIgyBu/GxkYsWrTI\nuX3ZZZehubk5qwfVFyJaBIqoQNPM24osYsiAYhx/WG3KxyuyhHkzh6XMyomIiLIpY/COx+PYujWx\ng9bGjRsRj8ezelB9IaJFEZD8iKvWHt6ddFUjIiLqSxnnvL/3ve9h8eLFaGlpga7rqKiowL333puL\nY8upqBpFQPYjxuBNRET9XMYINW3aNKxduxarVq3CkiVLUF1djWuuuSYXx5ZTyZm3z9UelYiIqD/J\nmHm/9957WL16NV544QXouo4f/ehHmD9/fi6OLWd0Q0dUi8Ev+xHXmHkTEVH/ljZC/eY3v8Fpp52G\nG2+8EZWVlVi1ahWGDx+O008//aDbmMTuax6Q/IjHzYo1Bm8iIuqv0mbeDz74IMaOHYs77rgDRx11\nFICDt4931FrjHZADaGPmTURE/Vza4P3yyy/jT3/6E5YtWwZd13H22WcflFXmABCx1nj7JbNgTRBS\n7yRGRETUH6RNL6uqqnDVVVdh7dq1WLFiBb744gvs3LkTV199NV555ZVcHmPWOZm3VbDmk6WDdpSB\niIjyX5fGhmfOnIm7774br776Kk444QT8/Oc/z/Zx5VRYjQCAWbCm6hwyJyKifu2AolRJSQkWLFiA\np59+OlvH0ye8mbfG4E1ERP0aoxSA9ngYAFAkB9DcHkdx4OCqpiciooMLgzeAdtUM3qLuRzSmoao8\n0MdHRERElB6DN4D2eDsAIBoxT0dVeVFfHg4REVGnGLwBtFmZd6SNwZuIiPo/Bm8kMu+WVvM2gzcR\nEfVnDN5IzHk3NZnd1TjnTURE/RmDN4C2eDsUUUFLmxm8K0L+Pj4iIiKi9Bi8YQ6bFytBRGPmpiQ+\nhduBEhFR/8XgDXPYPCgXIRLX4FNEiGyNSkRE/VjBB2/d0BFWIwgqRYjFNfiZdRMRUT9X8ME7rEZg\nwECxHEQ6NoGoAAAYmElEQVSUwZuIiPIAg7ddad6so6E5Cr+PwZuIiPq3gg/eMc3co3zL9jYYADNv\nIiLq9wo+eMd1M3gbunkqGLyJiKi/Y/DWVfMLBm8iIsoTDN5W5g3dDNo+peBPCRER9XMFH6ni1pw3\nDPNUBFiwRkRE/RyDtzPnbWfeDN5ERNS/MXhzzpuIiPIMg7cz583gTURE+YHBW/MOmzN4ExFRf5fV\n4L1ixQqcf/75WLBgATZs2JDyMffffz8WLlyYzcPolDNsbhWsiSI3JSEiov4ta8H7zTffxLZt2/DU\nU09h+fLlWL58eYfHbNmyBf/5z3+ydQhdkrxUTNP0PjwaIiKizLIWvNetW4e5c+cCAMaMGYOmpia0\ntrZ6HnP33XfjxhtvzNYhdEksqcOapht9eThEREQZZS1419fXo6KiwrldWVmJuro65/bq1asxa9Ys\n1NbWZusQukR1qs3NzLu4SOnDoyEiIspMztUPMoxERtvY2IjVq1fj0UcfxZ49e7r0/IqKIGS5d4vJ\nqqpCkD63bugi5h85Al89aTwkznsfkKqqUF8fQt7jOew5nsPewfPYc7k4h1kL3tXV1aivr3du7927\nF1VVVQCA9evXY//+/bjooosQi8XwxRdfYMWKFVi6dGna12toaO/V46uqCqGurgXN7ebrGrqEuTNq\nsH9fa4Znkpt9Hqn7eA57juewd/A89lxvn8N0FwJZGzafM2cO1q5dCwDYvHkzqqurUVJSAgA45ZRT\n8MILL+Dpp5/Gz372M0yePLnTwJ1NqqvaXBILfuUcERHlgaxl3jNmzMDkyZOxYMECCIKAZcuWYfXq\n1QiFQpg3b162fuwBi7matMgSh8uJiKj/y+qc90033eS5PWHChA6PGTp0KB5//PFsHkannI1JdAmy\nxMybiIj6v4KPVqquWg1aBBaqERFRXij44B3T4xAMs4qdmTcREeWDgo9WcSt4CwJboxIRUX5g8NZU\nCKw0JyKiPFLwESuuxwFDYqU5ERHlDQZvPW4tEyv4U0FERHmioCOWYRiIaXFAl1lpTkREeaOgg7dq\naDBgwGCDFiIiyiMFHbxjWsz8QpcgcdiciIjyREFHLDt4G5rEYXMiIsobhR28rb7mhsaCNSIiyh8F\nHbHszFtn5k1ERHmkwIM3M28iIso/BR2xYnoi82a1ORER5YvCDt4sWCMiojxU4MHb3stb5FIxIiLK\nGwUdsRLrvGXOeRMRUd4o6IjlLBXTRQ6bExFR3ijs4O3qsMaCNSIiyhcM3gCgSdzPm4iI8kZBR6zE\nsLkERSnoU0FERHmkoCOWe9hcYcEaERHliYKOWFFnqZgEHzNvIiLKEwUdseJWhzWDmTcREeWRgo5Y\nMVfmrchS3x4MERFRFxV08I46c94iFLmgTwUREeWRgo5Yqq5CggRAgI/Bm4iI8kRBRyzVUCEKMgAw\n8yYiorxR0BErrschwpzrZvAmIqJ8UdARK66pruDNgjUiIsoPBR28VUOFYJingJk3ERHli4KOWKqu\nQrAybxasERFRvijoiBXXVQgG57yJiCi/FGzEMgzDzLw5bE5ERHmmYCOWqqvmF8y8iYgozxRsxIpr\nVvDW7cyb1eZERJQfCjd423t5W8PmLFgjIqJ8UbARy868DZ1z3kRElF8KNmLFrMwbmghBACRR6NsD\nIiIi6qKCDd6qlXnrugBFFiEIDN5ERJQfCjZ423t5G5oIRSrY00BERHmoYKOWXbCmaQJ8CivNiYgo\nfxRu8LaHzTWBmTcREeWVgo1acd0O3iIUpWBPAxER5aGCjVpxa85bUwXIYsGeBiIiykMFG7Xcw+ay\nzEpzIiLKH4UbvK2CNV0TITHzJiKiPFKwUcvpbW6IkCVm3kRElD8KN3jbvc11ETKrzYmIKI8UbNSy\nm7TAENkalYiI8krBBm9nP29dhMTMm4iI8kjBRq2Ys6uYBJmZNxER5ZGCDd5x97A5C9aIiCiPyNl8\n8RUrVuD999+HIAhYunQppk6d6nxv/fr1eOCBByCKIkaNGoXly5dDzOGSrYgaNb/QJBasERFRXsla\n1HrzzTexbds2PPXUU1i+fDmWL1/u+f4dd9yBhx56CE8++STa2trw6quvZutQUgqrEQCAocssWCMi\norySteC9bt06zJ07FwAwZswYNDU1obW11fn+6tWrMXjwYABAZWUlGhoasnUoKUXiZvCGJjPzJiKi\nvJK1qFVfX4+KigrndmVlJerq6pzbJSUlAIC9e/fi9ddfx/HHH5+tQ0kprEYhQLCqzZl5ExFR/sjq\nnLebYRgd7tu3bx+uvvpqLFu2zBPoU6moCEKWe2/f7XA8Ap/kRzsElJYEUFUV6rXXLjQ8dz3Hc9hz\nPIe9g+ex53JxDrMWvKurq1FfX+/c3rt3L6qqqpzbra2tuPLKK3HDDTfgmGOOyfh6DQ3tvXp8YTUC\nBQoAIBqNo66upVdfv1BUVYV47nqI57DneA57B89jz/X2OUx3IZC1YfM5c+Zg7dq1AIDNmzejurra\nGSoHgLvvvhuXXHIJjjvuuGwdQqci8QgU0QcALFgjIqK8krXMe8aMGZg8eTIWLFgAQRCwbNkyrF69\nGqFQCMcccwyee+45bNu2Dc8++ywA4IwzzsD555+frcPpIKxGUSGXAgAL1oiIKK9kdc77pptu8tye\nMGGC8/WmTZuy+aM7FddVqLoKQTffPoM3EVHfevnlv+OEE07u0mN/8pP7ce65C1BTU5vlo+q/CjJq\nRa0GLbv2xgBw2JyIqC/t2vUl/va3tV1+/PXXf7egAzeQw2rz/iSimcHb0MzqdS4VIyLqOw88cA8+\n/HAzHn30N9B1HV9+uRO7dn2JBx/8Be66607U1e1FOBzG5ZdfhTlzjsV1112F73znFvzzn39HW1sr\nvvhiG3bu3IFvf/u7mD17jvO6qqpi+fIfdHj+J598hPvvvweiKGDKlGm49trrU95n/5zRo8di1aqn\n0NjYiOnTD8eTT/4v2tvbcd11N+Ldd9/Gyy//HbquY/bsObj11u+ipaUFd955G9ra2lBSUoI77vgf\nXH75Rfj9759AMBjEhg3v4cknV2LFih93+5wVZPCOWsEb9rB5DtuyEhH1Z0//Ywv+89HeXn3NmROq\ncd5JY9N+/4ILFmL16qdx2WVX4pFHHoaqxvGLX/wWDQ37MWvWUTj11DOwc+cO3H77EsyZc6znuXv3\n7sF99z2E9ev/jT//eZUneLe0NKd8/oMP3oebb16KsWPH4Uc/ugO7d+9KeV86W7duwRNPrIbP58O7\n776NX/zitxBFEeeddxauvfabeOKJxzFr1myce+4CPPXUSrzzzls47rgT8dpr/8L8+afgtddewbx5\nX+nROS3I4G33NTc08+0z8yYi6j8mTpwMAAiFSvHhh5uxZs1qCIKI5uamDo+dOvUwAObyZHcXz86e\n/8UX2zB27DgAwO2335n2vnTGjh0Hn89crRQIBHDddVdBkiQ0NjaisbERn3zyEa644hoAwPnnXwQA\nqKmpxW9/+0vMn38K3n33bXzjG1cf+IlxKczgrSU2JQFYsEZEZDvvpLGdZsm5oChmD46//vUlNDc3\n4+c//y2am5txxRULOzxWkhLNu5KbgaV7fqpNsFLdJwiJxE5V1Q7Ht3v3Ljz11Er87ncrEQwGsXDh\nedZrSTAM3fNaY8eOw759+/Dhh5sxatQY+P3+zk9CBgUZtSL2piR25s2CNSKiPiOKIjRN63B/Y2Mj\nhgypgSiKeOWVfyAejx/Q66Z7/siRo7B5s7ni6a677sR///t5yvuKi4uxb5/ZbGzjxvdTvn5FRQWC\nwSA+/vgj7N69G/F4HBMnTsLbb/8HAPDcc6vw4ot/AQCcdNI8PPDAPZg375QDeh+pFGTwthlx88qH\nmTcRUd8ZMWIUPv74Izz00P2e+0844ST8+9+v4vrrr0FRURGqq6vx6KO/6fLrpnv+9dffhJ/97P/D\nNdd8A6FQKUaOHJXyvjPPPAf3338vbr75egwcWNXh9ceNG4+ioiCuueZy/P3v/4ezzjoHP/zhD3Hu\nuRdg06YNuO66q/Dvf7+G448/EQBw8snzsHfvXhx++MyenTAAgpGq6Xg/1Jvt5uJaHNf89hnojdWA\nIeKWC6ZjwojOe6tTamyn2HM8hz3Hc9g7eB57rrNz+Pzza7B79y584xvfPKDXS6Ug57wVSYHeMNi5\nzcybiIiy6Z57/gdffrkTd911X6+8XkEG72SsNiciomy69dbbevX1CjLl1HXvTAEL1oiIKJ8UZPCO\nxr1VjRw2JyKifFKQUSvWIXgz8yYiovxRkME7OfOW2B6ViIjySEFGrWjc2/mGmTcRUd96+eW/H/Bz\n3nvvHTQ07M/C0fR/hRm8Y0mZN+e8iYj6zIFuCWp7/vk1BRu8C3KpWMdhc2beRER9xb0l6PnnX4gV\nK36IlpYWaJqGG264GWPHjsP//u/v8cor/4Qoipgz51hMnDgJr776Mj7//DP8z//ci8GDzd4dfbEN\n6OWXX+VsAxqLReD3F2VlG1A3Bm+w2pyIyLZ6y1/w7t6Nvfqa06sPxTljz0j7ffeWoL///W9x5JFH\n4//9v6/i888/w09+ch8efPAXePLJ/8Vzz70ESZLw3HOrMHPmURg7djy+851bnMAN9M02oOeff6Gz\nDejixVfiZz/7VVa2AXVj8AabtBAR9RcbN25AY2MD1q59AQAQjZobSZ1wwsm44YbFmDfvFMyfn35j\nj77YBrS5uTkn24C6FWTwrgz54ZNF6IYBVTMgCgzeREQAcM7YMzrNkrNNUWTceOPNmDJlquf+m276\nHrZt+y/+8Y+/4lvf+iZ+/es/pHz+wbwNqOfYe+2V8sghwyvw1IrT8fBNJ+DXN5/Q14dDRFTQ3FuC\nTpo0Bf/618sAgM8//wxPPvm/aG1txaOP/gYjRozEZZddiVCoDO3tbSm3Ej2YtwH1nLNefbU8Iksi\nBEHgfDcRUR9zbwn69a+fj507t2Px4itwzz3/g8MOm4GSkhI0NjbgyisX4dvfvhqTJ09BaWkZDjts\nBm677VZ89tlW57X6YhvQ+++/x9kGdOHChVnbBtStILcEBbj1XW/heew5nsOe4znsHTyPPZd8Druz\nDWjy66VSkHPeRERE2dbb24C6MXgTERFlQW9vA+rGCV8iIqI8w+BNRESUZxi8iYiI8gyDNxERUZ5h\n8CYiIsozDN5ERER5hsGbiIgozzB4ExER5Zm8aY9KREREJmbeREREeYbBm4iIKM8weBMREeUZBm8i\nIqI8w+BNRESUZxi8iYiI8kxB7ue9YsUKvP/++xAEAUuXLsXUqVP7+pD6tU8++QSLFy/GpZdeiosv\nvhi7du3CLbfcAk3TUFVVhR//+Mfw+XxYs2YN/vCHP0AURZx33nk499xz+/rQ+417770Xb7/9NlRV\nxTe/+U0ceuihPIcHIBwOY8mSJdi3bx+i0SgWL16MCRMm8Bx2UyQSwRlnnIHFixdj9uzZPI8H4I03\n3sD111+PcePGAQDGjx+PK664Ivfn0Cgwb7zxhnHVVVcZhmEYW7ZsMc4777w+PqL+ra2tzbj44ouN\n2267zXj88ccNwzCMJUuWGC+88IJhGIZx//33GytXrjTa2tqM+fPnG83NzUY4HDZOP/10o6GhoS8P\nvd9Yt26dccUVVxiGYRj79+83jj/+eJ7DA/T8888bv/71rw3DMIwdO3YY8+fP5znsgQceeMA455xz\njFWrVvE8HqD169cb3/rWtzz39cU5LLhh83Xr1mHu3LkAgDFjxqCpqQmtra19fFT9l8/nw29+8xtU\nV1c7973xxhs4+eSTAQAnnngi1q1bh/fffx+HHnooQqEQAoEAZsyYgXfeeaevDrtfmTlzJn7yk58A\nAEpLSxEOh3kOD9Bpp52GK6+8EgCwa9cuDBo0iOewm7Zu3YotW7bghBNOAMD/z72hL85hwQXv+vp6\nVFRUOLcrKytRV1fXh0fUv8myjEAg4LkvHA7D5/MBAAYMGIC6ujrU19ejsrLSeQzPa4IkSQgGgwCA\nZ599FscddxzPYTctWLAAN910E5YuXcpz2E333HMPlixZ4tzmeTxwW7ZswdVXX40LLrgAr7/+ep+c\nw4Kc83Yz2B22R9KdP57Xjv72t7/h2Wefxe9+9zvMnz/fuZ/nsOuefPJJfPjhh7j55ps954fnsGue\ne+45HHbYYRg2bFjK7/M8ZjZy5Ehcd911OPXUU7F9+3YsWrQImqY538/VOSy44F1dXY36+nrn9t69\ne1FVVdWHR5R/gsEgIpEIAoEA9uzZg+rq6pTn9bDDDuvDo+xfXn31VfzqV7/Cb3/7W4RCIZ7DA7Rp\n0yYMGDAAQ4YMwcSJE6FpGoqLi3kOD9DLL7+M7du34+WXX8bu3bvh8/n4t3iABg0ahNNOOw0AMHz4\ncAwcOBAbN27M+TksuGHzOXPmYO3atQCAzZs3o7q6GiUlJX18VPnl6KOPds7h//3f/+HYY4/FtGnT\nsHHjRjQ3N6OtrQ3vvPMOjjjiiD4+0v6hpaUF9957Lx5++GGUl5cD4Dk8UG+99RZ+97vfATCnvtrb\n23kOu+HBBx/EqlWr8PTTT+Pcc8/F4sWLeR4P0Jo1a/DII48AAOrq6rBv3z6cc845OT+HBbmr2H33\n3Ye33noLgiBg2bJlmDBhQl8fUr+1adMm3HPPPdi5cydkWcagQYNw3333YcmSJYhGo6ipqcFdd90F\nRVHw0ksv4ZFHHoEgCLj44otx5pln9vXh9wtPPfUUfvrTn2LUqFHOfXfffTduu+02nsMuikQi+P73\nv49du3YhEonguuuuw5QpU3DrrbfyHHbTT3/6U9TW1uKYY47heTwAra2tuOmmm9Dc3Ix4PI7rrrsO\nEydOzPk5LMjgTURElM8KbticiIgo3zF4ExER5RkGbyIiojzD4E1ERJRnGLyJiIjyTME1aSHKN/fe\ney82btyIaDSKDz74ANOnTwcAfO1rX8NXv/rVLr3Gr3/9a4wfP97pZ53KwoUL8fvf/x6SJPXGYXvs\n2bMHn332GWbPnt3rr01UiLhUjChP7NixAxdeeCH+9a9/9fWhHLA1a9Zg69atuPHGG/v6UIgOCsy8\nifLYT3/6U+zYsQNffvklbr31VkQiEdx3333w+XyIRCJYtmwZJk+ejCVLluDwww/H7Nmzcc011+CY\nY47Bhg0b0NbWhocffhiDBg3CIYccgs2bN+OXv/wlGhsbsXv3bmzbtg1HHnkkbr/9dkSjUdx6663Y\nuXMnBg8eDEmSMGfOHM8exW1tbfjud7+L5uZmqKqKE088EWeccQYefPBBGIaB8vJyXHTRRbjzzjux\nbds2tLW14YwzzsDll1+O1atX469//SsEQcCePXswevRorFixAoqi9OEZJuqfOOdNlOd27NiBxx57\nDFOmTEFjYyN+8IMf4LHHHsOiRYvw8MMPd3j81q1bcc4552DlypWYOHEiXnzxxQ6P+eCDD/DQQw/h\n2WefxerVq9HU1IQ1a9ZAVVU888wzuOOOO/D66693eN6///1vqKqKP/7xj3jyyScRDAZRW1uLs88+\nG2eeeSYuu+wyPPbYY6iursbjjz+OZ555Bs8//zw++ugjAMDGjRv///bu2CW1MIzj+NcONQQRQi3W\nYnBsjDoSBFKNOVaEo0M4REO4HGyrKQin5ob+gDBaoiVyECEipakhWkKkQKFoiERPd5DOzYxLlysX\njvw+4+F5X97tx/PyHh7S6TSHh4eUy2VP3jKI/A/qvEU8bmJiAp/PB8DQ0BC7u7u8vb3x8vLC4OBg\nW73f78c0TQACgQBPT09tNZZlYRgGhmHg9/t5fn7m5uaG6elpAIaHh7Esq23d1NQUe3t7bGxsMDc3\nx8rKCj09rT3CxcUFDw8PXF5eAlCr1bi/v3fXf4xPnZyc5O7uzp2TLCK/KbxFPO7ztbJt22xvbzMz\nM8P5+bk7zOOzrw/Svnv28l2N4zgtQfw1lKE5y/j4+JhiscjZ2RnLy8scHR211PT19bG+vs7CwkLL\n90wmg+M4fzyXiDTp2lyki1QqFUzTpNFocHp6Sq1W69jeY2NjFItFAKrVKldXV201uVyObDaLZVnY\ntk1/fz/VahWfz0e9XgeaXf3HVb3jOOzs7Ljd//X1Na+vr7y/v1MoFBgfH+/Y+UW6iTpvkS6SSCSI\nx+MEAgFWV1exbZuDg4OO7L20tEQ2myUWizE6Oko4HG7r0IPBIKlUiv39fQzDIBKJMDIyQjgcJplM\n0tvby9raGre3t8RiMRqNBvPz8+6o1FAoxObmJqVSCdM0iUQiHTm7SLfRr2Ii8iOPj48UCgWi0SiO\n47C4uMjW1pb73/m/ymQy5PN50ul0R/YT6WbqvEXkRwYGBjg5OXHnE8/OznYsuEXk76jzFhER8Rg9\nWBMREfEYhbeIiIjHKLxFREQ8RuEtIiLiMQpvERERj1F4i4iIeMwvRph4T/csGFUAAAAASUVORK5C\nYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "metadata": { + "id": "HNqUFL4deCsL", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# 4. Case study: building an RNN\n" + ] + }, + { + "metadata": { + "id": "YkC1k4HEQ7rw", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "In this exercise we build and train a model similar to the RNNColorbot model that was used in the main Eager notebook. The model is adapted for converting and training in graph mode." + ] + }, + { + "metadata": { + "id": "7nkPDl5CTCNb", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "To get started, we load the colorbot dataset. The code is identical to that used in the other exercise and its details are unimportant." + ] + }, + { + "metadata": { + "id": "A0uREmVXCQEw", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def parse(line):\n", + " \"\"\"Parses a line from the colors dataset.\n", + " \n", + " Args:\n", + " line: A comma-separated string containing four items:\n", + " color_name, red, green, and blue, representing the name and\n", + " respectively the RGB value of the color, as an integer\n", + " between 0 and 255.\n", + "\n", + " Returns:\n", + " A tuple of three tensors (rgb, chars, length), of shapes: (batch_size, 3),\n", + " (batch_size, max_sequence_length, 256) and respectively (batch_size).\n", + " \"\"\"\n", + " items = tf.string_split([line], \",\").values\n", + " rgb = tf.string_to_number(items[1:], out_type=tf.float32) / 255.0\n", + " color_name = items[0]\n", + " chars = tf.one_hot(tf.decode_raw(color_name, tf.uint8), depth=256)\n", + " length = tf.cast(tf.shape(chars)[0], dtype=tf.int64)\n", + " return rgb, chars, length\n", + "\n", + "\n", + "def maybe_download(filename, work_directory, source_url):\n", + " \"\"\"Downloads the data from source url.\"\"\"\n", + " if not tf.gfile.Exists(work_directory):\n", + " tf.gfile.MakeDirs(work_directory)\n", + " filepath = os.path.join(work_directory, filename)\n", + " if not tf.gfile.Exists(filepath):\n", + " temp_file_name, _ = six.moves.urllib.request.urlretrieve(source_url)\n", + " tf.gfile.Copy(temp_file_name, filepath)\n", + " with tf.gfile.GFile(filepath) as f:\n", + " size = f.size()\n", + " print('Successfully downloaded', filename, size, 'bytes.')\n", + " return filepath\n", + "\n", + "\n", + "def load_dataset(data_dir, url, batch_size, training=True):\n", + " \"\"\"Loads the colors data at path into a tf.PaddedDataset.\"\"\"\n", + " path = maybe_download(os.path.basename(url), data_dir, url)\n", + " dataset = tf.data.TextLineDataset(path)\n", + " dataset = dataset.skip(1)\n", + " dataset = dataset.map(parse)\n", + " dataset = dataset.cache()\n", + " dataset = dataset.repeat()\n", + " if training:\n", + " dataset = dataset.shuffle(buffer_size=3000)\n", + " dataset = dataset.padded_batch(batch_size, padded_shapes=([None], [None, None], []))\n", + " return dataset\n", + "\n", + "\n", + "train_url = \"https://raw.githubusercontent.com/random-forests/tensorflow-workshop/master/extras/colorbot/data/train.csv\"\n", + "test_url = \"https://raw.githubusercontent.com/random-forests/tensorflow-workshop/master/extras/colorbot/data/test.csv\"\n", + "data_dir = \"tmp/rnn/data\"" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "waZ89t3DTUla", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Next, we set up the RNNColobot model, which is very similar to the one we used in the main exercise.\n", + "\n", + "Autograph doesn't fully support classes yet (but it will soon!), so we'll write the model using simple functions." + ] + }, + { + "metadata": { + "id": "9v8AJouiC44V", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def model_components():\n", + " lower_cell = tf.contrib.rnn.LSTMBlockCell(256)\n", + " lower_cell.build(tf.TensorShape((None, 256)))\n", + " upper_cell = tf.contrib.rnn.LSTMBlockCell(128)\n", + " upper_cell.build(tf.TensorShape((None, 256)))\n", + " relu_layer = tf.layers.Dense(3, activation=tf.nn.relu)\n", + " relu_layer.build(tf.TensorShape((None, 128)))\n", + " return lower_cell, upper_cell, relu_layer\n", + "\n", + "\n", + "def rnn_layer(chars, cell, batch_size, training):\n", + " \"\"\"A simple RNN layer.\n", + " \n", + " Args:\n", + " chars: A Tensor of shape (max_sequence_length, batch_size, input_size)\n", + " cell: An object of type tf.contrib.rnn.LSTMBlockCell\n", + " batch_size: Int, the batch size to use\n", + " training: Boolean, whether the layer is used for training\n", + "\n", + " Returns:\n", + " A Tensor of shape (max_sequence_length, batch_size, output_size).\n", + " \"\"\"\n", + " hidden_outputs = []\n", + " autograph.utils.set_element_type(hidden_outputs, tf.float32)\n", + " state, output = cell.zero_state(batch_size, tf.float32)\n", + " n = tf.shape(chars)[0]\n", + " i = 0\n", + " while i < n:\n", + " ch = chars[i]\n", + " cell_output, (state, output) = cell.call(ch, (state, output))\n", + " hidden_outputs.append(cell_output)\n", + " i += 1\n", + " hidden_outputs = hidden_outputs.stack()\n", + " if training:\n", + " hidden_outputs = tf.nn.dropout(hidden_outputs, 0.5)\n", + " return hidden_outputs\n", + "\n", + "\n", + "def model(inputs, lower_cell, upper_cell, relu_layer, batch_size, training):\n", + " \"\"\"RNNColorbot model.\n", + " \n", + " The model consists of two RNN layers (made by lower_cell and upper_cell),\n", + " followed by a fully connected layer with ReLU activation.\n", + " \n", + " Args:\n", + " inputs: A tuple (chars, length)\n", + " lower_cell: An object of type tf.contrib.rnn.LSTMBlockCell\n", + " upper_cell: An object of type tf.contrib.rnn.LSTMBlockCell\n", + " relu_layer: An object of type tf.layers.Dense\n", + " batch_size: Int, the batch size to use\n", + " training: Boolean, whether the layer is used for training\n", + " \n", + " Returns:\n", + " A Tensor of shape (batch_size, 3) - the model predictions.\n", + " \"\"\"\n", + " (chars, length) = inputs\n", + " chars_time_major = tf.transpose(chars, [1, 0, 2])\n", + " chars_time_major.set_shape((None, batch_size, 256))\n", + "\n", + " hidden_outputs = rnn_layer(chars_time_major, lower_cell, batch_size, training)\n", + " final_outputs = rnn_layer(hidden_outputs, upper_cell, batch_size, training)\n", + "\n", + " # Grab just the end-of-sequence from each output.\n", + " indices = tf.stack([length - 1, range(batch_size)], axis=1)\n", + " sequence_ends = tf.gather_nd(final_outputs, indices)\n", + " return relu_layer(sequence_ends)\n", + "\n", + "def loss_fn(labels, predictions):\n", + " return tf.reduce_mean((predictions - labels) ** 2)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "JjK4gXFvFsf4", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "The train and test functions are also similar to the ones used in the Eager notebook. Since the network requires a fixed batch size, we'll train in a single shot, rather than by epoch." + ] + }, + { + "metadata": { + "id": "ZWQMExk0S6X6", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "def train(optimizer, train_data, lower_cell, upper_cell, relu_layer, batch_size, num_steps):\n", + " iterator = train_data.make_one_shot_iterator()\n", + " step = 0\n", + " while step < num_steps:\n", + " labels, chars, sequence_length = iterator.get_next()\n", + " predictions = model((chars, sequence_length), lower_cell, upper_cell, relu_layer, batch_size, training=True)\n", + " loss = loss_fn(labels, predictions)\n", + " optimizer.minimize(loss)\n", + " if step % (num_steps // 10) == 0:\n", + " print('Step', step, 'train loss', loss)\n", + " step += 1\n", + " return step\n", + "\n", + "\n", + "def test(eval_data, lower_cell, upper_cell, relu_layer, batch_size, num_steps):\n", + " total_loss = 0.0\n", + " iterator = eval_data.make_one_shot_iterator()\n", + " step = 0\n", + " while step < num_steps:\n", + " labels, chars, sequence_length = iterator.get_next()\n", + " predictions = model((chars, sequence_length), lower_cell, upper_cell, relu_layer, batch_size, training=False)\n", + " total_loss += loss_fn(labels, predictions)\n", + " step += 1\n", + " print('Test loss', total_loss)\n", + " return total_loss\n", + "\n", + "\n", + "def train_model(train_data, eval_data, batch_size, lower_cell, upper_cell, relu_layer, train_steps):\n", + " optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n", + "\n", + " train(optimizer, train_data, lower_cell, upper_cell, relu_layer, batch_size, num_steps=tf.constant(train_steps))\n", + " test(eval_data, lower_cell, upper_cell, relu_layer, 50, num_steps=tf.constant(2))\n", + "\n", + " print('Colorbot is ready to generate colors!\\n\\n')\n", + " \n", + " # In graph mode, every op needs to be a dependent of another op.\n", + " # Here, we create a no_op that will drive the execution of all other code in\n", + " # this function. Autograph will add the necessary control dependencies.\n", + " return tf.no_op()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "iopcs5hXG2od", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Finally, we add code to run inference on a single input, which we'll read from the input.\n", + "\n", + "Note the `do_not_convert` annotation that lets us disable conversion for certain functions and run them as a `py_func` instead, so you can still call them from compiled code." + ] + }, + { + "metadata": { + "id": "DyU0wnnAFEYj", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } + } + }, + "cell_type": "code", + "source": [ + "@autograph.do_not_convert(run_as=autograph.RunMode.PY_FUNC)\n", + "def draw_prediction(color_name, pred):\n", + " pred = pred * 255\n", + " pred = pred.astype(np.uint8)\n", + " plt.axis('off')\n", + " plt.imshow(pred)\n", + " plt.title(color_name)\n", + " plt.show()\n", + "\n", + "\n", + "def inference(color_name, lower_cell, upper_cell, relu_layer):\n", + " _, chars, sequence_length = parse(color_name)\n", + " chars = tf.expand_dims(chars, 0)\n", + " sequence_length = tf.expand_dims(sequence_length, 0)\n", + " pred = model((chars, sequence_length), lower_cell, upper_cell, relu_layer, 1, training=False)\n", + " pred = tf.minimum(pred, 1.0)\n", + " pred = tf.expand_dims(pred, 0)\n", + " draw_prediction(color_name, pred)\n", + " # Create an op that will drive the entire function.\n", + " return tf.no_op()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Nt0Kv5OCHip0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Finally, we put everything together.\n", + "\n", + "Note that the entire training and testing code is all compiled into a single op (`tf_train_model`) that you only execute once! We also still use a `sess.run` loop for the inference part, because that requires keyboard input." + ] + }, + { + "metadata": { + "id": "-GmWa0GtYWdh", + "colab_type": "code", + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + }, + "output_extras": [ + { + "item_id": 12 + }, + { + "item_id": 13 + }, + { + "item_id": 14 + }, + { + "item_id": 15 + }, + { + "item_id": 16 + }, + { + "item_id": 17 + }, + { + "item_id": 18 + }, + { + "item_id": 19 + }, + { + "item_id": 20 + }, + { + "item_id": 21 + }, + { + "item_id": 23 + }, + { + "item_id": 24 + }, + { + "item_id": 25 + }, + { + "item_id": 26 + }, + { + "item_id": 27 + }, + { + "item_id": 28 + }, + { + "item_id": 29 + }, + { + "item_id": 30 + }, + { + "item_id": 31 + }, + { + "item_id": 32 + }, + { + "item_id": 33 + }, + { + "item_id": 34 + }, + { + "item_id": 35 + } + ], + "base_uri": "https://localhost:8080/", + "height": 668 + }, + "outputId": "61f4af1d-c81e-44db-9079-1a7b8ed8ce58", + "executionInfo": { + "status": "ok", + "timestamp": 1522345877153, + "user_tz": 240, + "elapsed": 75500, + "user": { + "displayName": "Dan Moldovan", + "photoUrl": "//lh5.googleusercontent.com/-Rneh8xjecyk/AAAAAAAAAAI/AAAAAAAACB4/c5vwsJpbktY/s50-c-k-no/photo.jpg", + "userId": "112023154726779574577" + } + } + }, + "cell_type": "code", + "source": [ + "def run_input_loop(sess, inference_ops, color_name_placeholder):\n", + " \"\"\"Helper function that reads from input and calls the inference ops in a loop.\"\"\"\n", + "\n", + " tb = widgets.TabBar([\"RNN Colorbot\"])\n", + " while True:\n", + " with tb.output_to(0):\n", + " try:\n", + " color_name = six.moves.input(\"Give me a color name (or press 'enter' to exit): \")\n", + " except (EOFError, KeyboardInterrupt):\n", + " break\n", + " if not color_name:\n", + " break\n", + " with tb.output_to(0):\n", + " tb.clear_tab()\n", + " sess.run(inference_ops, {color_name_placeholder: color_name})\n", + " plt.show()\n", + "\n", + "with tf.Graph().as_default():\n", + " # Read the data.\n", + " batch_size = 64\n", + " train_data = load_dataset(data_dir, train_url, batch_size)\n", + " eval_data = load_dataset(data_dir, test_url, 50, training=False)\n", + " \n", + " # Create the model components.\n", + " lower_cell, upper_cell, relu_layer = model_components()\n", + " # Create the helper placeholder for inference.\n", + " color_name_placeholder = tf.placeholder(tf.string, shape=())\n", + " \n", + " # Compile the train / test code.\n", + " tf_train_model = autograph.to_graph(train_model)\n", + " train_model_ops = tf_train_model(\n", + " train_data, eval_data, batch_size, lower_cell, upper_cell, relu_layer, train_steps=100)\n", + " \n", + " # Compile the inference code.\n", + " tf_inference = autograph.to_graph(inference)\n", + " inference_ops = tf_inference(color_name_placeholder, lower_cell, upper_cell, relu_layer)\n", + " \n", + " with tf.Session() as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + " \n", + " # Run training and testing.\n", + " sess.run(train_model_ops)\n", + " \n", + " # Run the inference loop.\n", + " run_input_loop(sess, inference_ops, color_name_placeholder)" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "text": [ + "('Successfully downloaded', 'train.csv', 28010L, 'bytes.')\n", + "('Successfully downloaded', 'test.csv', 2414L, 'bytes.')\n", + "Step 0 train loss 0.37890616\n", + "Step 10 train loss 0.18515904\n", + "Step 20 train loss 0.0892782\n", + "Step 30 train loss 0.07883155\n", + "Step 40 train loss 0.08585831\n", + "Step 50 train loss 0.09302989\n", + "Step 60 train loss 0.089012615\n", + "Step 70 train loss 0.07275697\n", + "Step 80 train loss 0.06644974\n", + "Step 90 train loss 0.0854013\n", + "Test loss 0.13216865Colorbot is ready to generate colors!\n", + "\n", + "\n", + "\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b102d936-3379-11e8-ac70-0242ac110002\"] = colab_lib.createTabBar({\"contentBorder\": [\"0px\"], \"borderColor\": [\"#a7a7a7\"], \"tabNames\": [\"RNN Colorbot\"], \"initialSelection\": 0, \"location\": \"top\", \"contentHeight\": [\"initial\"], \"elementId\": \"id1\"});\n", + "//# sourceURL=js_e223a56194" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b103532a-3379-11e8-ac70-0242ac110002\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_b8c6a821fb" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b105b28c-3379-11e8-ac70-0242ac110002\"] = google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_44805e254b" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b106197a-3379-11e8-ac70-0242ac110002\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_a63d3c6c47" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b1069f44-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"b106197a-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_7e203b8bce" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"b1070f38-3379-11e8-ac70-0242ac110002\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_d53293d4a7" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c6d90d5c-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"b105b28c-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_3000dc2c05" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c6da872c-3379-11e8-ac70-0242ac110002\"] = google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_4136f669a3" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c6dac868-3379-11e8-ac70-0242ac110002\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_2f70dd9aee" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c6db07d8-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"c6dac868-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_7226726048" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c6dcc6fe-3379-11e8-ac70-0242ac110002\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_72e7709865" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVQAAAFZCAYAAADHDNdrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAB9JJREFUeJzt3E1Lle0ax+HTF4jeEAyMBhE0DawI\nwsCH0AIlaGBWNJBo0CDoA0TQhmDXuKAGDioiCA2KlEAlnl05FD9Co8BeaGCQoBDa2jPZsXt4Bvu/\n0+o4Rmvd1zW4rsmP84bFamo0Go0C4H/WvNYHAPhVCCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKDy\nUxgeHq5Dhw7V4OBgPXz4sHp7e+vWrVt15cqVOnnyZN2/f78ajUbdvn27+vr6qqenp65du1YrKytV\nVfXhw4e6cOFC9fX1VV9fX01PT1dV1dzcXHV3d9eDBw/q+PHj9ccff9TExMRaXpWfWOtaHwD+zuvX\nr+vOnTs1MTFRbW1tdf78+dW16enpGh8fr/b29hobG6upqal6/Phxbdy4sS5evFgjIyM1NDRUly5d\nqv3799fw8HC9efOmTp8+XVNTU1VV9enTp2pubq5nz57V5ORk3bhxo44dO7ZW1+UnZkJl3Zudna2D\nBw9WR0dHbdiwoQYHB1fX9u7dW+3t7VVV9fLlyxocHKytW7dWa2trnTp1qp4/f16Li4s1MzNT586d\nq6qqXbt21YEDB1an1OXl5Tpx4kRVVe3Zs6fevXv3Yy/IL8OEyrr3+fPnamtrW/2+ffv21c//+Xxh\nYaHu3r1bjx49qqqqlZWVam9vr4WFhWo0GnXmzJnVvYuLi9XV1VVVVS0tLbVp06aqqmpubq6vX7/+\nX+/Dr0tQWfe2bNlSi4uLq98/fvz43X0dHR3V29tbQ0ND3zxfXl6ulpaWevLkSW3evPmbtbm5ufyB\n+W155Wfd6+zsrJmZmZqfn68vX77U2NjYd/cdOXKkxsfHa2lpqaqqRkdH6+nTp9Xa2lqHDx+u0dHR\nqqpaWlqqy5cv1/v373/YHfg9CCrrXmdnZw0MDNTAwECdPXu2enp6vrvv6NGj1dPTUwMDA9Xf318v\nXryo7u7uqqq6evVqzc7OVn9/fw0MDNTOnTtrx44dP/Ia/Aaa/B8qP4NGo1FNTU1VVfXq1au6efPm\nX06qsFZMqKx78/Pz1dXVVW/fvq1Go1GTk5O1b9++tT4W/BcTKj+FkZGRunfvXjU1NdXu3bvr+vXr\ntW3btrU+FnxDUAFCvPIDhAgqQMi6+WH/kX8eXesjAPytf/3jz79cM6EChAgqQIigAoQIKkCIoAKE\nCCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQI\nKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgq\nQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpA\niKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCI\noAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIig\nAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAC\nhAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKE\nCCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQI\nKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgq\nQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpA\niKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCI\noAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIig\nAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAC\nhAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKE\nCCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQI\nKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgq\nQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpA\niKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkCIoAKECCpAiKAChAgqQIigAoQIKkBI\nU6PRaKz1IQB+BSZUgBBBBQgRVIAQQQUIEVSAEEEFCBFUgBBBBQgRVIAQQQUIEVSAEEEFCBFUgBBB\nBQgRVIAQQQUIEVSAEEEFCBFUgBBBBQgRVIAQQQUIEVSAkH8D1Aj8lNhhe7QAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1", + "user_output" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c70592aa-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"c6da872c-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_25c3aaf79a" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c70842c0-3379-11e8-ac70-0242ac110002\"] = google.colab.output.getActiveOutputArea();\n", + "//# sourceURL=js_984c56b816" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c708dec4-3379-11e8-ac70-0242ac110002\"] = document.querySelector(\"#id1_content_0\");\n", + "//# sourceURL=js_e0451a1217" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c7092726-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"c708dec4-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_7aa23d7385" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c7099044-3379-11e8-ac70-0242ac110002\"] = window[\"id1\"].setSelectedTabIndex(0);\n", + "//# sourceURL=js_5722756ddb" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + }, + { + "output_type": "stream", + "text": [ + "Give me a color name (or press 'enter' to exit): \n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "window[\"c7baac12-3379-11e8-ac70-0242ac110002\"] = google.colab.output.setActiveOutputArea(window[\"c70842c0-3379-11e8-ac70-0242ac110002\"]);\n", + "//# sourceURL=js_cdd622e58f" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [ + "id1_content_0", + "outputarea_id1" + ] + } + } + ] + }, + { + "metadata": { + "id": "AHJ2c47U-A5W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Where do we go next?\n", + "\n", + "Autograph is available in tensorflow.contrib, but it's still in its early stages. We're excited about the possibilities it brings — write your machine learning code in the flexible Eager style, but still enjoy all the benefits that come with running in graph mode. A beta version will be available soon -- stay tuned!" + ] + } + ] +} \ No newline at end of file -- GitLab From ebba4f0dfdd4a1a42eba4a59d32222532beec031 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Fri, 30 Mar 2018 04:26:58 +0800 Subject: [PATCH 696/960] Fix math equation format in tf.contrib.bayesflow.monte_carlo (#18089) * Fix math equation format in contrib\bayesflow * Fix minor pylint error --- .../bayesflow/python/ops/monte_carlo_impl.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index 985177e897..d193a8459d 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -44,14 +44,14 @@ def expectation_importance_sampler(f, n=None, seed=None, name='expectation_importance_sampler'): - r"""Monte Carlo estimate of `E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]`. + r"""Monte Carlo estimate of `\\(E_p[f(Z)] = E_q[f(Z) p(Z) / q(Z)]\\)`. - With `p(z) := exp{log_p(z)}`, this `Op` returns + With `\\(p(z) := exp^{log_p(z)}\\)`, this `Op` returns ``` - n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ], z_i ~ q, - \approx E_q[ f(Z) p(Z) / q(Z) ] - = E_p[f(Z)] + \\(n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ], z_i ~ q,\\) + \\(\approx E_q[ f(Z) p(Z) / q(Z) ]\\) + \\(= E_p[f(Z)]\\) ``` This integral is done in log-space with max-subtraction to better handle the @@ -95,9 +95,9 @@ def expectation_importance_sampler(f, log_values = log_f_z + log_p_z - q_log_prob_z return _logspace_mean(log_values) - # With f_plus(z) = max(0, f(z)), f_minus(z) = max(0, -f(z)), - # E_p[f(Z)] = E_p[f_plus(Z)] - E_p[f_minus(Z)] - # = E_p[f_plus(Z) + 1] - E_p[f_minus(Z) + 1] + # With \\(f_{plus}(z) = max(0, f(z)), f_{minus}(z) = max(0, -f(z))\\), + # \\(E_p[f(Z)] = E_p[f_{plus}(Z)] - E_p[f_{minus}(Z)]\\) + # \\( = E_p[f_{plus}(Z) + 1] - E_p[f_{minus}(Z) + 1]\\) # Without incurring bias, 1 is added to each to prevent zeros in logspace. # The logarithm is approximately linear around 1 + epsilon, so this is good # for small values of 'z' as well. @@ -121,13 +121,13 @@ def expectation_importance_sampler_logspace( name='expectation_importance_sampler_logspace'): r"""Importance sampling with a positive function, in log-space. - With `p(z) := exp{log_p(z)}`, and `f(z) = exp{log_f(z)}`, this `Op` - returns + With `\\(p(z) := exp^{log_p(z)}\\)`, and `\\(f(z) = exp{log_f(z)}\\)`, + this `Op` returns ``` - Log[ n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ] ], z_i ~ q, - \approx Log[ E_q[ f(Z) p(Z) / q(Z) ] ] - = Log[E_p[f(Z)]] + \\(Log[ n^{-1} sum_{i=1}^n [ f(z_i) p(z_i) / q(z_i) ] ], z_i ~ q,\\) + \\(\approx Log[ E_q[ f(Z) p(Z) / q(Z) ] ]\\) + \\(= Log[E_p[f(Z)]]\\) ``` This integral is done in log-space with max-subtraction to better handle the @@ -196,12 +196,12 @@ def _logspace_mean(log_values): def expectation(f, samples, log_prob=None, use_reparametrization=True, axis=0, keep_dims=False, name=None): - """Computes the Monte-Carlo approximation of `E_p[f(X)]`. + """Computes the Monte-Carlo approximation of `\\(E_p[f(X)]\\)`. This function computes the Monte-Carlo approximation of an expectation, i.e., ```none - E_p[f(X)] approx= m**-1 sum_i^m f(x_j), x_j ~iid p(X) + \\(E_p[f(X)] \approx= m^{-1} sum_i^m f(x_j), x_j\ ~iid\ p(X)\\) ``` where: @@ -216,8 +216,8 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, parameterless distribution (e.g., `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), we can swap gradient and expectation, i.e., - `grad[ Avg{ s_i : i=1...n } ] = Avg{ grad[s_i] : i=1...n }` where - `S_n = Avg{s_i}` and `s_i = f(x_i), x_i ~ p`. + `grad[ Avg{ \\(s_i : i=1...n\\) } ] = Avg{ grad[\\(s_i\\)] : i=1...n }` where + `S_n = Avg{\\(s_i\\)}` and `\\(s_i = f(x_i), x_i ~ p\\)`. However, if p is not reparameterized, TensorFlow's gradient will be incorrect since the chain-rule stops at samples of non-reparameterized distributions. @@ -296,7 +296,8 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Args: f: Python callable which can return `f(samples)`. samples: `Tensor` of samples used to form the Monte-Carlo approximation of - `E_p[f(X)]`. A batch of samples should be indexed by `axis` dimensions. + `\\(E_p[f(X)]\\)`. A batch of samples should be indexed by `axis` + dimensions. log_prob: Python callable which can return `log_prob(samples)`. Must correspond to the natural-logarithm of the pdf/pmf of each sample. Only required/used if `use_reparametrization=False`. @@ -316,7 +317,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, Returns: approx_expectation: `Tensor` corresponding to the Monte-Carlo approximation - of `E_p[f(X)]`. + of `\\(E_p[f(X)]\\)`. Raises: ValueError: if `f` is not a Python `callable`. -- GitLab From 622c0416bd6a00f9baf53e54a65ad5e5d3b87e30 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 13:24:38 -0700 Subject: [PATCH 697/960] Updating a test in constant_folding_test.cc that uses a graph with placeholder nodes by providing input to those nodes. This will allow evaluation of the fetch nodes in the optimized and original graph and check whether the output tensors produced by them are the same. PiperOrigin-RevId: 190976595 --- .../optimizers/constant_folding_test.cc | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index e0ff9b17b1..16a19ba8ce 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -82,6 +82,14 @@ class ConstantFoldingTest : public GrapplerTest { } }; +template +Tensor GetRandomTensor(const TensorShape& shape) { + typedef typename EnumToDataType::Type T; + Tensor tensor(DTYPE, shape); + tensor.flat() = tensor.flat().random(); + return tensor; +} + TEST_F(ConstantFoldingTest, SimpleFolding) { // Build a simple graph with a few trivially prunable ops. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); @@ -371,6 +379,23 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(2, t.tensor_shape().dim(1).size()); } } + auto a_t = GetRandomTensor(TensorShape({3, 2})); + auto b_t = GetRandomTensor(TensorShape({2, 3})); + auto x_t = GetRandomTensor(TensorShape({2, 2})); + auto y_t = GetRandomTensor(TensorShape({2, 2})); + auto bias_t = GetRandomTensor(TensorShape({2})); + + auto tensors_expected = EvaluateNodes( + item.graph, item.fetch, + {{"x", x_t}, {"y", y_t}, {"a", a_t}, {"b", b_t}, {"bias", bias_t}}); + EXPECT_EQ(item.fetch.size(), tensors_expected.size()); + auto tensors = EvaluateNodes( + output, item.fetch, + {{"x", x_t}, {"y", y_t}, {"a", a_t}, {"b", b_t}, {"bias", bias_t}}); + EXPECT_EQ(item.fetch.size(), tensors.size()); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorNear(tensors_expected[i], tensors[i], 1e-6); + } } } -- GitLab From 405efdd47c20919e5a05c86b0ae2e6c8c150e534 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 13:27:01 -0700 Subject: [PATCH 698/960] Use GraphProperties directly in ArithmeticOptimizer. PiperOrigin-RevId: 190976918 --- .../optimizers/arithmetic_optimizer.cc | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 5dd0b6f4b0..36b26c18f9 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -196,8 +196,6 @@ void SetSourceDataType(DataType dtype, NodeDef* node) { bool IsNumberType(DataType dtype) { return kNumberTypes.Contains(dtype); } -const char kOutputShapesAttr[] = "_output_shapes"; - // Shape is symbolically defined if it has a known rank, and each dimension is // defined, or is an unknown symbol (dim.size <= -2). bool ShapeIsSymbolicallyDefined(const TensorShapeProto& shape) { @@ -234,16 +232,20 @@ bool ShapesSymbolicallyEqual(const OpInfo::TensorProperties& left, // Returns whether `reshape` is an identity op. The tensor that `reshape` // reshapes is the `output_pos`-th output of node `input`. bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, - const int output_pos) { - if (!reshape.attr().count(kOutputShapesAttr) || - !input.attr().count(kOutputShapesAttr)) { + const int output_pos, + const GraphProperties& graph_properties) { + const std::vector& reshape_props = + graph_properties.GetOutputProperties(reshape.name()); + const std::vector& input_props = + graph_properties.GetOutputProperties(input.name()); + if (reshape_props.empty() || input_props.empty() || + input_props.size() <= output_pos) { return false; } - PartialTensorShape src_shape( - input.attr().at(kOutputShapesAttr).list().shape(output_pos)); - PartialTensorShape dst_shape( - reshape.attr().at(kOutputShapesAttr).list().shape(0)); + const PartialTensorShape& src_shape = input_props[output_pos].shape(); + const PartialTensorShape& dst_shape = reshape_props[0].shape(); + if (src_shape.unknown_rank() || dst_shape.unknown_rank()) { return false; } @@ -256,7 +258,8 @@ bool ReshapeIsIdentity(const NodeDef& reshape, const NodeDef& input, // sizes. auto num_unknown_dim_sizes = [](const PartialTensorShape& partial_shape) { auto dim_sizes = partial_shape.dim_sizes(); - return std::count(dim_sizes.begin(), dim_sizes.end(), -1); + return std::count_if(dim_sizes.begin(), dim_sizes.end(), + [](int dim) { return dim < 0; }); }; int src_num_unknown_dim_sizes = num_unknown_dim_sizes(src_shape); int dst_num_unknown_dim_sizes = num_unknown_dim_sizes(dst_shape); @@ -1272,7 +1275,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( // outputs tensors of shape [M, N] while feeding it with tensors of shape // [M*N] (or worse). The reshape nodes are then necessary to update the // tensor metadata to the required shape. - if (ReshapeIsIdentity(*reshape, *input, output_pos)) { + if (ReshapeIsIdentity(*reshape, *input, output_pos, *graph_properties_)) { return reshape->input(0); } } @@ -1695,18 +1698,11 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, // Shapes are only needed in aggressive mode. graph_properties_.reset(new GraphProperties(item)); TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); - // TODO(ezhulenev): Use GraphProperties to lookup tensor shapes directly - TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_)); // Perform the optimizations. DedupComputations(); TF_RETURN_IF_ERROR(SimplifyArithmeticOps()); - // Clear output shapes. - for (int i = 0; i < optimized_graph->node_size(); ++i) { - optimized_graph_->mutable_node(i)->mutable_attr()->erase(kOutputShapesAttr); - } - return Status::OK(); } -- GitLab From 0390fbec15f3d99c3badce3d666893ff124f7846 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Thu, 29 Mar 2018 13:28:05 -0700 Subject: [PATCH 699/960] Docs: Move TFLite docs into tensorflow.org PiperOrigin-RevId: 190977057 --- tensorflow/contrib/lite/README.md | 240 +----------------- .../lite/g3doc/TFLite-Architecture.jpg | Bin 48710 -> 0 bytes tensorflow/docs_src/mobile/leftnav_files | 1 + .../docs_src/mobile/tflite/demo_android.md | 156 ++++++++++-- tensorflow/docs_src/mobile/tflite/demo_ios.md | 2 +- tensorflow/docs_src/mobile/tflite/devguide.md | 224 ++++++++++++++++ tensorflow/docs_src/mobile/tflite/index.md | 4 +- 7 files changed, 363 insertions(+), 264 deletions(-) delete mode 100644 tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg create mode 100644 tensorflow/docs_src/mobile/tflite/devguide.md diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index c15ae3f233..a676b705f1 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -1,238 +1,8 @@ # TensorFlow Lite -TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded devices. It enables low-latency inference of on-device machine learning models with a small binary size and fast performance supporting hardware acceleration. -TensorFlow Lite uses many techniques for achieving low latency like optimizing the kernels for specific mobile apps, pre-fused activations, quantized kernels that allow smaller and faster (fixed-point math) models, and in the future, leverage specialized machine learning hardware to get the best possible performance for a particular model on a particular device. +TensorFlow Lite is TensorFlow's lightweight solution for mobile and embedded +devices. It enables low-latency inference of on-device machine learning models +with a small binary size and fast performance supporting hardware acceleration. -![image](g3doc/TFLite-Architecture.jpg) -# Getting Started with an Android Demo App - -This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using either a quantized Mobilenet model or a floating point Inception-v3 model. A device running Android 5.0 ( API 21) or higher is required to run the demo. - -There are 3 ways to get the demo app to your device - - Download the prebuilt binary or - - Use Android Studio to build the application or - - Download the source code for TensorFlow Lite and the demo and build it using bazel - -## Description -In the demo app, inference is done using the TensorFlow Lite Java API. The demo app classifies frames in real-time, displaying the top most probable classifications. It also displays the time taken to detect the object. - -## Downloading the pre-built binary -The fastest path to trying the demo, is to download the pre-built binary -[TfLiteCameraDemo.apk](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) - -Once the apk is installed, click the app icon to start the app. The first-time the app is opened, the app asks for runtime permissions to access the device camera. The demo app opens the back-camera of the device and recognizes the objects in the camera's field of view. At the bottom of the image (or at the left of the image if the device is in landscape mode), it shows the latency of classification and the top three objects classified. - -## Building in Android Studio using TensorFlow Lite AAR from JCenter -The simplest way to compile the demo app, and try out changes to the project code is to use AndroidStudio. - - - Install the latest version of Android Studio 3 as specified [here](https://developer.android.com/studio/index.html). - - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings). - - Import the `tensorflow/contrib/lite/java/demo` directory as a new Android Studio project. - - Click through installing all the Gradle extensions it requests. - - Either - - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) - - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: - `tensorflow/contrib/lite/java/demo/app/src/main/assets/` - - Or download the floating point Inception-v3 model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) - - unzip and copy inceptionv3_non_slim_2015.tflite to the assets directory - - change the chosen classifier in [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java) from - `classifier = new ImageClassifierQuantizedMobileNet(getActivity());` - to - `classifier = new ImageClassifierFloatInception(getActivity());` - - Build and run the demo app - -## Building TensorFlow Lite and the demo app from source - -### Clone the TensorFlow repo -- git clone - [https://github.com/tensorflow/tensorflow](https://github.com/tensorflow/tensorflow) - -### Install Bazel -If bazel is not installed on your system, install it now by following [these directions](https://bazel.build/versions/master/docs/install.html) - -NOTE: Bazel does not fully support building Android on Windows yet. Full support for Gradle/CMake builds is coming soon, but in the meantime Windows users should download the [prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) instead. - -### Install Android NDK and SDK -Bazel is the primary build system for TensorFlow. Bazel and the Android NDK and SDK must be installed on your system. - - Install the latest version of Bazel as per the instructions on the [Bazel website](https://bazel.build/versions/master/docs/install.html) - - The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The current recommended version is 14b, which can be found [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads). - - The Android SDK and build tools may be obtained [here](https://developer.android.com/tools/revisions/build-tools.html), or alternatively as part of [Android Studio](https://developer.android.com/studio/index.html). Build tools API >= 23 is required to build the TF Android demo (though it will run on API >= 21 devices). - - In the root of the TensorFlow repository update the `WORKSPACE` file with the `api_level` and location of the SDK and NDK. If you installed it with AndroidStudio the SDK path can be found in the SDK manager, and the default NDK path is:`{SDK path}/ndk-bundle.` - -``` -android_sdk_repository ( - name = "androidsdk", - api_level = 23, - build_tools_version = "23.0.2", - path = "/home/xxxx/android-sdk-linux/", -) - -android_ndk_repository( - name = "androidndk", - path = "/home/xxxx/android-ndk-r10e/", - api_level = 19, -) -``` - -Additional details on building with Android can be found [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). - -### Build the source code -Run bazel with the following command to build the demo. - -Build the demo app: - -``` -bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo -``` - -### Note - -Currently, we only support building the Android demo app within a Python 2 -environment (due to a Bazel bug). - -### More about the demo -The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (299 * 299 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. - -# iOS Demo App - -Similar to the Android demo app, there's an iOS camera app that uses exactly the same model (224 * 224 quantized Mobilenet). - -This demo app requires a camera so it doesn't work with simulators. It need to be executed on a real iOS device. Follow the instructions to build and run the demo app: - -1. Run `tensorflow/contrib/lite/examples/ios/download_models.sh` to download the model files used by the demo app. -1. Install [CocoaPods](https://cocoapods.org/) if it wasn't installed yet: `sudo gem install cocoapods`. -1. Run `pod install` in `tensorflow/contrib/lite/examples/ios/camera` to generate the workspace file. -1. Open the project by running `open tflite_camera_example.xcworkspace`, and build the app in XCode. - -# TensorFlow Lite Quick Start - -## Step 1. Decide which GraphDef to use - Depending on the use case, the developer may choose to use one of the popular - open-sourced models such as InceptionV3 or MobileNets, re-train these models - with their own custom data set or even build their own custom model. - -### Using a pre-trained model - -[MobileNets](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html) is a family of mobile-first computer vision models for [TensorFlow](https://www.tensorflow.org/) designed to effectively maximize accuracy while being mindful of the restricted resources for an on-device or embedded application. MobileNets are small, low-latency, low-power models parameterized to meet the resource constraints of a variety of use cases. They can be built upon for classification, detection, embeddings and segmentation similar to how other popular large scale models, such as [Inception](https://arxiv.org/pdf/1602.07261.pdf), are used. Google provides 16 pre-trained [ImageNet](http://www.image-net.org/challenges/LSVRC/) classification checkpoints for MobileNets for use in mobile projects of all sizes. - -[Inception-v3](https://arxiv.org/abs/1512.00567) is an image recognition model which achieves fairly high accuracy in recognizing general objects with 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". The model extracts general features from input images using a convolutional neural network and classifies them based on those features with fully-connected and softmax layers. - -[On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) is an on-device model which provides one-touch replies for an incoming text message by suggesting contextually relevant messages. The model is built specifically for memory constrained devices such as watches & phones and it has been successfully used to surface [Smart Replies on Android Wear](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html). Note that this model only works on Android as of now. - -These pre-trained models can be downloaded from [here](g3doc/models.md). - -### Retrain Inception-V3 or MobileNet for a custom data set -The above pre-trained models have been trained on the ImageNet data set, which consists of 1000 predefined classes. A model will need to be re-trained if these classes are not relevant or useful for a given use case. This technique is called transfer learning, which starts with a model that has been already trained on a problem and will then be retrained on a similar problem. Deep learning from scratch can take days, but transfer learning can be done fairly quickly. In order to do this, a developer will need to generate their custom data set labeled with the relevant classes. - -The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) codelab walks through this process step-by-step. The retraining code supports retraining for both floating point and quantized inference. - -# Getting started with RaspberryPi - -Using RaspberryPi can be accomplished by following the [Makefile instructions](g3doc/rpi.md). That will give a you a static library (.a) that you can build your app against. Python bindings will be coming soon as well as a demo app. - -### Train a custom model -A developer may choose to train a custom model using Tensorflow. TensorFlow documentation has [several tutorials](https://www.tensorflow.org/tutorials/) for building and training models. If the user has written a model using TensorFlow's Slim Framework the first step is to export this to a GraphDef file. This is necessary because Slim does not store the model structure outside the code, so to communicate with other parts of the framework it needs to be exported. Documentation for the export can be found [here](https://github.com/tensorflow/models/tree/master/research/slim#Export). The output of this step will be a .pb file for the custom model. - -TensorFlow Lite currently supports a subset of TensorFlow operators. Please refer to [this document](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for details of supported operators and their usage. This -set will continue to expand in future releases of Tensorflow Lite. - - -## Step 2. Model format conversion - -The model generated in Step 1 is a standard Tensorflow model. After the completion of Step 1 a user should have a standard .pb or .pbtxt GraphDef file. If the application developer is using a pre-trained model (as defined in Step 1 above), they can download a ready to use, already converted model for use from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/models.md). Models generated using retraining (aka transfer learning) or custom models will need to be converted using the steps mentioned below. - -A prerequisite to converting the model to the Tensorflow Lite format is to freeze the graph. - -Since we employ several formats, the following definitions may be useful: - - GraphDef (.pb) - a protobuf that represents the TensorFlow training and or computation graph. This contains operators, tensors, and variables definitions. - - - CheckPoint (.ckpt) - Serialized variables from a TensorFlow graph. Note, this does not contain the graph structure, so alone it cannot typically be interpreted. - - - FrozenGraphDef - a subclass of GraphDef that contains no variables. A GraphDef can be converted to a frozen graphdef by taking a checkpoint and a graphdef and converting every variable into a constant with the value looked up in the checkpoint. - - - SavedModel - A collection of GraphDef and CheckPoint together with a signature that labels input and output arguments to a model. A GraphDef and Checkpoint can be extracted from a saved model. - - - TensorFlow lite model (.tflite) - a serialized flatbuffer, containing TensorFlow lite operators and Tensors for the TensorFlow lite interpreter. This is most analogous to TensorFlow frozen GraphDefs. - -### Freeze Graph -To use this .pb GraphDef file within TensorFlow Lite, the application developer will need checkpoints containing trained weight parameters. The .pb contains only the structure of the graph. The process of merging the checkpoint values with the graph structure is known as "freezing" the graph. - -The developer should know where the checkpoints folder is present or checkpoints can also be downloaded for a pre-trained model (Example: Here is a link to the [MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)). - -Graph freezing can be done using the command below (and modifying the arguments appropriately) - -``` -bazel build tensorflow/python/tools:freeze_graph - -bazel-bin/tensorflow/python/tools/freeze_graph\ - --input_graph=/tmp/mobilenet_v1_224.pb \ - --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ - --input_binary=true --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ - --output_node_names=MobilenetV1/Predictions/Reshape_1 -``` - -The user has to first build the freeze_graph script using bazel and then run the script. The input_binary flag has to be enabled to ensure that the protobuf is read and written in binary format. The user has to input the .pb and the .ckpt files to freeze the graph The output_node_names may not be obvious outside of the code that built the model. The easiest way to find them is to visualize the graph, either with -graphviz, or [in tensorboard](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/#3). - -This frozen Graphdef is now ready to be converted to flatbuffer format (.tflite) for use on Android or iOS. On Android users have the flexibility to use either the float or quantized versions of the frozen graphdef, if available, using the Tensorflow Optimizing Converter tool. - -Here is a sample command line to convert the frozen Graphdef to '.tflite' format for The Tensorflow Optimizing Converter supports both float and quantized models, however, different configuration parameters are needed depending on whether a FLOAT or QUANTIZED mode is being used. -(Here is a link to the pb [file](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz)). - -``` -bazel build tensorflow/contrib/lite/toco:toco - -bazel-bin/tensorflow/contrib/lite/toco/toco \ - --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ - --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ - --output_file=/tmp/mobilenet_v1_1.0_224.tflite --inference_type=FLOAT \ - --input_type=FLOAT --input_arrays=input \ - --output_arrays=MobilenetV1/Predictions/Reshape_1 --input_shapes=1,224,224,3 -``` - -- The input_file argument should point to the frozen GraphDef file that holds the model architecture. -- The output_file argument should point to where the TensorFlow Lite model file should be generated. -- The input_type and inference_type arguments should be set to FLOAT, unless converted a [quantized](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/) model. -- Setting the input_array, output_array and input_shape arguments are a bit trickier. The easiest way to find these values is to explore the graph in tensorboard . The user should reuse the arguments that were used for specifying the output nodes for inference in the `freeze_graph`step. - -Note, it is also possible to use the Tensorflow Optimizing Converter through protos either from Python or from the command line see the -documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/python/toco_from_protos.py). A developer can then integrate the conversion step into their model design workflow to ensure that a model will be easily convertible to a mobile inference graph. For example, - -```python -import tensorflow as tf - -img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) -val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) -out = tf.identity(val, name="out") -with tf.Session() as sess: - tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) - open("converteds_model.tflite", "wb").write(tflite_model) - -``` -For detailed instructions on how to use the Tensorflow Optimizing Converter, please see [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). - -You may refer to the [Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) for troubleshooting help. If that doesn't help, please file an [issue](https://github.com/tensorflow/tensorflow/issues). - -If you would like to see a visual description of your TensorFlow Lite model after conversion, you can use tensorflow/contrib/lite/tools/visualize.py by running -```sh -bazel run tensorflow/contrib/lite/tools:visualize -- model.tflite model_viz.html -``` -and then visualize the resulting HTML file in a browser. - -## Step 3. Use the TensorFlow Lite model for inference in a mobile app - -After completion of Step 2 the developer should have a .tflite model. - -### For Android -Because Android apps need to be written in Java, and core TensorFlow is in C++, a JNI library is provided to interface between the two. Its interface is aimed only at inference, so it provides the ability to load a graph, set up inputs, and run the model to calculate particular outputs. The full documentation for the set of methods can be seen [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/). The demo app is also open sourced on [github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). - -The [demo app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app) uses this interface, so it's a good place to look for example usage. You can also download the prebuilt binary [here](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). - -Note that you'd need to follow instructions for installing TensorFlow on Android, setting up bazel and Android Studio outlined [here](https://www.tensorflow.org/mobile/android_build). - -### For iOS -Follow the documentation [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) to get integrate a TFLite model into your app. - -## Core ML support - -Core ML is a machine learning framework used across Apple products. In addition to using Tensorflow Lite models directly in their applications, developers have the option to convert their trained Tensorflow models to the [CoreML](https://developer.apple.com/machine-learning/) format for use on Apple devices. For information on how to use the converter please refer to the [Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml). +See the documentation: https://www.tensorflow.org/mobile/tflite/ +Documentation edits can be made here: [tensorflow/docs_src/mobile/tflite](../../docs_src/mobile/tflite) diff --git a/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg b/tensorflow/contrib/lite/g3doc/TFLite-Architecture.jpg deleted file mode 100644 index bc83946647c6a923a8a0bd3a041b42e4febe6a31..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 48710 zcmex=oIr{vTivovIz$!vMUve7&T5@$f4}C@t|nX z#SbdRNkvVZTw>x9l2WQ_>Kd9_CZ=ZQ7M51dF0O9w9-dyoA)#U65s^{JDXD4c8JStd zC8cHM6_r)ZEv;?s9i3g1CQq3GGAU*RJ2VdF$b$$4{OPfBE|D`;VW$K>lK6U=!0ld(M2vX z6_bamA3_l z2KW9s^}peD{?{gb|3uhd5yr+*!$*TZdX|KG2@sy;|tI(UwiTr}yrfW3cYnj`{8NpZM~>&;QThBzil5(NzQ$BbS-) zR=0j!|EDKP{r7){#|~0c0$mwIsD?vIyT4vGS>3((s`yj6wYM7d1LMr)wyro{u(@{E zgQ>YUjJDZpeLc;+W&ehzSN6M=*#1;^F>D08R+4?;q6RJ}rzTGZz9knpZ27XZ*Z=r3 z|KHB}PcoP+zG_P@5j41ILa^I?UG0IHJwGMgy^8$M**i8xwUzDabaA_=Ik!uNHy69T)>wZuEm%B-i zE@><3njF=TY z^yB(JH(lYs{~126d$l88>OaGYql@bI|1)7+3HCBt!XZ~IM!x^6?j^_NZg=$Gs~lgS z_~-S`Ss_jAvz(ZerX^J~#+j~&+xl<0>64ju$Ltrmwzz zm-*A`#N(^x@iJ8Tcgs#bT<+5o+91>1d?kGD%k5Ho4|`@Ns=WKmV|LAwq59i;$B)VE zANF3>JT*0lf5nM)$q&E(Y1ONL-TzgDarML1()amq$KQF?5oFNVeoorrT$R+br&mhr zMPAKMZ*vPx@;b66G_=-#ZF!tFPmO8ycFS9w$CtSl*)eFD&TKsPnDh3}E{2sWUi>=W zE~>ZhLfN!qKVm-AzFz;~-?m@pnhP#Jek-ujUHQk32iNXCzU7|h`?AG(;(TlA;vZKd zo?L#iao4}|*-1>Y|J-c(jQfu@*sS;b9sg|&)A4_H-GcHPmDlEE{gb)2CUcgB;( zIfC<#ZQPStXj7*=H7HZGOrPrr-&FO6`X{#Pe+%-TY3=X*&+sI5`ro7fLKrW_XPVDC z-d=Jzw@As*dFN@vpPSg17R;($vVGPRvyC1?k@8vF{Mq(Kd;MpaSakj0-u6%HO+P%h zQ{VKTVcq@54zk~7^X!xQ@a1j2`JCjm$qJJvI2?F>Z^!;^*>>7j&KPKnS{|$A zFuh$-94obL!gb-dk%>Q3kNeF$TWlUD|1tKBn)K?a({!F^pFLKuwR=fMRNLf5uU;1j z8y%j)aq|WLj|*&Si`QkvDP3h|~ zR;eDd=LulVFx8DdbZh<1cPizs$~8H+r?#(H@7t8Jqsel@Q(0%`H`(Xj{ZW6U{nBN7 zkh{)|1L@P}gqJUg_D)}NJNkUTk;S2Hd{qIgw?I7vN-?Zf^$a}!Z;}6Z|4$3J>iPaN zJdwHo-u_>U_O^BE5AXkJp8fave})UKWmgyiT}4oEIQQfDpAuXAzyD`AwtLmXn(Y4! z%3_`W{xkfzz_wH*(1DdoSi(;Ccg`-ytsA#b3ZJ=c-@!ThRYyLQx2HF~s-Ej~W!Y7( zZmqBCE7v{iVrYI-w*T{6{=dIP5C7PGc)C$mcJcCCCY?$(k4%3rssFk$_OA%ziAt-f z*Bh5?o^>g)T!KSjy$+*<(&`Frul*aYq*t$wObqL1*m2A+d&i;%e*F8QW;9b1no->#6 ziH{FMt0o`K40MnYZ#J3hbUboW^lHz)494V3Cg<=VAkZ%J3isYO=d z3okl|Y*`U!D|JmWWc3=YHOp5#+VkrB;akThuaaDw`*dR9cUK+f@;Zz4L5ms;OhWcu zHSz8!-EqHU$vVlhxjX(dNIiM?M}N(W>-RG*?>QOj7Zl-thJB&@lklY%9pnnW%Nbl* z{M)eaVQ3pChw|e)A7iIACYC2IS-SVjqfGX^-QmA?zK_}xz&rEqC9(%Dg9ZajUd$rP|KT zvox9v?wmeaB`U&rpyIg475%N}t*)JmO#XBwFZ9HD)9Wt^PUbI)JgOx!QHgQNSGFx0 z3wYuMUvRFA(g@=$l=I1}s^u*#`?gQxg6!h$Z3Ro7v;@4U{e69Tpo3g>e(IyWao;4< zCuNuRZfCDih+ovV^#zAya=`Io#*@dRN>MWmdIlnv4{!6V@`B}AhG4YjPN6+=*2Qde zA+>i*?;_{i22+hiT?|yk>&lNswO*UM#8r36F-^II*7lf$`7dLBc|Ouq4wpK}*j9gD zv^U39OVg_F*p?L`t3PPZYjv&sR^$4g!Ey4xdajrs-T#`^|NIj@|95HYzvBEAkL;&> z;nF_;`H}uFp821D1kM+%zmW2uVg8~1w>tWt+piSLpQ>UNeg5;K_+K{vpMNyYckF** zRDb@V`?qQ0KM%jM=znU<)b;s4!;cmJ86XmWLh8Rr+CTrW__sUvpNC&%+&|50bgldc zcY-m@s=td{{}ue|Q~v4iuxQVJ20Nwy3=2;FXRvRTgjx3Q;-UWx7Panzf0i!@jH`#c ztq1B0k^0QU`Z;!sJN^Y<&{%H|bDJC7F8eJ7`_IV-9;w%SAre0S`H^^t%Mez{-?E5* z&QGOK_yc#=QFmx~qdGwNkpll8KmMP8w9^k(O?|g8w@hE%*rbSINncgimqL@(@i%wh zIJTB+%0AQN6^pvGixn1iX)sU&uXz5SAyEDI@}D77W&Sg~Qu)tN@}EJAVflym{~XW$ z`~08bh12e=M|Gg;*>eBA`oA3ExtLWSAMKt=l{Jm z*YRjb%&8aWwxm4x8Qc4NK3`OnfZCO-+&m2@d;V?h%E>Di*!F4DSN6mwRoC8k|7W-! zUB4si-sTnOm%qH0oflHs*>>uA57*c1#1%WwMo)FSon1XIR>w6}!uqpi`6o+{w|Q#5 zA03rfyj$h5dRJ`g-q&Zg3wcU9P7tl1TN*esci&Nw&7wMmPY;Tps_lHy|taSn_PPj?sbNs=X8UN+j*- z$V=P&p{Cg3b_$P!#W}m=aFvMneiKa(wWa^;tDd{_U+MI3DXL3S*Y&?Q{dZwb>{D6O z_`>7T-c$R}WLL7(-_t7e?c!X=DRAhM;PtG@UGG(Pdc0rKLdOtvqWc~F;q;5?kV1KT{|YS*CKfBY1Y(~vV?TaOM7;2Sk$H6@&-H>tFfp- zaNo;c!FBTg8CLB7&#b=u#xKh>c5$NE3RgyQ-={~3M>th+U6#z0&dissq> zXPBTO|Mi3Y7qL~>{xf`hEB?FrXXt$S{|qOD_<#M7|HU1eUH{`-_uqqmRv$QTCn0w( za8IYkYB8rPcN#0sDIT}By6#}oEAsN$?=w3;u?Nqq*9q@;X16-IW9#y=s?v>ti{7f7 zi|e{L*+2U9q+KCJr#a#xj~Uc$W!aVcM4Xdlg3*cRT(LDU4N!)%Yvy2eT&w_ zM8rm&+UU!?dOMfloWEM-7fmG|evZ2Ot#O`#|5K;MVZU5o@NL!BKAGiMUSC{u>bg+A zmVpIR_gbfCUzPhR%hx1x8L(-7`xW~3j#7j|x>MoAu4|hvtaY1jdTc>z@1rCcMH$9e zlNCwX@ujt;maoDN2YILG9p(7rxYCuQH_t+rGr-p%dsf5DrNV#SER1*)=l(Q2dHY4x zT}{@R>SdK8qJ@n;p98XX<=stvo3l{;DT~(UsB;o44$t)ab=$pI!=LBrsfT&H7EQdO z*W0n#=-&gGx3_GIuTAsydKZz{xy)Me`1A(GnCi>#N@l-y;Wyo_HP84;klplZnTswL zWIz2qIPq+;X=jx6x7VMn!mrL-b-yv;M|-egW!mFy=DCt9a=upI7C0o?C-J0BQu6CE zz3Ek-y@d|ke)RjXa#wEat~;*;4yrlrPI$aFXrAgh^St*i-ffbuH?{MQ$G)0A{o!u!iam9P~ z)ElouS8mUK<>*=xW!I>d6ORRD2bc+nOu7Y^9FDx+@X~{%EPT&B^@j zzHFP8MTgg-<+3F^=iD#(tGS>5Kf{&y{|wh1|7M>3?tf|ahyM(JUH>!uEs?*rcTV@) zKmEq$x75>JMZz!4Qy{X2LFsK_(=)^xhEHvMfN8O~2S@1L?ixa0As_0qi2H;dJ_huQl0>SrxZ-@NnM z3u$SG(%#4=cWzHJyY>9zuaL{9{J&M%ZaMk;;nt-I{*TXP_cpzXPQTByR=R3+(p=TA zXD;bXiaBv^f5|jUd9N>;&%8_?%Y`pIY{OhCvvW&&jGo$mhL)`hKgS$>`uFUek}r$y zoLX5kKi%j+T3%xPoZ^=;4SRM7SKiN4 zyYKGb6Lk7^l(R5F zvmW^yMb5obqI2`8*R83Kk3O}%vu4II?pf>KYTWy?HoHDPu+}MTyV0%l{}``u_bWT> zn|1Zuqp%z9zpu=SeZ0Kwo!%0rbe<#a|Mcc&PZBp;qb6wF(I0a6;2+JPXDJ;=HhU=B z23Ruu6P^F9JIL>>cJ#B%udJsYmwerS;#PCm@4BU_@;4LPa}I85o*aE)%D=n&0~}@? zn_c|H^x%>k}LgimKmwX3W|1Nz(n}bjv@lG}44xa!t#NRwpEzzq8hsP3?+x zUJ@@GIy;k{$?fFJu$(-_>7LuKs6KtWIPu40_MhR>wtvrFYMuA+3G-%iQK`E=8zsWh z1?5&qytQx5mN`B3rP$S7?@~T}S6wsp{pF3fIbOUrQ4HJ;J#qEps?XvQRyJV}|=d_8;|>#Ba7&yEtcEQ#golJHOW3)r{jy}Dui zz1WwVOAhV&&+v8Shr2qL?mM-AFMA=t__}%i=eIHHxAkn^Tfa5>Ipf&h=k{}zV$Vw^ zO&8Mp7tdFhzqa&0L)PoM@az8=Pye0uHZTA6%BzBf`fa+?eRWh=@&)I}*I!#dy;uEm z(Az0@a}_4P-ah}@jt>2Gy2VfYJ~m&^4p!MQDYNFLYFhn!v1bd@0;5*!e!1M&TI2)o z?qcn%KVPl0HNLW5*SlBrpCL{8$;qF!8#HsyRG~MQxudWKN2_>#Le@w{C0UnMbp{bk~;O5472pn|0m)z?-GBOOMU!&(}%u{IsP- z_`=t9-i_z>laP-d}ng& z#uNL8jNAS*ENDIdMB_h03)Ag?P2%t=)74bgq<(~9$^U@-uQsegsd}3d#X|26!vAFuF zu6>IC878P6zff-&BU(~5z2o7&4b$!_O!R&{$zA5UWt^DR5-nc~>-phR16I0C?sd&| z+TG7ne?rv$e*W_q+Y%}N4#mrB#O^+r*pXDkaol7>sIuSECvVzk1d&nxkFvc zm)|%2V^@Cjo{D|R&foXFcE-I)S+cHw@*Ek})4GScG*+ol+V{eze`xi~L_(`?|Bkid zw|bY_W%HJG_k6E&{~21=yZmRE|K&f!KK~y9Y-dG#|1)sLb^d3tfBB!GX8wo8|Ah7m z|Es_J@2CAk2WgWq^FOLp>VN(-EdTqd{!sr9uP^>T|1kox5sz3j${=WJjP~k_s#olhLw)uw-evi4S zCcXNI_3!=<>q46NI1b9L-iGRdFtM$b3+}P)*tV}X{goRB|F6>Z=HkuIzQ24|y|XU! zbXMuSxsu(ndYum+tKFV`^-!rte|>S}FVl1Fd9Fv_zDW!Go_tWT_H0Q!vM=VSalgIt zyZel>_r1B|pUoH;7}yw=uE|I7x;(OmuO)UhHy3T(kK}t~pEIX*9Yb|>AZm~=;GLx{ zjTG$e$U)vy8I)zOpz|=XqqPpR{Z*LliYe>0o9Iz0tZFhyM%{WFP|* zYgU-77HcvyJK-7NoAm?xFkYNoX;49rWo&=6R!iE`GwZ_to!s!BVSdQs-)e{d)h4f= z65jZA*8Ow;zE#H^n*M41j-`JmC;n%!Tj~DIxBbs=_t>!R>YPi_H%~5iR7^WKaf7i=Eg7q@8uXEi2Q%%4X)(*qiRTz^LUZaz->IuG!S?y!7d_<=^{nC0gz* z`Q4Wf2^C1-1nv6I@Wbuj@j~IV#lroj?9;cuXF3ozZ9!*tfzW;Dwi4}02ifCK?MiGt zTC_cAaoE@Ti7g*3>QWZ6ynnIZ`22r{O*$UuZwmA;TmlL5h}XLsr{(eo7w1fn+5Yap z@BJEUPUrKU*4As}q}3*itsS0(O`Dy~GEZ{%%!j>8f^H-&nE2zOpT$hSkUO_BI^=%6=f8D8oCXL!Bf-`00ewyN9J zrPuZ=n=Y4F@oBZ}T#>-IkB_M}ugeM>&Rha7OafT#kymqt3!@DFW6V-%BQ58Oy^1{c zpTQhNt))j$bNGUf|7ZBb()@29`@g2EQTPAZF8a?Q3dL zB~vZeNNKN|rxmdL#PsMnR}D^;THbz{yl2we)R}p??{bbQcdei1w&~MxS48oHvllpI2RlG}(liucBwBohaqWY?Wn4<=m>ytwT0 z!yOTKtaCnF_dE%HvTN(cZI|EJU6^F$v2=!<2=lBb87m@Zdq2GXMf~ekG{nCpg9xRws zx%Fw7m9O0=E!dSC3y^D<15v@KHLwx7*Ck$p1gXx*b_s zk7tMYzTCb450hM^?~?VGf2D2jcz5k}XufWmP++&&@ZIAM*w-PVP&cX$2R=E-w%<0CE! zbIC_LrL;YguUcyrY}$Kb9#`wK*Sb*t2`@C&vgNE@bm*oI zkBjE#HDZJeyS}eF>3R2TwRx`ny5*gIdiJTmU2EO{GaRx_NHjJ}n^-@ij z4}bLJy&UUzQ9@>=LgDeM{omFqWTl6Sw``368u8rXXt;0Mt!Q2s>y}C7s=_i88`wXs znyRA`dn-0~rPealwbCzK?OM~eeTY3+svPa{FtXzbQ{k-OhfDumkB&F|(=7M*?8VagS?;Ur ze}tyU$!#y4RhcK7JF#me_p}_D^v{dp`IPQOttmE+_MafpBX}&URz7mORK(_K#kpF~ zb7q`)?Oz*qQ~%B3EM6mJv58#_>@0lE_tpj-+*O#Vo8!E)yXRK!M7!Rlk#~>FY(JBF z&(4SW3ro_HYhQL(_qyHVP7*4Q3(ynKw>M463NJXl-t>y^KHXP0zaG<0`y8?%*6DUo zSH0oXnr%INh38ETrb!jfk@AeKJ?drp=%MwtH~&7VeK@;1zfF53?uvISwSkjGXF!VlsZH-ntt;?7 z334YMd6CxjQskx43|AIWX&xVH4)c_cKh&LD8V~G{{Lip2YUhhR?r9U1B%YsI!OmrW zY_Cp(8wX2JW#-?be^$>HZdu;x8SWSV zuKxFv`ra!0Y4h*IpX%4O-?4vc;eUqYhr8|fy!>~0{a@vC_77Dz{b!h0{GZ{z@}G?V z3@y`6|EoOzpP{Dz<9hc`mw$Kv41Bu$yZEQYzvX#v{(bm&dHgT;IrWFUQ~xvAJpa$I z&;7?mWNYL4A*xUNztex}{>@H$^Y6pI{q}$9w?Hv^@jszx{h!b6|9-MRwpSja7~;k^ zHRkEx+rQ1Nen0 zMb#J9YrXt;{BT))s}0j4Gh1+!=%~0dF#`!JYV0nGVab_)~Yo5W@5Kv6RR(}ol6`S~a2vN_{ zJL;cs7JiXG6V4j@zQZ2Ij#Eqi<5KJ9y;S^_=$(q>e-11E{jRrq_O2|#^6#yfu}5d+zKmD>D^GvsxE3?%QvA%hVQ+HRtxpq(e)s+0f%sy-%Oxkj zYJPJuXnw!*#iHG_r8!J~7m8^A`z*Jn_Eo`-$dH=zE@6rNtsI|pQ;hvCFTVWXhQvw5 z-{u{E>(FFgFu@KE>Ke})%)^JgxGd*H>Q&9#;DJ~{Hv-IJWDW$}1c z&ObSgYwvkaon<*E;<0zL^RY*_p0x-&Y}k?VGhJ=w`Tq=XpSZ((XL_ankiPh>DE{s1 z=WZ`rcCYaL*|4o2exJ~jdz9Ly5wY6Lcux7^*A-VTJ&CW-vEyC+c{4NI{=R(StnUI_ zf?iDAWpI$)^upZ?BX=F9TPbtZEy84y?_8byaK>p9!~1U6y>=Qtntt7I)2?qB_2Gw( zo%NkFcYDXzYzr|76OXIx3vI64|1j-!{TFusGuIuHA3oc)by8-!z>GVq*<=l`&$@A0 z=h3#y?_4-VZ!ezPvch_Y=vp2(tuQ|=t^3zn!k4y(XZiU&Jl<`3sK6ve{Z(f4(W+N- z%z765lIeOKSADrSYr0g5*{jP7?kz}PYaV*($yL+ueMW*o29f<)sy*vZS7`6Poz~eW zH{EpEo3;0ztN&-<%-jB-!J*{OS$VjlobB7Ezq#a_UN)Uc=RHG_p~pSBe5;Gevx{PQ z-W|XD*mUx-zM+^~d?Q+VnC<^&Zn1 zi_>e>OhYAACLb-iCAaL>zNw;*k140Ah_Wj;cRelD|H*czen)ox!TL|Sj{iQ_TYdfX zCuL(m(WI^Y=F)9_a-ObtUT7@p`ogC`t!iSZRSArTe_a3PTor%)SJ%eSKiU5o6!+@C z{_7gCe%5*WPI>3Mr|dXcMFQgl->`}V`sj&XxPABb`j)5LGbi{Ix2VtCdcGws=+u(N zD~*>XZC@A_l6UED#G$Pk*EZ#+7cc+tZ8=L-&9q5A8%|FOoKsL7eCL&u+P1FfA1Z%e zP3Ks^z<93s)=nwo*n%_9rLx3sJewEGd8K&Y^MLEu_J5eBdtjcu`srhTKF$f*c71~M z>dkD6HlDa7v|5eJeE(0=6OX&%wmhEc$Mmj=Rf>Cp#H0nE0y}Yca~ib*fBj|n|Zk2#U-nbSAVd49m{b|XL8gW$;qc?nKC&PF-oqp zUS3t%7TV*LoY1~Fx&PdzgP9&;##^|dAjyp>GY~ALO&|IPR-An%4F1h ze&IU5`A}9Hn}1%v?8JYD zR{h`uXZLr?J1!iEx zuA8R0@Oh@pBE_a^!Dc z&}G?vE4koy^@p3QtJ-#E>Xf}??(tOGa;Jp(+U}hn-X03d`sn?PLtkPsB*tleEZL{sa6%4r-G)=eP3JlHtAuoAjgW;U$)KuzSHZzQ)72%PzlNq&(H$%;Lat)A+Ymb*mTMK3G_$`=nWtrHUuyyVcz(=GTH; z)n`g6D^IXwcs&2otTri=J0^iOdrOmw*K2h}eb)Y^wD?CFo6zz6invFHlh*7I`F(Nw z!D|nC1P;ibx%zUe2jiratJ6;>PVG%*dA4bN{@Z6C?ig@7&M}KDJRj_}rQpj$`}EY3 zUqAb%tiCd(^6O^j8jXcpEtwhIr}*0kMor%?b;PbR+QO#wj8m|WDb%R|q?QcF>(jT*>J$A>0 zlQU97nH;PHV>gv(=|!IQUVq|A*IM?otZU~#3*2=w|F$VS-8Mhl_Rb*-P&kY=;nFC;&1lSi?jVA*GFAU z4U69w{qbd>Yr%9Wu_YI6)=H@Gn|9i=mO1M#ee+&3_SKh9mZ|-#&u1Jv?fd6kP_%bP zlIN1vYv#Y#xJUIgEoVvieLY^qVCk|+*M+=Z#1`y4Z}DjM+MCh$Uw!*_a;JLgX)_DX z^RFXAPD}MZ_?`JGljrcgE!S&n`P|m{CKl%BiyoOcUn_8G;)!*0eiR*YR^6p~$YZfm z*zYA*W2MY?Y`S7{@|;Vh_M7^@NlQ~Nbw8aS@;2}Ff_rvGF&AF$Uq10{LZIhc@3`bJ zTh(<7A^pxE&Duz=GC=#EqBv;dG?T(_d?^QIWh6U)j`{~Z@<3h_13g4 zGv4!TIjbME>GIdiyDm*PuV+m@DIaxm(zD7$kGQam%Tt3YL`6&Yr0e=sWv~5u|DVK@ z2=B{JR{VCt^r&#*TfJ6~K*3!$XJ6HX@Bm3sb-rSWW8LG|md-KbWY|8ec(iPx%6O6<8c-{qvHIpdwW zl7|l*?!PXLZu`amZ?8=Sv!DHIy?3`?oU&h$bLHR4{m<}x zPyP2Bt5-kJm95%)xi&@qLdg6N%^CYYpUeOKBkP*0{PRB?|E|}cO3E@9*fzI-<>I7( z2W+DI5v@^^3T0RMPsf7)GvxZ_+_-W3<+je;oYth62g0+yY=|&z?d~eyd9drKjmwl- zTf0pHUCZV`^Caxh5%TGwl$93sJ>y(b@px+F<@Xx4T2Y?MH#`*=Pb<2oeNq3b_G<6h zqV5@tr#2_gX^wcaPo?>9-Xxi-!G2K9QK__C|}-!IV%^EqGSR9?RPp-leTj2gv-@;?iiUn+HZuzIhQ)`R;zhObhm9Q4<1Lh&Y zv5t8sHl0`z^U%@OSM5@Ga$r%J%a%Y_L+t8FJaS>sFTY`!z~=Aens7?eL!qb1`bv2J z)0;m38SXmOdp+4wFJRh|%X=VG=~%FA$ggnQ7?(_6w^+YZ5|-DOJ>_dpR$0lDtI{Vb zKIzB@?MYYvxP5-tbY@ET!n(I#zN~rc9lqnrlAq^pwBPM1k$)Wd_1UAsb#-sdY}fFG zN|nvcjn6ysZTjTMJ8M2{laZ_a+gm;>Z>iYQ9F_id8LM)(J)gFgXyniOnX*}etFA16 zcfWJwgO5TjKg)e?8y&p7+JEkD(<|>@Eb98+KN()3Axh#Yl*VFYITr0np+o{yf zGTHxDz&`5X>Z51k-wWBzR}EWqe@38d_K*5EYJYTZH68yO_kQ>Ms)x(@DmCsMUm3N0 z)3WE;H>P`j__Dn?YVa5YEJ*XN$_v|+OpL5-fRRpSBw9=`YJkWv2X2^oR6mS zZ`D`E=Hv>$i#o91`VVM9O%?we#NEz^gqKJexd&i7pmg_a7^k)VmH_= zShO!5Sv@j)RPV3|E5H4-<1gsvZhvjA_oiIa^L~$f)vsm8+-Cc4n|LnvQCRXT4-v*B z=xSf$4=!A}%HzXJ^G(N&#wwHrSqm)N7k*jg`q!J!KYg7&CB-u7JO8=5DU4s|ihlaf z@QuH-{)MgnKNe5-Z|pFpgM2_>y#9ZNgo^3^nlJrlc;GUn9*NEPl@+WNu6|VOPz|Q3 z%Rk%>y83pX^6kveufl~DbtY+homt!XB7^w__&hgytnQIFt=Rp~TlV_j^7y|j;jS_7 zyZ1lYc(?v~>3@a`t|cLO1}N<`Qs#yBN!;?3ZdhgH|ImSF(UMu5YnK}O3m;W@wQh<0 zi7O=xfv&a>rztG+xXaA5?y0f~!zr!oYVYptnz<~S4J4IJbR7O@sRgYGZDf%VsFq^D zw>{BfzI6Qwx8r{=|2bM~zwAH53BAkzcBubr(tgH|T|$HDDv7O6k*)K8GT#5saDB`C zU!3b_+Hn78kT>7&`fux(dz=3=C|Lhz`1Nh_e+Ea>&+3i!kDq;ivHx0wbrjCl`|^`K z8&n(e?U@~ro&vynmYg^353UBEb^7QKBJ^Piz{ z|C0@l{~2QCe}+4va0A#cl?1vn&}rwWUDn6)Khvz&|9ZFo7stA;o%hA-PbTmB&#>I3P$>fXC{UNMzk@pz(ej>gTC>MLJ5 z81J~gWw&nd;qO(p3*xs#x-E5(X=0Z@wZoqGhtu`Qi|f|ETYv9rLXz|o76x{e>yAtR zxNn*rpZ`pb!v3_e_ML_zb!^{1n7>-{3!=EcQ zm2L6j`&*v`omklxVB~qs{c-dz`{pfYxuY+i`E{&R=e}Pmd%aBZc0Pi@&~6wF8QrqlvSU*y?0;1DI;5t zwflu%Ouu#J7H|KpiGNO<7W>rnhJj}uziCvhzsrZ~t?8F?OhvD#?@01q?s|@`){1Ly z<)go9_d_l`?|PZmr!mX2i^->wA#H|ujsAknD7B5Hvh!Bt~tWtO6Fr+@T&R~ zQJ??qw*S|XeMM*8ALFI3ukYIQdZWkn?js2)`hJgJzKzRNXnG%C`&ldcbV0G^3z6+n zR{aZK++*3i^7E8U5` z{~5TxyJz1ldGYnQRYUA+LB|8%WnV3u{Xjdjd?t$6P*~adQ99#{C=c=v;mYL{k zAN?+M_1?P9f{Aa&9>{tb}7o%;tWi#9y0kId*t(% zy%fy4mHKh_rX)rk24$P{&+FXwG9Q_>ZsPJERLc{XWB3Sn*VbwoB#Eo{TIP_g9@zT4q_`7k#@-5(l!4XPVoI_ z__ue-e})UbH76g$|Mct^!n{B zAM;<1!5g9(&av;X%|4zl#lCmu+Qk_U(p3d_t@yG&@KXL3fBw_9mwqg`6S?8<-Ff9X zzj&U15q&Ohbo*7*@6$*3Zu$IV@jV!q3cybd=GE5ke{!% z^KJg27yGRH{dtnM?Wuk7%8JRT@W-`pn;-Y8`*vMA?!S54m9>u@Pk&h9?vqlwI&*EN zo${`WdXp+1T~V5@c7INT+HRC6YsdoUCwd0wd#-4vNd~kJ@&)4tu~*fv!lD) z&$Of`oH%GQ?Lgr_g%71$+q&h}FO8pl+DySP!N~Z2*AwjvrGLXcF55`TH7>ue`=-b3 zrSJmDllFhJKEHAh33N5U7+-q+hxb2&Y7_sj7ylVf?cze=HrP!Db;{}0xn}$vj?%UW zJ@*5t<{1kRiSSRO~OHYTuk@>`RDncp@z%;A=~!<4D(Ogzi)K^c1-x^ ze#Y9M^~s%!cIW?aT>GEl`6vJ14?u@%AT(?VMm#|GAF&*V4G- z!mefak-UVYJSVY!_6Kor5Nut3|NPSXM=Nted<|2_Lf;haTDQD# zP5j=sH=T_iTZc3EXKl@WWb1U@WwuA};@>Ja?{L-J`6cxNIbIN6(5%mX2sH;!MEVmO z`c#Za>_)jJRy4czyG;}EjPf@S^5#hvUCVQvKdNRz)h}TiTMxZVTu)eg-a?b&u)PV%1*pA$RH4JJXvU-P3exy}g$?dHveb`8R&$ z|7Ct#|Nhc{hSo}pHMb^lAxr*}Rr313y8 z{Br*n{;mHR_Iv*mLFMEJ{|nP6PMJM%%H#v57oU1{w)M-c+%M+Sp4QyxdwR#nIBxpi z{$>9eF4$fF_gDW%AS!3C|Idh8x7hO*SGPvbP5b*^@0;WdhB7^N(X{JF%D>H0`Q_tg zzv+x;L9p)6>$Rv_Q4HPd|5E~Fj`pIx!N28w|1&J8UH|u2{D;M;oT~YsHVlb)`_F(J zar1B7%K!5t>OaH%h5tnAK@kTI>eur>nE#o^VgGvZe+I4BUsqg`O?qWb-A&%o1j zJ>)~^Vr|vBT@vnsZ0oBZ^d~vvS;Y}C>!<(TGyV4hsq?R{=I4dQZ*4Ey zDw-E-_DbUWo*KR#mPbQYtn@0myKIfcIiqVj8f$0UYH8*iJ~`j8QTv?M+}1_jH3>6h zc%I9Ihini{dNBXijgv_V@1F!M+UB}$)_1oXI!%5v&x_nJIpG|$W@fVdUCX1He`kij z+47&^0r%^`Z7WVJYBDHvtvvG*qk)30`~0Y;_&cTE+X`{ZEvW|GoT|v~T|9`X}O<{~2P0|1&s>erCrd^Ub}F)H6?* zrs3-Q@0)$C{s}wiR)I_PFVyy>e9-?>bMLSCuM6y(D#ZWhe*eAlKf~tm^qBO<`fK;= zcg_E8T9!Py>&Km|@#3m4swKj+G`m)v{~ogaLfQn-@ipJqo<7+(GoPjG#exk_CWhvz z_XkB>ve;OBr;l;P>ZTUkbM>_)+n)%oSAQSvY}R$XR&n2f9sPgRZT?&poj27uhTqQq zF3ZN}$K#z_rdj;^qqgu;d9PKl)VqS?8Fo9?eV=-3`{rktuJ1hm zb$#)yV~Q-LEP>5E53W0Qrf-=2_M+AVzFX@n*uSZ;RCR?K-HX_{GGppDnZoeQ=(Rmr z8jJQUUW0F*0$J|U+v~oxcKs`KUUc>QI!`xB>yw)4ADHP ze5|PNRBfO3BMrYfDSwyb^=BK_PO1Ie;w958RVKg6d=$xMUva1U+I!QUxleiLUYvhbD#|$Q zaj@9;AeQa@6&ClZE`PZ>DNkR`lpUL(6&DkK6yW zR%}BOTZz1;^XO%Kv#_WgYX^t7KFWQxPH0n{GW#R{~2E2 z^8d?n{meYp{|x=J?=Sk#5XJD#pXoot+I{~SO6|X8z4^(KwBtOo!0X4x={Yg8KH1Ju z|1fdG=bNv@KCbyRV~N-L%EDYJm9pe#ahn*MBY!N~JkPGUJ7(4U^?}FvMZRC&xZu`o zY1S5#rhv+tD`nF^Ia}?S-IFgck>>FTi-sduITwP ztYmk0wdUL^C@L|_{W>MHW15Of-Wqlj*Ye!_+=7Xx zPaA3;T$81}Fm3AEwX?6s2b;IFJ4NcH&q#b)Cf9QH_UsiYmln0g8|fJtWSez8Z_`>5 zGWq&!=g1)6^$Lr+v}asFE(Y+ihhP!GUw@(uTtdVnl@OaQ3NF2-dr>9h#&*6PC;q&a z@&Aze;`H*5i?6I$bN|ih^}o1d|1;dz{LgUY;@_F2{}~*!_n)-?G3!TtI{$OqbLlrO zE!teaS#SRf-SVI6{~3hczolOP%ewAAgUWw~N14?Y_4dzXF1^VNjNE^7di)o$-G3(k zXXsq~+vxSbrt9^S>OY!5l$_tV)HUrtLt1S8g{b$R{Qs%-L#zh%p^sdA^YG7qh8e}) z#3-^mHdS;svPqFF5py>h!lyy7%acXQ}S7&MIvcN@5uiQ6J-B0{H^x<&#++D&&db&fBaniCH{51?`!){%=`Z{tk1OnA{=j2 zA^y)Uzpndl2J=lD?tj~U|Gn~`;pXbi>x>WVf1SVouK(XzyRCklx&8N@`{tH^;Dy7N zm*6Wx2+0k>B8KQz$csf=b#fD<-FNSZeNYnhqsLAz^!ceR8{dYkSg`KQF71H5t#@}% z=w|8uUims8^5xqinT;v8j4WP9on|#xmCE({+Yx$o_N1*{AtCd8W`BE=xBu>H^Ig?Z zZ*E0?=b2{}rp+IC^sKI6-Wt8B-!%i>x8~h?xuLexWtGi!mgaj~4n0)w%ynOAnyr;- zmKYc%mAlFI_fexfE`GMPi*EXsUAv#G<>vM5%64>!hQPcV3=%VTY%6 z+Qt{_yACJGa&FIFzVOY}Y+`YN@@5bV1?|$0az0cZQzv|Db4ORw;p9?Q-%vcfGr#vYj zzwmF^e50}(8&d`5y%pUdc~Wg#`u;86TX|%*)G+ajo)7fr@0-d|bTI4gJ5hsIOV-`I zxTbK~3iYtqb?r9aSD9Pwd1=zo$i~N9zk1l>&#Byw~-eYZQC! zy3gBbc}4zF#};)xUEfif$g%aqKea6{i#+!|bQK9V+(1krjNGvX-54VCpW*n``M-Pr z9IdK!|9$D_oE!Te8XvDAc^q^7CYvOBOugGq22}u%T<{oNGrR!xu}eSB-2VGYe>&$M z70@=b)Zc{)bcYeKX!h}i$z^G8U|`iDN5&L{_5MO)!ug9w02bgBzo+}e#8oo z_$B`tPWZ{MP43vfY3gsO!0;6lU&w!$7*T)u_}?G-pc8(j`F~aXXV}mCPt>XF$2~9m z(?9+*9I%`G>e9cY{|tXr%;dj({LfG)4L-HE?eM>O_U{kZw+1ZggR10T?frBA`aSzW zm-V*)wW+_~1{#rlsJP)j!}9*$AMQU4(O5jG|I{}~wU z*6Kg|x9`)Sb?MgYhp+1_Y}xRj=d1n0$k3IoOY<|A{`t@FK+wudUZe}B7t{Xqvb-I3CW*7ZNi z)8jv1lRxvHp;i7vUu^yPh>q^wXJ~<^Bzc4%R>vcB)E}}e z{m)>V_~-nOKuT<`EoQ#|@jpX@-1E=>SmP0i4Rq`>FFd_&{m;Ph^!lH#?Vs%*dLpsC zp~21Y-G0tmlpuRr6J5{x=i6DfKS~qI7smTXy-E4-_e`Kon@7Q_hzIlTw-($}`Onbb zVSm^D+}@md>*UrcTE*J@XLumW& z`KN#CAFv~wi7mBab0#jYV{s;|)I)P-SWeE8{WFeAYfad_>Bz7Ck1OYIXo*>}C9eJh zs}`TuI~UX}uMID|k+M8;dGYMmGxq(Ua`M=Pe?{B>sDvSx=J3Fe{Ljz=%jb(R3Jmsl z_4W_KP$LUof#ArqK`?v4<4N7#vgAP|AhpVUwLQY+4|1%&; zwovW2R@{WU7rt6s2?f<~GSkl@poptq@=~7$IY!xbOB|EW_R z|MhPD7s1%N$^RJ?E$r{DDU?4neX-fXKOVmpZ(Kbwb9(TNy^k~umX*u=3blIm<@`_K zb^jS2gnqgaWw=^x>CMeAK87y!(mcM)(rc20>W(R{T;jzSxL&7c>)IVU z`|?X`)`VS)Q|L_G=KA@vrTn4V)vGLvcKOBb7oHW_ynVOae}={*-ol5q-LbAYLU&Ei*3WmS~rTKm8?)n!}c zc4rlqgcr^7of36XiN)}wU~Wqa^V`rh4=;bsb!@u1rtsF2+z0HI@4M=6ZrbJN^``!o z;npj$6OUi*)LECb-MdpUwe#O=xwhThv!yN9sv6yna#yK2{&as#)d$U8tw)L~>hAxQ zH8c~^PdQ)c`}CCNXYXY#^%*-&Ps=SAb}c`>EGlwIz}mH&7xUkHV~OxG)0WRP;FmN9|R-s|OSuPs^^ zYqlmm`g3)^q1wEIt|DR1;JHFAvEAFew`|FLEUvfon$qEFjaLq@``k@U&1T(U>e5&O zS~|;OUjOmA{omsJr*WQ-{D1st|9kn*>uc}+Gdw<5|LfWPQ+p;Kng8)4|L^5LuV4Q# z|KpOF{|tARf3n$)dprx@Sl3^kPu_Hx4fyKMTe|ZnF3ag|eXcxfD%tm+cG7LmE_%8D=7-i#-^Gsy zAOCUf^6ABAGfv&LD>GVgCJ?5fW8T&~)Zoc(0m#pdHny-&}&=UpwfS>#*yt>@CNWiS8EH(P5r zt>i^h>$wkBv%2nGJ9XG&$E@s-*(awqoRGBn=>Dzxu5WZ~=?ne0qT99i&F?&Rd7_l3 zs6bwt1w*;%jhENTri)F=^3LR}e7`&2GY&#bbp0(~<;nnt~5!3xQ zPB@)!x^+zKumE7qM@+ctCI;bPTUeutm3XPo@L z&M7@oy*%{Rnj@`W*4em8-?(XBc5aQG`fc;nlwCb*>-p+WIQjp2Qa@u&*Yyw2|Fo$5 zXISU{v+L8x`|mD)F*#Dt|ZT~30 z5?tZTY^BQe-;!SqY?a1s} zrC1vW3D5cS%YA+)ZF;eO!t$VyyN}MVTjx}?|pZ0fqVxe1?~eo8zqEABtEeD;wd z?-kK^yGw%wZf`Fu=RdVkV{P5`uM} z=UTU-<0rd!?Hkv4Y0Wd*oO@?#^P;J1O1K}nOcRN|E%8ppf7g>M1{JLfC%rwos(uNB z!Pv-a@R#Y~O)t0Knh?E!kD1{FgZhiNaf>7DbU)0S|5#T3x809DsWP5C zA7yO(tOWmwK7YESBk^Ov>fqG+sh7il=2cGiJ$B;hKFuH5&g~D+e)*XBZQ7=rTDDsz z99eOqe8mCfjqDQt8MuyhAGxP@W$S+i?u;_2C1v%uta!|WPwVbF6?*RPC5B3sRvF>G z?8txOuXb(RzV)Mi&w5i|JvJU)7A9pitNh#}^Pj)dwm&K*`}F;>UMXFpSGk4@FaBpJ zEah>WrnFX1WaWdN!hYwtXJX4_FMku7wc^s8b(b?U)-IWPZ`FJMJ?A{{ecQVvqIXvq}1b83i#Ft)(|i%>-ke zOdd0CE<1L2?Up=Ai$DBpOTNW(zm4|3u6XUM=2q*!>G`+vZ8l2AF4x>x$;{4vxxuh& zU?)zNY>fX8Nw~^R9gZr;eCpR#>zW&UvY3JJF z|R}cKXv#0i{GjuWYTlFlrD!)|JJB0wr=C(l+AN)Yo6Wr!N)$xzJINE z;<}34$HJuccWlYcyYI7M(=3m-Ilik-K4)S0^QJPU$*TN~)%C#o2h}(Cx>=%F6I+MNXK@*qk+$45djl1Bs#Ds9= zKI`PQUdIyEB|dxEoHC3$_A>JKsXOb#)~2tI4Lv-~Vm{Xh83(zy^B^ExOd z`p)VjvdpWM*d$Lfq-s3Cg|EY2Q_wrxqmpiI|GNsP{y;ZMF&S=JkewD4*)^y+z zW7SRn=TQEi;pHeuI&;F_(MKikDmX7|LfXc2jHxqF{l4N zbHAMWpW)q$JCl#(JUjDu2LEQ6{`v0<`~5UK*Kd05zvAy4p_ALw{>`<1d~4&1HE~9p zukJVYUlkb3yIWsCI=g%Oq#K^Hr^PgPFV$V&;li!Mo7JE3WTIHzi!Z;zWJRu-ZQ45T zP=%HL!fm&{WS8gahzTB>ke=0i^2G6&pqJYheLnKC=#}z4&tt`X+I)Ret{E+!RCHCn z&_>o&Z1LVTrMKGli}o!nP~7acbwgj*4 zU2XPt(;KY{tpKlAZ?0W7J-%^!cILw9?y`Suk40i%Yii$~U2U--BD$#Y-lmKdzwYLR za=+;QwN_oPIxFw)kDjxIyl2c-iykVid0y~k`PoMnD*Wo-a-1GsXFYlT%euO*`q;JW zc2EAQWtF*cW$~J=$0mz5HfT8UeYjiCb$U_Pv8A3N1uk1FfV0{2;CTJ+^B3m0 ze^^rTCD65|^U*x-57+%SUAGB-^fx}VNqeuWDO+pJ7yhRTIt&b7UiL4#BD|zd;>YDm zzK+g(-fR6T-vwP5TqlNhSXc3d`v2khb=`Y)jn$5P!IXJG5wLe{M|p6a>i1BS40OLxj9?q=ppS1-rJLJGw`v; z%$gthV{-M%*HyKx%Qo$k+fr+#ee+%2vAj8Zx+L7h*&8H^kJnXkt$r|{G3(ZUhQqt9 zBW+yElb6PNye(foJ5^=sgumA9eaU%xt+n9|@ms+`X|}_35FRoC<F^j>!MeqXC~twpDf z&%60!e%!S!`=v7c*GZpvTXMOO^~chR-ckylD(%c6glc8hJKW{W}2=4yJuUntJ`exQeo6U;8@ki_UKen|m%l>u7>gu#jXN7ibi-_Z7v&wyCbo%=J z-+t~ zpzqo{5t`8^X0N&2{67kCp8Wnzwm{qc&^=y%-ps1C{kNaaE#+RR$J4P)t-q?VRbleP zK7P@Y;o>d5mPIR0EuFNhuRpw6ZR+0J+BYl9HQmBL*_?L$(Y*HT%4ogikFH$)%3QeZ zxzCl7gCdGXb)gd-6AJsH<>M^BM}78Q-+F7>%3QXYt1Gruzm=2Rb5LemS02YnYo8;N z?|l9hzLY`KL59IVd$!r?)>RL6Yh>8D7t>*ZSlyRDb-4{k!?k zuIt6_nGp()BiK<2A!4M@D^u6LLLY>vHF+qn=o@AnY){K_r@*e=8>gl`^<7f>CCvYiN?84!+0grMV9a>quQKkRj8FdB8~@aF#hUp) zJl6bY*lGUvll`%NB=-C(h4Lp3-w1g_D6R1)25(&Jn)09FkZbzillvhTsKA-E?t*`M zY+L<~Z55sTpMg_!{&zL_H8}_l$VCR+$ zw^Z~$!xSYxrgZBz?e&bUYk6E!HEv1=EDj9uU3~Vd{!flO_4~4?BJBrxwN)Nd@NZoE zhsQ4}w(jrv>kpc>zj8*)@;}2B&wnrfg{s1aFB&vauTk1DzrFqwU;g*`{~4S_Z(|X|=r zEo;@rxOD#~S@NGh=Kp7?>;G{PiGB5z_M?9r^-jODTYqES`}Z0@rP}}eiT=-UfAK$) zN(9@qs>=?fia%-h{ymG4^rM=k@$1NL{T;Sl@BV%2pSYs_{L%e?f7w5FM=--zEPgHY zX5WKv-n)OFvU6MbpTS=2Kg0JI{~3DaPIWQ^Lc)PqFEB{#U2_pW*$*{|r(noIMp5%IrH#0>5^a#TQho-u?SjUOC`D!~E|0 z-(TvF^&_#@2e|QZfSvjfzq(b{O`#Hv*O?~G-#$O}JNu^o^6yh>a5`YGtNt4QO~2dU zOedgBK4Ln^wAlOi0zaAZ|NOE2Kf`|aKN(2utgj-qNwL%K$ek>&uXI0g#s2xD`~UvR ze_V`UuD-HcI_>ZB{B;%!!)M>r-w}7t_Wr#9Br~v?XP-h$3{N65hW(IZIN|k4D}Ro8 zF2!3OCr$jDy=Gndk^3J%v;S`Yd41v3Yeh$vT>9N7b7)pC+q?5RI9Ao;T5Z4L=cOS~ zfP-2Bc=6(`OzGk)>rc2n{`)y*edB)k@g1Sq1aAd`#!e@${^S0tR^3%3Y*`|GPRn9` zwjSjKk2fayKe-P6`&#%qA!&@Hxl~t7pZ|+h@Pgm;RI@I!cN6A`m-l}8e z()|n^|n19lO;! zcFEfXf3^MFPH`XYdYb;2xAD}hrzS^vS4C?xI8G?2+j=?jZ?kXoo>zjFvsHMyo|Rnp z*s7gg#`V>p=~}%u z;^(znC1D@6PO7mp%kI8(_jTdz*RM^2I(2U{?Ylhj-kPkaqqE{7&dRJ*QkFf)SHQ9E z*8H#)%ic{}X}{-b`}TIV>37$EGQNG7%dt~`;<~1HhL2XBUSwnCxVO?HHEEem$#dE4 zrPBj@cFwc)x|yiF>_XHE*W%ojFFxJcd#BJVW>R!R<+;L$D~A$euas}zw*P~7o4W9x zZM*i(|G@M3;inBZckcMl(BH&<_vOS{(;gr8<`i7}CRbB|#isD}wJpo{Zt698G3lMT zzz!Z~J_d=iX|XYp-j7;mg}ZNmtb3(o#me?W2cu4hO?!H1pHlAYP5Ft>4;yYMY5OZz z60<33>5cG%>o|TkKlsmZKXl#nZx?+PtAFRKPXC^-IpYm7GVHVoK1_QO|Qp)^KYr=nqsvZ9suokF^ zUH^FgN^Xf3Zrl)=Jxfa>0TCz3c$%=KGp8H&0Uz)j) zSLu{^VW@v=>i4JW=|F?&66D zHTMNobJ=e+dA(((PF{+!^YwCP@84$oC)8&IezrU{^R1q8V|v95m%9>uy!oz+GB3WI z7#ruWQoXC8`dZJff-5E7(G~H6?)kigpWmq16Te}T*V{Qt zi+3KT1VOYYKuZ;;{80ksmHJDY4g4j zDcyUn$4vEF(WPr#8jE(E+WrWT^cuXE-x+JuX;GJwct(F1Wf&TPFCxC5B(XGrU+e=<5@jMAJ&iZzGWz;Tzn~N?* zXa4P5wDXqMywu2}>KCR?zvR1l(T*!QiPx3(Z=XBIT!^PcVs-kFZT}f=9ca(r^yc7C z|M;avA;EpAQTDvkwsWR_Su*wAmyH*e?U%~lbZ5fZyw{r*irCls&1##xtb69$lu40k z@+Ey!PhSj{IkxJq+Slz7n#cm1M zmg>a3GWha-(=&mdX`)l^Tzjkfo%cbe#R}W?#fz)&O1#Q0lXBcs^V?iA?CYatd#~y2 z-@LT>t$x`#j)2p>TTcgu{#_hA^{A`%E zm!EDk=Ez}Fc&D^;m$p{Ktt|@{@|x~k>!WJ5`T4x%VY`man6-0nqy&3+HgEg7Y^Kiv zJLXlE-8q$*roww%;#a7Z=Cxy2SIdPjU$8Ra<)!lbSMFRc)y~cpdUY#IDbjdLuFc1Y zlE8?o>$N8Kq<-_w4D7Y^@?H_>D%!XKN97Ld5RKODp!PuDl|YAi$A?Cb=syF}p@uIP z&hoAd^O&8z-Z`?)`a^$L{3F(PE0?_#7Z=>U`lw3gPJf25@^9rw>VGlD)<3XSn;e$x zyS${#{`Rc)Ki~f|9Cy|K_H5yQ2HXD(A1}t_BNWQtyf65l;RS2ye};yy?N>GGm45fu zZI1pq^JaZsmi*`Y{~2UOz^sq|8Tz^&*KgjZ{GZ_h*X#cbj4%G3`_ccO!9jcdANIws zrr1xcJ@|Fj{WIrI|1J*r&oJ*l!;cq>5zKYwzpWqH|7BRW|ASoroA(|68NM*>`pP#Uqg8O z4}Nzf4~H!L)BTQr&9#4NW%2OPkq5E<)IVOd<36(6?2*0uYbpPe_=4KB?VpV|{kwC; z{`q5g;DCZj^w@uf^cwg73=2fB|6{(0WH~rgn_rvmpE-9rA{N}iOndnsFNhDCAE1~7 zyA>R#h5M0W?@K+B?O(m*Pwac}>umnB^i)`YKtksDKP&BCmQ?}&T)(ql%dR)J z)_;4}`Cs*ahR2Kg;Ve?ZX8%7c4R|1t5Hz6hpfJ^d1Bry7ISzB`eiDMF&i>;?NDSi5 zQeSF=+CS;*;?6=v@rpP`PnQ62( z$m`PNj8izqurhXk=7*8OKdZPNPB#vA^@RWJUX_MZWg_ILbeKvs@g9Bh?;>iD&5|Cw`#5gPj6&i>B; zDZw7&jMl}kuGCLuzozw{!PuG~p#ko4?S7;*uo%hds{{Xq$dCFGM?Eqcmkf^=(LBXk zS*m2FWXe39@nqKX8l+=AC;zs7WDcp`x7Nx1@IET=p#5zC%gIY^ZywZa=l&M_d#^)(Y2l4GiGV>J-cz& PEh0SG0d`57{QsK(>> diff --git a/tensorflow/docs_src/mobile/tflite/demo_android.md b/tensorflow/docs_src/mobile/tflite/demo_android.md index c94b5597a6..7f2f8882a2 100644 --- a/tensorflow/docs_src/mobile/tflite/demo_android.md +++ b/tensorflow/docs_src/mobile/tflite/demo_android.md @@ -1,42 +1,144 @@ -# TensorFlow Lite Demo for Android +# Android Demo App -The TensorFlow Lite demo is a camera app that continuously classifies whatever -it sees from your device's back camera, using a quantized MobileNet model. +An example Android application using TensorFLow Lite is available +[on GitHub](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). +The demo is a sample camera app that classifies images continuously +using either a quantized Mobilenet model or a floating point Inception-v3 model. +To run the demo, a device running Android 5.0 ( API 21) or higher is required. -You'll need an Android device running Android 5.0 or higher to run the demo. +In the demo app, inference is done using the TensorFlow Lite Java API. The demo +app classifies frames in real-time, displaying the top most probable +classifications. It also displays the time taken to detect the object. -To get you started working with TensorFlow Lite on Android, we'll walk you -through building and deploying our TensorFlow demo app in Android Studio. +There are three ways to get the demo app to your device: -Note: For a more detailed guide see the -[TFLite Codelab](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/index.html#0) +* Download the [prebuilt binary APK](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). +* Use Android Studio to build the application. +* Download the source code for TensorFlow Lite and the demo and build it using + bazel. -It's also possible to build the demo app with Bazel, but we only recommend -this for advanced users who are very familiar with the Bazel build -environment. For more information on that, see our page [on Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite#building-tensorflow-lite-and-the-demo-app-from-source). -## Build and deploy with Android Studio +## Download the pre-built binary -1. Clone the TensorFlow repository from GitHub if you haven't already: +The easiest way to try the demo is to download the +[pre-built binary APK](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk) - git clone https://github.com/tensorflow/tensorflow +Once the APK is installed, click the app icon to start the program. The first +time the app is opened, it asks for runtime permissions to access the device +camera. The demo app opens the back-camera of the device and recognizes objects +in the camera's field of view. At the bottom of the image (or at the left +of the image if the device is in landscape mode), it displays top three objects +classified and the classification latency. -2. Install the latest version of Android Studio from [here](https://developer.android.com/studio/index.html). -3. From the **Welcome to Android Studio** screen, use the **Import Project - (Gradle, Eclipse ADT, etc)** option to import the - `tensorflow/contrib/lite/java/demo` directory as an existing Android Studio - Project. +## Build in Android Studio with TensorFlow Lite AAR from JCenter - Android Studio may prompt you to install Gradle upgrades and other tool - versions; you should accept these upgrades. +Use Android Studio to try out changes in the project code and compile the demo +app: -4. Download the TensorFlow Lite MobileNet model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip). +* Install the latest version of + [Android Studio](https://developer.android.com/studio/index.html). +* Make sure the Android SDK version is greater than 26 and NDK version is greater + than 14 (in the Android Studio settings). +* Import the `tensorflow/contrib/lite/java/demo` directory as a new + Android Studio project. +* Install all the Gradle extensions it requests. - Unzip this and copy the `mobilenet_quant_v1_224.tflite` file to the assets - directory: `tensorflow/contrib/lite/java/demo/app/src/main/assets/` +To get a model, either: -5. Build and run the app in Android Studio. +* Download the quantized [Mobilenet TensorFlow Lite model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) + and unzip and copy `mobilenet_quant_v1_224.tflite` to the assets directory: + `tensorflow/contrib/lite/java/demo/app/src/main/assets/`. +* Or, download the floating point [Inception-v3 model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) + and unzip and copy `inceptionv3_non_slim_2015.tflite` to the assets + directory. Change the chosen classifier in + [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java)
+ from: `classifier = new ImageClassifierQuantizedMobileNet(getActivity());`
+ to: `classifier = new ImageClassifierFloatInception(getActivity());`. -You'll have to grant permissions for the app to use the device's camera. Point -the camera at various objects and enjoy seeing how the model classifies things! +Now you can build and run the demo app. + + +## Build TensorFlow Lite and the demo app from source + +### Clone the TensorFlow repo + +```sh +git clone https://github.com/tensorflow/tensorflow +``` + +### Install Bazel + +If `bazel` is not installed on your system, see +[Installing Bazel](https://bazel.build/versions/master/docs/install.html). + +Note: Bazel does not currently support Android builds on Windows. Windows users +should download the +[prebuilt binary](https://storage.googleapis.com/download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). + +### Install Android NDK and SDK + +The Android NDK is required to build the native (C/C++) TensorFlow Lite code. The +current recommended version is *14b* and can be found on the +[NDK Archives](https://developer.android.com/ndk/downloads/older_releases.html#ndk-14b-downloads) +page. + +The Android SDK and build tools can be +[downloaded separately](https://developer.android.com/tools/revisions/build-tools.html) +or used as part of +[Android Studio](https://developer.android.com/studio/index.html). To build the +TensorFlow Lite Android demo, build tools require API >= 23 (but it will run on +devices with API >= 21). + +In the root of the TensorFlow repository, update the `WORKSPACE` file with the +`api_level` and location of the SDK and NDK. If you installed it with +Android Studio, the SDK path can be found in the SDK manager. The default NDK +path is:`{SDK path}/ndk-bundle.` For example: + +``` +android_sdk_repository ( + name = "androidsdk", + api_level = 23, + build_tools_version = "23.0.2", + path = "/home/xxxx/android-sdk-linux/", +) + +android_ndk_repository( + name = "androidndk", + path = "/home/xxxx/android-ndk-r10e/", + api_level = 19, +) +``` + +Some additional details are available on the +[TF Lite Android App page](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). + +### Build the source code + +To build the demo app, run `bazel`: + +``` +bazel build --cxxopt=--std=c++11 //tensorflow/contrib/lite/java/demo/app/src/main:TfLiteCameraDemo +``` + +Caution: Because of an bazel bug, we only support building the Android demo app +within a Python 2 environment. + + +## About the demo + +The demo app is resizing each camera image frame (224 width * 224 height) to +match the quantized MobileNets model (299 * 299 for Inception-v3). The resized +image is converted—row by row—into a +[ByteBuffer](https://developer.android.com/reference/java/nio/ByteBuffer.html). +Its size is 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. +224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents +the 3 colors of a pixel. + +This demo uses the TensorFlow Lite Java inference API +for models which take a single input and provide a single output. This outputs a +two-dimensional array, with the first dimension being the category index and the +second dimension being the confidence of classification. Both models have 1001 +unique categories and the app sorts the probabilities of all the categories and +displays the top three. The model file must be downloaded and bundled within the +assets directory of the app. diff --git a/tensorflow/docs_src/mobile/tflite/demo_ios.md b/tensorflow/docs_src/mobile/tflite/demo_ios.md index 3ee9b1cbca..3be21da89f 100644 --- a/tensorflow/docs_src/mobile/tflite/demo_ios.md +++ b/tensorflow/docs_src/mobile/tflite/demo_ios.md @@ -1,4 +1,4 @@ -# TensorFlow Lite Demo for iOS +# iOS Demo App The TensorFlow Lite demo is a camera app that continuously classifies whatever it sees from your device's back camera, using a quantized MobileNet model. These diff --git a/tensorflow/docs_src/mobile/tflite/devguide.md b/tensorflow/docs_src/mobile/tflite/devguide.md new file mode 100644 index 0000000000..5b521dca7b --- /dev/null +++ b/tensorflow/docs_src/mobile/tflite/devguide.md @@ -0,0 +1,224 @@ +# Developer Guide + +Using a TensorFlow Lite model in your mobile app requires multiple +considerations: you must choose a pre-trained or custom model, convert the model +to a TensorFLow Lite format, and finally, integrate the model in your app. + +## 1. Choose a model + +Depending on the use case, you can choose one of the popular open-sourced models, +such as *InceptionV3* or *MobileNets*, and re-train these models with a custom +data set or even build your own custom model. + +### Use a pre-trained model + +[MobileNets](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html) +is a family of mobile-first computer vision models for TensorFlow designed to +effectively maximize accuracy, while taking into consideration the restricted +resources for on-device or embedded applications. MobileNets are small, +low-latency, low-power models parameterized to meet the resource constraints for +a variety of uses. They can be used for classification, detection, embeddings, and +segmentation—similar to other popular large scale models, such as +[Inception](https://arxiv.org/pdf/1602.07261.pdf). Google provides 16 pre-trained +[ImageNet](http://www.image-net.org/challenges/LSVRC/) classification checkpoints +for MobileNets that can be used in mobile projects of all sizes. + +[Inception-v3](https://arxiv.org/abs/1512.00567) is an image recognition model +that achieves fairly high accuracy recognizing general objects with 1000 classes, +for example, "Zebra", "Dalmatian", and "Dishwasher". The model extracts general +features from input images using a convolutional neural network and classifies +them based on those features with fully-connected and softmax layers. + +[On Device Smart Reply](https://research.googleblog.com/2017/02/on-device-machine-intelligence.html) +is an on-device model that provides one-touch replies for incoming text messages +by suggesting contextually relevant messages. The model is built specifically for +memory constrained devices, such as watches and phones, and has been successfully +used in Smart Replies on Android Wear. Currently, this model is Android-specific. + +These pre-trained models are [available for download](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/models.md) + +### Re-train Inception-V3 or MobileNet for a custom data set + +These pre-trained models were trained on the *ImageNet* data set which contains +1000 predefined classes. If these classes are not sufficient for your use case, +the model will need to be re-trained. This technique is called +*transfer learning* and starts with a model that has been already trained on a +problem, then retrains the model on a similar problem. Deep learning from +scratch can take days, but transfer learning is fairly quick. In order to do +this, you need to generate a custom data set labeled with the relevant classes. + +The [TensorFlow for Poets](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/) +codelab walks through the re-training process step-by-step. The code supports +both floating point and quantized inference. + +### Train a custom model + +A developer may choose to train a custom model using Tensorflow (see the +@{$tutorials} for examples of building and training models). If you have already +written a model, the first step is to export this to a @{tf.GraphDef} file. This +is required because some formats do not store the model structure outside the +code, and we must communicate with other parts of the framework. See +[Exporting the Inference Graph](https://github.com/tensorflow/models/blob/master/research/slim/README.md) +to create .pb file for the custom model. + +TensorFlow Lite currently supports a subset of TensorFlow operators. Refer to the +[TensorFlow Lite & TensorFlow Compatibility Guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) +for supported operators and their usage. This set of operators will continue to +grow in future Tensorflow Lite releases. + + +## 2. Convert the model format + +The model generated (or downloaded) in the previous step is a *standard* +Tensorflow model and you should now have a .pb or .pbtxt @{tf.GraphDef} file. +Models generated with transfer learning (re-training) or custom models must be +converted—but, we must first freeze the graph to convert the model to the +Tensorflow Lite format. This process uses several model formats: + +* @{tf.GraphDef} (.pb) —A protobuf that represents the TensorFlow training or + computation graph. It contains operators, tensors, and variables definitions. +* *CheckPoint* (.ckpt) —Serialized variables from a TensorFlow graph. Since this + does not contain a graph structure, it cannot be interpreted by itself. +* `FrozenGraphDef` —A subclass of `GraphDef` that does not contain + variables. A `GraphDef` can be converted to a `FrozenGraphDef` by taking a + CheckPoint and a `GraphDef`, and converting each variable into a constant + using the value retrieved from the CheckPoint. +* `SavedModel` —A `GraphDef` and CheckPoint with a signature that labels + input and output arguments to a model. A `GraphDef` and CheckPoint can be + extracted from a `SavedModel`. +* *TensorFlow Lite model* (.tflite) —A serialized + [FlatBuffer](https://google.github.io/flatbuffers/) that contains TensorFlow + Lite operators and tensors for the TensorFlow Lite interpreter, similiar to a + `FrozenGraphDef`. + +### Freeze Graph + +To use the `GraphDef` .pb file with TensorFlow Lite, you must have checkpoints +that contain trained weight parameters. The .pb file only contains the structure +of the graph. The process of merging the checkpoint values with the graph +structure is called *freezing the graph*. + +You should have a checkpoints folder or download them for a pre-trained model +(for example, +[MobileNets](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)). + +To freeze the graph, use the following command (changing the arguments): + +``` +freeze_graph --input_graph=/tmp/mobilenet_v1_224.pb \ + --input_checkpoint=/tmp/checkpoints/mobilenet-10202.ckpt \ + --input_binary=true \ + --output_graph=/tmp/frozen_mobilenet_v1_224.pb \ + --output_node_names=MobileNetV1/Predictions/Reshape_1 +``` + +The `input_binary` flag must be enabled so the protobuf is read and written in +a binary format. Set the `input_graph` and `input_checkpoint` files. + +The `output_node_names` may not be obvious outside of the code that built the +model. The easiest way to find them is to visualize the graph, either with +[TensorBoard](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2/#3) +or `graphviz`. + +The frozen `GraphDef` is now ready for conversion to the `FlatBuffer` format +(.tflite) for use on Android or iOS devices. For Android, the Tensorflow +Optimizing Converter tool supports both float and quantized models. To convert +the frozen `GraphDef` to the .tflite format: + +``` +toco --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TFLITE \ + --output_file=/tmp/mobilenet_v1_1.0_224.tflite \ + --inference_type=FLOAT \ + --input_type=FLOAT \ + --input_arrays=input \ + --output_arrays=MobilenetV1/Predictions/Reshape_1 \ + --input_shapes=1,224,224,3 +``` + +The `input_file` argument should reference the frozen `GraphDef` file +containing the model architecture. The [frozen_graph.pb](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz) +file used here is available for download. `output_file` is where the TensorFlow +Lite model will get generated. The `input_type` and `inference_type` +arguments should be set to `FLOAT`, unless converting a +@{$performance/quantization$quantized model}. Setting the `input_array`, +`output_array`, and `input_shape` arguments are not as straightforward. The +easiest way to find these values is to explore the graph using Tensorboard. Reuse +the arguments for specifying the output nodes for inference in the +`freeze_graph` step. + +It is also possible to use the Tensorflow Optimizing Converter with protobufs +from either Python or from the command line (see the +[toco_from_protos.py](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/python/toco_from_protos.py) +example). This allows you to integrate the conversion step into the model design +workflow, ensuring the model is easily convertible to a mobile inference graph. +For example: + +```python +import tensorflow as tf + +img = tf.placeholder(name="img", dtype=tf.float32, shape=(1, 64, 64, 3)) +val = img + tf.constant([1., 2., 3.]) + tf.constant([1., 4., 4.]) +out = tf.identity(val, name="out") + +with tf.Session() as sess: + tflite_model = tf.contrib.lite.toco_convert(sess.graph_def, [img], [out]) + open("converteds_model.tflite", "wb").write(tflite_model) +``` + +For usage, see the Tensorflow Optimizing Converter +[command-line examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md). + +Refer to the +[Ops compatibility guide](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md) +for troubleshooting help, and if that doesn't help, please +[file an issue](https://github.com/tensorflow/tensorflow/issues). + +The [development repo](https://github.com/tensorflow/tensorflow) contains a tool +to visualize TensorFlow Lite models after conversion. To build the +[visualize.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/tools/visualize.py) +tool: + +```sh +bazel run tensorflow/contrib/lite/tools:visualize -- model.tflite model_viz.html +``` + +This generates an interactive HTML page listing subgraphs, operations, and a +graph visualization. + + +## 3. Use the TensorFlow Lite model for inference in a mobile app + +After completing the prior steps, you should now have a .tflite model file. + +### Android + +Since Android apps are written in Java and the core TensorFlow library is in C++, +a JNI library is provided as an interface. This is only meant for inference—it +provides the ability to load a graph, set up inputs, and run the model to +calculate outputs. + +The open source Android demo app uses the JNI interface and is available +[on GitHub](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/app). +You can also download a +[prebuilt APK](http://download.tensorflow.org/deps/tflite/TfLiteCameraDemo.apk). +See the @{$tflite/demo_android} guide for details. + +The @{$mobile/android_build} guide has instructions for installing TensorFlow on +Android and setting up `bazel` and Android Studio. + +### iOS + +To integrate a TensorFlow model in an iOS app, see the +[TensorFlow Lite for iOS](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/g3doc/ios.md) +guide and @{$tflite/demo_ios} guide. + +#### Core ML support + +Core ML is a machine learning framework used in Apple products. In addition to +using Tensorflow Lite models directly in your applications, you can convert +trained Tensorflow models to the +[CoreML](https://developer.apple.com/machine-learning/) format for use on Apple +devices. To use the converter, refer to the +[Tensorflow-CoreML converter documentation](https://github.com/tf-coreml/tf-coreml). diff --git a/tensorflow/docs_src/mobile/tflite/index.md b/tensorflow/docs_src/mobile/tflite/index.md index beb24794fc..11f11ea4dc 100644 --- a/tensorflow/docs_src/mobile/tflite/index.md +++ b/tensorflow/docs_src/mobile/tflite/index.md @@ -155,7 +155,9 @@ retraining for both floating point and quantized inference. The following diagram shows the architectural design of TensorFlow Lite: -![tensorflow lite architecture](https://www.tensorflow.org/images/tflite-architecture.jpg) +TensorFlow Lite architecture diagram Starting with a trained TensorFlow model on disk, you'll convert that model to the TensorFlow Lite file format (`.tflite`) using the TensorFlow Lite -- GitLab From 9d1d379bcdd19d496fd8d2659c21a5510e045c5a Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Thu, 29 Mar 2018 13:31:23 -0700 Subject: [PATCH 700/960] Docs: Add Eager Execution guide to Programmer's Guide. PiperOrigin-RevId: 190977505 --- tensorflow/contrib/eager/README.md | 20 +- .../contrib/eager/python/g3doc/guide.md | 898 +--------------- .../docs_src/programmers_guide/eager.md | 992 ++++++++++++++++++ .../docs_src/programmers_guide/leftnav_files | 3 +- 4 files changed, 1015 insertions(+), 898 deletions(-) create mode 100644 tensorflow/docs_src/programmers_guide/eager.md diff --git a/tensorflow/contrib/eager/README.md b/tensorflow/contrib/eager/README.md index 9d2ca07c3a..9a3b780af8 100644 --- a/tensorflow/contrib/eager/README.md +++ b/tensorflow/contrib/eager/README.md @@ -1,12 +1,8 @@ # Eager Execution -> *WARNING*: This is a preview/pre-alpha version. The API and performance -> characteristics are subject to change. - -Eager execution is an experimental interface to TensorFlow that provides an -imperative programming style (à la [NumPy](http://www.numpy.org)). When you -enable eager execution, TensorFlow operations execute immediately; you do not -execute a pre-constructed graph with +Eager execution provides an imperative interface to TensorFlow (similiar to +[NumPy](http://www.numpy.org)). When you enable eager execution, TensorFlow +operations execute immediately; you do not execute a pre-constructed graph with [`Session.run()`](https://www.tensorflow.org/api_docs/python/tf/Session). For example, consider a simple computation in TensorFlow: @@ -33,7 +29,7 @@ print(m) ## Caveats This feature is in early stages and work remains to be done in terms of smooth -support for distributed and multi-GPU training and CPU performance. +support for distributed and multi-GPU training and performance. - [Known issues](https://github.com/tensorflow/tensorflow/issues?q=is%3Aissue%20is%3Aopen%20label%3Acomp%3Aeager) - Feedback is welcome, please consider @@ -41,21 +37,23 @@ support for distributed and multi-GPU training and CPU performance. ## Installation -Eager execution is included in TensorFlow versions 1.5 and above. +Eager execution is included in TensorFlow versions 1.7 and above. Installation instructions at https://www.tensorflow.org/install/ ## Documentation For an introduction to eager execution in TensorFlow, see: -- [User Guide](python/g3doc/guide.md) +- [User Guide](https://www.tensorflow.org/programmers_guide/eager) ([source](../../docs_src/programmers_guide/eager.md)) - Notebook: [Basic Usage](python/examples/notebooks/1_basics.ipynb) - Notebook: [Gradients](python/examples/notebooks/2_gradients.ipynb) - Notebook: [Importing Data](python/examples/notebooks/3_datasets.ipynb) ## Changelog -- 2017/10/31: Initial preview release. +- 2017/10/31: Initial preview release (in TensorFlow 1.5) - 2017/12/01: Example of dynamic neural network: [SPINN: Stack-augmented Parser-Interpreter Neural Network](https://arxiv.org/abs/1603.06021). See [README.md](python/examples/spinn/README.md) for details. +- 2017/03: Core functionality moved out of the experimental tf.contrib namespace + in TensorFlow 1.7. diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md index 11064981c6..2d2aba6908 100644 --- a/tensorflow/contrib/eager/python/g3doc/guide.md +++ b/tensorflow/contrib/eager/python/g3doc/guide.md @@ -1,892 +1,18 @@ -# TensorFlow Eager Execution - -## What is this? +# Eager execution Eager execution is a feature that makes TensorFlow execute operations -immediately: concrete values are returned, instead of a computational graph to -be executed later. - -As a result, enabling eager execution provides: - -- A [NumPy](http://www.numpy.org/)-like library for numerical computation with - support for GPU acceleration and automatic differentiation. -- A flexible platform for machine learning research and experimentation. - -Eager execution is under active development. This guide walks through an -alpha/preview release. In particular, not all TensorFlow APIs currently work -with eager execution enabled, and some models may be slow to execute, compared -to models defined without using eager execution. - -## Installation - -Eager execution is included in TensorFlow versions 1.5 and above. -Installation instructions at https://www.tensorflow.org/install/ - -The contents of this guide are compatible with TensorFlow 1.5. However, if you -run into bugs that are fixed in source but not the release, you may want to -either [build from source](https://www.tensorflow.org/install/install_sources) -or try a nightly build. The nightly builds are available as: - -- [`pip` packages](https://github.com/tensorflow/tensorflow/blob/master/README.md#installation) and - -- [docker](https://hub.docker.com/r/tensorflow/tensorflow/) images. - -For example, to run the latest nightly docker image: - -```sh -# If you have a GPU, use https://github.com/NVIDIA/nvidia-docker -docker pull tensorflow/tensorflow:nightly-gpu -docker run --runtime=nvidia -it -p 8888:8888 tensorflow/tensorflow:nightly-gpu - -# If you do not have a GPU, use the CPU-only image -docker pull tensorflow/tensorflow:nightly -docker run -it -p 8888:8888 tensorflow/tensorflow:nightly -``` - -And then visit http://localhost:8888 in your browser for a Jupyter notebook -environment. - -## Getting Started - -With TensorFlow installed, eager execution is enabled via a single call: - -```python -import tensorflow as tf - -import tensorflow.contrib.eager as tfe - -tfe.enable_eager_execution() -``` - -Enabling eager execution changes how TensorFlow functions behave (in particular, -`Tensor` objects will reference concrete values instead of being symbolic -handles to nodes in a computational graph). As a result, eager execution should -be enabled at the beginning of a program and cannot be disabled afterwards in -the same program. - -Code examples in the rest of this guide assume that eager execution has been -enabled. - -## A library for numerical computation - -A significant fraction of the [TensorFlow -API](https://www.tensorflow.org/api_docs/python/) consists of numerical -operations: -[arithmetic operations](https://www.tensorflow.org/api_guides/python/math_ops#Arithmetic_Operators), -[matrix operations](https://www.tensorflow.org/api_guides/python/math_ops#Matrix_Math_Functions), -[linear algebra operations](https://www.tensorflow.org/versions/master/api_docs/python/tf/linalg), -etc. - -With eager execution enabled, these operations consume and return -multi-dimensional arrays as `Tensor` objects, similar to NumPy -[`ndarray`s](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.ndarray.html). -For example: - -```python -# Multiply two 2x2 matrices -x = tf.matmul([[1, 2], - [3, 4]], - [[4, 5], - [6, 7]]) -# Add one to each element -# (tf.add supports broadcasting) -y = tf.add(x, 1) - -# Create a random random 5x3 matrix -z = tf.random_uniform([5, 3]) - -print(x) -print(y) -print(z) -``` - -Output: - -``` -tf.Tensor( -[[16 19] - [36 43]], shape=(2, 2), dtype=int32) -tf.Tensor( -[[17 20] - [37 44]], shape=(2, 2), dtype=int32) -tf.Tensor( -[[ 0.25058532 0.0929395 0.54113817] - [ 0.3108716 0.93350542 0.84909797] - [ 0.53081679 0.12788558 0.01767385] - [ 0.29725885 0.33540785 0.83588314] - [ 0.38877153 0.39720535 0.78914213]], shape=(5, 3), dtype=float32) -``` - -For convenience, these operations can also be triggered via operator overloading -of the `Tensor` object. For example, the `+` operator is equivalent to `tf.add`, -`-` to `tf.subtract`, `*` to `tf.multiply`, etc.: - -```python -x = (tf.ones([1], dtype=tf.float32) + 1) * 2 - 1 -print(x) -``` - -Output: - -``` -tf.Tensor([ 3.], shape=(1,), dtype=float32) -``` - -### Converting to and from NumPy - -The operations above automatically convert Python objects (like lists of -numbers) and NumPy arrays to `Tensor` objects. `Tensor` objects can also be used -as NumPy arrays by numpy operations. - -```python -import numpy as np - -x = tf.add(1, 1) # tf.Tensor with a value of 2 -y = tf.add(np.array(1), np.array(1)) # tf.Tensor with a value of 2 -z = np.multiply(x, y) # numpy.int64 with a value of 4 -``` - -Alternatively, they can be explicitly converted using -[`tf.constant`](https://www.tensorflow.org/api_docs/python/tf/constant), as -shown in the next example. - -Conversely, you can call the `numpy()` method of a `Tensor` object' to obtain -its NumPy `ndarray` value. For example: - -```python -import numpy as np - -np_x = np.array(2., dtype=np.float32) -x = tf.constant(np_x) - -py_y = 3. -y = tf.constant(py_y) - -z = x + y + 1 - -print(z) -print(z.numpy()) -``` - -Output: - -``` -tf.Tensor(6.0, shape=(), dtype=float32) -6.0 -``` - -### GPU acceleration - -Many TensorFlow operations support GPU acceleration. With eager execution -enabled, [computation is *not* automatically -offloaded](https://www.tensorflow.org/tutorials/using_gpu) to GPUs. Instead, you -must explicitly specify when GPUs should be used. - -The simplest way to do this is to enclose your computation in a `with -tf.device('/gpu:0')` block. Also of interest is the `tfe.num_gpus()` function, -which returns the number of available GPUs. - -For example, consider this snippet to measure the time to multiply two 1000x1000 -matrices on CPU: - -```python -import time - -def measure(x): - # The very first time a GPU is used by TensorFlow, it is initialized. - # So exclude the first run from timing. - tf.matmul(x, x) - - start = time.time() - for i in range(10): - tf.matmul(x, x) - end = time.time() - - return "Took %s seconds to multiply a %s matrix by itself 10 times" % (end - start, x.shape) - -# Run on CPU: -with tf.device("/cpu:0"): - print("CPU: %s" % measure(tf.random_normal([1000, 1000]))) - -# If a GPU is available, run on GPU: -if tfe.num_gpus() > 0: - with tf.device("/gpu:0"): - print("GPU: %s" % measure(tf.random_normal([1000, 1000]))) -``` - -Output (exact numbers will depend on the characteristics of the hardware): - -```python -CPU: Took 0.145531892776 seconds to multiply a (1000, 1000) matrix by itself 10 times -GPU: Took 0.000458955764771 seconds to multiply a (1000, 1000) matrix by itself 10 times -``` - -Alternatively, methods on the `Tensor` object can be used to explicitly copy the -`Tensor` to a different device. Operations are typically executed on the device -on which the inputs are placed. For example: - -```python -x = tf.random_normal([10, 10]) - -x_gpu0 = x.gpu() -x_cpu = x.cpu() - -_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU -_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 - -if tfe.num_gpus() > 1: - x_gpu1 = x.gpu(1) - _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 -``` - -### Automatic Differentiation - -[Automatic -differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) is -very useful when implementing many machine learning algorithms (e.g., -[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training -neural networks). For this purpose, TensorFlow eager execution provides an -[autograd](https://github.com/HIPS/autograd)-style API for automatic -differentiation. Specifically, the functions: - -- `tfe.gradients_function(f)`: Returns a Python function that computes the - derivatives of the Python function `f` with respect to its arguments. `f` - must return a scalar value. When the returned function is invoked, it - returns a list of `Tensor` objects (one element for each argument of `f`). -- `tfe.value_and_gradients_function(f)`: Similar to `tfe.gradients_function`, - except that when the returned function is invoked, it returns the value of - `f` in addition to the list of derivatives of `f` with respect to its - arguments. - -These functions naturally apply to higher order differentiation as well. For -example: - -```python -def f(x): - return tf.multiply(x, x) # Or x * x -assert 9 == f(3.).numpy() - -df = tfe.gradients_function(f) -assert 6 == df(3.)[0].numpy() - -# Second order deriviative. -d2f = tfe.gradients_function(lambda x: df(x)[0]) -assert 2 == d2f(3.)[0].numpy() - -# Third order derivative: Will be None -d3f = tfe.gradients_function(lambda x : d2f(x)[0]) -assert None == d3f(3.)[0] -``` - -These functions can be used to train models. For example, consider the following -simple linear regression model: - -```python -def prediction(input, weight, bias): - return input * weight + bias - -# A toy dataset of points around 3 * x + 2 -NUM_EXAMPLES = 1000 -training_inputs = tf.random_normal([NUM_EXAMPLES]) -noise = tf.random_normal([NUM_EXAMPLES]) -training_outputs = training_inputs * 3 + 2 + noise - -# A loss function: Mean-squared error -def loss(weight, bias): - error = prediction(training_inputs, weight, bias) - training_outputs - return tf.reduce_mean(tf.square(error)) - -# Function that returns the derivative of loss with respect to -# weight and bias -grad = tfe.gradients_function(loss) - -# Train for 200 steps (starting from some random choice for W and B, on the same -# batch of data). -W = 5. -B = 10. -learning_rate = 0.01 -print("Initial loss: %f" % loss(W, B).numpy()) -for i in range(200): - (dW, dB) = grad(W, B) - W -= dW * learning_rate - B -= dB * learning_rate - if i % 20 == 0: - print("Loss at step %d: %f" % (i, loss(W, B).numpy())) -print("Final loss: %f" % loss(W, B).numpy()) -print("W, B = %f, %f" % (W.numpy(), B.numpy())) -``` - -Output: (the exact numbers may vary depending on the randomness in noise) - -``` -Initial loss: 66.730003 -Loss at step 0: 64.200096 -Loss at step 20: 29.872814 -Loss at step 40: 14.233772 -Loss at step 60: 7.090570 -Loss at step 80: 3.819887 -Loss at step 100: 2.318821 -Loss at step 120: 1.628385 -Loss at step 140: 1.310142 -Loss at step 160: 1.163167 -Loss at step 180: 1.095162 -Final loss: 1.064711 -W, B = 3.094944, 2.161383 -``` - -To utilize the GPU, place the code above within a `with tf.device("/gpu:0"):` -block. (However, this particular model, with only two floating point parameters, -is unlikely to benefit from GPU acceleration.) - -### Customizing gradients - -One may want to define custom gradients for an operation, or for a function. -This may be useful for multiple reasons, including providing a more efficient -or more [numerically stable](https://en.wikipedia.org/wiki/Numerical_stability) -gradient for a sequence of operations. - -For example, consider the function `log(1 + e^x)`, which commonly occurs in the -computation of cross entropy and log likelihoods. - -```python -def log1pexp(x): -  return tf.log(1 + tf.exp(x)) -grad_log1pexp = tfe.gradients_function(log1pexp) - -# Works fine at x = 0. -assert 0.5 == float(grad_log1pexp(0.)[0]) - -# Returns a `nan` at x = 100 due to numerical instability. -import math -assert math.isnan(float(grad_log1pexp(100.)[0])) -``` - -We can define a custom gradient for the above function that analytically -simplifies the gradient expression. - -```python -@tfe.custom_gradient -def log1pexp(x): -  e = tf.exp(x) -  def grad(dy): -    return dy * (1 - 1 / (1 + e)) -  return tf.log(1 + e), grad -grad_log1pexp = tfe.gradients_function(log1pexp) - -# Works as before at x = 0. -assert 0.5 == float(grad_log1pexp(0.)[0]) - -# But now works at x = 100 as well. -assert 1.0 == float(grad_log1pexp(100.)[0]) -``` -Also notice how the gradient function implementation reuses an expression -(`tf.exp(x)`) computed during the forward pass, hence making the gradient -computation more efficient by avoiding redundant computation. - -## Building and training models - -In practice, your computation may have many parameters to be optimized (by -computing derivatives). Encapsulating them into re-usable classes/objects -makes the code easier to follow than writing a single top-level function with -many arguments. - -In fact, eager execution encourages use of the [Keras](https://keras.io)-style -"Layer" classes in the -[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers) -module. - -Furthermore, you may want to apply more sophisticated techniques to compute -parameter updates, such as those in -[`tf.train.Optimizer`](https://www.tensorflow.org/api_guides/python/train#Optimizers) -implementations. - -This next section walks through using the same `Optimizer` and `Layer` APIs used -to build trainable TensorFlow graphs in an environment where eager execution is -enabled. - -### Variables and Optimizers - -`tfe.Variable` objects store mutable `Tensor` values that can be accessed during -training, making automatic differentiation easier. In particular, parameters of -a model can be encapsulated in Python classes as variables. - -`tfe.gradients_function(f)` introduced earlier computes the derivatives of `f` -with respect to its arguments. However, it requires all parameters of interest -to be arguments of `f`, which becomes cumbersome when `f` depends on a large -number of trainable parameters. - -`tfe.implicit_gradients` is an alternative function with some useful properties: - -- It computes the derivatives of `f` with respect to all the `tfe.Variable`s - used by `f`. -- When the returned function is invoked, it returns a list of - (gradient value, Variable object) tuples. - -Representing model parameters as `Variable` objects, along with the use of -`tfe.implicit_gradients`, typically results in better encapsulation. For -example, the linear regression model described above can be written into a -class: - -```python -class Model(object): - def __init__(self): - self.W = tfe.Variable(5., name='weight') - self.B = tfe.Variable(10., name='bias') - - def predict(self, inputs): - return inputs * self.W + self.B - - -# The loss function to be optimized -def loss(model, inputs, targets): - error = model.predict(inputs) - targets - return tf.reduce_mean(tf.square(error)) - -# A toy dataset of points around 3 * x + 2 -NUM_EXAMPLES = 1000 -training_inputs = tf.random_normal([NUM_EXAMPLES]) -noise = tf.random_normal([NUM_EXAMPLES]) -training_outputs = training_inputs * 3 + 2 + noise - -# Define: -# 1. A model -# 2. Derivatives of a loss function with respect to model parameters -# 3. A strategy for updating the variables based on the derivatives -model = Model() -grad = tfe.implicit_gradients(loss) -optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) - -# The training loop -print("Initial loss: %f" % - loss(model, training_inputs, training_outputs).numpy()) -for i in range(201): - optimizer.apply_gradients(grad(model, training_inputs, training_outputs)) - if i % 20 == 0: - print("Loss at step %d: %f" % - (i, loss(model, training_inputs, training_outputs).numpy())) -print("Final loss: %f" % loss(model, training_inputs, training_outputs).numpy()) -print("W, B = %s, %s" % (model.W.numpy(), model.B.numpy())) -``` - -Output: - -``` -Initial loss: 69.693184 -Loss at step 0: 66.987854 -Loss at step 20: 30.553387 -Loss at step 40: 14.250237 -Loss at step 60: 6.955020 -Loss at step 80: 3.690550 -Loss at step 100: 2.229739 -Loss at step 120: 1.576032 -Loss at step 140: 1.283496 -Loss at step 160: 1.152584 -Loss at step 180: 1.093999 -Final loss: 1.067780 -W, B = 3.0114281, 2.0865183 -``` - -Using `implicit_gradients` avoids the need to provide all the trainable -parameters of the model as arguments to the `loss` function. - -### Using Keras and the Layers API - -[Keras](https://keras.io) is a popular API for defining model structures. The -[`tf.keras.layers`](https://www.tensorflow.org/api_docs/python/tf/keras/layers) -module provides a set of building blocks for models and is implemented using the -`tf.layers.Layer` subclasses in the -[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers) -module. We encourage the use of these same building blocks when using -TensorFlow's eager execution feature. For example, the very same linear -regression model can be built using `tf.layers.Dense`: - -```python -class Model(object): - def __init__(self): - self.layer = tf.layers.Dense(1) - - def predict(self, inputs): - return self.layer(inputs) -``` - -The `tf.layers` API makes it more convenient to define more sophisticated -models. For example, the following will train an MNIST model: - -```python -class MNISTModel(object): - def __init__(self, data_format): - # 'channels_first' is typically faster on GPUs - # while 'channels_last' is typically faster on CPUs. - # See: https://www.tensorflow.org/performance/performance_guide#data_formats - if data_format == 'channels_first': - self._input_shape = [-1, 1, 28, 28] - else: - self._input_shape = [-1, 28, 28, 1] - self.conv1 = tf.layers.Conv2D(32, 5, - padding='same', - activation=tf.nn.relu, - data_format=data_format) - self.max_pool2d = tf.layers.MaxPooling2D( - (2, 2), (2, 2), padding='same', data_format=data_format) - self.conv2 = tf.layers.Conv2D(64, 5, - padding='same', - activation=tf.nn.relu, - data_format=data_format) - self.dense1 = tf.layers.Dense(1024, activation=tf.nn.relu) - self.dropout = tf.layers.Dropout(0.5) - self.dense2 = tf.layers.Dense(10) - - def predict(self, inputs): - x = tf.reshape(inputs, self._input_shape) - x = self.max_pool2d(self.conv1(x)) - x = self.max_pool2d(self.conv2(x)) - x = tf.layers.flatten(x) - x = self.dropout(self.dense1(x)) - return self.dense2(x) - -def loss(model, inputs, targets): - return tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits( - logits=model.predict(inputs), labels=targets)) - - -# Load the training and validation data -from tensorflow.examples.tutorials.mnist import input_data -data = input_data.read_data_sets("./mnist_data", one_hot=True) - -# Train -device = "gpu:0" if tfe.num_gpus() else "cpu:0" -model = MNISTModel('channels_first' if tfe.num_gpus() else 'channels_last') -optimizer = tf.train.AdamOptimizer(learning_rate=1e-4) -grad = tfe.implicit_gradients(loss) -for i in range(20001): - with tf.device(device): - (inputs, targets) = data.train.next_batch(50) - optimizer.apply_gradients(grad(model, inputs, targets)) - if i % 100 == 0: - print("Step %d: Loss on training set : %f" % - (i, loss(model, inputs, targets).numpy())) -print("Loss on test set: %f" % loss(model, data.test.images, data.test.labels).numpy()) -``` - -For a more complete example, see [the example in the tensorflow/models -repository](https://github.com/tensorflow/models/tree/master/official/mnist/mnist_eager.py). - -### Checkpointing trained variables - -TensorFlow Variables (`tfe.Variable`) provide a way to represent shared, -persistent state of your model. The `tfe.Checkpoint` class provides a means to -save and restore variables to and from _checkpoints_. - -For example: - -```python -# Create variables. -x = tfe.Variable(10.) -y = tfe.Variable(5.) - -# Indicate that the variables should be saved as "x" and "y". -checkpoint = tfe.Checkpoint(x=x, y=y) - -# Assign new values to the variables and save. -x.assign(2.) -save_path = checkpoint.save('/tmp/ckpt') - -# Change the variable after saving. -x.assign(11.) -assert 16. == (x + y).numpy() # 11 + 5 - -# Restore the values in the checkpoint. -checkpoint.restore(save_path) # save_path='/tmp/ckpt-1' - -assert 7. == (x + y).numpy() # 2 + 5 -``` - -### `tf.keras.Model` - -You may often want to organize your models using classes, like the `MNISTModel` -class described above. We recommend inheriting from the `tf.keras.Model` class -as it provides conveniences like keeping track of all model variables. - -Sub-classes of `tf.keras.Model` may register `Layer`s (like classes in -[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers), or [Keras -layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers)) by -assigning them to attributes (`self.name = layer_object`) and define the -computation in an implementation of `call()`. - -Note that `tf.layers.Layer` objects (like `tf.layers.Dense`) create variables -lazily, when the first input is encountered. - -For example, consider the following two-layer neural network: - -```python -class TwoLayerNet(tf.keras.Model): - def __init__(self): - super(TwoLayerNet, self).__init__() - self.layer1 = tf.layers.Dense(2, activation=tf.nn.relu, use_bias=False) - self.layer2 = tf.layers.Dense(3, use_bias=False) - - def call(self, x): - return self.layer2(self.layer1(x)) - -net = TwoLayerNet() - -# No variables created yet -assert 0 == len(net.variables) - -# They are created on first input: -inp = tf.constant([[1.]]) - -# Since input is a 1x1 matrix, net.l1 has 2 units and net.l2 has 3 units, -# the output is the product of a 1x1 matrix with a 1x2 matrix with a 2x3 -# matrix. -assert [1, 3] == net(inp).shape.as_list() # Invoke net; get output shape. -assert 1 == len(net.layer1.variables) -assert 1 == len(net.layer2.variables) -assert 2 == len(net.variables) # weights for each layer. -assert [1, 2] == net.variables[0].shape.as_list() # weights of layer1. -assert [2, 3] == net.variables[1].shape.as_list() # weights of layer2. -``` - -The `tf.keras.Model` class is itself a sub-class of `tf.layers.Layer`. This -allows instances of `tf.keras.Model` to be embedded in other models. For -example: - -```python -class ThreeLayerNet(tf.keras.Model): - def __init__(self): - super(ThreeLayerNet, self).__init__() - self.a = TwoLayerNet() - self.b = tf.layers.Dense(4, use_bias=False) - - def call(self, x): - return self.b(self.a(x)) - -net = ThreeLayerNet() - -assert [1, 4] == net(inp).shape.as_list() -assert 3 == len(net.variables) -assert [1, 2] == net.variables[0].shape.as_list() -assert [2, 3] == net.variables[1].shape.as_list() -assert [3, 4] == net.variables[2].shape.as_list() -``` - -See more examples in -[`tensorflow/contrib/eager/python/examples`](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples). - -`tfe.Checkpoint` provides a convenient way to save and load training -checkpoints. Let's define something simple to train. We set an objective for the -output of our network, choose an optimizer, and a location for the checkpoint: - -```python -objective = tf.constant([[2., 3., 4., 5.]]) -optimizer = tf.train.AdamOptimizer(0.01) -checkpoint_directory = '/tmp/tfe_example' -checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') -net = ThreeLayerNet() -``` - -We group them in a `tfe.Checkpoint` and request that it be restored. This -ensures that variables created by these objects are restored before their values -are used. Our training loop is the same whether starting training or resuming -from a previous checkpoint: - -```python -global_step = tf.train.get_or_create_global_step() -checkpoint = tfe.Checkpoint( - global_step=global_step, optimizer=optimizer, network=net) -checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) -for _ in range(100): - loss_fn = lambda: tf.norm(net(inp) - objective) - optimizer.minimize(loss_fn, global_step=global_step) - if tf.equal(global_step % 20, 0): - print("Step %d, output %s" % (global_step.numpy(), - net(inp).numpy())) - # Save the checkpoint. - checkpoint.save(checkpoint_prefix) -``` - -The first time it runs, `Model` variables are initialized randomly. Then the -output is trained to match the objective we've set: - -``` -Step 20, output [[ 0.03575622 0.29863232 0.03474367 0.24735749]] -Step 40, output [[ 0.40646029 0.9856872 0.46851286 0.95358551]] -Step 60, output [[ 1.74541104 2.800704 1.79055595 2.74783421]] -Step 80, output [[ 2.14977384 3.44340849 3.96120024 5.16242075]] -Step 100, output [[ 1.99943113 3.02364397 3.93500996 4.9610076 ]] -``` - -In subsequent iterations, variables are initialized with the values read from -the latest checkpoint. Running the same code again, we continue from where we -left off: - -``` -Step 120, output [[ 1.99234128 3.0271616 3.98732996 4.96401167]] -Step 140, output [[ 2.00133467 3.01270437 4.00616646 5.00406504]] -Step 160, output [[ 1.99647415 2.9956708 3.99064088 4.99632359]] -Step 180, output [[ 2.00699997 3.00904822 4.00706148 5.01193142]] -Step 200, output [[ 1.98334622 2.98249531 3.97375059 4.97123432]] -``` - - -### Summaries, metrics and TensorBoard - -[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard) -is a popular tool for understanding, debugging and optimizing the model training -process. To benefit from the visualizations offered by TensorBoard, summary -events need to be written during the course of execution of your program. You -might find many Tensorflow programs that include the -[`tf.summary`](https://www.tensorflow.org/api_guides/python/summary) operations -during graph construction. - -`tf.summary` operations are *not* compatible with eager execution, but an -equivalent alternative exists in -[`tf.contrib.summary`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/summary) -that is compatible with both eager execution and graph construction. - -During model construction simply insert summary operations like -`tf.contrib.summary.scalar`. These operations do nothing by default, unless a -summary writer is currently active and a writing policy is set. - -For example, to record summaries once every 100 global steps, use: - -```python -tf.train.get_or_create_global_step() # Ensuring the global step variable exists -writer = tf.contrib.summary.create_file_writer(logdir) - -for _ in range(iterations): - with writer.as_default(): - with tf.contrib.summary.record_summaries_every_n_global_steps(100): - # your model code goes here - tf.contrib.summary.scalar('loss', loss) - # ... -``` - -See the full mnist example in -[`tensorflow/contrib/eager/python/examples/mnist`](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/mnist) -for a full model using `tf.contrib.summary`. - -Similarly to summaries, the metrics in `tf.metrics` are currently not compatible -with eager execution. We instead provide object-oriented metrics in the -`tfe.metrics` package, which are compatible with graph construction as well. - -Metrics in the `tfe.metrics`, such as `tfe.metrics.Mean` and -`tfe.Metrics.Accuracy`, all implement an intuitive object-oriented -interface. Here's an example of how to use the `tfe.metrics.Mean` metric: - -```python -# Metrics are objects, which can be created and destroyed. -my_mean = tfe.metrics.Mean(name='my_mean') -# While a metric is active, you can call it as a function to accumulate into its -# internal state. -my_mean(0.0) -my_mean(10.0) -# Once you've finished updating the metric, you can get its result. In this case -# a simple average over all the calls to it. If a summary writer is active the -# metric will write the appropriate summaries using the metric name. -assert 5.0 == my_mean.result().numpy() -``` - -For a full example of a model using metrics for evaluation, see the mnist -example in -[`tensorflow/contrib/eager/python/examples/mnist`](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/mnist). - -### Input Pipelines - -The discussion above has been centered around the computation executed by your -model. The -[`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) -module provides APIs to build complex input pipelines from simple, reusable -pieces. - -If you're familiar with constructing `tf.data.Dataset` objects when building -TensorFlow graphs, the same API calls are used when eager execution is enabled. -However, the process of iterating over elements of the dataset differs between -eager execution and graph construction. When eager execution is enabled, the -discussion on iterator creation using `make_one_shot_iterator()` and -`get_next()` in the -[Programmer's Guide](https://www.tensorflow.org/programmers_guide/datasets) is -*not* applicable. Instead, a more Pythonic `Iterator` class is available. - -For example: - -```python -# Create a source Dataset from in-memory numpy arrays. -# For reading from files on disk, you may want to use other Dataset classes -# like the TextLineDataset or the TFRecordDataset. -dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]) - -# Apply transformations, shuffling, batching etc. -dataset = dataset.map(tf.square).shuffle(2).batch(2) - -# Use tfe.Iterator to iterate over the dataset. -for x in tfe.Iterator(dataset): - print(x) -``` - -Output: - -``` -tf.Tensor([4 9], shape=(2,), dtype=int32) -tf.Tensor([16 25], shape=(2,), dtype=int32) -tf.Tensor([36 1], shape=(2,), dtype=int32) -``` - -## Interoperating with Graphs - -Eager execution improves the process of model development in Python; however, -because it is in its earliest stages, it does not yet support some features -available to [TensorFlow -graphs](https://www.tensorflow.org/get_started/get_started#the_computational_graph) -that are desirable when deploying models in production. In particular, eager -execution does not yet support distributed training, exporting models (to other -[programming languages](https://www.tensorflow.org/api_docs/), [TensorFlow -serving](https://www.tensorflow.org/serving/), and mobile applications), and -various memory and computation optimizations that are applied to TensorFlow's -dataflow graphs. - -That said, the APIs used to build modes are exactly the same whether executing -eagerly or constructing graphs. This means that you can iteratively develop your -model with eager execution enabled and later, if needed, use the same code to -reap the benefits of representing models as computational graphs. - -For example, the same model definition used to construct a graph in -[mnist.py`](https://github.com/tensorflow/models/tree/master/official/mnist/mnist.py) -can be trained with eager execution enabled as in [`mnist_eager.py`](https://github.com/tensorflow/models/tree/master/official/mnist/mnist_eager.py). - -Other models in the [examples -directory](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/) -demonstrate this as well. - -Some differences worth noting: - -- There is no notion of a `tf.placeholder` or a `tf.Session` when eager - execution is enabled. -- Many properties on the `tf.Tensor` object, like `tf.Tensor.name`, - `tf.Tensor.op`, `tf.Tensor.inputs` are not meaningful when eager execution - is enabled and their use will raise an `AttributeError`. -- To use `tfe.implicit_gradients` in graph construction, variables must be - created with [`use_resource=True`] provided to - [`tf.get_variable()`](https://www.tensorflow.org/api_docs/python/tf/get_variable) - or - [`tf.variable_scope()`](https://www.tensorflow.org/api_docs/python/tf/variable_scope). -- Some API calls (such as the functional-style `tf.layers.dense`, - `tf.layers.conv2d`) are not compatible with eager execution. Use of such - methods should raise an error indicating the alternative (e.g., the - `tf.layers.Dense` and `tf.layers.Conv2D` classes). - -## What next? +immediately: concrete values are returned, instead of creating a computational +graph that is executed later. -Please give eager execution a spin. This feature is in early stages and is -evolving, so we welcome your feedback via issues on GitHub (see [known -issues](https://github.com/tensorflow/tensorflow/labels/comp:eager)). +A user guide is available: https://www.tensorflow.org/programmers_guide/eager +([source file](../../../../docs_src/programmers_guide/eager.md)) -You may want to browse through some sample code, including benchmarks for some: +We welcome feedback through [GitHub issues](https://github.com/tensorflow/tensorflow/labels/comp:eager). -- [Linear Regression](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/linear_regression) -- [MNIST handwritten digit classifier](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/mnist) -- [ResNet50 image classification](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/resnet50) -- [RNN to generate colors](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/rnn_colorbot) -- [RNN language model](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/rnn_ptb) +Sample code is available, including benchmarks for some: +- [Linear Regression](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/linear_regression) +- [MNIST handwritten digit classifier](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/mnist) +- [ResNet50 image classification](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/resnet50) +- [RNN to generate colors](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/rnn_colorbot) +- [RNN language model](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples/rnn_ptb) diff --git a/tensorflow/docs_src/programmers_guide/eager.md b/tensorflow/docs_src/programmers_guide/eager.md new file mode 100644 index 0000000000..9ae1e602f4 --- /dev/null +++ b/tensorflow/docs_src/programmers_guide/eager.md @@ -0,0 +1,992 @@ +# Eager Execution + +TensorFlow's eager execution is an imperative programming environment that +evaluates operations immediately, without an extra graph-building step. +Operations return concrete values instead of constructing a computational graph +to run later. This makes it easy to get started with TensorFlow, debug models, +reduce boilerplate code, and is fun! To follow along with this guide, run the +code samples below in an interactive `python` interpreter. + +Eager execution supports most TensorFlow operations and GPU acceleration. +Automatic differentiation uses a dynamically-constructed tape instead of a static +graph to compute gradients. Eager execution is a flexible machine learning +platform for research and experimentation that provides: + +* *An intuitive interface* —Structure your code naturally and use Python data + structures. Quickly iterate on small models and small data. +* *Easier debugging* —Call ops directly to inspect running models and test + changes. Use standard Python debugging tools for immediate error reporting. +* *Natural control flow* —Use Python control flow instead of graph control flow, + including support for dynamic models. + +For a collection of examples running in eager execution, see: +[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). + +Note: Some models may experience increased overhead with eager execution enabled. +Performance improvements are ongoing, but please +[file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a +problem and share your benchmarks. + +## Setup and basic usage + +Install TensorFlow 1.7 to include the updates for eager execution: + +``` +$ pip install --pre --upgrade tensorflow +``` + +To start eager execution, add `tf.enable_eager_execution()` to the beginning of +the program or console session. Do not add this operation to other modules that +the program calls. + +```py +from __future__ import absolute_import, division, print_function + +import tensorflow as tf + +tf.enable_eager_execution() +``` + +Now you can run TensorFlow operations and the results will return immediately: + +```py +tf.executing_eagerly() # => True + +x = [[2.]] +m = tf.matmul(x, x) +print("hello, {}".format(m)) # => "hello, [[4.]]" +``` + +Enabling eager execution changes how TensorFlow operations behave—now they +immediately evaluate and return their values to Python. `tf.Tensor` objects +reference concrete values instead of symbolic handles to nodes in a computational +graph. Since there isn't a computational graph to build and run later in a +session, it's easy to inspect results using `print()` or a debugger. Evaluating, +printing, and checking tensor values does not break the flow for computing +gradients. + +Eager execution works nicely with [NumPy](http://www.numpy.org/). NumPy +operations accept `tf.Tensor` arguments. TensorFlow +[math operations](https://www.tensorflow.org/api_guides/python/math_ops) convert +Python objects and NumPy arrays to `tf.Tensor` objects. The +`tf.Tensor.numpy` method returns the object's value as a NumPy `ndarray`. + +```py +a = tf.constant([[1, 2], + [3, 4]]) +print(a) +# => tf.Tensor([[1 2] +# [3 4]], shape=(2, 2), dtype=int32) + +# Broadcasting support +b = tf.add(a, 1) +print(b) +# => tf.Tensor([[2 3] +# [4 5]], shape=(2, 2), dtype=int32) + +# Operator overloading is supported +print(a * b) +# => tf.Tensor([[ 2 6] +# [12 20]], shape=(2, 2), dtype=int32) + +# Use NumPy values +import numpy as np + +c = np.multiply(a, b) +print(c) +# => [[ 2 6] +# [12 20]] + +# Obtain numpy value from a tensor: +print(a.numpy()) +# => [[1 2] +# [3 4]] +``` + +The `tfe` module contains symbols available to both eager and graph execution +environments and is useful for writing code to [work with graphs](#work_with_graphs): + +```py +import tensorflow.contrib.eager as tfe +``` + +## Eager training + +### Automatic differentiation + +[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation) +is useful for implementing machine learning algorithms such as +[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training +neural networks. During eager execution, use `tfe.GradientTape` to trace +operations for computing gradients later. + +`tfe.GradientTape` is an opt-in feature to provide maximal performance when +not tracing. Since different operations can occur during each call, all +forward-pass operations get recorded to a "tape". To compute the gradient, play +the tape backwards and then discard. A particular `tfe.GradientTape` can only +be computed once, subsequent calls throw a runtime error. + +```py +w = tfe.Variable([[1.0]]) +with tfe.GradientTape() as tape: + loss = w * w + +grad = tape.gradient(loss, [w]) +print(grad) # => [tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)] +``` + +Here's an example of `tfe.GradientTape` that records forward-pass operations +to train a simple model: + +```py +# A toy dataset of points around 3 * x + 2 +NUM_EXAMPLES = 1000 +training_inputs = tf.random_normal([NUM_EXAMPLES]) +noise = tf.random_normal([NUM_EXAMPLES]) +training_outputs = training_inputs * 3 + 2 + noise + +def prediction(input, weight, bias): + return input * weight + bias + +# A loss function using mean-squared error +def loss(weights, biases): + error = prediction(training_inputs, weights, biases) - training_outputs + return tf.reduce_mean(tf.square(error)) + +# Return the derivative of loss with respect to weight and bias +def grad(weights, biases): + with tfe.GradientTape() as tape: + loss_value = loss(weights, biases) + return tape.gradient(loss_value, [weights, biases]) + +train_steps = 200 +learning_rate = 0.01 +# Start with arbitrary values for W and B on the same batch of data +W = tfe.Variable(5.) +B = tfe.Variable(10.) + +print("Initial loss: {:.3f}".format(loss(W, B))) + +for i in range(train_steps): + dW, dB = grad(W, B) + W.assign_sub(dW * learning_rate) + B.assign_sub(dB * learning_rate) + if i % 20 == 0: + print("Loss at step {:03d}: {:.3f}".format(i, loss(W, B))) + +print("Final loss: {:.3f}".format(loss(W, B))) +print("W = {}, B = {}".format(W.numpy(), B.numpy())) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 71.204 +Loss at step 000: 68.333 +Loss at step 020: 30.222 +Loss at step 040: 13.691 +Loss at step 060: 6.508 +Loss at step 080: 3.382 +Loss at step 100: 2.018 +Loss at step 120: 1.422 +Loss at step 140: 1.161 +Loss at step 160: 1.046 +Loss at step 180: 0.996 +Final loss: 0.974 +W = 3.01582956314, B = 2.1191945076 +``` + +Replay the `tfe.GradientTape` to compute the gradients and apply them in a +training loop. This is demonstrated in an excerpt from the +[mnist_eager.py](https://github.com/tensorflow/models/blob/master/official/mnist/mnist_eager.py) +example: + +```py +dataset = tf.data.Dataset.from_tensor_slices((data.train.images, + data.train.labels)) +... +for (batch, (images, labels)) in enumerate(tfe.Iterator(dataset)): + ... + with tfe.GradientTape() as tape: + logits = model(images, training=True) + loss_value = loss(logits, labels) + ... + grads = tape.gradient(loss_value, model.variables) + optimizer.apply_gradients(zip(grads, model.variables), + global_step=tf.train.get_or_create_global_step()) +``` + +#### Dynamic models + +`tfe.GradientTape` can also be used in dynamic models. This example for a +[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search) +algorithm looks like normal NumPy code, except there are gradients and is +differentiable, despite the complex control flow: + +```py +def line_search_step(fn, init_x, rate=1.0): + with tfe.GradientTape() as tape: + # Variables are automatically recorded, but manually watch a tensor + tape.watch(init_x) + value = fn(init_x) + grad, = tape.gradient(value, [init_x]) + grad_norm = tf.reduce_sum(grad * grad) + init_value = value + while value > init_value - rate * grad_norm: + x = init_x - rate * grad + value = fn(x) + rate /= 2.0 + return x, value +``` + +#### Additional functions to compute gradients + +`tfe.GradientTape` is a powerful interface for computing gradients, but there +is another [Autograd](https://github.com/HIPS/autograd)-style API available for +automatic differentiation. These functions are useful if writing math code with +only tensors and gradient functions, and without `tfe.Variables`: + +* `tfe.gradients_function` —Returns a function that computes the derivatives + of its input function parameter with respect to its arguments. The input + function parameter must return a scalar value. When the returned function is + invoked, it returns a list of `tf.Tensor` objects: one element for each + argument of the input function. Since anything of interest must be passed as a + function parameter, this becomes unwieldy if there's a dependency on many + trainable parameters. +* `tfe.value_and_gradients_function` —Similar to + `tfe.gradients_function`, but when the returned function is invoked, it + returns the value from the input function in addition to the list of + derivatives of the input function with respect to its arguments. + +In the following example, `tfe.gradients_function` takes the `square` +function as an argument and returns a function that computes the partial +derivatives of `square` with respect to its inputs. To calculate the derivative +of `square` at `3`, `grad(3.0)` returns `6`. + +```py +def square(x): + return tf.multiply(x, x) + +grad = tfe.gradients_function(square) + +square(3.) # => 9.0 +grad(3.) # => [6.0] + +# The second-order derivative of square: +gradgrad = tfe.gradients_function(lambda x: grad(x)[0]) +gradgrad(3.) # => [2.0] + +# The third-order derivative is None: +gradgradgrad = tfe.gradients_function(lambda x: gradgrad(x)[0]) +gradgradgrad(3.) # => [None] + + +# With flow control: +def abs(x): + return x if x > 0. else -x + +grad = tfe.gradients_function(abs) + +grad(3.) # => [1.0] +grad(-3.) # => [-1.0] +``` + +### Custom gradients + +Custom gradients are an easy way to override gradients in eager and graph +execution. Within the forward function, define the gradient with respect to the +inputs, outputs, or intermediate results. For example, here's an easy way to clip +the norm of the gradients in the backward pass: + +```py +@tf.custom_gradient +def clip_gradient_by_norm(x, norm): + y = tf.identity(x) + def grad_fn(dresult): + return [tf.clip_by_norm(dresult, norm), None] + return y, grad_fn +``` + +Custom gradients are commonly used to provide a numerically stable gradient for a +sequence of operations: + +```py +def log1pexp(x): + return tf.log(1 + tf.exp(x)) +grad_log1pexp = tfe.gradients_function(log1pexp) + +# The gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# However, x = 100 fails because of numerical instability. +grad_log1pexp(100.) # => [nan] +``` + + +Here, the `log1pexp` function can be analytically simplified with a custom +gradient. The implementation below reuses the value for `tf.exp(x)` that is +computed during the forward pass—making it more efficient by eliminating +redundant calculations: + +```py +@tfe.custom_gradient +def log1pexp(x): + e = tf.exp(x) + def grad(dy): + return dy * (1 - 1 / (1 + e)) + return tf.log(1 + e), grad + +grad_log1pexp = tfe.gradients_function(log1pexp) + +# As before, the gradient computation works fine at x = 0. +grad_log1pexp(0.) # => [0.5] + +# And the gradient computation also works at x = 100. +grad_log1pexp(100.) # => [1.0] +``` + + +## Build and train models + +There are many parameters to optimize when calculating derivatives. TensorFlow +code is easier to read when structured into reusable classes and objects instead +of a single top-level function. Eager execution encourages the use of the +Keras-style layer classes in the `tf.keras.layers` module. Additionally, the +`tf.train.Optimizer` classes provide sophisticated techniques to calculate +parameter updates. + +The following example creates a multi-layer model that classifies the standard +[MNIST handwritten digits](https://www.tensorflow.org/tutorials/layers). It +demonstrates the optimizer and layer APIs to build trainable graphs in an eager +execution environment. + +### Build a model + +The `tf.keras.Sequential` model is a linear stack of layers. It is easy to +use for basic models: + +```py +model = tf.keras.Sequential([ + tf.keras.layers.Dense(10, input_shape=(784,)), # must declare input shape + tf.keras.layers.Dense(10) +]) +``` + +Alternatively, organize models in classes by inheriting from `tf.keras.Model`. +This is a container for layers that is a layer itself, allowing `tf.keras.Model` +objects to contain other `tf.keras.Model` objects. + +```py +class MNISTModel(tf.keras.Model): + def __init__(self): + super(MNISTModel, self).__init__() + self.dense1 = tf.keras.layers.Dense(units=10) + self.dense2 = tf.keras.layers.Dense(units=10) + + def call(self, input): + """Run the model.""" + result = self.dense1(input) + result = self.dense2(result) + result = self.dense2(result) # reuse variables from dense2 layer + return result + +model = MNISTModel() +``` + +It's not required to set an input shape for the `tf.keras.Model` class since +the parameters are set the first time input is passed to the layer. + +`tf.keras.layers` classes create and contain their own model variables that +are tied to the lifetime of their layer objects. To share layer variables, share +their objects. + +### Train a model + +Even without training, call the model and inspect the output in eager execution: + +```py +# Create a tensor representing a blank image +batch = tf.zeros([1, 1, 784]) +print(batch.shape) # => (1, 1, 784) + +result = model(batch) +# => tf.Tensor([[[ 0. 0., ..., 0.]]], shape=(1, 1, 10), dtype=float32) +``` + +This example uses the +[dataset.py module](https://github.com/tensorflow/models/blob/master/official/mnist/dataset.py) +from the +[TensorFlow MNIST example](https://github.com/tensorflow/models/tree/master/official/mnist), +download this file to your local directory. Run the following to download the +MNIST data files to your working directory and prepare a `tf.data.Dataset` +for training: + +```py +import dataset # download dataset.py file +dataset_train = dataset.train('./datasets').shuffle(60000).repeat(4).batch(32) +``` + +To train a model, define a loss function to optimize and then calculate +gradients. Use an optimizer to update the variables: + +```py +def loss(model, x, y): + prediction = model(x) + return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=prediction) + +def grad(model, inputs, targets): + with tfe.GradientTape() as tape: + loss_value = loss(model, inputs, targets) + return tape.gradient(loss_value, model.variables) + +optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) + +x, y = tfe.Iterator(dataset_train).next() +print("Initial loss: {:.3f}".format(loss(model, x, y))) + +# Training loop +for (i, (x, y)) in enumerate(tfe.Iterator(dataset_train)): + # Calculate derivatives of the input function with respect to its parameters. + grads = grad(model, x, y) + # Apply the gradient to the model + optimizer.apply_gradients(zip(grads, model.variables), + global_step=tf.train.get_or_create_global_step()) + if i % 200 == 0: + print("Loss at step {:04d}: {:.3f}".format(i, loss(model, x, y))) + +print("Final loss: {:.3f}".format(loss(model, x, y))) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 2.674 +Loss at step 0000: 2.593 +Loss at step 0200: 2.143 +Loss at step 0400: 2.009 +Loss at step 0600: 2.103 +Loss at step 0800: 1.621 +Loss at step 1000: 1.695 +... +Loss at step 6600: 0.602 +Loss at step 6800: 0.557 +Loss at step 7000: 0.499 +Loss at step 7200: 0.744 +Loss at step 7400: 0.681 +Final loss: 0.670 +``` + +And for faster training, move the computation to a GPU: + +```py +with tf.device("/gpu:0"): + for (i, (x, y)) in enumerate(tfe.Iterator(dataset_train)): + # minimize() is equivalent to the grad() and apply_gradients() calls. + optimizer.minimize(lambda: loss(model, x, y), + global_step=tf.train.get_or_create_global_step()) +``` + +### Variables and optimizers + +`tfe.Variable` objects store mutable `tf.Tensor` values accessed during +training to make automatic differentiation easier. The parameters of a model can +be encapsulated in classes as variables. + +Better encapsulate model parameters by using `tfe.Variable` with +`tfe.GradientTape`. For example, the automatic differentiation example above +can be rewritten: + +```py +class Model(tf.keras.Model): + def __init__(self): + super(Model, self).__init__() + self.W = tfe.Variable(5., name='weight') + self.B = tfe.Variable(10., name='bias') + def predict(self, inputs): + return inputs * self.W + self.B + +# A toy dataset of points around 3 * x + 2 +NUM_EXAMPLES = 2000 +training_inputs = tf.random_normal([NUM_EXAMPLES]) +noise = tf.random_normal([NUM_EXAMPLES]) +training_outputs = training_inputs * 3 + 2 + noise + +# The loss function to be optimized +def loss(model, inputs, targets): + error = model.predict(inputs) - targets + return tf.reduce_mean(tf.square(error)) + +def grad(model, inputs, targets): + with tfe.GradientTape() as tape: + loss_value = loss(model, inputs, targets) + return tape.gradient(loss_value, [model.W, model.B]) + +# Define: +# 1. A model. +# 2. Derivatives of a loss function with respect to model parameters. +# 3. A strategy for updating the variables based on the derivatives. +model = Model() +optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) + +print("Initial loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) + +# Training loop +for i in range(300): + grads = grad(model, training_inputs, training_outputs) + optimizer.apply_gradients(zip(grads, [model.W, model.B]), + global_step=tf.train.get_or_create_global_step()) + if i % 20 == 0: + print("Loss at step {:03d}: {:.3f}".format(i, loss(model, training_inputs, training_outputs))) + +print("Final loss: {:.3f}".format(loss(model, training_inputs, training_outputs))) +print("W = {}, B = {}".format(model.W.numpy(), model.B.numpy())) +``` + +Output (exact numbers may vary): + +``` +Initial loss: 69.066 +Loss at step 000: 66.368 +Loss at step 020: 30.107 +Loss at step 040: 13.959 +Loss at step 060: 6.769 +Loss at step 080: 3.567 +Loss at step 100: 2.141 +Loss at step 120: 1.506 +Loss at step 140: 1.223 +Loss at step 160: 1.097 +Loss at step 180: 1.041 +Loss at step 200: 1.016 +Loss at step 220: 1.005 +Loss at step 240: 1.000 +Loss at step 260: 0.998 +Loss at step 280: 0.997 +Final loss: 0.996 +W = 2.99431324005, B = 2.02129220963 +``` + +## Use objects for state during eager execution + +With graph execution, program state (such as the variables) is stored in global +collections and their lifetime is managed by the `tf.Session` object. In +contrast, during eager execution the lifetime of state objects is determined by +the lifetime of their corresponding Python object. + +### Variables are objects + +During eager execution, variables persist until the last reference to the object +is removed, and is then deleted. + +```py +with tf.device("gpu:0"): + v = tfe.Variable(tf.random_normal([1000, 1000])) + v = None # v no longer takes up GPU memory +``` + +### Object-based saving + +`tfe.Checkpoint` can save and restore `tfe.Variable`s to and from +checkpoints: + +```py +x = tfe.Variable(10.) + +checkpoint = tfe.Checkpoint(x=x) # save as "x" + +x.assign(2.) # Assign a new value to the variables and save. +save_path = checkpoint.save('./ckpt/') + +x.assign(11.) # Change the variable after saving. + +# Restore values from the checkpoint +checkpoint.restore(save_path) + +print(x) # => 2.0 +``` + +To save and load models, `tfe.Checkpoint` stores the internal state of objects, +without requiring hiiden variables. To record the state of a `model`, +an `optimizer`, and a global step, pass them to a `tfe.Checkpoint`: + +```py +model = MyModel() +optimizer = tf.train.AdamOptimizer(learning_rate=0.001) +checkpoint_dir = ‘/path/to/model_dir’ +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") +root = tfe.Checkpoint(optimizer=optimizer, + model=model, + optimizer_step=tf.train.get_or_create_global_step()) + +root.save(file_prefix=checkpoint_prefix) +# or +root.restore(tf.train.latest_checkpoint(checkpoint_dir)) +``` + +### Object-oriented metrics + +`tfe.metrics` are stored as objects. Update a metric by passing the new data to +the callable, and retrieve the result using the `tfe.metrics.result` method, +for example: + +```py +m = tfe.metrics.Mean("loss") +m(0) +m(5) +m.result() # => 2.5 +m([8, 9]) +m.result() # => 5.5 +``` + +#### Summaries and TensorBoard + +@{$summaries_and_tensorboard$TensorBoard} is a visualization tool for +understanding, debugging and optimizing the model training process. It uses +summary events that are written while executing the program. + +`tf.contrib.summary` is compatible with both eager and graph execution +environments. Summary operations, such as `tf.contrib.summary.scalar`, are +inserted during model construction. For example, to record summaries once every +100 global steps: + +```py +tf.train.get_or_create_global_step() # return global step var +writer = tf.contrib.summary.create_file_writer(logdir) +global_step=tf.train.get_or_create_global_step() + +writer.set_as_default() + +for _ in range(iterations): + global_step.assign_add(1) + # Must include a record_summaries method + with tf.contrib.summary.record_summaries_every_n_global_steps(100): + # your model code goes here + tf.contrib.summary.scalar('loss', loss) + ... +``` + +## Performance + +Computation is not automatically offloaded to GPUs during eager execution. To +explicitly direct a computation to a GPU, enclose it in a +`tf.device('/gpu:0')` block: + +```py +import time + +def measure(x, steps): + # TensorFlow initializes a GPU the first time it's used, exclude from timing. + tf.matmul(x, x) + start = time.time() + for i in range(steps): + x = tf.matmul(x, x) + _ = x.numpy() # Make sure to execute op and not just enqueue it + end = time.time() + return end - start + +shape = (1000, 1000) +steps = 200 +print("Time to multiply a {} matrix by itself {} times:".format(shape, steps)) + +# Run on CPU: +with tf.device("/cpu:0"): + print("CPU: {} secs".format(measure(tf.random_normal(shape), steps))) + +# Run on GPU, if available: +if tfe.num_gpus() > 0: + with tf.device("/gpu:0"): + print("GPU: {} secs".format(measure(tf.random_normal(shape), steps))) +else: + print("GPU: not found") +``` + +Output (exact numbers depend on hardware): + +``` +Time to multiply a (1000, 1000) matrix by itself 200 times: +CPU: 4.614904403686523 secs +GPU: 0.5581181049346924 secs +``` + +A `tf.Tensor` object can be copied to a different device to execute its +operations: + +```py +x = tf.random_normal([10, 10]) + +x_gpu0 = x.gpu() +x_cpu = x.cpu() + +_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU +_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 + +if tfe.num_gpus() > 1: + x_gpu1 = x.gpu(1) + _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 +``` + +### Benchmarks + +For compute-heavy models, such as +[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50) +training on a GPU, eager execution performance is comparable to graph execution. +But this gap grows larger for models with less computation and there is work to +be done for optimizing hot code paths for models with lots of small operations. + + +## Work with graphs + +While eager execution makes development and debugging more interactive, +TensorFlow graph execution has advantages for distributed training, performance +optimizations, and production deployment. However, writing graph code can feel +different than writing regular Python code and more difficult to debug. + +For building and training graph-constructed models, the Python program first +builds a graph representing the computation, then invokes `Session.run` to send +the graph for execution on the C++-based runtime. This provides: + +* Automatic differentiation using static autodiff. +* Simple deployment to a platform independent server. +* Graph-based optimizations (common subexpression elimination, constant-folding, etc.). +* Compilation and kernel fusion. +* Automatic distribution and replication (placing nodes on the distributed system). + +Deploying code written for eager execution is more difficult: either generate a +graph from the model, or run the Python runtime and code directly on the server. + +### Write compatible code + +The same code written for eager execution will also build a graph during graph +execution. Do this by simply running the same code in a new Python session where +eager execution is not enabled. + +Most TensorFlow operations work during eager execution, but there are some things +to keep in mind: + +* Use `tf.data` for input processing instead of queues. It's faster and easier. +* Use object-oriented layer APIs—like `tf.keras.layers` and + `tf.keras.Model`—since they have explicit storage for variables. +* Most model code works the same during eager and graph execution, but there are + exceptions. (For example, dynamic models using Python control flow to change the + computation based on inputs.) +* Once eager execution is enabled with `tf.enable_eager_execution`, it + cannot be turned off. Start a new Python session to return to graph execution. + +It's best to write code for both eager execution *and* graph execution. This +gives you eager's interactive experimentation and debuggability with the +distributed performance benefits of graph execution. + +Write, debug, and iterate in eager execution, then import the model graph for +production deployment. Use `tfe.Checkpoint` to save and restore model +variables, this allows movement between eager and graph execution environments. +See the examples in: +[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). + +### Use eager execution in a graph environment + +Selectively enable eager execution in a TensorFlow graph environment using +`tfe.py_func`. This is used when `tf.enable_eager_execution()` has *not* +been called. + +```py +def my_py_func(x): + x = tf.matmul(x, x) # You can use tf ops + print(x) # but it's eager! + return x + +with tf.Session() as sess: + x = tf.placeholder(dtype=tf.float32) + # Call eager function in graph! + pf = tfe.py_func(my_py_func, [x], tf.float32) + sess.run(pf, feed_dict={x: [[2.0]]}) # [[4.0]] +``` + + +A `tfe.Checkpoint` stores the complete internal state of the objects passed to it. Nothing else is implicitly included. To record the state of a `model`, an `optimizer`, and a global step pass each one to the checkpoint's constructor: + +```py +model = MyModel() +optimizer = tf.train.AdamOptimizer(learning_rate=0.001) +checkpoint_dir = ‘/path/to/model_dir’ +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") +root = tfe.Checkpoint(optimizer=optimizer, + model=model, + optimizer_step=tf.train.get_or_create_global_step()) + +root.save(file_prefix=checkpoint_prefix) +# or +root.restore(tf.train.latest_checkpoint(checkpoint_dir)) +``` + +### Object-oriented metrics + +`tfe.metrics` are stored as objects. Update a metric by passing the new data to +the callable, and retrieve the result using the `tfe.metrics.result` method, +for example: + +```py +m = tfe.metrics.Mean("loss") +m(0) +m(5) +m.result() # => 2.5 +m([8, 9]) +m.result() # => 5.5 +``` + +#### Summaries and TensorBoard + +@{$summaries_and_tensorboard$TensorBoard} is a visualization tool for +understanding, debugging and optimizing the model training process. It uses +summary events that are written while executing the program. + +`tf.contrib.summary` is compatible with both eager and graph execution +environments. Summary operations, such as `tf.contrib.summary.scalar`, are +inserted during model construction. For example, to record summaries once every +100 global steps: + +```py +tf.train.get_or_create_global_step() # return global step var +writer = tf.contrib.summary.create_file_writer(logdir) + +for _ in range(iterations): + with writer.as_default(): + with tf.contrib.summary.record_summaries_every_n_global_steps(100): + # your model code goes here + tf.contrib.summary.scalar('loss', loss) + ... +``` + +## Performance + +Computation is not automatically offloaded to GPUs during eager execution. To +explicitly direct a computation to a GPU, enclose it in a +`tf.device('/gpu:0')` block: + +```py +import time + +def measure(x, steps): + # TensorFlow initializes a GPU the first time it's used, exclude from timing. + tf.matmul(x, x) + start = time.time() + for i in range(steps): + x = tf.matmul(x, x) + _ = x.numpy() # Make sure to execute op and not just enqueue it + end = time.time() + return end - start + +shape = (1000, 1000) +steps = 200 +print("Time to multiply a {} matrix by itself {} times:".format(shape, steps)) + +# Run on CPU: +with tf.device("/cpu:0"): + print("CPU: {} secs".format(measure(tf.random_normal(shape), steps))) + +# Run on GPU, if available: +if tfe.num_gpus() > 0: + with tf.device("/gpu:0"): + print("GPU: {} secs".format(measure(tf.random_normal(shape), steps))) +else: + print("GPU: not found") +``` + +Output (exact numbers depend on hardware): + +``` +Time to multiply a (1000, 1000) matrix by itself 200 times: +CPU: 4.614904403686523 secs +GPU: 0.5581181049346924 secs +``` + +A `tf.Tensor` object can be copied to a different device to execute its +operations: + +```py +x = tf.random_normal([10, 10]) + +x_gpu0 = x.gpu() +x_cpu = x.cpu() + +_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU +_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 + +if tfe.num_gpus() > 1: + x_gpu1 = x.gpu(1) + _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 +``` + +### Benchmarks + +For compute-heavy models, such as +[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50) +training on a GPU, eager execution performance is comparable to graph execution. +But this gap grows larger for models with less computation and there is work to +be done for optimizing hot code paths for models with lots of small operations. + + +## Work with graphs + +While eager execution makes development and debugging more interactive, +TensorFlow graph execution has advantages for distributed training, performance +optimizations, and production deployment. However, writing graph code can feel +different than writing regular Python code and more difficult to debug. + +For building and training graph-constructed models, the Python program first +builds a graph representing the computation, then invokes `Session.run` to send +the graph for execution on the C++-based runtime. This provides: + +* Automatic differentiation using static autodiff. +* Simple deployment to a platform independent server. +* Graph-based optimizations (common subexpression elimination, constant-folding, etc.). +* Compilation and kernel fusion. +* Automatic distribution and replication (placing nodes on the distributed system). + +Deploying code written for eager execution is more difficult: either generate a +graph from the model, or run the Python runtime and code directly on the server. + +### Write compatible code + +The same code written for eager execution will also build a graph during graph +execution. Do this by simply running the same code in a new Python session where +eager execution is not enabled. + +Most TensorFlow operations work during eager execution, but there are some things +to keep in mind: + +* Use `tf.data` for input processing instead of queues. It's faster and easier. +* Use object-oriented layer APIs—like `tf.keras.layers` and + `tf.keras.Model`—since they have explicit storage for variables. +* Most model code works the same during eager and graph execution, but there are + exceptions. (For example, dynamic models using Python control flow to change the + computation based on inputs.) +* Once eager execution is enabled with `tf.enable_eager_execution`, it + cannot be turned off. Start a new Python session to return to graph execution. + +It's best to write code for both eager execution *and* graph execution. This +gives you eager's interactive experimentation and debuggability with the +distributed performance benefits of graph execution. + +Write, debug, and iterate in eager execution, then import the model graph for +production deployment. Use `tfe.Checkpoint` to save and restore model +variables, this allows movement between eager and graph execution environments. +See the examples in: +[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). + +### Use eager execution in a graph environment + +Selectively enable eager execution in a TensorFlow graph environment using +`tfe.py_func`. This is used when `tf.enable_eager_execution()` has *not* +been called. + +```py +def my_py_func(x): + x = tf.matmul(x, x) # You can use tf ops + print(x) # but it's eager! + return x + +with tf.Session() as sess: + x = tf.placeholder(dtype=tf.float32) + # Call eager function in graph! + pf = tfe.py_func(my_py_func, [x], tf.float32) + sess.run(pf, feed_dict={x: [[2.0]]}) # [[4.0]] +``` diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 3fe4cb2dda..7ac63bf2e0 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -1,8 +1,9 @@ index.md ### High Level APIs -estimators.md +eager.md datasets.md +estimators.md ### Low Level APIs low_level_intro.md -- GitLab From c6911faaf4702096064542790d8c9e8e6f938d52 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 29 Mar 2018 13:35:34 -0700 Subject: [PATCH 701/960] Turns eager device placement on by default. Change the device policy to have silent copies, which are logged when RunMetadata tracking is enabled. In the process, changed TensorHandle to always keep its context around if it gets one. Changed TFE_TensorHandleResolve to, if necessary, copy to the CPU (since the user has no control as to whether this copy is needed by default). PiperOrigin-RevId: 190978086 --- tensorflow/c/eager/c_api.cc | 100 ++++++++++++------ tensorflow/c/eager/c_api.h | 18 ++-- tensorflow/c/eager/c_api_internal.h | 5 +- tensorflow/c/eager/c_api_test.cc | 10 +- .../core/common_runtime/eager/context.cc | 15 +-- .../core/common_runtime/eager/context.h | 16 +-- .../core/common_runtime/eager/execute.cc | 6 +- .../common_runtime/eager/tensor_handle.cc | 17 +-- .../core/common_runtime/eager/tensor_handle.h | 15 ++- tensorflow/core/kernels/function_ops.cc | 5 + tensorflow/python/eager/core_test.py | 12 +-- tensorflow/python/eager/function_test.py | 33 +++--- tensorflow/python/kernel_tests/BUILD | 4 + .../resource_variable_ops_test.py | 1 + 14 files changed, 148 insertions(+), 109 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 028865d360..bb1492fca2 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -201,18 +201,24 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { const tensorflow::Tensor* t = nullptr; status->status = h->handle->TensorAndDevice(&t, &d, &op_device); if (!status->status.ok()) return nullptr; + tensorflow::TensorHandle* h_cpu = nullptr; if (!IsCPU(d)) { - TF_SetStatus(status, TF_UNIMPLEMENTED, - tensorflow::strings::StrCat( - "TFE_TensorHandle can be resolved iff it is on CPU (this " - "handle is on ", - d->name(), - "). Consider using TFE_TensorHandleCopyToDevice to get a " - "copy of the tensor on CPU") - .c_str()); - return nullptr; + status->status = h->handle->CopyToDevice( + h->handle->Context(), h->handle->Context()->HostCPU(), &h_cpu); + if (!status->status.ok()) { + return nullptr; + } + status->status = h_cpu->TensorAndDevice(&t, &d, &op_device); + if (!status->status.ok()) { + h_cpu->Unref(); + return nullptr; + } } - return tensorflow::TF_TensorFromTensor(*t, status); + TF_Tensor* retval = tensorflow::TF_TensorFromTensor(*t, status); + if (h_cpu != nullptr) { + h_cpu->Unref(); + } + return retval; } } // extern "C" @@ -258,17 +264,6 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { } void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { - if (op->device == nullptr) { - // Questionable heuristic ... - // - If a device was explicitly set on the op, always use that. - // - If not, place on the first non-host device seen. - tensorflow::Device* d = nullptr; - // TODO(agarwal): This call may block if h is not ready. Avoid this if - // possible. - status->status = h->handle->Device(&d); - if (!status->status.ok()) return; - if (!IsCPU(d)) op->device = d; - } h->handle->Ref(); op->inputs.push_back(h->handle); op->attrs.NumInputs(op->inputs.size()); @@ -436,10 +431,39 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, namespace { +// Initializes the step stats if needed. +void MaybeInitializeStepStats(tensorflow::StepStats* step_stats, + tensorflow::EagerContext* ctx) { + // Lazily initialize the RunMetadata with information about all devices if + // this is the first call. + while (step_stats->dev_stats_size() < ctx->devices()->size()) { + int device_idx = step_stats->dev_stats_size(); + auto* dev_stats = step_stats->add_dev_stats(); + dev_stats->set_device(ctx->devices()->at(device_idx)->name()); + } +} + +int StepStatsDeviceIndex(tensorflow::StepStats* step_stats, + tensorflow::EagerContext* ctx, + tensorflow::Device* device) { + // Find the current device's index. + if (device == nullptr) { + device = ctx->HostCPU(); + } + for (int i = 0; i < ctx->devices()->size(); ++i) { + if (ctx->devices()->at(i) == device || + ctx->devices()->at(i)->name() == device->name()) { + return i; + } + } + // TODO(apassos) do not fall back to host CPU if device is unknown. + return 0; +} + tensorflow::Status ValidateInputTypeAndPlacement( - tensorflow::EagerContext* ctx, tensorflow::Device* host_device, - tensorflow::Device* op_device, TFE_Op* op, - const tensorflow::OpKernel* kernel) { + tensorflow::EagerContext* ctx, tensorflow::Device* op_device, TFE_Op* op, + const tensorflow::OpKernel* kernel, tensorflow::RunMetadata* run_metadata) { + tensorflow::Device* host_device = ctx->HostCPU(); const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types(); if (memtypes.size() != op->inputs.size()) { return tensorflow::errors::InvalidArgument( @@ -489,9 +513,22 @@ tensorflow::Status ValidateInputTypeAndPlacement( } // We are only here if the policy is warn or silent copies, so we should // trigger a copy. + auto pre_time = tensorflow::Env::Default()->NowMicros(); tensorflow::TensorHandle* copied_tensor = nullptr; tensorflow::Status status = tensorflow::EagerCopyToDevice( handle, ctx, expected_device->name().c_str(), &copied_tensor); + if (run_metadata != nullptr) { + auto* step_stats = run_metadata->mutable_step_stats(); + MaybeInitializeStepStats(step_stats, ctx); + // Record the sending on the source device for now. + int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device); + auto* dev_stats = step_stats->mutable_dev_stats(device_idx); + auto* node_stats = dev_stats->add_node_stats(); + node_stats->set_node_name("_Send"); + node_stats->set_all_start_micros(pre_time); + node_stats->set_op_end_rel_micros( + tensorflow::Env::Default()->NowMicros() - pre_time); + } if (!status.ok()) { if (copied_tensor != nullptr) copied_tensor->Unref(); return tensorflow::errors::Internal( @@ -785,8 +822,12 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, tensorflow::Device* input_op_device = nullptr; status->status = op->inputs[i]->OpDevice(&input_op_device); if (!status->status.ok()) return; + VLOG(2) << "for op " << op->name << " input " << i << " " + << tensorflow::DataTypeString(op->inputs[i]->dtype) << " " + << (input_op_device == nullptr ? "cpu" : input_op_device->name()) + << " " << (op->device == nullptr ? "cpu" : op->device->name()); if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && - input_op_device != op->device) { + (input_op_device != op->device || input_op_device == nullptr)) { tensorflow::Device* d = input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device; VLOG(1) << "Changing device of operation " << op->name << " to " @@ -796,16 +837,13 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } tensorflow::Device* device = op->device; - if (!ctx->context.SoftPlacement() && device == nullptr) { - device = ctx->context.HostCPU(); - } tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key); if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); - if (ctx->context.SoftPlacement() && device == nullptr) { + if (device == nullptr) { device = SelectDevice(ndef, ctx, status); if (!status->status.ok()) { return; @@ -867,7 +905,9 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, device = kernel->device(); } status->status = ValidateInputTypeAndPlacement( - &ctx->context, ctx->context.HostCPU(), device, op, kernel->kernel()); + &ctx->context, device, op, kernel->kernel(), + ctx->context.ShouldStoreMetadata() ? ctx->context.RunMetadataProto() + : nullptr); if (!status->status.ok()) return; std::unique_ptr maybe_stats; if (ctx->context.ShouldStoreMetadata()) { diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index a5029bf211..3926c22ce1 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -61,17 +61,15 @@ TF_CAPI_EXPORT extern void TFE_ContextOptionsSetConfig( // Controls how to act when we try to run an operation on a given device but // some input tensors are not on that device. typedef enum TFE_ContextDevicePlacementPolicy { - // Running operations with input tensors on the wrong device will fail. When - // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. + // Running operations with input tensors on the wrong device will fail. TFE_DEVICE_PLACEMENT_EXPLICIT = 0, // Copy the tensor to the right device but log a warning. TFE_DEVICE_PLACEMENT_WARN = 1, - // Silently copy the tensor, which has a performance cost since the - // operation will be blocked till the copy completes. + // Silently copy the tensor, which has a performance cost since the operation + // will be blocked till the copy completes. This is the default placement + // policy. TFE_DEVICE_PLACEMENT_SILENT = 2, - // Default placement policy which silently copies int32 tensors but not other - // dtypes. When soft placement is enabled acts like - // TFE_DEVICE_PLACEMENT_SILENT. + // Placement policy which silently copies int32 tensors but not other dtypes. TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, } TFE_ContextDevicePlacementPolicy; @@ -162,7 +160,11 @@ TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName( TFE_TensorHandle* h, TF_Status* status); -// This function will block till the operation that produces `h` has completed. +// This function will block till the operation that produces `h` has +// completed. The memory returned might alias the internal memory used by +// TensorFlow. Hence, callers should not mutate this memory (for example by +// modifying the memory region pointed to by TF_TensorData() on the returned +// TF_Tensor). TF_CAPI_EXPORT extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status); diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index e6d2ab75ff..05dc64f521 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -50,8 +50,7 @@ struct TFE_ContextOptions { TF_SessionOptions session_options; // true if async execution is enabled. bool async = false; - TFE_ContextDevicePlacementPolicy policy{ - TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; + TFE_ContextDevicePlacementPolicy policy{TFE_DEVICE_PLACEMENT_SILENT}; }; struct TFE_Context { @@ -71,7 +70,7 @@ struct TFE_Context { struct TFE_TensorHandle { TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d, tensorflow::Device* op_device) - : handle(new tensorflow::TensorHandle(t, d, op_device)) {} + : handle(new tensorflow::TensorHandle(t, d, op_device, nullptr)) {} TFE_TensorHandle(tensorflow::uint64 node_id, tensorflow::DataType dtype, tensorflow::EagerContext* ctx) diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index d88a6c1dda..701175e494 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -590,7 +590,13 @@ void Execute_MatMul_CPU_Runtime_Error(bool async) { TFE_TensorHandle* m1 = TestMatrixTensorHandle(); TFE_TensorHandle* m2 = TestMatrixTensorHandle3X2(); TFE_Op* matmul = MatMulOp(ctx, m1, m2); + TFE_OpSetDevice(matmul, "/job:localhost/replica:0/task:0/device:CPU:0", + status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_Op* matmul2 = MatMulOp(ctx, m1, m1); + TFE_OpSetDevice(matmul2, "/job:localhost/replica:0/task:0/device:CPU:0", + status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_TensorHandle* retvals[1] = {nullptr}; int num_retvals = 1; TFE_Execute(matmul, &retvals[0], &num_retvals, status); @@ -693,14 +699,14 @@ TEST(CAPI, Execute_Min_CPU) { TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteTensorHandle(retvals[0]); - TFE_DeleteContext(ctx, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); float output[2] = {0}; EXPECT_EQ(sizeof(output), TF_TensorByteSize(t)); memcpy(&output[0], TF_TensorData(t), TF_TensorByteSize(t)); TF_DeleteTensor(t); EXPECT_EQ(1, output[0]); EXPECT_EQ(3, output[1]); + TFE_DeleteContext(ctx, status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteStatus(status); } diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index 0566329f18..9c47ad6187 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -17,24 +17,11 @@ limitations under the License. namespace tensorflow { -ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, ContextDevicePlacementPolicy original_policy) { - if (!soft_placement) { - return original_policy; - } - if (original_policy == DEVICE_PLACEMENT_EXPLICIT || - original_policy == DEVICE_PLACEMENT_SILENT_FOR_INT32) { - return DEVICE_PLACEMENT_SILENT; - } - return original_policy; -} - EagerContext::EagerContext(const SessionOptions& opts, ContextDevicePlacementPolicy default_policy, bool async, std::unique_ptr device_mgr, Rendezvous* rendezvous) - : soft_placement_(opts.config.allow_soft_placement()), - policy_(PlacementPolicy(soft_placement_, default_policy)), + : policy_(default_policy), device_manager_(std::move(device_mgr)), devices_(device_manager_->ListDevices()), rendezvous_(rendezvous), diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index bc97219dae..a88fa5eaa4 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -43,23 +43,18 @@ namespace tensorflow { // Note: there's a copy enum in eager/c_api.h. It should be kept in sync. enum ContextDevicePlacementPolicy { - // Running operations with input tensors on the wrong device will fail. When - // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. + // Running operations with input tensors on the wrong device will fail. DEVICE_PLACEMENT_EXPLICIT = 0, // Copy the tensor to the right device but log a warning. DEVICE_PLACEMENT_WARN = 1, - // Silently copy the tensor, which has a performance cost since the - // operation will be blocked till the copy completes. + // Silently copy the tensor, which has a performance cost since the operation + // will be blocked till the copy completes. This is the default policy. DEVICE_PLACEMENT_SILENT = 2, // Default placement policy which silently copies int32 tensors but not other - // dtypes. When soft placement is enabled acts like - // TFE_DEVICE_PLACEMENT_SILENT. + // dtypes. DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, }; -ContextDevicePlacementPolicy PlacementPolicy( - bool soft_placement, ContextDevicePlacementPolicy original_policy); - class EagerContext { public: explicit EagerContext(const SessionOptions& opts, @@ -116,8 +111,6 @@ class EagerContext { Device* HostCPU() { return devices_[0]; } - bool SoftPlacement() { return soft_placement_; } - uint64 NextId() { return executor_.NextId(); } void ExecutorAdd(EagerNode* node) { executor_.Add(node); } @@ -148,7 +141,6 @@ class EagerContext { FunctionLibraryDefinition* FuncLibDef() { return &func_lib_def_; } private: - const bool soft_placement_; const ContextDevicePlacementPolicy policy_; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 4f16e42568..98e8471102 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -36,10 +36,6 @@ Status EagerExecute(EagerContext* ctx, Device* device, const gtl::InlinedVector& op_inputs, KernelAndDevice* kernel, NodeExecStats* maybe_stats, TensorHandle** retvals, int num_retvals) { - if (!ctx->SoftPlacement() && device == nullptr) { - device = ctx->HostCPU(); - } - if (device == nullptr) { // TODO(apassos) debug how the assignment below might return a different // device from the one requested above. @@ -100,7 +96,7 @@ Status EagerExecute(EagerContext* ctx, Device* device, d = nullptr; } if (retvals[i] == nullptr) { - retvals[i] = new TensorHandle(outputs[i], d, op_device); + retvals[i] = new TensorHandle(outputs[i], d, op_device, ctx); } else { retvals[i]->SetTensorAndDevice(outputs[i], d, op_device); } diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index 328cd5dd5c..8e11f7b710 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -47,7 +47,7 @@ namespace tensorflow { bool TensorHandle::IsReady() { if (node_id == 0) return true; mutex_lock l(ctx_mutex_); - return ctx_ == nullptr; + return is_ready_; } Status TensorHandle::WaitReady() { @@ -55,7 +55,7 @@ Status TensorHandle::WaitReady() { EagerExecutor* executor = nullptr; { mutex_lock l(ctx_mutex_); - if (ctx_ == nullptr) return Status::OK(); + if (is_ready_) return Status::OK(); executor = ctx_->Executor(); } return executor->WaitFor(node_id); @@ -97,9 +97,10 @@ void TensorHandle::SetTensorAndDevice(const tensorflow::Tensor& tensor, tensorflow::Device* device, tensorflow::Device* op_device) { mutex_lock l(ctx_mutex_); - DCHECK(node_id > 0 && ctx_) << "SetTensorAndDevice should be only called " - << "on non-ready handles."; - ctx_ = nullptr; + DCHECK(node_id > 0 && !is_ready_) + << "SetTensorAndDevice should be only called " + << "on non-ready handles."; + is_ready_ = true; tensor_ = tensor; device_ = device; op_device_ = op_device; @@ -122,7 +123,7 @@ Status TensorHandle::CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, const bool both_on_cpu = src_cpu && dst_cpu; if (is_same_device || both_on_cpu) { dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(*src, dstd, dstd); + *output = new tensorflow::TensorHandle(*src, dstd, dstd, ctx); return tensorflow::Status::OK(); } if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && @@ -139,7 +140,7 @@ Status TensorHandle::CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); if (src->shape().num_elements() == 0) { dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(dst, dstd, dstd); + *output = new tensorflow::TensorHandle(dst, dstd, dstd, ctx); return tensorflow::Status::OK(); } tensorflow::DeviceContext* src_device_context = nullptr; @@ -170,7 +171,7 @@ Status TensorHandle::CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, n.WaitForNotification(); if (status.ok()) { dstd = dst_cpu ? nullptr : dstd; - *output = new tensorflow::TensorHandle(dst, dstd, dstd); + *output = new tensorflow::TensorHandle(dst, dstd, dstd, ctx); } return status; } diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index eb69a13c06..d66c4d95e2 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -49,13 +49,14 @@ namespace tensorflow { // (unrelated to python TensorHandle). class TensorHandle : public core::RefCounted { public: - TensorHandle(const Tensor& t, Device* d, Device* op_device) + TensorHandle(const Tensor& t, Device* d, Device* op_device, EagerContext* ctx) : dtype(t.dtype()), node_id(0), tensor_(t), device_(d), op_device_(op_device), - ctx_(nullptr) {} + ctx_(ctx), + is_ready_(true) {} TensorHandle(uint64 node_id, DataType dtype, EagerContext* ctx) : dtype(dtype), @@ -63,7 +64,8 @@ class TensorHandle : public core::RefCounted { tensor_(dtype), device_(nullptr), op_device_(nullptr), - ctx_(ctx) { + ctx_(ctx), + is_ready_(ctx == nullptr) { DCHECK_GT(node_id, 0); } @@ -88,6 +90,12 @@ class TensorHandle : public core::RefCounted { Status CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd, TensorHandle** output); + // Warning: can return nullptr for CPU tensors. + EagerContext* Context() { + mutex_lock ml(ctx_mutex_); + return ctx_; + } + // dtype for the handle. It must be the same as t.dtype() once the handle is // ready. const DataType dtype; @@ -126,6 +134,7 @@ class TensorHandle : public core::RefCounted { // typically true when the handle was produced during async execution. // `ctx` object is not owned and should outlive this handle. EagerContext* ctx_ GUARDED_BY(ctx_mutex_); + bool is_ready_ GUARDED_BY(ctx_mutex_); }; } // namespace tensorflow diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 351aad7213..f8e0267578 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -144,6 +144,11 @@ TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp) .HostMemory("input") .TypeConstraint("T"), RetvalOp); +REGISTER_KERNEL_BUILDER(Name(kRetOp) + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("input"), + RetvalOp); #undef REGISTER class PassOn : public OpKernel { diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 5f19f64846..3fabe7060e 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -116,8 +116,7 @@ class TFETest(test_util.TensorFlowTestCase): cpu_stats = step_stats.dev_stats[0] self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0', cpu_stats.device) - self.assertEqual(len(cpu_stats.node_stats), 1) - self.assertEqual(cpu_stats.node_stats[0].node_name, 'Add') + self.assertGreaterEqual(len(cpu_stats.node_stats), 1) def testShouldCopy(self): if not context.context().num_gpus(): @@ -658,10 +657,11 @@ class SendRecvTest(test_util.TensorFlowTestCase): with ops.device('GPU:0'): t0 = constant_op.constant(1.0) self._send(t0, 't0', self.cpu_device) - self.assertAllEqual( - self._recv(dtypes.float32, 't0', gpu_device_name), - 1.0) - self._send(constant_op.constant(2.0), 't1', gpu_device_name) + with ops.device('cpu:0'): + self.assertAllEqual( + self._recv(dtypes.float32, 't0', gpu_device_name), + 1.0) + self._send(constant_op.constant(2.0), 't1', gpu_device_name) with ops.device('GPU:0'): self.assertAllEqual( self._recv(dtypes.float32, 't1', self.cpu_device), diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index fd1d2c25ff..9af197981b 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -26,7 +26,6 @@ from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import function as tf_function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -377,23 +376,23 @@ class FunctionTest(test.TestCase): self.assertAllEqual(f(constant_op.constant(1.0)), 2.0) def testGradientOfGatherWithDefun(self): + with ops.device('cpu:0'): + v = resource_variable_ops.ResourceVariable([0.0, 1.0, 2.0]) - v = resource_variable_ops.ResourceVariable([0.0, 1.0, 2.0]) + def sum_gather(): + return math_ops.reduce_sum(array_ops.gather(v, [1, 2])) - def sum_gather(): - return math_ops.reduce_sum(array_ops.gather(v, [1, 2])) + grad_fn = backprop.implicit_grad(sum_gather) + gradient = grad_fn() + defun_grad_fn = backprop.implicit_grad(function.defun(sum_gather)) + defun_gradient = defun_grad_fn() + self.assertEqual(len(gradient), len(defun_gradient)) - grad_fn = backprop.implicit_grad(sum_gather) - gradient = grad_fn() - defun_grad_fn = backprop.implicit_grad(function.defun(sum_gather)) - defun_gradient = defun_grad_fn() - self.assertEqual(len(gradient), len(defun_gradient)) - - gradient = gradient[0][0] - defun_gradient = defun_gradient[0][0] - self.assertAllEqual(gradient.values, defun_gradient.values) - self.assertAllEqual(gradient.indices, defun_gradient.indices) - self.assertAllEqual(gradient.dense_shape, defun_gradient.dense_shape) + gradient = gradient[0][0] + defun_gradient = defun_gradient[0][0] + self.assertAllEqual(gradient.values, defun_gradient.values) + self.assertAllEqual(gradient.indices, defun_gradient.indices) + self.assertAllEqual(gradient.dense_shape, defun_gradient.dense_shape) def testReturningIndexedSlicesWithDefun(self): @@ -476,9 +475,7 @@ class FunctionTest(test.TestCase): reshape = function.defun(array_ops.reshape) value = constant_op.constant([1., 2.]) shape = constant_op.constant([2, 1]).gpu() - with self.assertRaises(errors.InvalidArgumentError): - with ops.device('gpu:0'): - reshape(value, shape) + reshape(value, shape) # No error is raised def testDifferentiableFunctionNoneOutputs(self): diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ea210346c1..5eceb9f768 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -96,6 +96,10 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], grpc_enabled = True, + tags = [ + "no_gpu", + "nogpu", + ], ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 742564f9bf..c31d5a1f91 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -87,6 +87,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): with context.eager_mode(): handle = resource_variable_ops.var_handle_op( dtype=dtypes.int32, shape=[1], name="foo") + resource_variable_ops.assign_variable_op(handle, 1) with self.assertRaisesRegexp(errors.InvalidArgumentError, "Trying to read variable with wrong dtype. " "Expected float got int32."): -- GitLab From 1d5069d9f01d509ecd42614b056d3df4d4ba74ac Mon Sep 17 00:00:00 2001 From: Chris Tava Date: Thu, 29 Mar 2018 16:41:02 -0400 Subject: [PATCH 702/960] Updating install_golang.sh - bumping to 1.10 (#17989) --- tensorflow/tools/ci_build/install/install_golang.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index e1edd62cc5..124ad82e91 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.9.2.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.10.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz -- GitLab From 26fdbe7e8b3ec7fe799654cb72e849a6bfb3c5bf Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Thu, 29 Mar 2018 13:59:36 -0700 Subject: [PATCH 703/960] [XLA] Remove note about what implementations do for DynamicSlice and DynamicUpdateSlice. It is impossible to commit to a particular "implementation-defined behavior" for all implementations. PiperOrigin-RevId: 190981804 --- .../docs_src/performance/xla/operation_semantics.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 32f249cf10..217ab596b7 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -788,9 +788,7 @@ DynamicSlice extracts a sub-array from the input array at dynamic dimension: [start, start + size). The shape of `start_indices` must be rank == 1, with dimension size equal to the rank of `operand`. Note: handling of out-of-bounds slice indices (generated by incorrect runtime -calculation of 'start_indices') is currently implementation-defined. Currently, -slice indices are computed modulo input dimension sizes to prevent out-of-bound -array accesses, but this behavior may change in future implementations. +calculation of 'start_indices') is currently implementation-defined. `DynamicSlice(operand, start_indices, size_indices)` @@ -847,9 +845,7 @@ is updated. The shape of `start_indices` must be rank == 1, with dimension size equal to the rank of `operand`. Note: handling of out-of-bounds slice indices (generated by incorrect runtime -calculation of 'start_indices') is currently implementation-defined. Currently, -slice indices are computed modulo update dimension sizes to prevent out-of-bound -array accesses, but this behavior may change in future implementations. +calculation of 'start_indices') is currently implementation-defined. `DynamicUpdateSlice(operand, update, start_indices)` -- GitLab From 1a9663e9e06075c5b5f8984bb95b36f3458edccf Mon Sep 17 00:00:00 2001 From: Younghee Kwon Date: Thu, 29 Mar 2018 14:17:16 -0700 Subject: [PATCH 704/960] boosted_trees: post-submit clean up - non-public objects are renamed. - is_single_machine is set properly when run_config is not populated properly (i.e. empty). PiperOrigin-RevId: 190984693 --- .../estimator/python/estimator/boosted_trees.py | 12 ++++++------ tensorflow/python/estimator/canned/boosted_trees.py | 12 ++++++------ .../python/estimator/canned/boosted_trees_test.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 5880164519..314c54ed00 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -67,20 +67,20 @@ class _BoostedTreesEstimator(estimator.Estimator): tree_complexity: regularization factor to penalize trees with more leaves. config: `RunConfig` object to configure the runtime settings. """ - # TODO(youngheek): param validations. - + # pylint:disable=protected-access # HParams for the model. - tree_hparams = canned_boosted_trees.TreeHParams( + tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, tree_complexity) def _model_fn(features, labels, mode, config): - return canned_boosted_trees._bt_model_fn( # pylint: disable=protected-access + return canned_boosted_trees._bt_model_fn( features, labels, mode, head, feature_columns, tree_hparams, n_batches_per_layer, config) super(_BoostedTreesEstimator, self).__init__( model_fn=_model_fn, model_dir=model_dir, config=config) + # pylint:enable=protected-access def boosted_trees_classifier_train_in_memory( @@ -182,7 +182,7 @@ def boosted_trees_classifier_train_in_memory( n_classes, weight_column, label_vocabulary=label_vocabulary)) # HParams for the model. - tree_hparams = canned_boosted_trees.TreeHParams( + tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, tree_complexity) @@ -298,7 +298,7 @@ def boosted_trees_regressor_train_in_memory( weight_column) # HParams for the model. - tree_hparams = canned_boosted_trees.TreeHParams( + tree_hparams = canned_boosted_trees._TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, tree_complexity) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index a9bbabd598..7f1bcc31f2 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -40,7 +40,7 @@ from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import tf_export -TreeHParams = collections.namedtuple( +_TreeHParams = collections.namedtuple( 'TreeHParams', ['n_trees', 'max_depth', 'learning_rate', 'l1', 'l2', 'tree_complexity']) @@ -259,8 +259,8 @@ def _bt_model_fn( example_id_column_name=None, # TODO(youngheek): replace this later using other options. train_in_memory=False, - name='TreeEnsembleModel'): - """Gradient Boosted Decision Tree model_fn. + name='boosted_trees'): + """Gradient Boosted Trees model_fn. Args: features: dict of `Tensor`. @@ -290,7 +290,7 @@ def _bt_model_fn( Raises: ValueError: mode or params are invalid, or features has the wrong type. """ - is_single_machine = (config.num_worker_replicas == 1) + is_single_machine = (config.num_worker_replicas <= 1) if train_in_memory: assert n_batches_per_layer == 1, ( 'When train_in_memory is enabled, input_fn should return the entire ' @@ -617,7 +617,7 @@ class BoostedTreesClassifier(estimator.Estimator): n_classes, weight_column, label_vocabulary=label_vocabulary) # HParams for the model. - tree_hparams = TreeHParams( + tree_hparams = _TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, tree_complexity) @@ -723,7 +723,7 @@ class BoostedTreesRegressor(estimator.Estimator): head = _create_regression_head(label_dimension, weight_column) # HParams for the model. - tree_hparams = TreeHParams( + tree_hparams = _TreeHParams( n_trees, max_depth, learning_rate, l1_regularization, l2_regularization, tree_complexity) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 9276fbaaa1..01e5cc7a5d 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -195,7 +195,7 @@ class ModelFnTests(test_util.TensorFlowTestCase): feature_column.numeric_column('f_%d' % i, dtype=dtypes.float32), BUCKET_BOUNDARIES) for i in range(NUM_FEATURES) } - self._tree_hparams = boosted_trees.TreeHParams( + self._tree_hparams = boosted_trees._TreeHParams( # pylint:disable=protected-access n_trees=2, max_depth=2, learning_rate=0.1, -- GitLab From 489389822636b1229c2e92b717c3e947ccfa23b4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 14:25:10 -0700 Subject: [PATCH 705/960] LSTM support: Add non-uint8 quantized elementwise unary operators. PiperOrigin-RevId: 190986046 --- .../internal/optimized/optimized_ops.h | 154 +++++++++++++++--- 1 file changed, 128 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 4661004d09..3642da311c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -4092,12 +4092,46 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, inline void Logistic(const int16* input_data, const Dims<4>& input_dims, int16* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Logistic/Int16"); - // This is a copy of the reference implementation. We do not currently have a - // properly optimized version. const int flat_size = RequiredBufferSizeForDims(output_dims); TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); for (int i = 0; i < flat_size; i++) { + } + + int c = 0; + const int16* input_data_ptr = input_data; + int16* output_data_ptr = output_data; +#ifdef GEMMLOWP_NEON + { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. + using F3 = gemmlowp::FixedPoint; + + for (; c <= flat_size - 16; c += 16) { + F3 input0 = F3::FromRaw(vld1q_s16(input_data_ptr)); + F3 input1 = F3::FromRaw(vld1q_s16(input_data_ptr + 8)); + F0 output0 = gemmlowp::logistic(input0); + F0 output1 = gemmlowp::logistic(input1); + vst1q_s16(output_data_ptr, output0.raw()); + vst1q_s16(output_data_ptr + 8, output1.raw()); + + input_data_ptr += 16; + output_data_ptr += 16; + } + for (; c <= flat_size - 8; c += 8) { + F3 input = F3::FromRaw(vld1q_s16(input_data_ptr)); + F0 output = gemmlowp::logistic(input); + vst1q_s16(output_data_ptr, output.raw()); + + input_data_ptr += 8; + output_data_ptr += 8; + } + } +#endif + { // F0 uses 0 integer bits, range [-1, 1]. // This is the return type of math functions such as tanh, logistic, // whose range is in [-1, 1]. @@ -4105,9 +4139,14 @@ inline void Logistic(const int16* input_data, const Dims<4>& input_dims, // F3 uses 3 integer bits, range [-8, 8], the input range expected here. using F3 = gemmlowp::FixedPoint; - const F3 input = F3::FromRaw(input_data[i]); - F0 output = gemmlowp::logistic(input); - output_data[i] = output.raw(); + for (; c < flat_size; ++c) { + F3 input = F3::FromRaw(*input_data_ptr); + F0 output = gemmlowp::logistic(input); + *output_data_ptr = output.raw(); + + ++input_data_ptr; + ++output_data_ptr; + } } } @@ -4274,9 +4313,6 @@ inline void Tanh(const int16* input_data, const Dims<4>& input_dims, int input_left_shift, int16* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Tanh/Int16"); - // This is a copy of the reference implementation. We do not currently have a - // properly optimized version. - // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). TFLITE_DCHECK_GE(input_left_shift, 0); @@ -4285,25 +4321,91 @@ inline void Tanh(const int16* input_data, const Dims<4>& input_dims, const int flat_size = RequiredBufferSizeForDims(output_dims); TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); - // F0 uses 0 integer bits, range [-1, 1]. - // This is the return type of math functions such as tanh, logistic, - // whose range is in [-1, 1]. - using F0 = gemmlowp::FixedPoint; - // F3 uses 3 integer bits, range [-8, 8], the input range expected here. - using F3 = gemmlowp::FixedPoint; - - if (input_left_shift == 0) { - for (int i = 0; i < flat_size; i++) { - F3 input = F3::FromRaw(input_data[i]); - F0 output = gemmlowp::tanh(input); - output_data[i] = output.raw(); + int c = 0; + const int16* input_data_ptr = input_data; + int16* output_data_ptr = output_data; +#ifdef GEMMLOWP_NEON + { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. + using F3 = gemmlowp::FixedPoint; + + if (input_left_shift == 0) { + for (; c <= flat_size - 16; c += 16) { + F3 input0 = F3::FromRaw(vld1q_s16(input_data_ptr)); + F3 input1 = F3::FromRaw(vld1q_s16(input_data_ptr + 8)); + F0 output0 = gemmlowp::tanh(input0); + F0 output1 = gemmlowp::tanh(input1); + vst1q_s16(output_data_ptr, output0.raw()); + vst1q_s16(output_data_ptr + 8, output1.raw()); + + input_data_ptr += 16; + output_data_ptr += 16; + } + for (; c <= flat_size - 8; c += 8) { + F3 input = F3::FromRaw(vld1q_s16(input_data_ptr)); + F0 output = gemmlowp::tanh(input); + vst1q_s16(output_data_ptr, output.raw()); + + input_data_ptr += 8; + output_data_ptr += 8; + } + } else { + for (; c <= flat_size - 16; c += 16) { + F3 input0 = F3::FromRaw(gemmlowp::SaturatingRoundingMultiplyByPOT<1>( + vld1q_s16(input_data_ptr))); + F3 input1 = F3::FromRaw(gemmlowp::SaturatingRoundingMultiplyByPOT<1>( + vld1q_s16(input_data_ptr + 8))); + F0 output0 = gemmlowp::tanh(input0); + F0 output1 = gemmlowp::tanh(input1); + vst1q_s16(output_data_ptr, output0.raw()); + vst1q_s16(output_data_ptr + 8, output1.raw()); + + input_data_ptr += 16; + output_data_ptr += 16; + } + for (; c <= flat_size - 8; c += 8) { + F3 input = F3::FromRaw(gemmlowp::SaturatingRoundingMultiplyByPOT<1>( + vld1q_s16(input_data_ptr))); + F0 output = gemmlowp::tanh(input); + vst1q_s16(output_data_ptr, output.raw()); + + input_data_ptr += 8; + output_data_ptr += 8; + } } - } else { - for (int i = 0; i < flat_size; i++) { - F3 input = F3::FromRaw( - gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); - F0 output = gemmlowp::tanh(input); - output_data[i] = output.raw(); + } +#endif + { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. + using F3 = gemmlowp::FixedPoint; + + if (input_left_shift == 0) { + for (; c < flat_size; ++c) { + F3 input = F3::FromRaw(*input_data_ptr); + F0 output = gemmlowp::tanh(input); + *output_data_ptr = output.raw(); + + ++input_data_ptr; + ++output_data_ptr; + } + } else { + for (; c < flat_size; ++c) { + F3 input = F3::FromRaw( + gemmlowp::SaturatingRoundingMultiplyByPOT<1>(*input_data_ptr)); + F0 output = gemmlowp::tanh(input); + *output_data_ptr = output.raw(); + + ++input_data_ptr; + ++output_data_ptr; + } } } } -- GitLab From 79e4a49f7bb458176cbfa5ba1e492b39dada023d Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 29 Mar 2018 15:05:31 -0700 Subject: [PATCH 706/960] TFLite logs to stderr. PiperOrigin-RevId: 190992629 --- tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index bd49d327c9..85aca36874 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -22,7 +22,7 @@ limitations under the License. // helpers -#define NNAPI_LOG(format, ...) printf(format "\n", __VA_ARGS__); +#define NNAPI_LOG(format, ...) fprintf(stderr, format "\n", __VA_ARGS__); #define LOAD_FUNCTION(name) \ static name##_fn fn = reinterpret_cast(loadFunction(#name)); #define EXECUTE_FUNCTION(...) \ -- GitLab From 4b8f6dc1efec882c3fb0e2c8fc3de74586c800ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 15:18:59 -0700 Subject: [PATCH 707/960] [XLA] Remove some dead code from Executable. PiperOrigin-RevId: 190994733 --- tensorflow/compiler/xla/service/BUILD | 1 - tensorflow/compiler/xla/service/cpu/cpu_executable.h | 5 ----- .../compiler/xla/service/cpu/parallel_cpu_executable.h | 6 ------ tensorflow/compiler/xla/service/executable.h | 9 --------- tensorflow/compiler/xla/service/gpu/gpu_executable.h | 5 ----- 5 files changed, 26 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index b7d1bf64d0..3a99d84bea 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -730,7 +730,6 @@ cc_library( ":computation_layout", ":device_memory_allocator", ":hlo", - ":hlo_cost_analysis", ":hlo_execution_profile", ":hlo_graph_dumper", ":pool", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 267b89a10b..d3502b3a03 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -71,11 +71,6 @@ class CpuExecutable : public Executable { ir_module_string_ = ir_module_string; } - const Status EqualOrFail(const Executable& executable) { - // TODO(b/62952745) Implement equality test on CPU executable. - return Unimplemented("Equality test on CPU executable is not implemented."); - } - static int64 ShapeSizeBytes(const Shape& shape); // Type of the computation function we expect in the JIT. diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h index c393e9b8ea..87c0a3df45 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.h @@ -83,12 +83,6 @@ class ParallelCpuExecutable : public Executable { return ShapeUtil::ByteSizeOf(shape, sizeof(void*)); } - const Status EqualOrFail(const Executable& executable) { - // TODO(b/62952745) Implement equality test on CPU parallel executable. - return Unimplemented( - "Equality test on CPU parallel executable is not implemented."); - } - private: // Allocate buffers required for execution and assign them to the elements of // "buffers". "buffers" should be sized to the number of buffers in buffer diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 0aee535ee7..a157235f8a 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" -#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" #include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -109,14 +108,6 @@ class Executable { return execution_profile_; } - // Returns Status::ok() if the two executables are equal to each other. - // - // An error status is returned otherwise. - virtual const Status EqualOrFail(const Executable& executable) { - return Unimplemented( - "Equality test on this executable is not implemented."); - } - const HloProfilePrinterData& hlo_profile_printer_data() const { CHECK(hlo_profiling_enabled()); return *hlo_profile_printer_data_; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index b19cfd43de..dcb3991f41 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -83,11 +83,6 @@ class GpuExecutable : public Executable { const ServiceExecutableRunOptions* run_options, tensorflow::gtl::ArraySlice arguments) override; - const Status EqualOrFail(const Executable& executable) { - // TODO(b/62952745) Implement equality test on GPU executable. - return Unimplemented("Equality test on GPU executable is not implemented."); - } - private: // If `block_host_until_done` is false, execution will not block the host // until the kernels have completed. This is used as an optimization for -- GitLab From 40f8291db5c0b05b31d7bbe23b847cdbb2408718 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 29 Mar 2018 15:20:38 -0700 Subject: [PATCH 708/960] Internal change. PiperOrigin-RevId: 190995029 --- tensorflow/contrib/boosted_trees/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/boosted_trees/BUILD b/tensorflow/contrib/boosted_trees/BUILD index ddeda0079c..8eac1243ef 100644 --- a/tensorflow/contrib/boosted_trees/BUILD +++ b/tensorflow/contrib/boosted_trees/BUILD @@ -119,7 +119,7 @@ py_library( py_test( name = "gbdt_batch_test", - size = "small", + size = "medium", srcs = ["python/training/functions/gbdt_batch_test.py"], srcs_version = "PY2AND3", tags = [ -- GitLab From 6f5d7a97cd2c0741ddfa756853ce5321377b5d53 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 15:28:24 -0700 Subject: [PATCH 709/960] Add tf.contrib.distribute, which defines classes DistributionStrategy and MirroredStrategy, and related functionality. Also add tf.contrib.optimizer_v2, an update to the Optimizer API. RELNOTES: Can now pass tf.contrib.distribute.MirroredStrategy() to tf.estimator.RunConfig() to run an Estimator model on multiple GPUs on one machine. PiperOrigin-RevId: 190996247 --- tensorflow/contrib/BUILD | 2 + tensorflow/contrib/__init__.py | 2 + tensorflow/contrib/cmake/python_modules.txt | 3 + tensorflow/contrib/distribute/BUILD | 36 + tensorflow/contrib/distribute/__init__.py | 52 + tensorflow/contrib/distribute/python/BUILD | 431 ++++++ .../contrib/distribute/python/combinations.py | 293 ++++ .../distribute/python/combinations_test.py | 115 ++ .../distribute/python/cross_tower_ops.py | 410 +++++ .../distribute/python/cross_tower_ops_test.py | 185 +++ .../distribute/python/cross_tower_utils.py | 153 ++ .../distribute/python/minimize_loss_test.py | 279 ++++ .../distribute/python/mirrored_strategy.py | 486 ++++++ .../python/mirrored_strategy_multigpu_test.py | 435 ++++++ .../python/mirrored_strategy_test.py | 91 ++ .../contrib/distribute/python/monitor.py | 61 + .../contrib/distribute/python/monitor_test.py | 84 + .../distribute/python/one_device_strategy.py | 148 ++ .../python/one_device_strategy_test.py | 54 + .../distribute/python/optimizer_v2_test.py | 70 + .../distribute/python/prefetching_ops_v2.py | 167 ++ .../python/prefetching_ops_v2_test.py | 68 + .../python/shared_variable_creator.py | 97 ++ .../python/shared_variable_creator_test.py | 75 + .../python/simple_estimator_example.py | 97 ++ .../distribute/python/single_loss_example.py | 102 ++ .../contrib/distribute/python/step_fn.py | 103 ++ .../contrib/distribute/python/step_fn_test.py | 62 + .../distribute/python/strategy_test_lib.py | 225 +++ .../contrib/distribute/python/values.py | 575 +++++++ .../contrib/distribute/python/values_test.py | 807 ++++++++++ tensorflow/contrib/optimizer_v2/BUILD | 205 +++ tensorflow/contrib/optimizer_v2/adadelta.py | 113 ++ .../contrib/optimizer_v2/adadelta_test.py | 167 ++ tensorflow/contrib/optimizer_v2/adagrad.py | 118 ++ .../contrib/optimizer_v2/adagrad_test.py | 282 ++++ tensorflow/contrib/optimizer_v2/adam.py | 202 +++ tensorflow/contrib/optimizer_v2/adam_test.py | 333 ++++ .../optimizer_v2/checkpointable_utils_test.py | 686 +++++++++ .../contrib/optimizer_v2/gradient_descent.py | 69 + .../optimizer_v2/gradient_descent_test.py | 223 +++ tensorflow/contrib/optimizer_v2/momentum.py | 124 ++ .../contrib/optimizer_v2/momentum_test.py | 562 +++++++ .../contrib/optimizer_v2/optimizer_v2.py | 1352 +++++++++++++++++ .../optimizer_v2/optimizer_v2_symbols.py | 42 + .../contrib/optimizer_v2/optimizer_v2_test.py | 294 ++++ tensorflow/contrib/optimizer_v2/rmsprop.py | 233 +++ .../contrib/optimizer_v2/rmsprop_test.py | 449 ++++++ tensorflow/python/training/distribute.py | 7 +- tensorflow/tools/docs/generate_lib.py | 1 - 50 files changed, 11226 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/distribute/BUILD create mode 100644 tensorflow/contrib/distribute/__init__.py create mode 100644 tensorflow/contrib/distribute/python/BUILD create mode 100644 tensorflow/contrib/distribute/python/combinations.py create mode 100644 tensorflow/contrib/distribute/python/combinations_test.py create mode 100644 tensorflow/contrib/distribute/python/cross_tower_ops.py create mode 100644 tensorflow/contrib/distribute/python/cross_tower_ops_test.py create mode 100644 tensorflow/contrib/distribute/python/cross_tower_utils.py create mode 100644 tensorflow/contrib/distribute/python/minimize_loss_test.py create mode 100644 tensorflow/contrib/distribute/python/mirrored_strategy.py create mode 100644 tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py create mode 100644 tensorflow/contrib/distribute/python/mirrored_strategy_test.py create mode 100644 tensorflow/contrib/distribute/python/monitor.py create mode 100644 tensorflow/contrib/distribute/python/monitor_test.py create mode 100644 tensorflow/contrib/distribute/python/one_device_strategy.py create mode 100644 tensorflow/contrib/distribute/python/one_device_strategy_test.py create mode 100644 tensorflow/contrib/distribute/python/optimizer_v2_test.py create mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2.py create mode 100644 tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py create mode 100644 tensorflow/contrib/distribute/python/shared_variable_creator.py create mode 100644 tensorflow/contrib/distribute/python/shared_variable_creator_test.py create mode 100644 tensorflow/contrib/distribute/python/simple_estimator_example.py create mode 100644 tensorflow/contrib/distribute/python/single_loss_example.py create mode 100644 tensorflow/contrib/distribute/python/step_fn.py create mode 100644 tensorflow/contrib/distribute/python/step_fn_test.py create mode 100644 tensorflow/contrib/distribute/python/strategy_test_lib.py create mode 100644 tensorflow/contrib/distribute/python/values.py create mode 100644 tensorflow/contrib/distribute/python/values_test.py create mode 100644 tensorflow/contrib/optimizer_v2/BUILD create mode 100644 tensorflow/contrib/optimizer_v2/adadelta.py create mode 100644 tensorflow/contrib/optimizer_v2/adadelta_test.py create mode 100644 tensorflow/contrib/optimizer_v2/adagrad.py create mode 100644 tensorflow/contrib/optimizer_v2/adagrad_test.py create mode 100644 tensorflow/contrib/optimizer_v2/adam.py create mode 100644 tensorflow/contrib/optimizer_v2/adam_test.py create mode 100644 tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py create mode 100644 tensorflow/contrib/optimizer_v2/gradient_descent.py create mode 100644 tensorflow/contrib/optimizer_v2/gradient_descent_test.py create mode 100644 tensorflow/contrib/optimizer_v2/momentum.py create mode 100644 tensorflow/contrib/optimizer_v2/momentum_test.py create mode 100644 tensorflow/contrib/optimizer_v2/optimizer_v2.py create mode 100644 tensorflow/contrib/optimizer_v2/optimizer_v2_symbols.py create mode 100644 tensorflow/contrib/optimizer_v2/optimizer_v2_test.py create mode 100644 tensorflow/contrib/optimizer_v2/rmsprop.py create mode 100644 tensorflow/contrib/optimizer_v2/rmsprop_test.py diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index c211ad8b9b..0cebb49afb 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -33,6 +33,7 @@ py_library( "//tensorflow/contrib/crf:crf_py", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_py", "//tensorflow/contrib/data", + "//tensorflow/contrib/distribute:distribute", "//tensorflow/contrib/deprecated:deprecated_py", "//tensorflow/contrib/distributions:distributions_py", "//tensorflow/contrib/eager/python:tfe", @@ -74,6 +75,7 @@ py_library( "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_py", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/opt:opt_py", + "//tensorflow/contrib/optimizer_v2:optimizer_v2_py", "//tensorflow/contrib/periodic_resample:init_py", "//tensorflow/contrib/predictor", "//tensorflow/contrib/quantization:quantization_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 4f6f539027..a8e05df708 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -30,6 +30,7 @@ from tensorflow.contrib import crf from tensorflow.contrib import cudnn_rnn from tensorflow.contrib import data from tensorflow.contrib import deprecated +from tensorflow.contrib import distribute from tensorflow.contrib import distributions from tensorflow.contrib import estimator from tensorflow.contrib import factorization @@ -84,6 +85,7 @@ from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.optimizer_v2 import optimizer_v2_symbols as optimizer_v2 from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index cc7d791042..b10538d6d6 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -160,6 +160,8 @@ tensorflow/contrib/data/python/ops tensorflow/contrib/decision_trees tensorflow/contrib/decision_trees/proto tensorflow/contrib/deprecated +tensorflow/contrib/distribute +tensorflow/contrib/distribute/python tensorflow/contrib/distributions tensorflow/contrib/distributions/python tensorflow/contrib/distributions/python/ops @@ -342,6 +344,7 @@ tensorflow/contrib/nn/python/ops tensorflow/contrib/opt tensorflow/contrib/opt/python tensorflow/contrib/opt/python/training +tensorflow/contrib/optimizer_v2 tensorflow/contrib/pi_examples tensorflow/contrib/pi_examples/camera tensorflow/contrib/pi_examples/label_image diff --git a/tensorflow/contrib/distribute/BUILD b/tensorflow/contrib/distribute/BUILD new file mode 100644 index 0000000000..74b2cd90a1 --- /dev/null +++ b/tensorflow/contrib/distribute/BUILD @@ -0,0 +1,36 @@ +# Implementation of a prototype TF distributed computation library. + +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +py_library( + name = "distribute", + srcs = ["__init__.py"], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/contrib/distribute/python:cross_tower_ops", + "//tensorflow/contrib/distribute/python:mirrored_strategy", + "//tensorflow/contrib/distribute/python:monitor", + "//tensorflow/contrib/distribute/python:one_device_strategy", + "//tensorflow/contrib/distribute/python:step_fn", + "//tensorflow/python:training", + "//tensorflow/python:util", + ], +) diff --git a/tensorflow/contrib/distribute/__init__.py b/tensorflow/contrib/distribute/__init__.py new file mode 100644 index 0000000000..76711baf3a --- /dev/null +++ b/tensorflow/contrib/distribute/__init__.py @@ -0,0 +1,52 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Prototype of a distributed computation library for TF.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.distribute.python.cross_tower_ops import * +from tensorflow.contrib.distribute.python.mirrored_strategy import MirroredStrategy +from tensorflow.contrib.distribute.python.monitor import Monitor +from tensorflow.contrib.distribute.python.one_device_strategy import OneDeviceStrategy +from tensorflow.contrib.distribute.python.step_fn import * +from tensorflow.python.training.distribute import * + +from tensorflow.python.util.all_util import remove_undocumented + + +_allowed_symbols = [ + 'AllReduceCrossTowerOps', + 'CrossTowerOps', + 'DistributionStrategy', + 'MirroredStrategy', + 'Monitor', + 'OneDeviceStrategy', + 'ReductionToOneDeviceCrossTowerOps', + 'Step', + 'StandardInputStep', + 'StandardSingleLossStep', + 'TowerContext', + 'get_cross_tower_context', + 'get_distribution_strategy', + 'get_loss_reduction', + 'get_tower_context', + 'has_distribution_strategy', + 'require_tower_context', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD new file mode 100644 index 0000000000..4dfd3f7228 --- /dev/null +++ b/tensorflow/contrib/distribute/python/BUILD @@ -0,0 +1,431 @@ +# Implementation of a prototype TF distributed computation library. + +package( + default_visibility = [ + "//tensorflow:internal", + ], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +# TODO(priyag): Figure out testonly issues that are preventing us from +# including our tests in pip for now. + +py_library( + name = "values", + srcs = ["values.py"], + visibility = ["//tensorflow:internal"], + deps = [ + ":prefetching_ops_v2", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/contrib/eager/python:datasets", + "//tensorflow/python:array_ops", + "//tensorflow/python:checkpointable", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python/eager:context", + "@six_archive//:six", + ], +) + +cuda_py_test( + name = "values_test", + srcs = ["values_test.py"], + additional_deps = [ + ":mirrored_strategy", + ":values", + "//tensorflow/core:protos_all_py", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python:errors", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "//tensorflow/python/estimator:model_fn", + ], +) + +py_library( + name = "mirrored_strategy", + srcs = ["mirrored_strategy.py"], + visibility = ["//tensorflow:internal"], + deps = [ + ":cross_tower_ops", + ":shared_variable_creator", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:device", + "//tensorflow/python:framework_ops", + "//tensorflow/python:pywrap_tensorflow", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", + "@six_archive//:six", + ], +) + +py_library( + name = "one_device_strategy", + srcs = ["one_device_strategy.py"], + visibility = ["//tensorflow:internal"], + deps = [ + ":values", + "//tensorflow/contrib/eager/python:datasets", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + "@six_archive//:six", + ], +) + +py_library( + name = "strategy_test_lib", + testonly = 1, + srcs = ["strategy_test_lib.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:framework_ops", + "//tensorflow/python:layers", + "//tensorflow/python:training", + "//tensorflow/python:variables", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], +) + +py_library( + name = "combinations", + testonly = 1, + srcs = ["combinations.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + ":mirrored_strategy", + ":one_device_strategy", + "//tensorflow/contrib/optimizer_v2:training", + "//tensorflow/python:framework_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python/eager:context", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "combinations_test", + srcs = ["combinations_test.py"], + tags = [ + "no_pip", + ], + deps = [ + ":combinations", + "//tensorflow/python/eager:test", + ], +) + +py_test( + name = "mirrored_strategy_test", + srcs = ["mirrored_strategy_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + ":mirrored_strategy", + ":strategy_test_lib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], +) + +py_test( + name = "one_device_strategy_test", + srcs = ["one_device_strategy_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + ":one_device_strategy", + ":strategy_test_lib", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python/eager:test", + ], +) + +cuda_py_test( + name = "mirrored_strategy_multigpu_test", + srcs = ["mirrored_strategy_multigpu_test.py"], + additional_deps = [ + ":mirrored_strategy", + ":values", + ":strategy_test_lib", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:constant_op", + "//tensorflow/python:layers", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], + tags = [ + "guitar", + "no_pip", + "multi_and_single_gpu", + # Do not perform the extra analysis on this test, because it is already + # performed for the `:mirrored_strategy_test` target. + "no_oss", + "noasan", + "notap", + "notsan", + ], +) + +py_library( + name = "step_fn", + srcs = ["step_fn.py"], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:training", + "//tensorflow/python/eager:backprop", + ], +) + +cuda_py_test( + name = "minimize_loss_test", + srcs = ["minimize_loss_test.py"], + additional_deps = [ + ":combinations", + ":single_loss_example", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:variables", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "//tensorflow/python/ops/losses", + ], + tags = [ + "multi_and_single_gpu", + "no_pip", + ], +) + +cuda_py_test( + name = "optimizer_v2_test", + srcs = ["optimizer_v2_test.py"], + additional_deps = [ + ":combinations", + ":single_loss_example", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:variables", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], + tags = [ + "multi_and_single_gpu", + "no_pip", + ], +) + +py_library( + name = "single_loss_example", + srcs = ["single_loss_example.py"], + deps = [ + ":step_fn", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:layers", + "//tensorflow/python:math_ops", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + +cuda_py_test( + name = "step_fn_test", + srcs = ["step_fn_test.py"], + additional_deps = [ + ":single_loss_example", + ":combinations", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:variables", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], + tags = [ + "multi_and_single_gpu", + "no_pip", + ], +) + +py_library( + name = "monitor", + srcs = ["monitor.py"], + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:variables", + "//tensorflow/python/eager:context", + ], +) + +cuda_py_test( + name = "monitor_test", + srcs = ["monitor_test.py"], + additional_deps = [ + ":combinations", + ":monitor", + ":one_device_strategy", + ":single_loss_example", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "//tensorflow/python:framework_ops", + "//tensorflow/python:training", + ], + tags = [ + "multi_and_single_gpu", + "no_pip", + ], +) + +py_library( + name = "shared_variable_creator", + srcs = ["shared_variable_creator.py"], + visibility = ["//tensorflow:internal"], +) + +py_test( + name = "shared_variable_creator_test", + srcs = ["shared_variable_creator_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":shared_variable_creator", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:variable_scope", + "//tensorflow/python/eager:test", + ], +) + +py_binary( + name = "simple_estimator_example", + srcs = ["simple_estimator_example.py"], + deps = [ + ":mirrored_strategy", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:layers", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/estimator:estimator_py", + "//tensorflow/python/estimator:model_fn", + ], +) + +py_library( + name = "cross_tower_utils", + srcs = ["cross_tower_utils.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/nccl:nccl_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + ], +) + +py_library( + name = "cross_tower_ops", + srcs = ["cross_tower_ops.py"], + srcs_version = "PY2AND3", + deps = [ + ":cross_tower_utils", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + "@six_archive//:six", + ], +) + +py_test( + name = "cross_tower_ops_test", + srcs = ["cross_tower_ops_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_pip", + ], + deps = [ + ":combinations", + ":cross_tower_ops", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "@absl_py//absl/testing:parameterized", + ], +) + +py_library( + name = "prefetching_ops_v2", + srcs = ["prefetching_ops_v2.py"], + deps = [ + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/data/python/ops:prefetching_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", + ], +) + +cuda_py_test( + name = "prefetching_ops_v2_test", + srcs = ["prefetching_ops_v2_test.py"], + additional_deps = [ + ":prefetching_ops_v2", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + ], +) diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py new file mode 100644 index 0000000000..dd8e7c4376 --- /dev/null +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -0,0 +1,293 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Facilities for creating multiple test combinations. + +Here is an example of testing various optimizers in Eager and Graph mode: + +class AdditionExample(test.TestCase, parameterized.TestCase): + @combinations.generate( + combinations.combine(mode=["graph", "eager"], + optimizer=[AdamOptimizer(), + GradientDescentOptimizer()])) + def testOptimizer(self, optimizer): + ... f(optimizer)... + +This will run `testOptimizer` 4 times with the specified optimizers: 2 in +Eager and 2 in Graph mode. +The test will be provided with arguments that match the arguments of combine +by name. It is necessary to request all arguments, except for `mode`, which is +optional. + +`combine()` function is available for creating a cross product of various +options. `times()` function exists for creating a product of N `combine()`-ed +results. See below. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict +import sys +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python import one_device_strategy +from tensorflow.contrib.optimizer_v2 import adam as adam_v2 +from tensorflow.contrib.optimizer_v2 import gradient_descent as gradient_descent_v2 +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.training import adam +from tensorflow.python.training import gradient_descent +from tensorflow.python.util import tf_inspect + + +GPU_TEST = "test_gpu" in sys.argv[0] + + +def generate(combinations): + """A decorator for generating test cases of a test method or a test class. + + Args: + combinations: a list of dictionaries created using combine() and times(). + + Restrictions: + -- there should always be a "mode" argument. Accepted values are "eager" + and "graph". + -- arguments of the test method must match by name to get the corresponding + value of the combination. Tests must accept all arguments (except "mode", + which is optional). + -- distribution argument is special. It is meant for passing instances of + DistributionStrategy. Each instance is to be passed as `(, + )` tuple, where is the number of required + GPUs. If the required number of GPUs for the DistributionStrategy isn't + available then the test case is going to be skipped. + + Returns: + a decorator that will cause the test method to be run under the specified + conditions. + + Raises: + ValueError - if "mode" argument wasn't either "eager" or "graph. + """ + + def decorator(test_function): + """The decorator to be returned.""" + + # Generate good test names that can be used with --test_filter. + for combination in combinations: + # We use OrderedDicts in `combine()` and `times()` to ensure stable + # order of keys in each dictionary. + assert isinstance(combination, OrderedDict) + name = "".join([ + "_{}_{}".format( + "".join(filter(str.isalnum, key)), + "".join(filter(str.isalnum, str(value)))) + for key, value in combination.items() + ]) + combination.update({"testcase_name": "_test{}".format(name)}) + + @parameterized.named_parameters(*combinations) + def decorated(self, **kwargs): + """A wrapped test method that sets up `test_function`.""" + assert "mode" in kwargs + mode = kwargs["mode"] + + if "distribution" in kwargs: + distribution = kwargs["distribution"] + kwargs["distribution"] = distribution.strategy + if not distribution.required_gpus: + if GPU_TEST: + self.skipTest("Test that doesn't require GPUs.") + elif context.num_gpus() < distribution.required_gpus: + self.skipTest( + "{} GPUs are not available for this test. {} GPUs are available". + format(distribution.required_gpus, context.num_gpus())) + + requested_arguments = tf_inspect.getfullargspec(test_function).args + missing_arguments = set(list(kwargs.keys()) + ["self"]).difference( + set(requested_arguments + ["mode"])) + if missing_arguments: + raise ValueError("The test is missing arguments {} .".format( + missing_arguments)) + + kwargs_to_pass = {} + for arg in requested_arguments: + if arg == "self": + kwargs_to_pass[arg] = self + else: + kwargs_to_pass[arg] = kwargs[arg] + + if mode == "eager": + with context.eager_mode(), ops.Graph().as_default(): + test_function(**kwargs_to_pass) + elif mode == "graph": + with context.graph_mode(), ops.Graph().as_default(): + test_function(**kwargs_to_pass) + else: + raise ValueError( + "'mode' has to be either 'eager' or 'graph' and not {}".format( + mode)) + + return decorated + return decorator + + +def combine(**kwargs): + """Generate combinations based on its keyword arguments. + + Two sets of returned combinations can be concatenated using +. Their product + can be computed using `times()`. + + Args: + **kwargs: keyword arguments of form `option=[possibilities, ...]`. + + Returns: + a list of dictionaries for each combination. Keys in the dictionaries are + the keyword argument names. Each key has one value - one of the + corresponding keyword argument values. + """ + if not kwargs: + return [OrderedDict()] + + sort_by_key = lambda k: k[0][0] + kwargs = OrderedDict(sorted(kwargs.items(), key=sort_by_key)) + first = list(kwargs.items())[0] + + rest = dict(list(kwargs.items())[1:]) + rest_combined = combine(**rest) + + key = first[0] + values = first[1] + + return [ + OrderedDict(sorted(list(combined.items()) + [(key, v)], key=sort_by_key)) + for v in values + for combined in rest_combined + ] + + +def times(*combined): + """Generate a product of N sets of combinations. + + times(combine(a=[1,2]), combine(b=[3,4])) == combine(a=[1,2], b=[3,4]) + + Args: + *combined: N lists of dictionaries that specify combinations. + + Returns: + a list of dictionaries for each combination. + + Raises: + ValueError: if some of the inputs have overlapping keys. + """ + assert combined + + if len(combined) == 1: + return combined[0] + + first = combined[0] + rest_combined = times(*combined[1:]) + + combined_results = [] + for a in first: + for b in rest_combined: + if set(a.keys()).intersection(set(b.keys())): + raise ValueError("Keys need to not overlap: {} vs {}".format( + a.keys(), b.keys())) + + combined_results.append(OrderedDict(list(a.items()) + list(b.items()))) + return combined_results + + +class NamedObject(object): + """A class that translates an object into a good test name.""" + + def __init__(self, name, obj): + self._name = name + self._obj = obj + + def __getattr__(self, name): + return getattr(self._obj, name) + + def __call__(self, *args, **kwargs): + return self._obj(*args, **kwargs) + + def __repr__(self): + return self._name + + +class NamedDistribution(object): + """Translates DistributionStrategy and its data into a good name.""" + + def __init__(self, name, distribution, required_gpus): + self._distribution = distribution + self._name = name + self._required_gpus = required_gpus + + def __repr__(self): + return self._name + + @property + def strategy(self): + return self._distribution + + @property + def required_gpus(self): + return self._required_gpus + + +one_device_strategy = NamedDistribution( + "OneDeviceCPU", one_device_strategy.OneDeviceStrategy("/cpu:0"), + None) +mirrored_strategy_with_gpu_and_cpu = NamedDistribution( + "MirroredCPUAndGPU", + mirrored_strategy.MirroredStrategy(["/gpu:0", "/cpu:0"]), 1) +mirrored_strategy_with_two_gpus = NamedDistribution( + "Mirrored2GPUs", + mirrored_strategy.MirroredStrategy(["/gpu:0", "/gpu:1"]), 2) + +adam_optimizer_v1_fn = NamedObject( + "AdamV1", lambda: adam.AdamOptimizer(0.2, epsilon=1)) +gradient_descent_optimizer_v1_fn = NamedObject( + "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) + +adam_optimizer_v2_fn = NamedObject( + "AdamV2", lambda: adam_v2.AdamOptimizer(0.2, epsilon=1)) +gradient_descent_optimizer_v2_fn = NamedObject( + "GradientDescentV2", + lambda: gradient_descent_v2.GradientDescentOptimizer(0.2)) + +graph_and_eager_modes = ["graph", "eager"] + + +def distributions_and_v1_optimizers(): + """A common set of combination with DistributionStrategies and Optimizers.""" + return combine( + distribution=[ + one_device_strategy, mirrored_strategy_with_gpu_and_cpu, + mirrored_strategy_with_two_gpus + ], + optimizer_fn=[adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn]) + + +def distributions_and_v2_optimizers(): + """DistributionStrategies and V2 Optimizers.""" + return combine( + distribution=[ + one_device_strategy, mirrored_strategy_with_gpu_and_cpu, + mirrored_strategy_with_two_gpus + ], + optimizer_fn=[adam_optimizer_v2_fn, gradient_descent_optimizer_v2_fn]) diff --git a/tensorflow/contrib/distribute/python/combinations_test.py b/tensorflow/contrib/distribute/python/combinations_test.py new file mode 100644 index 0000000000..219b24160f --- /dev/null +++ b/tensorflow/contrib/distribute/python/combinations_test.py @@ -0,0 +1,115 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for some testing utils from strategy_test_lib.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.python.eager import test + + +class TestingCombinationsTest(test.TestCase): + + def test_combine(self): + self.assertEqual([{ + "a": 1, + "b": 2 + }, { + "a": 1, + "b": 3 + }, { + "a": 2, + "b": 2 + }, { + "a": 2, + "b": 3 + }], combinations.combine(a=[1, 2], b=[2, 3])) + + def test_add(self): + self.assertEqual( + [{ + "a": 1 + }, { + "a": 2 + }, { + "b": 2 + }, { + "b": 3 + }], + combinations.combine(a=[1, 2]) + + combinations.combine(b=[2, 3])) + + def test_times(self): + c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"]) + c2 = combinations.combine(mode=["eager"], loss=["callable"]) + c3 = combinations.combine(distribution=["d1", "d2"]) + c4 = combinations.times(c3, c1 + c2) + self.assertEqual([ + OrderedDict([("distribution", "d1"), ("loss", "callable"), + ("mode", "graph")]), + OrderedDict([("distribution", "d1"), ("loss", "tensor"), + ("mode", "graph")]), + OrderedDict([("distribution", "d1"), ("loss", "callable"), + ("mode", "eager")]), + OrderedDict([("distribution", "d2"), ("loss", "callable"), + ("mode", "graph")]), + OrderedDict([("distribution", "d2"), ("loss", "tensor"), + ("mode", "graph")]), + OrderedDict([("distribution", "d2"), ("loss", "callable"), + ("mode", "eager")]) + ], c4) + + def test_times_variable_arguments(self): + c1 = combinations.combine(mode=["graph", "eager"]) + c2 = combinations.combine(optimizer=["adam", "gd"]) + c3 = combinations.combine(distribution=["d1", "d2"]) + c4 = combinations.times(c3, c1, c2) + self.assertEqual([ + OrderedDict([("distribution", "d1"), ("mode", "graph"), + ("optimizer", "adam")]), + OrderedDict([("distribution", "d1"), ("mode", "graph"), + ("optimizer", "gd")]), + OrderedDict([("distribution", "d1"), ("mode", "eager"), + ("optimizer", "adam")]), + OrderedDict([("distribution", "d1"), ("mode", "eager"), + ("optimizer", "gd")]), + OrderedDict([("distribution", "d2"), ("mode", "graph"), + ("optimizer", "adam")]), + OrderedDict([("distribution", "d2"), ("mode", "graph"), + ("optimizer", "gd")]), + OrderedDict([("distribution", "d2"), ("mode", "eager"), + ("optimizer", "adam")]), + OrderedDict([("distribution", "d2"), ("mode", "eager"), + ("optimizer", "gd")]) + ], c4) + self.assertEqual( + combinations.combine( + mode=["graph", "eager"], + optimizer=["adam", "gd"], + distribution=["d1", "d2"]), c4) + + def test_overlapping_keys(self): + c1 = combinations.combine(mode=["graph"], loss=["callable", "tensor"]) + c2 = combinations.combine(mode=["eager"], loss=["callable"]) + with self.assertRaisesRegexp(ValueError, ".*Keys.+overlap.+"): + _ = combinations.times(c1, c2) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops.py b/tensorflow/contrib/distribute/python/cross_tower_ops.py new file mode 100644 index 0000000000..cb98351735 --- /dev/null +++ b/tensorflow/contrib/distribute/python/cross_tower_ops.py @@ -0,0 +1,410 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Classes for different algortihms of reduction and broadcasting.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.contrib.distribute.python import cross_tower_utils +from tensorflow.contrib.distribute.python import values as value_lib +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import device_util + + +def _validate_destinations(destinations): + if not isinstance(destinations, + (value_lib.DistributedValues, six.string_types, list)): + raise ValueError("destinations must be one of a `DistributedValues` object," + " a device string, a list of device strings or None") + + if not destinations: + raise ValueError("destinations can not be empty") + + +def _validate_value_destination_pairs(value_destination_pairs): + # pylint: disable=g-missing-docstring + if not value_destination_pairs: return False + if not isinstance(value_destination_pairs, (list, tuple)): return False + if not all([isinstance(pair, tuple) for pair in value_destination_pairs]): + return False + if not all([isinstance(v[0], value_lib.PerDevice) + for v in value_destination_pairs]): + return False + return True + + +def _get_devices_from(destinations): + if isinstance(destinations, value_lib.DistributedValues): + return list(destinations.devices) + elif isinstance(destinations, six.string_types): + return [device_util.canonicalize(destinations)] + else: + return [ + device_util.canonicalize(destination) for destination in destinations + ] + + +def _devices_match(left, right): + return set(_get_devices_from(left)) == set(_get_devices_from(right)) + + +def _all_devices_match(value_destination_pairs): + if not all([d is None or _devices_match(v, d) + for v, d in value_destination_pairs]): + return False + if not all([_devices_match(v, value_destination_pairs[0][0]) + for v, _ in value_destination_pairs[1:]]): + return False + return True + + +def _simple_broadcast(tensor, destinations): + index = {} + devices = _get_devices_from(destinations) + for d in devices: + with ops.device(d): + index[d] = array_ops.identity(tensor) + return value_lib.Mirrored(index) + + +def _simple_reduce(per_device_value, reduce_to_device, accumulation_fn, + method_string): + # pylint: disable=g-missing-docstring + all_values = [] + count = 0 + for v in per_device_value._index.values(): # pylint: disable=protected-access + if isinstance(v, value_lib.MapOutput): + v_list = v.get() + if not v_list: + continue + count += len(v_list) + # Sum within each device before aggregating across devices. + v = math_ops.add_n(v_list) + else: + count += 1 + all_values.append(v) + if not all_values: + raise ValueError("`per_device_value` must be non-empty") + + with ops.device(reduce_to_device): + with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + if method_string == "sum": + reduced = accumulation_fn(all_values) + elif method_string == "mean": + reduced = accumulation_fn(all_values) / count + else: + raise ValueError("`method_string` must be 'sum' or 'mean'") + return reduced + + +class CrossTowerOps(object): + """Base class for cross-tower reduction and broadcasting algorithms.""" + + def __init__(self): + pass + + def reduce(self, method_string, per_device_value, destinations=None): + """Reduce `per_device_value` to `destinations`. + + It runs the reduction operation defined by `method_string` and put the + result on `destinations`. + + Args: + method_string: either 'sum' or 'mean' specifying the reduction method. + per_device_value: a PerDevice object. + destinations: the reduction destinations. + + Returns: + a Mirrored object. + + Raises: + ValueError: if per_device_value is not a PerDevice object. + """ + if not isinstance(per_device_value, value_lib.PerDevice): + raise ValueError("`per_device_value` must be a `PerDevice` object.") + if destinations is not None: + _validate_destinations(destinations) + return self._reduce(method_string, per_device_value, destinations) + + def batch_reduce(self, method_string, value_destination_pairs): + """Reduce PerDevice objects in a batch. + + Reduce each first element in `value_destination_pairs` to each second + element which indicates the destinations. + + Args: + method_string: either 'sum' or 'mean' specifying the reduction method. + value_destination_pairs: a list or a tuple of tuples of PerDevice objects + and destinations. If a destionation is None, then the destinations + are set to match the devices of the input PerDevice object. + + Returns: + a list of Mirrored objects. + + Raises: + ValueError: if `value_destination_pairs` is not a list or a tuple of + tuples of PerDevice objects and destinations + """ + if not _validate_value_destination_pairs(value_destination_pairs): + raise ValueError("`value_destination_pairs` must be a list or a tuple of " + "tuples of PerDevice objects and destinations") + for _, d in value_destination_pairs: + if d is not None: + _validate_destinations(d) + + return self._batch_reduce(method_string, value_destination_pairs) + + def broadcast(self, tensor, destinations): + """Broadcast the `tensor` to destinations. + + Args: + tensor: the tensor to broadcast. + destinations: the broadcast destinations. + + Returns: + a Mirrored object. + """ + _validate_destinations(destinations) + return self._broadcast(tensor, destinations) + + def _reduce(self, method_string, per_device_value, destinations): + raise NotImplementedError( + "_reduce method must be implemented in descendants.") + + def _batch_reduce(self, method_string, value_destination_pairs): + raise NotImplementedError( + "_batch_reduce method must be implemented in descendants.") + + def _broadcast(self, tensor, destinations): + return _simple_broadcast(tensor, destinations) + + +class ReductionToOneDeviceCrossTowerOps(CrossTowerOps): + """Always do reduction to one device first and then do broadcasting. + + Batch reduction is done by reduction on each element one by one. + """ + + def __init__(self, reduce_to_device=None, accumulation_fn=math_ops.add_n): + """Constructor. + + Args: + reduce_to_device: the intermediate device to reduce to. If None, reduce + to the first device in `destinations` of the reduce() method. + accumulation_fn: a function that does accumulation. + """ + self.reduce_to_device = reduce_to_device + self.accumulation_fn = accumulation_fn + super(ReductionToOneDeviceCrossTowerOps, self).__init__() + + def _reduce(self, method_string, per_device_value, destinations): + devices = _get_devices_from(destinations or per_device_value) + reduce_to_device = self.reduce_to_device or devices[0] + reduced = _simple_reduce(per_device_value, reduce_to_device, + self.accumulation_fn, method_string) + return self.broadcast(reduced, devices) + + def _batch_reduce(self, method_string, value_destination_pairs): + return [self._reduce(method_string, t, destinations=v) + for t, v in value_destination_pairs] + + +def _group_value_by_device(per_device_values): + """Group values into sublists by their devices. + + This grouping is needed to call the allreduce library. + + Args: + per_device_values: a list of PerDevice obejcts. + + Returns: + a list of lists, each sublist has components for its corresponding device of + PerDevice objects, paired with a None. + """ + destinations = per_device_values[0].devices + grouped = [[] for _ in range(len(destinations))] + for per_device_value in per_device_values: + # pylint: disable=protected-access + for i, v in enumerate(per_device_value._index.values()): + assert per_device_value.devices == destinations + grouped[i].append((v, None)) + return grouped + + +def _ungroup_and_make_mirrored(grouped_reduced, destinations, method_string): + """Ungroup results from allreduce and make Mirrored objects. + + Each allreduce result would be divided by the number of destinations before + Mirrored objects are created if method_string is "mean". + """ + index = [{} for _ in range(len(grouped_reduced[0]))] + for d, per_device_reduced in enumerate(grouped_reduced): + for i, (v, _) in enumerate(per_device_reduced): + if method_string == "mean": + index[i][destinations[d]] = v / len(destinations) + else: + index[i][destinations[d]] = v + return [value_lib.Mirrored(v) for v in index] + + +class AllReduceCrossTowerOps(CrossTowerOps): + """Reduction using all reduce.""" + + def __init__(self, all_reduce_alg="nccl", gradient_repacking=1): + """Initialize this subclass of CrossTowerOps with allreduce. + + Gradients would be repacked for more efficient cross-device transportation. + + Args: + all_reduce_alg: the allreduce algorithm to use, currently only "nccl" or + "hierarchical_copy" are supported. + gradient_repacking: If zero, no gradient repacking would be done. If + non-zero value it specifies the number of split packs that will be + formed. + """ + self.all_reduce_alg = all_reduce_alg + self.gradient_repacking = gradient_repacking + super(AllReduceCrossTowerOps, self).__init__() + + def _reduce(self, method_string, per_device_value, destinations): + if ((destinations is None or _devices_match(per_device_value, destinations)) + and not context.executing_eagerly()): + return self._batch_all_reduce(method_string, [per_device_value])[0] + else: + devices = _get_devices_from(destinations or per_device_value) + reduce_to_device = devices[0] + reduced = _simple_reduce(per_device_value, reduce_to_device, + math_ops.add_n, method_string) + return self.broadcast(reduced, devices) + + def _batch_reduce(self, method_string, value_destination_pairs): + if (_all_devices_match(value_destination_pairs) and + not context.executing_eagerly()): + return self._batch_all_reduce(method_string, + [v[0] for v in value_destination_pairs]) + else: + if not context.executing_eagerly(): + logging.warning("Efficient batch_reduce is not supported if " + "destinations are different.") + return [ + self._reduce(method_string, t, destinations=v) + for t, v in value_destination_pairs + ] + + def _batch_all_reduce(self, method_string, per_device_values): + """All reduce algorithm in a batch.""" + logging.info("batch_all_reduce invoked for batches size = %d with algorithm" + " = %s and gradient repacking = %d", len(per_device_values), + self.all_reduce_alg, self.gradient_repacking) + destinations = per_device_values[0].devices + grouped = _group_value_by_device(per_device_values) + if self.gradient_repacking == 0: + if self.all_reduce_alg == "nccl": + reduced = cross_tower_utils.aggregate_gradients_using_nccl(grouped) + else: + # TODO(yuefengz): check that gpu ids in `destinations` are in ascending + # order. + reduced = ( + cross_tower_utils.aggregate_gradients_using_hierarchical_copy( + destinations, grouped)) + else: + device_grad_packs = [] + all_tower_shapes = [] + all_tower_sizes = [] + for tower_grads_and_vars in grouped: + with ops.colocate_with(tower_grads_and_vars[0][0]): + # Flatten all the grads. + flat_grads = [ + array_ops.reshape(g, [-1]) for g, _ in tower_grads_and_vars + ] + # Remember the original shape of all the grads. + tower_shapes = [array_ops.shape(g) for g, _ in tower_grads_and_vars] + # Remember the original sizes of all the grads. + tower_sizes = [array_ops.size(g) for g, _ in tower_grads_and_vars] + # Concat all the flat grads into a big flat tensor. + concat_grads = array_ops.concat(flat_grads, 0) + + # Split the big tensor into num_splits packs. In cases where the + # total size is not divisible num_splits, the last pack gets + # more elements. + # TODO(zhengxq): it is possible to optimize away the additional + # data movement by copying along the original variable boundary. + # TODO(zhengxq): it is also possible to optimize away all the concat + # as well. + num_splits = self.gradient_repacking + total_grad_size = array_ops.size(concat_grads) + split_size = total_grad_size // num_splits + split_size_last = total_grad_size - split_size * (num_splits - 1) + split_sizes = [split_size] * (num_splits - 1) + [split_size_last] + grad_packs = array_ops.split(concat_grads, split_sizes) + + # Ready to aggregate the repacked gradients, with fake variables. + # TODO(zhengxq): It is hacky to have to use fake variables. + # We should remove the need for variables in + # aggregate_gradients_using*. + device_grad_packs.append(zip(grad_packs, [None] * num_splits)) + all_tower_shapes.append(tower_shapes) + all_tower_sizes.append(tower_sizes) + + # The actual aggregation of the repacked gradients. Note that they are + # sharded among different aggregation trees. So it is important to + # strike the balance on num_splits. + if self.all_reduce_alg == "nccl": + summed_device_grad_packs = ( + cross_tower_utils.aggregate_gradients_using_nccl(device_grad_packs)) + else: + summed_device_grad_packs = ( + cross_tower_utils.aggregate_gradients_using_hierarchical_copy( + destinations, device_grad_packs)) + + aggregated_device_grads = [] + for (summed_tower_grad_packs, tower_grads_and_vars, tower_shapes, + tower_sizes) in zip(summed_device_grad_packs, grouped, + all_tower_shapes, all_tower_sizes): + # pylint: enable=line-too-long + # Reverse the packing operations in the previous steps. Form the + # summed gradients back into their original shapes. + with ops.colocate_with(summed_tower_grad_packs[0][0]): + # Form a list of the summed grad packs. + device_grad_packs = [g for g, _ in summed_tower_grad_packs] + + # Concat them back into a big flat tensor. + device_grads_concat = array_ops.concat(device_grad_packs, 0) + + # Split the tensors back into their original sizes. + grads_with_sizes = array_ops.split(device_grads_concat, tower_sizes) + + # Reshape the tensors back into their original shapes. + grads_with_shapes = [ + array_ops.reshape(grad, shape) + for shape, grad in zip(tower_shapes, grads_with_sizes) + ] + + # Form the list with the original list of variables. + summed_tower_grads = [ + (g, v) + for g, (_, v) in zip(grads_with_shapes, tower_grads_and_vars) + ] + aggregated_device_grads.append(summed_tower_grads) + reduced = aggregated_device_grads + return _ungroup_and_make_mirrored(reduced, per_device_values[0].devices, + method_string) diff --git a/tensorflow/contrib/distribute/python/cross_tower_ops_test.py b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py new file mode 100644 index 0000000000..bb43147f5e --- /dev/null +++ b/tensorflow/contrib/distribute/python/cross_tower_ops_test.py @@ -0,0 +1,185 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for CrossTowerOps.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import cross_tower_ops as cross_tower_ops_lib +from tensorflow.contrib.distribute.python import values as value_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def _make_per_device(values, devices): + devices = cross_tower_ops_lib._get_devices_from(devices) + assert len(values) == len(devices) + index = {} + for d, v in zip(devices, values): + with ops.device(d): + placed_v = array_ops.identity(v) + index[d] = placed_v + return value_lib.PerDevice(index) + + +# pylint: disable=g-doc-args,g-doc-return-or-yield +def _fake_mirrored(value, devices): + """Create a faked Mirrored object for testing. + + All components of the returned Mirrored have the same objects, which is not + true in reality. + """ + devices = cross_tower_ops_lib._get_devices_from(devices) + return value_lib.Mirrored( + {d: v for d, v in zip(devices, [value] * len(devices))}) + + +_cpu_device = "/device:CPU:0" + + +class CrossTowerOpsTest(test.TestCase, parameterized.TestCase): + + def _assert_value_equal(self, left, right): + if isinstance(left, list): + for l, r in zip(left, right): + self._assert_value_equal(l, r) + else: + self.assertEqual(type(left), type(right)) + self.assertEqual(left.devices, right.devices) + if context.executing_eagerly(): + self.assertEqual([v.numpy() for v in left._index.values()], + list(right._index.values())) + else: + with self.test_session() as sess: + self.assertEqual( + sess.run(list(left._index.values())), list(right._index.values())) + + # TODO(yuefengz): decouple the num_gpus check from distribution in + # combinations module so that we can pass in devices instead of a distribution + # strategy. + reduction_to_one_combinations = combinations.combine( + cross_tower_ops=[ + combinations.NamedObject( + "DefaultReductionToOneDeviceCrossTowerOps", + cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps()), + combinations.NamedObject( + "ReductionToCPUDeviceCrossTowerOps", + cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps( + reduce_to_device=_cpu_device)), + combinations.NamedObject( + "AccumulateNCrossTowerOp", + cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps( + accumulation_fn=math_ops.accumulate_n)), + ], + distribution=[ + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus + ], + mode=["graph", "eager"]) + allreduce_combinations = combinations.combine( + cross_tower_ops=[ + combinations.NamedObject("AllReduce", + cross_tower_ops_lib.AllReduceCrossTowerOps( + "nccl", 1)), + combinations.NamedObject("HierarchicalCopy", + cross_tower_ops_lib.AllReduceCrossTowerOps( + "hierarchical_copy", 8)), + combinations.NamedObject("AllReduceNoGradientRepacking", + cross_tower_ops_lib.AllReduceCrossTowerOps( + "nccl", 0)), + combinations.NamedObject("HierarchicalCopyNoGradientRepacking", + cross_tower_ops_lib.AllReduceCrossTowerOps( + "hierarchical_copy", 0)) + ], + distribution=[ + combinations.mirrored_strategy_with_two_gpus + ], + mode=["graph", "eager"]) + + @combinations.generate(reduction_to_one_combinations + allreduce_combinations) + def testReductionAndBroadcast(self, cross_tower_ops, distribution): + devices = distribution.worker_devices + + values = [constant_op.constant(float(d)) for d in range(len(devices))] + per_device = _make_per_device(values, devices) + mean = (len(devices) - 1.) / 2. + + values_2 = [constant_op.constant(d + 1.0) for d in range(len(devices))] + per_device_2 = _make_per_device(values_2, devices) + mean_2 = mean + 1. + + destination_mirrored = _fake_mirrored(1., devices) + destination_different = _fake_mirrored(1., _cpu_device) + destination_str = _cpu_device + destination_list = devices + + all_destinations = [ + None, destination_mirrored, destination_different, destination_str, + destination_list + ] + + # test reduce() + for destinations in all_destinations: + self._assert_value_equal( + cross_tower_ops.reduce("mean", per_device, destinations=destinations), + _fake_mirrored(mean, destinations or per_device)) + self._assert_value_equal( + cross_tower_ops.reduce( + "mean", per_device_2, destinations=destinations), + _fake_mirrored(mean_2, destinations or per_device)) + self._assert_value_equal( + cross_tower_ops.reduce("sum", per_device, destinations=destinations), + _fake_mirrored(mean * len(devices), destinations or per_device)) + self._assert_value_equal( + cross_tower_ops.reduce( + "sum", per_device_2, destinations=destinations), + _fake_mirrored(mean_2 * len(devices), destinations or per_device)) + + # test batch_reduce() + for d1, d2 in itertools.product(all_destinations, all_destinations): + self._assert_value_equal( + cross_tower_ops.batch_reduce( + "mean", [(per_device, d1), (per_device_2, d2)]), + [_fake_mirrored(mean, d1 or per_device), + _fake_mirrored(mean_2, d2 or per_device_2)]) + self._assert_value_equal( + cross_tower_ops.batch_reduce( + "sum", [(per_device, d1), (per_device_2, d2)]), + [_fake_mirrored(mean * len(devices), d1 or per_device), + _fake_mirrored(mean_2 * len(devices), d2 or per_device_2)]) + + # test broadcast() + for destinations in all_destinations: + if destinations is None: + continue + else: + self._assert_value_equal( + cross_tower_ops.broadcast(constant_op.constant(1.), destinations), + _fake_mirrored(1., destinations)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/cross_tower_utils.py b/tensorflow/contrib/distribute/python/cross_tower_utils.py new file mode 100644 index 0000000000..93acd835d7 --- /dev/null +++ b/tensorflow/contrib/distribute/python/cross_tower_utils.py @@ -0,0 +1,153 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for cross_tower_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib import nccl +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def aggregate_gradients_using_nccl(tower_grads): + """Aggregate gradients using nccl allreduce.""" + agg_all_g_and_v = [] + for single_g_and_v in zip(*tower_grads): + single_grads = [g for g, _ in single_g_and_v] + agg_grads = nccl.all_sum(single_grads) + agg_all_g_and_v.append( + [(g, v) for g, (_, v) in zip(agg_grads, single_g_and_v)]) + + agg_all_g_and_v = list(zip(*agg_all_g_and_v)) + + return agg_all_g_and_v + + +def aggregate_gradients_using_hierarchical_copy(avail_devices, tower_grads): + """Aggregate gradients using hierarchical copies. + + Args: + avail_devices: available GPU devices. + tower_grads: List of lists of (gradient, variable) tuples. The outer list + is over towers. The inner list is over individual gradients. + + Returns: + The list of (aggregated_gradient, variable), where the gradient has been + summed across all towers and the variable is chosen from the first tower. + """ + # This only works for DGX-1 type of machine topology + # Device peer to peer matrix + # DMA: 0 1 2 3 4 5 6 7 + # 0: Y Y Y Y Y N N N + # 1: Y Y Y Y N Y N N + # 2: Y Y Y Y N N Y N + # 3: Y Y Y Y N N N Y + # 4: Y N N N Y Y Y Y + # 5: N Y N N Y Y Y Y + # 6: N N Y N Y Y Y Y + # 7: N N N Y Y Y Y Y + agg_grads = [] + num_devices = len(avail_devices) + # In the special case of DGX-1 machine topology, the two groups have equal + # size. + group_size = num_devices // 2 + for i, single_grads in enumerate(zip(*tower_grads)): + group_0_main_device = i % num_devices + group_1_main_device = (group_0_main_device + group_size) % num_devices + if group_0_main_device < group_size: + group_0_begin = 0 + group_1_begin = group_size + else: + group_0_begin = group_size + group_1_begin = 0 + + # Aggregate the first group. + group_0_device_grads = single_grads[group_0_begin: + group_0_begin + group_size] + with ops.device(avail_devices[group_0_main_device]): + group_0_agg_grads, _ = aggregate_single_gradient_using_copy( + group_0_device_grads, False, False) + + # Aggregate the second group. + group_1_device_grads = single_grads[group_1_begin: + group_1_begin + group_size] + with ops.device(avail_devices[group_1_main_device]): + group_1_agg_grads, _ = aggregate_single_gradient_using_copy( + group_1_device_grads, False, False) + + # Aggregate between the groups. + with ops.device(avail_devices[group_0_main_device]): + (agg_total_grads, _), _ = aggregate_single_gradient_using_copy( + [group_0_agg_grads, group_1_agg_grads], False, False) + + # Broadcast the result back into the root of each group. + with ops.device(avail_devices[group_0_main_device]): + group_0_agg_grads_bcast = array_ops.identity(agg_total_grads) + with ops.device(avail_devices[group_1_main_device]): + group_1_agg_grads_bcast = array_ops.identity(agg_total_grads) + + agg_grads_bcast = [] + for j in range(len(single_grads)): + with ops.device(avail_devices[j]): + # Broadcast the result back to each member in the group from the root. + if (group_0_main_device < group_size) == (j < group_size): + src_device_grad = group_0_agg_grads_bcast + else: + src_device_grad = group_1_agg_grads_bcast + agg_grads_bcast.append(array_ops.identity(src_device_grad)) + + agg_grads.append( + [(g, v) for g, (_, v) in zip(agg_grads_bcast, single_grads)]) + + agg_grads = list(zip(*agg_grads)) + + return agg_grads + + +def aggregate_single_gradient_using_copy(grad_and_vars, use_mean, + check_inf_nan): + """Calculate the average gradient for a shared variable across all towers. + + Note that this function provides a synchronization point across all towers. + + Args: + grad_and_vars: A list or tuple of (gradient, variable) tuples. Each + (gradient, variable) pair within the outer list represents the gradient + of the variable calculated for a single tower, and the number of pairs + equals the number of towers. + use_mean: if True, mean is taken, else sum of gradients is taken. + check_inf_nan: check grads for nans and infs. + + Returns: + The tuple ([(average_gradient, variable),], has_nan_or_inf) where the + gradient has been averaged across all towers. The variable is chosen from + the first tower. The has_nan_or_inf indicates the grads has nan or inf. + """ + grads = [g for g, _ in grad_and_vars] + grad = math_ops.add_n(grads) + + if use_mean and len(grads) > 1: + grad = array_ops.multiply(grad, 1.0 / len(grads)) + + v = grad_and_vars[0][1] + if check_inf_nan: + has_nan_or_inf = array_ops.logical_not( + array_ops.reduce_all(array_ops.is_finite(grads))) + return (grad, v), has_nan_or_inf + else: + return (grad, v), None diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py new file mode 100644 index 0000000000..0fa90df79b --- /dev/null +++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py @@ -0,0 +1,279 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for running legacy optimizer code with DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python.single_loss_example import batchnorm_example +from tensorflow.contrib.distribute.python.single_loss_example import minimize_loss_example +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.ops.losses import losses_impl + + +class MinimizeLossStepTest(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.times( + combinations.distributions_and_v1_optimizers(), + combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + + combinations.combine(mode=["eager"], use_callable_loss=[True]))) + def testTrainNetwork(self, distribution, optimizer_fn, + use_callable_loss=True): + with distribution.scope(): + model_fn, dataset, layer = minimize_loss_example( + optimizer_fn, + use_bias=True, + use_callable_loss=use_callable_loss) + + iterator = distribution.distribute_dataset(dataset) + + def run_step(): + return distribution.group( + distribution.call_for_each_tower( + model_fn, iterator.get_next(), run_concurrently=layer.built)) + + if not context.executing_eagerly(): + with self.test_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(variables_lib.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(10): + run_step() + + weights.append(self.evaluate(distribution.fetch(layer.kernel))) + biases.append(self.evaluate(distribution.fetch(layer.bias))) + + error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) + is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) + self.assertTrue(is_not_increasing) + + @combinations.generate( + combinations.times( + combinations.distributions_and_v1_optimizers() + + combinations.distributions_and_v2_optimizers(), + combinations.combine(mode=["graph", "eager"]))) + def testOptimizerInsideModelFn(self, distribution, optimizer_fn): + created_variables = [] + trainable_variables = [] + + def appending_creator(next_creator, *args, **kwargs): + v = next_creator(*args, **kwargs) + created_variables.append(v.name) + if "trainable" in kwargs and kwargs["trainable"]: + trainable_variables.append(v.name) + return v + + # Creator scope needs to be set before it's used inside + # `distribution.scope`. + with variable_scope.variable_creator_scope( + appending_creator), distribution.scope(): + model_fn, dataset, layer = minimize_loss_example( + optimizer_fn, + use_bias=True, + use_callable_loss=True, + create_optimizer_inside_model_fn=True) + + iterator = distribution.distribute_dataset(dataset) + + def run_step(): + return distribution.group( + distribution.call_for_each_tower( + model_fn, iterator.get_next(), run_concurrently=layer.built)) + + if not context.executing_eagerly(): + with self.test_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(variables_lib.global_variables_initializer()) + + run_step() + + def get_expected_variables(optimizer_fn, num_parameter_devices): + variables_map = { + "GradientDescent": ["dense/kernel", "dense/bias"], + "Adam": [ + "dense/kernel", "dense/bias", "beta1_power", "beta2_power", + "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam", + "dense/bias/Adam_1" + ] + } + variables = variables_map[optimizer_fn().get_name()] + variables.extend([ + v + "/replica_{}".format(replica) + for v in variables + for replica in range(1, num_parameter_devices) + ]) + return set([v + ":0" for v in variables]) + + self.assertEqual( + get_expected_variables(optimizer_fn, + len(distribution.parameter_devices)), + set(created_variables)) + + @combinations.generate( + combinations.times(combinations.distributions_and_v1_optimizers(), + combinations.combine( + mode=["graph", "eager"], + momentum=[0.8, 0.9, 0.99], + renorm=[False, True]))) + def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum, + renorm): + """Verifies that moving mean updates are reduced across towers.""" + with distribution.scope(): + num_towers = len(distribution.worker_devices) + model_fn, dataset, batchnorm = batchnorm_example( + optimizer_fn, + batch_per_epoch=num_towers, + momentum=momentum, + renorm=renorm) + + # Disable prefetching since that makes the specific input on each device + # to be non deterministic, and this test relies on specific input being + # on each device. + if isinstance(distribution, mirrored_strategy.MirroredStrategy): + distribution._prefetch_on_device = False + iterator = distribution.distribute_dataset(dataset) + + def run_step(): + return control_flow_ops.group( + distribution.unwrap( + distribution.call_for_each_tower( + model_fn, + iterator.get_next(), + run_concurrently=batchnorm.built)) + + ops.get_collection(ops.GraphKeys.UPDATE_OPS)) + + if not context.executing_eagerly(): + with self.test_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(variables_lib.global_variables_initializer()) + + expected_moving_means = [0.] * 8 + + def averaged_batch_mean(i): + # Each batch has shape [16, 8] where the ith element in jth list is + # (8 * j + i + tower_id * 100). So the batch mean in each tower is + # (60 + i + tower_id * 100). So here comes its batch mean over all + # towers: + return 60. + i + (num_towers - 1.) / 2. * 100. + + for _ in range(10): + run_step() + moving_means = self.evaluate(distribution.fetch(batchnorm.moving_mean)) + + # We make sure that the moving_mean is updated as if the sample mean is + # calculated over all towers. + for i, expected_moving_mean in enumerate(expected_moving_means): + expected_moving_means[i] -= (( + expected_moving_mean - averaged_batch_mean(i)) * (1.0 - momentum)) + self.assertNear(expected_moving_means[i], moving_means[i], 0.0001) + + @combinations.generate( + combinations.times( + combinations.combine( + distribution=[combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus], + optimizer_fn=[combinations.gradient_descent_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v2_fn], + loss_reduction=[losses_impl.Reduction.SUM, + losses_impl.Reduction.MEAN, + losses_impl.Reduction.SUM_OVER_BATCH_SIZE, + losses_impl.Reduction.SUM_OVER_NONZERO_WEIGHTS]), + combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + + combinations.combine(mode=["eager"], use_callable_loss=[True]))) + def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction, + use_callable_loss): + with distribution.scope(): + all_vars = [] + + def model_fn(x, y): + + def loss_fn(): + # Use fixed initialization to make the steps deterministic. + w = variable_scope.get_variable("w", initializer=[[2.]]) + all_vars.append(w) + predict = math_ops.matmul(x, w) + return losses_impl.mean_squared_error( + y, predict, reduction=loss_reduction) + + optimizer = optimizer_fn() # GradientDescent with 0.2 learning rate + + if use_callable_loss: + return optimizer.minimize(loss_fn) + else: + return optimizer.minimize(loss_fn()) + + features = dataset_ops.Dataset.from_tensors([[2.], [7.]]) + labels = dataset_ops.Dataset.from_tensors([[6.], [21.]]) + dataset = dataset_ops.Dataset.zip((features, labels)).repeat() + iterator = distribution.distribute_dataset(dataset) + + def run_step(): + return distribution.group( + distribution.call_for_each_tower( + model_fn, *iterator.get_next(), run_concurrently=False)) + + if not context.executing_eagerly(): + with self.test_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(variables_lib.global_variables_initializer()) + + run_step() + + self.assertEqual(distribution.num_towers, len(all_vars)) + v = all_vars[0] + self.assertTrue(all([v is vi for vi in all_vars[1:]])) + weight = numpy.squeeze(self.evaluate(distribution.fetch(v))) + # Our model is: + # predict = x * w + # loss = (predict - y)^2 + # dloss/dpredict = 2*(predict - y) + # dloss/dw = 2 * x^T @ (predict - y) + # For our batch size of 2, assuming sum loss reduction: + # x = [2, 7] + # y = [6, 21] + # w_initial = 2 + # predict = [4, 14] + # predict - y = [-2, -7] + # dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106 + # So unreplicated the update to w with lr=0.2 is -0.2 * -106 = 21.2 + # with sum loss reduction, or 10.6 with mean. + if loss_reduction == losses_impl.Reduction.SUM: + # Note that the "distribution.num_towers" factor will go away once + # we split the input across towers, instead of pulling a complete + # batch of input per tower. + self.assertNear(weight, 2 + 21.2 * distribution.num_towers, 0.0001) + else: + # One of the mean loss reductions. + self.assertNear(weight, 2 + 10.6, 0.0001) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py new file mode 100644 index 0000000000..8cf83c52d8 --- /dev/null +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -0,0 +1,486 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class MirroredStrategy implementing DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import threading +import six + +from tensorflow.contrib.distribute.python import cross_tower_ops as cross_tower_ops_lib +from tensorflow.contrib.distribute.python import shared_variable_creator +from tensorflow.contrib.distribute.python import values +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.eager import context +from tensorflow.python.eager import tape +from tensorflow.python.framework import device as tf_device +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import coordinator +from tensorflow.python.training import device_util +from tensorflow.python.training import distribute as distribute_lib + + +# TODO(josh11b): Replace asserts in this file with if ...: raise ... + + +def _cpu_device(device): + cpu_device = tf_device.DeviceSpec.from_string(device) + cpu_device.merge_from(tf_device.DeviceSpec(device_type="CPU", device_index=0)) + return cpu_device.to_string() + + +class _RequestedStop(Exception): + pass + + +class MirroredStrategy(distribute_lib.DistributionStrategy): + """Mirrors vars to distribute across multiple devices on a single machine. + + This strategy uses one tower per device and sync replication. + """ + + def __init__(self, + devices=None, + num_gpus=None, + cross_tower_ops=None, + prefetch_on_device=None): + super(MirroredStrategy, self).__init__() + # Convert `num_gpus` into `devices`, shouldn't specify both. + if devices is None: + if num_gpus is None: + num_gpus = context.num_gpus() + devices = ["/device:GPU:%d" % d for d in range(num_gpus)] + elif num_gpus is not None: + raise ValueError("Must only specify one of `devices` and `num_gpus`.") + + assert devices, "Must specify at least one device." + assert len(set(devices)) == len(devices), ( + "No duplicates allowed in `devices` argument.") + # TODO(josh11b): Require at least 2 devices? + self._devices = devices + self._canonical_device_set = set( + [device_util.canonicalize(d) for d in devices]) + self._device_index = values.PerDevice( + dict((d, i) for i, d in enumerate(devices))) + self.cross_tower_ops = ( + cross_tower_ops or + cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps()) + self._prefetch_on_device = prefetch_on_device + + def _create_variable(self, next_creator, *args, **kwargs): + """Create a mirrored variable. See `DistributionStrategy.scope`.""" + # Figure out what collections this variable should be added to. + # We'll add the MirroredVariable to those collections instead. + collections = kwargs.pop("collections", None) + if collections is None: + collections = [ops.GraphKeys.GLOBAL_VARIABLES] + kwargs["collections"] = [] + + colocate_with = kwargs.pop("colocate_with", None) + devices = self._get_devices_from(colocate_with) + + tower_local = kwargs.pop("tower_local_reduce_method", None) + if tower_local is not None: + kwargs["trainable"] = False + + # TODO(josh11b,apassos): It would be better if variable initialization + # was never recorded on the tape instead of having to do this manually + # here. + with tape.stop_recording(): + index = {} + for i, d in enumerate(devices): + with ops.device(d): + if i > 0: + # Give replicas meaningful distinct names: + var0name = index[devices[0]].name.split(":")[0] + kwargs["name"] = "%s/replica_%d" % (var0name, i) + # Initialize replicas with the same value: + if context.executing_eagerly(): + initial_value = index[devices[0]].value() + else: + initial_value = index[devices[0]].initial_value + kwargs["initial_value"] = array_ops.identity(initial_value) + with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + v = next_creator(*args, **kwargs) + assert not isinstance(v, values.DistributedVariable) + index[d] = v + + if tower_local is None: + result = values.MirroredVariable(index, index[devices[0]]) + else: + result = values.TowerLocalVariable( + index, index[devices[0]], tower_local) + + if not context.executing_eagerly(): + g = ops.get_default_graph() + # If "trainable" is True, next_creator() will add the member variables + # to the TRAINABLE_VARIABLES collection, so we manually remove + # them and replace with the MirroredVariable. We can't set + # "trainable" to False for next_creator() since that causes functions + # like implicit_gradients to skip those variables. + if kwargs.get("trainable", True): + collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) + l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) + for v in index.values(): + l.remove(v) + g.add_to_collections(collections, result) + return result + + def distribute_dataset(self, dataset): + per_device_dataset = values.PerDeviceDataset( + dataset, self._devices, self._prefetch_on_device) + return per_device_dataset.make_one_shot_iterator() + + def _broadcast(self, tensor, destinations): + # TODO(josh11b): In eager mode, use one thread per device, or async mode. + return self.cross_tower_ops.broadcast(tensor, destinations or self._devices) + + def _call_for_each_tower(self, fn, *args, **kwargs): + """Run `fn` in separate threads, once per tower/worker device. + + Args: + fn: function to run (will be run once per device, each in its own thread). + *args: positional arguments for `fn` + **kwargs: keyword arguments for `fn`. + `"run_concurrently"`: Boolean indicating whether executions of `fn` + can be run concurrently (under eager execution only), defaults to + `True`. + + Returns: + Merged return value of `fn` across all towers. + + Raises: + RuntimeError: If fn() calls get_tower_context().merge_call() a different + number of times for when called for different devices. + """ + run_concurrently = kwargs.pop("run_concurrently", True) + if not context.executing_eagerly(): + # Lots of TF library code isn't thread-safe in graph mode, and + # there is little to be gained by turning on multithreading when + # constructing a graph. + run_concurrently = False + # Needed for per-thread device, etc. contexts in graph mode. + ops.get_default_graph().switch_to_thread_local() + elif run_concurrently is None: + run_concurrently = True + + coord = coordinator.Coordinator( + clean_stop_exception_types=(_RequestedStop,)) + + shared_variable_store = {} + + # TODO(isaprykin): Create these threads once instead of during every run() + # call. + threads = [] + for index, d in enumerate(self._devices): + variable_creator_fn = shared_variable_creator.make_fn( + shared_variable_store, index) + t = MirroredStrategy._MirroredTowerThread( + self, coord, d, variable_creator_fn, fn, + *values.select_device(d, args), **values.select_device(d, kwargs)) + threads.append(t) + + for t in threads: + t.start() + + # When `fn` starts `should_run` event is set on _MirroredTowerThread + # (`MTT`) threads. The execution waits until + # `MTT.has_paused` is set, which indicates that either `fn` is + # complete or a `get_tower_context().merge_call()` is called. If `fn` is + # complete, then `MTT.done` is set to True. Otherwise, arguments + # of `get_tower_context().merge_call` from all paused threads are grouped + # and the `merge_fn` is performed. Results of the + # `get_tower_context().merge_call` are then set to `MTT.merge_result`. + # Each such `get_tower_context().merge_call` call returns the + # `MTT.merge_result` for that thread when `MTT.should_run` event + # is reset again. Execution of `fn` resumes. + + try: + with coord.stop_on_exception(): + all_done = False + while not all_done and not coord.should_stop(): + done = [] + if run_concurrently: + for t in threads: + t.should_run.set() + for t in threads: + t.has_paused.wait() + t.has_paused.clear() + if coord.should_stop(): + return None + done.append(t.done) + else: + for t in threads: + t.should_run.set() + t.has_paused.wait() + t.has_paused.clear() + if coord.should_stop(): + return None + done.append(t.done) + if coord.should_stop(): + return None + all_done = all(done) + if not all_done: + if any(done): + raise RuntimeError("Some towers made a different number of " + "tower_context().merge_call() calls.") + # get_tower_context().merge_call() case + merge_args = values.regroup( + {t.device: t.merge_args for t in threads}) + merge_kwargs = values.regroup( + {t.device: t.merge_kwargs for t in threads}) + merge_result = threads[0].merge_fn( + self, *merge_args, **merge_kwargs) + for t in threads: + t.merge_result = values.select_device(t.device, merge_result) + finally: + for t in threads: + t.should_run.set() + coord.join(threads) + + return values.regroup({t.device: t.main_result for t in threads}) + + def map(self, map_over, fn, *args, **kwargs): + # TODO(josh11b): In eager mode, use one thread per device. + index = {} + i = 0 + for m in map_over: + d = self._devices[i % len(self._devices)] + with ops.device(d): + l = index.get(d, []) + l.append(fn(m, + *values.select_device_mirrored(d, args), + **values.select_device_mirrored(d, kwargs))) + index[d] = l + # TODO(josh11b): Need a values.regroup equivalent that handles MapOutput + # in addition to PerDevice data. + return values.PerDevice({k: values.MapOutput(v) for k, v in index.items()}) + + def _reduce(self, method_string, value, destinations): + if len(self._devices) == 1 and not isinstance(value, values.PerDevice): + value = values.PerDevice({self._devices[0]: value}) + assert isinstance(value, values.PerDevice) + return self.cross_tower_ops.reduce( + method_string, value, destinations=destinations) + + def _batch_reduce(self, method_string, value_destination_pairs): + return self.cross_tower_ops.batch_reduce(method_string, + value_destination_pairs) + + def _update(self, var, fn, *args, **kwargs): + # TODO(josh11b): Also support TowerLocalVariables here? If so, args and + # kwargs don't need to be mirrored. + assert isinstance(var, values.MirroredVariable) + # TODO(josh11b): In eager mode, use one thread per device. + updates = {} + for d, v in var._index.items(): # pylint: disable=protected-access + name = "update_%d" % self._device_index.get(d) + with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name): + updates[d] = fn(v, + *values.select_device_mirrored(d, args), + **values.select_device_mirrored(d, kwargs)) + return values.regroup(updates, values.Mirrored) + + def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + assert isinstance(colocate_with, list) + # TODO(josh11b): In eager mode, use one thread per device. + updates = {} + for d in colocate_with: + name = "update_%d" % self._device_index.get(d) + with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name): + updates[d] = fn(*values.select_device_mirrored(d, args), + **values.select_device_mirrored(d, kwargs)) + return values.regroup(updates, values.Mirrored) + + def _fetch(self, val, destination, fn): + """Return a copy of `val` or `fn(val)` on `destination`.""" + assert isinstance(destination, six.string_types) + if isinstance(val, values.TowerLocalVariable): + val = self.reduce(val.reduce_method, val, destinations=destination) + with ops.device(destination): + return fn(self.unwrap(val)[0]) + + assert isinstance(val, values.Mirrored), ( + "val = %s (type %s)" % (val, val.__class__.__name__)) + if val.on_device(destination): + with ops.device(destination): + # Use an identity here to make sure we are returning a tensor + # instead of e.g. a variable object. + return array_ops.identity(fn(val.get(destination))) + device = None + for d in self._devices: + if val.on_device(d): + device = d + break + assert device is not None, ( + "Could not find destination %s in list of devices %s." % + (destination, val.devices)) + with ops.device(device): + v = fn(val.get(device)) + with ops.device(destination): + return array_ops.identity(v) + + def _unwrap(self, val): + if isinstance(val, values.DistributedValues): + # Return in a deterministic order. + if set(val.devices) == self._canonical_device_set: + return [val.get(device=d) for d in self._devices] + return [val.get(device=d) for d in sorted(val.devices)] + return [val] + + @property + def is_single_tower(self): + return len(self._devices) == 1 + + @property + def num_towers(self): + return len(self._devices) + + def _worker_device_index(self): + return self._device_index + + @property + def worker_devices(self): + # Make a copy to prevent users from accidentally mutating our copy. + return list(self._devices) + + @property + def parameter_devices(self): + return list(self._devices) + + def non_slot_devices(self, var_list): + del var_list + return list(self._devices) + + def _get_devices_from(self, colocate_with=None): + if colocate_with is None: + return self._devices + elif isinstance(colocate_with, values.DistributedValues): + # pylint: disable=protected-access + return list(colocate_with._index.keys()) + elif isinstance(colocate_with, six.string_types): + return [colocate_with] + else: + return colocate_with + + class _MirroredTowerThread(threading.Thread): + """A thread that runs() a function on a device.""" + + def __init__(self, dist, coord, device, variable_creator_fn, fn, *args, + **kwargs): + super(MirroredStrategy._MirroredTowerThread, self).__init__() # pylint: disable=protected-access + self.coord = coord + self.distribution = dist + self.device = device + self.tower_id = dist.worker_devices.index(device) + self.variable_creator_fn = variable_creator_fn + # State needed to run and return the results of `fn`. + self.main_fn = fn + self.main_args = args + self.main_kwargs = kwargs + self.main_result = None + self.done = False + # State needed to run the next merge_call() (if any) requested via + # TowerContext. + self.merge_fn = None + self.merge_args = None + self.merge_kwargs = None + self.merge_result = None + # We use a thread.Event for the main thread to signal when this + # thread should start running (`should_run`), and another for + # this thread to transfer control back to the main thread + # (`has_paused`, either when it gets to a + # `get_tower_context().merge_call` or when `fn` returns). In + # either case the event starts cleared, is signaled by calling + # set(). The receiving thread waits for the signal by calling + # wait() and then immediately clearing the event using clear(). + self.should_run = threading.Event() + self.has_paused = threading.Event() + # These fields have to do with inheriting various contexts from the + # parent thread: + # pylint: disable=protected-access + self.context_mode = context.context()._eager_context.mode + if not context.context()._context_handle: + context.context()._initialize_handle_and_devices() + self.context_device_policy = ( + pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy( + context.context()._context_handle)) + self.graph = ops.get_default_graph() + self._variable_creator_stack = self.graph._variable_creator_stack[:] + self._captured_var_scope = variable_scope.get_variable_scope() + # Adding a "/" at end lets us re-enter this scope later. + self._captured_name_scope = self.graph.get_name_scope() + if self._captured_name_scope: + self._captured_name_scope += "/" + if self.tower_id > 0: + if not self._captured_name_scope: + self._captured_name_scope = "" + self._captured_name_scope += "tower_%d/" % self.tower_id + + def run(self): + # pylint: disable=protected-access + self.graph._variable_creator_stack = self._variable_creator_stack + self.should_run.wait() + self.should_run.clear() + try: + if self.coord.should_stop(): + return + with self.coord.stop_on_exception(), \ + context.context()._mode(self.context_mode), \ + context.context().device_policy(self.context_device_policy), \ + self.graph.as_default(), \ + MirroredTowerContext(self.distribution, self.tower_id), \ + ops.device(self.device), \ + ops.name_scope(self._captured_name_scope), \ + variable_scope.variable_scope( + self._captured_var_scope, reuse=self.tower_id > 0), \ + variable_scope.variable_creator_scope(self.variable_creator_fn): + self.main_result = self.main_fn(*self.main_args, **self.main_kwargs) + self.done = True + finally: + self.has_paused.set() + + +class MirroredTowerContext(distribute_lib.TowerContext): + """TowerContext used in MirroredStrategy.call_for_each_tower(). + + Opened in `_MirroredTowerThread`, to allow the user to invoke + `MirroredStrategy`'s specific implementation of `merge_call()`, + which works by delegating the function and its arguments to + the main thread (the one that invoked + `MirroredStrategy.call_for_each_tower()`). + """ + + def _merge_call(self, fn, *args, **kwargs): + """Delegate to the main thread to actually perform merge_call().""" + t = threading.current_thread() # a _MirroredTowerThread + t.merge_fn = fn + t.merge_args = args + t.merge_kwargs = kwargs + t.has_paused.set() + t.should_run.wait() + t.should_run.clear() + if t.coord.should_stop(): + raise _RequestedStop() + return t.merge_result + + @property + def device(self): + distribute_lib.require_tower_context(self) + return self._distribution_strategy.worker_devices[self._tower_id] diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py new file mode 100644 index 0000000000..9e9f06da8e --- /dev/null +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -0,0 +1,435 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Multi-GPU tests for MirroredStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python import strategy_test_lib +from tensorflow.contrib.distribute.python import values +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.layers import core +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import distribute as distribute_lib + +GPU_TEST = "test_gpu" in sys.argv[0] + + +class MirroredTwoDeviceDistributionTest(strategy_test_lib.DistributionTestBase): + + def _get_distribution_strategy(self): + devices = ["/device:CPU:0", "/device:GPU:0"] + if GPU_TEST: + self.assertGreater(context.num_gpus(), 0) + if context.num_gpus() > 1: + devices = ["/device:GPU:0", "/device:GPU:1"] + print(self.id().split(".")[-1], "devices:", ", ".join(devices)) + return mirrored_strategy.MirroredStrategy(devices) + + def testMinimizeLossEager(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self._test_minimize_loss_eager(self._get_distribution_strategy()) + + def testMinimizeLossGraph(self): + soft_placement = not GPU_TEST + print("testMinimizeLossGraph soft_placement:", soft_placement) + self._test_minimize_loss_graph( + self._get_distribution_strategy(), soft_placement=soft_placement) + + def testMapReduce(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self._test_map_reduce(self._get_distribution_strategy()) + + def testDeviceIndex(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self._test_device_index(self._get_distribution_strategy()) + + def testTowerId(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self._test_tower_id(self._get_distribution_strategy()) + + def testNumTowers(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self.assertEqual(2, self._get_distribution_strategy().num_towers) + + @test_util.run_in_graph_and_eager_modes() + def testCallAndMergeExceptions(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + self._test_call_and_merge_exceptions(self._get_distribution_strategy()) + + @test_util.run_in_graph_and_eager_modes() + def testRunRegroupError(self): + + def run_fn(device_id): + # Generates a list with different lengths on different devices. + # Will fail in _regroup() (if more than one device). + return list(range(device_id)) + + dist = self._get_distribution_strategy() + with dist.scope(), self.assertRaises(AssertionError): + dist.call_for_each_tower(run_fn, dist.worker_device_index) + + @test_util.run_in_graph_and_eager_modes() + def testReduceToCpu(self): + if not GPU_TEST: + self.skipTest("Not GPU test") + + def run_fn(device_id): + return device_id + + dist = self._get_distribution_strategy() + with dist.scope(): + result = dist.call_for_each_tower(run_fn, dist.worker_device_index) + reduced = dist.reduce("sum", result, destinations="/device:CPU:0") + unwrapped = dist.unwrap(reduced) + self.assertEqual(1, len(unwrapped)) + expected = sum(range(len(dist.worker_devices))) + self.assertEqual(expected, self.evaluate(unwrapped[0])) + + +@test_util.with_c_api +class MirroredStrategyVariableCreationTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + def _skip_eager_if_gpus_less_than(self, num_gpus): + if context.num_gpus() < num_gpus and context.executing_eagerly(): + self.skipTest("Enough GPUs not available for this test in eager mode.") + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSingleVariable(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(): + # This variable should be created only once across the threads because of + # special variable_creator functions used by `dist.call_for_each_tower`. + v = variable_scope.variable(1.0, name="foo") + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertIsInstance(result, values.MirroredVariable) + self.assertEquals("foo:0", result.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testUnnamedVariable(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(): + v = variable_scope.variable(1.0) + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertIsInstance(result, values.MirroredVariable) + # Default name of "Variable" will be used. + self.assertEquals("Variable:0", result.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testMultipleVariables(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(): + vs = [] + for i in range(5): + vs.append(variable_scope.variable(1.0, name="foo" + str(i))) + distribute_lib.get_tower_context().merge_call(lambda _: _) + return vs + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + for i, v in enumerate(result): + self.assertIsInstance(v, values.MirroredVariable) + self.assertEquals("foo" + str(i) + ":0", v.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testMultipleVariablesWithSameCanonicalName(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(): + vs = [] + vs.append(variable_scope.variable(1.0, name="foo/bar")) + vs.append(variable_scope.variable(1.0, name="foo_1/bar")) + vs.append(variable_scope.variable(1.0, name="foo_1/bar_1")) + vs.append(variable_scope.variable(1.0, name="foo/bar_1")) + distribute_lib.get_tower_context().merge_call(lambda _: _) + return vs + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + for v in result: + self.assertIsInstance(v, values.MirroredVariable) + self.assertEquals(4, len(result)) + self.assertEquals("foo/bar:0", result[0].name) + self.assertEquals("foo_1/bar:0", result[1].name) + self.assertEquals("foo_1/bar_1:0", result[2].name) + self.assertEquals("foo/bar_1:0", result[3].name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testVariableWithSameCanonicalNameAcrossThreads(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(device_id): + v = variable_scope.variable(1.0, name="foo_" + str(device_id)) + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower( + model_fn, dist.worker_device_index, run_concurrently=False) + self.assertIsInstance(result, values.MirroredVariable) + # The resulting mirrored variable will use the name from the first device. + self.assertEquals("foo_0:0", result.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testWithLayers(self): + self._skip_eager_if_gpus_less_than(1) + def model_fn(features): + with variable_scope.variable_scope("common"): + layer1 = core.Dense(1) + layer1(features) + layer2 = core.Dense(1) + layer2(features) + # This will pause the current thread, and execute the other thread. + distribute_lib.get_tower_context().merge_call(lambda _: _) + layer3 = core.Dense(1) + layer3(features) + return [(layer1.kernel, layer1.bias), + (layer2.kernel, layer2.bias), + (layer3.kernel, layer3.bias)] + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + features = dist.distribute_dataset(features).get_next() + + with dist.scope(): + result = dist.call_for_each_tower( + model_fn, features, run_concurrently=False) + suffixes = ["", "_1", "_2"] + for (kernel, bias), suffix in zip(result, suffixes): + self.assertIsInstance(kernel, values.MirroredVariable) + self.assertEquals("common/dense" + suffix + "/kernel:0", kernel.name) + self.assertIsInstance(bias, values.MirroredVariable) + self.assertEquals("common/dense" + suffix + "/bias:0", bias.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testWithGetVariableAndVariableScope(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(): + v0 = variable_scope.get_variable("var-thread0", [1]) + with variable_scope.variable_scope("common"): + v1 = variable_scope.get_variable("var-thread1", [1]) + # This will pause the current thread, and execute the other thread. + distribute_lib.get_tower_context().merge_call(lambda _: _) + v2 = variable_scope.get_variable("var-thread2", [1]) + + return v0, v1, v2 + + devices = ["/device:CPU:0", "/device:GPU:0"] + dist = mirrored_strategy.MirroredStrategy(devices) + with dist.scope(): + with variable_scope.variable_scope("main"): + v = variable_scope.get_variable("var-main0", [1]) + self.assertEquals("main/var-main0:0", v.name) + + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertEquals(3, len(result)) + v0, v1, v2 = result + self.assertIsInstance(v0, values.MirroredVariable) + self.assertEquals("main/var-thread0:0", v0.name) + self.assertIsInstance(v1, values.MirroredVariable) + self.assertEquals("main/common/var-thread1:0", v1.name) + self.assertIsInstance(v2, values.MirroredVariable) + self.assertEquals("main/common/var-thread2:0", v2.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testThreeDevices(self): + self._skip_eager_if_gpus_less_than(2) + + def model_fn(): + v = variable_scope.variable(1.0, name="foo") + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:GPU:1", "/device:CPU:0"]) + + with dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertIsInstance(result, values.MirroredVariable) + self.assertEquals("foo:0", result.name) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testNonMatchingVariableCreation(self): + self._skip_eager_if_gpus_less_than(1) + + def model_fn(name): + v = variable_scope.variable(1.0, name=name) + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + names = values.DistributedValues({ + "/device:CPU:0": "foo", + "/device:GPU:0": "bar" + }) + with self.assertRaises(RuntimeError): + _ = dist.call_for_each_tower(model_fn, names, run_concurrently=False) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testTowerLocalVariable(self): + self._skip_eager_if_gpus_less_than(1) + + all_v_sum = {} + all_v_mean = {} + + def model_fn(device_id): + tower_context = distribute_lib.get_tower_context() + with tower_context.tower_local_var_scope("sum"): + v_sum = variable_scope.variable(1.0) + with tower_context.tower_local_var_scope("mean"): + v_mean = variable_scope.variable(4.0) + self.assertTrue(isinstance(v_sum, values.TowerLocalVariable)) + self.assertTrue(isinstance(v_mean, values.TowerLocalVariable)) + updates = [v_sum.assign_add(2.0 + device_id), + v_mean.assign(6.0 * device_id)] + all_v_sum[device_id] = v_sum + all_v_mean[device_id] = v_mean + return updates, v_sum, v_mean + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with dist.scope(): + # Create "sum" and "mean" versions of TowerLocalVariables. + ret_ops, ret_v_sum, ret_v_mean = dist.call_for_each_tower( + model_fn, dist.worker_device_index, run_concurrently=False) + # Should see the same wrapping instance in all towers. + self.assertIs(all_v_sum[0], ret_v_sum) + self.assertIs(all_v_mean[0], ret_v_mean) + for i in range(1, dist.num_towers): + self.assertIs(all_v_sum[0], all_v_sum[1]) + self.assertIs(all_v_mean[0], all_v_mean[1]) + + # Apply updates + self.evaluate(variables.global_variables_initializer()) + self.evaluate([y for x in ret_ops for y in dist.unwrap(x)]) + expected_sum = 0.0 + expected_mean = 0.0 + for i, d in enumerate(dist.worker_devices): + # Test access within a device scope, should see different values. + with ops.device(d): + v_sum_value = self.evaluate(ret_v_sum.read_value()) + v_mean_value = self.evaluate(ret_v_mean.read_value()) + expected = i + 3.0 + self.assertEqual(expected, v_sum_value) + expected_sum += expected + expected = i * 6.0 + self.assertEqual(expected, v_mean_value) + expected_mean += expected + + # fetch() should return the value you get by applying the + # reduction across all towers. + self.assertEqual(expected_sum, self.evaluate(dist.fetch(ret_v_sum))) + expected_mean /= len(dist.worker_devices) + self.assertEqual(expected_mean, self.evaluate(dist.fetch(ret_v_mean))) + + # NOTE(priyag): Names and name scopes are ignored in eager, hence we are not + # testing this in eager mode. + + def testNameScope(self): + def model_fn(): + with ops.name_scope("foo"): + a = constant_op.constant(1.0, name="a") + distribute_lib.get_tower_context().merge_call(lambda _: _) + b = constant_op.constant(1.0, name="b") + return a, b + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with context.graph_mode(), dist.scope(): + with ops.name_scope("main"): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertEquals(2, len(result)) + for v, name in zip(result, ["a", "b"]): + self.assertIsInstance(v, values.DistributedValues) + v0, v1 = dist.unwrap(v) + self.assertEquals("main/foo/" + name + ":0", v0.name) + self.assertEquals("main/tower_1/foo/" + name + ":0", v1.name) + + def testWithDefaultName(self): + def model_fn(): + with ops.name_scope(None, "foo"): + a = constant_op.constant(1.0, name="a") + distribute_lib.get_tower_context().merge_call(lambda _: _) + b = constant_op.constant(2.0, name="b") + return a, b + + dist = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:CPU:0"]) + + with context.graph_mode(), dist.scope(): + result = dist.call_for_each_tower(model_fn, run_concurrently=False) + self.assertEquals(2, len(result)) + for v, name in zip(result, ["a", "b"]): + self.assertIsInstance(v, values.DistributedValues) + v0, v1 = dist.unwrap(v) + self.assertEquals("foo/" + name + ":0", v0.name) + self.assertEquals("tower_1/foo/" + name + ":0", v1.name) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_test.py new file mode 100644 index 0000000000..a1ef0ecc77 --- /dev/null +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_test.py @@ -0,0 +1,91 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for class MirroredStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python import strategy_test_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import distribute as distribute_lib + + +@test_util.with_c_api +class MirroredOneCPUDistributionTest(strategy_test_lib.DistributionTestBase): + + def _get_distribution_strategy(self): + return mirrored_strategy.MirroredStrategy(["/device:CPU:0"]) + + def testMinimizeLossEager(self): + self._test_minimize_loss_eager(self._get_distribution_strategy()) + + def testMinimizeLossGraph(self): + self._test_minimize_loss_graph(self._get_distribution_strategy()) + + def testMapReduce(self): + self._test_map_reduce(self._get_distribution_strategy()) + + def testDeviceIndex(self): + self._test_device_index(self._get_distribution_strategy()) + + def testTowerId(self): + self._test_tower_id(self._get_distribution_strategy()) + + @test_util.run_in_graph_and_eager_modes() + def testCallAndMergeExceptions(self): + self._test_call_and_merge_exceptions(self._get_distribution_strategy()) + + +@test_util.with_c_api +class VariableCreatorStackTest(test.TestCase): + + def testCreatorStacksAreThreadLocal(self): + devices = ["/device:CPU:0", "/device:GPU:0"] + dist = mirrored_strategy.MirroredStrategy(devices) + + def model_fn(device_id): + assert isinstance(device_id, int) + def thread_creator_fn(next_creator, *args, **kwargs): + return next_creator(*args, **kwargs) + ":thread_" + str(device_id) + + with variable_scope.variable_creator_scope(thread_creator_fn): + # Create a variable in this scope. + v = variable_scope.variable(1.0) + + # This will pause the current thread, and execute the other thread. + distribute_lib.get_tower_context().merge_call(lambda _: _) + return v + + def main_thread_creator(next_creator, *args, **kwargs): + # We are not using the underlying next_creator for test purposes. + del next_creator, args, kwargs + return "main_thread" + + with context.graph_mode(), \ + dist.scope(), \ + variable_scope.variable_creator_scope(main_thread_creator): + result = dist.call_for_each_tower(model_fn, dist.worker_device_index) + result = dist.unwrap(result) + expected = ["main_thread:thread_0", "main_thread:thread_1"] + self.assertEquals(expected, result) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/monitor.py b/tensorflow/contrib/distribute/python/monitor.py new file mode 100644 index 0000000000..fe80bb4df5 --- /dev/null +++ b/tensorflow/contrib/distribute/python/monitor.py @@ -0,0 +1,61 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Monitor is responsible for training, checkpointing and recovery.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.ops import variables + + +class Monitor(object): + """Executes training steps, recovers and checkpoints. + + Note that this class is particularly preliminary, experimental, and + expected to change. + """ + # TODO(isaprykin): Support step functions that need multiple session calls. + # TODO(isaprykin): Support extra arguments to the step function. + # TODO(isaprykin): Support recovery, checkpointing and summaries. + + def __init__(self, step_callable, session=None): + """Initialize the Monitor with components for executing training steps. + + Args: + step_callable: a training `Step` that's capable of signaling when done. + session: a `Session` instance that's needed for graph mode. + + Raises: + ValueError: if `session` was provided for eager mode or not provided for + graph mode. + """ + if context.executing_eagerly(): + if session is not None: + raise ValueError("Should not provide a `session` in Eager mode.") + self._run_step = step_callable + else: + if session is None: + raise ValueError("Should provide a `session` in Graph mode.") + self._run_step = session.make_callable(step_callable()) + session.run(variables.global_variables_initializer()) + + def run_steps(self, num_steps=None): + step = 0 + done = False + while done is not None and (num_steps is None or step < num_steps): + done = self._run_step() + step += 1 diff --git a/tensorflow/contrib/distribute/python/monitor_test.py b/tensorflow/contrib/distribute/python/monitor_test.py new file mode 100644 index 0000000000..8277e1e791 --- /dev/null +++ b/tensorflow/contrib/distribute/python/monitor_test.py @@ -0,0 +1,84 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for class Monitor.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import monitor as monitor_lib +from tensorflow.contrib.distribute.python import one_device_strategy +from tensorflow.contrib.distribute.python.single_loss_example import single_loss_example +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import ops +from tensorflow.python.training import gradient_descent + + +class MonitorTest(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.times( + combinations.distributions_and_v1_optimizers(), + combinations.combine(mode=combinations.graph_and_eager_modes))) + def testTrainNetwork(self, distribution, optimizer_fn): + with distribution.scope(): + single_loss_step, layer = single_loss_example(optimizer_fn, distribution) + + if context.executing_eagerly(): + monitor = monitor_lib.Monitor(single_loss_step, None) + else: + with self.test_session() as sess: + monitor = monitor_lib.Monitor(single_loss_step, sess) + + monitor.run_steps(1) + + self.assertEqual(1, len(layer.trainable_variables)) + mirrored_weight_variable = layer.trainable_variables[0] + start_error = self.evaluate(distribution.fetch(mirrored_weight_variable)) + start_error = abs(numpy.array(start_error) - 1) + + monitor.run_steps(9) + end_error = self.evaluate(distribution.fetch(mirrored_weight_variable)) + end_error = abs(numpy.array(end_error) - 1) + self.assertGreaterEqual(start_error, end_error) + + def testPassingASessionInEager(self): + distribution = one_device_strategy.OneDeviceStrategy( + "/device:CPU:0") + step_function, _ = single_loss_example( + lambda: gradient_descent.GradientDescentOptimizer(0.2), distribution) + + with self.test_session() as sess: + with self.assertRaisesRegexp(ValueError, "Should not provide"): + _ = monitor_lib.Monitor(step_function, sess) + + def testNotPassingASessionInGraph(self): + distribution = one_device_strategy.OneDeviceStrategy( + "/device:CPU:0") + step_function, _ = single_loss_example( + lambda: gradient_descent.GradientDescentOptimizer(0.2), distribution) + + with context.graph_mode(), ops.Graph().as_default(): + with self.assertRaisesRegexp(ValueError, "Should provide"): + _ = monitor_lib.Monitor(step_function, session=None) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py new file mode 100644 index 0000000000..39c49442b9 --- /dev/null +++ b/tensorflow/contrib/distribute/python/one_device_strategy.py @@ -0,0 +1,148 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class OneDeviceStrategy implementing DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +from tensorflow.contrib.distribute.python import values +from tensorflow.contrib.eager.python import datasets +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import distribute as distribute_lib + + +# TODO(josh11b): Replace asserts in this file with if ...: raise ... + + +class OneDeviceStrategy(distribute_lib.DistributionStrategy): + """A distribution strategy for running on a single device.""" + # TODO(josh11b): Do we wrap values in types to generate errors if you are + # doing something that won't work with other DistributionStrategy + # implementations? + + def __init__(self, device): + super(OneDeviceStrategy, self).__init__() + self._device = device + + def _create_variable(self, next_creator, *args, **kwargs): + # No need to distinguish tower-local variables when not mirroring, + # we just enforce that they are not trainable. + if kwargs.pop("tower_local_reduce_method", None) is not None: + kwargs["trainable"] = False + + colocate_with = kwargs.pop("colocate_with", None) + if colocate_with is None: + with ops.device(self._device): + return next_creator(*args, **kwargs) + if isinstance(colocate_with, six.string_types): + with ops.device(colocate_with): + return next_creator(*args, **kwargs) + if (isinstance(colocate_with, list) and len(colocate_with) == 1 and + isinstance(colocate_with[0], six.string_types)): + with ops.device(colocate_with[0]): + return next_creator(*args, **kwargs) + with ops.colocate_with(colocate_with): + return next_creator(*args, **kwargs) + + def distribute_dataset(self, dataset): + if context.executing_eagerly(): + return datasets.Iterator(dataset) + else: + return dataset.make_one_shot_iterator() + + def _broadcast(self, tensor, destinations): + return tensor + + def _call_for_each_tower(self, fn, *args, **kwargs): + # We don't run `fn` in multiple threads in OneDeviceStrategy. + kwargs.pop("run_concurrently", None) + with ops.device(self._device), _OneDeviceTowerContext(self): + return fn(*args, **kwargs) + + def map(self, map_over, fn, *args, **kwargs): + with ops.device(self._device): + return values.MapOutput([fn(m, *args, **kwargs) for m in map_over]) + + def _reduce(self, method_string, value, destinations): + if not isinstance(value, values.MapOutput): + return value + l = value.get() + assert l + with ops.device(self._device): + if method_string == "sum": + return math_ops.add_n(l) + elif method_string == "mean": + return math_ops.add_n(l) / len(l) + else: + assert False + + def _update(self, var, fn, *args, **kwargs): + with ops.device(self._device), distribute_lib.UpdateContext(self._device): + return fn(var, *args, **kwargs) + + def _update_non_slot(self, colocate_with, fn, *args, **kwargs): + del colocate_with + with ops.device(self._device), distribute_lib.UpdateContext(self._device): + return fn(*args, **kwargs) + + def _fetch(self, val, destination, fn): + """Return a copy of `val` or `fn(val)` on `destination`.""" + with ops.device(self._device): + v = fn(val) + with ops.device(destination): + return array_ops.identity(v) + + def _unwrap(self, value): + return [value] + + @property + def is_single_tower(self): + return True + + @property + def num_towers(self): + return 1 + + @property + def worker_devices(self): + return [self._device] + + @property + def parameter_devices(self): + return [self._device] + + def non_slot_devices(self, var_list): + del var_list + return [self._device] + + def _worker_device_index(self): + return 0 + + +class _OneDeviceTowerContext(distribute_lib.TowerContext): + + def __init__(self, distribution_strategy): + distribute_lib.TowerContext.__init__( + self, distribution_strategy, tower_id=0) + + @property + def device(self): + return self._distribution_strategy.worker_devices[0] diff --git a/tensorflow/contrib/distribute/python/one_device_strategy_test.py b/tensorflow/contrib/distribute/python/one_device_strategy_test.py new file mode 100644 index 0000000000..7101ed0756 --- /dev/null +++ b/tensorflow/contrib/distribute/python/one_device_strategy_test.py @@ -0,0 +1,54 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for class OneDeviceStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import one_device_strategy +from tensorflow.contrib.distribute.python import strategy_test_lib +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util + + +@test_util.with_c_api +class OneDeviceStrategyTest(strategy_test_lib.DistributionTestBase): + + def _get_distribution_strategy(self): + return one_device_strategy.OneDeviceStrategy("/device:CPU:0") + + def testMinimizeLossEager(self): + self._test_minimize_loss_eager(self._get_distribution_strategy()) + + def testMinimizeLossGraph(self): + self._test_minimize_loss_graph(self._get_distribution_strategy()) + + def testMapReduce(self): + self._test_map_reduce(self._get_distribution_strategy()) + + def testDeviceIndex(self): + self._test_device_index(self._get_distribution_strategy()) + + def testTowerId(self): + self._test_tower_id(self._get_distribution_strategy()) + + @test_util.run_in_graph_and_eager_modes() + def testCallAndMergeExceptions(self): + self._test_call_and_merge_exceptions(self._get_distribution_strategy()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/optimizer_v2_test.py b/tensorflow/contrib/distribute/python/optimizer_v2_test.py new file mode 100644 index 0000000000..a0912b625f --- /dev/null +++ b/tensorflow/contrib/distribute/python/optimizer_v2_test.py @@ -0,0 +1,70 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for running legacy optimizer code with DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python.single_loss_example import minimize_loss_example +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import variables + + +class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.times( + combinations.distributions_and_v2_optimizers(), + combinations.combine(mode=["graph"], use_callable_loss=[True, False]) + + combinations.combine(mode=["eager"], use_callable_loss=[True]))) + def testTrainNetwork(self, distribution, optimizer_fn, + use_callable_loss=True): + with distribution.scope(): + model_fn, dataset, layer = minimize_loss_example( + optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss) + + iterator = distribution.distribute_dataset(dataset) + + def run_step(): + return control_flow_ops.group(distribution.unwrap( + distribution.call_for_each_tower( + model_fn, iterator.get_next(), run_concurrently=layer.built))) + + if not context.executing_eagerly(): + with self.test_session() as sess: + run_step = sess.make_callable(run_step()) + self.evaluate(variables.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(10): + run_step() + + weights.append(self.evaluate(distribution.fetch(layer.kernel))) + biases.append(self.evaluate(distribution.fetch(layer.bias))) + + error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) + is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) + self.assertTrue(is_not_increasing) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py new file mode 100644 index 0000000000..b9ffd2f266 --- /dev/null +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2.py @@ -0,0 +1,167 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Extension of prefetching_ops to support more than one device.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import warnings + +from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import +from tensorflow.contrib.data.python.ops import gen_dataset_ops +from tensorflow.contrib.data.python.ops import prefetching_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import nest as data_nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.util import nest + + +# pylint: disable=protected-access +class _PrefetchToDeviceIterator(object): + """A replacement for @{tf.data.Iterator} that prefetches to another device.""" + + def __init__(self, input_dataset, devices, buffer_size): + self._input_dataset = input_dataset + self._get_next_call_count = 0 + self._devices = devices + input_iterator = input_dataset.make_one_shot_iterator() + input_iterator_handle = input_iterator.string_handle() + + @function.Defun(dtypes.string) + def _prefetch_fn(handle): + remote_iterator = iterator_ops.Iterator.from_string_handle( + handle, input_iterator.output_types, input_iterator.output_shapes, + input_iterator.output_classes) + return remote_iterator.get_next() + + target_device = gen_dataset_ops.iterator_get_device( + input_iterator._iterator_resource) + self._buffering_resources = [] + for device in nest.flatten(self._devices): + with ops.device(device): + buffer_resource_handle = prefetching_ops.function_buffering_resource( + f=_prefetch_fn, + target_device=target_device, + string_arg=input_iterator_handle, + buffer_size=buffer_size, + thread_pool_size=0) + self._buffering_resources.append(buffer_resource_handle) + + def get_next(self, name=None): + """See @{tf.data.Iterator.get_next}.""" + self._get_next_call_count += 1 + if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD: + warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE) + + flat_result = [] + # TODO(priyag): This will fail if the input size (typically number of + # batches) is not divisible by number of devices. + # How do we handle that more gracefully / let the user know? + for buffer_resource in self._buffering_resources: + flat_ret = gen_dataset_ops.function_buffering_resource_get_next( + buffer_resource, + output_types=data_nest.flatten(sparse.as_dense_types( + self.output_types, self.output_classes)), name=name) + + ret = sparse.deserialize_sparse_tensors( + data_nest.pack_sequence_as(self.output_types, flat_ret), + self.output_types, self.output_shapes, self.output_classes) + + for tensor, shape in zip( + data_nest.flatten(ret), data_nest.flatten(self.output_shapes)): + if isinstance(tensor, ops.Tensor): + tensor.set_shape(shape) + flat_result.append(ret) + + return nest.pack_sequence_as(self._devices, flat_result) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types +# pylint: enable=protected-access + + +class _PrefetchToDeviceDataset(dataset_ops.Dataset): + """A `Dataset` whose iterator prefetches elements to other device(s).""" + + def __init__(self, input_dataset, devices, buffer_size): + self._input_dataset = input_dataset + self._devices = devices + self._buffer_size = buffer_size if buffer_size is not None else 1 + + def make_one_shot_iterator(self): + return _PrefetchToDeviceIterator(self._input_dataset, self._devices, + self._buffer_size) + + def make_initializable_iterator(self, shared_name=None): + raise NotImplementedError("`prefetch_to_devices()` is not currently " + "compatible with initializable iterators. Use " + "`make_one_shot_iterator()` instead.") + + def _as_variant_tensor(self): + # TODO(mrry): Raise this error earlier (e.g. when one of the Dataset + # transformation methods is called. + # TODO(mrry): Investigate support for chaining further transformations after + # the prefetch, including GPU support. + raise NotImplementedError("`prefetch_to_devices()` must be the last " + "transformation in a dataset pipeline.") + + # TODO(priyag): Fix the output types, shapes and classes to match the result + # of get_next (which has the additional nesting layer of devices now). + @property + def output_types(self): + return self._input_dataset.output_types + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_classes(self): + return self._input_dataset.output_classes + + +def prefetch_to_devices(devices, buffer_size=None): + """A transformation that prefetches dataset values to the given `devices`. + + NOTE: Although the transformation creates a @{tf.data.Dataset}, the + transformation must be the final `Dataset` in the input pipeline. + + Args: + devices: A nested structure of devices on which to prefetch the data. It can + be a single device name, or a tuple or list of device names. + buffer_size: (Optional.) The number of elements to buffer on each device. + Defaults to an automatically chosen value. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + """ + def _apply_fn(dataset): + return _PrefetchToDeviceDataset(dataset, devices, buffer_size) + + return _apply_fn diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py new file mode 100644 index 0000000000..8ed16f4607 --- /dev/null +++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py @@ -0,0 +1,68 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for prefetching_ops_v2.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import prefetching_ops_v2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class PrefetchingOpsV2Test(test.TestCase): + + def testPrefetchToOneDevice(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices("/gpu:0")) + + iterator = device_dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + for i in range(10): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testPrefetchToTwoDevicesInAList(self): + if not test_util.is_gpu_available(): + self.skipTest("No GPU available") + + host_dataset = dataset_ops.Dataset.range(10) + device_dataset = host_dataset.apply( + prefetching_ops_v2.prefetch_to_devices(["/cpu:0", "/gpu:0"])) + + iterator = device_dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + output = [] + with self.test_session() as sess: + for _ in range(5): + result = sess.run(next_element) + self.assertEqual(2, len(result)) + output.extend(result) + self.assertEquals(set(range(10)), set(output)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/shared_variable_creator.py b/tensorflow/contrib/distribute/python/shared_variable_creator.py new file mode 100644 index 0000000000..aca9c7af05 --- /dev/null +++ b/tensorflow/contrib/distribute/python/shared_variable_creator.py @@ -0,0 +1,97 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utility to re-use variables created on first device on subsequent devices.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +_VARIABLE_UNIQUIFYING_REGEX = re.compile(r"_\d/") +_VARIABLE_UNIQUIFYING_REGEX_AT_END = re.compile(r"_\d$") + + +def _canonicalize_variable_name(name): + # If no name is specified, uses default name "Variable". + if name is None: + return "Variable" + # Replace all instances of "_/" with "/" + name = _VARIABLE_UNIQUIFYING_REGEX.sub("/", name) + # Replace any instances of "_" at the end of the string with "" + name = _VARIABLE_UNIQUIFYING_REGEX_AT_END.sub("", name) + return name + + +def make_fn(shared_variable_store, device_id): + """Construct the variable creator function for device `device_id`. + + Constructs custom variable creator functions for the given device. + On first device (device_id == 0), it creates the variable using the + `next_creator`, and stores it in the provided `shared_variable_store`. + On all other devices (device_id > 0), it tries to re-use the variable + already created with the same name. If no such variable exists, it throws an + error. + Additionally, we de-uniquify variable names before checking for matches. This + helps re-use variables which are intended to be the same but have different + names due to variable uniquificaton happening upstream. Since this might + mean we may have multiple variables with the same canonical name, we store + them in a list per canonical name and return them in the same order as well. + + Args: + shared_variable_store: A dictionary that we will use to store variables + created on the first device, and re-used by creators for other devices. + device_id: Integer index of the device whose creator should be + constructed. + + Returns: + An appropriate creator function based on device_id. + + """ + variable_scope_access_index = {} + assert isinstance(device_id, int) + + def create_new_variable(next_creator, *args, **kwargs): + """Create the variable using `next_creator` and store it.""" + canonical_name = _canonicalize_variable_name(kwargs.get("name")) + v = next_creator(*args, **kwargs) + + if canonical_name not in shared_variable_store: + shared_variable_store[canonical_name] = [] + shared_variable_store[canonical_name].append(v) + return v + + def reuse_variable(next_creator, *args, **kwargs): + """Re-use existing variable from store with same name (in order).""" + del next_creator, args + name = kwargs.get("name") + canonical_name = _canonicalize_variable_name(name) + + try: + variable_index = variable_scope_access_index.get(canonical_name, 0) + v = shared_variable_store[canonical_name][variable_index] + # TODO(priyag): Make this variable re-use more robust by adding checks + # that the requested shape and dtype match the existing variable. + variable_scope_access_index[canonical_name] = variable_index + 1 + return v + except (KeyError, IndexError): + raise RuntimeError( + "Tried to create variable {} with mismatching name on device {}". + format(name, device_id)) + + if device_id == 0: + return create_new_variable + else: + return reuse_variable diff --git a/tensorflow/contrib/distribute/python/shared_variable_creator_test.py b/tensorflow/contrib/distribute/python/shared_variable_creator_test.py new file mode 100644 index 0000000000..713494d603 --- /dev/null +++ b/tensorflow/contrib/distribute/python/shared_variable_creator_test.py @@ -0,0 +1,75 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for SharedVariableCreator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import shared_variable_creator +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.ops import variable_scope + + +class CanonicalizeVariableNameTest(test.TestCase): + + def _canonicalize(self, name): + return shared_variable_creator._canonicalize_variable_name(name) + + def testNoName(self): + self.assertEquals("Variable", self._canonicalize(None)) + + def testPatternInMiddle(self): + self.assertEquals("foo/bar/baz", self._canonicalize("foo_1/bar_1/baz")) + + def testPatternAtEnd(self): + self.assertEquals("foo", self._canonicalize("foo_1")) + + def testWrongPatterns(self): + self.assertEquals("foo_1:0", self._canonicalize("foo_1:0")) + self.assertEquals("foo1", self._canonicalize("foo1")) + self.assertEquals("foo_a", self._canonicalize("foo_a")) + + +@test_util.with_c_api +class SharedVariableCreatorTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testSharedVariable(self): + + shared_variable_store = {} + num_devices = 3 + creator_fns = [] + for i in range(num_devices): + creator_fn = shared_variable_creator.make_fn(shared_variable_store, i) + creator_fns.append(creator_fn) + + with variable_scope.variable_creator_scope(creator_fns[0]): + v0 = variable_scope.variable(1.0, name="foo") + + with variable_scope.variable_creator_scope(creator_fns[1]): + v1 = variable_scope.variable(1.0, name="foo") + + with variable_scope.variable_creator_scope(creator_fns[2]): + v2 = variable_scope.variable(1.0, name="foo") + + # v1 and v2 should be same as v0 + self.assertIs(v1, v0) + self.assertIs(v2, v0) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/simple_estimator_example.py b/tensorflow/contrib/distribute/python/simple_estimator_example.py new file mode 100644 index 0000000000..7095d801ad --- /dev/null +++ b/tensorflow/contrib/distribute/python/simple_estimator_example.py @@ -0,0 +1,97 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A simple example to test the a DistributionStrategy with Estimators. + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.estimator import estimator as estimator_lib +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import run_config +from tensorflow.python.framework import constant_op +from tensorflow.python.layers import core +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import app +from tensorflow.python.training import gradient_descent +from tensorflow.python.training import training_util + + +def build_model_fn_optimizer(): + """Simple model_fn with optimizer.""" + # TODO(anjalisridhar): Move this inside the model_fn once OptimizerV2 is + # done? + optimizer = gradient_descent.GradientDescentOptimizer(0.2) + + def model_fn(features, labels, mode): # pylint: disable=unused-argument + """model_fn which uses a single unit Dense layer.""" + # You can also use the Flatten layer if you want to test a model without any + # weights. + layer = core.Dense(1, use_bias=True) + logits = layer(features) + + if mode == model_fn_lib.ModeKeys.PREDICT: + predictions = {"logits": logits} + return model_fn_lib.EstimatorSpec(mode, predictions=predictions) + + def loss_fn(): + y = array_ops.reshape(logits, []) - constant_op.constant(1.) + return y * y + + if mode == model_fn_lib.ModeKeys.EVAL: + return model_fn_lib.EstimatorSpec(mode, loss=loss_fn()) + + assert mode == model_fn_lib.ModeKeys.TRAIN + + global_step = training_util.get_global_step() + train_op = optimizer.minimize(loss_fn(), global_step=global_step) + return model_fn_lib.EstimatorSpec(mode, loss=loss_fn(), train_op=train_op) + + return model_fn + + +def main(_): + distribution = mirrored_strategy.MirroredStrategy( + ["/device:GPU:0", "/device:GPU:1"]) + config = run_config.RunConfig(distribute=distribution) + + def input_fn(): + features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + labels = dataset_ops.Dataset.from_tensors([1.]).repeat(10) + return dataset_ops.Dataset.zip((features, labels)) + + estimator = estimator_lib.Estimator( + model_fn=build_model_fn_optimizer(), config=config) + estimator.train(input_fn=input_fn, steps=10) + + eval_result = estimator.evaluate(input_fn=input_fn) + print("Eval result: {}".format(eval_result)) + + def predict_input_fn(): + predict_features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10) + return predict_features + + predictions = estimator.predict(input_fn=predict_input_fn) + # TODO(anjalsridhar): This returns a generator object, figure out how to get + # meaningful results here. + print("Prediction results: {}".format(predictions)) + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow/contrib/distribute/python/single_loss_example.py b/tensorflow/contrib/distribute/python/single_loss_example.py new file mode 100644 index 0000000000..cef5fd2f89 --- /dev/null +++ b/tensorflow/contrib/distribute/python/single_loss_example.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A simple network to use in tests and examples.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.distribute.python import step_fn +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.layers import core +from tensorflow.python.layers import normalization +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def single_loss_example(optimizer_fn, distribution, use_bias=False): + """Build a very simple network to use in tests and examples.""" + dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + optimizer = optimizer_fn() + layer = core.Dense(1, use_bias=use_bias) + + def loss_fn(x): + y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) + return y * y + + single_loss_step = step_fn.StandardSingleLossStep(dataset, loss_fn, optimizer, + distribution) + + # Layer is returned for inspecting the kernels in tests. + return single_loss_step, layer + + +def minimize_loss_example(optimizer_fn, + use_bias=False, + use_callable_loss=True, + create_optimizer_inside_model_fn=False): + """Example of non-distribution-aware legacy code.""" + dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat() + # An Optimizer instance is created either outside or inside model_fn. + outer_optimizer = None + if not create_optimizer_inside_model_fn: + outer_optimizer = optimizer_fn() + + layer = core.Dense(1, use_bias=use_bias) + + def model_fn(x): + """A very simple model written by the user.""" + + def loss_fn(): + y = array_ops.reshape(layer(x), []) - constant_op.constant(1.) + return y * y + + optimizer = outer_optimizer or optimizer_fn() + + if use_callable_loss: + return optimizer.minimize(loss_fn) + else: + return optimizer.minimize(loss_fn()) + + return model_fn, dataset, layer + + +def batchnorm_example(optimizer_fn, + batch_per_epoch=1, + momentum=0.9, + renorm=False): + """Example of non-distribution-aware legacy code with batch normalization.""" + # input shape is [16, 8], input values are increasing in both dimensions. + dataset = dataset_ops.Dataset.from_tensor_slices( + [[[float(x * 8 + y + z * 100) + for y in range(8)] + for x in range(16)] + for z in range(batch_per_epoch)]).repeat() + optimizer = optimizer_fn() + batchnorm = normalization.BatchNormalization( + renorm=renorm, momentum=momentum, fused=False) + + def model_fn(x): + + def loss_fn(): + y = math_ops.reduce_sum(batchnorm(x, training=True), axis=1) + loss = math_ops.reduce_mean(y - constant_op.constant(1.)) + return loss + + # Callable loss. + return optimizer.minimize(loss_fn) + + return model_fn, dataset, batchnorm diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py new file mode 100644 index 0000000000..82514c64be --- /dev/null +++ b/tensorflow/contrib/distribute/python/step_fn.py @@ -0,0 +1,103 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""The step function abstraction represents a single training step.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import backprop +from tensorflow.python.training import optimizer as optimizer_lib + + +class Step(object): + """Interface for performing each step of a training algorithm.""" + + def __init__(self, distribution): + self._distribution = distribution + + @property + def distribution(self): + return self._distribution + + def __call__(self): + """Perform one step of this training algorithm.""" + return self.step(self.inputs()) + + def inputs(self): + """For the generating the input to be passed to `step()`.""" + raise NotImplementedError("must be implemented in descendants") + + def step(self, inputs): + """Perform the main computation of this training algorithm.""" + raise NotImplementedError("must be implemented in descendants") + + +class StandardInputStep(Step): + """Step with a standard implementation of input handling. + + Args: + input_dataset: a tf.data Dataset that provides input. + """ + + def __init__(self, input_dataset, distribution): + Step.__init__(self, distribution) + self._distributed_input = distribution.distribute_dataset(input_dataset) + + def inputs(self): + return self._distributed_input.get_next() + + +class StandardSingleLossStep(StandardInputStep): + """A step function that implements a training step for a feed forward network. + + An instance of this class is intended to be used as a callable: + + ```python + ... + step = step_fn.StandardSingleLossStep(dataset, loss_fn, optimizer) + step.initialize(distribution) + + # Run a single training step on a given DistributionStrategy: + step(distribution) + ... + ``` + + Args: + input_dataset: a tf.data Dataset that provides input. + loss_fn: a function that returns loss. + optimizer: an optimizer that implements an update rule. + distribution: a `DistributionStrategy` object. + """ + + def __init__(self, input_dataset, loss_fn, optimizer, distribution): + StandardInputStep.__init__(self, input_dataset, distribution) + self._loss_fn = loss_fn + self._optimizer = optimizer + self._is_run_concurrently = False + + def step(self, inputs): + with self._distribution.scope(): + gradients_fn = backprop.implicit_grad(self._loss_fn) + gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn) + + grads_and_vars = self.distribution.call_for_each_tower( + gradients_fn, inputs, run_concurrently=self._is_run_concurrently) + # If threads use layers, then we need to run the first step sequentially, + # so that layers.build() is not executed in parallel. Otherwise, multiple + # sets of mirrored variables are going to be created. + self._is_run_concurrently = True + return self._optimizer._distributed_apply( # pylint: disable=protected-access + self.distribution, grads_and_vars) diff --git a/tensorflow/contrib/distribute/python/step_fn_test.py b/tensorflow/contrib/distribute/python/step_fn_test.py new file mode 100644 index 0000000000..75c5ec9659 --- /dev/null +++ b/tensorflow/contrib/distribute/python/step_fn_test.py @@ -0,0 +1,62 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for class Step.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python.single_loss_example import single_loss_example +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.ops import variables + + +class SingleLossStepTest(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.times( + combinations.distributions_and_v1_optimizers(), + combinations.combine(mode=combinations.graph_and_eager_modes))) + def testTrainNetwork(self, distribution, optimizer_fn): + with distribution.scope(): + single_loss_step, layer = single_loss_example( + optimizer_fn, distribution, use_bias=True) + + if context.executing_eagerly(): + run_step = single_loss_step + else: + with self.test_session() as sess: + run_step = sess.make_callable(single_loss_step()) + self.evaluate(variables.global_variables_initializer()) + + weights, biases = [], [] + for _ in range(10): + run_step() + + weights.append(self.evaluate(distribution.fetch(layer.kernel))) + biases.append(self.evaluate(distribution.fetch(layer.bias))) + + error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1) + is_not_increasing = all(y <= x for x, y in zip(error, error[1:])) + self.assertTrue(is_not_increasing) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py new file mode 100644 index 0000000000..2b4ad9f146 --- /dev/null +++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py @@ -0,0 +1,225 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library for testing DistributionStrategy descendants.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.layers import core +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.training import distribute as distribute_lib +from tensorflow.python.training import optimizer + + +class _TestException(Exception): + pass + + +# May be the argument to either distribution.call_for_each_tower() or +# get_tower_context().merge_call() +def _raise_exception_fn(_=None): + raise _TestException() + + +# Must be the argument to a distribution.call_for_each_tower() call, calls a +# get_tower_context().merge_call() that raises an exception. +def _merge_raises_fn(): + distribute_lib.get_tower_context().merge_call(_raise_exception_fn) + + +# Must be the argument to a get_tower_context().merge_call() call, calls +# dist.call_for_each_tower() with a function that raises an exception. +def _call_raises_fn(dist): + dist.call_for_each_tower(_raise_exception_fn) + + +# Must be the argument to a distribution.call_for_each_tower() call, +# calls a get_tower_context().merge_call() that calls a +# call_for_each_tower() that raises an exception. +def _merge_call_raises_fn(): + distribute_lib.get_tower_context().merge_call(_call_raises_fn) + + +# Must be the argument to a get_tower_context().merge_call() call, calls +# dist.call_for_each_tower() with a function that calls a +# get_tower_context().merge_call() that raises an exception. +def _call_merge_raises_fn(dist): + dist.call_for_each_tower(_merge_raises_fn) + + +# Must be the argument to a distribution.call_for_each_tower() call, calls a +# get_tower_context().merge_call() that calls a call_for_each_tower() that +# calls a get_tower_context().merge_call() that raises an exception. +def _merge_call_merge_raises_fn(): + distribute_lib.get_tower_context().merge_call(_call_merge_raises_fn) + + +class DistributionTestBase(test.TestCase): + """Some tests that should work with any DistributionStrategy.""" + + def _test_minimize_loss_eager(self, d): + with d.scope(): + l = core.Dense(1, use_bias=False) + + def loss(x): + # TODO(josh11b): What if this constant was instead a captured + # value? Would it need to be a value that has been passed + # through d.broadcast()? + y = array_ops.reshape(l(x), []) - constant_op.constant(1.) + return y * y + # TODO(isaprykin): Extract implicit_grad+get_filtered_grad_fn into a + # common `implicit_grad` function and put it in DistributionStrategy. + grad_fn = backprop.implicit_grad(loss) + grad_fn = optimizer.get_filtered_grad_fn(grad_fn) + + def update(v, g): + return v.assign_sub(0.2 * g) + + one = d.broadcast(constant_op.constant([[1.]])) + + def step(): + """Perform one optimization step.""" + # Run forward & backward to get gradients, variables list. + g_v = d.call_for_each_tower(grad_fn, one, run_concurrently=l.built) + + # Update the variables using the gradients and the update() function. + before_list = [] + after_list = [] + for g, v in g_v: + fetched = d.fetch(v) + before_list.append(fetched) + # control_dependencies irrelevant but harmless in eager execution + with ops.control_dependencies([fetched]): + g = d.reduce("sum", g, destinations=v) + with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + after_list.append(d.fetch(v)) + return before_list, after_list + + for i in range(10): + b, a = step() + if i == 0: + before, = b # pylint: disable=unbalanced-tuple-unpacking + after, = a # pylint: disable=unbalanced-tuple-unpacking + + error_before = abs(before.numpy() - 1) + error_after = abs(after.numpy() - 1) + # Error should go down + self.assertLess(error_after, error_before) + + def _test_minimize_loss_graph(self, d, soft_placement=False): + config = config_pb2.ConfigProto() + config.allow_soft_placement = soft_placement + config.gpu_options.per_process_gpu_memory_fraction = 0.3 + with context.graph_mode(), \ + ops.Graph().as_default(), \ + self.test_session(config=config) as sess, \ + d.scope(): + l = core.Dense(1, use_bias=False) + + def loss(x): + # TODO(josh11b): What if this constant was instead a captured + # value? Would it need to be a value that has been passed + # through d.broadcast()? + y = array_ops.reshape(l(x), []) - constant_op.constant(1.) + return y * y + + grad_fn = backprop.implicit_grad(loss) + + def update(v, g): + return v.assign_sub(0.2 * g) + + one = d.broadcast(constant_op.constant([[1.]])) + + def step(): + """Perform one optimization step.""" + # Run forward & backward to get gradients, variables list. + g_v = d.call_for_each_tower(grad_fn, one) + + # Update the variables using the gradients and the update() function. + before_list = [] + after_list = [] + for g, v in g_v: + fetched = d.fetch(v) + before_list.append(fetched) + with ops.control_dependencies([fetched]): + g = d.reduce("sum", g, destinations=v) + with ops.control_dependencies(d.unwrap(d.update(v, update, g))): + after_list.append(d.fetch(v)) + return before_list, after_list + + before_out, after_out = step() + variables.global_variables_initializer().run() + for i in range(10): + b, a = sess.run((before_out, after_out)) + if i == 0: + before, = b + after, = a + + error_before = abs(before - 1) + error_after = abs(after - 1) + # Error should go down + self.assertLess(error_after, error_before) + + def _test_map_reduce(self, d, in_graph=None): + with d.scope(): + map_in = [constant_op.constant(i) for i in range(10)] + map_out = d.map(map_in, lambda x, y: x * y, 2) + observed = d.fetch(d.reduce("sum", map_out)) + expected = 90 # 2 * (0 + 1 + ... + 9) + self.assertEqual(expected, observed.numpy()) + + def _test_device_index(self, d): + with d.scope(): + expected_devices = [False] * len(d.worker_devices) + + def mark_devices_fn(device_id): + self.assertLess(device_id, len(d.worker_devices)) + self.assertFalse(expected_devices[device_id]) + expected_devices[device_id] = True + + d.call_for_each_tower(mark_devices_fn, d.worker_device_index) + self.assertAllEqual(expected_devices, [True] * len(d.worker_devices)) + + def _test_tower_id(self, d): + with d.scope(): + expected_devices = [False] * len(d.worker_devices) + + def mark_devices_fn(): + tower_id = distribute_lib.get_tower_context().tower_id + self.assertLess(tower_id, len(d.worker_devices)) + self.assertFalse(expected_devices[tower_id]) + expected_devices[tower_id] = True + + d.call_for_each_tower(mark_devices_fn) + self.assertAllEqual(expected_devices, [True] * len(d.worker_devices)) + + def _test_call_and_merge_exceptions(self, dist): + with dist.scope(): + with self.assertRaises(_TestException): + dist.call_for_each_tower(_raise_exception_fn) + with self.assertRaises(_TestException): + dist.call_for_each_tower(_merge_raises_fn) + with self.assertRaises(_TestException): + dist.call_for_each_tower(_merge_call_raises_fn) + with self.assertRaises(_TestException): + dist.call_for_each_tower(_merge_call_merge_raises_fn) diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py new file mode 100644 index 0000000000..c1ba22ed5a --- /dev/null +++ b/tensorflow/contrib/distribute/python/values.py @@ -0,0 +1,575 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Various classes representing distributed values. + +See go/tf-distribution-strategy. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import weakref + +import six + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.distribute.python import prefetching_ops_v2 +from tensorflow.contrib.eager.python import datasets +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.training import checkpointable +from tensorflow.python.training import device_util +from tensorflow.python.training import distribute as distribute_lib +from tensorflow.python.training import saver +from tensorflow.python.util import nest + + +# pylint: disable=line-too-long +# TODO(josh11b): Should device values be strings or DeviceSpec objects +# Not sure DeviceSpec objects are usable as a dict key. +class DistributedValues(object): + """Holds a map from device to values. Either PerDevice or Mirrored.""" + + def __init__(self, index): + self._index = {device_util.canonicalize(key): value + for key, value in six.iteritems(index)} + + def get(self, device=None): + """Returns the value for the current device or raises a ValueError.""" + if device is None: + tower_context = distribute_lib.get_tower_context() + if tower_context: + device = tower_context.device + else: + device = distribute_lib.get_update_device() + if device is None: + device = device_util.current() + device = device_util.canonicalize(device) + try: + return self._index[device] + except KeyError: + raise ValueError("Device %s not found in %s (current device %s)" % + (device, self._index.keys(), device_util.current())) + + def on_device(self, device): + device = device_util.canonicalize(device) + return device in self._index + + @property + def devices(self): + return self._index.keys() + + def __str__(self): + return "%s:%s" % (self.__class__.__name__, self._index) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._index) + + # TODO(josh11b): Possibly make an accessor for _index for use by + # DistributionStrategy implementations. + + +class DistributedDelegate(DistributedValues): + """A map from device to values; acts as the same type as the values.""" + + def __init__(self, index): + super(DistributedDelegate, self).__init__(index) + + def __getattr__(self, name): + return getattr(self.get(), name) + + # pylint: disable=multiple-statements + def __add__(self, o): return self.get() + o + def __radd__(self, o): return o + self.get() + def __sub__(self, o): return self.get() - o + def __rsub__(self, o): return o - self.get() + def __mul__(self, o): return self.get() * o + def __rmul__(self, o): return o * self.get() + def __truediv__(self, o): return self.get() / o + def __rtruediv__(self, o): return o / self.get() + def __floordiv__(self, o): return self.get() // o + def __rfloordiv__(self, o): return o // self.get() + def __mod__(self, o): return self.get() % o + def __rmod__(self, o): return o % self.get() + def __lt__(self, o): return self.get() < o + def __le__(self, o): return self.get() <= o + def __gt__(self, o): return self.get() > o + def __ge__(self, o): return self.get() >= o + def __and__(self, o): return self.get() & o + def __rand__(self, o): return o & self.get() + def __or__(self, o): return self.get() | o + def __ror__(self, o): return o | self.get() + def __xor__(self, o): return self.get() ^ o + def __rxor__(self, o): return o ^ self.get() + def __getitem__(self, o): return self.get()[o] + def __pow__(self, o, modulo=None): return pow(self.get(), o, modulo) + def __rpow__(self, o): return pow(o, self.get()) + def __invert__(self): return ~self.get() + def __neg__(self): return -self.get() + def __abs__(self): return abs(self.get()) + + def __div__(self, o): + try: + return self.get().__div__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rdiv__(self, o): + try: + return self.get().__rdiv__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __matmul__(self, o): + try: + return self.get().__matmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + def __rmatmul__(self, o): + try: + return self.get().__rmatmul__(o) + except AttributeError: + # See https://docs.python.org/3/library/constants.html#NotImplemented + return NotImplemented + + # TODO(josh11b): Even more operator overloads. + + +class PerDevice(DistributedValues): + """Holds a map from device to unsynchronized values.""" + pass + + +class Mirrored(DistributedValues): + """Holds a map from device to values which are kept in sync.""" + pass + + +def _assign_on_device(device, variable, tensor): + with ops.device(device): + return variable.assign(array_ops.identity(tensor)) + + +DistributedVarOp = collections.namedtuple( + "DistributedVarOp", ["name", "graph", "type"]) + + +class DistributedVariable(DistributedDelegate): + """Holds a map from device to variables.""" + # TODO(josh11b): Support changing the set of variables if e.g. if new + # devices are joining or a device is to leave. + + def __init__(self, index): + # Child class must set self._primary_var before calling + # super(...).__init__(index). + self._common_name = self._primary_var.name.split(":")[0] + super(DistributedVariable, self).__init__(index) + + @property + def initializer(self): + return control_flow_ops.group([v.initializer for v in self._index.values()]) + + @property + def graph(self): + return self._primary_var.graph + + @property + def _shared_name(self): + return self._common_name + + @property + def _unique_id(self): + return self._primary_var._unique_id # pylint: disable=protected-access + + @property + def name(self): + return self._primary_var.name + + @property + def dtype(self): + return self._primary_var.dtype + + @property + def shape(self): + return self._primary_var.shape + + def get_shape(self): + return self._primary_var.get_shape() + + @property + def op(self): + # We want cross-tower code that does some var.op.X calls + # to work (even if the current device isn't in self.devices), but + # other uses of var.op in a cross-tower context to fail. + if distribute_lib.get_cross_tower_context(): + return DistributedVarOp(self._primary_var.op.name, + self._primary_var.op.graph, + self._primary_var.op.type) + return self.get().op + + def _should_act_as_resource_variable(self): + """Pass resource_variable_ops.is_resource_variable check.""" + pass + + +# Register a conversion function which reads the value of the variable, +# allowing instances of the class to be used as tensors. +def _tensor_conversion(var, dtype=None, name=None, as_ref=False): + # Try to avoid assignments to and other mutations of MirroredVariable + # state except through a DistributionStrategy.update() call. + assert not as_ref + return ops.internal_convert_to_tensor( + var.get(), dtype=dtype, name=name, as_ref=as_ref) + + +ops.register_tensor_conversion_function(DistributedVariable, _tensor_conversion) +# TODO(josh11b): ops.register_dense_tensor_like_type(DistributedVariable)? + + +class _MirroredSaveable(saver.BaseSaverBuilder.ResourceVariableSaveable): + """Class for defining how to restore a MirroredVariable.""" + + def __init__(self, mirrored_variable, primary_variable, name): + self._mirrored_variable = mirrored_variable + super(_MirroredSaveable, self).__init__(primary_variable, "", name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into all variables.""" + tensor, = restored_tensors + return control_flow_ops.group([ + _assign_on_device(d, v, tensor) + for d, v in six.iteritems(self._mirrored_variable._index)]) # pylint: disable=protected-access + + +def _get_update_device(): + """Validate we are in update/update_non_slot() and return current device. + + This is used in MirroredVariable.assign* members, to make sure they + are only called via an update method, to make sure all components of the + variable are being updated in a consistent way. + + Returns: + A string device. + + Raises: + RuntimeError: If not in distribution.update()/.update_non_slot(). + """ + device = distribute_lib.get_update_device() + if device is None: + raise RuntimeError( + "Use DistributionStrategy.update() to modify a MirroredVariable.") + return device + + +class MirroredVariable(DistributedVariable, Mirrored, + checkpointable.CheckpointableBase): + """Holds a map from device to variables whose values are kept in sync.""" + + def __init__(self, index, primary_var): + # Use a weakref to make it easy to map from the contained values + # to the container without introducing a reference cycle. + for v in six.itervalues(index): + v._mirrored_container = weakref.ref(self) # pylint: disable=protected-access + self._primary_var = primary_var + super(MirroredVariable, self).__init__(index) + + # We use _get_update_device() for the assign* methods to enforce + # that we are in an update() function. The arguments to update() are + # automatically unwrapped so the update() function would normally + # see regular variables, not MirroredVariables. However, the update + # function can still operate on wrapped MirroredVariables through + # object members, captured arguments, etc. This is more likely in an + # update_non_slot() function (like OptimizerV2._finish), which can + # update several non-slot variables in one call. + def assign_sub(self, *args, **kwargs): + return self.get(device=_get_update_device()).assign_sub(*args, **kwargs) + + def assign_add(self, *args, **kwargs): + return self.get(device=_get_update_device()).assign_add(*args, **kwargs) + + def assign(self, *args, **kwargs): + return self.get(device=_get_update_device()).assign(*args, **kwargs) + + def _gather_saveables_for_checkpoint(self): + """Overrides CheckpointableBase method. + + This allows both name-based and object-based save and restore of + MirroredVariables. + + Returns: + A dictionary mapping attribute names to `SaveableObject` factories. + """ + def _saveable_factory(name=self._common_name): + return _MirroredSaveable(self, self._primary_var, name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} + + +class _TowerLocalSaveable(saver.BaseSaverBuilder.SaveableObject): + """Class for defining how to restore a TowerLocalVariable.""" + + def __init__(self, tower_local_variable, name): + self._tower_local_variable = tower_local_variable + # We use a callable so that we don't have to evaluate this expression + # in the case where we are trying to restore instead of save. + def tensor(): + return distribute_lib.get_distribution_strategy().fetch( + tower_local_variable) + spec = saver.BaseSaverBuilder.SaveSpec( + tensor=tensor, + slice_spec="", + name=name, + dtype=tower_local_variable.dtype) + super(_TowerLocalSaveable, self).__init__(tensor, [spec], name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into all variables.""" + tensor, = restored_tensors + # To preserve the sum across save and restore, we have to divide the + # total across all devices when restoring a variable that was summed + # when saving. + if self._tower_local_variable.reduce_method == "sum": + tensor *= 1. / len(self._tower_local_variable.devices) + return control_flow_ops.group([ + _assign_on_device(d, v, tensor) + for d, v in six.iteritems(self._tower_local_variable._index)]) # pylint: disable=protected-access + + +class TowerLocalVariable(DistributedVariable, PerDevice, + checkpointable.CheckpointableBase): + """Holds a map from device to variables whose values are reduced on save.""" + + def __init__(self, index, primary_var, reduce_method): + self._primary_var = primary_var + self._reduce_method = reduce_method + super(TowerLocalVariable, self).__init__(index) + + def assign_sub(self, *args, **kwargs): + return self.get().assign_sub(*args, **kwargs) + + def assign_add(self, *args, **kwargs): + return self.get().assign_add(*args, **kwargs) + + def assign(self, *args, **kwargs): + return self.get().assign(*args, **kwargs) + + @property + def reduce_method(self): + return self._reduce_method + + def _gather_saveables_for_checkpoint(self): + """Overrides CheckpointableBase method. + + This allows both name-based and object-based save and restore of + TowerLocalVariables. + + Returns: + A dictionary mapping attribute names to `SaveableObject` factories. + """ + def _saveable_factory(name=self._common_name): + return _TowerLocalSaveable(self, name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} + + +def _devices_match(d1, d2): + return device_util.canonicalize(d1) == device_util.canonicalize(d2) + + +def regroup(per_device, wrap_class=PerDevice): + """Makes device->nest map into a nest of PerDevice/Mirrored values.""" + items = list(per_device.items()) + assert items + v0 = items[0][1] # First value + + if isinstance(v0, list): + for _, v in items[1:]: + assert isinstance(v, list) + assert len(v) == len(v0), ("len(v) == %d, len(v0) == %d, v: %s, v0: %s" % + (len(v), len(v0), v, v0)) + return [regroup({k: v[i] for k, v in items}, wrap_class) + for i in range(len(v0))] + + if isinstance(v0, tuple): + for _, v in items[1:]: + assert isinstance(v, tuple) + assert len(v) == len(v0) + regrouped_tuple = tuple(regroup({k: v[i] for k, v in items}, wrap_class) + for i in range(len(v0))) + if hasattr(v0, "_fields"): + # This tuple is in fact a namedtuple! Create a new namedtuple instance + # and initialize it with the regrouped values: + assert hasattr(type(v0), "_make") + return type(v0)._make(regrouped_tuple) + else: + return regrouped_tuple + + if isinstance(v0, dict): + v0keys = set(v0.keys()) + for _, v in items[1:]: + assert isinstance(v, dict) + assert set(v.keys()) == v0keys + return {key: regroup({k: v[key] for k, v in items}, wrap_class) + for key in v0keys} + + # If exactly the same object across all devices, return it unwrapped. + same_id = True + for _, v in items[1:]: + if v is not v0: + same_id = False + break + # Consider three cases where same_id is true: + # * If v0 is a MirroredVariable (and same_id means it is the same + # across all devices), we want to return it. We check + # MirroredVariable specifically since it can look like it + # has a _mirrored_container member since its members do. + # * If v0 is a member of a mirrored variable, in which case + # hasattr(v0, "_mirrored_container") is true, we want to + # return the MirroredVariable that contains it using the + # _mirrored_container logic below. This case can trigger + # same_id when there is only one device. + # * In any other situation, same_id means we return v0. + if same_id and (isinstance(v0, MirroredVariable) or + not hasattr(v0, "_mirrored_container")): + return v0 + + # Detect the case where each device has a parallel component of the + # same MirroredVariable. In this case we want to return the + # containing MirroredVariable, after a bunch of sanity checking. + # In particular, each component should have the same container, + # and the devices of the variables should match the keys of the + # per-device dictionary. + # TODO(josh11b): Do we need similar logic for TowerLocalVariables? + if hasattr(v0, "_mirrored_container"): + # pylint: disable=protected-access + assert not isinstance(v0, MirroredVariable), ( + "ids = %s, items = %s" % ([id(v[1]) for v in items], items)) + assert _devices_match(v0.device, items[0][0]), ( + "v0.device = %s, items = %s" % (v0.device, items)) + mirrored_container = v0._mirrored_container() + assert mirrored_container is not None + for d, v in items[1:]: + assert _devices_match(v.device, d), ( + "v.device = %s, d = %s, items = %s" % (v.device, d, items)) + assert mirrored_container is v._mirrored_container() + return mirrored_container + # pylint: enable=protected-access + + return wrap_class(per_device) + + +def select_device(device, structured): + """Specialize a nest of regular & per-device values for one device.""" + def _get(x): + return x.get(device) if isinstance(x, DistributedValues) else x + + return nest.map_structure(_get, structured) + + +def select_device_mirrored(device, structured): + """Specialize a nest of regular & mirrored values for one device.""" + def _get_mirrored(x): + if isinstance(x, DistributedValues): + if not isinstance(x, Mirrored): + raise TypeError( + "Expected value to be mirrored across towers: %s in %s." % + (x, structured)) + return x.get(device) + else: + return x + + return nest.map_structure(_get_mirrored, structured) + + +class PerDeviceDataIterator(object): + """An iterator (like `tf.data.Iterator`) into a `PerDeviceDataset`.""" + + def __init__(self, iterator, devices, prefetch_on_device=None): + self._iterator = iterator + self._devices = devices + self._prefetch_on_device = prefetch_on_device + + def get_next(self, name=None): + """Scatter the input across devices.""" + if self._prefetch_on_device: + data_list = self._iterator.get_next(name=name) + index = dict(zip(self._devices, data_list)) + else: + batch = self._iterator.get_next(name=name) + index = {} + def get_ith(i): + return lambda x: x[i] + + for i, d in enumerate(self._devices): + index[d] = nest.map_structure(get_ith(i), batch) + if context.executing_eagerly(): + with ops.device(d): + index[d] = nest.map_structure(array_ops.identity, index[d]) + + return regroup(index) + + +class PerDeviceDataset(object): + """Like `tf.data.Dataset` split devices, producing `PerDevice` data.""" + + def __init__(self, dataset, devices, prefetch_on_device=None): + self._devices = devices + + # Default to using prefetching in graph mode, unless specified. + # TODO(priyag): Enable prefetching in eager mode. + self._prefetch_on_device = prefetch_on_device + if self._prefetch_on_device is None: + self._prefetch_on_device = not context.executing_eagerly() + assert not (self._prefetch_on_device and context.executing_eagerly()), ( + "Prefetching is only supported in graph mode currently") + + if self._prefetch_on_device: + self._dataset = dataset + else: + # TODO(priyag): If dropping remainder is not appropriate, find another + # approach to distributing the dataset when not possible to divide evenly. + # Possibly not an issue when we start using PartitionedDataset. + self._dataset = dataset.apply( + batching.batch_and_drop_remainder(len(devices))) + + def make_one_shot_iterator(self): + """Get a one time use iterator for the distributed PerDeviceDataset.""" + if self._prefetch_on_device: + on_device_dataset = self._dataset.apply( + prefetching_ops_v2.prefetch_to_devices(self._devices)) + dataset_iterator = on_device_dataset.make_one_shot_iterator() + elif context.executing_eagerly(): + dataset_iterator = datasets.Iterator(self._dataset) + else: + dataset_iterator = self._dataset.make_one_shot_iterator() + + return PerDeviceDataIterator( + dataset_iterator, self._devices, self._prefetch_on_device) + + +class MapOutput(object): + """Map can result in multiple outputs per device.""" + + def __init__(self, l): + self._l = l + + def get(self): + return self._l diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py new file mode 100644 index 0000000000..5c0d4b7d6c --- /dev/null +++ b/tensorflow/contrib/distribute/python/values_test.py @@ -0,0 +1,807 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the distributed values library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.distribute.python import mirrored_strategy +from tensorflow.contrib.distribute.python import values +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import device_util +from tensorflow.python.training import saver as saver_lib + + +@test_util.with_c_api +class DistributedValuesTest(test.TestCase): + + def testGetEager(self): + with ops.device("/device:CPU:0"): + one = constant_op.constant(1) + two = constant_op.constant(2) + v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two}) + self.assertEqual(two, v.get("/device:GPU:0")) + self.assertEqual(one, v.get()) + with self.assertRaises(ValueError): + self.assertIsNone(v.get("/device:GPU:2")) + + def testGetGraph(self): + with context.graph_mode(), \ + ops.Graph().as_default(), \ + ops.device("/device:CPU:0"): + one = constant_op.constant(1) + two = constant_op.constant(2) + v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two}) + self.assertEqual(two, v.get("/device:GPU:0")) + self.assertEqual(one, v.get()) + with self.assertRaises(ValueError): + self.assertIsNone(v.get("/device:GPU:2")) + + def testCanonicalization(self): + canonical_cpu = ["/job:localhost/replica:0/task:0/device:CPU:0"] + v = values.DistributedValues({"": 42}) + self.assertEqual(canonical_cpu, list(v._index.keys())) + v = values.DistributedValues({"/device:CPU:0": 42}) + self.assertEqual(canonical_cpu, list(v._index.keys())) + v = values.DistributedValues({"/cpu:0": 42}) + self.assertEqual(canonical_cpu, list(v._index.keys())) + v = values.DistributedValues({"/CPU:0": 42}) + self.assertEqual(canonical_cpu, list(v._index.keys())) + with self.assertRaises(AssertionError): + v = values.DistributedValues({"/device:cpu:0": 42}) + + +@test_util.with_c_api +class DistributedDelegateTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testGetAttr(self): + with ops.device("/device:CPU:0"): + + class Foo(object): + + def __init__(self, x): + self.x = x + + v = values.DistributedDelegate( + {"/device:CPU:0": Foo(7), "/device:GPU:0": Foo(8)}) + self.assertEqual(7, v.x) + with self.assertRaises(AttributeError): + _ = v.y + + @test_util.run_in_graph_and_eager_modes() + def testOperatorOverride(self): + with ops.device("/device:CPU:0"): + v = values.DistributedDelegate({"/device:CPU:0": 7, "/device:GPU:0": 8}) + # v should act like int(7). + self.assertEqual(8, v + 1) + self.assertEqual(10, 3 + v) + self.assertEqual(14, v + v) + self.assertEqual(5, v - 2) + self.assertEqual(6, 13 - v) + self.assertEqual(0, v - v) + self.assertEqual(14, v * 2) + self.assertEqual(21, 3 * v) + self.assertEqual(49, v * v) + self.assertEqual(3.5, v / 2) + self.assertEqual(1.5, 10.5 / v) + self.assertEqual(3, v // 2) + self.assertEqual(2, 15 // v) + self.assertEqual(1, v % 2) + self.assertEqual(2, 16 % v) + self.assertTrue(v < 12) + self.assertTrue(v <= 12) + self.assertFalse(v > 12) + self.assertFalse(v >= 12) + self.assertFalse(12 < v) + self.assertFalse(12 <= v) + self.assertTrue(12 > v) + self.assertTrue(12 >= v) + self.assertEqual(3, v & 3) + self.assertEqual(3, 11 & v) + self.assertEqual(15, v | 8) + self.assertEqual(23, 16 | v) + self.assertEqual(4, v ^ 3) + self.assertEqual(12, 11 ^ v) + self.assertEqual(343, pow(v, 3)) + self.assertEqual(3, pow(v, 3, 10)) + self.assertEqual(128, pow(2, v)) + self.assertEqual(-7, -v) + self.assertEqual(~7, ~v) + self.assertEqual(7, abs(v)) + with self.assertRaises(TypeError): + _ = v[2] + + +def _device_str(d): + return "/device:GPU:" + str(d) + + +def _nested_value(d): + return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) + + +def _make_mirrored(): + v = [] + index = {} + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + index[d] = v[-1] + mirrored = values.MirroredVariable(index, v[0]) + return v, devices, mirrored + + +@test_util.with_c_api +class RegroupAndSelectDeviceTest(test.TestCase): + + def _is_per_device(self, result, expected, klass=values.PerDevice): + self.assertIsInstance(result, klass) + # We canonicalize the devices to match the device strings returned + # by PerDevice, which also does device string canonicalization. + devices = [device_util.canonicalize(_device_str(i)) + for i in range(len(expected))] + self.assertEqual(set(devices), set(result.devices)) + for i, d in enumerate(devices): + self.assertEqual(expected[i], result.get(d)) + self.assertEqual(expected[i], result.get(_device_str(i))) + + def testNested(self): + result = values.regroup({_device_str(0): _nested_value("1"), + _device_str(1): _nested_value("2")}) + self.assertIsInstance(result, tuple) + self.assertEqual(3, len(result)) + self._is_per_device(result[0], ["a1", "a2"]) + self._is_per_device(result[2], ["h1", "h2"]) + + self.assertIsInstance(result[1], list) + self.assertEqual(3, len(result[1])) + self._is_per_device(result[1][0], ["b1", "b2"]) + self._is_per_device(result[1][2], ["g1", "g2"]) + + self.assertIsInstance(result[1][1], dict) + self.assertEqual(set(["c", "e"]), set(result[1][1].keys())) + self._is_per_device(result[1][1]["c"], ["d1", "d2"]) + self._is_per_device(result[1][1]["e"], ["f1", "f2"]) + + # Also test that we can undo the merge using select_device() + self.assertEqual(_nested_value("1"), + values.select_device(_device_str(0), result)) + self.assertEqual(_nested_value("2"), + values.select_device(_device_str(1), result)) + # select_device_mirrored() should fail due to non-mirrored values + with self.assertRaises(TypeError): + values.select_device_mirrored(_device_str(0), result) + with self.assertRaises(TypeError): + values.select_device_mirrored(_device_str(1), result) + + def testWrapClass(self): + # Normally a mirrored value would be the same across devices, but + # for a test it is convenient to be able to tell the values apart. + result = values.regroup({_device_str(0): _nested_value("1"), + _device_str(1): _nested_value("2")}, + values.Mirrored) + self.assertIsInstance(result, tuple) + self.assertEqual(3, len(result)) + self._is_per_device(result[0], ["a1", "a2"], values.Mirrored) + self._is_per_device(result[2], ["h1", "h2"], values.Mirrored) + + self.assertIsInstance(result[1], list) + self.assertEqual(3, len(result[1])) + self._is_per_device(result[1][0], ["b1", "b2"], values.Mirrored) + self._is_per_device(result[1][2], ["g1", "g2"], values.Mirrored) + + self.assertIsInstance(result[1][1], dict) + self.assertEqual(set(["c", "e"]), set(result[1][1].keys())) + self._is_per_device(result[1][1]["c"], ["d1", "d2"], values.Mirrored) + self._is_per_device(result[1][1]["e"], ["f1", "f2"], values.Mirrored) + + # Also test that we can undo the merge using select_device() + self.assertEqual(_nested_value("1"), + values.select_device(_device_str(0), result)) + self.assertEqual(_nested_value("2"), + values.select_device(_device_str(1), result)) + # Values are marked as mirrored, so select_device_mirrored() is allowed. + self.assertEqual(_nested_value("1"), + values.select_device_mirrored(_device_str(0), result)) + self.assertEqual(_nested_value("2"), + values.select_device_mirrored(_device_str(1), result)) + + def testMirroredContainer(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + v, devices, mirrored = _make_mirrored() + result = values.regroup(dict(zip(devices, v))) + self.assertIs(mirrored, result) + + def testSameId(self): + foo = object() + result = values.regroup({_device_str(0): ("a", foo), + _device_str(1): ("b", foo)}) + self.assertIsInstance(result, tuple) + self.assertEqual(2, len(result)) + self._is_per_device(result[0], ["a", "b"]) + self.assertIs(foo, result[1]) + + # Test select_device(), should undo the merge done by regroup(). + result_0 = values.select_device(_device_str(0), result) + self.assertIsInstance(result_0, tuple) + self.assertEqual(2, len(result_0)) + self.assertEqual("a", result_0[0]) + self.assertIs(foo, result_0[1]) + result_1 = values.select_device(_device_str(1), result) + self.assertIsInstance(result_1, tuple) + self.assertEqual(2, len(result_1)) + self.assertEqual("b", result_1[0]) + self.assertIs(foo, result_1[1]) + + def testOneDevice(self): + result = values.regroup({_device_str(0): _nested_value("1")}) + # On one device regroup() and select_device() are basically identity. + self.assertEqual(_nested_value("1"), result) + self.assertEqual(_nested_value("1"), + values.select_device(_device_str(0), result)) + + # The one exception has to do with MirroredVariables. + d = "/device:CPU:0" + with ops.device(d): + v = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + index = {d: v} + mirrored = values.MirroredVariable(index, v) + result = values.regroup(index) + self.assertIs(mirrored, result) + + def testNamedTupleEstimatorSpec(self): + with context.graph_mode(), ops.Graph().as_default(): + created_estimator_specs = {} + to_regroup = {} + + for device_id in range(3): + spec = model_fn_lib.EstimatorSpec( + mode=model_fn_lib.ModeKeys.TRAIN, + loss=constant_op.constant(device_id / 2), + train_op=array_ops.identity(constant_op.constant(device_id))) + created_estimator_specs[device_id] = spec + to_regroup[_device_str(device_id)] = spec + + merged_estimator_spec = values.regroup(to_regroup) + + self.assertTrue( + isinstance(merged_estimator_spec, model_fn_lib.EstimatorSpec)) + self.assertEquals(model_fn_lib.ModeKeys.TRAIN, merged_estimator_spec.mode) + for device_id in range(3): + d = _device_str(device_id) + self.assertEquals(created_estimator_specs[device_id].loss, + merged_estimator_spec.loss.get(d)) + self.assertEquals(created_estimator_specs[device_id].train_op, + merged_estimator_spec.train_op.get(d)) + # Scaffold is populated by `EstimatorSpec.__new__`. + self.assertEquals(created_estimator_specs[device_id].scaffold, + merged_estimator_spec.scaffold.get(d)) + # Also test that we can undo the merge using select_device() + self.assertEquals(created_estimator_specs[device_id], + values.select_device(_device_str(device_id), + merged_estimator_spec)) + + +@test_util.with_c_api +class PerDeviceDatasetTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + def _test_iterator_no_prefetch(self, devices, dataset, expected_values): + per_device_dataset = values.PerDeviceDataset( + dataset, devices, prefetch_on_device=False) + iterator = per_device_dataset.make_one_shot_iterator() + + for expected_value in expected_values: + next_element = iterator.get_next() + actual = self.evaluate([ + values.select_device(d, next_element) for d in devices]) + self.assertEqual(expected_value, actual) + + with self.assertRaises(errors.OutOfRangeError): + next_element = iterator.get_next() + self.evaluate([ + values.select_device(d, next_element) for d in devices]) + + def _test_iterator_with_prefetch(self, devices, dataset, expected_values): + if not context.executing_eagerly(): + per_device_dataset = values.PerDeviceDataset( + dataset, devices, prefetch_on_device=True) + iterator = per_device_dataset.make_one_shot_iterator() + + # With prefetching, we cannot guarantee which input ends up on which + # device, so we verify that the complete set seen on all devices is + # correct, and equal numbers are distributed to each device. + combined_actual = [] + combined_expected = [] + for expected_value in expected_values: + next_element = iterator.get_next() + combined_actual.extend(self.evaluate([ + values.select_device(d, next_element) for d in devices])) + combined_expected.extend(expected_value) + + self.assertEqual(set(combined_expected), set(combined_actual)) + + with self.assertRaises(errors.OutOfRangeError): + next_element = iterator.get_next() + self.evaluate([ + values.select_device(d, next_element) for d in devices]) + + def _test_iterator(self, devices, dataset, expected_values): + self._test_iterator_no_prefetch(devices, dataset, expected_values) + self._test_iterator_with_prefetch(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes() + def testOneDevice(self): + devices = ["/device:CPU:0"] + dataset = dataset_ops.Dataset.range(10) + + expected_values = [[i] for i in range(10)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testMultipleDevices(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset = dataset_ops.Dataset.range(10) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testTupleDataset(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset1 = dataset_ops.Dataset.range(10) + dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2) + dataset = dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testUnevenDatasetBatches(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset = dataset_ops.Dataset.range(11) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + self._test_iterator(devices, dataset, expected_values) + + +@test_util.with_c_api +class MirroredVariableTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + @test_util.run_in_graph_and_eager_modes(config=config) + def testProperties(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + v, _, mirrored = _make_mirrored() + + self.assertEquals(v[0].name, mirrored.name) + self.assertEquals(v[0].dtype, mirrored.dtype) + self.assertEquals(v[0].shape, mirrored.shape) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testVariableOnAnotherDevice(self): + v = variable_scope.get_variable( + name="v", initializer=[1.], use_resource=True) + index = {"/job:foo/device:CPU:0": v} + mirrored = values.MirroredVariable(index, v) + + self.assertEquals(v.name, mirrored.name) + self.assertEquals(v.dtype, mirrored.dtype) + self.assertEquals(v.shape, mirrored.shape) + + def _assign_mirrored(self, devices, v, new): + for d, var, n in zip(devices, v, new): + with ops.device(d): + self.evaluate(var.assign(n)) + + def _save_return_saver(self, sess, var): + saver = saver_lib.Saver(var_list=[var]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + return saver.save(sess, prefix), saver + + def _save(self, sess, var): + save_path, _ = self._save_return_saver(sess, var) + return save_path + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveAndRestoreMirroredOneGraph(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + with self.test_session() as sess: + v, devices, mirrored = _make_mirrored() + + # Overwrite the initial values. + self._assign_mirrored(devices, v, [3., 4.]) + + # Saves the current value of v[0], 3. + save_path, saver = self._save_return_saver(sess, mirrored) + + # Change the values between save and restore. + self._assign_mirrored(devices, v, [5., 6.]) + + # Restores the saved value of 3. to both variables. + saver.restore(sess, save_path) + self.assertEqual([3., 3.], self.evaluate([v[0], v[1]])) + + def _save_mirrored(self): + """Save variables with mirroring, returns save_path.""" + with self.test_session(graph=ops.Graph()) as sess: + v, devices, mirrored = _make_mirrored() + + # Overwrite the initial values. + self._assign_mirrored(devices, v, [3., 4.]) + + # Saves the current value of v[0], 3. + save_path = self._save(sess, mirrored) + + # Change the values between save and restore. + self._assign_mirrored(devices, v, [5., 6.]) + return save_path + + def _save_normal(self): + """Save variables without mirroring, returns save_path.""" + with self.test_session(graph=ops.Graph()) as sess: + var = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + + # Overwrite the initial value. + self.evaluate(var.assign(3.)) + + # Saves the current value of var, 3. + save_path = self._save(sess, var) + + # Change the values between save and restore. + self.evaluate(var.assign(5.)) + return save_path + + def _restore_normal(self, save_path): + """Restore to variables without mirroring in a fresh graph.""" + with self.test_session(graph=ops.Graph()) as sess: + var = variable_scope.get_variable( + name="v", initializer=7., use_resource=True) + + # Overwrite the initial value. + self.evaluate(var.assign(8.)) + + # Restores the saved value of 3. to `var`. + saver = saver_lib.Saver(var_list=[var]) + saver.restore(sess, save_path) + self.assertEqual(3., self.evaluate(var)) + + def _restore_mirrored(self, save_path): + """Restore to variables with mirroring in a fresh graph.""" + with self.test_session(graph=ops.Graph()) as sess: + v, devices, mirrored = _make_mirrored() + + # Overwrite the initial values. + self._assign_mirrored(devices, v, [7., 8.]) + + # Restores the saved value of 3. to both variables. + saver = saver_lib.Saver(var_list=[mirrored]) + saver.restore(sess, save_path) + self.assertEqual([3., 3.], self.evaluate([v[0], v[1]])) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveMirroredRestoreMirrored(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_mirrored() + self._restore_mirrored(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveMirroredRestoreNormal(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_mirrored() + self._restore_normal(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveNormalRestoreMirrored(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_normal() + self._restore_mirrored(save_path) + + +_devices = ["/device:GPU:0", "/device:CPU:0"] + + +def _make_tower_local(method): + v = [] + index = {} + for d, n, init in zip(_devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + index[d] = v[-1] + tower_local = values.TowerLocalVariable(index, v[0], method) + return v, tower_local + + +@test_util.with_c_api +class TowerLocalVariableTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + @test_util.run_in_graph_and_eager_modes(config=config) + def testProperties(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + v, tower_local = _make_tower_local("sum") + + self.assertEquals(v[0].name, tower_local.name) + self.assertEquals(v[0].dtype, tower_local.dtype) + self.assertEquals(v[0].shape, tower_local.shape) + self.assertEquals("sum", tower_local.reduce_method) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testVariableOnAnotherDevice(self): + v = variable_scope.get_variable( + name="v", initializer=[1.], use_resource=True) + index = {"/job:foo/device:CPU:0": v} + tower_local = values.TowerLocalVariable(index, v, "mean") + + self.assertEquals(v.name, tower_local.name) + self.assertEquals(v.dtype, tower_local.dtype) + self.assertEquals(v.shape, tower_local.shape) + self.assertEquals("mean", tower_local.reduce_method) + + def _assign_tower_local(self, devices, v, new): + for d, var, n in zip(devices, v, new): + with ops.device(d): + self.evaluate(var.assign(n)) + + def _save_return_saver(self, sess, var): + saver = saver_lib.Saver(var_list=[var]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + return saver.save(sess, prefix), saver + + def _save(self, sess, var): + save_path, _ = self._save_return_saver(sess, var) + return save_path + + def _dist_scope(self): + return mirrored_strategy.MirroredStrategy(_devices).scope() + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveAndRestoreTowerLocalSumOneGraph(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + with self.test_session() as sess: + v, tower_local = _make_tower_local("sum") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [3., 4.]) + + with self._dist_scope(): + # Saves the current value of v[0] + v[1], 7. + save_path, saver = self._save_return_saver(sess, tower_local) + + # Change the values between save and restore. + self._assign_tower_local(_devices, v, [5., 6.]) + + # Restores the saved value of 7. which gets divided equally + # between the variables. + saver.restore(sess, save_path) + self.assertEqual([3.5, 3.5], self.evaluate([v[0], v[1]])) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveAndRestoreTowerLocalMeanOneGraph(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + with self.test_session() as sess: + v, tower_local = _make_tower_local("mean") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [3., 4.]) + + with self._dist_scope(): + # Saves the current value of (v[0] + v[1])/2, 3.5. + save_path, saver = self._save_return_saver(sess, tower_local) + + # Change the values between save and restore. + self._assign_tower_local(_devices, v, [5., 6.]) + + # Restores the saved value of 3.5 to both variables. + saver.restore(sess, save_path) + self.assertEqual([3.5, 3.5], self.evaluate([v[0], v[1]])) + + def _save_tower_local_mean(self): + """Save variables with mirroring, returns save_path.""" + with self.test_session(graph=ops.Graph()) as sess: + v, tower_local = _make_tower_local("mean") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [3., 4.]) + + with self._dist_scope(): + # Saves the current value of (v[0] + v[1])/2, 3.5 + save_path = self._save(sess, tower_local) + + # Change the values between save and restore. + self._assign_tower_local(_devices, v, [5., 6.]) + return save_path + + def _save_tower_local_sum(self): + """Save variables with mirroring, returns save_path.""" + with self.test_session(graph=ops.Graph()) as sess: + v, tower_local = _make_tower_local("sum") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [1.5, 2.]) + + with self._dist_scope(): + # Saves the current value of v[0] + v[1], 3.5 + save_path = self._save(sess, tower_local) + + # Change the values between save and restore. + self._assign_tower_local(_devices, v, [5., 6.]) + return save_path + + def _save_normal(self): + """Save variables without mirroring, returns save_path.""" + with self.test_session(graph=ops.Graph()) as sess: + var = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + + # Overwrite the initial value. + self.evaluate(var.assign(3.5)) + + # Saves the current value of var, 3.5. + save_path = self._save(sess, var) + + # Change the values between save and restore. + self.evaluate(var.assign(5.)) + return save_path + + def _restore_normal(self, save_path): + """Restore to variables without mirroring in a fresh graph.""" + with self.test_session(graph=ops.Graph()) as sess: + var = variable_scope.get_variable( + name="v", initializer=7., use_resource=True) + + # Overwrite the initial value. + self.evaluate(var.assign(8.)) + + # Restores the saved value of 3.5 to `var`. + saver = saver_lib.Saver(var_list=[var]) + saver.restore(sess, save_path) + self.assertEqual(3.5, self.evaluate(var)) + + def _restore_tower_local_mean(self, save_path): + """Restore to variables with mirroring in a fresh graph.""" + with self.test_session(graph=ops.Graph()) as sess: + v, tower_local = _make_tower_local("mean") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [7., 8.]) + + with self._dist_scope(): + # Restores the saved value of 3.5 to both variables. + saver = saver_lib.Saver(var_list=[tower_local]) + saver.restore(sess, save_path) + self.assertEqual([3.5, 3.5], self.evaluate([v[0], v[1]])) + + def _restore_tower_local_sum(self, save_path): + """Restore to variables with mirroring in a fresh graph.""" + with self.test_session(graph=ops.Graph()) as sess: + v, tower_local = _make_tower_local("sum") + + # Overwrite the initial values. + self._assign_tower_local(_devices, v, [7., 8.]) + + with self._dist_scope(): + # Restores the saved value of 3.5 to both variables. + saver = saver_lib.Saver(var_list=[tower_local]) + saver.restore(sess, save_path) + self.assertEqual([1.75, 1.75], self.evaluate([v[0], v[1]])) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveTowerLocalRestoreTowerLocalMean(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_tower_local_mean() + self._restore_tower_local_mean(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveTowerLocalRestoreTowerLocalSum(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_tower_local_sum() + self._restore_tower_local_sum(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveTowerLocalMeanRestoreNormal(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_tower_local_mean() + self._restore_normal(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveTowerLocalSumRestoreNormal(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_tower_local_sum() + self._restore_normal(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveNormalRestoreTowerLocalMean(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_normal() + self._restore_tower_local_mean(save_path) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testSaveNormalRestoreTowerLocalSum(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + save_path = self._save_normal() + self._restore_tower_local_sum(save_path) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD new file mode 100644 index 0000000000..26ea9135f5 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/BUILD @@ -0,0 +1,205 @@ +# Prototype of OptimizerV2. + +package( + default_visibility = ["//tensorflow:internal"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +py_library( + name = "optimizer_v2_py", + srcs = ["optimizer_v2_symbols.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + ":training", + "//tensorflow/python:util", + ], +) + +py_library( + name = "training", + srcs = [ + "adadelta.py", + "adagrad.py", + "adam.py", + "gradient_descent.py", + "momentum.py", + "optimizer_v2.py", + "rmsprop.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], +) + +cuda_py_test( + name = "adadelta_test", + size = "medium", + srcs = ["adadelta_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:client_testlib", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_test", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + +cuda_py_test( + name = "adagrad_test", + size = "small", + srcs = ["adagrad_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_test", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + +cuda_py_test( + name = "adam_test", + size = "small", + srcs = ["adam_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_test", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + +cuda_py_test( + name = "checkpointable_utils_test", + srcs = ["checkpointable_utils_test.py"], + additional_deps = [ + ":training", + "@six_archive//:six", + "//tensorflow/contrib/eager/python:checkpointable_utils", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers", + "//tensorflow/python:layers_base", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + "//tensorflow/python/keras", + ], + tags = ["notsan"], +) + +cuda_py_test( + name = "gradient_descent_test", + size = "medium", + srcs = ["gradient_descent_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:client_testlib", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:resources", + "//tensorflow/python:variables", + ], +) + +cuda_py_test( + name = "momentum_test", + size = "medium", + srcs = ["momentum_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:client_testlib", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:resources", + "//tensorflow/python:variables", + "//tensorflow/python/eager:context", + ], +) + +cuda_py_test( + name = "optimizer_v2_test", + size = "medium", + srcs = ["optimizer_v2_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:array_ops", + "//tensorflow/python:clip_ops", + "//tensorflow/python:gradients", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:variables", + ], +) + +cuda_py_test( + name = "rmsprop_test", + size = "small", + srcs = ["rmsprop_test.py"], + additional_deps = [ + ":training", + "//tensorflow/python:array_ops", + "//tensorflow/python:embedding_ops", + "//tensorflow/python:framework", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_test", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/contrib/optimizer_v2/adadelta.py b/tensorflow/contrib/optimizer_v2/adadelta.py new file mode 100644 index 0000000000..b206f9f61b --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adadelta.py @@ -0,0 +1,113 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Adadelta for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.training import training_ops + + +class AdadeltaOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the Adadelta algorithm. + + See [M. D. Zeiler](http://arxiv.org/abs/1212.5701) + ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf)) + """ + + def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8, + use_locking=False, name="Adadelta"): + """Construct a new Adadelta optimizer. + + Some of the args below are hyperparameters, where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate. + To match the exact form in the original paper use 1.0. + rho: A float hyperparameter. The decay rate. + epsilon: A float hyperparameter. A constant epsilon used to better + condition the grad update. + use_locking: If `True` use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "Adadelta". + """ + super(AdadeltaOptimizer, self).__init__(use_locking, name) + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("rho", rho) + self._set_hyper("epsilon", epsilon) + + def _create_vars(self, var_list, state): + for v in var_list: + state.zeros_slot(v, "accum") + state.zeros_slot(v, "accum_update") + + def _apply_dense(self, grad, var, state): + accum = state.get_slot(var, "accum") + accum_update = state.get_slot(var, "accum_update") + return training_ops.apply_adadelta( + var, + accum, + accum_update, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("rho", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _resource_apply_dense(self, grad, var, state): + accum = state.get_slot(var, "accum") + accum_update = state.get_slot(var, "accum_update") + return training_ops.resource_apply_adadelta( + var.handle, + accum.handle, + accum_update.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("rho", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var, state): + accum = state.get_slot(var, "accum") + accum_update = state.get_slot(var, "accum_update") + return training_ops.sparse_apply_adadelta( + var, + accum, + accum_update, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("rho", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad.values, + grad.indices, + use_locking=self._use_locking) + + def _resource_apply_sparse(self, grad, var, indices, state): + accum = state.get_slot(var, "accum") + accum_update = state.get_slot(var, "accum_update") + return training_ops.resource_sparse_apply_adadelta( + var.handle, + accum.handle, + accum_update.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("rho", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + indices, + use_locking=self._use_locking) diff --git a/tensorflow/contrib/optimizer_v2/adadelta_test.py b/tensorflow/contrib/optimizer_v2/adadelta_test.py new file mode 100644 index 0000000000..31cfec0d50 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adadelta_test.py @@ -0,0 +1,167 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Adadelta Optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.optimizer_v2 import adadelta +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class AdadeltaOptimizerTest(test.TestCase): + + def doTestBasic(self, use_resource=False): + num_updates = 4 # number of ADADELTA steps to perform + for dtype in [dtypes.half, dtypes.float32]: + for grad in [0.2, 0.1, 0.01]: + for lr in [1.0, 0.5, 0.1]: + with self.test_session(): + var0_init = [1.0, 2.0] + var1_init = [3.0, 4.0] + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_init, dtype=dtype) + var1 = resource_variable_ops.ResourceVariable( + var1_init, dtype=dtype) + else: + var0 = variables.Variable(var0_init, dtype=dtype) + var1 = variables.Variable(var1_init, dtype=dtype) + + grads = constant_op.constant([grad, grad], dtype=dtype) + + accum = 0.0 + accum_update = 0.0 + + # ADADELTA gradient optimizer + rho = 0.95 + epsilon = 1e-8 + adadelta_opt = adadelta.AdadeltaOptimizer(lr, rho, epsilon) + adadelta_update = adadelta_opt.apply_gradients( + zip([grads, grads], [var0, var1])) + + opt_vars = adadelta_opt.variables() + self.assertStartsWith(opt_vars[0].name, var0._shared_name) + self.assertStartsWith(opt_vars[1].name, var0._shared_name) + self.assertStartsWith(opt_vars[2].name, var1._shared_name) + self.assertStartsWith(opt_vars[3].name, var1._shared_name) + self.assertEqual(4, len(opt_vars)) + + variables.global_variables_initializer().run() + + # Assign slots + slot = [None] * 2 + slot_update = [None] * 2 + self.assertEqual(["accum", "accum_update"], + adadelta_opt.get_slot_names()) + slot[0] = adadelta_opt.get_slot(var0, "accum") + self.assertEquals(slot[0].get_shape(), var0.get_shape()) + self.assertFalse(slot[0] in variables.trainable_variables()) + + slot_update[0] = adadelta_opt.get_slot(var0, "accum_update") + self.assertEquals(slot_update[0].get_shape(), var0.get_shape()) + self.assertFalse(slot_update[0] in variables.trainable_variables()) + + slot[1] = adadelta_opt.get_slot(var1, "accum") + self.assertEquals(slot[1].get_shape(), var1.get_shape()) + self.assertFalse(slot[1] in variables.trainable_variables()) + + slot_update[1] = adadelta_opt.get_slot(var1, "accum_update") + self.assertEquals(slot_update[1].get_shape(), var1.get_shape()) + self.assertFalse(slot_update[1] in variables.trainable_variables()) + + # Fetch params to validate initial values + self.assertAllClose(var0_init, var0.eval()) + self.assertAllClose(var1_init, var1.eval()) + + update = [None] * num_updates + tot_update = 0 + for step in range(num_updates): + # Run adadelta update for comparison + adadelta_update.run() + + # Perform initial update without previous accum values + accum = accum * rho + (grad**2) * (1 - rho) + update[step] = (np.sqrt(accum_update + epsilon) * + (1. / np.sqrt(accum + epsilon)) * grad) + accum_update = (accum_update * rho + (update[step]**2) * + (1.0 - rho)) + tot_update += update[step] * lr + + # Check that the accumulators have been updated + for slot_idx in range(2): + self.assertAllCloseAccordingToType( + np.array([accum, accum], dtype=dtype.as_numpy_dtype()), + slot[slot_idx].eval(), + rtol=1e-5) + + self.assertAllCloseAccordingToType( + np.array( + [accum_update, accum_update], + dtype=dtype.as_numpy_dtype()), + slot_update[slot_idx].eval(), + rtol=1e-5) + + # Check that the parameters have been updated + self.assertAllCloseAccordingToType( + np.array( + [var0_init[0] - tot_update, var0_init[1] - tot_update], + dtype=dtype.as_numpy_dtype()), + var0.eval(), + rtol=1e-5) + + self.assertAllCloseAccordingToType( + np.array( + [var1_init[0] - tot_update, var1_init[1] - tot_update], + dtype=dtype.as_numpy_dtype()), + var1.eval(), + rtol=1e-5) + + def testBasic(self): + self.doTestBasic(use_resource=False) + + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + loss = pred * pred + sgd_op = adadelta.AdadeltaOptimizer( + 1.0, 1.0, 1.0).minimize(loss) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[-111, -138]], var0.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py new file mode 100644 index 0000000000..e54f990cca --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adagrad.py @@ -0,0 +1,118 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Adagrad optimizer for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.training import training_ops + + +class AdagradOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the Adagrad algorithm. + + See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + or this + [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf). + """ + + def __init__(self, learning_rate, initial_accumulator_value=0.1, + use_locking=False, name="Adagrad"): + """Construct a new Adagrad optimizer. + + The learning_rate arg below is a hyperparameter, where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate. + initial_accumulator_value: A floating point value. + Starting value for the accumulators, must be positive. + use_locking: If `True` use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "Adagrad". + + Raises: + ValueError: If the `initial_accumulator_value` is invalid. + """ + if initial_accumulator_value <= 0.0: + raise ValueError("initial_accumulator_value must be positive: %s" % + initial_accumulator_value) + super(AdagradOptimizer, self).__init__(use_locking, name) + self._set_hyper("learning_rate", learning_rate) + + self._initial_accumulator_value = initial_accumulator_value + + def _create_vars(self, var_list, state): + for v in var_list: + with ops.colocate_with(v): + dtype = v.dtype.base_dtype + if v.get_shape().is_fully_defined(): + init = init_ops.constant_initializer(self._initial_accumulator_value, + dtype=dtype) + else: + # Use a Tensor instead of initializer if variable does not have static + # shape. + init_constant = gen_array_ops.fill( + array_ops.shape(v), self._initial_accumulator_value) + init = math_ops.cast(init_constant, dtype) + state.create_slot_with_initializer(v, init, v.get_shape(), dtype, + "accumulator") + + def _apply_dense(self, grad, var, state): + acc = state.get_slot(var, "accumulator") + return training_ops.apply_adagrad( + var, + acc, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _resource_apply_dense(self, grad, var, state): + acc = state.get_slot(var, "accumulator") + return training_ops.resource_apply_adagrad( + var.handle, + acc.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var, state): + acc = state.get_slot(var, "accumulator") + return training_ops.sparse_apply_adagrad( + var, + acc, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad.values, + grad.indices, + use_locking=self._use_locking) + + def _resource_apply_sparse(self, grad, var, indices, state): + acc = state.get_slot(var, "accumulator") + return training_ops.resource_sparse_apply_adagrad( + var.handle, + acc.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + indices, + use_locking=self._use_locking) diff --git a/tensorflow/contrib/optimizer_v2/adagrad_test.py b/tensorflow/contrib/optimizer_v2/adagrad_test.py new file mode 100644 index 0000000000..18191c3ef2 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adagrad_test.py @@ -0,0 +1,282 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for aggregate operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.optimizer_v2 import adagrad +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class AdagradOptimizerTest(test.TestCase): + + def doTestBasic(self, use_locking=False, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + if use_resource: + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) + else: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = adagrad.AdagradOptimizer( + 3.0, initial_accumulator_value=0.1, use_locking=use_locking) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 3 steps of adagrad + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testBasic(self): + self.doTestBasic(use_locking=False) + + def testBasicResource(self): + self.doTestBasic(use_locking=False, use_resource=True) + + def testBasicLocked(self): + self.doTestBasic(use_locking=True) + + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable( + [[1.0, 2.0], [3.0, 4.0]], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + loss = pred * pred + sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType( + [[1.0, 2.0], [3.0, 4.0]], var0.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[0, 1], [3, 4]], var0.eval(), atol=0.01) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = adagrad.AdagradOptimizer( + constant_op.constant(3.0), initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 3 steps of adagrad + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testSparseBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant( + [0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), + constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant( + [0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + ada_opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([[1.0], [2.0]], var0.eval()) + self.assertAllClose([[3.0], [4.0]], var1.eval()) + # Run 3 step of sgd + for _ in range(3): + ada_update.run() + # Validate updated params + self.assertAllCloseAccordingToType( + np.array([[-1.6026098728179932], [2.0]]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([[3.0], [3.715679168701172]]), var1.eval()) + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adagrad.AdagradOptimizer(3.0).apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adagrad.AdagradOptimizer(3.0).apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + + def testSparseRepeatedIndicesResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var_repeated = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype) + loss_repeated = math_ops.reduce_sum( + embedding_ops.embedding_lookup(var_repeated, [0, 0])) + var_aggregated = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype) + loss_aggregated = 2 * math_ops.reduce_sum( + embedding_ops.embedding_lookup(var_aggregated, [0])) + update_op_repeated = adagrad.AdagradOptimizer( + 2.0).minimize(loss_repeated) + update_op_aggregated = adagrad.AdagradOptimizer( + 2.0).minimize(loss_aggregated) + variables.global_variables_initializer().run() + self.assertAllCloseAccordingToType( + var_repeated.eval(), var_aggregated.eval()) + for _ in range(3): + update_op_repeated.run() + update_op_aggregated.run() + self.assertAllCloseAccordingToType( + var_repeated.eval(), var_aggregated.eval()) + + def testSparseStability(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + shape = [1, 6] + var0 = variables.Variable( + [[ + 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257, + -0.0105945 + ]], + dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant( + [[ + -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05, + -8.4877e-05, -9.48906e-05 + ]], + shape=shape, + dtype=dtype), + constant_op.constant([0]), + constant_op.constant(shape)) + ada_opt = adagrad.AdagradOptimizer(1.0, initial_accumulator_value=0.1) + ada_update = ada_opt.apply_gradients(zip([grads0], [var0])) + self.assertEqual(["accumulator"], ada_opt.get_slot_names()) + slot0 = ada_opt.get_slot(var0, "accumulator") + init = variables.global_variables_initializer() + for _ in range(100): + init.run() + ada_update.run() + self.assertAllCloseAccordingToType( + np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]), slot0.eval()) + self.assertAllCloseAccordingToType( + np.array([[ + 0.00891194, -0.10712013, 0.11047515, 0.22636929, -0.0144573, + -0.01029443 + ]]), var0.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + ada_opt = adagrad.AdagradOptimizer(3.0) + # Apply the optimizer twice. Both applications will use + # the same accums. + ada_update1 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + ada_update2 = ada_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.assertEqual(["accumulator"], ada_opt.get_slot_names()) + slot0 = ada_opt.get_slot(var0, "accumulator") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = ada_opt.get_slot(var1, "accumulator") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values. + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Mix the first and the second adagrad for 3 steps. + ada_update1.run() + ada_update2.run() + ada_update1.run() + # Validate updated params (the same as with only 1 Adagrad). + self.assertAllCloseAccordingToType( + np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([2.715679168701172, 3.715679168701172]), var1.eval()) + + def testDynamicShapeVariable_Ok(self): + with self.test_session(): + v = variable_scope.get_variable("v", initializer=constant_op.constant(1.), + validate_shape=False) + self.assertFalse(v.shape.is_fully_defined()) + # Creating optimizer should cause no exception. + adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py new file mode 100644 index 0000000000..42b7f92a76 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adam.py @@ -0,0 +1,202 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Adam optimizer for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import training_ops + + +class AdamOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the Adam algorithm. + + See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). + """ + + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, + use_locking=False, name="Adam"): + """Construct a new Adam optimizer. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + v_0 <- 0 (Initialize initial 2nd moment vector) + t <- 0 (Initialize timestep) + ``` + + The update rule for `variable` with gradient `g` uses an optimization + described at the end of section2 of the paper: + + ``` + t <- t + 1 + lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) + + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g + variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon) + ``` + + The default value of 1e-8 for epsilon might not be a good default in + general. For example, when training an Inception network on ImageNet a + current good choice is 1.0 or 0.1. Note that since AdamOptimizer uses the + formulation just before Section 2.1 of the Kingma and Ba paper rather than + the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon + hat" in the paper. + + The sparse implementation of this algorithm (used when the gradient is an + IndexedSlices object, typically because of `tf.gather` or an embedding + lookup in the forward pass) does apply momentum to variable slices even if + they were not used in the forward pass (meaning they have a gradient equal + to zero). Momentum decay (beta1) is also applied to the entire momentum + accumulator. This means that the sparse behavior is equivalent to the dense + behavior (in contrast to some momentum implementations which ignore momentum + unless a variable slice was actually used). + + Some of the args below are hyperparameters where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate. + beta1: A float hyperparameter. The exponential decay rate for the 1st + moment estimates. + beta2: A float hyperparameter. The exponential decay rate for the 2nd + moment estimates. + epsilon: A float hyperparameter. This epsilon is "epsilon hat" in the + Kingma and Ba paper (in the formula just before Section 2.1), not the + epsilon in Algorithm 1 of the paper. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "Adam". + """ + super(AdamOptimizer, self).__init__(use_locking, name) + + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("beta1", beta1) + self._set_hyper("beta2", beta2) + self._set_hyper("epsilon", epsilon) + + def _get_beta_accumulators(self, state=None): + if state is None: + state = self._get_per_graph_state() + return (state.get_non_slot("beta1_power"), + state.get_non_slot("beta2_power")) + + def _create_vars(self, var_list, state): + # Non-slot variables end up on the same device(s). + state.create_non_slot(initial_value=state.get_hyper("beta1"), + name="beta1_power") + state.create_non_slot(initial_value=state.get_hyper("beta2"), + name="beta2_power") + + # Create slots for the first and second moments. + for v in var_list: + state.zeros_slot(v, "m") + state.zeros_slot(v, "v") + + def _apply_dense(self, grad, var, state): + m = state.get_slot(var, "m") + v = state.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators(state) + return training_ops.apply_adam( + var, m, v, + math_ops.cast(beta1_power, var.dtype.base_dtype), + math_ops.cast(beta2_power, var.dtype.base_dtype), + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("beta1", var.dtype.base_dtype), + state.get_hyper("beta2", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var, state): + m = state.get_slot(var, "m") + v = state.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators(state) + return training_ops.resource_apply_adam( + var.handle, m.handle, v.handle, + math_ops.cast(beta1_power, grad.dtype.base_dtype), + math_ops.cast(beta2_power, grad.dtype.base_dtype), + state.get_hyper("learning_rate", grad.dtype.base_dtype), + state.get_hyper("beta1", grad.dtype.base_dtype), + state.get_hyper("beta2", grad.dtype.base_dtype), + state.get_hyper("epsilon", grad.dtype.base_dtype), + grad, use_locking=self._use_locking) + + def _apply_sparse_shared(self, grad, var, indices, scatter_add, state): + beta1_power, beta2_power = self._get_beta_accumulators(state) + beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype) + beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype) + lr_t = state.get_hyper("learning_rate", var.dtype.base_dtype) + beta1_t = state.get_hyper("beta1", var.dtype.base_dtype) + beta2_t = state.get_hyper("beta2", var.dtype.base_dtype) + epsilon_t = state.get_hyper("epsilon", var.dtype.base_dtype) + lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)) + # m_t = beta1 * m + (1 - beta1) * g_t + m = state.get_slot(var, "m") + m_scaled_g_values = grad * (1 - beta1_t) + m_t = state_ops.assign(m, m * beta1_t, + use_locking=self._use_locking) + with ops.control_dependencies([m_t]): + m_t = scatter_add(m, indices, m_scaled_g_values) + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = state.get_slot(var, "v") + v_scaled_g_values = (grad * grad) * (1 - beta2_t) + v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking) + with ops.control_dependencies([v_t]): + v_t = scatter_add(v, indices, v_scaled_g_values) + v_sqrt = math_ops.sqrt(v_t) + var_update = state_ops.assign_sub(var, + lr * m_t / (v_sqrt + epsilon_t), + use_locking=self._use_locking) + return control_flow_ops.group(*[var_update, m_t, v_t]) + + def _apply_sparse(self, grad, var, state): + return self._apply_sparse_shared( + grad.values, var, grad.indices, + lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking), + state) + + def _resource_scatter_add(self, x, i, v): + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_add( + x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices, state): + return self._apply_sparse_shared( + grad, var, indices, self._resource_scatter_add, state) + + def _finish(self, state): + # Update the power accumulators. + beta1_power, beta2_power = self._get_beta_accumulators(state) + update_beta1 = beta1_power.assign( + beta1_power * state.get_hyper("beta1"), + use_locking=self._use_locking) + update_beta2 = beta2_power.assign( + beta2_power * state.get_hyper("beta2"), + use_locking=self._use_locking) + return control_flow_ops.group(update_beta1, update_beta2) diff --git a/tensorflow/contrib/optimizer_v2/adam_test.py b/tensorflow/contrib/optimizer_v2/adam_test.py new file mode 100644 index 0000000000..d9ad58b0a6 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/adam_test.py @@ -0,0 +1,333 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Adam optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.optimizer_v2 import adam +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def adam_update_numpy(param, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t) + + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + + param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon) + return param_t, m_t, v_t + + +class AdamOptimizerTest(test.TestCase): + + def doTestSparse(self, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = adam.AdamOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of Adam + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + self.doTestSparse(use_resource=False) + + def testResourceSparse(self): + self.doTestSparse(use_resource=True) + + def testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). + var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices)) + optimizer = adam.AdamOptimizer(3.0) + minimize_op = optimizer.minimize(gathered_sum) + variables.global_variables_initializer().run() + minimize_op.run() + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adam.AdamOptimizer().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adam.AdamOptimizer().apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + + def doTestBasic(self, use_resource=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = adam.AdamOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + opt_variables = opt.variables() + beta1_power, beta2_power = opt._get_beta_accumulators() + self.assertTrue(beta1_power is not None) + self.assertTrue(beta2_power is not None) + self.assertIn(beta1_power, opt_variables) + self.assertIn(beta2_power, opt_variables) + + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. + self.assertEqual(0, len(opt.variables())) + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of Adam + for t in range(1, 4): + if not context.executing_eagerly(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + self.assertAllCloseAccordingToType(0.999**(t + 1), + self.evaluate(beta2_power)) + + var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/Adam:0" % (i,), + opt.get_slot(var=var0, name="m").name) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adam.AdamOptimizer(constant_op.constant(0.001)) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of Adam + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adam.AdamOptimizer() + update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of intertwined Adam1 and Adam2. + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testTwoSessions(self): + optimizer = adam.AdamOptimizer() + g = ops.Graph() + with g.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + optimizer.apply_gradients([(grads0, var0)]) + + gg = ops.Graph() + with gg.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + + # If the optimizer saves any state not keyed by graph the following line + # fails. + optimizer.apply_gradients([(grads0, var0)]) + + def testSlotsUniqueEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(1.) + v2 = resource_variable_ops.ResourceVariable(1.) + opt = adam.AdamOptimizer(1.) + opt.minimize(lambda: v1 + v2) + # There should be two non-slot variables, and two unique slot variables + # for v1 and v2 respectively. + self.assertEqual(6, len(set(opt.variables()))) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py new file mode 100644 index 0000000000..08f9699e85 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -0,0 +1,686 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# TODO(josh11b): Forked from contrib/eager/python to test OptimizerV2 the same way +# OptimizerV1 is tested. This file should be removed once the fork is resolved. + +import functools +import os + +import six + +from tensorflow.contrib.eager.python import checkpointable_utils +from tensorflow.contrib.optimizer_v2 import adam +from tensorflow.python.client import session as session_lib +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.layers import core +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import checkpointable +from tensorflow.python.training import saver as core_saver +from tensorflow.python.training import training_util + + +class NonLayerCheckpointable(checkpointable.Checkpointable): + + def __init__(self): + super(NonLayerCheckpointable, self).__init__() + self.a_variable = checkpointable_utils.add_variable( + self, name="a_variable", shape=[]) + + +# pylint: disable=not-callable +class MyModel(training.Model): + """A concrete Model for testing.""" + + def __init__(self): + super(MyModel, self).__init__() + self._named_dense = core.Dense(1, use_bias=True) + self._second = core.Dense(1, use_bias=False) + # We can still track Checkpointables which aren't Layers. + self._non_layer = NonLayerCheckpointable() + + def call(self, values): + ret = self._second(self._named_dense(values)) + return ret + + +class _MirroringSaveable( + core_saver.BaseSaverBuilder.ResourceVariableSaveable): + + def __init__(self, primary_variable, mirrored_variable, name): + self._primary_variable = primary_variable + self._mirrored_variable = mirrored_variable + super(_MirroringSaveable, self).__init__( + self._primary_variable, "", name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into both variables.""" + tensor, = restored_tensors + return control_flow_ops.group( + self._primary_variable.assign(tensor), + self._mirrored_variable.assign(tensor)) + + +class _OwnsMirroredVariables(checkpointable.CheckpointableBase): + """A Checkpointable object which returns a more complex SaveableObject.""" + + def __init__(self): + self.non_dep_variable = variable_scope.get_variable( + name="non_dep_variable", initializer=6., use_resource=True) + self.mirrored = variable_scope.get_variable( + name="mirrored", initializer=15., use_resource=True) + + def _gather_saveables_for_checkpoint(self): + def _saveable_factory(name=self.non_dep_variable.name): + return _MirroringSaveable( + primary_variable=self.non_dep_variable, + mirrored_variable=self.mirrored, + name=name) + return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory} + + # The Saver sorts by name before parsing, so we need a name property. + @property + def name(self): + return self.non_dep_variable.name + + +class CheckpointingTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNamingWithOptimizer(self): + input_value = constant_op.constant([[3.]]) + model = MyModel() + # A nuisance Model using the same optimizer. Its slot variables should not + # go in the checkpoint, since it is never depended on. + other_model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step) + if context.executing_eagerly(): + optimizer.minimize( + lambda: model(input_value), + global_step=optimizer_step) + optimizer.minimize( + lambda: other_model(input_value), + global_step=optimizer_step) + else: + train_op = optimizer.minimize( + model(input_value), global_step=optimizer_step) + optimizer.minimize( + other_model(input_value), + global_step=optimizer_step) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) + self.evaluate(train_op) + named_variables, serialized_graph = ( + checkpointable_utils._serialize_object_graph(root_checkpointable)) + expected_checkpoint_names = ( + # Created in the root node, so no prefix. + "optimizer_step", + "model/_second/kernel", + "model/_named_dense/kernel", + "model/_named_dense/bias", + # non-Layer dependency of the model + "model/_non_layer/a_variable", + # The optimizer creates two non-slot variables + "optimizer/beta1_power", + "optimizer/beta2_power", + # Slot variables + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + ) + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + expected_checkpoint_names = [ + name + suffix for name in expected_checkpoint_names] + six.assertCountEqual(self, expected_checkpoint_names, + named_variables.keys()) + # Check that we've mapped to the right variable objects (not exhaustive) + self.assertEqual( + "global_step:0", + named_variables["optimizer_step" + suffix].name) + self.assertEqual( + "my_model/dense_1/kernel:0", + named_variables["model/_second/kernel" + suffix].name) + self.assertEqual( + "my_model/dense/kernel:0", + named_variables["model/_named_dense/kernel" + suffix].name) + self.assertEqual( + "beta1_power:0", + named_variables["optimizer/beta1_power" + suffix].name) + self.assertEqual( + "beta2_power:0", + named_variables["optimizer/beta2_power" + suffix].name) + # Spot check the generated protocol buffers. + self.assertEqual("optimizer", + serialized_graph.nodes[0].children[1].local_name) + optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ + 1].node_id] + self.assertEqual("beta1_power", + optimizer_node.children[0].local_name) + self.assertEqual("beta1_power", + serialized_graph.nodes[optimizer_node.children[0].node_id] + .attributes[0].full_name) + self.assertEqual( + "my_model/dense/kernel", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .original_variable_node_id] + .attributes[0].full_name) + # We strip off the :0 suffix, as variable.name-based saving does. + self.assertEqual( + "my_model/dense/kernel/Adam", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .slot_variable_node_id] + .attributes[0].full_name) + self.assertEqual( + "my_model/dense/kernel/Adam:0", + optimizer.get_slot( + var=named_variables["model/_named_dense/kernel" + suffix], + name="m").name) + self.assertEqual( + "model/_named_dense/kernel" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .original_variable_node_id].attributes[0].checkpoint_key) + self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) + self.assertEqual( + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .slot_variable_node_id].attributes[0].checkpoint_key) + + @test_util.run_in_graph_and_eager_modes() + def testSaveRestore(self): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model) + input_value = constant_op.constant([[3.]]) + if context.executing_eagerly(): + optimizer.minimize( + lambda: model(input_value)) + else: + train_op = optimizer.minimize(model(input_value)) + # TODO(allenl): Make initialization more pleasant when graph building. + root_checkpointable.save_counter # pylint: disable=pointless-statement + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(m_bias_slot, [1.5])) + save_path = root_checkpointable.save(file_prefix=prefix) + self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) + optimizer_variables = self.evaluate(optimizer.variables()) + self.evaluate(state_ops.assign(m_bias_slot, [-2.])) + # Immediate restoration + status = root_checkpointable.restore(save_path=save_path).assert_consumed() + status.run_restore_ops() + self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) + self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + if not context.executing_eagerly(): + return # Restore-on-create is only supported when executing eagerly + on_create_model = MyModel() + on_create_optimizer = adam.AdamOptimizer( + 0.001, + # Preserve beta1_power and beta2_power when appying gradients so we can + # test that they've been restored correctly. + beta1=1.0, beta2=1.0) + on_create_root = checkpointable_utils.Checkpoint( + optimizer=on_create_optimizer, model=on_create_model) + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + on_create_model(constant_op.constant([[3.]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual([42.], + self.evaluate( + on_create_model._named_dense.variables[1])) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_model._named_dense.variables[1], "m") + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + self.assertAllEqual(optimizer_variables[2:], + self.evaluate(on_create_optimizer.variables())) + dummy_var = resource_variable_ops.ResourceVariable([1.]) + on_create_optimizer.minimize(loss=dummy_var.read_value) + status.assert_consumed() + beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() + self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) + self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) + + # TODO(allenl): Debug garbage created by this test in python3. + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, + optimizer_step=training_util.get_or_create_global_step()) + root.restore(core_saver.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + # TODO(allenl): Use a Dataset and serialize/checkpoint it. + input_value = constant_op.constant([[3.]]) + optimizer.minimize( + lambda: model(input_value), # pylint: disable=cell-var-from-loop + global_step=root.optimizer_step) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy()) + + def testUsageGraph(self): + """Expected usage when graph building.""" + with context.graph_mode(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + input_value = constant_op.constant([[3.]]) + train_op = optimizer.minimize( + model(input_value), + global_step=root.global_step) + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + with self.test_session(graph=ops.get_default_graph()) as session: + status = root.restore(save_path=checkpoint_path) + status.initialize_or_restore(session=session) + if checkpoint_path is None: + self.assertEqual(0, training_continuation) + with self.assertRaises(AssertionError): + status.assert_consumed() + else: + status.assert_consumed() + for _ in range(num_training_steps): + session.run(train_op) + root.save(file_prefix=checkpoint_prefix, session=session) + self.assertEqual((training_continuation + 1) * num_training_steps, + session.run(root.global_step)) + self.assertEqual(training_continuation + 1, + session.run(root.save_counter)) + + @test_util.run_in_graph_and_eager_modes() + def testAgnosticUsage(self): + """Graph/eager agnostic usage.""" + # Does create garbage when executing eagerly due to ops.Graph() creation. + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(), self.test_session( + graph=ops.get_default_graph()), test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) + + def _get_checkpoint_name(self, name): + root = checkpointable.Checkpointable() + checkpointable_utils.add_variable( + root, name=name, shape=[1, 2], dtype=dtypes.float64) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + checkpoint_name, = named_variables.keys() + with ops.name_scope("root/" + checkpoint_name): + pass # Make sure we can use this as an op name if we prefix it. + return checkpoint_name + + def testAnonymousVarsInInit(self): + + class Model(training.Model): + + def __init__(self): + super(Model, self).__init__() + self.w = resource_variable_ops.ResourceVariable(0.0) + self.b = resource_variable_ops.ResourceVariable(0.0) + self.vars = [self.w, self.b] + + def call(self, x): + return x * self.w + self.b + + with context.eager_mode(): + model = Model() + optimizer = adam.AdamOptimizer(learning_rate=0.05) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + checkpoint = checkpointable_utils.Checkpoint( + model=model, optimizer=optimizer) + for _ in range(2): + checkpoint.save(checkpoint_prefix) + with backprop.GradientTape() as tape: + loss = (constant_op.constant(1.) + - model(constant_op.constant(1.))) ** 2 + grad = tape.gradient(loss, model.vars) + optimizer.apply_gradients( + [(g, v) for g, v in zip(grad, model.vars)]) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testDeferredSlotRestoration(self): + checkpoint_directory = self.get_temp_dir() + + root = checkpointable.Checkpointable() + root.var = checkpointable_utils.add_variable( + root, name="var", initializer=0.) + optimizer = adam.AdamOptimizer(0.1) + if context.executing_eagerly(): + optimizer.minimize(root.var.read_value) + else: + train_op = optimizer.minimize(root.var) + # Note that `optimizer` has not been added as a dependency of + # `root`. Create a one-off grouping so that slot variables for `root.var` + # get initialized too. + self.evaluate(checkpointable_utils.gather_initializers( + checkpointable_utils.Checkpoint(root=root, optimizer=optimizer))) + self.evaluate(train_op) + self.evaluate(state_ops.assign(root.var, 12.)) + no_slots_path = checkpointable_utils.CheckpointableSaver(root).save( + os.path.join(checkpoint_directory, "no_slots")) + root.optimizer = optimizer + self.evaluate(state_ops.assign(root.var, 13.)) + self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), + 14.)) + slots_path = checkpointable_utils.CheckpointableSaver(root).save( + os.path.join(checkpoint_directory, "with_slots")) + new_root = checkpointable.Checkpointable() + # Load the slot-containing checkpoint (deferred), then immediately overwrite + # the non-slot variable (also deferred). + slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(slots_path) + no_slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(no_slots_path) + with self.assertRaises(AssertionError): + no_slot_status.assert_consumed() + new_root.var = checkpointable_utils.add_variable( + new_root, name="var", shape=[]) + no_slot_status.assert_consumed() + no_slot_status.run_restore_ops() + self.assertEqual(12., self.evaluate(new_root.var)) + new_root.optimizer = adam.AdamOptimizer(0.1) + with self.assertRaisesRegexp(AssertionError, "beta1_power"): + slot_status.assert_consumed() + self.assertEqual(12., self.evaluate(new_root.var)) + if context.executing_eagerly(): + # Slot variables are only created with restoring initializers when + # executing eagerly. + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(name="m", var=new_root.var))) + else: + self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var), + None) + if context.executing_eagerly(): + new_root.optimizer.minimize(new_root.var.read_value) + else: + train_op = new_root.optimizer.minimize(new_root.var) + # The slot variable now exists; restore() didn't create it, but we should + # now have a restore op for it. + slot_status.run_restore_ops() + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(name="m", var=new_root.var))) + self.evaluate(train_op) + slot_status.assert_consumed() + + def testManySavesGraph(self): + """Saves after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + obj.opt = adam.AdamOptimizer(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(checkpointable_utils.gather_initializers(obj)) + saver = checkpointable_utils.CheckpointableSaver(obj) + saver.save(checkpoint_prefix) + before_ops = graph.get_operations() + saver.save(checkpoint_prefix) + self.assertEqual(before_ops, graph.get_operations()) + + def testManyRestoresGraph(self): + """Restores after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + obj.opt = adam.AdamOptimizer(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(checkpointable_utils.gather_initializers(obj)) + saver = checkpointable_utils.CheckpointableSaver(obj) + save_path = saver.save(checkpoint_prefix) + saver.restore(save_path) + before_ops = graph.get_operations() + saver.restore(save_path) + self.assertEqual(before_ops, graph.get_operations()) + + def testMultipleGraphsNonSlotVariables(self): + with context.graph_mode(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer = adam.AdamOptimizer(0.001) + # Construct a model in one graph + first_graph = ops.Graph() + first_session = session_lib.Session(graph=first_graph) + with first_graph.as_default(), first_session.as_default(): + first_variable = resource_variable_ops.ResourceVariable([1.]) + first_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=first_variable) + train_op = optimizer.minimize(first_variable.read_value) + self.evaluate(checkpointable_utils.gather_initializers( + first_root_checkpointable)) + self.evaluate(train_op) + self.evaluate(first_variable.assign([1.])) + self.evaluate(optimizer.get_slot( + var=first_variable, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + + # Save and load in a second graph + second_graph = ops.Graph() + with second_graph.as_default(), session_lib.Session(graph=second_graph): + second_variable = resource_variable_ops.ResourceVariable([1.]) + second_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=second_variable) + train_op = optimizer.minimize(second_variable.read_value) + second_root_checkpointable.restore(None).initialize_or_restore() + self.evaluate(train_op) + self.evaluate(second_variable.assign([4.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([5.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(6.)) + save_path = second_root_checkpointable.save(checkpoint_prefix) + self.evaluate(second_variable.assign([7.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([8.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + status = second_root_checkpointable.restore(save_path) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([4.], self.evaluate(second_variable)) + self.assertAllEqual([5.], self.evaluate(optimizer.get_slot( + var=second_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + + # Check that the first graph is unmolested + with first_graph.as_default(), first_session.as_default(): + self.assertAllEqual([1.], self.evaluate(first_variable)) + self.assertAllEqual([2.], self.evaluate(optimizer.get_slot( + var=first_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + + +class CheckpointCompatibilityTests(test.TestCase): + + def _initialized_model(self): + input_value = constant_op.constant([[3.]]) + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, model=model, optimizer_step=optimizer_step) + train_op = optimizer.minimize( + functools.partial(model, input_value), + global_step=optimizer_step) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. + self.evaluate(model._named_dense.bias.assign([1.])) + self.evaluate(optimizer.get_slot( + var=model._named_dense.bias, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + return root_checkpointable + + def _set_sentinels(self, root_checkpointable): + self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.])) + self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.model._named_dense.bias, name="m") + .assign([102.])) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(103.)) + + def _check_sentinels(self, root_checkpointable): + self.assertAllEqual( + [1.], self.evaluate(root_checkpointable.model._named_dense.bias)) + self.assertAllEqual([2.], self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.model._named_dense.bias, name="m"))) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + + def _write_name_based_checkpoint(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph) as session: + root = self._initialized_model() + name_saver = core_saver.Saver() + return name_saver.save( + sess=session, save_path=checkpoint_prefix, + global_step=root.optimizer_step) + + @test_util.run_in_graph_and_eager_modes() + def testLoadFromNameBasedSaver(self): + """Save a name-based checkpoint, load it using the object-based API.""" + with test_util.device(use_gpu=True): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) + status = object_saver.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_consumed() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status.initialize_or_restore() + self._check_sentinels(root) + + # TODO(allenl): Test for the core name-based saver loading object-based + # checkpoints once object-based checkpointing is in core. + + def testSaveGraphLoadEager(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph) as session: + root = self._initialized_model() + object_saver = checkpointable_utils.CheckpointableSaver(root) + save_path = object_saver.save( + session=session, file_prefix=checkpoint_prefix) + with context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed() + self._check_sentinels(root) + + def testSaveEagerLoadGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.eager_mode(): + root = self._initialized_model() + object_saver = checkpointable_utils.CheckpointableSaver(root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed().run_restore_ops() + self._check_sentinels(root) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/gradient_descent.py b/tensorflow/contrib/optimizer_v2/gradient_descent.py new file mode 100644 index 0000000000..945c8de559 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/gradient_descent.py @@ -0,0 +1,69 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""GradientDescent optimizer for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import training_ops + + +class GradientDescentOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the gradient descent algorithm.""" + + def __init__(self, learning_rate, use_locking=False, name="GradientDescent"): + """Construct a new gradient descent optimizer. + + The learning rate arg below is a hyperparameter where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate to use. + use_locking: If True use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "GradientDescent". + """ + super(GradientDescentOptimizer, self).__init__(use_locking, name) + self._set_hyper("learning_rate", learning_rate) + + def _apply_dense(self, grad, var, state): + return training_ops.apply_gradient_descent( + var, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, handle, state): + lr = state.get_hyper("learning_rate", grad.dtype.base_dtype) + return training_ops.resource_apply_gradient_descent( + handle.handle, lr, grad, use_locking=self._use_locking) + + def _resource_apply_sparse_duplicate_indices( + self, grad, handle, indices, state): + lr = state.get_hyper("learning_rate", grad.dtype.base_dtype) + return resource_variable_ops.resource_scatter_add( + handle.handle, indices, -grad * lr) + + def _apply_sparse_duplicate_indices(self, grad, var, state): + delta = ops.IndexedSlices( + grad.values * state.get_hyper("learning_rate", var.dtype.base_dtype), + grad.indices, grad.dense_shape) + return var.scatter_sub(delta, use_locking=self._use_locking) diff --git a/tensorflow/contrib/optimizer_v2/gradient_descent_test.py b/tensorflow/contrib/optimizer_v2/gradient_descent_test.py new file mode 100644 index 0000000000..ad9aef804f --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/gradient_descent_test.py @@ -0,0 +1,223 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional test for GradientDescent optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import gradient_descent +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import resources +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class GradientDescentOptimizerTest(test.TestCase): + + def testBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + optimizer = gradient_descent.GradientDescentOptimizer(3.0) + sgd_op = optimizer.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], + var0.eval()) + self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], + var1.eval()) + self.assertEqual(0, len(optimizer.variables())) + + def testBasicResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1])) + # TODO(apassos) calling initialize_resources on all resources here + # doesn't work because the sessions and graph are reused across unit + # tests and this would mean trying to reinitialize variables. Figure out + # a long-term solution for this. + resources.initialize_resources([var0, var1]).run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], + var0.eval()) + self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], + var1.eval()) + + def testMinimizeResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(var0, x) + var1 + loss = pred * pred + sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss) + # TODO(apassos) calling initialize_resources on all resources here + # doesn't work because the sessions and graph are reused across unit + # tests and this would mean trying to reinitialize variables. Figure out + # a long-term solution for this. + resources.initialize_resources([var0, var1]).run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) + self.assertAllCloseAccordingToType([3.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 + np_grad = 2 * np_pred + self.assertAllCloseAccordingToType( + [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval()) + self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval()) + + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + pred += var1 + loss = pred * pred + sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss) + # TODO(apassos) calling initialize_resources on all resources here + # doesn't work because the sessions and graph are reused across unit + # tests and this would mean trying to reinitialize variables. Figure out + # a long-term solution for this. + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) + self.assertAllCloseAccordingToType([3.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 + np_grad = 2 * np_pred + self.assertAllCloseAccordingToType( + [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval()) + self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval()) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + lrate = constant_op.constant(3.0) + sgd_op = gradient_descent.GradientDescentOptimizer( + lrate).apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], + var0.eval()) + self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], + var1.eval()) + + def testGradWrtRef(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + opt = gradient_descent.GradientDescentOptimizer(3.0) + values = [1.0, 3.0] + vars_ = [variables.Variable([v], dtype=dtype) for v in values] + grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_) + variables.global_variables_initializer().run() + for grad, _ in grads_and_vars: + self.assertAllCloseAccordingToType([1.0], grad.eval()) + + def testWithGlobalStep(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + global_step = variables.Variable(0, trainable=False) + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1]), global_step=global_step) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([1.0, 2.0], var0.eval()) + self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params and global_step + self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], + var0.eval()) + self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], + var1.eval()) + self.assertAllCloseAccordingToType(1, global_step.eval()) + + def testSparseBasic(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant( + [0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), + constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant( + [0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval()) + self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], + var0.eval()) + self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], + var1.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/momentum.py b/tensorflow/contrib/optimizer_v2/momentum.py new file mode 100644 index 0000000000..0a5aadc2d1 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/momentum.py @@ -0,0 +1,124 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Momentum for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.training import training_ops + + +class MomentumOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the Momentum algorithm. + + Computes (if `use_nesterov = False`): + + ``` + accumulation = momentum * accumulation + gradient + variable -= learning_rate * accumulation + ``` + + Note that in the dense version of this algorithm, `accumulation` is updated + and applied regardless of a gradient's value, whereas the sparse version (when + the gradient is an `IndexedSlices`, typically because of `tf.gather` or an + embedding) only updates variable slices and corresponding `accumulation` terms + when that part of the variable was used in the forward pass. + """ + + def __init__(self, learning_rate, momentum, + use_locking=False, name="Momentum", use_nesterov=False): + """Construct a new Momentum optimizer. + + Some of the args below are hyperparameters, where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate. + momentum: A float hyperparameter. The momentum. + use_locking: If `True` use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "Momentum". + use_nesterov: If `True` use Nesterov Momentum. + See [Sutskever et al., 2013]( + http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). + This implementation always computes gradients at the value of the + variable(s) passed to the optimizer. Using Nesterov Momentum makes the + variable(s) track the values called `theta_t + mu*v_t` in the paper. + + @compatibility(eager) + When eager execution is enabled, learning_rate and momentum can each be a + callable that takes no arguments and returns the actual value to use. This + can be useful for changing these values across different invocations of + optimizer functions. + @end_compatibility + """ + super(MomentumOptimizer, self).__init__(use_locking, name) + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("momentum", momentum) + self._use_nesterov = use_nesterov + + def _create_vars(self, var_list, state): + for v in var_list: + state.zeros_slot(v, "momentum") + + def _apply_dense(self, grad, var, state): + mom = state.get_slot(var, "momentum") + return training_ops.apply_momentum( + var, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + state.get_hyper("momentum", var.dtype.base_dtype), + use_locking=self._use_locking, + use_nesterov=self._use_nesterov).op + + def _resource_apply_dense(self, grad, var, state): + mom = state.get_slot(var, "momentum") + return training_ops.resource_apply_momentum( + var.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + state.get_hyper("momentum", var.dtype.base_dtype), + use_locking=self._use_locking, + use_nesterov=self._use_nesterov) + + def _apply_sparse(self, grad, var, state): + mom = state.get_slot(var, "momentum") + return training_ops.sparse_apply_momentum( + var, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad.values, + grad.indices, + state.get_hyper("momentum", var.dtype.base_dtype), + use_locking=self._use_locking, + use_nesterov=self._use_nesterov).op + + def _resource_apply_sparse(self, grad, var, indices, state): + mom = state.get_slot(var, "momentum") + return training_ops.resource_sparse_apply_momentum( + var.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + grad, + indices, + state.get_hyper("momentum", var.dtype.base_dtype), + use_locking=self._use_locking, + use_nesterov=self._use_nesterov) diff --git a/tensorflow/contrib/optimizer_v2/momentum_test.py b/tensorflow/contrib/optimizer_v2/momentum_test.py new file mode 100644 index 0000000000..f37eb48181 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/momentum_test.py @@ -0,0 +1,562 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Momentum.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin + +from tensorflow.contrib.optimizer_v2 import momentum as momentum_lib +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class MomentumOptimizerTest(test.TestCase): + + def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum): + var = var + accum * lr * momentum + accum = accum * momentum + g + var = var - lr * accum + var = var - accum * lr * momentum + return var, accum + + def doTestBasic(self, use_resource=False, use_callable_params=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + [3.0, 4.0], dtype=dtype, name="var1_%d" % i) + else: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + learning_rate = lambda: 2.0 + momentum = lambda: 0.9 + if not use_callable_params: + learning_rate = learning_rate() + momentum = momentum() + mom_opt = momentum_lib.MomentumOptimizer( + learning_rate=learning_rate, momentum=momentum) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Check we have slots + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + if not context.executing_eagerly(): + self.assertFalse(slot0 in variables.trainable_variables()) + self.assertFalse(slot1 in variables.trainable_variables()) + + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate + if not context.executing_eagerly(): + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the momentum accumulators contain the previous update. + if context.executing_eagerly(): + mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + else: + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ( + (0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testBasicCallableParams(self): + with context.eager_mode(): + self.doTestBasic(use_resource=True, use_callable_params=True) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testVariablesAcrossGraphs(self): + optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5) + with ops.Graph().as_default(): + var0 = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtypes.float32, name="var0") + var1 = resource_variable_ops.ResourceVariable( + [3.0, 4.0], dtype=dtypes.float32, name="var1") + if context.executing_eagerly(): + loss = lambda: math_ops.reduce_sum(var0 + var1) + else: + loss = math_ops.reduce_sum(var0 + var1) + optimizer.minimize(loss) + optimizer_variables = optimizer.variables() + self.assertStartsWith(optimizer_variables[0].name, "var0") + self.assertStartsWith(optimizer_variables[1].name, "var1") + self.assertEquals(2, len(optimizer_variables)) + + with ops.Graph().as_default(): + var2 = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtypes.float32, name="var2") + var3 = resource_variable_ops.ResourceVariable( + [3.0, 4.0], dtype=dtypes.float32, name="var3") + if context.executing_eagerly(): + loss = lambda: math_ops.reduce_sum(var2 + var3) + else: + loss = math_ops.reduce_sum(var2 + var3) + optimizer.minimize(loss) + optimizer_variables = optimizer.variables() + self.assertStartsWith(optimizer_variables[0].name, "var2") + self.assertStartsWith(optimizer_variables[1].name, "var3") + self.assertEquals(2, len(optimizer_variables)) + + def testNesterovMomentum(self): + for dtype in [dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + cost = 5 * var0 * var0 + 3 * var1 + global_step = variables.Variable( + array_ops.zeros([], dtypes.int64), name="global_step") + mom_op = momentum_lib.MomentumOptimizer( + learning_rate=2.0, momentum=0.9, use_nesterov=True) + opt_op = mom_op.minimize(cost, global_step, [var0, var1]) + variables.global_variables_initializer().run() + for t in range(1, 5): + opt_op.run() + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np, + accum1_np, + 3, 2.0, 0.9) + self.assertAllClose(var0_np, var0.eval()) + self.assertAllClose(var1_np, var1.eval()) + + def testSparseNesterovMomentum(self): + for dtype in [dtypes.float32, dtypes.float64]: + with self.test_session(): + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + grads = [] + for t in range(1, 5): + grads.append(var0_np * 10) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np, + accum1_np, + 3, 2.0, 0.9) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + loss = 5 * var0 * var0 + 3 * var1 + mom_op = momentum_lib.MomentumOptimizer( + learning_rate=2.0, momentum=0.9, use_nesterov=True) + x_feed = array_ops.placeholder(dtype) + y_feed = ops.IndexedSlices( + x_feed, constant_op.constant([0, 1]), constant_op.constant([2])) + grads_and_vars = [(y_feed, var0), (constant_op.constant( + [3.0, 3.0], dtype=dtype), var1)] + opt_update = mom_op.apply_gradients(grads_and_vars) + variables.global_variables_initializer().run() + for t in range(1, 5): + opt_update.run(feed_dict={x_feed: grads[t - 1]}) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np, + accum1_np, + 3, 2.0, 0.9) + self.assertAllClose(var0_np, var0.eval()) + self.assertAllClose(var1_np, var1.eval()) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + + # pylint: disable=cell-var-from-loop + def loss(): + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + return pred * pred + # pylint: enable=cell-var-from-loop + + opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0) + sgd_op = opt.minimize(loss) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testMinimizeWith2DIndiciesForEmbeddingLookup(self): + var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2])) + + def loss(): + return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]])) + + opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0) + sgd_op = opt.minimize(loss) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(sgd_op) + self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) + + def testTensorLearningRateAndMomentum(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = momentum_lib.MomentumOptimizer( + learning_rate=constant_op.constant(2.0), + momentum=constant_op.constant(0.9)) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + # Check we have slots + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + self.assertFalse(slot0 in variables.trainable_variables()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + self.assertFalse(slot1 in variables.trainable_variables()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate + mom_update.run() + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval()) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval()) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval()) + # Step 2: the momentum accumulators contain the previous update. + mom_update.run() + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval()) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ( + (0.9 * 0.01 + 0.01) * 2.0) + ]), var1.eval()) + + def _dbParamsMom01(self): + """Return dist-belief momentum values. + + Return values been generated from the dist-belief momentum unittest, + running with a learning rate of 0.1 and a momentum of 0.1. + + These values record how a parameter vector of size 10, initialized with 0.0, + gets updated with 10 consecutive momentum steps. It uses random gradients. + + Returns: + db_grad: The gradients to apply + db_out: The parameters after the momentum update. + """ + db_grad = [[]] * 10 + db_out = [[]] * 10 + # pylint: disable=line-too-long + db_grad[0] = [ + 0.00096264342, 0.17914793, 0.93945462, 0.41396621, 0.53037018, + 0.93197989, 0.78648776, 0.50036013, 0.55345792, 0.96722615 + ] + db_out[0] = [ + -9.6264346e-05, -0.017914793, -0.093945466, -0.041396622, -0.053037018, + -0.093197994, -0.078648776, -0.050036013, -0.055345792, -0.096722618 + ] + db_grad[1] = [ + 0.17075552, 0.88821375, 0.20873757, 0.25236958, 0.57578111, 0.15312378, + 0.5513742, 0.94687688, 0.16012503, 0.22159521 + ] + db_out[1] = [ + -0.017181443, -0.10852765, -0.12421377, -0.070773244, -0.11591884, + -0.11783017, -0.14165108, -0.14972731, -0.076892875, -0.1285544 + ] + db_grad[2] = [ + 0.35077485, 0.47304362, 0.44412705, 0.44368884, 0.078527533, 0.81223965, + 0.31168157, 0.43203235, 0.16792089, 0.24644311 + ] + db_out[2] = [ + -0.053967446, -0.1648933, -0.1716533, -0.1180798, -0.13005978, + -0.20151734, -0.17911947, -0.20289968, -0.095839672, -0.15638189 + ] + db_grad[3] = [ + 0.9694621, 0.75035888, 0.28171822, 0.83813518, 0.53807181, 0.3728098, + 0.81454384, 0.03848977, 0.89759839, 0.93665648 + ] + db_out[3] = [ + -0.15459226, -0.24556576, -0.20456907, -0.20662397, -0.18528105, + -0.24716705, -0.2643207, -0.21206589, -0.18749419, -0.2528303 + ] + db_grad[4] = [ + 0.38578293, 0.8536852, 0.88722926, 0.66276771, 0.13678469, 0.94036359, + 0.69107032, 0.81897682, 0.5433259, 0.67860287 + ] + db_out[4] = [ + -0.20323303, -0.33900154, -0.29658359, -0.28175515, -0.20448165, + -0.34576839, -0.34194785, -0.29488021, -0.25099224, -0.33033544 + ] + db_grad[5] = [ + 0.27885768, 0.76100707, 0.24625534, 0.81354135, 0.18959245, 0.48038563, + 0.84163809, 0.41172323, 0.83259648, 0.44941229 + ] + db_out[5] = [ + -0.23598288, -0.42444581, -0.33041057, -0.3706224, -0.22536094, + -0.40366709, -0.43387437, -0.34433398, -0.34060168, -0.38302717 + ] + db_grad[6] = [ + 0.27233034, 0.056316052, 0.5039115, 0.24105175, 0.35697976, 0.75913221, + 0.73577434, 0.16014607, 0.57500273, 0.071136251 + ] + db_out[6] = [ + -0.26649091, -0.43862185, -0.38418442, -0.40361428, -0.26314685, + -0.48537019, -0.51664448, -0.36529395, -0.40706289, -0.39540997 + ] + db_grad[7] = [ + 0.58697265, 0.2494842, 0.08106143, 0.39954534, 0.15892942, 0.12683646, + 0.74053431, 0.16033, 0.66625422, 0.73515922 + ] + db_out[7] = [ + -0.32823896, -0.46498787, -0.39766794, -0.446868, -0.28281838, + -0.50622416, -0.59897494, -0.38342294, -0.48033443, -0.47016418 + ] + db_grad[8] = [ + 0.8215279, 0.41994119, 0.95172721, 0.68000203, 0.79439718, 0.43384039, + 0.55561525, 0.22567581, 0.93331909, 0.29438227 + ] + db_out[8] = [ + -0.41656655, -0.50961858, -0.49418902, -0.51919359, -0.36422527, + -0.55169362, -0.6627695, -0.40780342, -0.58099347, -0.50707781 + ] + db_grad[9] = [ + 0.68297005, 0.67758518, 0.1748755, 0.13266537, 0.70697063, 0.055731893, + 0.68593478, 0.50580865, 0.12602448, 0.093537711 + ] + db_out[9] = [ + -0.49369633, -0.58184016, -0.52132869, -0.5396927, -0.44306302, + -0.56181377, -0.73774242, -0.46082234, -0.60366184, -0.52012295 + ] + # pylint: enable=line-too-long + return db_grad, db_out + + def testLikeDistBeliefMom01(self): + with self.test_session(): + db_grad, db_out = self._dbParamsMom01() + num_samples = len(db_grad) + var0 = variables.Variable([0.0] * num_samples) + grads0 = constant_op.constant([0.0] * num_samples) + mom_opt = momentum_lib.MomentumOptimizer(learning_rate=0.1, momentum=0.1) + mom_update = mom_opt.apply_gradients(zip([grads0], [var0])) + variables.global_variables_initializer().run() + for i in xrange(num_samples): + mom_update.run(feed_dict={grads0: db_grad[i]}) + self.assertAllClose(np.array(db_out[i]), var0.eval()) + + def testSparse(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) + var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) + grads0 = ops.IndexedSlices( + constant_op.constant( + [[.1, .1]], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([4, 2])) + grads1 = ops.IndexedSlices( + constant_op.constant( + [[.01, .01], [.01, .01]], dtype=dtype), + constant_op.constant([2, 3]), + constant_op.constant([4, 2])) + mom_opt = momentum_lib.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Check we have slots + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + # Fetch params to validate initial values + self.assertAllClose([0, 0], var0.eval()[0]) + self.assertAllClose([0, 0], var0.eval()[1]) + self.assertAllClose([1, 1], var1.eval()[2]) + + # Step 1: the momentum accumulators are 0. So we should see a normal + # update: v -= grad * learning_rate + mom_update.run() + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0, 0]), slot0.eval()[0]) + self.assertAllCloseAccordingToType(np.array([.1, .1]), slot0.eval()[1]) + self.assertAllCloseAccordingToType( + np.array([.01, .01]), slot1.eval()[2]) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType(np.array([0, 0]), var0.eval()[0]) + self.assertAllCloseAccordingToType( + np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), var0.eval()[1]) + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), var1.eval()[2]) + # Step 2: the momentum accumulators contain the previous update. + mom_update.run() + # Check that the momentum accumulators have been updated. + self.assertAllClose(np.array([0, 0]), slot0.eval()[0]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()[1]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + slot1.eval()[2]) + # Check that the parameters have been updated. + self.assertAllClose(np.array([0, 0]), var0.eval()[0]) + self.assertAllCloseAccordingToType( + np.array([ + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), -(0.1 * 2.0) - ( + (0.9 * 0.1 + 0.1) * 2.0) + ]), var0.eval()[1]) + self.assertAllCloseAccordingToType( + np.array([ + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), 0.98 - ( + (0.9 * 0.01 + 0.01) * 2.0) + ]), var1.eval()[2]) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = momentum_lib.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) + mom_update1 = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + mom_update2 = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate + mom_update1.run() + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval()) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval()) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval()) + # Step 2: the second momentum accumulators contain the previous update. + mom_update2.run() + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval()) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ( + (0.9 * 0.01 + 0.01) * 2.0) + ]), var1.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py new file mode 100644 index 0000000000..471992fdac --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py @@ -0,0 +1,1352 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Version 2 of class Optimizer.""" +# pylint: disable=g-bad-name + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import checkpointable +from tensorflow.python.training import distribute as distribute_lib +from tensorflow.python.training import optimizer as optimizer_v1 +from tensorflow.python.training import slot_creator +from tensorflow.python.util import nest + + +class _OptimizableVariable(object): + """Interface for abstracting over variables in the optimizers.""" + + @abc.abstractmethod + def target(self): + """Returns the optimization target for this variable.""" + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def update_op(self, optimizer, g, *args): + """Returns the update ops for updating the variable.""" + raise NotImplementedError("Calling an abstract method.") + + +class _RefVariableProcessor(_OptimizableVariable): + """Processor for Variable.""" + + def __init__(self, v): + self._v = v + + def target(self): + return self._v._ref() # pylint: disable=protected-access + + def update_op(self, optimizer, g, *args): + if isinstance(g, ops.Tensor): + update_op = optimizer._apply_dense(g, self._v, *args) # pylint: disable=protected-access + if self._v.constraint is not None: + with ops.control_dependencies([update_op]): + return self._v.assign(self._v.constraint(self._v)) + else: + return update_op + else: + assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a " + "tensor nor IndexedSlices.") + if self._v.constraint is not None: + raise RuntimeError( + "Cannot use a constraint function on a sparse variable.") + # pylint: disable=protected-access + return optimizer._apply_sparse_duplicate_indices(g, self._v, *args) + + +class _DenseReadResourceVariableProcessor(_OptimizableVariable): + """Processor for dense ResourceVariables.""" + + def __init__(self, v): + self._v = v + + def target(self): + return self._v + + def update_op(self, optimizer, g, *args): + # pylint: disable=protected-access + update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args) + if self._v.constraint is not None: + with ops.control_dependencies([update_op]): + return self._v.assign(self._v.constraint(self._v)) + else: + return update_op + + +class _DenseResourceVariableProcessor(_OptimizableVariable): + """Processor for dense ResourceVariables.""" + + def __init__(self, v): + self._v = v + + def target(self): + return self._v + + def update_op(self, optimizer, g, *args): + # pylint: disable=protected-access + if isinstance(g, ops.IndexedSlices): + if self._v.constraint is not None: + raise RuntimeError( + "Cannot use a constraint function on a sparse variable.") + return optimizer._resource_apply_sparse_duplicate_indices( + g.values, self._v, g.indices, *args) + update_op = optimizer._resource_apply_dense(g, self._v, *args) + if self._v.constraint is not None: + with ops.control_dependencies([update_op]): + return self._v.assign(self._v.constraint(self._v)) + else: + return update_op + + +class _StreamingModelPortProcessor(_OptimizableVariable): + """Processor for streaming ModelPorts.""" + + def __init__(self, v): + self._v = v + + def target(self): + return self._v + + def update_op(self, optimizer, g, *args): + return g + + +class _TensorProcessor(_OptimizableVariable): + """Processor for ordinary Tensors. + + Even though a Tensor can't really be updated, sometimes it is useful to + compute the gradients with respect to a Tensor using the optimizer. Updating + the Tensor is, of course, unsupported. + """ + + def __init__(self, v): + self._v = v + + def target(self): + return self._v + + def update_op(self, optimizer, g, *args): + raise NotImplementedError("Trying to update a Tensor ", self._v) + + +def _get_processor(v): + """The processor of v.""" + if context.executing_eagerly(): + if isinstance(v, ops.Tensor): + return _TensorProcessor(v) + else: + return _DenseResourceVariableProcessor(v) + if v.op.type == "VarHandleOp": + return _DenseResourceVariableProcessor(v) + if isinstance(v, variables.Variable): + return _RefVariableProcessor(v) + if v.op.type == "SubmodelPort": + return _StreamingModelPortProcessor(v) + if isinstance(v, ops.Tensor): + return _TensorProcessor(v) + raise NotImplementedError("Trying to optimize unsupported type ", v) + + +def _var_key_v2(var): + """Key for representing a primary variable, for looking up slots.""" + # pylint: disable=protected-access + if hasattr(var, "_mirrored_container"): + mirrored_container = var._mirrored_container() + assert mirrored_container is not None + if context.executing_eagerly(): + return mirrored_container._unique_id + return mirrored_container._shared_name + if context.executing_eagerly(): + return var._unique_id + return var.op.name + + +def _resolve(value, name): + if callable(value): + value = value() + return ops.convert_to_tensor(value, name=name) + + +def _is_dynamic(value): + """Returns true if __init__ arg `value` should be re-evaluated each step.""" + if callable(value): return True + # Don't need to do anything special in graph mode, since dynamic values + # will propagate correctly automatically. + # TODO(josh11b): Add per-device caching across steps using variables for + # truly static values once we add distributed support. + if context.executing_eagerly() and isinstance( + value, resource_variable_ops.ResourceVariable): + return True + return False + + +class _OptimizerV2State(object): + """Holds per-graph and per-step optimizer state. + + Use _init_with_static_hyper() to create the state for a graph, and then + _copy_with_dynamic_hyper() to convert that to state for a particular step. + The difference between the two is that the former only has hyper + parameter values that are static and the latter also has values that + can change every step (according to _is_dynamic()). + """ + + def __init__(self, op_name): + self._op_name = op_name + + def _init_with_static_hyper(self, hyper): + """Initialize a fresh state object from hyper dict.""" + # self._hyper contains a dict from name to a dict with the Tensor values. + # This dict starts with a single item with key "None" with the hyper + # parameter value converted to a Tensor. Other items have dtype keys + # with that Tensor cast to that dtype. + self._hyper = {name: {None: ops.convert_to_tensor(value, name=name)} + for name, (dynamic, value) in hyper.items() if not dynamic} + self._slots = {} + self._non_slot_dict = {} + # Extra state to help Optimizers implement Checkpointable. Holds information + # about variables which will be restored as soon as they're created. + self._deferred_dependencies = {} # Non-slot variables + self._deferred_slot_restorations = {} # Slot variables + + def _copy_with_dynamic_hyper(self, hyper, distribution, non_slot_devices): + """Create a new state object for a particular step.""" + ret = _OptimizerV2State(self._op_name) + # pylint: disable=protected-access + ret._slots = self._slots + ret._non_slot_dict = self._non_slot_dict + ret._deferred_dependencies = self._deferred_dependencies + ret._deferred_slot_restorations = self._deferred_slot_restorations + ret._hyper = {name: {None: _resolve(value, name)} + for name, (dynamic, value) in hyper.items() if dynamic} + ret._hyper.update(self._hyper) + ret._non_slot_devices = non_slot_devices + ret._distribution = distribution + return ret + + def _variables(self): + """Returns a list of all variables held by self.""" + optimizer_variables = list(self._non_slot_dict.values()) + for variable_dict in self._slots.values(): + for slot_for_variable in variable_dict.values(): + optimizer_variables.append(slot_for_variable) + # Sort variables by name so that the return is deterministic. + return sorted(optimizer_variables, key=lambda v: v.name) + + def _slot_dict(self, slot_name): + """Returns a dict for caching slots created under the given name. + + Args: + slot_name: Name for the slot. + + Returns: + A dict that maps primary `Variable` objects to the slot created + for that variable, under the given slot name. + """ + named_slots = self._slots.get(slot_name, None) + if named_slots is None: + named_slots = {} + self._slots[slot_name] = named_slots + return named_slots + + def create_slot(self, var, val, slot_name, optional_op_name=None): + """Find or create a slot for a variable. + + Args: + var: A `Variable` object. + val: A `Tensor`. The initial value of the slot. + slot_name: Name for the slot. + optional_op_name: Name to use when scoping the Variable that + needs to be created for the slot. + + Returns: + A `Variable` object. + """ + named_slots = self._slot_dict(slot_name) + var_key = _var_key_v2(var) + if var_key not in named_slots: + new_slot_variable = slot_creator.create_slot( + var, val, optional_op_name or self._op_name) + self._restore_slot_variable( + slot_name=slot_name, variable=var, + slot_variable=new_slot_variable) + named_slots[var_key] = new_slot_variable + return named_slots[var_key] + + def create_slot_with_initializer(self, var, initializer, shape, dtype, + slot_name, optional_op_name=None): + """Find or create a slot for a variable, using an Initializer. + + Args: + var: A `Variable` object. + initializer: An `Initializer`. The initial value of the slot. + shape: Shape of the initial value of the slot. + dtype: Type of the value of the slot. + slot_name: Name for the slot. + optional_op_name: Name to use when scoping the Variable that + needs to be created for the slot. + + Returns: + A `Variable` object. + """ + named_slots = self._slot_dict(slot_name) + var_key = _var_key_v2(var) + if var_key not in named_slots: + new_slot_variable = slot_creator.create_slot_with_initializer( + var, initializer, shape, dtype, optional_op_name or self._op_name) + self._restore_slot_variable( + slot_name=slot_name, variable=var, + slot_variable=new_slot_variable) + named_slots[var_key] = new_slot_variable + return named_slots[var_key] + + def zeros_slot(self, var, slot_name, optional_op_name=None): + """Find or create a slot initialized with 0.0. + + Args: + var: A `Variable` object. + slot_name: Name for the slot. + optional_op_name: Name to use when scoping the Variable that + needs to be created for the slot. + + Returns: + A `Variable` object. + """ + named_slots = self._slot_dict(slot_name) + var_key = _var_key_v2(var) + if var_key not in named_slots: + new_slot_variable = slot_creator.create_zeros_slot( + var, optional_op_name or self._op_name) + self._restore_slot_variable( + slot_name=slot_name, variable=var, + slot_variable=new_slot_variable) + named_slots[var_key] = new_slot_variable + return named_slots[var_key] + + def _create_or_restore_slot_variable( + self, slot_variable_position, slot_name, variable, + optional_op_name=None): + """Restore a slot variable's value, possibly creating it. + + Called when a variable which has an associated slot variable is created or + restored. When executing eagerly, we create the slot variable with a + restoring initializer. + + No new variables are created when graph building. Instead, + _restore_slot_variable catches these after normal creation and adds restore + ops to the graph. This method is nonetheless important when graph building + for the case when a slot variable has already been created but `variable` + has just been added to a dependency graph (causing us to realize that the + slot variable needs to be restored). + + Args: + slot_variable_position: A `checkpointable._CheckpointPosition` object + indicating the slot variable `Checkpointable` object to be restored. + slot_name: The name of this `Optimizer`'s slot to restore into. + variable: The variable object this slot is being created for. + optional_op_name: Name to use when scoping the Variable that + needs to be created for the slot. + """ + slot_variable = self.get_slot(var=variable, name=slot_name) + if (slot_variable is None and context.executing_eagerly() and + slot_variable_position.is_simple_variable()): + initializer = checkpointable.CheckpointInitialValue( + checkpoint_position=slot_variable_position) + slot_variable = self.create_slot( + var=variable, + val=initializer, + slot_name=slot_name, + optional_op_name=optional_op_name) + # Optimizers do not have unconditional dependencies on their slot + # variables (nor do any other objects). They are only saved if the + # variables they were created for are also saved. + if slot_variable is not None: + # If we've either made this slot variable, or if we've pulled out an + # existing slot variable, we should restore it. + slot_variable_position.restore(slot_variable) + else: + # We didn't make the slot variable. Defer restoring until it gets created + # normally. We keep a list rather than the one with the highest restore + # UID in case slot variables have their own dependencies, in which case + # those could differ between restores. + variable_key = _var_key_v2(variable) + self._deferred_slot_restorations.setdefault( + slot_name, {}).setdefault(variable_key, []).append( + slot_variable_position) + + def get_slot(self, var, name): + """Return a slot named `name` created for `var` by the Optimizer. + + Some `Optimizer` subclasses use additional variables. For example + `Momentum` and `Adagrad` use variables to accumulate updates. This method + gives access to these `Variable` objects if for some reason you need them. + + Use `get_slot_names()` to get the list of slot names created by the + `Optimizer`. + + Args: + var: A variable passed to `minimize()` or `apply_gradients()`. + name: A string. + + Returns: + The `Variable` for the slot if it was created, `None` otherwise. + """ + named_slots = self._slots.get(name, None) + if not named_slots: + return None + return named_slots.get(_var_key_v2(var), None) + + def get_slot_names(self): + """Return a list of the names of slots created by the `Optimizer`. + + See `get_slot()`. + + Returns: + A list of strings. + """ + return sorted(self._slots.keys()) + + def create_non_slot(self, initial_value, name, colocate_with=None): + """Add an extra variable, not associated with a slot.""" + v = self._non_slot_dict.get(name, None) + if v is None: + if colocate_with is None: colocate_with = self._non_slot_devices + with self._distribution.colocate_vars_with(colocate_with): + # TODO(josh11b): Use get_variable() except for the legacy Adam use case. + v = variable_scope.variable(initial_value, name=name, trainable=False) + self._non_slot_dict[name] = v + deferred_dependencies_list = self._deferred_dependencies.pop(name, ()) + for checkpoint_position in sorted( + deferred_dependencies_list, + key=lambda restore: restore.checkpoint.restore_uid, + reverse=True): + checkpoint_position.restore(v) + return v + + def _restore_slot_variable(self, slot_name, variable, slot_variable): + """Restore a newly created slot variable's value.""" + variable_key = _var_key_v2(variable) + deferred_restorations = self._deferred_slot_restorations.get( + slot_name, {}).pop(variable_key, []) + # Iterate over restores, highest restore UID first to minimize the number + # of assignments. + deferred_restorations.sort(key=lambda position: position.restore_uid, + reverse=True) + for checkpoint_position in deferred_restorations: + checkpoint_position.restore(slot_variable) + + def get_non_slot(self, name): + """Returns the non-slot variable identified by `name`.""" + return self._non_slot_dict.get(name, None) + + def get_hyper(self, name, dtype=None): + """Returns the `name` hyper parameter, optionally cast to `dtype`.""" + dtype_dict = self._hyper[name] + # Do we have the value cast to dtype already cached? This should always + # succeed when dtype is None. + if dtype in dtype_dict: + return dtype_dict[dtype] + # Not cached, cast to dtype and save the result in the cache. + result = math_ops.cast(dtype_dict[None], dtype) + dtype_dict[dtype] = result + return result + + +class OptimizerV2(optimizer_v1.Optimizer): + """Updated base class for optimizers. + + This class defines the API to add Ops to train a model. You never use this + class directly, but instead instantiate one of its subclasses such as + `GradientDescentOptimizer`, `AdagradOptimizer`, or `MomentumOptimizer`. + + ### Usage + + ```python + # Create an optimizer with the desired parameters. + opt = GradientDescentOptimizer(learning_rate=0.1) + # Add Ops to the graph to minimize a cost by updating a list of variables. + # "cost" is a Tensor, and the list of variables contains tf.Variable + # objects. + opt_op = opt.minimize(cost, var_list=) + ``` + + In the training program you will just have to run the returned Op. + + ```python + # Execute opt_op to do one step of training: + opt_op.run() + ``` + + ### Processing gradients before applying them. + + Calling `minimize()` takes care of both computing the gradients and + applying them to the variables. If you want to process the gradients + before applying them you can instead use the optimizer in three steps: + + 1. Compute the gradients with `compute_gradients()`. + 2. Process the gradients as you wish. + 3. Apply the processed gradients with `apply_gradients()`. + + Example: + + ```python + # Create an optimizer. + opt = GradientDescentOptimizer(learning_rate=0.1) + + # Compute the gradients for a list of variables. + grads_and_vars = opt.compute_gradients(loss, ) + + # grads_and_vars is a list of tuples (gradient, variable). Do whatever you + # need to the 'gradient' part, for example cap them, etc. + capped_grads_and_vars = [(MyCapper(gv[0]), gv[1]) for gv in grads_and_vars] + + # Ask the optimizer to apply the capped gradients. + opt.apply_gradients(capped_grads_and_vars) + ``` + + ### Gating Gradients + + Both `minimize()` and `compute_gradients()` accept a `gate_gradients` + argument that controls the degree of parallelism during the application of + the gradients. + + The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`. + + `GATE_NONE`: Compute and apply gradients in parallel. This provides + the maximum parallelism in execution, at the cost of some non-reproducibility + in the results. For example the two gradients of `matmul` depend on the input + values: With `GATE_NONE` one of the gradients could be applied to one of the + inputs _before_ the other gradient is computed resulting in non-reproducible + results. + + `GATE_OP`: For each Op, make sure all gradients are computed before + they are used. This prevents race conditions for Ops that generate gradients + for multiple inputs where the gradients depend on the inputs. + + `GATE_GRAPH`: Make sure all gradients for all variables are computed + before any one of them is used. This provides the least parallelism but can + be useful if you want to process all gradients before applying any of them. + + ### Slots + + Some optimizer subclasses, such as `MomentumOptimizer` and `AdagradOptimizer` + allocate and manage additional variables associated with the variables to + train. These are called Slots. Slots have names and you can ask the + optimizer for the names of the slots that it uses. Once you have a slot name + you can ask the optimizer for the variable it created to hold the slot value. + + This can be useful if you want to log debug a training algorithm, report stats + about the slots, etc. + + ### Non-slot variables + + Some optimizer subclasses, such as `AdamOptimizer` have variables that + are not associated with the variables to train, just the step itself. + + ### Hyper parameters + + These are arguments passed to the optimizer subclass constructor + (the `__init__` method), and then passed to `self._set_hyper()`. + They can be either regular Python values (like 1.0), tensors, or + callables. If they are callable, the callable will be called during + `apply_gradients()` to get the value for the hyper parameter. + + ### State + + Internal methods apre passed a `state` argument with the correct + values to use for the slot and non-slot variables, and the hyper + parameters. + """ + + # Values for gate_gradients. + GATE_NONE = 0 + GATE_OP = 1 + GATE_GRAPH = 2 + + def __init__(self, use_locking, name): + """Create a new Optimizer. + + This must be called by the constructors of subclasses. + Note that Optimizer instances should not bind to a single graph, + and so shouldn't keep Tensors as member variables. Generally + you should be able to use the _set_hyper()/state.get_hyper() + facility instead. + + Args: + use_locking: Bool. If True apply use locks to prevent concurrent updates + to variables. + name: A non-empty string. The name to use for accumulators created + for the optimizer. + + Raises: + ValueError: If name is malformed. + RuntimeError: If _create_slots has been overridden instead of + _create_vars. + """ + # Note: We intentionally don't call parent __init__. + + # Optimizer._create_slots was replaced by _create_vars in OptimizerV2. + if (self.__class__._create_slots.__code__ is not # pylint: disable=protected-access + OptimizerV2._create_slots.__code__): + raise RuntimeError("Override _create_vars instead of _create_slots when " + "descending from OptimizerV2 (class %s)" % + self.__class__.__name__) + if not name: + raise ValueError("Must specify the optimizer name") + + self._use_locking = use_locking + self._name = name + # Map from graph_key to state for that graph. We use the graph_key + # since it works in both eager and graph mode, and gives the outer + # graph inside functions. + tower_context = distribute_lib.get_tower_context() + if tower_context is None: + # In a cross-tower context for a DistributionStrategy, which means + # only one Optimizer will be created, not one per tower. + self._per_graph_state = {} + else: + # We use get_tower_context().merge_call() to get a single dict + # shared across all model replicas when running with a + # DistributionStrategy. + self._per_graph_state = tower_context.merge_call(lambda _: {}) + + # Hyper parameters, and whether they should be re-evaluated every step. + self._hyper = {} + + def _set_hyper(self, name, value): + self._hyper[name] = (_is_dynamic(value), value) + + def minimize(self, loss, global_step=None, var_list=None, + gate_gradients=GATE_OP, aggregation_method=None, + colocate_gradients_with_ops=False, name=None, + grad_loss=None, stop_gradients=None, + scale_loss_by_num_towers=None): + """Add operations to minimize `loss` by updating `var_list`. + + This method simply combines calls `compute_gradients()` and + `apply_gradients()`. If you want to process the gradient before applying + them call `compute_gradients()` and `apply_gradients()` explicitly instead + of using this function. + + Args: + loss: A `Tensor` containing the value to minimize. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + var_list: Optional list or tuple of `Variable` objects to update to + minimize `loss`. Defaults to the list of variables collected in + the graph under the key `GraphKeys.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + name: Optional name for the returned operation. + grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. + stop_gradients: Optional. A Tensor or list of tensors not to differentiate + through. + scale_loss_by_num_towers: Optional boolean. If true, scale the loss + down by the number of towers. By default, auto-detects whether this + is needed. + + Returns: + An Operation that updates the variables in `var_list`. If `global_step` + was not `None`, that operation also increments `global_step`. + + Raises: + ValueError: If some of the variables are not `Variable` objects. + + @compatibility(eager) + When eager execution is enabled, `loss` should be a Python function that + takes elements of `var_list` as arguments and computes the value to be + minimized. If `var_list` is None, `loss` should take no arguments. + Minimization (and gradient computation) is done with respect to the + elements of `var_list` if not None, else with respect to any trainable + variables created during the execution of the `loss` function. + `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and + `grad_loss` are ignored when eager execution is enabled. + @end_compatibility + """ + grads_and_vars = self.compute_gradients( + loss, var_list=var_list, gate_gradients=gate_gradients, + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + grad_loss=grad_loss, stop_gradients=stop_gradients, + scale_loss_by_num_towers=scale_loss_by_num_towers) + + vars_with_grad = [v for g, v in grads_and_vars if g is not None] + if not vars_with_grad: + raise ValueError( + "No gradients provided for any variable, check your graph for ops" + " that do not support gradients, between variables %s and loss %s." % + ([str(v) for _, v in grads_and_vars], loss)) + + return self.apply_gradients(grads_and_vars, global_step=global_step, + name=name) + + def compute_gradients(self, loss, var_list=None, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None, stop_gradients=None, + scale_loss_by_num_towers=None): + """Compute gradients of `loss` for the variables in `var_list`. + + This is the first part of `minimize()`. It returns a list + of (gradient, variable) pairs where "gradient" is the gradient + for "variable". Note that "gradient" can be a `Tensor`, an + `IndexedSlices`, or `None` if there is no gradient for the + given variable. + + Args: + loss: A Tensor containing the value to minimize or a callable taking + no arguments which returns the value to minimize. When eager execution + is enabled it must be a callable. + var_list: Optional list or tuple of `tf.Variable` to update to minimize + `loss`. Defaults to the list of variables collected in the graph + under the key `GraphKeys.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. + stop_gradients: Optional. A Tensor or list of tensors not to differentiate + through. + scale_loss_by_num_towers: Optional boolean. If true, scale the loss + down by the number of towers. By default, auto-detects whether this + is needed. + + Returns: + A list of (gradient, variable) pairs. Variable is always present, but + gradient can be `None`. + + Raises: + TypeError: If `var_list` contains anything else than `Variable` objects. + ValueError: If some arguments are invalid. + RuntimeError: If called with eager execution enabled and `loss` is + not callable. + + @compatibility(eager) + When eager execution is enabled, `gate_gradients`, `aggregation_method`, + and `colocate_gradients_with_ops` are ignored. + @end_compatibility + """ + # TODO(josh11b): Test that we handle weight decay in a reasonable way. + if callable(loss): + with backprop.GradientTape() as tape: + if var_list is not None: + tape.watch(var_list) + loss_value = loss() + + # Scale loss for number of towers (callable-loss case). In this case, + # we have to be careful to call distribute_lib.get_loss_reduction() + # *after* loss() is evaluated, so we know what loss reduction it uses. + if scale_loss_by_num_towers is None: + scale_loss_by_num_towers = ( + distribute_lib.get_loss_reduction() == "mean") + if scale_loss_by_num_towers: + num_towers = distribute_lib.get_distribution_strategy().num_towers + if num_towers > 1: + loss_value *= 1. / num_towers + + if var_list is None: + var_list = tape.watched_variables() + grads = tape.gradient(loss_value, var_list, grad_loss) + return list(zip(grads, var_list)) + if context.executing_eagerly(): + raise RuntimeError( + "`loss` passed to Optimizer.compute_gradients should " + "be a function when eager execution is enabled.") + + # Scale loss for number of towers (non-callable-loss case). + if scale_loss_by_num_towers is None: + scale_loss_by_num_towers = ( + distribute_lib.get_loss_reduction() == "mean") + if scale_loss_by_num_towers: + num_towers = distribute_lib.get_distribution_strategy().num_towers + if num_towers > 1: + loss *= 1. / num_towers + + if gate_gradients not in [optimizer_v1.Optimizer.GATE_NONE, + optimizer_v1.Optimizer.GATE_OP, + optimizer_v1.Optimizer.GATE_GRAPH]: + raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, " + "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" % + gate_gradients) + self._assert_valid_dtypes([loss]) + if grad_loss is not None: + self._assert_valid_dtypes([grad_loss]) + if var_list is None: + var_list = ( + variables.trainable_variables() + + ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) + else: + var_list = nest.flatten(var_list) + # pylint: disable=protected-access + var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS) + # pylint: enable=protected-access + processors = [_get_processor(v) for v in var_list] + if not var_list: + raise ValueError("No variables to optimize.") + var_refs = [p.target() for p in processors] + grads = gradients.gradients( + loss, var_refs, grad_ys=grad_loss, + gate_gradients=(gate_gradients == optimizer_v1.Optimizer.GATE_OP), + aggregation_method=aggregation_method, + colocate_gradients_with_ops=colocate_gradients_with_ops, + stop_gradients=stop_gradients) + if gate_gradients == optimizer_v1.Optimizer.GATE_GRAPH: + grads = control_flow_ops.tuple(grads) + grads_and_vars = list(zip(grads, var_list)) + self._assert_valid_dtypes( + [v for g, v in grads_and_vars + if g is not None and v.dtype != dtypes.resource]) + return grads_and_vars + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Apply gradients to variables. + + This is the second part of `minimize()`. It returns an `Operation` that + applies gradients. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the `Optimizer` constructor. + + Returns: + An `Operation` that applies the specified gradients. If `global_step` + was not None, that operation also increments `global_step`. + + Raises: + TypeError: If `grads_and_vars` is malformed. + ValueError: If none of the variables have gradients. + """ + # This is a default implementation of apply_gradients() that can be shared + # by most optimizers. It relies on the subclass implementing the following + # methods: _create_vars(), _prepare(), _apply_dense(), and _apply_sparse(). + + # Filter out variables with gradients of `None`. + grads_and_vars = tuple(grads_and_vars) # Make sure repeat iteration works. + if not grads_and_vars: + raise ValueError("No variables provided.") + filtered = tuple((g, v) for (g, v) in grads_and_vars if g is not None) + if not filtered: + raise ValueError("No gradients provided for any variable: %s." % + ([str(v) for _, v in grads_and_vars],)) + return distribute_lib.get_tower_context().merge_call( + self.distributed_apply, filtered, global_step=global_step, name=name) + + def _get_or_create_state(self, var_list=None): + """Either looks up or creates `_OptimizerV2State`. + + If any variables are available, they should be passed via the `var_list` + argument, and these will be used to determine the graph to create/retrieve + state for. Otherwise the returned state is for the current default graph. + + Args: + var_list: A list of variables to extract a graph from. + + Returns: + An `_OptimizerV2State` object. + """ + # Determine the graph_key from the current graph. + eager_execution = context.executing_eagerly() + if eager_execution or var_list is None: + graph = ops.get_default_graph() + else: + graph = ops._get_graph_from_inputs(var_list) # pylint: disable=protected-access + assert graph is not None + graph_key = graph._graph_key # pylint: disable=protected-access + + # Get the per graph state by looking up the graph_key. + if graph_key in self._per_graph_state: + per_graph_state = self._per_graph_state[graph_key] + else: + per_graph_state = _OptimizerV2State(self._name) + per_graph_state._init_with_static_hyper(self._hyper) # pylint: disable=protected-access + self._per_graph_state[graph_key] = per_graph_state + return per_graph_state + + def distributed_apply(self, distribution, grads_and_vars, global_step, name): + """`apply_gradients` for use with a `DistributionStrategy`.""" + reduced_grads = distribution.batch_reduce("sum", grads_and_vars) + var_list = [v for _, v in grads_and_vars] + grads_and_vars = zip(reduced_grads, var_list) + + unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)] + eager_execution = context.executing_eagerly() + if eager_execution: + # Give a clear error in this case instead of "name not supported + # for Eager Tensors" when we compute non_slot_devices. + for v in unwrapped_var_list: + if isinstance(v, ops.Tensor): + raise NotImplementedError("Trying to update a Tensor ", v) + + with ops.name_scope(name, self._name) as name: + per_graph_state = self._get_or_create_state(var_list=unwrapped_var_list) + # Include the current value of any dynamic hyper parameters in `state`. + non_slot_devices = distribution.non_slot_devices(var_list) + state = per_graph_state._copy_with_dynamic_hyper( # pylint: disable=protected-access + self._hyper, distribution, non_slot_devices) + + # Create any slot and non-slot variables we need in `state`. + with ops.init_scope(): + self._create_vars(var_list, state) + + with ops.name_scope(name): # Re-enter name_scope created above + # Give the child class a chance to do something before we start + # applying gradients. + self._prepare(state) + + def update(v, g): + """Update variable `v` using gradient `g`.""" + assert v is not None + + # Convert the grad to Tensor or IndexedSlices if necessary, and + # look up a processor for each variable's type. + try: + g = ops.convert_to_tensor_or_indexed_slices(g) + except TypeError: + raise TypeError( + "Gradient must be convertible to a Tensor" + " or IndexedSlices, or None: %s" % g) + if not isinstance(g, (ops.Tensor, ops.IndexedSlices)): + raise TypeError( + "Gradient must be a Tensor, IndexedSlices, or None: %s" % g) + processor = _get_processor(v) + + # We colocate all ops created in _apply_dense or _apply_sparse + # on the same device as the variable. + # TODO(apassos): figure out how to get the variable name here. + scope_name = "" if eager_execution else v.op.name + # device_policy is set because non-mirrored tensors will be read in + # `update_op`. + # TODO(josh11b): Make different state objects for each device to + # avoid needing to set the device_policy. + with ops.name_scope("update_" + scope_name), \ + context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + return processor.update_op(self, g, state) + + # Use the processors to update the variables. + update_ops = [] + for grad, var in grads_and_vars: + update_ops.extend(distribution.unwrap(distribution.update( + var, update, grad))) + + # Give the child class a chance to do something after applying + # gradients + def finish(): + # TODO(josh11b): Make different state objects for each device to + # avoid needing to set the device_policy. + with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): + return self._finish(state) + + update_ops = control_flow_ops.group(update_ops) + with ops.control_dependencies([update_ops]): + finish_updates = distribution.update_non_slot(non_slot_devices, finish) + if finish_updates is None: + finish_updates = update_ops + + # Update `global_step` (if any). + if global_step is None: + apply_updates = distribution.group(finish_updates, name=name) + else: + with ops.control_dependencies(distribution.unwrap(finish_updates)): + + def update_global_step(global_step): + if isinstance(global_step, resource_variable_ops.ResourceVariable): + return global_step.assign_add( + ops.convert_to_tensor(1, dtype=global_step.dtype), + read_value=False) + else: + return state_ops.assign_add(global_step, 1) + + apply_updates = distribution.group( + distribution.update(global_step, update_global_step), name=name) + + # Add the training op to the TRAIN_OP graph collection in graph mode. + if not eager_execution: + if isinstance(apply_updates, ops.Tensor): + apply_updates = apply_updates.op + train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + if apply_updates not in train_op: + train_op.append(apply_updates) + + return apply_updates + + def get_slot(self, var, name): + """Return a slot named `name` created for `var` by the Optimizer. + + Some `Optimizer` subclasses use additional variables. For example + `Momentum` and `Adagrad` use variables to accumulate updates. This method + gives access to these `Variable` objects if for some reason you need them. + + Use `get_slot_names()` to get the list of slot names created by the + `Optimizer`. + + Args: + var: A variable passed to `minimize()` or `apply_gradients()`. + name: A string. + + Returns: + The `Variable` for the slot if it was created, `None` otherwise. + """ + state = self._get_state_for_var(var) + return state.get_slot(var, name) if state is not None else None + + def get_slot_names(self): + """Return a list of the names of slots created by the `Optimizer`. + + See `get_slot()`. + + Returns: + A list of strings. + """ + state = self._get_per_graph_state() + return state.get_slot_names() if state is not None else [] + + def variables(self): + """A list of variables which encode the current state of `Optimizer`. + + Includes slot variables and additional global variables created by the + optimizer in the current default graph. + + Returns: + A list of variables. + """ + state = self._get_per_graph_state() + return state._variables() if state is not None else [] # pylint: disable=protected-access + + # -------------- + # Methods to be implemented by subclasses if they want to use the + # inherited implementation of apply_gradients() or compute_gradients(). + # -------------- + def _create_vars(self, var_list, state): + """Create all slots needed by the variables and any non-slot variables. + + Args: + var_list: A list of `Variable` objects. + state: An object with these methods: + `create_slot(var, val, slot_name, optional_op_name)`, + `create_slot_with_initializer(` + `var, initializer, shape, dtype, slot_name, optional_op_name)`, + `zeros_slot(var, slot_name, optional_op_name)`, + `create_non_slot_variable(initial_value, name, colocate_with)`, + `get_hyper(name)` + """ + # No slots needed by default + pass + + def _prepare(self, state): + """Code to execute before applying gradients. + + Note that most uses of _prepare() in Optimizer have been subsumed + by explicit support for hyper parameters in OptimizerV2 + + Args: + state: An object with a `get_hyper(name)` method. + + Returns: + Return value will be ignored. + """ + pass + + def _apply_dense(self, grad, var, state): + """Add ops to apply dense gradients to `var`. + + Args: + grad: A `Tensor`. + var: A `Variable` object. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation`. + """ + raise NotImplementedError() + + def _resource_apply_dense(self, grad, handle, state): + """Add ops to apply dense gradients to the variable `handle`. + + Args: + grad: a `Tensor` representing the gradient. + handle: a `Tensor` of dtype `resource` which points to the variable + to be updated. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation` which updates the value of the variable. + """ + raise NotImplementedError() + + def _resource_apply_sparse_duplicate_indices( + self, grad, handle, indices, state): + """Add ops to apply sparse gradients to `handle`, with repeated indices. + + Optimizers which override this method must deal with repeated indices. See + the docstring of `_apply_sparse_duplicate_indices` for details. By default + the correct behavior, to sum non-unique indices and their associated + gradients, is enforced by first pre-processing `grad` and `indices` and + passing them on to `_resource_apply_sparse`. Optimizers which deal correctly + with duplicate indices may instead override this method to avoid the + overhead of summing. + + Args: + grad: a `Tensor` representing the gradient for the affected indices. + handle: a `Tensor` of dtype `resource` which points to the variable + to be updated. + indices: a `Tensor` of integral type representing the indices for + which the gradient is nonzero. Indices may be repeated. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation` which updates the value of the variable. + """ + # pylint: disable=protected-access + summed_grad, unique_indices = optimizer_v1._deduplicate_indexed_slices( + values=grad, indices=indices) + # pylint: enable=protected-access + return self._resource_apply_sparse( + summed_grad, handle, unique_indices, state) + + def _resource_apply_sparse(self, grad, handle, indices, state): + """Add ops to apply sparse gradients to the variable `handle`. + + Similar to `_apply_sparse`, the `indices` argument to this method has been + de-duplicated. Optimizers which deal correctly with non-unique indices may + instead override `_resource_apply_sparse_duplicate_indices` to avoid this + overhead. + + Args: + grad: a `Tensor` representing the gradient for the affected indices. + handle: a `Tensor` of dtype `resource` which points to the variable + to be updated. + indices: a `Tensor` of integral type representing the indices for + which the gradient is nonzero. Indices are unique. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation` which updates the value of the variable. + """ + raise NotImplementedError() + + def _apply_sparse_duplicate_indices(self, grad, var, state): + """Add ops to apply sparse gradients to `var`, with repeated sparse indices. + + Optimizers which override this method must deal with IndexedSlices objects + such as the following: + + IndexedSlicesValue(values=[1, 1], indices=[0, 0], dense_shape=[1]) + + The correct interpretation is: + + IndexedSlicesValue(values=[2], indices=[0], dense_shape=[1]) + + Many optimizers deal incorrectly with repeated indices when updating based + on sparse gradients (e.g. summing squares rather than squaring the sum, or + applying momentum terms multiple times). Adding first is always the correct + behavior, so this is enforced here by reconstructing the IndexedSlices to + have only unique indices, then calling _apply_sparse. + + Optimizers which deal correctly with repeated indices may instead override + this method to avoid the overhead of summing indices. + + Args: + grad: `IndexedSlices`. + var: A `Variable` object. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation`. + """ + # pylint: disable=protected-access + summed_values, unique_indices = optimizer_v1._deduplicate_indexed_slices( + values=grad.values, indices=grad.indices) + # pylint: enable=protected-access + gradient_no_duplicate_indices = ops.IndexedSlices( + indices=unique_indices, + values=summed_values, + dense_shape=grad.dense_shape) + return self._apply_sparse(gradient_no_duplicate_indices, var, state) + + def _apply_sparse(self, grad, var, state): + """Add ops to apply sparse gradients to `var`. + + The IndexedSlices object passed to `grad` in this function is by default + pre-processed in `_apply_sparse_duplicate_indices` to remove duplicate + indices (see its docstring for details). Optimizers which can tolerate or + have correct special cases for duplicate sparse indices may override + `_apply_sparse_duplicate_indices` instead of this function, avoiding that + overhead. + + Args: + grad: `IndexedSlices`, with no repeated indices. + var: A `Variable` object. + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + An `Operation`. + """ + raise NotImplementedError() + + def _finish(self, state): + """Do what is needed to finish the update. + + This is called inside a scope colocated with any non-slot variables. + + Args: + state: An object with `get_slot(var, name)`, `get_non_slot(self, name)`, + and `get_hyper(name)` methods. + + Returns: + The operation to apply updates, or None if no updates. + """ + return None + + # -------------- + # Utility methods for subclasses. + # -------------- + def _get_per_graph_state(self): + # pylint: disable=protected-access + return self._per_graph_state.get(ops.get_default_graph()._graph_key, None) + + def _get_state_for_var(self, var): + # pylint: disable=protected-access + return self._per_graph_state.get(var._graph_key, None) + + # -------------- + # Overridden methods from Checkpointable. + # -------------- + + def _track_checkpointable(self, *args, **kwargs): + """Optimizers may not track dependencies. Raises an error.""" + raise NotImplementedError( + "Optimizers may not have dependencies. File a feature request if this " + "limitation bothers you.") + + @property + def _checkpoint_dependencies(self): + """From Checkpointable. Gather graph-specific non-slot variables to save.""" + current_graph_non_slot_variables = [] + state = self._get_per_graph_state() + if state is not None: + for name, variable_object in sorted( + state._non_slot_dict.items(), # pylint: disable=protected-access + # Avoid comparing variables + key=lambda item: item[0]): + current_graph_non_slot_variables.append( + checkpointable.CheckpointableReference( + name=name, ref=variable_object)) + # Note: ignores super(); Optimizers may not have any dependencies outside of + # state objects. + return current_graph_non_slot_variables + + def _lookup_dependency(self, name): + """From Checkpointable. Find a non-slot variable in the current graph.""" + state = self._get_per_graph_state() + if state is None: + return None + else: + return state.get_non_slot(name) + + @property + def _deferred_dependencies(self): + """Lets Checkpointable know where non-slot variables are created. + + If necessary, creates a new state object for the current default graph. + Checkpointable will then add entries to that state's deferred dependency + dictionary. The state object will check that dictionary when creating + non-slot variables, restoring their value if an entry is found. + + Returns: + A dictionary which holds deferred dependencies for the current default + graph. + """ + state = self._get_or_create_state() + return state._deferred_dependencies # pylint: disable=protected-access + + def _create_or_restore_slot_variable( + self, slot_variable_position, slot_name, variable): + """Checkpointable: Restore a slot variable's value, possibly creating it. + + Called when a variable which has an associated slot variable is created or + restored. + + Args: + slot_variable_position: A `checkpointable._CheckpointPosition` object + indicating the slot variable `Checkpointable` object to be restored. + slot_name: The name of this `Optimizer`'s slot to restore into. + variable: The variable object this slot is being created for. + """ + state = self._get_or_create_state(var_list=[variable]) + state._create_or_restore_slot_variable( # pylint: disable=protected-access + slot_variable_position=slot_variable_position, + slot_name=slot_name, + variable=variable, + optional_op_name=self._name) + + # -------------- + # Unsupported parent methods + # -------------- + def _slot_dict(self, slot_name): + raise NotImplementedError( + "_slot_dict() method unsupported in OptimizerV2") + + def _get_or_make_slot(self, var, val, slot_name, op_name): + raise NotImplementedError( + "_get_or_make_slot() method unsupported in OptimizerV2") + + def _get_or_make_slot_with_initializer(self, var, initializer, shape, dtype, + slot_name, op_name): + raise NotImplementedError( + "_get_or_make_slot_with_initializer() method unsupported in " + "OptimizerV2") + + def _create_non_slot_variable(self, initial_value, name, colocate_with): + raise NotImplementedError( + "_create_non_slot_variable() method unsupported in OptimizerV2") + + def _get_non_slot_variable(self, name, graph=None): + raise NotImplementedError( + "_get_non_slot_variable() method unsupported in OptimizerV2") + + def _non_slot_variables(self): + raise NotImplementedError( + "_non_slot_variables() method unsupported in OptimizerV2") diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2_symbols.py b/tensorflow/contrib/optimizer_v2/optimizer_v2_symbols.py new file mode 100644 index 0000000000..24eada06cc --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2_symbols.py @@ -0,0 +1,42 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Distribution-aware version of Optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from tensorflow.contrib.optimizer_v2.adadelta import AdadeltaOptimizer +from tensorflow.contrib.optimizer_v2.adagrad import AdagradOptimizer +from tensorflow.contrib.optimizer_v2.adam import AdamOptimizer +from tensorflow.contrib.optimizer_v2.gradient_descent import GradientDescentOptimizer +from tensorflow.contrib.optimizer_v2.momentum import MomentumOptimizer +from tensorflow.contrib.optimizer_v2.optimizer_v2 import OptimizerV2 +from tensorflow.contrib.optimizer_v2.rmsprop import RMSPropOptimizer + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + 'AdadeltaOptimizer', + 'AdagradOptimizer', + 'AdamOptimizer', + 'GradientDescentOptimizer', + 'MomentumOptimizer', + 'OptimizerV2', + 'RMSPropOptimizer', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py new file mode 100644 index 0000000000..8599af32f6 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py @@ -0,0 +1,294 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional test for OptimizerV2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import gradient_descent +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +class OptimizerTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testBasic(self): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Note that we name the variables uniquely here since the variables don't + # seem to be getting deleted at the end of the loop. + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, + name='a_%d' % i) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, + name='b_%d' % i) + def loss(): + return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop + # Note that for eager execution, minimize expects a function instead of a + # Tensor. + global_step = resource_variable_ops.ResourceVariable( + array_ops.zeros([], dtypes.int64), name='global_step_%d' % i) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Run 1 step of sgd through optimizer + opt_op = sgd_op.minimize(loss, global_step, [var0, var1]) + self.evaluate(opt_op) + # Validate updated params + self.assertAllClose([-14., -13.], self.evaluate(var0)) + self.assertAllClose([-6., -5.], self.evaluate(var1)) + + def testAggregationMethod(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + cost = 5 * var0 + 3 * var1 + global_step = variables.Variable( + array_ops.zeros([], dtypes.int64), name='global_step') + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + opt_op = sgd_op.minimize( + cost, + global_step, [var0, var1], + aggregation_method=gradients_impl.AggregationMethod. + EXPERIMENTAL_ACCUMULATE_N) + + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 1 step of sgd through optimizer + opt_op.run() + # Validate updated params + self.assertAllClose([-14., -13.], var0.eval()) + self.assertAllClose([-6., -5.], var1.eval()) + + def testPrecomputedGradient(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + cost = 5 * var0 + 3 * var1 + grad_loss = constant_op.constant([42, -42], dtype=dtype) + global_step = variables.Variable( + array_ops.zeros([], dtypes.int64), name='global_step') + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + opt_op = sgd_op.minimize( + cost, global_step, [var0, var1], grad_loss=grad_loss) + + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 1 step of sgd through optimizer + opt_op.run() + # Validate updated params + self.assertAllClose([1.0 - 3 * 5 * 42.0, 2.0 - 3 * 5 * (-42.0)], + var0.eval()) + self.assertAllClose([3.0 - 3 * 3 * 42.0, 4.0 - 3 * 3 * (-42.0)], + var1.eval()) + + @test_util.run_in_graph_and_eager_modes() + def testNoVariables(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + # pylint: disable=cell-var-from-loop + def loss(): + var0 = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype, trainable=False, name='a') + var1 = resource_variable_ops.ResourceVariable( + [3.0, 4.0], dtype=dtype, trainable=False, name='b') + return 5 * var0 + var1 + # pylint: enable=cell-var-from-loop + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + with self.assertRaisesRegexp(ValueError, 'No.*variables'): + sgd_op.minimize(loss) + + @test_util.run_in_graph_and_eager_modes() + def testNoGradients(self): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Note that we name the variables uniquely here since the variables don't + # seem to be getting deleted at the end of the loop. + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, + name='a%d' % i) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, + name='b%d' % i) + # pylint: disable=cell-var-from-loop + def loss(): + return 5 * var0 + # pylint: enable=cell-var-from-loop + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + with self.assertRaisesRegexp(ValueError, 'No gradients'): + # var1 has no gradient + sgd_op.minimize(loss, var_list=[var1]) + + @test_util.run_in_graph_and_eager_modes() + def testNoGradientsForAnyVariables_Minimize(self): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Note that we name the variables uniquely here since the variables don't + # seem to be getting deleted at the end of the loop. + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, + name='a_%d' % i) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, + name='b_%d' % i) + def loss(): + return constant_op.constant(5.0) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + with self.assertRaisesRegexp(ValueError, + 'No gradients provided for any variable'): + sgd_op.minimize(loss, var_list=[var0, var1]) + + @test_util.run_in_graph_and_eager_modes() + def testNoGradientsForAnyVariables_ApplyGradients(self): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Note that we name the variables uniquely here since the variables don't + # seem to be getting deleted at the end of the loop. + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, + name='a_%d' % i) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, + name='b_%d' % i) + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + with self.assertRaisesRegexp(ValueError, + 'No gradients provided for any variable'): + sgd_op.apply_gradients([(None, var0), (None, var1)]) + + @test_util.run_in_graph_and_eager_modes() + def testGradientsAsVariables(self): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Note that we name the variables uniquely here since the variables don't + # seem to be getting deleted at the end of the loop. + var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype, + name='a%d' % i) + var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype, + name='b%d' % i) + def loss(): + return 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + grads_and_vars = sgd_op.compute_gradients(loss, [var0, var1]) + # Convert gradients to tf.Variables + converted_grads = [ + resource_variable_ops.ResourceVariable(array_ops.zeros([2], dtype), + name='c_%d_%d' % (i, j)) + for j, gv in enumerate(grads_and_vars) + ] + convert_ops = [ + state_ops.assign(converted_grads[j], gv[0]) + for j, gv in enumerate(grads_and_vars) + ] + + self.evaluate(variables.global_variables_initializer()) + # Run convert_ops to achieve the gradietns converting + self.evaluate(convert_ops) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 1 step of sgd through optimizer + converted_grads_and_vars = list(zip(converted_grads, [var0, var1])) + opt_op = sgd_op.apply_gradients(converted_grads_and_vars) + self.evaluate(opt_op) + + # Validate updated params + self.assertAllClose([-14., -13.], self.evaluate(var0)) + self.assertAllClose([-6., -5.], self.evaluate(var1)) + + @test_util.run_in_graph_and_eager_modes() + def testComputeGradientsWithTensors(self): + x = ops.convert_to_tensor(1.0) + def f(): + return x * x + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + grads_and_vars = sgd_op.compute_gradients(f, [x]) + self.assertEqual(1, len(grads_and_vars)) + grad, x_as_var = grads_and_vars[0] + self.assertIs(x, x_as_var) + self.assertEqual(2.0, self.evaluate(grad)) + + with self.assertRaises(NotImplementedError): + sgd_op.apply_gradients(grads_and_vars) + + def testTrainOp(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0]) + var1 = variables.Variable([3.0, 4.0]) + cost = 5 * var0 + 3 * var1 + global_step = variables.Variable( + array_ops.zeros([], dtypes.int64), name='global_step') + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + opt_op = sgd_op.minimize(cost, global_step, [var0, var1]) + self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP)) + + def testConstraint(self): + constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.) + constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.) + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], + constraint=constraint_01) + var1 = variables.Variable([3.0, 4.0], + constraint=constraint_0) + cost = 5 * var0 + 3 * var1 + global_step = variables.Variable( + array_ops.zeros([], dtypes.int64), name='global_step') + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + opt_op = sgd_op.minimize(cost, global_step, [var0, var1]) + + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Run 1 step of sgd through optimizer + opt_op.run() + # Validate updated params + self.assertAllClose([-0.1, -0.1], var0.eval()) + self.assertAllClose([0., 0.], var1.eval()) + + def testStopGradients(self): + with self.test_session(): + var0 = variables.Variable([1.0, 2.0], name='var0') + var1 = variables.Variable([3.0, 4.0], name='var1') + var0_id = array_ops.identity(var0) + cost = 5 * var0_id + 3 * var1 + sgd_op = gradient_descent.GradientDescentOptimizer(3.0) + grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1], + stop_gradients=[var0_id]) + grad_dict = {var.op.name: grad for grad, var in grads_and_vars} + self.assertIsNone(grad_dict['var0']) + self.assertIsNotNone(grad_dict['var1']) + + def testDoNotOverrideCreateSlots(self): + class ShouldNotOverrideCreateSlots(optimizer_v2.OptimizerV2): + + def _create_slots(self, var_list): + """In OptimizerV2 _create_slots was renamed _create_vars.""" + return var_list + + with self.assertRaises(RuntimeError): + ShouldNotOverrideCreateSlots(True, 'name') + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/optimizer_v2/rmsprop.py b/tensorflow/contrib/optimizer_v2/rmsprop.py new file mode 100644 index 0000000000..164ff0ea06 --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/rmsprop.py @@ -0,0 +1,233 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""RMSprop optimizer for Tensorflow. + +rmsprop algorithm [tieleman2012rmsprop] + +A detailed description of rmsprop. + +- maintain a moving (discounted) average of the square of gradients +- divide gradient by the root of this average + +mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2 +mom = momentum * mom{t-1} + learning_rate * g_t / sqrt(mean_square + epsilon) +delta = - mom + +This implementation of RMSProp uses plain momentum, not Nesterov momentum. + +The centered version additionally maintains a moving (discounted) average of the +gradients, and uses that average to estimate the variance: + +mean_grad = decay * mean_square{t-1} + (1-decay) * gradient +mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2 +mom = momentum * mom{t-1} + learning_rate * g_t / + sqrt(mean_square - mean_grad**2 + epsilon) +delta = - mom +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.optimizer_v2 import optimizer_v2 +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops + +from tensorflow.python.training import training_ops + + +class RMSPropOptimizer(optimizer_v2.OptimizerV2): + """Optimizer that implements the RMSProp algorithm. + + See the + [paper](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). + """ + + def __init__(self, + learning_rate, + decay=0.9, + momentum=0.0, + epsilon=1e-10, + use_locking=False, + centered=False, + name="RMSProp"): + """Construct a new RMSProp optimizer. + + Note that in the dense implementation of this algorithm, variables and their + corresponding accumulators (momentum, gradient moving average, square + gradient moving average) will be updated even if the gradient is zero + (i.e. accumulators will decay, momentum will be applied). The sparse + implementation (used when the gradient is an `IndexedSlices` object, + typically because of `tf.gather` or an embedding lookup in the forward pass) + will not update variable slices or their accumulators unless those slices + were used in the forward pass (nor is there an "eventual" correction to + account for these omitted updates). This leads to more efficient updates for + large embedding lookup tables (where most of the slices are not accessed in + a particular graph execution), but differs from the published algorithm. + + Some of the args below are hyperparameters, where a hyperparameter is + defined as a scalar Tensor, a regular Python value or a callable (which + will be evaluated when `apply_gradients` is called) returning a scalar + Tensor or a Python value. + + Args: + learning_rate: A float hyperparameter. The learning rate. + decay: A float hyperparameter. Discounting factor for the history/coming + gradient. + momentum: A float hyperparameter. + epsilon: A float hyperparameter. Small value to avoid zero denominator. + use_locking: If True use locks for update operation. + centered: If True, gradients are normalized by the estimated variance of + the gradient; if False, by the uncentered second moment. Setting this to + True may help with training, but is slightly more expensive in terms of + computation and memory. Defaults to False. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "RMSProp". + """ + super(RMSPropOptimizer, self).__init__(use_locking, name) + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("decay", decay) + self._set_hyper("momentum", momentum) + self._set_hyper("epsilon", epsilon) + + self._centered = centered + + def _create_vars(self, var_list, state): + for v in var_list: + if v.get_shape().is_fully_defined(): + init_rms = init_ops.ones_initializer(dtype=v.dtype.base_dtype) + else: + init_rms = array_ops.ones_like(v) + state.create_slot_with_initializer(v, init_rms, v.get_shape(), + v.dtype.base_dtype, "rms") + if self._centered: + state.zeros_slot(v, "mg") + state.zeros_slot(v, "momentum") + + def _apply_dense(self, grad, var, state): + rms = state.get_slot(var, "rms") + mom = state.get_slot(var, "momentum") + if self._centered: + mg = state.get_slot(var, "mg") + return training_ops.apply_centered_rms_prop( + var, + mg, + rms, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + else: + return training_ops.apply_rms_prop( + var, + rms, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var, state): + rms = state.get_slot(var, "rms") + mom = state.get_slot(var, "momentum") + if self._centered: + mg = state.get_slot(var, "mg") + return training_ops.resource_apply_centered_rms_prop( + var.handle, + mg.handle, + rms.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + else: + return training_ops.resource_apply_rms_prop( + var.handle, + rms.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var, state): + rms = state.get_slot(var, "rms") + mom = state.get_slot(var, "momentum") + if self._centered: + mg = state.get_slot(var, "mg") + return training_ops.sparse_apply_centered_rms_prop( + var, + mg, + rms, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad.values, + grad.indices, + use_locking=self._use_locking) + else: + return training_ops.sparse_apply_rms_prop( + var, + rms, + mom, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad.values, + grad.indices, + use_locking=self._use_locking) + + def _resource_apply_sparse(self, grad, var, indices, state): + rms = state.get_slot(var, "rms") + mom = state.get_slot(var, "momentum") + if self._centered: + mg = self.get_slot(var, "mg") + return training_ops.resource_sparse_apply_centered_rms_prop( + var.handle, + mg.handle, + rms.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + indices, + use_locking=self._use_locking) + else: + return training_ops.resource_sparse_apply_rms_prop( + var.handle, + rms.handle, + mom.handle, + state.get_hyper("learning_rate", var.dtype.base_dtype), + state.get_hyper("decay", var.dtype.base_dtype), + state.get_hyper("momentum", var.dtype.base_dtype), + state.get_hyper("epsilon", var.dtype.base_dtype), + grad, + indices, + use_locking=self._use_locking) diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py new file mode 100644 index 0000000000..ed68f6afbf --- /dev/null +++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py @@ -0,0 +1,449 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for rmsprop optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import itertools +import math + +import numpy as np + +from tensorflow.contrib.optimizer_v2 import rmsprop +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + +_DATA_TYPES = [dtypes.half, dtypes.float32] + +_TEST_PARAM_VALUES = [ + # learning_rate, decay, momentum, epsilon, centered, use_resource + [0.5, 0.9, 0.0, 1e-3, True, False], + [0.5, 0.9, 0.0, 1e-3, False, False], + [0.5, 0.9, 0.0, 1e-3, True, True], + [0.5, 0.9, 0.0, 1e-3, False, True], + [0.1, 0.9, 0.0, 1e-3, True, False], + [0.5, 0.95, 0.0, 1e-3, False, False], + [0.5, 0.95, 0.0, 1e-5, True, False], + [0.5, 0.95, 0.9, 1e-5, True, False], +] + +_TESTPARAMS = [ + [data_type] + values + for data_type, values in itertools.product(_DATA_TYPES, _TEST_PARAM_VALUES) +] + + +class RMSPropOptimizerTest(test.TestCase): + + def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, decay, momentum, + epsilon, centered): + rms_t = rms * decay + (1 - decay) * g * g + denom_t = rms_t + epsilon + if centered: + mg_t = mg * decay + (1 - decay) * g + denom_t -= mg_t * mg_t + else: + mg_t = mg + mom_t = momentum * mom + lr * g / np.sqrt(denom_t, dtype=denom_t.dtype) + var_t = var - mom_t + return var_t, mg_t, rms_t, mom_t + + def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom, + lr, decay, momentum, epsilon, centered): + mg_t = copy.deepcopy(mg) + rms_t = copy.deepcopy(rms) + mom_t = copy.deepcopy(mom) + var_t = copy.deepcopy(var) + for i in range(len(gindexs)): + gindex = gindexs[i] + gvalue = gvalues[i] + rms_t[gindex] = rms[gindex] * decay + (1 - decay) * gvalue * gvalue + denom_t = rms_t[gindex] + epsilon + if centered: + mg_t[gindex] = mg_t[gindex] * decay + (1 - decay) * gvalue + denom_t -= mg_t[gindex] * mg_t[gindex] + mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(denom_t) + var_t[gindex] = var[gindex] - mom_t[gindex] + return var_t, mg_t, rms_t, mom_t + + def testDense(self): + # TODO(yori): Use ParameterizedTest when available + for (dtype, learning_rate, decay, momentum, + epsilon, centered, use_resource) in _TESTPARAMS: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = rmsprop.RMSPropOptimizer( + learning_rate=learning_rate, + decay=decay, + momentum=momentum, + epsilon=epsilon, + centered=centered) + + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + mg0 = opt.get_slot(var0, "mg") + self.assertEqual(mg0 is not None, centered) + mg1 = opt.get_slot(var1, "mg") + self.assertEqual(mg1 is not None, centered) + rms0 = opt.get_slot(var0, "rms") + self.assertTrue(rms0 is not None) + rms1 = opt.get_slot(var1, "rms") + self.assertTrue(rms1 is not None) + mom0 = opt.get_slot(var0, "momentum") + self.assertTrue(mom0 is not None) + mom1 = opt.get_slot(var1, "momentum") + self.assertTrue(mom1 is not None) + + mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype) + rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype) + mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 4 steps of RMSProp + for _ in range(1, 5): + update.run() + + var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy( + var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate, + decay, momentum, epsilon, centered) + var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy( + var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate, + decay, momentum, epsilon, centered) + + # Validate updated params + if centered: + self.assertAllCloseAccordingToType(mg0_np, mg0.eval()) + self.assertAllCloseAccordingToType(mg1_np, mg1.eval()) + self.assertAllCloseAccordingToType(rms0_np, rms0.eval()) + self.assertAllCloseAccordingToType(rms1_np, rms1.eval()) + self.assertAllCloseAccordingToType(mom0_np, mom0.eval()) + self.assertAllCloseAccordingToType(mom1_np, mom1.eval()) + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testMinimizeSparseResourceVariable(self): + for dtype in [dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + loss = pred * pred + sgd_op = rmsprop.RMSPropOptimizer( + learning_rate=1.0, + decay=0.0, + momentum=0.0, + epsilon=0.0, + centered=False).minimize(loss) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[0., 1.]], var0.eval(), atol=0.01) + + def testMinimizeSparseResourceVariableCentered(self): + for dtype in [dtypes.float32, dtypes.float64]: + with self.test_session(): + var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + loss = pred * pred + sgd_op = rmsprop.RMSPropOptimizer( + learning_rate=1.0, + decay=0.0, + momentum=0.0, + epsilon=1.0, + centered=True).minimize(loss) + variables.global_variables_initializer().run() + # Fetch params to validate initial values + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval()) + # Run 1 step of sgd + sgd_op.run() + # Validate updated params + self.assertAllCloseAccordingToType( + [[-111, -138]], var0.eval(), atol=0.01) + + def testSparse(self): + # TODO(yori): Use ParameterizedTest when available + for (dtype, learning_rate, decay, + momentum, epsilon, centered, _) in _TESTPARAMS: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([1])) + grads1_np_indices = np.array([1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([1])) + opt = rmsprop.RMSPropOptimizer( + learning_rate=learning_rate, + decay=decay, + momentum=momentum, + epsilon=epsilon, + centered=centered) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + mg0 = opt.get_slot(var0, "mg") + self.assertEqual(mg0 is not None, centered) + mg1 = opt.get_slot(var1, "mg") + self.assertEqual(mg1 is not None, centered) + rms0 = opt.get_slot(var0, "rms") + self.assertTrue(rms0 is not None) + rms1 = opt.get_slot(var1, "rms") + self.assertTrue(rms1 is not None) + mom0 = opt.get_slot(var0, "momentum") + self.assertTrue(mom0 is not None) + mom1 = opt.get_slot(var1, "momentum") + self.assertTrue(mom1 is not None) + + mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype) + rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype) + mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 4 steps of RMSProp + for _ in range(1, 5): + update.run() + + var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy( + var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np, + learning_rate, decay, momentum, epsilon, centered) + var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy( + var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np, + learning_rate, decay, momentum, epsilon, centered) + + # Validate updated params + if centered: + self.assertAllCloseAccordingToType(mg0_np, mg0.eval()) + self.assertAllCloseAccordingToType(mg1_np, mg1.eval()) + self.assertAllCloseAccordingToType(rms0_np, rms0.eval()) + self.assertAllCloseAccordingToType(rms1_np, rms1.eval()) + self.assertAllCloseAccordingToType(mom0_np, mom0.eval()) + self.assertAllCloseAccordingToType(mom1_np, mom1.eval()) + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testWithoutMomentum(self): + for dtype in [dtypes.half, dtypes.float32]: + with self.test_session(use_gpu=True): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + opt = rmsprop.RMSPropOptimizer( + learning_rate=2.0, decay=0.9, momentum=0.0, epsilon=1.0) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + rms0 = opt.get_slot(var0, "rms") + self.assertTrue(rms0 is not None) + rms1 = opt.get_slot(var1, "rms") + self.assertTrue(rms1 is not None) + mom0 = opt.get_slot(var0, "momentum") + self.assertTrue(mom0 is not None) + mom1 = opt.get_slot(var1, "momentum") + self.assertTrue(mom1 is not None) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Step 1: the rms accumulators where 1. So we should see a normal + # update: v -= grad * learning_rate + update.run() + # Check the root mean square accumulators. + self.assertAllCloseAccordingToType( + np.array([0.901, 0.901]), rms0.eval()) + self.assertAllCloseAccordingToType( + np.array([0.90001, 0.90001]), rms1.eval()) + # Check the parameters. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)), + 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) + ]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)), + 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) + ]), var1.eval()) + # Step 2: the root mean square accumulators contain the previous update. + update.run() + # Check the rms accumulators. + self.assertAllCloseAccordingToType( + np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval()) + self.assertAllCloseAccordingToType( + np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval()) + # Check the parameters. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) - + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0)), + 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) - + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0)) + ]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) - + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0)), + 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) - + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0)) + ]), var1.eval()) + + def testWithMomentum(self): + for dtype in [dtypes.half, dtypes.float32]: + with self.test_session(use_gpu=True): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + + opt = rmsprop.RMSPropOptimizer( + learning_rate=2.0, decay=0.9, momentum=0.5, epsilon=1e-5) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + rms0 = opt.get_slot(var0, "rms") + self.assertTrue(rms0 is not None) + rms1 = opt.get_slot(var1, "rms") + self.assertTrue(rms1 is not None) + mom0 = opt.get_slot(var0, "momentum") + self.assertTrue(mom0 is not None) + mom1 = opt.get_slot(var1, "momentum") + self.assertTrue(mom1 is not None) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + # Step 1: rms = 1, mom = 0. So we should see a normal + # update: v -= grad * learning_rate + update.run() + # Check the root mean square accumulators. + self.assertAllCloseAccordingToType( + np.array([0.901, 0.901]), rms0.eval()) + self.assertAllCloseAccordingToType( + np.array([0.90001, 0.90001]), rms1.eval()) + # Check the momentum accumulators + self.assertAllCloseAccordingToType( + np.array([(0.1 * 2.0 / math.sqrt(0.901 + 1e-5)), + (0.1 * 2.0 / math.sqrt(0.901 + 1e-5))]), mom0.eval()) + self.assertAllCloseAccordingToType( + np.array([(0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)), + (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5))]), mom1.eval()) + + # Check that the parameters. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)), + 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) + ]), var0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)), + 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) + ]), var1.eval()) + + # Step 2: the root mean square accumulators contain the previous update. + update.run() + # Check the rms accumulators. + self.assertAllCloseAccordingToType( + np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval()) + self.assertAllCloseAccordingToType( + np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) + + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)), + 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) + + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)) + ]), mom0.eval()) + self.assertAllCloseAccordingToType( + np.array([ + 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) + + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)), + 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) + + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)) + ]), mom1.eval()) + + # Check the parameters. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) - + (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) + + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))), + 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) - + (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) + + (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))) + ]), var0.eval()) + + self.assertAllCloseAccordingToType( + np.array([ + 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) - + (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) + + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))), + 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) - + (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) + + (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))) + ]), var1.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index d5106752dd..899fda67fe 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -99,6 +99,7 @@ _update_device = threading.local() def get_update_device(): + """Get the current device if in a `DistributionStrategy.update()` call.""" try: return _update_device.current except AttributeError: @@ -406,19 +407,19 @@ class DistributionStrategy(object): different across devices, and "Mirrored" when the value are the same. * Unwrapping and merging: Consider calling a function `fn` on multiple devices, like `call_for_each_tower(fn, w)` with an - argument `w that is a wrapped value. This means `w` will have a + argument `w` that is a wrapped value. This means `w` will have a map taking tower device `d0` to `w0`, tower device `d1` to `w1`, etc. `call_for_each_tower()` unwraps `w` before calling `fn`, so it calls `fn(w0)` on `d0`, `fn(w1)` on `d1`, etc. It then merges the return values from `fn()`, which can possibly result in wrapped values. For example, let's say `fn()` returns a tuple with - three components: (x, a, v0) from tower 0, (x, b, v1) on tower 1, + three components: `(x, a, v0)` from tower 0, `(x, b, v1)` on tower 1, etc. If the first component is the same object `x` from every tower, then the first component of the merged result will also be `x`. If the second component is different (`a`, `b`, ...) from each tower, then the merged value will have a wrapped map from tower device to the different values. If the third component is - the members of a mirrored variable (`v` maps `d0` to `v0, `d1` to + the members of a mirrored variable (`v` maps `d0` to `v0`, `d1` to `v1`, etc.), then the merged result will be that mirrored variable (`v`). * Tower context vs. Cross-tower context: _tower context_ is when we diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index d22a465376..34dd419f15 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -211,7 +211,6 @@ def _get_default_do_not_descend_map(): 'tf': ['cli', 'lib', 'wrappers'], 'tf.contrib': [ 'compiler', - 'distribute', 'grid_rnn', # Block contrib.keras to de-clutter the docs 'keras', -- GitLab From 79a5ae8ccf1af9e46e10a1e9f8347b33343b06e8 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Thu, 29 Mar 2018 15:32:14 -0700 Subject: [PATCH 710/960] Internal Change PiperOrigin-RevId: 190996815 --- tensorflow/contrib/distribute/README.md | 143 ++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 tensorflow/contrib/distribute/README.md diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md new file mode 100644 index 0000000000..ba9a392c77 --- /dev/null +++ b/tensorflow/contrib/distribute/README.md @@ -0,0 +1,143 @@ +# Distribution Strategy + +> *NOTE*: This is a experimental feature. The API and performance +> characteristics are subject to change. + +## Overview + +[`DistributionStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/DistributionStrategy) +API is an easy way to distribute your training +across multiple devices/machines. Our goal is to allow users to use existing +models and training code with minimal changes to enable distributed training. +Moreover, we've design the API in such a way that it works with both eager and +graph execution. + +Currently we support one type of strategy, called +[`MirroredStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/MirroredStrategy). +It does in-graph replication with synchronous training +on many GPUs on one machine. Essentially, we create copies of all variables in +the model's layers on each device. We then use all-reduce to combine gradients +across the devices before applying them to the variables to keep them in sync. +In the future, we intend to support other kinds of training configurations such +as multi-node, synchronous, +[asynchronous](https://www.tensorflow.org/deploy/distributed#putting_it_all_together_example_trainer_program), +parameter servers and model parallelism. + +## Example + +Let's demonstrate how to use this API with a simple example. We will use the +[`Estimator`](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator) +approach, and show you how to scale your model to run on multiple GPUs on one +machine using `MirroredStrategy`. + +Let's consider a very simple model function which tries to learn a simple +function. + +```python +def model_fn(features, labels, mode): + layer = tf.layers.Dense(1) + logits = layer(features) + + if mode == tf.estimator.ModeKeys.PREDICT: + predictions = {"logits": logits} + return tf.estimator.EstimatorSpec(mode, predictions=predictions) + + loss = tf.losses.mean_squared_error( + labels=labels, predictions=tf.reshape(logits, [])) + + if mode == tf.estimator.ModeKeys.EVAL: + return tf.estimator.EstimatorSpec(mode, loss=loss) + + if mode == tf.estimator.ModeKeys.TRAIN: + train_op = tf.train.GradientDescentOptimizer(0.2).minimize(loss_fn()) + return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op) +``` + +Let's also define a simple input function to feed data for training this model. +Note that we require using +[`tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) +with `DistributionStrategy`. + + +```python +def input_fn(): + features = tf.data.Dataset.from_tensors([[1.]]).repeat(100) + labels = tf.data.Dataset.from_tensors(1.).repeat(100) + return dataset_ops.Dataset.zip((features, labels)) +``` + +Now that we have a model function and input function defined, we can define the +estimator. To use `MirroredStrategy`, all we need to do is: + +* Create an instance of the `MirroredStrategy` class. +* Pass it to the +[`RunConfig`](https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig) +parameter of `Estimator`. + + +```python +distribution = tf.contrib.distribute.MirroredStrategy() +config = tf.estimator.RunConfig(distribute=distribution) +classifier = tf.estimator.Estimator(model_fn=model_fn, config=config) +classifier.train(input_fn=input_fn) +``` + +That's it! This change will now configure estimator to run on all GPUs on your +machine, with the `MirroredStrategy` approach. It will take care of distributing +the input dataset, replicating layers and variables on each device, and +combining and applying gradients. + +The model and input functions do not have to change because we have changed the +underlying components of TensorFlow (such as +optimizer, batch norm and summaries) to become distribution-aware. +That means those components learn how to +combine their state across devices. Further, saving and checkpointing works +seamlessly, so you can save with one or no distribution strategy and resume with +another. + +Above, we showed the easiest way to use [`MirroredStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/MirroredStrategy#__init__). +There are few things you can customize in practice: + +* You can specify a list of specific GPUs (using param `devices`) or the number +of GPUs (using param `num_gpus`), in case you don't want auto detection. +* You can specify various parameters for all reduce with the `cross_tower_ops` +param, such as the all reduce algorithm to use, and gradient repacking. + +## Performance Tips + +We've tried to make it such that you get the best performance for your existing +model. We also recommend you follow the tips from +[Input Pipeline Performance Guide](https://www.tensorflow.org/performance/datasets_performance). +Specifically, we found using [`map_and_batch`](https://www.tensorflow.org/performance/datasets_performance#map_and_batch) +and [`dataset.prefetch`](https://www.tensorflow.org/performance/datasets_performance#pipelining) +in the input function gives a solid boost in performance. When using +`dataset.prefetch`, use `buffer_size=None` to let it detect optimal buffer size. + +## Caveats +This feature is in early stages and there are a lot of improvements forthcoming: + +* Metrics are not yet supported during distributed training. +* Summaries are currently computed in every tower. +* Evaluation is not yet distributed. +* Eager support is in the works; performance can be more challenging with eager +execution. +* As mentioned earlier, multi-node and other distributed strategies will be +introduced in the future. +* If you are [`batching`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch) +your input data, we will place one batch on each GPU in each step. So your +effective batch size will be `num_gpus * batch_size`. Therefore, consider +adjusting your learning rate or batch size according to the number of GPUs. +We are working on addressing this limitation by splitting each batch across GPUs +instead. +* Dictionaries inside dataset in the input are not supported when prefetching +on GPUs is turned on. (If you need to use dictionaries in the dataset, turn off +prefetching on GPUs by passing param `prefetch_on_device=False` to +`MirroredStrategy`) + +## What's next? + +Please give distribution strategies a try. This feature is in early stages and +is evolving, so we welcome your feedback via +[issues on GitHub](https://github.com/tensorflow/tensorflow/issues/new). + + -- GitLab From 497dab37519a1856a52e6564d8eb1d03382911c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 15:32:42 -0700 Subject: [PATCH 711/960] capture_tpu_profile will fallback to old behavior if user specify local directories as model directory. PiperOrigin-RevId: 190996878 --- tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index e6811d4ad2..f2003e04dd 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -70,8 +70,12 @@ ProfileResponse Profile(const string& service_addr, int duration_ms, ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); - request.set_repository_root(repository_root); - request.set_session_id(session_id); + if (tensorflow::str_util::StartsWith(repository_root, "gs://")) { + // For backward compatibilities, only generate tracetable etc when the + // user provide a GCS path for model directory. + request.set_repository_root(repository_root); + request.set_session_id(session_id); + } request.add_tools("input_pipeline"); request.add_tools("overview_page"); *request.mutable_opts() = opts; -- GitLab From af670bdc0e61802778f61778dd1623c87f30e874 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 29 Mar 2018 15:36:14 -0700 Subject: [PATCH 712/960] Undisables broken list_ops_test PiperOrigin-RevId: 190997355 --- tensorflow/core/kernels/list_kernels.h | 16 +++++++--------- tensorflow/python/kernel_tests/BUILD | 4 ---- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 8af48f0a67..f3bbf3b6e3 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -160,15 +160,13 @@ class TensorListFromTensor : public OpKernel { tmp_shape.RemoveDim(0); OP_REQUIRES(c, tmp.CopyFrom(tmp, tmp_shape), errors::Unknown("Unexpected shape error.")); - if (tmp.IsAligned() || !DataTypeCanUseMemcpy(DataTypeToEnum::value)) { - output_list.tensors.push_back(tmp); - } else { - Tensor aligned; - OP_REQUIRES_OK(c, c->allocate_temp(tmp.dtype(), tmp.shape(), &aligned)); - aligned.flat().device(c->eigen_device()) = - tmp.unaligned_flat(); - output_list.tensors.push_back(aligned); - } + // TODO(apassos) maybe not always align; but weird compiler bugs seem to + // prevent this. + Tensor aligned; + OP_REQUIRES_OK(c, c->allocate_temp(tmp.dtype(), tmp.shape(), &aligned)); + aligned.flat().device(c->eigen_device()) = + tmp.unaligned_flat(); + output_list.tensors.push_back(aligned); } output_tensor->scalar()() = std::move(output_list); } diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5eceb9f768..ea210346c1 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -96,10 +96,6 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], grpc_enabled = True, - tags = [ - "no_gpu", - "nogpu", - ], ) cuda_py_test( -- GitLab From 2bc52cd2d481a89c9724d20e827097efa4ff3f1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 15:40:14 -0700 Subject: [PATCH 713/960] - Expose slim arg_scope function to compute keys to enable tessting. - Add is_training=None option to mobinenet arg_scopes. This allows the users to set is_training from an outer scope. PiperOrigin-RevId: 190997959 --- .../contrib/framework/python/ops/arg_scope.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/arg_scope.py b/tensorflow/contrib/framework/python/ops/arg_scope.py index 3cad1fee19..5b15033995 100644 --- a/tensorflow/contrib/framework/python/ops/arg_scope.py +++ b/tensorflow/contrib/framework/python/ops/arg_scope.py @@ -68,7 +68,7 @@ from tensorflow.python.util import tf_decorator __all__ = [ 'arg_scope', 'add_arg_scope', 'current_arg_scope', 'has_arg_scope', - 'arg_scoped_arguments' + 'arg_scoped_arguments', 'arg_scope_func_key' ] _ARGSTACK = [{}] @@ -89,7 +89,7 @@ def current_arg_scope(): return stack[-1] -def _key_op(op): +def arg_scope_func_key(op): return getattr(op, '_key_op', str(op)) @@ -103,9 +103,9 @@ def _kwarg_names(func): def _add_op(op): - key_op = _key_op(op) - if key_op not in _DECORATED_OPS: - _DECORATED_OPS[key_op] = _kwarg_names(op) + key = arg_scope_func_key(op) + if key not in _DECORATED_OPS: + _DECORATED_OPS[key] = _kwarg_names(op) @tf_contextlib.contextmanager @@ -147,16 +147,16 @@ def arg_scope(list_ops_or_scope, **kwargs): try: current_scope = current_arg_scope().copy() for op in list_ops_or_scope: - key_op = _key_op(op) + key = arg_scope_func_key(op) if not has_arg_scope(op): raise ValueError('%s is not decorated with @add_arg_scope', _name_op(op)) - if key_op in current_scope: - current_kwargs = current_scope[key_op].copy() + if key in current_scope: + current_kwargs = current_scope[key].copy() current_kwargs.update(kwargs) - current_scope[key_op] = current_kwargs + current_scope[key] = current_kwargs else: - current_scope[key_op] = kwargs.copy() + current_scope[key] = kwargs.copy() _get_arg_stack().append(current_scope) yield current_scope finally: @@ -176,14 +176,14 @@ def add_arg_scope(func): def func_with_args(*args, **kwargs): current_scope = current_arg_scope() current_args = kwargs - key_func = _key_op(func) + key_func = arg_scope_func_key(func) if key_func in current_scope: current_args = current_scope[key_func].copy() current_args.update(kwargs) return func(*args, **current_args) _add_op(func) - setattr(func_with_args, '_key_op', _key_op(func)) + setattr(func_with_args, '_key_op', arg_scope_func_key(func)) return tf_decorator.make_decorator(func, func_with_args) @@ -196,7 +196,7 @@ def has_arg_scope(func): Returns: a boolean. """ - return _key_op(func) in _DECORATED_OPS + return arg_scope_func_key(func) in _DECORATED_OPS def arg_scoped_arguments(func): @@ -209,4 +209,4 @@ def arg_scoped_arguments(func): a list of kwargs names. """ assert has_arg_scope(func) - return _DECORATED_OPS[_key_op(func)] + return _DECORATED_OPS[arg_scope_func_key(func)] -- GitLab From e302cd64afacb5cc9057f03b5fbbee6315a33573 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Thu, 29 Mar 2018 16:01:20 -0700 Subject: [PATCH 714/960] Updated eager guide to use tensorflow 1.7. Code snippets still work. PiperOrigin-RevId: 191001008 --- .../docs_src/programmers_guide/eager.md | 203 +----------------- 1 file changed, 5 insertions(+), 198 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/eager.md b/tensorflow/docs_src/programmers_guide/eager.md index 9ae1e602f4..8db65737dc 100644 --- a/tensorflow/docs_src/programmers_guide/eager.md +++ b/tensorflow/docs_src/programmers_guide/eager.md @@ -29,10 +29,10 @@ problem and share your benchmarks. ## Setup and basic usage -Install TensorFlow 1.7 to include the updates for eager execution: +Upgrade to TensorFlow 1.7 to include updates for eager execution: ``` -$ pip install --pre --upgrade tensorflow +$ pip install --upgrade tensorflow ``` To start eager execution, add `tf.enable_eager_execution()` to the beginning of @@ -322,14 +322,13 @@ grad_log1pexp(0.) # => [0.5] grad_log1pexp(100.) # => [nan] ``` - Here, the `log1pexp` function can be analytically simplified with a custom gradient. The implementation below reuses the value for `tf.exp(x)` that is computed during the forward pass—making it more efficient by eliminating redundant calculations: ```py -@tfe.custom_gradient +@tf.custom_gradient def log1pexp(x): e = tf.exp(x) def grad(dy): @@ -605,7 +604,7 @@ print(x) # => 2.0 ``` To save and load models, `tfe.Checkpoint` stores the internal state of objects, -without requiring hiiden variables. To record the state of a `model`, +without requiring hidden variables. To record the state of a `model`, an `optimizer`, and a global step, pass them to a `tfe.Checkpoint`: ```py @@ -649,9 +648,8 @@ inserted during model construction. For example, to record summaries once every 100 global steps: ```py -tf.train.get_or_create_global_step() # return global step var writer = tf.contrib.summary.create_file_writer(logdir) -global_step=tf.train.get_or_create_global_step() +global_step=tf.train.get_or_create_global_step() # return global step var writer.set_as_default() @@ -733,197 +731,6 @@ But this gap grows larger for models with less computation and there is work to be done for optimizing hot code paths for models with lots of small operations. -## Work with graphs - -While eager execution makes development and debugging more interactive, -TensorFlow graph execution has advantages for distributed training, performance -optimizations, and production deployment. However, writing graph code can feel -different than writing regular Python code and more difficult to debug. - -For building and training graph-constructed models, the Python program first -builds a graph representing the computation, then invokes `Session.run` to send -the graph for execution on the C++-based runtime. This provides: - -* Automatic differentiation using static autodiff. -* Simple deployment to a platform independent server. -* Graph-based optimizations (common subexpression elimination, constant-folding, etc.). -* Compilation and kernel fusion. -* Automatic distribution and replication (placing nodes on the distributed system). - -Deploying code written for eager execution is more difficult: either generate a -graph from the model, or run the Python runtime and code directly on the server. - -### Write compatible code - -The same code written for eager execution will also build a graph during graph -execution. Do this by simply running the same code in a new Python session where -eager execution is not enabled. - -Most TensorFlow operations work during eager execution, but there are some things -to keep in mind: - -* Use `tf.data` for input processing instead of queues. It's faster and easier. -* Use object-oriented layer APIs—like `tf.keras.layers` and - `tf.keras.Model`—since they have explicit storage for variables. -* Most model code works the same during eager and graph execution, but there are - exceptions. (For example, dynamic models using Python control flow to change the - computation based on inputs.) -* Once eager execution is enabled with `tf.enable_eager_execution`, it - cannot be turned off. Start a new Python session to return to graph execution. - -It's best to write code for both eager execution *and* graph execution. This -gives you eager's interactive experimentation and debuggability with the -distributed performance benefits of graph execution. - -Write, debug, and iterate in eager execution, then import the model graph for -production deployment. Use `tfe.Checkpoint` to save and restore model -variables, this allows movement between eager and graph execution environments. -See the examples in: -[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples). - -### Use eager execution in a graph environment - -Selectively enable eager execution in a TensorFlow graph environment using -`tfe.py_func`. This is used when `tf.enable_eager_execution()` has *not* -been called. - -```py -def my_py_func(x): - x = tf.matmul(x, x) # You can use tf ops - print(x) # but it's eager! - return x - -with tf.Session() as sess: - x = tf.placeholder(dtype=tf.float32) - # Call eager function in graph! - pf = tfe.py_func(my_py_func, [x], tf.float32) - sess.run(pf, feed_dict={x: [[2.0]]}) # [[4.0]] -``` - - -A `tfe.Checkpoint` stores the complete internal state of the objects passed to it. Nothing else is implicitly included. To record the state of a `model`, an `optimizer`, and a global step pass each one to the checkpoint's constructor: - -```py -model = MyModel() -optimizer = tf.train.AdamOptimizer(learning_rate=0.001) -checkpoint_dir = ‘/path/to/model_dir’ -checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") -root = tfe.Checkpoint(optimizer=optimizer, - model=model, - optimizer_step=tf.train.get_or_create_global_step()) - -root.save(file_prefix=checkpoint_prefix) -# or -root.restore(tf.train.latest_checkpoint(checkpoint_dir)) -``` - -### Object-oriented metrics - -`tfe.metrics` are stored as objects. Update a metric by passing the new data to -the callable, and retrieve the result using the `tfe.metrics.result` method, -for example: - -```py -m = tfe.metrics.Mean("loss") -m(0) -m(5) -m.result() # => 2.5 -m([8, 9]) -m.result() # => 5.5 -``` - -#### Summaries and TensorBoard - -@{$summaries_and_tensorboard$TensorBoard} is a visualization tool for -understanding, debugging and optimizing the model training process. It uses -summary events that are written while executing the program. - -`tf.contrib.summary` is compatible with both eager and graph execution -environments. Summary operations, such as `tf.contrib.summary.scalar`, are -inserted during model construction. For example, to record summaries once every -100 global steps: - -```py -tf.train.get_or_create_global_step() # return global step var -writer = tf.contrib.summary.create_file_writer(logdir) - -for _ in range(iterations): - with writer.as_default(): - with tf.contrib.summary.record_summaries_every_n_global_steps(100): - # your model code goes here - tf.contrib.summary.scalar('loss', loss) - ... -``` - -## Performance - -Computation is not automatically offloaded to GPUs during eager execution. To -explicitly direct a computation to a GPU, enclose it in a -`tf.device('/gpu:0')` block: - -```py -import time - -def measure(x, steps): - # TensorFlow initializes a GPU the first time it's used, exclude from timing. - tf.matmul(x, x) - start = time.time() - for i in range(steps): - x = tf.matmul(x, x) - _ = x.numpy() # Make sure to execute op and not just enqueue it - end = time.time() - return end - start - -shape = (1000, 1000) -steps = 200 -print("Time to multiply a {} matrix by itself {} times:".format(shape, steps)) - -# Run on CPU: -with tf.device("/cpu:0"): - print("CPU: {} secs".format(measure(tf.random_normal(shape), steps))) - -# Run on GPU, if available: -if tfe.num_gpus() > 0: - with tf.device("/gpu:0"): - print("GPU: {} secs".format(measure(tf.random_normal(shape), steps))) -else: - print("GPU: not found") -``` - -Output (exact numbers depend on hardware): - -``` -Time to multiply a (1000, 1000) matrix by itself 200 times: -CPU: 4.614904403686523 secs -GPU: 0.5581181049346924 secs -``` - -A `tf.Tensor` object can be copied to a different device to execute its -operations: - -```py -x = tf.random_normal([10, 10]) - -x_gpu0 = x.gpu() -x_cpu = x.cpu() - -_ = tf.matmul(x_cpu, x_cpu) # Runs on CPU -_ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0 - -if tfe.num_gpus() > 1: - x_gpu1 = x.gpu(1) - _ = tf.matmul(x_gpu1, x_gpu1) # Runs on GPU:1 -``` - -### Benchmarks - -For compute-heavy models, such as -[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50) -training on a GPU, eager execution performance is comparable to graph execution. -But this gap grows larger for models with less computation and there is work to -be done for optimizing hot code paths for models with lots of small operations. - - ## Work with graphs While eager execution makes development and debugging more interactive, -- GitLab From 0f01f076f86882104c4c358b2679cce1ad85057c Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 29 Mar 2018 16:02:26 -0700 Subject: [PATCH 715/960] Add support for running benchmarks in XLA unit tests. In the XLA internal test 'main', parse the --benchmarks flag if it exists and runs the specified benchmarks. Previously microbenchmarks defined in unit tests were never run. PiperOrigin-RevId: 191001183 --- .../xla/tests/xla_internal_test_main.cc | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tensorflow/compiler/xla/tests/xla_internal_test_main.cc b/tensorflow/compiler/xla/tests/xla_internal_test_main.cc index 92b2b1ee77..0af40bc15a 100644 --- a/tensorflow/compiler/xla/tests/xla_internal_test_main.cc +++ b/tensorflow/compiler/xla/tests/xla_internal_test_main.cc @@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + #include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h" +#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" GTEST_API_ int main(int argc, char** argv) { std::vector flag_list; @@ -25,7 +28,37 @@ GTEST_API_ int main(int argc, char** argv) { return 2; } + // If the --benchmarks flag is passed in then only run the benchmarks, not the + // tests. + for (int i = 1; i < argc; i++) { + tensorflow::StringPiece arg(argv[i]); + if (arg == "--benchmarks" || arg.starts_with("--benchmarks=")) { + const char* pattern = nullptr; + if (arg.starts_with("--benchmarks=")) { + pattern = argv[i] + strlen("--benchmarks="); + } else { + // Handle flag of the form '--benchmarks foo' (no '='). + if (i + 1 >= argc || + tensorflow::StringPiece(argv[i + 1]).starts_with("--")) { + LOG(ERROR) << "--benchmarks flag requires an argument."; + return 2; + } + pattern = argv[i + 1]; + } + // Unfortunately Google's internal benchmark infrastructure has a + // different API than Tensorflow's. +#if defined(PLATFORM_GOOGLE) + base::SetFlag(&FLAGS_benchmarks, pattern); + RunSpecifiedBenchmarks(); +#else + tensorflow::testing::Benchmark::Run(pattern); +#endif + return 0; + } + } + testing::InitGoogleTest(&argc, argv); + if (argc > 1) { LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage; return 2; -- GitLab From 72205dadc2a973b746b3fdb6708429fd882a5d23 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Thu, 29 Mar 2018 16:37:05 -0700 Subject: [PATCH 716/960] Minor language change in readme. PiperOrigin-RevId: 191006151 --- tensorflow/contrib/distribute/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md index ba9a392c77..4af51bec1a 100644 --- a/tensorflow/contrib/distribute/README.md +++ b/tensorflow/contrib/distribute/README.md @@ -89,8 +89,8 @@ combining and applying gradients. The model and input functions do not have to change because we have changed the underlying components of TensorFlow (such as -optimizer, batch norm and summaries) to become distribution-aware. -That means those components learn how to +optimizer, batch norm and summaries) to become distribution-aware. +That means those components know how to combine their state across devices. Further, saving and checkpointing works seamlessly, so you can save with one or no distribution strategy and resume with another. @@ -133,6 +133,7 @@ instead. on GPUs is turned on. (If you need to use dictionaries in the dataset, turn off prefetching on GPUs by passing param `prefetch_on_device=False` to `MirroredStrategy`) +* PartitionedVariables are not supported yet. ## What's next? -- GitLab From 1ba89338bdb4afb85ae56e64b47acc93a3a28703 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 29 Mar 2018 16:50:34 -0700 Subject: [PATCH 717/960] Fixing a subtle bug where in some cases the post cancellation work wasn't being done correctly. This is the scenario in which FunctionBufferingResource::Cancel() got called while buffering was being done, but then the buffer filled up in which case FillBuffer() wasn't ever called and the Cancel() method would get stuck waiting on a notification from the condition variable leading to timeouts. This CL fixes this by making sure FillBuffer() got called one last time in this case. Tested by running contrib/data/python/kernel_tests:prefetching_ops_test 500 times and ran contrib/distribute/python:values_test 500 times with no timeouts. PiperOrigin-RevId: 191007895 --- tensorflow/contrib/data/kernels/prefetching_kernels.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index 2afb8dbbf4..207f2820bf 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -224,6 +224,13 @@ class FunctionBufferingResource : public ResourceBase { if (buffer_.size() < buffer_size_ && !end_of_sequence_) { restart_buffering = true; } else { + // When the buffer is full, we don't want to call + // FillBuffer() unless we're in cancellation phase in which + // case FillBuffer() will do the final cleanup post + // cancellation. + if (cancelled_) { + restart_buffering = true; + } is_buffering_ = false; } } -- GitLab From 6628001dcc10c429aec33da186ff281d26729ac3 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 29 Mar 2018 16:56:42 -0700 Subject: [PATCH 718/960] Updating documentation. PiperOrigin-RevId: 191008662 --- tensorflow/contrib/lite/toco/README.md | 33 +- .../lite/toco/g3doc/cmdline_examples.md | 404 ++++++++---------- .../lite/toco/g3doc/cmdline_reference.md | 119 ++---- .../contrib/lite/toco/g3doc/python_api.md | 7 + .../lite/toco/g3doc/toco_landscape.svg | 1 + 5 files changed, 258 insertions(+), 306 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md index 281b2ea5e4..522e260ad2 100644 --- a/tensorflow/contrib/lite/toco/README.md +++ b/tensorflow/contrib/lite/toco/README.md @@ -1,26 +1,27 @@ -# The TensorFlow Lite Optimizing Converter +# TOCO: TensorFlow Lite Optimizing Converter -The TensorFlow Lite Optimizing Converter's most typical use is converting from the TensorFlow GraphDef to the TensorFlow Lite -format, but it supports much more than that. +The TensorFlow Lite Optimizing Converter converts TensorFlow graphs into +TensorFlow Lite graphs. There are additional usages that are also detailed in +the usage documentation. ## Usage documentation Usage information is given in these documents: +* [Command-line glossary](g3doc/cmdline_reference.md) * [Command-line examples](g3doc/cmdline_examples.md) -* [Command-line reference](g3doc/cmdline_reference.md) -* [Python API](g3doc/python_api.md) - -## Design documentation - -Coming soon! +* [Python API examples](g3doc/python_api.md) ## Where the converter fits in the TensorFlow landscape -In the typical case, an application developer is using TensorFlow to design and -train models, then uses TensorFlow's freeze_graph.py to generate a frozen -inference graph, then uses the converter to convert that into a TensorFlow Lite flatbuffer file, -then ships that file to client devices where the TensorFlow Lite interpreter handles them -on-device. This is represented in the following diagram: - -![drawing](https://storage.googleapis.com/download.tensorflow.org/example_images/tensorflow_landscape.svg) +Once an application developer has a trained TensorFlow model, TOCO will accept +that model and generate a TensorFlow Lite +[FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports +[SavedModels](https://www.tensorflow.org/programmers_guide/saved_model#using_savedmodel_with_estimators) +and frozen graphs (models generated via +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)). +The TensorFlow Lite FlatBuffer file can be shipped to client devices, generally +mobile devices, where the TensorFlow Lite interpreter handles them on-device. +This flow is represented in the diagram below. + +![drawing](g3doc/toco_landscape.svg) diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md index 372c525589..495014c6fc 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -1,73 +1,72 @@ # TensorFlow Lite Optimizing Converter command-line examples -This page is a guide to using the TensorFlow Lite Optimizing Converter by -looking at some example command lines. It is complemented by the following other -documents: +This page provides examples on how to use TOCO via command line. It is +complemented by the following documents: * [README](../README.md) -* [Command-line reference](cmdline_reference.md) +* [Command-line glossary](cmdline_reference.md) +* [Python API examples](python_api.md) Table of contents: -[TOC] - -## Convert a TensorFlow GraphDef to TensorFlow Lite for float inference - -In this example, we look at the most common task: we have an ordinary TensorFlow -GraphDef and want to convert it to a TensorFlow Lite flatbuffer to perform -floating-point inference. +* [Convert a TensorFlow SavedModel to TensorFlow Lite](#savedmodel) +* [Convert a TensorFlow GraphDef to TensorFlow Lite for float + inference](#graphdef-float) +* [Quantization](#quantization) + * [Convert a TensorFlow GraphDef to TensorFlow Lite for quantized + inference](#graphdef-quant) + * [Use "dummy-quantization" to try out quantized inference on a float + graph](#dummy-quant) +* [Specifying input and output arrays](#specifying-input-and-output-arrays) + * [Multiple output arrays](#multiple-output-arrays) + * [Multiple input arrays](#multiple-input-arrays) + * [Specifying subgraphs](#specifying-subgraphs) +* [Other conversions supported by TOCO](#other-conversions) + * [Optimize a TensorFlow GraphDef](#optimize-graphdef) + * [Convert a TensorFlow Lite FlatBuffer back into TensorFlow GraphDef + format](#to-graphdef) +* [Logging](#logging) + * [Standard logging](#standard-logging) + * [Verbose logging](#verbose-logging) + * [Graph "video" logging](#graph-video-logging) +* [Graph visualizations](#graph-visualizations) + * [Using --output_format=GRAPHVIZ_DOT](#using-output-formatgraphviz-dot) + * [Using --dump_graphviz](#using-dump-graphviz) + * [Legend for the graph visualizations](#graphviz-legend) + +## Convert a TensorFlow SavedModel to TensorFlow Lite
+ +The follow example converts a basic TensorFlow SavedModel into a Tensorflow Lite +FlatBuffer to perform floating-point inference. ``` -curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ - | tar xzv -C /tmp bazel run --config=opt \ - //tensorflow/contrib/lite/toco:toco -- \ - --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ - --output_file=/tmp/foo.tflite \ - --input_format=TENSORFLOW_GRAPHDEF \ - --output_format=TFLITE \ - --inference_type=FLOAT \ - --input_shape=1,128,128,3 \ - --input_array=input \ - --output_array=MobilenetV1/Predictions/Reshape_1 + third_party/tensorflow/contrib/lite/toco:toco -- \ + --savedmodel_directory=/tmp/saved_model \ + --output_file=/tmp/foo.tflite ``` -To explain each of these flags: - -* `--input_format` and `--output_format` determine the formats of the input - and output files: here we are converting from `TENSORFLOW_GRAPHDEF` to - `TFLITE`. -* `--input_file` specifies the path of the input file, to be converted. When - `--input_format=TENSORFLOW_GRAPHDEF`, this file should be a - *[frozen](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)* - *inference* graph. Being frozen means in particular that the input file is - self-contained, and does not reference any external "checkpoint" file. An - *inference* graph is a version of a graph meant to be used for inference, - typically not the same graph file as was used for training a given model. -* `--output_file` specifies the destination to write the converted file to. -* `--input_array` specifies the input activations, that is, the input "tensor" - in the input TensorFlow GraphDef file. The array designated by - `--input_array` is the one that the user will have to provide the contents - of as input to the runtime inference code. -* `--output_array` specifies the output activations, that is, the output - "tensor" in the input TensorFlow GraphDef file. The runtime inference code - will store its results in the array designated by `--output_array`. -* `--input_shape` specifies the shape of the input array. It is currently - required, but the plan is for a future version to no longer require it, - allowing to defer the specification of the input shape until runtime. The - format of `input_shape` is always a comma-separated list of dimensions, - always in TensorFlow convention. -* `--inference_type` specifies what type of arithmetic the output file should - be relying on. It implies in particular the choice of type of the output - arrays in the output file. - -## Just optimize a TensorFlow GraphDef +[SavedModel](https://www.tensorflow.org/programmers_guide/saved_model#using_savedmodel_with_estimators) +has fewer required flags than frozen graphs (described [below](#graphdef-float)) +due to access to additional data contained within the SavedModel. The values for +`--input_arrays` and `--output_arrays` are an aggregated, alphabetized list of +the inputs and outputs in the +[SignatureDefs](https://www.tensorflow.org/serving/signature_defs) within the +[MetaGraphDef](https://www.tensorflow.org/programmers_guide/saved_model#apis_to_build_and_load_a_savedmodel) +specified by `--savedmodel_tagset`. The value for `input_shapes` is +automatically determined from the MetaGraphDef whenever possible. The default +value for `--inference_type` for SavedModels is `FLOAT`. -The converter accepts both TENSORFLOW_GRAPHDEF and TFLITE file formats as both -`--input_format` and `--output_format`. This means that conversion from and to -any supported format is possible, and in particular, same-format "conversions" -are possible, and effectively ask the converter to optimize and simplify a -graph. Example: +There is currently no support for MetaGraphDefs without a SignatureDef or for +MetaGraphDefs that use the [`assets/` +directory](https://www.tensorflow.org/programmers_guide/saved_model#structure_of_a_savedmodel_directory). + +## Convert a TensorFlow GraphDef to TensorFlow Lite for float inference + +The follow example converts a basic TensorFlow GraphDef (frozen by +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)) +into a TensorFlow Lite FlatBuffer to perform floating-point inference. Frozen +graphs contain the variables stored in Checkpoint files as Const ops. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ @@ -75,56 +74,27 @@ curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_ bazel run --config=opt \ //tensorflow/contrib/lite/toco:toco -- \ --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ - --output_file=/tmp/foo.pb \ - --input_format=TENSORFLOW_GRAPHDEF \ - --output_format=TENSORFLOW_GRAPHDEF \ + --output_file=/tmp/foo.tflite \ + --inference_type=FLOAT \ --input_shape=1,128,128,3 \ --input_array=input \ --output_array=MobilenetV1/Predictions/Reshape_1 ``` -Here we did not pass `--inference_type` because it is not considered applicable -to the TensorFlow GraphDef format (as far as we are concerned, TensorFlow -GraphDefs are technically always float, and the only flavor of "quantized" -GraphDef that the converter deals with is "FakeQuantized" graphs that are still -technically float graphs). +## Quantization -Below in the section about passing arbitrary input/output arrays we give another -example, using the converter to extract just a sub-graph from a TensorFlow -GraphDef. +### Convert a TensorFlow GraphDef to TensorFlow Lite for quantized inference -## Convert a TensorFlow Lite flatbuffer back into TensorFlow GraphDef format +TOCO is compatible with fixed point quantization models described +[here](https://www.tensorflow.org/performance/quantization). These are float +models with +[`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization) +ops inserted at the boundaries of fused layers to record min-max range +information. This generates a quantized inference workload that reproduces the +quantization behavior that was used during training. -As we mentioned that the converter supports file format conversions in any -direction, let us just give an example of that: - -``` -bazel run --config=opt \ - //tensorflow/contrib/lite/toco:toco -- \ - --input_file=/tmp/foo.tflite \ - --output_file=/tmp/foo.pb \ - --input_format=TFLITE \ - --output_format=TENSORFLOW_GRAPHDEF \ - --input_shape=1,128,128,3 \ - --input_array=input \ - --output_array=MobilenetV1/Predictions/Reshape_1 -``` - -## Convert a TensorFlow GraphDef to TensorFlow Lite for quantized inference - -Let us now look at a quantized model. As mentioned above, the only flavor of -quantized TensorFlow GraphDefs that the converter is concerned with, is -"FakeQuantized" models. These are technically float models, but with special -`FakeQuant*` ops inserted at the boundaries of fused layers to record min-max -range information allowing to generate a quantized inference workload that is -able to reproduce exactly the specific quantization behavior that was used -during training. Indeed, the whole point of quantized training is to allow for -both training and inference to perform exactly the same arithmetic, so that the -way that the training process about around quantization inaccuracy is -effectively helping the quantized inference process to be more accurate. - -Given a quantized TensorFlow GraphDef, generating a quantized TensorFlow Lite -flatbuffer is done like this: +The following command generates a quantized TensorFlow Lite FlatBuffer from a +"quantized" TensorFlow GraphDef. ``` bazel run --config=opt \ @@ -141,36 +111,17 @@ bazel run --config=opt \ --std_value=127 ``` -Here, besides changing `--input_file` to point to a (fake-)quantized GraphDef, -the only other changes are: - -* To change `--inference_type` to `QUANTIZED_UINT8`. This effectively tells - the converter to generate an output file that performs quantized inference - on a quantized input. -* To pass `--mean_value` and `--std_value` flags to describe how the quantized - uint8 input array values are to be interpreted as the mathematical real - numbers that the graph is concerned with (keep in mind that even a - "fake-quantized" TensorFlow GraphDef is still technically a float graph). - The meaning of `--mean_value` and `--std_value` is explained in the - command-line reference; it suffices for now to say that they are a property - of each model. +### Use \"dummy-quantization\" to try out quantized inference on a float graph -## Use dummy-quantization to try out quantized inference on a float graph +In order to evaluate the possible benefit of generating a quantized graph, TOCO +allows "dummy-quantization" on float graphs. The flags `--default_ranges_min` +and `--default_ranges_max` accept plausable values for the min-max ranges of the +values in all arrays that do not have min-max information. "Dummy-quantization" +will produce lower accuracy but will emulate the performance of a correctly +quantized model. -Sometimes, one only has a plain float graph, and one is curious as to how much -faster inference might run if one could perform quantized inference instead of -float inference. Rather than requiring users to first invest in quantizing their -graphs before they can evaluate a possible benefit, the converter allows to -simply experiment with what we call "dummy quantization": provide some vaguely -plausible values for the min-max ranges of values in all arrays that do not have -min-max information, so that quantization can carry on, certainly producing -inaccurate results (do not use that in production!) but with performance -characteristics that should be identical to those of an actually quantized -flavor of the model. - -In the present example, we have a model using Relu6 activation functions almost -everywhere, so a reasonable guess is that most activation ranges should be -contained in [0, 6] and roughly comparable to it. +The example below contains a model using Relu6 activation functions. Therefore, +a reasonable guess is that most activation ranges should be contained in [0, 6]. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ @@ -191,15 +142,13 @@ bazel run --config=opt \ --std_value=127.5 ``` -## Multiple output arrays +## Specifying input and output arrays -Some models have multiple outputs. Even in a model with only one output, you may -want for the inference code to return the contents of other arrays as well, or -to perform inference on a subgraph with multiple outputs (see the section below -on specifying arbitrary arrays as input/output arrays). +### Multiple output arrays -Either way, using `--output_arrays` instead of `--output_array` allows to -specify a comma-separated list of output arrays. +The flag `output_arrays` takes in a comma-separated list of output arrays as +seen in the example below. This is useful for models or subgraphs with multiple +outputs. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ @@ -216,18 +165,11 @@ bazel run --config=opt \ --output_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu ``` -## Multiple input arrays - -Some models have multiple inputs; even in a model with a single input, you may -want for the inference code to implement only a subgraph with multiple inputs -(see the section below on specifying arbitrary arrays as input/output arrays). +### Multiple input arrays -Either way, multiple input arrays are specified by using `--input_arrays` -instead of `--input_array` to specify a comma-separated list of input arrays. In -that case, one also needs to use `--input_shapes` instead of `--input_shape`. -The syntax for `--input_shapes` is a bit trickier, since already the singular -`--input_shape` was a comma-separated list of integers! Multiple input shapes -are delimited by a colon (`:`) in `--input_shapes`. +The flag `input_arrays` takes in a comma-separated list of input arrays as seen +in the example below. This is useful for models or subgraphs with multiple +inputs. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ @@ -244,54 +186,93 @@ bazel run --config=opt \ --output_array=InceptionV1/Logits/Predictions/Reshape_1 ``` -## Specifying arbitrary arrays in a graph as input or output arrays +Note that `input_shapes` is provided as a colon-separated list. Each input shape +corresponds to the input array at the same position in the respective list. -Any array in the input file can be specified as an input or output array. This -allows to use the converter to extract a sub-graph out of the input graph file. -The converter then automatically discards any part of the graph that is not -needed for the subgraph identified by the specified input and output arrays. -Another use case for specifying multiple output arrays is to get inference code -to return the contents of some specified intermediate activations array, not -just the output activations. +### Specifying subgraphs -In order to know which array you want to pass as `--input_arrays` / -`--output_arrays`, it helps to have a visualization of the graph. See the -section below on graph visualization. When using graph visualization for that -purpose, make sure to use `--dump_graphviz=` to visualize exactly the graph as -it is in the actual final form being exported to the output file. +Any array in the input file can be specified as an input or output array in +order to extract subgraphs out of an input graph file. TOCO discards the parts +of the graph outside of the specific subgraph. Use [graph +visualizations](#graph-visualizations) to identify the input and output arrays +that make up the desired subgraph. + +The follow command shows how to extract a single fused layer out of a TensorFlow +GraphDef. + +``` +curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ + | tar xzv -C /tmp +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --output_file=/tmp/foo.pb \ + --input_format=TENSORFLOW_GRAPHDEF \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ + --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ + --output_array=InceptionV1/InceptionV1/Mixed_3b/concat_v2 +``` Note that the final representation of an on-device inference workload (say, in -TensorFlow Lite flatbuffers format) tends to have coarser granularity than the +TensorFlow Lite FlatBuffers format) tends to have coarser granularity than the very fine granularity of the TensorFlow GraphDef representation. For example, while a fully-connected layer is typically represented as at least four separate ops in TensorFlow GraphDef (Reshape, MatMul, BiasAdd, Relu...), it is typically represented as a single "fused" op (FullyConnected) in the converter's optimized representation and in the final on-device representation (e.g. in TensorFlow -Lite flatbuffer format). As the level of granularity gets coarser, some +Lite FlatBuffer format). As the level of granularity gets coarser, some intermediate arrays (say, the array between the MatMul and the BiasAdd in the TensorFlow GraphDef) are dropped. When specifying intermediate arrays as -`--input_arrays` / `--output_arrays`, it is generally at least desirable (and -often required) to specify arrays that are meant to survive in the final form of -the graph, after fusing. These are typically the outputs of activation functions -(since everything in each layer until the activation function tends to get -fused). +`--input_arrays` / `--output_arrays`, it is desirable (and often required) to +specify arrays that are meant to survive in the final form of the graph, after +fusing. These are typically the outputs of activation functions (since +everything in each layer until the activation function tends to get fused). + +## Other conversions supported by TOCO + +The converter accepts both TENSORFLOW_GRAPHDEF and TFLITE file formats as both +`--input_format` and `--output_format`. This means that conversion to and from +any supported format is possible. -Here is an example of extracting just a sub-graph, namely just a single fused -layer, out of a TensorFlow GraphDef, and exporting a TensorFlow GraphDef -containing just that subgraph: +### Optimize a TensorFlow GraphDef + +Same-format "conversions" can be used to optimize and simplify a graph or be +used to [get a subgraph](#specifying-subgraphs) of a graph. The flag +`--inference_type` is not required because TensorFlow graphs, including those +containing the +[`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization) +ops are always float graphs. ``` -curl https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz \ +curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ | tar xzv -C /tmp bazel run --config=opt \ //tensorflow/contrib/lite/toco:toco -- \ - --input_file=/tmp/inception_v1_2016_08_28_frozen.pb \ + --input_file=/tmp/mobilenet_v1_0.50_128/frozen_graph.pb \ --output_file=/tmp/foo.pb \ --input_format=TENSORFLOW_GRAPHDEF \ --output_format=TENSORFLOW_GRAPHDEF \ - --input_shapes=1,28,28,96:1,28,28,16:1,28,28,192:1,28,28,64 \ - --input_arrays=InceptionV1/InceptionV1/Mixed_3b/Branch_1/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_2/Conv2d_0a_1x1/Relu,InceptionV1/InceptionV1/Mixed_3b/Branch_3/MaxPool_0a_3x3/MaxPool,InceptionV1/InceptionV1/Mixed_3b/Branch_0/Conv2d_0a_1x1/Relu \ - --output_array=InceptionV1/InceptionV1/Mixed_3b/concat_v2 + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 +``` + +### Convert a TensorFlow Lite FlatBuffer back into TensorFlow GraphDef format + +The converter supports file format conversions from TensorFlow Lite, back into +TensorFlow GraphDef format. + +``` +bazel run --config=opt \ + //tensorflow/contrib/lite/toco:toco -- \ + --input_file=/tmp/foo.tflite \ + --output_file=/tmp/foo.pb \ + --input_format=TFLITE \ + --output_format=TENSORFLOW_GRAPHDEF \ + --input_shape=1,128,128,3 \ + --input_array=input \ + --output_array=MobilenetV1/Predictions/Reshape_1 ``` ## Logging @@ -299,8 +280,8 @@ bazel run --config=opt \ ### Standard logging The converter generates some informative log messages during processing. The -easiest way to view them is to add `--logtostderr` to command lines. For the -previous example, that gives: +easiest way to view them is to add `--logtostderr` to command lines as seen in +the following example. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ @@ -333,42 +314,34 @@ I1101 21:51:33.309484 5339 toco_tooling.cc:249] Estimated count of arithmetic For debugging purposes, the converter supports two levels of verbose logging, which can be set by passing a `--v=` flag: -* At `--v=1`, the converter generates text dumps of the graph at various - points during processing, as well as log messages about every graph - transformation that did take place, typically answering questions of the - form "why was my graph transformed in this way"? -* At `--v=2`, the converter additionally generates log messages about graph - transformations that were considered but not actually performed, typically - answering questions of the form "why was my graph NOT transformed when I - expected it would be?". +* For `--v=1`, the converter generates text dumps of the graph at various + points during processing as well as log messages about every graph + transformation that took place. +* For `--v=2`, the converter additionally generates log messages about graph + transformations that were considered but not performed. ### Graph "video" logging -When `--dump_graphviz=` is used (see the section on Graph visualizations), one -may additionally pass `--dump_graphviz_video`, which causes a graph -visualization to be dumped after each individual graph transformations, often -resulting in thousands of files. Typically, one would then bisect into these -files to understand when a given change was introduced in the graph. +When `--dump_graphviz=` is used (see the section on [graph +visualizations](#graph-visualizations)), one may additionally pass +`--dump_graphviz_video`, which causes a graph visualization to be dumped after +each individual graph transformation. This results in thousands of files. +Typically, one would then bisect into these files to understand when a given +change was introduced in the graph. ## Graph visualizations -The converter is able to export a graph to the GraphViz Dot format, for easy -visualization. Combined with the converter's ability to transform the graph into -a simpler, coarser-granularity representation, that makes it a very powerful -visualization tool. - -There are two ways to get the converter to export a GraphViz Dot file, -corresponding to two separate use cases. Understanding the difference between -them is key to getting useful graph visualizations. +TOCO can export a graph to the GraphViz Dot format for easy visualization via +either the `--output_format` flag or the `--dump_graphviz` flag. The subsections +below outline the use cases for each. ### Using `--output_format=GRAPHVIZ_DOT` -The first way to get a graphviz rendering is to pass -`--output_format=GRAPHVIZ_DOT`, instead of the `--output_format` that you would -otherwise use. This says: "I just want to get a plausible visualization of that -graph". The upside is that it makes for very simple command lines, and makes the -converter very lax about aspects of the graph or the command line that it would -otherwise complain about. Example: +The first way to get a graphviz rendering is to pass `GRAPHVIZ_DOT` into +`--output_format`. This results in a plausable visualization of the graph. This +reduces the requirements that normally exist during conversion between other +input and output formats. For example, this may be useful if conversion from +TENSORFLOW_GRAPHDEF to TFLITE is failing. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ @@ -391,7 +364,7 @@ dot -Tpdf -O /tmp/foo.dot ``` And the resulting `.dot.pdf` can be viewed in any PDF viewer, but we suggest one -with a good ability to pan and zoom across a very large page; Google Chrome does +with a good ability to pan and zoom across a very large page. Google Chrome does well in that respect. ``` @@ -400,14 +373,14 @@ google-chrome /tmp/foo.dot.pdf Example PDF files are viewable online in the next section. -### Using `--dump_graphviz=` +### Using `--dump_graphviz` -The second way to get a graphviz rendering is to pass a `--dump_graphviz=` flag -specifying a destination directory to dump GraphViz rendering to. Unlike the -previous approach, this one allows you to keep your real command-line (with your -real `--output_format` and other flags) unchanged, just appending a -`--dump_graphviz=` flag to it. This says: "I want visualizations of the actual -graph during this specific conversion process". Example: +The second way to get a graphviz rendering is to pass the `--dump_graphviz=` +flag, specifying a destination directory to dump GraphViz rendering to. Unlike +the previous approach, this one allows you to keep your real command-line (with +your real `--output_format` and other flags) unchanged, just appending a +`--dump_graphviz=` flag to it. This provides a visualization of the actual graph +during a specific conversion process. ``` curl https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_128_frozen.tgz \ @@ -425,8 +398,8 @@ bazel run --config=opt \ --dump_graphviz=/tmp ``` -This generates a few files in the destination directory, here `/tmp`. Most -important are these two files: +This generates a few files in the destination directory, here `/tmp`. The two +most important files are: ``` /tmp/toco_AT_IMPORT.dot @@ -442,8 +415,7 @@ conversion subsequently fails). `toco_AFTER_TRANSFORMATIONS.dot` represents the graph after all transformations were applied to it, just before it was exported to the `--output_file`. -Typically, this is a much smaller graph, and it conveys much more information -about each node. +Typically, this is a much smaller graph with more information about each node. Again, these can be rendered to PDFs: @@ -451,12 +423,12 @@ Again, these can be rendered to PDFs: dot -Tpdf -O /tmp/toco_*.dot ``` -The resulting files can be seen here: +Sample output files can be seen here: * [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf) * [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf). -### Legend for the graph visualizations +### Legend for the graph visualizations * Operators are red square boxes with the following hues of red: * Most operators are diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md index 5e07795223..9e99287f82 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md @@ -1,84 +1,47 @@ -# TensorFlow Lite Optimizing Converter command-line reference +# TensorFlow Lite Optimizing Converter command-line glossary This page is complete reference of command-line flags. It is complemented by the following other documents: * [README](../README.md) * [Command-line examples](cmdline_examples.md) +* [Python API examples](python_api.md) Table of contents: -[TOC] - -## High-level overview - -A full list and detailed specification of all flags is given in the next -section. For now we focus on a higher-level description of command lines: - -``` -toco \ - --input_format=... \ - --output_format=... \ - --input_file=... \ - --output_file=... \ - [model flags...] \ - [transformation flags...] \ - [logging flags...] -``` - -In other words, the converter requires at least the following mandatory flags: -`--input_format`, `--output_format`, `--input_file`, `--output_file`. Depending -on the input and output formats, additional flags may be allowed or mandatory: - -* *Model flags* provide additional information about the model stored in the - input file. - * `--output_array` or `--output_arrays` specify which arrays in the input - file are to be considered the output activations. - * `--input_array` or `--input_arrays` specify which arrays in the input - file are to be considered the input activations. - * `--input_shape` or `--input_shapes` specify the shapes of the input - arrays. - * `--input_data_type` or `--input_data_types` specify the data types of - input arrays, which can be used if the input file does not already - specify them. - * `--mean_value` or `--mean_values`, and `--std_value` or `--std_values`, - give the dequantization parameters of the input arrays, for the case - when the output file will accept quantized input arrays. -* *Transformation flags* specify options of the transformations to be applied - to the graph, i.e. they specify requested properties that the output file - should have. - * `--inference_type` specifies the type of real-numbers arrays in the - output file. This only affects arrays of real numbers and allows to - control their quantization or dequantization, effectively switching - between floating-point and quantized arithmetic for the inference - workload, as far as real numbers are concerned. Other data types are - unaffected (e.g. plain integers, and strings). - * `--inference_input_type` is like `--inference_type` but specifically - controlling input arrays, separately from other arrays. If not - specified, then `--inference_type` is used. The use case for specifying - `--inference_input_type` is when one wants to perform floating-point - inference on a quantized input, as is common in image models operating - on bitmap image inputs. - * Some transformation flags allow to carry on with quantization when the - input graph is not properly quantized: `--default_ranges_min`, - `--default_ranges_max`, `--drop_fake_quant`, - `--reorder_across_fake_quant`. -* *Logging flags* described below. - -## Command-line flags complete reference - -### Mandatory flags - -* `--input_format`. Type: string. Specifies the format of the input file. - Allowed values: +* [High-level flags](#high-level-flags) +* [Model flags](#model-flags) +* [Transformation flags](#transformation-flags) +* [Logging flags](#logging-flags) + +## High-level flags + +The following high level flags specify the location of the input and output +files. The flag `--output_file` is always required. Additionally, either +`--input_file` or `--savedmodel_directory` is required. + +* `--savedmodel_directory`. Type: string. Specifies the full path to the + directory containing the SavedModel. +* `--savedmodel_tagset`. Type: string. Default: + [kSavedModelTagServe](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h). + Specifies a comma-separated set of tags identifying the MetaGraphDef within + the SavedModel to analyze. All tags in the tag set must be specified. +* `--input_file`. Type: string. Specifies the path of the input file. This may + be either an absolute or a relative path. +* `--output_file`. Type: string. Specifies the path of the output file. + +The following high level flags specify the types of the input and output files: + +* `--input_format`. Type: string. Default: `TENSORFLOW_GRAPHDEF`. Specifies + the format of the input file. Allowed values: * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Both binary and text proto formats are allowed. - * `TFLITE` — The TensorFlow Lite flatbuffers format. -* `--output_format`. Type: string. Specifies the format of the output file. - Allowed values: + * `TFLITE` — The TensorFlow Lite FlatBuffers format. +* `--output_format`. Type: string. Default: `TFLITE`. Specifies the format of + the output file. Allowed values: * `TENSORFLOW_GRAPHDEF` — The TensorFlow GraphDef format. Always produces a file in binary (not text) proto format. - * `TFLITE` — The TensorFlow Lite flatbuffers format. + * `TFLITE` — The TensorFlow Lite FlatBuffers format. * Whether a float or quantized TensorFlow Lite file will be produced depends on the `--inference_type` flag. * `GRAPHVIZ_DOT` — The GraphViz `.dot` format. This asks the @@ -95,11 +58,11 @@ on the input and output formats, additional flags may be allowed or mandatory: you get in your actual output format as opposed to just a merely plausible visualization of a model, consider using `--dump_graphviz` instead and keeping your true `--output_format`. -* `--input_file`. Type: string. Specifies the path of the input file. This may - be either an absolute or a relative path. -* `--output_file`. Type: string. Specifies the path of the output file. -### Model flags +## Model flags + +*Model flags* provide additional information about the model stored in the input +file. * `--output_array`. Type: string. Specifies a single array as the output activations. Incompatible with `--output_arrays`. @@ -111,6 +74,10 @@ on the input and output formats, additional flags may be allowed or mandatory: * `--input_arrays`. Type: comma-separated list of strings. Specifies a list of arrays as the input activations, for models with multiple inputs. Incompatible with `--input_array`. +* `--batch_size`. Type: integer. Default: 1. Specifies the batch size for the + model. Replaces the first dimension of an input size array if undefined. Use + only with SavedModels when neither `--input_shape` nor `input_shapes` flags + are specified. Incompatible with GraphDefs. When `--input_array` is used, the following flags are available to provide additional information about the single input array: @@ -160,7 +127,11 @@ additional information about the multiple input arrays: the input arrays specified in `--input_arrays`, in the same order. See `--mean_value`, `--std_value` for details. -### Transformation flags +## Transformation flags + +*Transformation flags* specify options of the transformations to be applied to +the graph, i.e. they specify requested properties that the output file should +have. * `--inference_type`. Type: string. Sets the type of real-number arrays in the output file, that is, controls the representation (quantization) of real @@ -232,7 +203,7 @@ additional information about the multiple input arrays: graph transformations on them, at the cost of no longer faithfully matching inference and training arithmetic. -### Logging flags +## Logging flags The following are standard Google logging flags: diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/toco/g3doc/python_api.md index 36e2d9c372..f0fd638a61 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/toco/g3doc/python_api.md @@ -1,5 +1,12 @@ # TensorFlow Lite Optimizing Converter (TOCO) Python API reference +This page provides examples on how to use TOCO via the Python API. It is +complemented by the following documents: + +* [README](../README.md) +* [Command-line examples](cmdline_examples.md) +* [Command-line glossary](cmdline_reference.md) + ## High-level overview While the TensorFlow Lite Optimizing Converter can be used from the command diff --git a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg b/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg new file mode 100644 index 0000000000..a47c088991 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg @@ -0,0 +1 @@ + \ No newline at end of file -- GitLab From 6ccdb724858ac1ac343b47e73b75802e6e8fa004 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 29 Mar 2018 17:15:33 -0700 Subject: [PATCH 719/960] Add details of new mailing lists PiperOrigin-RevId: 191011187 --- tensorflow/docs_src/about/uses.md | 6 ++-- tensorflow/docs_src/community/contributing.md | 25 +++---------- tensorflow/docs_src/community/index.md | 6 +++- tensorflow/docs_src/community/leftnav_files | 1 + tensorflow/docs_src/community/lists.md | 32 ++++++++++++----- tensorflow/docs_src/community/swift.md | 35 +++++++++++++++++++ 6 files changed, 73 insertions(+), 32 deletions(-) create mode 100644 tensorflow/docs_src/community/swift.md diff --git a/tensorflow/docs_src/about/uses.md b/tensorflow/docs_src/about/uses.md index d646880bd3..d3db98203e 100644 --- a/tensorflow/docs_src/about/uses.md +++ b/tensorflow/docs_src/about/uses.md @@ -18,9 +18,9 @@ This section describes some of the current uses of the TensorFlow system. > If you are using TensorFlow for research, for education, or for production > usage in some product, we would love to add something about your usage here. -> Please feel free to email us a brief description of how you're using -> TensorFlow, or even better, send us a pull request to add an entry to this -> file. +> Please feel free to [email us](mailto:usecases@tensorflow.org) a brief +> description of how you're using TensorFlow, or even better, send us a +> pull request to add an entry to this file. * **Deep Speech**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.7.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.7.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A